ganeti-2.9.3/0000755000000000000000000000000012271445545013003 5ustar00rootroot00000000000000ganeti-2.9.3/vcs-version0000644000000000000000000000000712271443364015176 0ustar00rootroot00000000000000v2.9.3 ganeti-2.9.3/configure.ac0000644000000000000000000005652712271422343015277 0ustar00rootroot00000000000000# Configure script for Ganeti m4_define([gnt_version_major], [2]) m4_define([gnt_version_minor], [9]) m4_define([gnt_version_revision], [3]) m4_define([gnt_version_suffix], []) m4_define([gnt_version_full], m4_format([%d.%d.%d%s], gnt_version_major, gnt_version_minor, gnt_version_revision, gnt_version_suffix)) AC_PREREQ(2.59) AC_INIT(ganeti, gnt_version_full, ganeti@googlegroups.com) AC_CONFIG_AUX_DIR(autotools) AC_CONFIG_SRCDIR(configure) AM_INIT_AUTOMAKE([1.9 foreign tar-ustar -Wall -Wno-portability]) AC_SUBST([VERSION_MAJOR], gnt_version_major) AC_SUBST([VERSION_MINOR], gnt_version_minor) AC_SUBST([VERSION_REVISION], gnt_version_revision) AC_SUBST([VERSION_SUFFIX], gnt_version_suffix) AC_SUBST([VERSION_FULL], gnt_version_full) # --with-ssh-initscript=... AC_ARG_WITH([ssh-initscript], [AS_HELP_STRING([--with-ssh-initscript=SCRIPT], [SSH init script to use (default is /etc/init.d/ssh)] )], [ssh_initd_script="$withval"], [ssh_initd_script="/etc/init.d/ssh"]) AC_SUBST(SSH_INITD_SCRIPT, $ssh_initd_script) # --with-export-dir=... AC_ARG_WITH([export-dir], [AS_HELP_STRING([--with-export-dir=DIR], [directory to use by default for instance image] [ exports (default is /srv/ganeti/export)] )], [export_dir="$withval"], [export_dir="/srv/ganeti/export"]) AC_SUBST(EXPORT_DIR, $export_dir) # --with-ssh-config-dir=... AC_ARG_WITH([ssh-config-dir], [AS_HELP_STRING([--with-ssh-config-dir=DIR], [ directory with ssh host keys ] [ (default is /etc/ssh)] )], [ssh_config_dir="$withval"], [ssh_config_dir="/etc/ssh"]) AC_SUBST(SSH_CONFIG_DIR, $ssh_config_dir) # --with-xen-config-dir=... AC_ARG_WITH([xen-config-dir], [AS_HELP_STRING([--with-xen-config-dir=DIR], m4_normalize([Xen configuration directory (default: /etc/xen)]))], [xen_config_dir="$withval"], [xen_config_dir=/etc/xen]) AC_SUBST(XEN_CONFIG_DIR, $xen_config_dir) # --with-os-search-path=... # do a bit of black sed magic to for quoting of the strings in the list AC_ARG_WITH([os-search-path], [AS_HELP_STRING([--with-os-search-path=LIST], [comma separated list of directories to] [ search for OS images (default is /srv/ganeti/os)] )], [os_search_path=`echo -n "$withval" | sed -e "s/\([[^,]]*\)/'\1'/g"`], [os_search_path="'/srv/ganeti/os'"]) AC_SUBST(OS_SEARCH_PATH, $os_search_path) # --with-extstorage-search-path=... # same black sed magic for quoting of the strings in the list AC_ARG_WITH([extstorage-search-path], [AS_HELP_STRING([--with-extstorage-search-path=LIST], [comma separated list of directories to] [ search for External Storage Providers] [ (default is /srv/ganeti/extstorage)] )], [es_search_path=`echo -n "$withval" | sed -e "s/\([[^,]]*\)/'\1'/g"`], [es_search_path="'/srv/ganeti/extstorage'"]) AC_SUBST(ES_SEARCH_PATH, $es_search_path) # --with-iallocator-search-path=... 
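# (Illustrative note, values assumed: with the sed expression used for these
# search-path options, a configure argument such as
#   --with-os-search-path=/srv/ganeti/os,/usr/local/ganeti-os
# is rewritten to the quoted list
#   '/srv/ganeti/os','/usr/local/ganeti-os'
# so that each element can be substituted verbatim into generated code.)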
# do a bit of black sed magic to for quoting of the strings in the list AC_ARG_WITH([iallocator-search-path], [AS_HELP_STRING([--with-iallocator-search-path=LIST], [comma separated list of directories to] [ search for instance allocators (default is $libdir/ganeti/iallocators)] )], [iallocator_search_path=`echo -n "$withval" | sed -e "s/\([[^,]]*\)/'\1'/g"`], [iallocator_search_path="'$libdir/$PACKAGE_NAME/iallocators'"]) AC_SUBST(IALLOCATOR_SEARCH_PATH, $iallocator_search_path) # --with-xen-bootloader=... AC_ARG_WITH([xen-bootloader], [AS_HELP_STRING([--with-xen-bootloader=PATH], [bootloader for Xen hypervisor (default is empty)] )], [xen_bootloader="$withval"], [xen_bootloader=]) AC_SUBST(XEN_BOOTLOADER, $xen_bootloader) # --with-xen-kernel=... AC_ARG_WITH([xen-kernel], [AS_HELP_STRING([--with-xen-kernel=PATH], [DomU kernel image for Xen hypervisor (default is /boot/vmlinuz-3-xenU)] )], [xen_kernel="$withval"], [xen_kernel="/boot/vmlinuz-3-xenU"]) AC_SUBST(XEN_KERNEL, $xen_kernel) # --with-xen-initrd=... AC_ARG_WITH([xen-initrd], [AS_HELP_STRING([--with-xen-initrd=PATH], [DomU initrd image for Xen hypervisor (default is /boot/initrd-3-xenU)] )], [xen_initrd="$withval"], [xen_initrd="/boot/initrd-3-xenU"]) AC_SUBST(XEN_INITRD, $xen_initrd) # --with-kvm-kernel=... AC_ARG_WITH([kvm-kernel], [AS_HELP_STRING([--with-kvm-kernel=PATH], [Guest kernel image for KVM hypervisor (default is /boot/vmlinuz-3-kvmU)] )], [kvm_kernel="$withval"], [kvm_kernel="/boot/vmlinuz-3-kvmU"]) AC_SUBST(KVM_KERNEL, $kvm_kernel) # --with-kvm-path=... AC_ARG_WITH([kvm-path], [AS_HELP_STRING([--with-kvm-path=PATH], [absolute path to the kvm binary] [ (default is /usr/bin/kvm)] )], [kvm_path="$withval"], [kvm_path="/usr/bin/kvm"]) AC_SUBST(KVM_PATH, $kvm_path) # --with-lvm-stripecount=... AC_ARG_WITH([lvm-stripecount], [AS_HELP_STRING([--with-lvm-stripecount=NUM], [the default number of stripes to use for LVM volumes] [ (default is 1)] )], [lvm_stripecount="$withval"], [lvm_stripecount=1]) AC_SUBST(LVM_STRIPECOUNT, $lvm_stripecount) # --with-ssh-login-user=... AC_ARG_WITH([ssh-login-user], [AS_HELP_STRING([--with-ssh-login-user=USERNAME], [user to use for SSH logins within the cluster (default is root)] )], [ssh_login_user="$withval"], [ssh_login_user=root]) AC_SUBST(SSH_LOGIN_USER, $ssh_login_user) # --with-ssh-console-user=... AC_ARG_WITH([ssh-console-user], [AS_HELP_STRING([--with-ssh-console-user=USERNAME], [user to use for SSH logins to access instance consoles (default is root)] )], [ssh_console_user="$withval"], [ssh_console_user=root]) AC_SUBST(SSH_CONSOLE_USER, $ssh_console_user) # --with-default-user=... AC_ARG_WITH([default-user], [AS_HELP_STRING([--with-default-user=USERNAME], [default user for daemons] [ (default is to run all daemons as root)] )], [user_default="$withval"], [user_default=root]) # --with-default-group=... AC_ARG_WITH([default-group], [AS_HELP_STRING([--with-default-group=GROUPNAME], [default group for daemons] [ (default is to run all daemons under group root)] )], [group_default="$withval"], [group_default=root]) # --with-user-prefix=... 
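# Illustrative example (prefix value assumed): passing --with-user-prefix=gnt-
# to the macro below runs the split daemons as gnt-masterd, gnt-rapi,
# gnt-confd and gnt-luxid, while ganeti-noded and ganeti-mond keep the
# --with-default-user setting.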
AC_ARG_WITH([user-prefix],
  [AS_HELP_STRING([--with-user-prefix=PREFIX],
    [prefix for daemon users]
    [ (default is to run all daemons as root; use --with-default-user]
    [ to change the default)]
  )],
  [user_masterd="${withval}masterd";
   user_rapi="${withval}rapi";
   user_confd="${withval}confd";
   user_luxid="${withval}luxid";
   user_noded="$user_default";
   user_mond="$user_default"],
  [user_masterd="$user_default";
   user_rapi="$user_default";
   user_confd="$user_default";
   user_luxid="$user_default";
   user_noded="$user_default";
   user_mond="$user_default"])
AC_SUBST(MASTERD_USER, $user_masterd)
AC_SUBST(RAPI_USER, $user_rapi)
AC_SUBST(CONFD_USER, $user_confd)
AC_SUBST(LUXID_USER, $user_luxid)
AC_SUBST(NODED_USER, $user_noded)
AC_SUBST(MOND_USER, $user_mond)

# --with-group-prefix=...
AC_ARG_WITH([group-prefix],
  [AS_HELP_STRING([--with-group-prefix=PREFIX],
    [prefix for daemon POSIX groups]
    [ (default is to run all daemons under group root; use]
    [ --with-default-group to change the default)]
  )],
  [group_rapi="${withval}rapi";
   group_admin="${withval}admin";
   group_confd="${withval}confd";
   group_luxid="${withval}luxid";
   group_masterd="${withval}masterd";
   group_noded="$group_default";
   group_daemons="${withval}daemons";
   group_mond="$group_default"],
  [group_rapi="$group_default";
   group_admin="$group_default";
   group_confd="$group_default";
   group_luxid="$group_default";
   group_masterd="$group_default";
   group_noded="$group_default";
   group_daemons="$group_default";
   group_mond="$group_default"])
AC_SUBST(RAPI_GROUP, $group_rapi)
AC_SUBST(ADMIN_GROUP, $group_admin)
AC_SUBST(CONFD_GROUP, $group_confd)
AC_SUBST(LUXID_GROUP, $group_luxid)
AC_SUBST(MASTERD_GROUP, $group_masterd)
AC_SUBST(NODED_GROUP, $group_noded)
AC_SUBST(DAEMONS_GROUP, $group_daemons)
AC_SUBST(MOND_GROUP, $group_mond)

# Print the config to the user
AC_MSG_NOTICE([Running ganeti-masterd as $user_masterd:$group_masterd])
AC_MSG_NOTICE([Running ganeti-rapi as $user_rapi:$group_rapi])
AC_MSG_NOTICE([Running ganeti-confd as $user_confd:$group_confd])
AC_MSG_NOTICE([Running ganeti-luxid as $user_luxid:$group_luxid])
AC_MSG_NOTICE([Group for daemons is $group_daemons])
AC_MSG_NOTICE([Group for clients is $group_admin])

# --enable-drbd-barriers
AC_ARG_ENABLE([drbd-barriers],
  [AS_HELP_STRING([--enable-drbd-barriers],
    m4_normalize([enable the DRBD barriers functionality by default
                  (>= 8.0.12) (default: enabled)]))],
  [[if test "$enableval" != no; then
      DRBD_BARRIERS=n
      DRBD_NO_META_FLUSH=False
    else
      DRBD_BARRIERS=bf
      DRBD_NO_META_FLUSH=True
    fi
  ]],
  [DRBD_BARRIERS=n
   DRBD_NO_META_FLUSH=False
  ])
AC_SUBST(DRBD_BARRIERS, $DRBD_BARRIERS)
AC_SUBST(DRBD_NO_META_FLUSH, $DRBD_NO_META_FLUSH)

# --enable-syslog[=no/yes/only]
AC_ARG_ENABLE([syslog],
  [AS_HELP_STRING([--enable-syslog],
    [enable use of syslog (default: disabled), one of no/yes/only])],
  [[case "$enableval" in
      no) SYSLOG=no ;;
      yes) SYSLOG=yes ;;
      only) SYSLOG=only ;;
      *) SYSLOG= ;;
    esac
  ]],
  [SYSLOG=no])
if test -z "$SYSLOG"
then
  AC_MSG_ERROR([invalid value for syslog, choose one of no/yes/only])
fi
AC_SUBST(SYSLOG_USAGE, $SYSLOG)

AC_ARG_ENABLE([restricted-commands],
  [AS_HELP_STRING([--enable-restricted-commands],
    m4_normalize([enable restricted commands in the node daemon
                  (default: disabled)]))],
  [[if test "$enableval" = no; then
      enable_restricted_commands=False
    else
      enable_restricted_commands=True
    fi
  ]],
  [enable_restricted_commands=False])
AC_SUBST(ENABLE_RESTRICTED_COMMANDS, $enable_restricted_commands)

# --with-disk-separator=...
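# Hypothetical example: a hypervisor that gives ':' a special meaning in disk
# specifications could be accommodated with --with-disk-separator=';'.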
AC_ARG_WITH([disk-separator], [AS_HELP_STRING([--with-disk-separator=STRING], [Disk index separator, useful if the default of ':' is handled] [ specially by the hypervisor] )], [disk_separator="$withval"], [disk_separator=":"]) AC_SUBST(DISK_SEPARATOR, $disk_separator) # Check common programs AC_PROG_INSTALL AC_PROG_LN_S # Check for the ip command AC_ARG_VAR(IP_PATH, [ip path]) AC_PATH_PROG(IP_PATH, [ip], []) if test -z "$IP_PATH" then AC_MSG_ERROR([ip command not found]) fi # Check for pandoc AC_ARG_VAR(PANDOC, [pandoc path]) AC_PATH_PROG(PANDOC, [pandoc], []) if test -z "$PANDOC" then AC_MSG_WARN([pandoc not found, man pages rebuild will not be possible]) fi # Check for python-sphinx AC_ARG_VAR(SPHINX, [sphinx-build path]) AC_PATH_PROG(SPHINX, [sphinx-build], []) if test -z "$SPHINX" then AC_MSG_WARN(m4_normalize([sphinx-build not found, documentation rebuild will not be possible])) else # Sphinx exits with code 1 when it prints its usage sphinxver=`{ $SPHINX --version 2>&1 || :; } | head -n 3` if ! echo "$sphinxver" | grep -q -w -e '^Sphinx' -e '^Usage:'; then AC_MSG_ERROR([Unable to determine Sphinx version]) # Note: Character classes ([...]) need to be double quoted due to autoconf # using m4 elif ! echo "$sphinxver" | grep -q -E \ '^Sphinx[[[:space:]]]+(\(sphinx-build\)[[[:space:]]]+|v)[[1-9]]\>'; then AC_MSG_ERROR([Sphinx 1.0 or higher is required]) fi fi AM_CONDITIONAL([HAS_SPHINX], [test -n "$SPHINX"]) AC_ARG_ENABLE([manpages-in-doc], [AS_HELP_STRING([--enable-manpages-in-doc], m4_normalize([include man pages in HTML documentation (requires sphinx; default disabled)]))], [case "$enableval" in yes) manpages_in_doc=yes ;; no) manpages_in_doc= ;; *) AC_MSG_ERROR([Bad value $enableval for --enable-manpages-in-doc]) ;; esac ], [manpages_in_doc=]) AM_CONDITIONAL([MANPAGES_IN_DOC], [test -n "$manpages_in_doc"]) AC_SUBST(MANPAGES_IN_DOC, $manpages_in_doc) if test -z "$SPHINX" -a -n "$manpages_in_doc"; then AC_MSG_ERROR([Including man pages in HTML documentation requires sphinx]) fi # Check for graphviz (dot) AC_ARG_VAR(DOT, [dot path]) AC_PATH_PROG(DOT, [dot], []) if test -z "$DOT" then AC_MSG_WARN(m4_normalize([dot (from the graphviz suite) not found, documentation rebuild not possible])) fi # Check for pylint AC_ARG_VAR(PYLINT, [pylint path]) AC_PATH_PROG(PYLINT, [pylint], []) if test -z "$PYLINT" then AC_MSG_WARN([pylint not found, checking code will not be possible]) fi # Check for pep8 AC_ARG_VAR(PEP8, [pep8 path]) AC_PATH_PROG(PEP8, [pep8], []) if test -z "$PEP8" then AC_MSG_WARN([pep8 not found, checking code will not be complete]) fi AM_CONDITIONAL([HAS_PEP8], [test -n "$PEP8"]) # Check for python-coverage AC_ARG_VAR(PYCOVERAGE, [python-coverage path]) AC_PATH_PROGS(PYCOVERAGE, [python-coverage coverage], []) if test -z "$PYCOVERAGE" then AC_MSG_WARN(m4_normalize([python-coverage or coverage not found, evaluating Python test coverage will not be possible])) fi # Check for socat AC_ARG_VAR(SOCAT, [socat path]) AC_PATH_PROG(SOCAT, [socat], []) if test -z "$SOCAT" then AC_MSG_ERROR([socat not found]) fi # Check for qemu-img AC_ARG_VAR(QEMUIMG_PATH, [qemu-img path]) AC_PATH_PROG(QEMUIMG_PATH, [qemu-img], []) if test -z "$QEMUIMG_PATH" then AC_MSG_WARN([qemu-img not found, using ovfconverter will not be possible]) fi # --enable-confd ENABLE_CONFD= AC_ARG_ENABLE([confd], [AS_HELP_STRING([--enable-confd], [enable the ganeti-confd daemon (default: check)])], [], [enable_confd=check]) ENABLE_MOND= AC_ARG_ENABLE([monitoring], [AS_HELP_STRING([--enable-monitoring], [enable the ganeti 
monitoring daemon (default: check)])], [], [enable_monitoring=check])

# Check for ghc
AC_ARG_VAR(GHC, [ghc path])
AC_PATH_PROG(GHC, [ghc], [])
if test -z "$GHC"; then
  AC_MSG_FAILURE([ghc not found, compilation will not be possible])
fi

AC_MSG_CHECKING([for extra GHC flags])
GHC_BYVERSION_FLAGS=
# check for GHC supported flags that vary across versions
for flag in -fwarn-incomplete-uni-patterns; do
  if $GHC -e '0' $flag >/dev/null 2>/dev/null; then
    GHC_BYVERSION_FLAGS="$GHC_BYVERSION_FLAGS $flag"
  fi
done
AC_MSG_RESULT($GHC_BYVERSION_FLAGS)
AC_SUBST(GHC_BYVERSION_FLAGS)

# Check for ghc-pkg
AC_ARG_VAR(GHC_PKG, [ghc-pkg path])
AC_PATH_PROG(GHC_PKG, [ghc-pkg], [])
if test -z "$GHC_PKG"; then
  AC_MSG_FAILURE([ghc-pkg not found, compilation will not be possible])
fi

# check for modules, first custom/special checks
AC_MSG_NOTICE([checking for required haskell modules])
HS_PARALLEL3=
AC_GHC_PKG_CHECK([parallel-3.*], [HS_PARALLEL3=-DPARALLEL3],
                 [AC_GHC_PKG_REQUIRE(parallel)], t)
AC_SUBST(HS_PARALLEL3)
# and now standard modules
AC_GHC_PKG_REQUIRE(curl)
AC_GHC_PKG_REQUIRE(json)
AC_GHC_PKG_REQUIRE(network)
AC_GHC_PKG_REQUIRE(mtl)
AC_GHC_PKG_REQUIRE(bytestring)
AC_GHC_PKG_REQUIRE(utf8-string)
AC_GHC_PKG_REQUIRE(hslogger)

# extra modules for confd functionality
HS_REGEX_PCRE=-DNO_REGEX_PCRE
has_confd=False
if test "$enable_confd" != no; then
  CONFD_PKG=
  AC_GHC_PKG_CHECK([regex-pcre], [HS_REGEX_PCRE=],
                   [CONFD_PKG="$CONFD_PKG regex-pcre"])
  AC_GHC_PKG_CHECK([Crypto], [], [CONFD_PKG="$CONFD_PKG Crypto"])
  AC_GHC_PKG_CHECK([text], [], [CONFD_PKG="$CONFD_PKG text"])
  AC_GHC_PKG_CHECK([hinotify], [], [CONFD_PKG="$CONFD_PKG hinotify"])
  AC_GHC_PKG_CHECK([vector], [], [CONFD_PKG="$CONFD_PKG vector"])
  if test -z "$CONFD_PKG"; then
    has_confd=True
  elif test "$enable_confd" = check; then
    AC_MSG_WARN(m4_normalize([The required extra libraries for confd were
                              not found ($CONFD_PKG), confd disabled]))
  else
    AC_MSG_FAILURE(m4_normalize([The confd functionality was requested, but
                                 required libraries were not found:
                                 $CONFD_PKG]))
  fi
fi
AC_SUBST(HS_REGEX_PCRE)
if test "$has_confd" = True; then
  AC_MSG_NOTICE([Enabling confd usage])
fi
AC_SUBST(ENABLE_CONFD, $has_confd)
AM_CONDITIONAL([ENABLE_CONFD], [test x$has_confd = xTrue])

# extra modules for monitoring daemon functionality
has_monitoring=False
if test "$enable_monitoring" != no; then
  MONITORING_PKG=
  AC_GHC_PKG_CHECK([attoparsec], [],
                   [MONITORING_PKG="$MONITORING_PKG attoparsec"])
  AC_GHC_PKG_CHECK([snap-server], [],
                   [MONITORING_PKG="$MONITORING_PKG snap-server"])
  AC_GHC_PKG_CHECK([process], [], [MONITORING_PKG="$MONITORING_PKG process"])
  MONITORING_DEP=
  if test "$has_confd" = False; then
    MONITORING_DEP="$MONITORING_DEP confd"
  fi
  has_monitoring_pkg=False
  if test -z "$MONITORING_PKG"; then
    has_monitoring_pkg=True
  elif test "$enable_monitoring" = check; then
    AC_MSG_WARN(m4_normalize([The required extra libraries for the monitoring
                              daemon were not found ($MONITORING_PKG),
                              monitoring disabled]))
  else
    AC_MSG_FAILURE(m4_normalize([The monitoring functionality was requested,
                                 but required libraries were not found:
                                 $MONITORING_PKG]))
  fi
  has_monitoring_dep=False
  if test -z "$MONITORING_DEP"; then
    has_monitoring_dep=True
  elif test "$enable_monitoring" = check; then
    AC_MSG_WARN(m4_normalize([The optional Ganeti components required for the
                              monitoring agent were not enabled
                              ($MONITORING_DEP), monitoring disabled]))
  else
    AC_MSG_FAILURE(m4_normalize([The monitoring functionality was requested,
                                 but required optional Ganeti components were
                                 not found: $MONITORING_DEP]))
  fi
fi
if test "$has_monitoring_pkg" = True -a "$has_monitoring_dep" = True; then
  has_monitoring=True
  AC_MSG_NOTICE([Enabling the monitoring agent usage])
fi
AC_SUBST(ENABLE_MOND, $has_monitoring)
AM_CONDITIONAL([ENABLE_MOND], [test "$has_monitoring" = True])

# development modules
HS_NODEV=
AC_GHC_PKG_CHECK([QuickCheck-2.*], [], [HS_NODEV=1], t)
AC_GHC_PKG_CHECK([test-framework-0.6*], [], [HS_NODEV=1], t)
AC_GHC_PKG_CHECK([test-framework-hunit], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([test-framework-quickcheck2], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([temporary], [], [HS_NODEV=1])
# FIXME: unify checks for non-test libraries (attoparsec, hinotify, ...)
# that are needed to execute the tests, avoiding the duplication
# of the checks.
AC_GHC_PKG_CHECK([attoparsec], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([vector], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([process], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([snap-server], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([regex-pcre], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([Crypto], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([text], [], [HS_NODEV=1])
AC_GHC_PKG_CHECK([hinotify], [], [HS_NODEV=1])
if test -n "$HS_NODEV"; then
  AC_MSG_WARN(m4_normalize([Required development modules were not found,
                            you won't be able to run Haskell unittests]))
else
  AC_MSG_NOTICE([Haskell development modules found, unittests enabled])
fi
AC_SUBST(HS_NODEV)

HTOOLS=yes
AC_SUBST(HTOOLS)

# --enable-split-query
ENABLE_SPLIT_QUERY=
AC_ARG_ENABLE([split-query],
  [AS_HELP_STRING([--enable-split-query],
    [enable use of custom query daemon via confd])],
  [[case "$enableval" in
      no)
        enable_split_query=False
        ;;
      yes)
        enable_split_query=True
        ;;
      *)
        echo "Invalid value for enable-split-query '$enableval'"
        exit 1
        ;;
    esac
  ]],
  [[case "x${has_confd}x" in
     xTruex)
       enable_split_query=True
       ;;
     *)
       enable_split_query=False
       ;;
   esac]])
AC_SUBST(ENABLE_SPLIT_QUERY, $enable_split_query)

if test x$enable_split_query = xTrue -a x$has_confd != xTrue; then
  AC_MSG_ERROR([Split queries require the confd daemon])
fi

if test x$enable_split_query = xTrue; then
  AC_MSG_NOTICE([Split query functionality enabled])
fi

# Check for HsColour
HS_APIDOC=no
AC_ARG_VAR(HSCOLOUR, [HsColour path])
AC_PATH_PROG(HSCOLOUR, [HsColour], [])
if test -z "$HSCOLOUR"; then
  AC_MSG_WARN(m4_normalize([HsColour not found, htools API documentation
                            will not be generated]))
fi

# Check for haddock
AC_ARG_VAR(HADDOCK, [haddock path])
AC_PATH_PROG(HADDOCK, [haddock], [])
if test -z "$HADDOCK"; then
  AC_MSG_WARN(m4_normalize([haddock not found, htools API documentation
                            will not be generated]))
fi
if test -n "$HADDOCK" && test -n "$HSCOLOUR"; then
  HS_APIDOC=yes
fi
AC_SUBST(HS_APIDOC)

# Check for hlint
AC_ARG_VAR(HLINT, [hlint path])
AC_PATH_PROG(HLINT, [hlint], [])
if test -z "$HLINT"; then
  AC_MSG_WARN([hlint not found, checking code will not be possible])
fi

if test "$HTOOLS" != yes && test "$ENABLE_CONFD" = True; then
  AC_MSG_ERROR(m4_normalize([cannot enable ganeti-confd if
                             htools support is not enabled]))
fi

AM_CONDITIONAL([WANT_HTOOLS], [test "$HTOOLS" = yes])
AM_CONDITIONAL([WANT_HSTESTS], [test "x$HS_NODEV" = x])
AM_CONDITIONAL([WANT_HSAPIDOC], [test "$HS_APIDOC" = yes])
AM_CONDITIONAL([HAS_HLINT], [test "$HLINT"])

# Check for fakeroot
AC_ARG_VAR(FAKEROOT_PATH, [fakeroot path])
AC_PATH_PROG(FAKEROOT_PATH, [fakeroot], [])
if test -z "$FAKEROOT_PATH"; then
  AC_MSG_WARN(m4_normalize([fakeroot not found, tests that must run as root
                            will not be executed]))
fi
AM_CONDITIONAL([HAS_FAKEROOT], [test "x$FAKEROOT_PATH" != x])

SOCAT_USE_ESCAPE=
AC_ARG_ENABLE([socat-escape],
  [AS_HELP_STRING([--enable-socat-escape], [use
escape functionality available in socat >= 1.7 (default: detect automatically)])], [[if test "$enableval" = yes; then SOCAT_USE_ESCAPE=True else SOCAT_USE_ESCAPE=False fi ]]) if test -z "$SOCAT_USE_ESCAPE" then if $SOCAT -hh | grep -w -q escape; then SOCAT_USE_ESCAPE=True else SOCAT_USE_ESCAPE=False fi fi AC_SUBST(SOCAT_USE_ESCAPE) SOCAT_USE_COMPRESS= AC_ARG_ENABLE([socat-compress], [AS_HELP_STRING([--enable-socat-compress], [use OpenSSL compression option available in patched socat builds (see INSTALL for details; default: detect automatically)])], [[if test "$enableval" = yes; then SOCAT_USE_COMPRESS=True else SOCAT_USE_COMPRESS=False fi ]]) if test -z "$SOCAT_USE_COMPRESS" then if $SOCAT -hhh | grep -w -q openssl-compress; then SOCAT_USE_COMPRESS=True else SOCAT_USE_COMPRESS=False fi fi AC_SUBST(SOCAT_USE_COMPRESS) if man --help | grep -q -e --warnings then MAN_HAS_WARNINGS=1 else MAN_HAS_WARNINGS= AC_MSG_WARN(m4_normalize([man does not support --warnings, man page checks will not be possible])) fi AC_SUBST(MAN_HAS_WARNINGS) # Check for Python AM_PATH_PYTHON(2.6) AC_PYTHON_MODULE(OpenSSL, t) AC_PYTHON_MODULE(simplejson, t) AC_PYTHON_MODULE(pyparsing, t) AC_PYTHON_MODULE(pyinotify, t) AC_PYTHON_MODULE(pycurl, t) AC_PYTHON_MODULE(bitarray, t) AC_PYTHON_MODULE(ipaddr, t) AC_PYTHON_MODULE(mock) AC_PYTHON_MODULE(affinity) AC_PYTHON_MODULE(paramiko) # Development-only Python modules PY_NODEV= AC_PYTHON_MODULE(yaml) if test $HAVE_PYMOD_YAML == "no"; then PY_NODEV="$PY_NODEV yaml" fi if test -n "$PY_NODEV"; then AC_MSG_WARN(m4_normalize([Required development modules ($PY_NODEV) were not found, you won't be able to run Python unittests])) else AC_MSG_NOTICE([Python development modules found, unittests enabled]) fi AC_SUBST(PY_NODEV) AM_CONDITIONAL([PY_UNIT], [test -n $PY_NODEV]) AC_CONFIG_FILES([ Makefile ]) AC_OUTPUT ganeti-2.9.3/daemons/0000755000000000000000000000000012271445544014430 5ustar00rootroot00000000000000ganeti-2.9.3/daemons/daemon-util.in0000644000000000000000000001741612271422343017177 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2009, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e @SHELL_ENV_INIT@ readonly defaults_file="$SYSCONFDIR/default/ganeti" # This is a list of all daemons and the order in which they're started. The # order is important as there are dependencies between them. On shutdown, # they're stopped in reverse order. DAEMONS=( ganeti-noded ganeti-masterd ganeti-rapi ) _confd_enabled() { [[ "@CUSTOM_ENABLE_CONFD@" == True ]] } if _confd_enabled; then DAEMONS+=( ganeti-confd ) DAEMONS+=( ganeti-luxid ) fi _mond_enabled() { [[ "@CUSTOM_ENABLE_MOND@" == True ]] } if _mond_enabled; then DAEMONS+=( ganeti-mond ) fi NODED_ARGS= MASTERD_ARGS= CONFD_ARGS= LUXID_ARGS= RAPI_ARGS= MOND_ARGS= # Read defaults file if it exists if [[ -s $defaults_file ]]; then . 
$defaults_file
fi

# Meant to facilitate use of utilities in /etc/rc.d/init.d/functions in case
# start-stop-daemon is not available.
_ignore_error() {
  eval "$@" || :
}

_daemon_pidfile() {
  echo "$RUN_DIR/$1.pid"
}

_daemon_executable() {
  echo "@PREFIX@/sbin/$1"
}

_daemon_usergroup() {
  case "$1" in
    masterd)
      echo "@GNTMASTERUSER@:@GNTMASTERDGROUP@"
      ;;
    confd)
      echo "@GNTCONFDUSER@:@GNTCONFDGROUP@"
      ;;
    luxid)
      echo "@GNTLUXIDUSER@:@GNTLUXIDGROUP@"
      ;;
    rapi)
      echo "@GNTRAPIUSER@:@GNTRAPIGROUP@"
      ;;
    noded)
      echo "@GNTNODEDUSER@:@GNTDAEMONSGROUP@"
      ;;
    mond)
      echo "@GNTMONDUSER@:@GNTMONDGROUP@"
      ;;
    *)
      echo "root:@GNTDAEMONSGROUP@"
      ;;
  esac
}

# Checks whether the local machine is part of a cluster
check_config() {
  local server_pem=$DATA_DIR/server.pem
  local fname

  for fname in $server_pem; do
    if [[ ! -f $fname ]]; then
      echo "Missing configuration file $fname" >&2
      return 1
    fi
  done

  return 0
}

# Checks the exit code of a daemon
check_exitcode() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing exit code.' >&2
    return 1
  fi

  local rc="$1"; shift

  case "$rc" in
    0) ;;
    11)
      echo "not master"
      ;;
    *)
      echo "exit code $rc"
      return 1
      ;;
  esac

  return 0
}

# Prints path to PID file for a daemon.
daemon_pidfile() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift

  _daemon_pidfile $name
}

# Prints path to daemon executable.
daemon_executable() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift

  _daemon_executable $name
}

# Prints a list of all daemons in the order in which they should be started
list_start_daemons() {
  local name

  for name in "${DAEMONS[@]}"; do
    echo "$name"
  done
}

# Prints a list of all daemons in the order in which they should be stopped
list_stop_daemons() {
  list_start_daemons | tac
}

# Checks whether a daemon name is known
is_daemon_name() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift

  for i in "${DAEMONS[@]}"; do
    if [[ "$i" == "$name" ]]; then
      return 0
    fi
  done

  echo "Unknown daemon name '$name'" >&2
  return 1
}

# Checks whether daemon is running
check() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift
  local pidfile=$(_daemon_pidfile $name)
  local daemonexec=$(_daemon_executable $name)

  if type -p start-stop-daemon >/dev/null; then
    start-stop-daemon --stop --signal 0 --quiet \
      --pidfile $pidfile
  else
    _ignore_error status \
      -p $pidfile \
      $daemonexec
  fi
}

# Starts a daemon
start() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift

  # Convert daemon name to uppercase after removing "ganeti-" prefix
  local plain_name=${name#ganeti-}
  local ucname=$(tr a-z A-Z <<<$plain_name)
  local pidfile=$(_daemon_pidfile $name)
  local usergroup=$(_daemon_usergroup $plain_name)
  local daemonexec=$(_daemon_executable $name)

  if ( [[ "$name" == ganeti-confd ]] || [[ "$name" == ganeti-luxid ]] ) \
      && ! _confd_enabled; then
    echo 'ganeti-confd disabled at build time' >&2
    return 1
  fi

  # Read $<daemon>_ARGS and $EXTRA_<daemon>_ARGS
  eval local args="\"\$${ucname}_ARGS \$EXTRA_${ucname}_ARGS\""

  @PKGLIBDIR@/ensure-dirs

  if type -p start-stop-daemon >/dev/null; then
    start-stop-daemon --start --quiet --oknodo \
      --pidfile $pidfile \
      --startas $daemonexec \
      --chuid $usergroup \
      -- $args "$@"
  else
    # TODO: Find a way to start daemon with a group, until then the group must
    # be removed
    _ignore_error daemon \
      --pidfile $pidfile \
      --user ${usergroup%:*} \
      $daemonexec $args "$@"
  fi
}

# Stops a daemon
stop() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift
  local pidfile=$(_daemon_pidfile $name)

  if type -p start-stop-daemon >/dev/null; then
    start-stop-daemon --stop --quiet --oknodo --retry 30 \
      --pidfile $pidfile
  else
    _ignore_error killproc -p $pidfile $name
  fi
}

# Starts a daemon if it's not yet running
check_and_start() {
  local name="$1"

  if ! check $name; then
    start $name
  fi
}

# Starts the master role
start_master() {
  start ganeti-masterd
  start ganeti-rapi
  if _confd_enabled; then
    start ganeti-luxid
  else
    return 0
  fi
}

# Stops the master role
stop_master() {
  if _confd_enabled ; then
    stop ganeti-luxid
  fi
  stop ganeti-rapi
  stop ganeti-masterd
}

# Start all daemons
start_all() {
  for i in $(list_start_daemons); do
    local rc=0

    # Try to start daemon
    start $i || rc=$?

    if ! errmsg=$(check_exitcode $rc); then
      echo "$errmsg" >&2
      return 1
    fi
  done

  return 0
}

# Stop all daemons
stop_all() {
  for i in $(list_stop_daemons); do
    stop $i
  done
}

# SIGHUP a process to force re-opening its logfiles
rotate_logs() {
  if [[ "$#" -lt 1 ]]; then
    echo 'Missing daemon name.' >&2
    return 1
  fi

  local name="$1"; shift
  local pidfile=$(_daemon_pidfile $name)
  local daemonexec=$(_daemon_executable $name)

  if type -p start-stop-daemon >/dev/null; then
    start-stop-daemon --stop --signal HUP --quiet \
      --oknodo --pidfile $pidfile
  else
    _ignore_error killproc \
      -p $pidfile \
      $daemonexec -HUP
  fi
}

# SIGHUP all processes
rotate_all_logs() {
  for i in $(list_stop_daemons); do
    rotate_logs $i
  done
}

# Reloads the SSH keys
reload_ssh_keys() {
  @RPL_SSH_INITD_SCRIPT@ restart
}

# Read @SYSCONFDIR@/rc.d/init.d/functions if start-stop-daemon not available
if ! type -p start-stop-daemon >/dev/null && \
    [[ -f @SYSCONFDIR@/rc.d/init.d/functions ]]; then
  _ignore_error . @SYSCONFDIR@/rc.d/init.d/functions
fi

if [[ "$#" -lt 1 ]]; then
  echo "Usage: $0 <action>" >&2
  exit 1
fi

orig_action=$1; shift

if [[ "$orig_action" == *_* ]]; then
  echo "Command must not contain underscores" >&2
  exit 1
fi

# Replace all dashes (-) with underscores (_)
action=${orig_action//-/_}

# Is it a known function?
if ! declare -F "$action" >/dev/null 2>&1; then
  echo "Unknown command: $orig_action" >&2
  exit 1
fi

# Call handler function
$action "$@"

ganeti-2.9.3/daemons/import-export
#!/usr/bin/python
#
# Copyright (C) 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Import/export daemon.
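
A rough usage sketch (all file names here are made-up examples): the daemon
takes exactly two positional arguments, the path to its status file and the
mode, plus the options defined in ParseOptions below, e.g.

  import-export --bind 0.0.0.0 --key node.key --cert node.pem --ca ca.pem /var/run/ganeti/ie-status import
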
""" # pylint: disable=C0103 # C0103: Invalid name import-export import errno import logging import optparse import os import select import signal import subprocess import sys import time import math from ganeti import constants from ganeti import cli from ganeti import utils from ganeti import errors from ganeti import serializer from ganeti import objects from ganeti import impexpd from ganeti import netutils #: How many lines to keep in the status file MAX_RECENT_OUTPUT_LINES = 20 #: Don't update status file more than once every 5 seconds (unless forced) MIN_UPDATE_INTERVAL = 5.0 #: How long to wait for a connection to be established DEFAULT_CONNECT_TIMEOUT = 60 #: Get dd(1) statistics every few seconds DD_STATISTICS_INTERVAL = 5.0 #: Seconds for throughput calculation DD_THROUGHPUT_INTERVAL = 60.0 #: Number of samples for throughput calculation DD_THROUGHPUT_SAMPLES = int(math.ceil(float(DD_THROUGHPUT_INTERVAL) / DD_STATISTICS_INTERVAL)) # Global variable for options options = None def SetupLogging(): """Configures the logging module. """ formatter = logging.Formatter("%(asctime)s: %(message)s") stderr_handler = logging.StreamHandler() stderr_handler.setFormatter(formatter) stderr_handler.setLevel(logging.NOTSET) root_logger = logging.getLogger("") root_logger.addHandler(stderr_handler) if options.debug: root_logger.setLevel(logging.NOTSET) elif options.verbose: root_logger.setLevel(logging.INFO) else: root_logger.setLevel(logging.ERROR) # Create special logger for child process output child_logger = logging.Logger("child output") child_logger.addHandler(stderr_handler) child_logger.setLevel(logging.NOTSET) return child_logger class StatusFile: """Status file manager. """ def __init__(self, path): """Initializes class. """ self._path = path self._data = objects.ImportExportStatus(ctime=time.time(), mtime=None, recent_output=[]) def AddRecentOutput(self, line): """Adds a new line of recent output. """ self._data.recent_output.append(line) # Remove old lines del self._data.recent_output[:-MAX_RECENT_OUTPUT_LINES] def SetListenPort(self, port): """Sets the port the daemon is listening on. @type port: int @param port: TCP/UDP port """ assert isinstance(port, (int, long)) and 0 < port < (2 ** 16) self._data.listen_port = port def GetListenPort(self): """Returns the port the daemon is listening on. """ return self._data.listen_port def SetConnected(self): """Sets the connected flag. """ self._data.connected = True def GetConnected(self): """Determines whether the daemon is connected. """ return self._data.connected def SetProgress(self, mbytes, throughput, percent, eta): """Sets how much data has been transferred so far. @type mbytes: number @param mbytes: Transferred amount of data in MiB. @type throughput: float @param throughput: MiB/second @type percent: number @param percent: Percent processed @type eta: number @param eta: Expected number of seconds until done """ self._data.progress_mbytes = mbytes self._data.progress_throughput = throughput self._data.progress_percent = percent self._data.progress_eta = eta def SetExitStatus(self, exit_status, error_message): """Sets the exit status and an error message. """ # Require error message when status isn't 0 assert exit_status == 0 or error_message self._data.exit_status = exit_status self._data.error_message = error_message def ExitStatusIsSuccess(self): """Returns whether the exit status means "success". """ return not bool(self._data.error_message) def Update(self, force): """Updates the status file. 
@type force: bool @param force: Write status file in any case, not only when minimum interval is expired """ if not (force or self._data.mtime is None or time.time() > (self._data.mtime + MIN_UPDATE_INTERVAL)): return logging.debug("Updating status file %s", self._path) self._data.mtime = time.time() utils.WriteFile(self._path, data=serializer.DumpJson(self._data.ToDict()), mode=0400) def ProcessChildIO(child, socat_stderr_read_fd, dd_stderr_read_fd, dd_pid_read_fd, exp_size_read_fd, status_file, child_logger, signal_notify, signal_handler, mode): """Handles the child processes' output. """ assert not (signal_handler.signum - set([signal.SIGTERM, signal.SIGINT])), \ "Other signals are not handled in this function" # Buffer size 0 is important, otherwise .read() with a specified length # might buffer data while poll(2) won't mark its file descriptor as # readable again. socat_stderr_read = os.fdopen(socat_stderr_read_fd, "r", 0) dd_stderr_read = os.fdopen(dd_stderr_read_fd, "r", 0) dd_pid_read = os.fdopen(dd_pid_read_fd, "r", 0) exp_size_read = os.fdopen(exp_size_read_fd, "r", 0) tp_samples = DD_THROUGHPUT_SAMPLES if options.exp_size == constants.IE_CUSTOM_SIZE: exp_size = None else: exp_size = options.exp_size child_io_proc = impexpd.ChildIOProcessor(options.debug, status_file, child_logger, tp_samples, exp_size) try: fdmap = { child.stderr.fileno(): (child.stderr, child_io_proc.GetLineSplitter(impexpd.PROG_OTHER)), socat_stderr_read.fileno(): (socat_stderr_read, child_io_proc.GetLineSplitter(impexpd.PROG_SOCAT)), dd_pid_read.fileno(): (dd_pid_read, child_io_proc.GetLineSplitter(impexpd.PROG_DD_PID)), dd_stderr_read.fileno(): (dd_stderr_read, child_io_proc.GetLineSplitter(impexpd.PROG_DD)), exp_size_read.fileno(): (exp_size_read, child_io_proc.GetLineSplitter(impexpd.PROG_EXP_SIZE)), signal_notify.fileno(): (signal_notify, None), } poller = select.poll() for fd in fdmap: utils.SetNonblockFlag(fd, True) poller.register(fd, select.POLLIN) if options.connect_timeout and mode == constants.IEM_IMPORT: listen_timeout = utils.RunningTimeout(options.connect_timeout, True) else: listen_timeout = None exit_timeout = None dd_stats_timeout = None while True: # Break out of loop if only signal notify FD is left if len(fdmap) == 1 and signal_notify.fileno() in fdmap: break timeout = None if listen_timeout and not exit_timeout: assert mode == constants.IEM_IMPORT and options.connect_timeout if status_file.GetConnected(): listen_timeout = None elif listen_timeout.Remaining() < 0: errmsg = ("Child process didn't establish connection in time" " (%0.0fs), sending SIGTERM" % options.connect_timeout) logging.error(errmsg) status_file.AddRecentOutput(errmsg) status_file.Update(True) child.Kill(signal.SIGTERM) exit_timeout = \ utils.RunningTimeout(constants.CHILD_LINGER_TIMEOUT, True) # Next block will calculate timeout else: # Not yet connected, check again in a second timeout = 1000 if exit_timeout: timeout = exit_timeout.Remaining() * 1000 if timeout < 0: logging.info("Child process didn't exit in time") break if (not dd_stats_timeout) or dd_stats_timeout.Remaining() < 0: notify_status = child_io_proc.NotifyDd() if notify_status: # Schedule next notification dd_stats_timeout = utils.RunningTimeout(DD_STATISTICS_INTERVAL, True) else: # Try again soon (dd isn't ready yet) dd_stats_timeout = utils.RunningTimeout(1.0, True) if dd_stats_timeout: dd_timeout = max(0, dd_stats_timeout.Remaining() * 1000) if timeout is None: timeout = dd_timeout else: timeout = min(timeout, dd_timeout) for fd, event in 
utils.RetryOnSignal(poller.poll, timeout): if event & (select.POLLIN | event & select.POLLPRI): (from_, to) = fdmap[fd] # Read up to 1 KB of data data = from_.read(1024) if data: if to: to.write(data) elif fd == signal_notify.fileno(): # Signal handling if signal_handler.called: signal_handler.Clear() if exit_timeout: logging.info("Child process still has about %0.2f seconds" " to exit", exit_timeout.Remaining()) else: logging.info("Giving child process %0.2f seconds to exit", constants.CHILD_LINGER_TIMEOUT) exit_timeout = \ utils.RunningTimeout(constants.CHILD_LINGER_TIMEOUT, True) else: poller.unregister(fd) del fdmap[fd] elif event & (select.POLLNVAL | select.POLLHUP | select.POLLERR): poller.unregister(fd) del fdmap[fd] child_io_proc.FlushAll() # If there was a timeout calculator, we were waiting for the child to # finish, e.g. due to a signal return not bool(exit_timeout) finally: child_io_proc.CloseAll() def ParseOptions(): """Parses the options passed to the program. @return: Arguments to program """ global options # pylint: disable=W0603 parser = optparse.OptionParser(usage=("%%prog {%s|%s}" % (constants.IEM_IMPORT, constants.IEM_EXPORT))) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.VERBOSE_OPT) parser.add_option("--key", dest="key", action="store", type="string", help="RSA key file") parser.add_option("--cert", dest="cert", action="store", type="string", help="X509 certificate file") parser.add_option("--ca", dest="ca", action="store", type="string", help="X509 CA file") parser.add_option("--bind", dest="bind", action="store", type="string", help="Bind address") parser.add_option("--ipv4", dest="ipv4", action="store_true", help="Use IPv4 only") parser.add_option("--ipv6", dest="ipv6", action="store_true", help="Use IPv6 only") parser.add_option("--host", dest="host", action="store", type="string", help="Remote hostname") parser.add_option("--port", dest="port", action="store", type="int", help="Remote port") parser.add_option("--connect-retries", dest="connect_retries", action="store", type="int", default=0, help=("How many times the connection should be retried" " (export only)")) parser.add_option("--connect-timeout", dest="connect_timeout", action="store", type="int", default=DEFAULT_CONNECT_TIMEOUT, help="Timeout for connection to be established (seconds)") parser.add_option("--compress", dest="compress", action="store", type="choice", help="Compression method", metavar="[%s]" % "|".join(constants.IEC_ALL), choices=list(constants.IEC_ALL), default=constants.IEC_GZIP) parser.add_option("--expected-size", dest="exp_size", action="store", type="string", default=None, help="Expected import/export size (MiB)") parser.add_option("--magic", dest="magic", action="store", type="string", default=None, help="Magic string") parser.add_option("--cmd-prefix", dest="cmd_prefix", action="store", type="string", help="Command prefix") parser.add_option("--cmd-suffix", dest="cmd_suffix", action="store", type="string", help="Command suffix") (options, args) = parser.parse_args() if len(args) != 2: # Won't return parser.error("Expected exactly two arguments") (status_file_path, mode) = args if mode not in (constants.IEM_IMPORT, constants.IEM_EXPORT): # Won't return parser.error("Invalid mode: %s" % mode) # Normalize and check parameters if options.host is not None and not netutils.IPAddress.IsValid(options.host): try: options.host = netutils.Hostname.GetNormalizedName(options.host) except errors.OpPrereqError, err: parser.error("Invalid hostname '%s': %s" % (options.host, err)) if 
options.port is not None: options.port = utils.ValidateServiceName(options.port) if (options.exp_size is not None and options.exp_size != constants.IE_CUSTOM_SIZE): try: options.exp_size = int(options.exp_size) except (ValueError, TypeError), err: # Won't return parser.error("Invalid value for --expected-size: %s (%s)" % (options.exp_size, err)) if not (options.magic is None or constants.IE_MAGIC_RE.match(options.magic)): parser.error("Magic must match regular expression %s" % constants.IE_MAGIC_RE.pattern) if options.ipv4 and options.ipv6: parser.error("Can only use one of --ipv4 and --ipv6") return (status_file_path, mode) class ChildProcess(subprocess.Popen): def __init__(self, env, cmd, noclose_fds): """Initializes this class. """ self._noclose_fds = noclose_fds # Not using close_fds because doing so would also close the socat stderr # pipe, which we still need. subprocess.Popen.__init__(self, cmd, env=env, shell=False, close_fds=False, stderr=subprocess.PIPE, stdout=None, stdin=None, preexec_fn=self._ChildPreexec) self._SetProcessGroup() def _ChildPreexec(self): """Called before child executable is execve'd. """ # Move to separate process group. By sending a signal to its process group # we can kill the child process and all grandchildren. os.setpgid(0, 0) # Close almost all file descriptors utils.CloseFDs(noclose_fds=self._noclose_fds) def _SetProcessGroup(self): """Sets the child's process group. """ assert self.pid, "Can't be called in child process" # Avoid race condition by setting child's process group (as good as # possible in Python) before sending signals to child. For an # explanation, see preexec function for child. try: os.setpgid(self.pid, self.pid) except EnvironmentError, err: # If the child process was faster we receive EPERM or EACCES if err.errno not in (errno.EPERM, errno.EACCES): raise def Kill(self, signum): """Sends signal to child process. """ logging.info("Sending signal %s to child process", signum) utils.IgnoreProcessNotFound(os.killpg, self.pid, signum) def ForceQuit(self): """Ensure child process is no longer running. """ # Final check if child process is still alive if utils.RetryOnSignal(self.poll) is None: logging.error("Child process still alive, sending SIGKILL") self.Kill(signal.SIGKILL) utils.RetryOnSignal(self.wait) def main(): """Main function. """ # Option parsing (status_file_path, mode) = ParseOptions() # Configure logging child_logger = SetupLogging() status_file = StatusFile(status_file_path) try: try: # Pipe to receive socat's stderr output (socat_stderr_read_fd, socat_stderr_write_fd) = os.pipe() # Pipe to receive dd's stderr output (dd_stderr_read_fd, dd_stderr_write_fd) = os.pipe() # Pipe to receive dd's PID (dd_pid_read_fd, dd_pid_write_fd) = os.pipe() # Pipe to receive size predicted by export script (exp_size_read_fd, exp_size_write_fd) = os.pipe() # Get child process command cmd_builder = impexpd.CommandBuilder(mode, options, socat_stderr_write_fd, dd_stderr_write_fd, dd_pid_write_fd) cmd = cmd_builder.GetCommand() # Prepare command environment cmd_env = os.environ.copy() if options.exp_size == constants.IE_CUSTOM_SIZE: cmd_env["EXP_SIZE_FD"] = str(exp_size_write_fd) logging.debug("Starting command %r", cmd) # Start child process child = ChildProcess(cmd_env, cmd, [socat_stderr_write_fd, dd_stderr_write_fd, dd_pid_write_fd, exp_size_write_fd]) try: def _ForwardSignal(signum, _): """Forwards signals to child process. 
""" child.Kill(signum) signal_wakeup = utils.SignalWakeupFd() try: # TODO: There is a race condition between starting the child and # handling the signals here. While there might be a way to work around # it by registering the handlers before starting the child and # deferring sent signals until the child is available, doing so can be # complicated. signal_handler = utils.SignalHandler([signal.SIGTERM, signal.SIGINT], handler_fn=_ForwardSignal, wakeup=signal_wakeup) try: # Close child's side utils.RetryOnSignal(os.close, socat_stderr_write_fd) utils.RetryOnSignal(os.close, dd_stderr_write_fd) utils.RetryOnSignal(os.close, dd_pid_write_fd) utils.RetryOnSignal(os.close, exp_size_write_fd) if ProcessChildIO(child, socat_stderr_read_fd, dd_stderr_read_fd, dd_pid_read_fd, exp_size_read_fd, status_file, child_logger, signal_wakeup, signal_handler, mode): # The child closed all its file descriptors and there was no # signal # TODO: Implement timeout instead of waiting indefinitely utils.RetryOnSignal(child.wait) finally: signal_handler.Reset() finally: signal_wakeup.Reset() finally: child.ForceQuit() if child.returncode == 0: errmsg = None elif child.returncode < 0: errmsg = "Exited due to signal %s" % (-child.returncode, ) else: errmsg = "Exited with status %s" % (child.returncode, ) status_file.SetExitStatus(child.returncode, errmsg) except Exception, err: # pylint: disable=W0703 logging.exception("Unhandled error occurred") status_file.SetExitStatus(constants.EXIT_FAILURE, "Unhandled error occurred: %s" % (err, )) if status_file.ExitStatusIsSuccess(): sys.exit(constants.EXIT_SUCCESS) sys.exit(constants.EXIT_FAILURE) finally: status_file.Update(True) if __name__ == "__main__": main() ganeti-2.9.3/daemons/ganeti-cleaner.in0000644000000000000000000000563612244641676017654 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
set -e -u @SHELL_ENV_INIT@ # Overridden by unittest : ${CHECK_CERT_EXPIRED:=$PKGLIBDIR/check-cert-expired} usage() { echo "Usage: $0 node|master" 2>&1 exit $1 } if [[ "$#" -ne 1 ]]; then usage 1 fi case "$1" in node) readonly CLEANER_LOG_DIR=$LOG_DIR/cleaner ;; master) readonly CLEANER_LOG_DIR=$LOG_DIR/master-cleaner ;; --help-completion) echo "choices=node,master 1 1" exit 0 ;; --help) usage 0 ;; *) usage 1 ;; esac readonly CRYPTO_DIR=$RUN_DIR/crypto readonly QUEUE_ARCHIVE_DIR=$DATA_DIR/queue/archive in_cluster() { [[ -e $DATA_DIR/ssconf_master_node ]] } cleanup_node() { # Return if directory for crypto keys doesn't exist [[ -d $CRYPTO_DIR ]] || return 0 find $CRYPTO_DIR -mindepth 1 -maxdepth 1 -type d | \ while read dir; do if $CHECK_CERT_EXPIRED $dir/cert; then rm -vf $dir/{cert,key} rmdir -v --ignore-fail-on-non-empty $dir fi done } cleanup_watcher() { # Return if machine is not part of a cluster in_cluster || return 0 # Remove old watcher files find $DATA_DIR -maxdepth 1 -type f -mtime +$REMOVE_AFTER \ \( -name 'watcher.*-*-*-*.data' -or \ -name 'watcher.*-*-*-*.instance-status' \) -print0 | \ xargs -r0 rm -vf } cleanup_master() { # Return if machine is not part of a cluster in_cluster || return 0 # Return if queue archive directory doesn't exist [[ -d $QUEUE_ARCHIVE_DIR ]] || return 0 # Remove old jobs find $QUEUE_ARCHIVE_DIR -mindepth 2 -type f -mtime +$REMOVE_AFTER -print0 | \ xargs -r0 rm -vf } # Define how many days archived jobs should be left alone REMOVE_AFTER=21 # Define how many log files to keep around (usually one per day) KEEP_LOGS=50 # Log file for this run LOG_FILE=$CLEANER_LOG_DIR/cleaner-$(date +'%Y-%m-%dT%H_%M').$$.log # Create log directory mkdir -p $CLEANER_LOG_DIR # Redirect all output to log file exec >>$LOG_FILE 2>&1 echo "Cleaner started at $(date)" # Remove old cleaner log files find $CLEANER_LOG_DIR -maxdepth 1 -type f | sort | head -n -$KEEP_LOGS | \ xargs -r rm -vf case "$1" in node) cleanup_node cleanup_watcher ;; master) cleanup_master ;; esac exit 0 ganeti-2.9.3/autotools/0000755000000000000000000000000012271445544015033 5ustar00rootroot00000000000000ganeti-2.9.3/autotools/check-man-warnings0000744000000000000000000000166112230001635020417 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e ! LANG=en_US.UTF-8 MANWIDTH=80 \ man --warnings --encoding=utf8 --local-file "$1" 2>&1 >/dev/null | \ grep -v -e "cannot adjust line" -e "can't break line" | \ grep . ganeti-2.9.3/autotools/ac_ghc_pkg.m40000644000000000000000000000407012244641676017347 0ustar00rootroot00000000000000##### # Copyright (C) 2012 Google Inc. 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#####
#
# SYNOPSIS
#
#   AC_GHC_PKG_CHECK(modname, action_found, action_not_found, extended)
#
# DESCRIPTION
#
#   Checks for a Haskell (GHC) module. If found, execute the second
#   argument, if not found, the third one.
#
#   If the fourth argument is non-empty, then the check will be done
#   via 'ghc-pkg list' (which supports patterns), otherwise it will
#   use just 'ghc-pkg latest'.
#
#
#####

AC_DEFUN([AC_GHC_PKG_CHECK],[
  if test -z $GHC_PKG; then
    AC_MSG_ERROR([GHC_PKG not defined])
  fi
  AC_MSG_CHECKING([haskell library $1])
  if test -n "$4"; then
    GHC_PKG_RESULT=$($GHC_PKG --simple-output list '$1'|tail -n1)
  else
    GHC_PKG_RESULT=$($GHC_PKG latest '$1' 2>/dev/null)
  fi
  if test -n "$GHC_PKG_RESULT"; then
    AC_MSG_RESULT($GHC_PKG_RESULT)
    $2
  else
    AC_MSG_RESULT([no])
    $3
  fi
])

#####
#
# SYNOPSIS
#
#   AC_GHC_PKG_REQUIRE(modname, extended)
#
# DESCRIPTION
#
#   Checks for a Haskell (GHC) module, and aborts if not found. If the
#   second argument is non-empty, then the check will be done via
#   'ghc-pkg list' (which supports patterns), otherwise it will use
#   just 'ghc-pkg latest'.
#
#
#####

AC_DEFUN([AC_GHC_PKG_REQUIRE],[
  AC_GHC_PKG_CHECK($1, [],
                   [AC_MSG_FAILURE([Required Haskell module $1 not found])],
                   $2)
])

ganeti-2.9.3/autotools/check-imports
#!/usr/bin/python
#
# Copyright (C) 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Script to check module imports.
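
Meant to be run at build time, roughly as (arguments are illustrative):

  check-imports SRCDIR lib/foo.py lib/bar.py

where the first argument is the source directory and the remaining arguments
are Python modules to load; a module fails the check if loading it pulls in
other modules from the source tree.
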
""" # pylint: disable=C0103 # C0103: Invalid name import sys # All modules imported after this line are removed from the global list before # importing a module to be checked _STANDARD_MODULES = sys.modules.keys() import os.path from ganeti import build def main(): args = sys.argv[1:] # Get references to functions used later on load_module = build.LoadModule abspath = os.path.abspath commonprefix = os.path.commonprefix normpath = os.path.normpath script_path = abspath(__file__) srcdir = normpath(abspath(args.pop(0))) assert "ganeti" in sys.modules for filename in args: # Reset global state for name in sys.modules.keys(): if name not in _STANDARD_MODULES: sys.modules.pop(name, None) assert "ganeti" not in sys.modules # Load module (this might import other modules) module = load_module(filename) result = [] for (name, checkmod) in sorted(sys.modules.items()): if checkmod is None or checkmod == module: continue try: checkmodpath = getattr(checkmod, "__file__") except AttributeError: # Built-in module pass else: abscheckmodpath = os.path.abspath(checkmodpath) if abscheckmodpath == script_path: # Ignore check script continue if commonprefix([abscheckmodpath, srcdir]) == srcdir: result.append(name) if result: raise Exception("Module '%s' has illegal imports: %s" % (filename, ", ".join(result))) if __name__ == "__main__": main() ganeti-2.9.3/autotools/check-man-dashes0000744000000000000000000000156012244641676020060 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e ! grep -F '\[em]' "$1" || \ { echo "Unescaped dashes found in $1, use \\-- instead of --" 1>&2; exit 1; } ganeti-2.9.3/autotools/docpp0000744000000000000000000000270012230001635016043 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to replace special directives in documentation. 
""" import re import fileinput from ganeti import query from ganeti.build import sphinx_ext _DOC_RE = re.compile(r"^@(?P[A-Z_]+)_(?P[A-Z]+)@$") _DOC_CLASSES_DATA = { "CONSTANTS": (sphinx_ext.DOCUMENTED_CONSTANTS, sphinx_ext.BuildValuesDoc), "QUERY_FIELDS": (query.ALL_FIELDS, sphinx_ext.BuildQueryFields), } def main(): for line in fileinput.input(): m = _DOC_RE.match(line) if m: fields_dict, builder = _DOC_CLASSES_DATA[m.group("class")] fields = fields_dict[m.group("kind").lower()] for i in builder(fields): print i else: print line, if __name__ == "__main__": main() ganeti-2.9.3/autotools/build-bash-completion0000744000000000000000000006367612271422343021152 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to generate bash_completion script for Ganeti. """ # pylint: disable=C0103 # [C0103] Invalid name build-bash-completion import os import os.path import re import itertools import optparse from cStringIO import StringIO from ganeti import constants from ganeti import cli from ganeti import utils from ganeti import build from ganeti import pathutils from ganeti.tools import burnin # _autoconf shouldn't be imported from anywhere except constants.py, but we're # making an exception here because this script is only used at build time. from ganeti import _autoconf #: Regular expression describing desired format of option names. Long names can #: contain lowercase characters, numbers and dashes only. _OPT_NAME_RE = re.compile(r"^-[a-zA-Z0-9]|--[a-z][-a-z0-9]+$") def WritePreamble(sw, support_debug): """Writes the script preamble. Helper functions should be written here. 
""" sw.Write("# This script is automatically generated at build time.") sw.Write("# Do not modify manually.") if support_debug: sw.Write("_gnt_log() {") sw.IncIndent() try: sw.Write("if [[ -n \"$GANETI_COMPL_LOG\" ]]; then") sw.IncIndent() try: sw.Write("{") sw.IncIndent() try: sw.Write("echo ---") sw.Write("echo \"$@\"") sw.Write("echo") finally: sw.DecIndent() sw.Write("} >> $GANETI_COMPL_LOG") finally: sw.DecIndent() sw.Write("fi") finally: sw.DecIndent() sw.Write("}") sw.Write("_ganeti_nodes() {") sw.IncIndent() try: node_list_path = os.path.join(pathutils.DATA_DIR, "ssconf_node_list") sw.Write("cat %s 2>/dev/null || :", utils.ShellQuote(node_list_path)) finally: sw.DecIndent() sw.Write("}") sw.Write("_ganeti_instances() {") sw.IncIndent() try: instance_list_path = os.path.join(pathutils.DATA_DIR, "ssconf_instance_list") sw.Write("cat %s 2>/dev/null || :", utils.ShellQuote(instance_list_path)) finally: sw.DecIndent() sw.Write("}") sw.Write("_ganeti_jobs() {") sw.IncIndent() try: # FIXME: this is really going into the internals of the job queue sw.Write(("local jlist=($( shopt -s nullglob &&" " cd %s 2>/dev/null && echo job-* || : ))"), utils.ShellQuote(pathutils.QUEUE_DIR)) sw.Write('echo "${jlist[@]/job-/}"') finally: sw.DecIndent() sw.Write("}") for (fnname, paths) in [ ("os", pathutils.OS_SEARCH_PATH), ("iallocator", constants.IALLOCATOR_SEARCH_PATH), ]: sw.Write("_ganeti_%s() {", fnname) sw.IncIndent() try: # FIXME: Make querying the master for all OSes cheap for path in paths: sw.Write("( shopt -s nullglob && cd %s 2>/dev/null && echo * || : )", utils.ShellQuote(path)) finally: sw.DecIndent() sw.Write("}") sw.Write("_ganeti_nodegroup() {") sw.IncIndent() try: nodegroups_path = os.path.join(pathutils.DATA_DIR, "ssconf_nodegroups") sw.Write("cat %s 2>/dev/null || :", utils.ShellQuote(nodegroups_path)) finally: sw.DecIndent() sw.Write("}") sw.Write("_ganeti_network() {") sw.IncIndent() try: networks_path = os.path.join(pathutils.DATA_DIR, "ssconf_networks") sw.Write("cat %s 2>/dev/null || :", utils.ShellQuote(networks_path)) finally: sw.DecIndent() sw.Write("}") # Params: # Result variable: $first_arg_idx sw.Write("_ganeti_find_first_arg() {") sw.IncIndent() try: sw.Write("local w i") sw.Write("first_arg_idx=") sw.Write("for (( i=$1; i < COMP_CWORD; ++i )); do") sw.IncIndent() try: sw.Write("w=${COMP_WORDS[$i]}") # Skip option value sw.Write("""if [[ -n "$2" && "$w" == @($2) ]]; then let ++i""") # Skip sw.Write("""elif [[ -n "$3" && "$w" == @($3) ]]; then :""") # Ah, we found the first argument sw.Write("else first_arg_idx=$i; break;") sw.Write("fi") finally: sw.DecIndent() sw.Write("done") finally: sw.DecIndent() sw.Write("}") # Params: # Input variable: $first_arg_idx # Result variables: $arg_idx, $choices sw.Write("_ganeti_list_options() {") sw.IncIndent() try: sw.Write("""if [[ -z "$first_arg_idx" ]]; then""") sw.IncIndent() try: sw.Write("arg_idx=0") # Show options only if the current word starts with a dash sw.Write("""if [[ "$cur" == -* ]]; then""") sw.IncIndent() try: sw.Write("choices=$1") finally: sw.DecIndent() sw.Write("fi") sw.Write("return") finally: sw.DecIndent() sw.Write("fi") # Calculate position of current argument sw.Write("arg_idx=$(( COMP_CWORD - first_arg_idx ))") sw.Write("choices=") finally: sw.DecIndent() sw.Write("}") # Params: # Result variable: $optcur sw.Write("_gnt_checkopt() {") sw.IncIndent() try: sw.Write("""if [[ -n "$1" && "$cur" == @($1) ]]; then""") sw.IncIndent() try: sw.Write("optcur=\"${cur#--*=}\"") sw.Write("return 0") finally: 
sw.DecIndent() sw.Write("""elif [[ -n "$2" && "$prev" == @($2) ]]; then""") sw.IncIndent() try: sw.Write("optcur=\"$cur\"") sw.Write("return 0") finally: sw.DecIndent() sw.Write("fi") if support_debug: sw.Write("_gnt_log optcur=\"'$optcur'\"") sw.Write("return 1") finally: sw.DecIndent() sw.Write("}") # Params: # Result variable: $COMPREPLY sw.Write("_gnt_compgen() {") sw.IncIndent() try: sw.Write("""COMPREPLY=( $(compgen "$@") )""") if support_debug: sw.Write("_gnt_log COMPREPLY=\"${COMPREPLY[@]}\"") finally: sw.DecIndent() sw.Write("}") def WriteCompReply(sw, args, cur="\"$cur\""): sw.Write("_gnt_compgen %s -- %s", args, cur) sw.Write("return") class CompletionWriter: """Command completion writer class. """ def __init__(self, arg_offset, opts, args, support_debug): self.arg_offset = arg_offset self.opts = opts self.args = args self.support_debug = support_debug for opt in opts: # While documented, these variables aren't seen as public attributes by # pylint. pylint: disable=W0212 opt.all_names = sorted(opt._short_opts + opt._long_opts) invalid = list(itertools.ifilterfalse(_OPT_NAME_RE.match, opt.all_names)) if invalid: raise Exception("Option names don't match regular expression '%s': %s" % (_OPT_NAME_RE.pattern, utils.CommaJoin(invalid))) def _FindFirstArgument(self, sw): ignore = [] skip_one = [] for opt in self.opts: if opt.takes_value(): # Ignore value for i in opt.all_names: if i.startswith("--"): ignore.append("%s=*" % utils.ShellQuote(i)) skip_one.append(utils.ShellQuote(i)) else: ignore.extend([utils.ShellQuote(i) for i in opt.all_names]) ignore = sorted(utils.UniqueSequence(ignore)) skip_one = sorted(utils.UniqueSequence(skip_one)) if ignore or skip_one: # Try to locate first argument sw.Write("_ganeti_find_first_arg %s %s %s", self.arg_offset + 1, utils.ShellQuote("|".join(skip_one)), utils.ShellQuote("|".join(ignore))) else: # When there are no options the first argument is always at position # offset + 1 sw.Write("first_arg_idx=%s", self.arg_offset + 1) def _CompleteOptionValues(self, sw): # Group by values # "values" -> [optname1, optname2, ...] values = {} for opt in self.opts: if not opt.takes_value(): continue # Only static choices implemented so far (e.g. 
no node list) suggest = getattr(opt, "completion_suggest", None) # our custom option type if opt.type == "bool": suggest = ["yes", "no"] if not suggest: suggest = opt.choices if (isinstance(suggest, (int, long)) and suggest in cli.OPT_COMPL_ALL): key = suggest elif suggest: key = " ".join(sorted(suggest)) else: key = "" values.setdefault(key, []).extend(opt.all_names) # Don't write any code if there are no option values if not values: return cur = "\"$optcur\"" wrote_opt = False for (suggest, allnames) in values.items(): longnames = [i for i in allnames if i.startswith("--")] if wrote_opt: condcmd = "elif" else: condcmd = "if" sw.Write("%s _gnt_checkopt %s %s; then", condcmd, utils.ShellQuote("|".join(["%s=*" % i for i in longnames])), utils.ShellQuote("|".join(allnames))) sw.IncIndent() try: if suggest == cli.OPT_COMPL_MANY_NODES: # TODO: Implement comma-separated values WriteCompReply(sw, "-W ''", cur=cur) elif suggest == cli.OPT_COMPL_ONE_NODE: WriteCompReply(sw, "-W \"$(_ganeti_nodes)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_INSTANCE: WriteCompReply(sw, "-W \"$(_ganeti_instances)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_OS: WriteCompReply(sw, "-W \"$(_ganeti_os)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_EXTSTORAGE: WriteCompReply(sw, "-W \"$(_ganeti_extstorage)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_IALLOCATOR: WriteCompReply(sw, "-W \"$(_ganeti_iallocator)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_NODEGROUP: WriteCompReply(sw, "-W \"$(_ganeti_nodegroup)\"", cur=cur) elif suggest == cli.OPT_COMPL_ONE_NETWORK: WriteCompReply(sw, "-W \"$(_ganeti_network)\"", cur=cur) elif suggest == cli.OPT_COMPL_INST_ADD_NODES: sw.Write("local tmp= node1= pfx= curvalue=\"${optcur#*:}\"") sw.Write("if [[ \"$optcur\" == *:* ]]; then") sw.IncIndent() try: sw.Write("node1=\"${optcur%%:*}\"") sw.Write("if [[ \"$COMP_WORDBREAKS\" != *:* ]]; then") sw.IncIndent() try: sw.Write("pfx=\"$node1:\"") finally: sw.DecIndent() sw.Write("fi") finally: sw.DecIndent() sw.Write("fi") if self.support_debug: sw.Write("_gnt_log pfx=\"'$pfx'\" curvalue=\"'$curvalue'\"" " node1=\"'$node1'\"") sw.Write("for i in $(_ganeti_nodes); do") sw.IncIndent() try: sw.Write("if [[ -z \"$node1\" ]]; then") sw.IncIndent() try: sw.Write("tmp=\"$tmp $i $i:\"") finally: sw.DecIndent() sw.Write("elif [[ \"$i\" != \"$node1\" ]]; then") sw.IncIndent() try: sw.Write("tmp=\"$tmp $i\"") finally: sw.DecIndent() sw.Write("fi") finally: sw.DecIndent() sw.Write("done") WriteCompReply(sw, "-P \"$pfx\" -W \"$tmp\"", cur="\"$curvalue\"") else: WriteCompReply(sw, "-W %s" % utils.ShellQuote(suggest), cur=cur) finally: sw.DecIndent() wrote_opt = True if wrote_opt: sw.Write("fi") return def _CompleteArguments(self, sw): if not (self.opts or self.args): return all_option_names = [] for opt in self.opts: all_option_names.extend(opt.all_names) all_option_names.sort() # List options if no argument has been specified yet sw.Write("_ganeti_list_options %s", utils.ShellQuote(" ".join(all_option_names))) if self.args: last_idx = len(self.args) - 1 last_arg_end = 0 varlen_arg_idx = None wrote_arg = False sw.Write("compgenargs=") for idx, arg in enumerate(self.args): assert arg.min is not None and arg.min >= 0 assert not (idx < last_idx and arg.max is None) if arg.min != arg.max or arg.max is None: if varlen_arg_idx is not None: raise Exception("Only one argument can have a variable length") varlen_arg_idx = idx compgenargs = [] if isinstance(arg, cli.ArgUnknown): choices = "" elif isinstance(arg, cli.ArgSuggest): choices = 
utils.ShellQuote(" ".join(arg.choices)) elif isinstance(arg, cli.ArgInstance): choices = "$(_ganeti_instances)" elif isinstance(arg, cli.ArgNode): choices = "$(_ganeti_nodes)" elif isinstance(arg, cli.ArgGroup): choices = "$(_ganeti_nodegroup)" elif isinstance(arg, cli.ArgNetwork): choices = "$(_ganeti_network)" elif isinstance(arg, cli.ArgJobId): choices = "$(_ganeti_jobs)" elif isinstance(arg, cli.ArgOs): choices = "$(_ganeti_os)" elif isinstance(arg, cli.ArgExtStorage): choices = "$(_ganeti_extstorage)" elif isinstance(arg, cli.ArgFile): choices = "" compgenargs.append("-f") elif isinstance(arg, cli.ArgCommand): choices = "" compgenargs.append("-c") elif isinstance(arg, cli.ArgHost): choices = "" compgenargs.append("-A hostname") else: raise Exception("Unknown argument type %r" % arg) if arg.min == 1 and arg.max == 1: cmpcode = """"$arg_idx" == %d""" % (last_arg_end) elif arg.max is None: cmpcode = """"$arg_idx" -ge %d""" % (last_arg_end) elif arg.min <= arg.max: cmpcode = (""""$arg_idx" -ge %d && "$arg_idx" -lt %d""" % (last_arg_end, last_arg_end + arg.max)) else: raise Exception("Unable to generate argument position condition") last_arg_end += arg.min if choices or compgenargs: if wrote_arg: condcmd = "elif" else: condcmd = "if" sw.Write("""%s [[ %s ]]; then""", condcmd, cmpcode) sw.IncIndent() try: if choices: sw.Write("""choices="$choices "%s""", choices) if compgenargs: sw.Write("compgenargs=%s", utils.ShellQuote(" ".join(compgenargs))) finally: sw.DecIndent() wrote_arg = True if wrote_arg: sw.Write("fi") if self.args: WriteCompReply(sw, """-W "$choices" $compgenargs""") else: # $compgenargs exists only if there are arguments WriteCompReply(sw, '-W "$choices"') def WriteTo(self, sw): self._FindFirstArgument(sw) self._CompleteOptionValues(sw) self._CompleteArguments(sw) def WriteCompletion(sw, scriptname, funcname, support_debug, commands=None, opts=None, args=None): """Writes the completion code for one command. 
@type sw: ShellWriter @param sw: Script writer @type scriptname: string @param scriptname: Name of command line program @type funcname: string @param funcname: Shell function name @type commands: list @param commands: List of all subcommands in this program """ sw.Write("%s() {", funcname) sw.IncIndent() try: sw.Write("local " ' cur="${COMP_WORDS[COMP_CWORD]}"' ' prev="${COMP_WORDS[COMP_CWORD-1]}"' ' i first_arg_idx choices compgenargs arg_idx optcur') if support_debug: sw.Write("_gnt_log cur=\"$cur\" prev=\"$prev\"") sw.Write("[[ -n \"$GANETI_COMPL_LOG\" ]] &&" " _gnt_log \"$(set | grep ^COMP_)\"") sw.Write("COMPREPLY=()") if opts is not None and args is not None: assert not commands CompletionWriter(0, opts, args, support_debug).WriteTo(sw) else: sw.Write("""if [[ "$COMP_CWORD" == 1 ]]; then""") sw.IncIndent() try: # Complete the command name WriteCompReply(sw, ("-W %s" % utils.ShellQuote(" ".join(sorted(commands.keys()))))) finally: sw.DecIndent() sw.Write("fi") # Group commands by arguments and options grouped_cmds = {} for cmd, (_, argdef, optdef, _, _) in commands.items(): if not (argdef or optdef): continue grouped_cmds.setdefault((tuple(argdef), tuple(optdef)), set()).add(cmd) # We're doing options and arguments to commands sw.Write("""case "${COMP_WORDS[1]}" in""") sort_grouped = sorted(grouped_cmds.items(), key=lambda (_, y): sorted(y)[0]) for ((argdef, optdef), cmds) in sort_grouped: assert argdef or optdef sw.Write("%s)", "|".join(map(utils.ShellQuote, sorted(cmds)))) sw.IncIndent() try: CompletionWriter(1, optdef, argdef, support_debug).WriteTo(sw) finally: sw.DecIndent() sw.Write(";;") sw.Write("esac") finally: sw.DecIndent() sw.Write("}") sw.Write("complete -F %s -o filenames %s", utils.ShellQuote(funcname), utils.ShellQuote(scriptname)) def GetFunctionName(name): return "_" + re.sub(r"[^a-z0-9]+", "_", name.lower()) def GetCommands(filename, module): """Returns the commands defined in a module. Aliases are also added as commands. """ try: commands = getattr(module, "commands") except AttributeError: raise Exception("Script %s doesn't have 'commands' attribute" % filename) # Add the implicit "--help" option help_option = cli.cli_option("-h", "--help", default=False, action="store_true") for name, (_, _, optdef, _, _) in commands.items(): if help_option not in optdef: optdef.append(help_option) for opt in cli.COMMON_OPTS: if opt in optdef: raise Exception("Common option '%s' listed for command '%s' in %s" % (opt, name, filename)) optdef.append(opt) # Use aliases aliases = getattr(module, "aliases", {}) if aliases: commands = commands.copy() for name, target in aliases.items(): commands[name] = commands[target] return commands def HaskellOptToOptParse(opts, kind): """Converts a Haskell option to a Python cli_option.
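As an illustration (the option names here are hypothetical), opts of "-g,--group" with kind "onegroup" yields a string option completed from the cluster's node group names, whereas kind "none" yields a plain boolean flag.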
@type opts: string @param opts: comma-separated string with short and long options @type kind: string @param kind: type generated by Common.hs/complToText; needs to be kept in sync """ # pylint: disable=W0142 # since we pass *opts in a number of places opts = opts.split(",") if kind == "none": return cli.cli_option(*opts, action="store_true") elif kind in ["file", "string", "host", "dir", "inetaddr"]: return cli.cli_option(*opts, type="string") elif kind == "integer": return cli.cli_option(*opts, type="int") elif kind == "float": return cli.cli_option(*opts, type="float") elif kind == "onegroup": return cli.cli_option(*opts, type="string", completion_suggest=cli.OPT_COMPL_ONE_NODEGROUP) elif kind == "onenode": return cli.cli_option(*opts, type="string", completion_suggest=cli.OPT_COMPL_ONE_NODE) elif kind == "manyinstances": # FIXME: no support for many instances return cli.cli_option(*opts, type="string") elif kind.startswith("choices="): choices = kind[len("choices="):].split(",") return cli.cli_option(*opts, type="choice", choices=choices) else: # FIXME: there are many other currently unused completion types, # should be added on an as-needed basis raise Exception("Unhandled option kind '%s'" % kind) #: serialised kind to arg type _ARG_MAP = { "choices": cli.ArgChoice, "command": cli.ArgCommand, "file": cli.ArgFile, "host": cli.ArgHost, "jobid": cli.ArgJobId, "onegroup": cli.ArgGroup, "oneinstance": cli.ArgInstance, "onenode": cli.ArgNode, "oneos": cli.ArgOs, "string": cli.ArgUnknown, "suggests": cli.ArgSuggest, } def HaskellArgToCliArg(kind, min_cnt, max_cnt): """Converts a Haskell argument specification to a Python _Argument. @type kind: string @param kind: type generated by Common.hs/argComplToText; needs to be kept in sync """ min_cnt = int(min_cnt) if max_cnt == "none": max_cnt = None else: max_cnt = int(max_cnt) # pylint: disable=W0142 # since we pass **kwargs kwargs = {"min": min_cnt, "max": max_cnt} if kind.startswith("choices=") or kind.startswith("suggest="): (kind, choices) = kind.split("=", 1) kwargs["choices"] = choices.split(",") if kind not in _ARG_MAP: raise Exception("Unhandled argument kind '%s'" % kind) else: return _ARG_MAP[kind](**kwargs) def ParseHaskellOptsArgs(script, output): """Computes list of options/arguments from help-completion output. """ cli_opts = [] cli_args = [] for line in output.splitlines(): v = line.split(None) exc = lambda msg: Exception("Invalid %s output from %s: %s" % (msg, script, v)) if len(v) < 2: raise exc("help completion") if v[0].startswith("-"): if len(v) != 2: raise exc("option format") (opts, kind) = v cli_opts.append(HaskellOptToOptParse(opts, kind)) else: if len(v) != 3: raise exc("argument format") (kind, min_cnt, max_cnt) = v cli_args.append(HaskellArgToCliArg(kind, min_cnt, max_cnt)) return (cli_opts, cli_args) def WriteHaskellCompletion(sw, script, htools=True, debug=True): """Generates completion information for a Haskell program. This converts completion info from a Haskell program into 'fake' cli_opts and then builds completion for them.
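The program is run with --help-completion and the output given to ParseHaskellOptsArgs: lines starting with a dash describe one option each as "names kind" (a hypothetical example would be "-v,--verbose none"), while all other lines describe one positional argument as "kind min max", where a max of "none" means the argument may be repeated without limit.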
""" if htools: cmd = "./src/htools" env = {"HTOOLS": script} script_name = script func_name = "htools_%s" % script else: cmd = "./" + script env = {} script_name = os.path.basename(script) func_name = script_name func_name = GetFunctionName(func_name) output = utils.RunCmd([cmd, "--help-completion"], env=env, cwd=".").output (opts, args) = ParseHaskellOptsArgs(script_name, output) WriteCompletion(sw, script_name, func_name, debug, opts=opts, args=args) def WriteHaskellCmdCompletion(sw, script, debug=True): """Generates completion information for a Haskell multi-command program. This gathers the list of commands from a Haskell program and computes the list of commands available, then builds the sub-command list of options/arguments for each command, using that for building a unified help output. """ cmd = "./" + script script_name = os.path.basename(script) func_name = script_name func_name = GetFunctionName(func_name) output = utils.RunCmd([cmd, "--help-completion"], cwd=".").output commands = {} lines = output.splitlines() if len(lines) != 1: raise Exception("Invalid lines in multi-command mode: %s" % str(lines)) v = lines[0].split(None) exc = lambda msg: Exception("Invalid %s output from %s: %s" % (msg, script, v)) if len(v) != 3: raise exc("help completion in multi-command mode") if not v[0].startswith("choices="): raise exc("invalid format in multi-command mode '%s'" % v[0]) for subcmd in v[0][len("choices="):].split(","): output = utils.RunCmd([cmd, subcmd, "--help-completion"], cwd=".").output (opts, args) = ParseHaskellOptsArgs(script, output) commands[subcmd] = (None, args, opts, None, None) WriteCompletion(sw, script_name, func_name, debug, commands=commands) def main(): parser = optparse.OptionParser(usage="%prog [--compact]") parser.add_option("--compact", action="store_true", help=("Don't indent output and don't include debugging" " facilities")) options, args = parser.parse_args() if args: parser.error("Wrong number of arguments") # Whether to build debug version of completion script debug = not options.compact buf = StringIO() sw = utils.ShellWriter(buf, indent=debug) # Remember original state of extglob and enable it (required for pattern # matching; must be enabled while parsing script) sw.Write("gnt_shopt_extglob=$(shopt -p extglob || :)") sw.Write("shopt -s extglob") WritePreamble(sw, debug) # gnt-* scripts for scriptname in _autoconf.GNT_SCRIPTS: filename = "scripts/%s" % scriptname WriteCompletion(sw, scriptname, GetFunctionName(scriptname), debug, commands=GetCommands(filename, build.LoadModule(filename))) # Burnin script WriteCompletion(sw, "%s/burnin" % pathutils.TOOLSDIR, "_ganeti_burnin", debug, opts=burnin.OPTIONS, args=burnin.ARGUMENTS) # ganeti-cleaner WriteHaskellCompletion(sw, "daemons/ganeti-cleaner", htools=False, debug=not options.compact) # htools, if enabled if _autoconf.HTOOLS: for script in _autoconf.HTOOLS_PROGS: WriteHaskellCompletion(sw, script, htools=True, debug=debug) # ganeti-confd, if enabled if _autoconf.ENABLE_CONFD: WriteHaskellCompletion(sw, "src/ganeti-confd", htools=False, debug=debug) # mon-collector, if monitoring is enabled if _autoconf.ENABLE_MOND: WriteHaskellCmdCompletion(sw, "src/mon-collector", debug=debug) # Reset extglob to original value sw.Write("[[ -n \"$gnt_shopt_extglob\" ]] && $gnt_shopt_extglob") sw.Write("unset gnt_shopt_extglob") print buf.getvalue() if __name__ == "__main__": main() ganeti-2.9.3/autotools/check-header0000744000000000000000000000740312230001635017246 0ustar00rootroot00000000000000#!/usr/bin/python # # 
Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to verify file header. """ # pylint: disable=C0103 # [C0103] Invalid name import sys import re import itertools from ganeti import constants from ganeti import utils from ganeti import compat #: Assume header is always in the first 8kB of a file _READ_SIZE = 8 * 1024 _GPLv2 = [ "This program is free software; you can redistribute it and/or modify", "it under the terms of the GNU General Public License as published by", "the Free Software Foundation; either version 2 of the License, or", "(at your option) any later version.", "", "This program is distributed in the hope that it will be useful, but", "WITHOUT ANY WARRANTY; without even the implied warranty of", "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU", "General Public License for more details.", "", "You should have received a copy of the GNU General Public License", "along with this program; if not, write to the Free Software", "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA", "02110-1301, USA.", ] _SHEBANG = re.compile(r"^#(?:|!(?:/usr/bin/python(?:| -u)|/bin/(?:|ba)sh))$") _COPYRIGHT_YEAR = r"20[01][0-9]" _COPYRIGHT = re.compile(r"# Copyright \(C\) (%s(?:, %s)*) Google Inc\.$" % (_COPYRIGHT_YEAR, _COPYRIGHT_YEAR)) _COPYRIGHT_DESC = "Copyright (C) <year>[, <year> ...] Google Inc." _AUTOGEN = "# This file is automatically generated, do not edit!" class HeaderError(Exception): pass def _Fail(lineno, msg): raise HeaderError("Line %s: %s" % (lineno, msg)) def _CheckHeader(getline_fn): (lineno, line) = getline_fn() if line == _AUTOGEN: return if not _SHEBANG.match(line): _Fail(lineno, ("Must contain nothing but a hash character (#) or a" " shebang line (e.g. #!/bin/bash)")) (lineno, line) = getline_fn() if line == _AUTOGEN: return if line != "#": _Fail(lineno, "Must contain nothing but hash character (#)") (lineno, line) = getline_fn() if line: _Fail(lineno, "Must be empty") (lineno, line) = getline_fn() if not _COPYRIGHT.match(line): _Fail(lineno, "Must contain copyright information (%s)" % _COPYRIGHT_DESC) (lineno, line) = getline_fn() if line != "#": _Fail(lineno, "Must contain nothing but hash character (#)") for licence_line in _GPLv2: (lineno, line) = getline_fn() if line != ("# %s" % licence_line).rstrip(): _Fail(lineno, "Does not match expected licence line (%s)" % licence_line) (lineno, line) = getline_fn() if line: _Fail(lineno, "Must be empty") def Main(): """Main program.
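Each file named on the command line is read (only its first 8 kB, per _READ_SIZE) and checked line by line; a violation is reported as "filename: Line N: message", and the script exits with EXIT_FAILURE if any file had a bad header and EXIT_SUCCESS otherwise.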
""" fail = False for filename in sys.argv[1:]: content = utils.ReadFile(filename, size=_READ_SIZE) lines = zip(itertools.count(1), content.splitlines()) try: _CheckHeader(compat.partial(lines.pop, 0)) except HeaderError, err: report = str(err) print "%s: %s" % (filename, report) fail = True if fail: sys.exit(constants.EXIT_FAILURE) else: sys.exit(constants.EXIT_SUCCESS) if __name__ == "__main__": Main() ganeti-2.9.3/autotools/check-news0000744000000000000000000001216312244641676017015 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to check NEWS file. """ # pylint: disable=C0103 # [C0103] Invalid name import sys import time import datetime import locale import fileinput import re import os DASHES_RE = re.compile(r"^\s*-+\s*$") RELEASED_RE = re.compile(r"^\*\(Released (?P[A-Z][a-z]{2})," r" (?P.+)\)\*$") UNRELEASED_RE = re.compile(r"^\*\(unreleased\)\*$") VERSION_RE = re.compile(r"^Version (\d+(\.\d+)+( (alpha|beta|rc)\d+)?)$") #: How many days release timestamps may be in the future TIMESTAMP_FUTURE_DAYS_MAX = 3 errors = [] def Error(msg): """Log an error for later display. """ errors.append(msg) def ReqNLines(req, count_empty, lineno, line): """Check if we have N empty lines before the current one. """ if count_empty < req: Error("Line %s: Missing empty line(s) before %s," " %d needed but got only %d" % (lineno, line, req, count_empty)) if count_empty > req: Error("Line %s: Too many empty lines before %s," " %d needed but got %d" % (lineno, line, req, count_empty)) def IsAlphaVersion(version): return "alpha" in version def UpdateAllowUnreleased(allow_unreleased, version_match, release): if not allow_unreleased: return False if IsAlphaVersion(release): return True version = version_match.group(1) if version == release: return False return True def main(): # Ensure "C" locale is used curlocale = locale.getlocale() if curlocale != (None, None): Error("Invalid locale %s" % curlocale) # Get the release version, but replace "~" with " " as the version # in the NEWS file uses spaces for beta and rc releases. 
release = os.environ.get('RELEASE', "").replace("~", " ") prevline = None expect_date = False count_empty = 0 allow_unreleased = True found_versions = set() for line in fileinput.input(): line = line.rstrip("\n") version_match = VERSION_RE.match(line) if version_match: ReqNLines(2, count_empty, fileinput.filelineno(), line) version = version_match.group(1) if version in found_versions: Error("Line %s: Duplicate release %s found" % (fileinput.filelineno(), version)) found_versions.add(version) allow_unreleased = UpdateAllowUnreleased(allow_unreleased, version_match, release) unreleased_match = UNRELEASED_RE.match(line) if unreleased_match and not allow_unreleased: Error("Line %s: Unreleased version after current release %s" % (fileinput.filelineno(), release)) if unreleased_match or RELEASED_RE.match(line): ReqNLines(1, count_empty, fileinput.filelineno(), line) if line: count_empty = 0 else: count_empty += 1 if DASHES_RE.match(line): if not VERSION_RE.match(prevline): Error("Line %s: Invalid title" % (fileinput.filelineno() - 1)) if len(line) != len(prevline): Error("Line %s: Invalid dashes length" % (fileinput.filelineno())) expect_date = True elif expect_date: if not line: # Ignore empty lines continue if UNRELEASED_RE.match(line): # Ignore unreleased versions expect_date = False continue m = RELEASED_RE.match(line) if not m: Error("Line %s: Invalid release line" % fileinput.filelineno()) expect_date = False continue # Including the weekday in the date string does not work as time.strptime # would return an inconsistent result if the weekday is incorrect. parsed_ts = time.mktime(time.strptime(m.group("date"), "%d %b %Y")) parsed = datetime.date.fromtimestamp(parsed_ts) today = datetime.date.today() if (parsed - datetime.timedelta(TIMESTAMP_FUTURE_DAYS_MAX)) > today: Error("Line %s: %s is more than %s days in the future (today is %s)" % (fileinput.filelineno(), parsed, TIMESTAMP_FUTURE_DAYS_MAX, today)) weekday = parsed.strftime("%a") # Check weekday if m.group("day") != weekday: Error("Line %s: %s was/is a %s, not %s" % (fileinput.filelineno(), parsed, weekday, m.group("day"))) expect_date = False prevline = line if errors: for msg in errors: print >> sys.stderr, msg sys.exit(1) else: sys.exit(0) if __name__ == "__main__": main() ganeti-2.9.3/autotools/run-in-tempdir0000744000000000000000000000146312271422343017625 0ustar00rootroot00000000000000#!/bin/bash # Helper for running things in a temporary directory; used for docs # building, unittests, etc. set -e tmpdir=$(mktemp -d -t gntbuild.XXXXXXXX) trap "rm -rf $tmpdir" EXIT # fully copy items cp -r autotools daemons scripts lib tools qa $tmpdir if [[ -z "$COPY_DOC" ]]; then mkdir $tmpdir/doc ln -s $PWD/doc/examples $tmpdir/doc else # Building documentation requires all files cp -r doc $tmpdir fi mkdir $tmpdir/test/ cp -r test/py $tmpdir/test/py ln -s $PWD/test/data $tmpdir/test ln -s $PWD/test/hs $tmpdir/test mv $tmpdir/lib $tmpdir/ganeti ln -T -s $tmpdir/ganeti $tmpdir/lib mkdir -p $tmpdir/src $tmpdir/test/hs for hfile in htools ganeti-confd mon-collector; do if [ -e src/$hfile ]; then ln -s $PWD/src/$hfile $tmpdir/src/ fi done cd $tmpdir && GANETI_TEMP_DIR="$tmpdir" "$@" ganeti-2.9.3/autotools/convert-constants0000744000000000000000000002546212271422343020452 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012, 2013 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for converting Python constants to Haskell code fragments. """ import re import types from ganeti import _autoconf from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import luxi from ganeti import opcodes from ganeti import qlang from ganeti import jstore #: Constant name regex CONSTANT_RE = re.compile("^[A-Z][A-Z0-9_-]+$") #: Private name regex PRIVATE_RE = re.compile("^__.+__$") #: The type of regex objects RE_TYPE = type(CONSTANT_RE) #: Keys which do not declare a value (manually maintained). By adding # values here, we can make more lists use the actual names; otherwise # we'll have (e.g.) both DEFAULT_ENABLED_HYPERVISOR and HT_XEN_PVM # declare the same value, and thus the list of valid hypervisors will # have strings instead of easily looked-up names. IGNORED_DECL_NAMES = ["DEFAULT_ENABLED_HYPERVISOR"] def NameRules(name): """Converts the upper-cased Python name to Haskell camelCase. """ name = name.replace("-", "_") elems = name.split("_") return elems[0].lower() + "".join(e.capitalize() for e in elems[1:]) def StringValueRules(value): """Converts a string value from Python to Haskell. """ value = value.encode("string_escape") # escapes backslashes value = value.replace("\"", "\\\"") return value def DictKeyName(dict_name, key_name): """Converts a dict plus key name to a full name. """ return "%s_%s" % (dict_name, str(key_name).upper()) def HaskellTypeVal(value): """Returns the Haskell type and value for a Python value. Note that this only works for 'plain' Python types. @returns: (string, string) or None, if we can't determine the type. """ if isinstance(value, basestring): return ("String", "\"%s\"" % StringValueRules(value)) elif isinstance(value, bool): return ("Bool", "%s" % value) elif isinstance(value, int): return ("Int", "%d" % value) elif isinstance(value, long): return ("Integer", "%d" % value) elif isinstance(value, float): return ("Double", "%f" % value) else: return None def IdentifyOrigin(all_items, value): """Tries to identify a constant name from a constant's value. This uses a simple algorithm: is there a constant (and only one) with the same value? If so, then it returns that constant's name. @note: it is recommended to use this only for tuples/lists/sets, and not for individual (top-level) values @param all_items: a dictionary of name/values for the current module @param value: the value for which we try to find an origin """ found = [name for (name, v) in all_items.items() if v is value and name not in IGNORED_DECL_NAMES] if len(found) == 1: return found[0] else: return None def FormatListElems(all_items, pfx_name, ovals, tvals): """Formats a list's elements. This formats the elements as either values or, if we find all origins, as names.
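For instance, if every element of the list is the value of exactly one known constant (say the hypothetical names FOO and BAR), the generated Haskell list uses their camelCase names, [foo, bar], instead of the literal values.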
@param all_items: a dictionary of name/values for the current module @param pfx_name: the prefix name currently used @param ovals: the list of actual (Python) values @param tvals: the list of values we want to format in the Haskell form """ origins = [IdentifyOrigin(all_items, v) for v in ovals] if compat.all(x is not None for x in origins): values = [NameRules(pfx_name + origin) for origin in origins] else: values = tvals return ", ".join(values) def FormatDict(all_items, pfx_name, py_name, hs_name, mydict): """Converts a dictionary to a Haskell association list ([(k, v)]), if possible. @param all_items: a dictionary of name/values for the current module @param pfx_name: the prefix name currently used @param py_name: the Python name @param hs_name: the Haskell name @param mydict: a dictionary, whose homogeneity is not yet known """ # need this for ordering orig_list = mydict.items() list_form = [(HaskellTypeVal(k), HaskellTypeVal(v)) for k, v in orig_list] if compat.any(v is None or k is None for k, v in list_form): # type not known return [] all_keys = [k for k, _ in list_form] all_vals = [v for _, v in list_form] key_types = set(k[0] for k in all_keys) val_types = set(v[0] for v in all_vals) if not (len(key_types) == 1 and len(val_types) == 1): # multiple types return [] # record the key and value Haskell types key_type = key_types.pop() val_type = val_types.pop() # now try to find names for the keys, instead of raw values key_origins = [IdentifyOrigin(all_items, k) for k, _ in orig_list] if compat.all(x is not None for x in key_origins): key_v = [NameRules(pfx_name + origin) for origin in key_origins] else: key_v = [k[1] for k in all_keys] # ... and for values val_origins = [IdentifyOrigin(all_items, v) for _, v in orig_list] if compat.all(x is not None for x in val_origins): val_v = [NameRules(pfx_name + origin) for origin in val_origins] else: val_v = [v[1] for v in all_vals] # finally generate the output kv_pairs = ["(%s, %s)" % (k, v) for k, v in zip(key_v, val_v)] return ["-- | Converted from Python dictionary @%s@" % py_name, "%s :: [(%s, %s)]" % (hs_name, key_type, val_type), "%s = [%s]" % (hs_name, ", ".join(kv_pairs)), ] def ConvertVariable(prefix, name, value, all_items): """Converts a given variable to Haskell code. @param prefix: a prefix for the Haskell name (useful for module identification) @param name: the Python name @param value: the value @param all_items: a dictionary of name/value for the module being processed @return: a list of Haskell code lines """ lines = [] if prefix: pfx_name = prefix + "_" fqn = prefix + "." + name else: pfx_name = "" fqn = name hs_name = NameRules(pfx_name + name) hs_typeval = HaskellTypeVal(value) if (isinstance(value, types.ModuleType) or callable(value) or PRIVATE_RE.match(name)): # no sense in marking these, as we don't _want_ to convert them; the # message in the next if block is for datatypes we don't _know_ # (yet) how to convert pass elif not CONSTANT_RE.match(name): lines.append("-- Skipped %s %s, not constant" % (fqn, type(value))) elif hs_typeval is not None: # this is a simple value (hs_type, hs_val) = hs_typeval lines.append("-- | Converted from Python constant @%s@" % fqn) lines.append("%s :: %s" % (hs_name, hs_type)) lines.append("%s = %s" % (hs_name, hs_val)) elif isinstance(value, dict): if value: lines.append("-- Following lines come from dictionary %s" % fqn) # try to build a real map here, if all keys have same type, and # all values too (i.e.
we have a homogeneous dictionary) lines.extend(FormatDict(all_items, pfx_name, fqn, hs_name, value)) # and now create individual names for k in sorted(value.keys()): lines.extend(ConvertVariable(prefix, DictKeyName(name, k), value[k], all_items)) elif isinstance(value, tuple): tvs = [HaskellTypeVal(elem) for elem in value] # Custom rule for special cluster verify error tuples if name.startswith("CV_E") and len(value) == 3 and tvs[1][0] is not None: cv_ename = hs_name + "Code" lines.append("-- | Special cluster verify code %s" % name) lines.append("%s :: %s" % (cv_ename, tvs[1][0])) lines.append("%s = %s" % (cv_ename, tvs[1][1])) lines.append("") if compat.all(e is not None for e in tvs): ttypes = ", ".join(e[0] for e in tvs) tvals = FormatListElems(all_items, pfx_name, value, [e[1] for e in tvs]) lines.append("-- | Converted from Python tuple @%s@" % fqn) lines.append("%s :: (%s)" % (hs_name, ttypes)) lines.append("%s = (%s)" % (hs_name, tvals)) else: lines.append("-- Skipped tuple %s, cannot convert all elements" % fqn) elif isinstance(value, (list, set, frozenset)): # Lists and frozensets are handled the same in Haskell: as lists, # since lists are immutable and we don't need for constants the # high-speed of an actual Set type. However, we can only convert # them if they have the same type for all elements (which is a # normal expectation for constants, our code should be well # behaved); note that this is different from the tuples case, # where we always (for some values of always) can convert tvs = [HaskellTypeVal(elem) for elem in value] if compat.all(e is not None for e in tvs): ttypes, tvals = zip(*tvs) uniq_types = set(ttypes) if len(uniq_types) == 1: values = FormatListElems(all_items, pfx_name, value, tvals) lines.append("-- | Converted from Python list or set @%s@" % fqn) lines.append("%s :: [%s]" % (hs_name, uniq_types.pop())) lines.append("%s = [%s]" % (hs_name, values)) else: lines.append("-- | Skipped list/set %s, is not homogeneous" % fqn) else: lines.append("-- | Skipped list/set %s, cannot convert all elems" % fqn) elif isinstance(value, RE_TYPE): tvs = HaskellTypeVal(value.pattern) assert tvs is not None lines.append("-- | Converted from Python RE object @%s@" % fqn) lines.append("%s :: %s" % (hs_name, tvs[0])) lines.append("%s = %s" % (hs_name, tvs[1])) else: lines.append("-- Skipped %s, %s not handled" % (fqn, type(value))) return lines def Convert(module, prefix): """Converts the constants to Haskell. """ lines = [""] all_items = dict((name, getattr(module, name)) for name in dir(module)) for name in sorted(all_items.keys()): value = all_items[name] new_lines = ConvertVariable(prefix, name, value, all_items) if new_lines: lines.extend(new_lines) lines.append("") return "\n".join(lines) def ConvertMisc(): """Convert some extra computed-values to Haskell. """ lines = [""] lines.extend(ConvertVariable("opcodes", "OP_IDS", opcodes.OP_MAPPING.keys(), {})) return "\n".join(lines) def main(): print Convert(constants, "") print Convert(luxi, "luxi") print Convert(qlang, "qlang") print Convert(_autoconf, "autoconf") print Convert(errors, "errors") print Convert(jstore, "jstore") print ConvertMisc() if __name__ == "__main__": main() ganeti-2.9.3/autotools/check-python-code0000744000000000000000000000423012230001635020242 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2009, 2011 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e # Ensure the checks always use the same locale export LC_ALL=C readonly maxlinelen=$(for ((i=0; i<81; ++i)); do echo -n .; done) if [[ "${#maxlinelen}" != 81 ]]; then echo "Internal error: Check for line length is incorrect" >&2 exit 1 fi # "[...] If the last ARG evaluates to 0, let returns 1; 0 is returned # otherwise.", hence ignoring the return value. let problems=0 || : for script; do if grep -n -H -F $'\t' "$script"; then let ++problems echo "Found tabs in $script" >&2 fi if grep -n -H -E '[[:space:]]$' "$script"; then let ++problems echo "Found end-of-line-whitespace in $script" >&2 fi # FIXME: This will also match "foo.xrange(...)" if grep -n -H -E '^[^#]*\<xrange\>' "$script"; then let ++problems echo "Forbidden function 'xrange' used in $script" >&2 fi if grep -n -H -E -i '#[[:space:]]*(vim|Local[[:space:]]+Variables):' "$script" then let ++problems echo "Found editor-specific settings in $script" >&2 fi if grep -n -H "^$maxlinelen" "$script"; then let ++problems echo "Longest line in $script is longer than 80 characters" >&2 fi if grep -n -H -E -i \ '#.*\bpylint[[:space:]]*:[[:space:]]*disable-msg\b' "$script" then let ++problems echo "Found old-style pylint disable pragma in $script" >&2 fi done if [[ "$problems" -gt 0 ]]; then echo "Found $problems problem(s) while checking code." >&2 exit 1 fi ganeti-2.9.3/autotools/install-sh0000755000000000000000000003325611777117217017043 0ustar00rootroot00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2011-01-19.21; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # `make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit=${DOITPROG-} if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_glob='?' initialize_posix_glob=' test "$posix_glob" != "?" || { if (set -f) 2>/dev/null; then posix_glob= else posix_glob=: fi } ' posix_mkdir= # Desired mode of installed file. mode=0755 chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false no_target_directory= usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve the last data modification time) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -s) stripcmd=$stripprog;; -t) dst_arg=$2 # Protect names problematic for `test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) no_target_directory=true;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for `test' and other utilities. 
case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call `install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for `test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. *[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writeable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac eval "$initialize_posix_glob" oIFS=$IFS IFS=/ $posix_glob set -f set fnord $dstdir shift $posix_glob set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. (umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && eval "$initialize_posix_glob" && $posix_glob set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && $posix_glob set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. { # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. 
In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd -f "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ganeti-2.9.3/autotools/sphinx-wrapper0000744000000000000000000000216412244641676017755 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e -u -o pipefail if [[ -e doc/manpages.rst ]]; then echo 'doc/manpages.rst should not exist' >&2 exit 1 fi if [[ -n "$ENABLE_MANPAGES" ]]; then mv doc/manpages-enabled.rst doc/manpages.rst rm doc/manpages-disabled.rst else mv doc/manpages-disabled.rst doc/manpages.rst if [[ -e doc/manpages-enabled.rst ]]; then rm doc/manpages-enabled.rst fi fi exec "$@" ganeti-2.9.3/autotools/ac_python_module.m40000644000000000000000000000175012230001635020611 0ustar00rootroot00000000000000##### http://autoconf-archive.cryp.to/ac_python_module.html # # SYNOPSIS # # AC_PYTHON_MODULE(modname[, fatal]) # # DESCRIPTION # # Checks for Python module. # # If fatal is non-empty then absence of a module will trigger an # error. # # LAST MODIFICATION # # 2007-01-09 # # COPYLEFT # # Copyright (c) 2007 Andrew Collier # # Copying and distribution of this file, with or without # modification, are permitted in any medium without royalty provided # the copyright notice and this notice are preserved. AC_DEFUN([AC_PYTHON_MODULE],[ if test -z $PYTHON; then PYTHON="python" fi PYTHON_NAME=`basename $PYTHON` AC_MSG_CHECKING($PYTHON_NAME module: $1) $PYTHON -c "import $1" 2>/dev/null if test $? -eq 0; then AC_MSG_RESULT(yes) eval AS_TR_CPP(HAVE_PYMOD_$1)=yes else AC_MSG_RESULT(no) eval AS_TR_CPP(HAVE_PYMOD_$1)=no # if test -n "$2" then AC_MSG_ERROR(failed to find required module $1) exit 1 fi fi ]) ganeti-2.9.3/autotools/py-compile0000755000000000000000000001040011777117217017035 0ustar00rootroot00000000000000#!/bin/sh # py-compile - Compile a Python program scriptversion=2011-06-08.12; # UTC # Copyright (C) 2000, 2001, 2003, 2004, 2005, 2008, 2009, 2011 Free # Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. 
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . if [ -z "$PYTHON" ]; then PYTHON=python fi me=py-compile usage_error () { echo "$me: $*" >&2 echo "Try \`$me --help' for more information." >&2 exit 1 } basedir= destdir= while test $# -ne 0; do case "$1" in --basedir) if test $# -lt 2; then usage_error "option '--basedir' requires an argument" else basedir=$2 fi shift ;; --destdir) if test $# -lt 2; then usage_error "option '--destdir' requires an argument" else destdir=$2 fi shift ;; -h|--help) cat <<\EOF Usage: py-compile [--help] [--version] [--basedir DIR] [--destdir DIR] FILES..." Byte compile some python scripts FILES. Use --destdir to specify any leading directory path to the FILES that you don't want to include in the byte compiled file. Specify --basedir for any additional path information you do want to be shown in the byte compiled file. Example: py-compile --destdir /tmp/pkg-root --basedir /usr/share/test test.py test2.py Report bugs to . EOF exit $? ;; -v|--version) echo "$me $scriptversion" exit $? ;; --) shift break ;; -*) usage_error "unrecognized option '$1'" ;; *) break ;; esac shift done files=$* if test -z "$files"; then usage_error "no files given" fi # if basedir was given, then it should be prepended to filenames before # byte compilation. if [ -z "$basedir" ]; then pathtrans="path = file" else pathtrans="path = os.path.join('$basedir', file)" fi # if destdir was given, then it needs to be prepended to the filename to # byte compile but not go into the compiled file. if [ -z "$destdir" ]; then filetrans="filepath = path" else filetrans="filepath = os.path.normpath('$destdir' + os.sep + path)" fi $PYTHON -c " import sys, os, py_compile files = '''$files''' sys.stdout.write('Byte-compiling python modules...\n') for file in files.split(): $pathtrans $filetrans if not os.path.exists(filepath) or not (len(filepath) >= 3 and filepath[-3:] == '.py'): continue sys.stdout.write(file) sys.stdout.flush() py_compile.compile(filepath, filepath + 'c', path) sys.stdout.write('\n')" || exit $? # this will fail for python < 1.5, but that doesn't matter ... 
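# The second pass below repeats the same loop under "python -O" so that
# optimized byte code is generated as well; py_compile is explicitly told
# to write "file.pyo" instead of "file.pyc". Its errors are deliberately
# swallowed (2>/dev/null || :), since not every interpreter supports
# optimized byte-compilation.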
$PYTHON -O -c " import sys, os, py_compile files = '''$files''' sys.stdout.write('Byte-compiling python modules (optimized versions) ...\n') for file in files.split(): $pathtrans $filetrans if not os.path.exists(filepath) or not (len(filepath) >= 3 and filepath[-3:] == '.py'): continue sys.stdout.write(file) sys.stdout.flush() py_compile.compile(filepath, filepath + 'o', path) sys.stdout.write('\n')" 2>/dev/null || : # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ganeti-2.9.3/autotools/check-man-references0000744000000000000000000000272212244641676020733 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e -u -o pipefail # Use array for arguments so that comments can be inline args=( # "...name*(8)" (missing backslash) -e '\w+\*+\([0-9]*\)' # "...name(8)" (no asterisk) -e '\w+\([0-9]*\)' # "...name(8)*" (asterisk after number) -e '\w+\([0-9]*\)\*' # "...name*\(8)" (only one asterisk before backslash) -e '\w+\*\\\([0-9]*\)' # ":manpage:..." (Sphinx-specific) -e ':manpage:' ) for fname; do # Ignore title and then look for faulty references if tail -n +2 $fname | grep -n -E -i "${args[@]}"; then { echo "Found faulty man page reference(s) in '$fname'."\ 'Use syntax "**name**\(number)" instead.'\ 'Example: **gnt-instance**\(8).' } >&2 exit 1 fi done ganeti-2.9.3/autotools/gen-py-coverage0000744000000000000000000000342412244641676017756 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e set -u : ${PYTHON:=python} : ${COVERAGE:?} : ${COVERAGE_FILE:?} : ${TEXT_COVERAGE:?} : ${HTML_COVERAGE:=} : ${GANETI_TEMP_DIR:?} reportargs=( '--include=*' '--omit=test/py/*' ) $COVERAGE erase if [[ -n "$HTML_COVERAGE" ]]; then if [[ ! 
-d "$HTML_COVERAGE" ]]; then echo "Not a directory: $HTML_COVERAGE" >&2 exit 1 fi # At least coverage 3.4 fails to overwrite files find "$HTML_COVERAGE" \( -type f -o -type l \) -delete fi for script; do if [[ "$script" == *-runasroot.py ]]; then if [[ -z "$FAKEROOT" ]]; then echo "WARNING: FAKEROOT variable not set: skipping $script" >&2 continue fi cmdprefix="$FAKEROOT" else cmdprefix= fi $cmdprefix $COVERAGE run --branch --append "${reportargs[@]}" $script done echo "Writing text report to $TEXT_COVERAGE ..." >&2 $COVERAGE report "${reportargs[@]}" | tee "$TEXT_COVERAGE" if [[ -n "$HTML_COVERAGE" ]]; then echo "Generating HTML report in $HTML_COVERAGE ..." >&2 $COVERAGE html "${reportargs[@]}" -d "$HTML_COVERAGE" fi ganeti-2.9.3/autotools/check-tar0000744000000000000000000000275712230001635016613 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to check tarball generated by Automake. """ import sys import stat import tarfile def ReportError(member, msg): print >>sys.stderr, "%s: %s" % (member.name, msg) def main(): tf = tarfile.open(fileobj=sys.stdin) success = True for member in tf.getmembers(): if member.uid != 0: success = False ReportError(member, "Owned by UID %s, not UID 0" % member.uid) if member.gid != 0: success = False ReportError(member, "Owned by GID %s, not GID 0" % member.gid) if member.mode & (stat.S_IWGRP | stat.S_IWOTH): success = False ReportError(member, "World or group writeable (mode is %o)" % member.mode) if success: sys.exit(0) sys.exit(1) if __name__ == "__main__": main() ganeti-2.9.3/autotools/wrong-hardcoded-paths0000644000000000000000000000011412230001635021116 0ustar00rootroot00000000000000/etc/ganeti /usr/(local/)?lib/ganeti /(usr/local/)?var/(lib|run|log)/ganeti ganeti-2.9.3/autotools/check-version0000744000000000000000000000274012244641676017526 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010,2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
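# check-version expects two arguments: the version string and the path to
# the NEWS file. It accepts plain "x.y.z" releases as well as "x.y.z~rcN",
# "x.y.z~betaN" and "x.y.z~alphaN" pre-releases, and for everything except
# alpha versions it requires a matching "Version x.y.z" heading (with the
# "~" turned into a space) in NEWS.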
set -e # Enable Bash-specific patterns shopt -s extglob readonly version=$1 readonly newsfile=$2 readonly numpat='+([0-9])' case "$version" in # Format "x.y.z" $numpat.$numpat.$numpat) : ;; # Format "x.y.z~rcN" or "x.y.z~betaN" or "x.y.z~alphaN" for N > 0 $numpat.$numpat.$numpat~@(rc|beta|alpha)[1-9]*([0-9])) : ;; *) echo "Invalid version format: $version" >&2 exit 1 ;; esac readonly newsver="Version ${version/\~/ }" # Only alpha versions are allowed not to have their own NEWS section yet set +e FOUND=x`echo $version | grep "alpha[1-9]*[0-9]$"` set -e if [ $FOUND == "x" ] then if ! grep -q -x "$newsver" $newsfile then echo "Unable to find heading '$newsver' in NEWS" >&2 exit 1 fi fi exit 0 ganeti-2.9.3/autotools/missing0000755000000000000000000002415211777117217016441 0ustar00rootroot00000000000000#! /bin/sh # Common stub for a few missing GNU programs while installing. scriptversion=2012-01-06.13; # UTC # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006, # 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. # Originally by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. if test $# -eq 0; then echo 1>&2 "Try \`$0 --help' for more information" exit 1 fi run=: sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' sed_minuso='s/.* -o \([^ ]*\).*/\1/p' # In the cases where this matters, `missing' is being run in the # srcdir already. if test -f configure.ac; then configure_ac=configure.ac else configure_ac=configure.in fi msg="missing on your system" case $1 in --run) # Try to run requested program, and just exit if it succeeds. run= shift "$@" && exit 0 # Exit code 63 means version mismatch. This often happens # when the user try to use an ancient version of a tool on # a file that requires a minimum version. In this case we # we should proceed has if the program had been absent, or # if --run hadn't been passed. if test $? = 63; then run=: msg="probably too old" fi ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an error status if there is no known handling for PROGRAM. 
Options: -h, --help display this help and exit -v, --version output version information and exit --run try to run the given command, and emulate it if it fails Supported PROGRAM values: aclocal touch file \`aclocal.m4' autoconf touch file \`configure' autoheader touch file \`config.h.in' autom4te touch the output file, or create a stub one automake touch all \`Makefile.in' files bison create \`y.tab.[ch]', if possible, from existing .[ch] flex create \`lex.yy.c', if possible, from existing .c help2man touch the output file lex create \`lex.yy.c', if possible, from existing .c makeinfo touch the output file yacc create \`y.tab.[ch]', if possible, from existing .[ch] Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and \`g' are ignored when checking the name. Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? ;; -*) echo 1>&2 "$0: Unknown \`$1' option" echo 1>&2 "Try \`$0 --help' for more information" exit 1 ;; esac # normalize program name to check for. program=`echo "$1" | sed ' s/^gnu-//; t s/^gnu//; t s/^g//; t'` # Now exit if we have it, but it failed. Also exit now if we # don't have it and --version was passed (most likely to detect # the program). This is about non-GNU programs, so use $1 not # $program. case $1 in lex*|yacc*) # Not GNU programs, they don't have --version. ;; *) if test -z "$run" && ($1 --version) > /dev/null 2>&1; then # We have it, but it failed. exit 1 elif test "x$2" = "x--version" || test "x$2" = "x--help"; then # Could not run --version or --help. This is probably someone # running `$TOOL --version' or `$TOOL --help' to check whether # $TOOL exists and not knowing $TOOL uses missing. exit 1 fi ;; esac # If it does not exist, or fails to run (possibly an outdated version), # try to emulate it. case $program in aclocal*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. Grab them from any GNU archive site." touch aclocal.m4 ;; autoconf*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." touch configure ;; autoheader*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acconfig.h' or \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` test -z "$files" && files="config.h" touch_files= for f in $files; do case $f in *:*) touch_files="$touch_files "`echo "$f" | sed -e 's/^[^:]*://' -e 's/:.*//'`;; *) touch_files="$touch_files $f.in";; esac done touch $touch_files ;; automake*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. Grab them from any GNU archive site." find . -type f -name Makefile.am -print | sed 's/\.am$/.in/' | while read f; do touch "$f"; done ;; autom4te*) echo 1>&2 "\ WARNING: \`$1' is needed, but is $msg. You might have modified some files without having the proper tools for further handling them. You can get \`$1' as part of \`Autoconf' from any GNU archive site." 
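  # Determine which output file autom4te was asked to produce, looking
  # first for a "--output=FILE"-style option and then for "-o FILE"; the
  # file is then either touched or replaced by a stub script that exits 0.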
file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo "#! /bin/sh" echo "# Created by GNU Automake missing as a replacement of" echo "# $ $@" echo "exit 0" chmod +x $file exit 1 fi ;; bison*|yacc*) echo 1>&2 "\ WARNING: \`$1' $msg. You should only need it if you modified a \`.y' file. You may need the \`Bison' package in order for those modifications to take effect. You can get \`Bison' from any GNU archive site." rm -f y.tab.c y.tab.h if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.y) SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.c fi SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.h fi ;; esac fi if test ! -f y.tab.h; then echo >y.tab.h fi if test ! -f y.tab.c; then echo 'main() { return 0; }' >y.tab.c fi ;; lex*|flex*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.l' file. You may need the \`Flex' package in order for those modifications to take effect. You can get \`Flex' from any GNU archive site." rm -f lex.yy.c if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.l) SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" lex.yy.c fi ;; esac fi if test ! -f lex.yy.c; then echo 'main() { return 0; }' >lex.yy.c fi ;; help2man*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a dependency of a manual page. You may need the \`Help2man' package in order for those modifications to take effect. You can get \`Help2man' from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo ".ab help2man is required to generate this page" exit $? fi ;; makeinfo*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.texi' or \`.texinfo' file, or any other file indirectly affecting the aspect of the manual. The spurious call might also be the consequence of using a buggy \`make' (AIX, DU, IRIX). You might want to install the \`Texinfo' package or the \`GNU make' package. Grab either from any GNU archive site." # The file to touch is that specified with -o ... file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -z "$file"; then # ... or it is the one specified with @setfilename ... infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` file=`sed -n ' /^@setfilename/{ s/.* \([^ ]*\) *$/\1/ p q }' $infile` # ... or it is derived from the source name (dir/f.texi becomes f.info) test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info fi # If the file does not exist, the user really needs makeinfo; # let's fail without touching anything. test -f $file || exit 1 touch $file ;; *) echo 1>&2 "\ WARNING: \`$1' is needed, and is $msg. You might have modified some files without having the proper tools for further handling them. Check the \`README' file, it often tells you about the needed prerequisites for installing this package. You may also peek at any GNU archive site, in case some other package would contain this missing \`$1' program." 
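    # No emulation is possible for an unknown program, so give up and fail.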
exit 1 ;; esac exit 0 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ganeti-2.9.3/autotools/testrunner0000744000000000000000000000216012244641676017173 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e filename=$1 execasroot() { local fname=$1 shift if [[ -z "$FAKEROOT" ]]; then echo "WARNING: FAKEROOT variable not set, skipping $fname" >&2 else exec "$FAKEROOT" "$@" fi } case "$filename" in *-runasroot.py) execasroot $filename $PYTHON "$@" ;; *.py) exec $PYTHON "$@" ;; *-runasroot) execasroot $filename "$@" ;; *) exec "$@" ;; esac ganeti-2.9.3/autotools/build-rpc0000744000000000000000000001246712244641676016656 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to generate RPC code. """ # pylint: disable=C0103 # [C0103] Invalid name import sys import re import itertools import textwrap from cStringIO import StringIO from ganeti import utils from ganeti import compat from ganeti import build _SINGLE = "single-node" _MULTI = "multi-node" #: Expected length of a rpc definition _RPC_DEF_LEN = 8 def _WritePreamble(sw): """Writes a preamble for the RPC wrapper output. """ sw.Write("# This code is automatically generated at build time.") sw.Write("# Do not modify manually.") sw.Write("") sw.Write("\"\"\"Automatically generated RPC client wrappers.") sw.Write("") sw.Write("\"\"\"") sw.Write("") sw.Write("from ganeti import rpc_defs") sw.Write("") def _WrapCode(line): """Wraps Python code. """ return textwrap.wrap(line, width=70, expand_tabs=False, fix_sentence_endings=False, break_long_words=False, replace_whitespace=True, subsequent_indent=utils.ShellWriter.INDENT_STR) def _WriteDocstring(sw, name, timeout, kind, args, desc): """Writes a docstring for an RPC wrapper. 
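  The generated docstring consists of the call description (if any), a
  note about the call kind and timeout, the C{node} or C{node_list}
  parameter, and one wrapped C{@param} line for every RPC argument that
  carries a description.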
""" sw.Write("\"\"\"Wrapper for RPC call '%s'", name) sw.Write("") if desc: sw.Write(desc) sw.Write("") note = ["This is a %s call" % kind] if timeout and not callable(timeout): note.append(" with a timeout of %s" % utils.FormatSeconds(timeout)) sw.Write("@note: %s", "".join(note)) if kind == _SINGLE: sw.Write("@type node: string") sw.Write("@param node: Node name") else: sw.Write("@type node_list: list of string") sw.Write("@param node_list: List of node names") if args: for (argname, _, argtext) in args: if argtext: docline = "@param %s: %s" % (argname, argtext) for line in _WrapCode(docline): sw.Write(line) sw.Write("") sw.Write("\"\"\"") def _WriteBaseClass(sw, clsname, calls): """Write RPC wrapper class. """ sw.Write("") sw.Write("class %s(object):", clsname) sw.IncIndent() try: sw.Write("# E1101: Non-existent members") sw.Write("# R0904: Too many public methods") sw.Write("# pylint: disable=E1101,R0904") if not calls: sw.Write("pass") return sw.Write("_CALLS = rpc_defs.CALLS[%r]", clsname) sw.Write("") for v in calls: if len(v) != _RPC_DEF_LEN: raise ValueError("Procedure %s has only %d elements, expected %d" % (v[0], len(v), _RPC_DEF_LEN)) for (name, kind, _, timeout, args, _, _, desc) in sorted(calls): funcargs = ["self"] if kind == _SINGLE: funcargs.append("node") elif kind == _MULTI: funcargs.append("node_list") else: raise Exception("Unknown kind '%s'" % kind) funcargs.extend(map(compat.fst, args)) funcargs.append("_def=_CALLS[%r]" % name) funcdef = "def call_%s(%s):" % (name, utils.CommaJoin(funcargs)) for line in _WrapCode(funcdef): sw.Write(line) sw.IncIndent() try: _WriteDocstring(sw, name, timeout, kind, args, desc) buf = StringIO() buf.write("return ") # In case line gets too long and is wrapped in a bad spot buf.write("(") buf.write("self._Call(_def, ") if kind == _SINGLE: buf.write("[node]") else: buf.write("node_list") buf.write(", [%s])" % # Function arguments utils.CommaJoin(map(compat.fst, args))) if kind == _SINGLE: buf.write("[node]") buf.write(")") for line in _WrapCode(buf.getvalue()): sw.Write(line) finally: sw.DecIndent() sw.Write("") finally: sw.DecIndent() def main(): """Main function. """ buf = StringIO() sw = utils.ShellWriter(buf) _WritePreamble(sw) for filename in sys.argv[1:]: sw.Write("# Definitions from '%s'", filename) module = build.LoadModule(filename) # Call types are re-defined in definitions file to avoid imports. Verify # here to ensure they're equal to local constants. assert module.SINGLE == _SINGLE assert module.MULTI == _MULTI dups = utils.FindDuplicates(itertools.chain(*map(lambda value: value.keys(), module.CALLS.values()))) if dups: raise Exception("Found duplicate RPC definitions for '%s'" % utils.CommaJoin(sorted(dups))) for (clsname, calls) in sorted(module.CALLS.items()): _WriteBaseClass(sw, clsname, calls.values()) print buf.getvalue() if __name__ == "__main__": main() ganeti-2.9.3/qa/0000755000000000000000000000000012271445545013404 5ustar00rootroot00000000000000ganeti-2.9.3/qa/ganeti-qa.py0000744000000000000000000007457212271422343015633 0ustar00rootroot00000000000000#!/usr/bin/python -u # # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for doing QA on Ganeti. """ # pylint: disable=C0103 # due to invalid name import copy import datetime import optparse import sys import qa_cluster import qa_config import qa_daemon import qa_env import qa_error import qa_group import qa_instance import qa_monitoring import qa_network import qa_node import qa_os import qa_job import qa_rapi import qa_tags import qa_utils from ganeti import utils from ganeti import rapi # pylint: disable=W0611 from ganeti import constants from ganeti import pathutils from ganeti.http.auth import ParsePasswordFile import ganeti.rapi.client # pylint: disable=W0611 from ganeti.rapi.client import UsesRapiClient def _FormatHeader(line, end=72): """Fill a line up to the end column. """ line = "---- " + line + " " line += "-" * (end - len(line)) line = line.rstrip() return line def _DescriptionOf(fn): """Computes the description of an item. """ if fn.__doc__: desc = fn.__doc__.splitlines()[0].strip() else: desc = "%r" % fn return desc.rstrip(".") def RunTest(fn, *args, **kwargs): """Runs a test after printing a header. """ tstart = datetime.datetime.now() desc = _DescriptionOf(fn) print print _FormatHeader("%s start %s" % (tstart, desc)) try: retval = fn(*args, **kwargs) return retval finally: tstop = datetime.datetime.now() tdelta = tstop - tstart print _FormatHeader("%s time=%s %s" % (tstop, tdelta, desc)) def RunTestIf(testnames, fn, *args, **kwargs): """Runs a test conditionally. @param testnames: either a single test name in the configuration file, or a list of testnames (which will be AND-ed together) """ if qa_config.TestEnabled(testnames): RunTest(fn, *args, **kwargs) else: tstart = datetime.datetime.now() desc = _DescriptionOf(fn) # TODO: Formatting test names when non-string names are involved print _FormatHeader("%s skipping %s, test(s) %s disabled" % (tstart, desc, testnames)) def RunEnvTests(): """Run several environment tests. """ RunTestIf("env", qa_env.TestSshConnection) RunTestIf("env", qa_env.TestIcmpPing) RunTestIf("env", qa_env.TestGanetiCommands) def _LookupRapiSecret(rapi_user): """Find the RAPI secret for the given user. @param rapi_user: Login user @return: Login secret for the user """ CTEXT = "{CLEARTEXT}" master = qa_config.GetMasterNode() cmd = ["cat", qa_utils.MakeNodePath(master, pathutils.RAPI_USERS_FILE)] file_content = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) users = ParsePasswordFile(file_content) entry = users.get(rapi_user) if not entry: raise qa_error.Error("User %s not found in RAPI users file" % rapi_user) secret = entry.password if secret.upper().startswith(CTEXT): secret = secret[len(CTEXT):] elif secret.startswith("{"): raise qa_error.Error("Unsupported password schema for RAPI user %s:" " not a clear text password" % rapi_user) return secret def SetupCluster(rapi_user): """Initializes the cluster. 
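  If the C{create-cluster} test is enabled, a new cluster is initialized
  with a freshly generated RAPI secret; otherwise the existing cluster is
  reused and the secret is looked up in the RAPI users file. A few tests
  (node, instance and job listing, job queue) are then run against the
  still-empty cluster before the secret is returned.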
@param rapi_user: Login user for RAPI @return: Login secret for RAPI """ rapi_secret = utils.GenerateSecret() RunTestIf("create-cluster", qa_cluster.TestClusterInit, rapi_user, rapi_secret) if not qa_config.TestEnabled("create-cluster"): # If the cluster is already in place, we assume that exclusive-storage is # already set according to the configuration qa_config.SetExclusiveStorage(qa_config.get("exclusive-storage", False)) if qa_rapi.Enabled(): # To support RAPI on an existing cluster we have to find out the secret rapi_secret = _LookupRapiSecret(rapi_user) # Test on empty cluster RunTestIf("node-list", qa_node.TestNodeList) RunTestIf("instance-list", qa_instance.TestInstanceList) RunTestIf("job-list", qa_job.TestJobList) RunTestIf("create-cluster", qa_node.TestNodeAddAll) if not qa_config.TestEnabled("create-cluster"): # consider the nodes are already there qa_node.MarkNodeAddedAll() RunTestIf("test-jobqueue", qa_cluster.TestJobqueue) RunTestIf("test-jobqueue", qa_job.TestJobCancellation) # enable the watcher (unconditionally) RunTest(qa_daemon.TestResumeWatcher) RunTestIf("node-list", qa_node.TestNodeList) # Test listing fields RunTestIf("node-list", qa_node.TestNodeListFields) RunTestIf("instance-list", qa_instance.TestInstanceListFields) RunTestIf("job-list", qa_job.TestJobListFields) RunTestIf("instance-export", qa_instance.TestBackupListFields) RunTestIf("node-info", qa_node.TestNodeInfo) return rapi_secret def RunClusterTests(): """Runs tests related to gnt-cluster. """ for test, fn in [ ("create-cluster", qa_cluster.TestClusterInitDisk), ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto), ("cluster-verify", qa_cluster.TestClusterVerify), ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs), # TODO: add more cluster modify tests ("cluster-modify", qa_cluster.TestClusterModifyEmpty), ("cluster-modify", qa_cluster.TestClusterModifyIPolicy), ("cluster-modify", qa_cluster.TestClusterModifyISpecs), ("cluster-modify", qa_cluster.TestClusterModifyBe), ("cluster-modify", qa_cluster.TestClusterModifyDisk), ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates), ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir), ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir), ("cluster-rename", qa_cluster.TestClusterRename), ("cluster-info", qa_cluster.TestClusterVersion), ("cluster-info", qa_cluster.TestClusterInfo), ("cluster-info", qa_cluster.TestClusterGetmaster), ("cluster-redist-conf", qa_cluster.TestClusterRedistConf), (["cluster-copyfile", qa_config.NoVirtualCluster], qa_cluster.TestClusterCopyfile), ("cluster-command", qa_cluster.TestClusterCommand), ("cluster-burnin", qa_cluster.TestClusterBurnin), ("cluster-master-failover", qa_cluster.TestClusterMasterFailover), ("cluster-master-failover", qa_cluster.TestClusterMasterFailoverWithDrainedQueue), (["cluster-oob", qa_config.NoVirtualCluster], qa_cluster.TestClusterOob), (qa_rapi.Enabled, qa_rapi.TestVersion), (qa_rapi.Enabled, qa_rapi.TestEmptyCluster), (qa_rapi.Enabled, qa_rapi.TestRapiQuery), ]: RunTestIf(test, fn) def RunRepairDiskSizes(): """Run the repair disk-sizes test. """ RunTestIf("cluster-repair-disk-sizes", qa_cluster.TestClusterRepairDiskSizes) def RunOsTests(): """Runs all tests related to gnt-os. 
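  These tests only run on a real (non-virtual) cluster with the C{os}
  tests enabled; when RAPI is enabled, the OS validity tests additionally
  receive a callback for querying operating systems via RAPI.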
""" os_enabled = ["os", qa_config.NoVirtualCluster] if qa_config.TestEnabled(qa_rapi.Enabled): rapi_getos = qa_rapi.GetOperatingSystems else: rapi_getos = None for fn in [ qa_os.TestOsList, qa_os.TestOsDiagnose, ]: RunTestIf(os_enabled, fn) for fn in [ qa_os.TestOsValid, qa_os.TestOsInvalid, qa_os.TestOsPartiallyValid, ]: RunTestIf(os_enabled, fn, rapi_getos) for fn in [ qa_os.TestOsModifyValid, qa_os.TestOsModifyInvalid, qa_os.TestOsStatesNonExisting, ]: RunTestIf(os_enabled, fn) def RunCommonInstanceTests(instance, inst_nodes): """Runs a few tests that are common to all disk types. """ RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance) RunTestIf(["instance-shutdown", "instance-console", qa_rapi.Enabled], qa_rapi.TestRapiStoppedInstanceConsole, instance) RunTestIf(["instance-shutdown", "instance-modify"], qa_instance.TestInstanceStoppedModify, instance) RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance) # Test shutdown/start via RAPI RunTestIf(["instance-shutdown", qa_rapi.Enabled], qa_rapi.TestRapiInstanceShutdown, instance) RunTestIf(["instance-shutdown", qa_rapi.Enabled], qa_rapi.TestRapiInstanceStartup, instance) RunTestIf("instance-list", qa_instance.TestInstanceList) RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance) RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance) RunTestIf(["instance-modify", qa_rapi.Enabled], qa_rapi.TestRapiInstanceModify, instance) RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance) RunTestIf(["instance-console", qa_rapi.Enabled], qa_rapi.TestRapiInstanceConsole, instance) RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames, instance) DOWN_TESTS = qa_config.Either([ "instance-reinstall", "instance-rename", "instance-grow-disk", ]) # shutdown instance for any 'down' tests RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance) # now run the 'down' state tests RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance) RunTestIf(["instance-reinstall", qa_rapi.Enabled], qa_rapi.TestRapiInstanceReinstall, instance) if qa_config.TestEnabled("instance-rename"): tgt_instance = qa_config.AcquireInstance() try: rename_source = instance.name rename_target = tgt_instance.name # perform instance rename to the same name RunTest(qa_instance.TestInstanceRenameAndBack, rename_source, rename_source) RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack, rename_source, rename_source) if rename_target is not None: # perform instance rename to a different name, if we have one configured RunTest(qa_instance.TestInstanceRenameAndBack, rename_source, rename_target) RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack, rename_source, rename_target) finally: tgt_instance.Release() RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance) # and now start the instance again RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance) RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance) RunTestIf("tags", qa_tags.TestInstanceTags, instance) if instance.disk_template == constants.DT_DRBD8: RunTestIf("cluster-verify", qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes) RunTestIf("cluster-verify", qa_cluster.TestClusterVerify) RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance) # Lists instances, too RunTestIf("node-list", qa_node.TestNodeList) # Some jobs have been run, let's test listing them RunTestIf("job-list", qa_job.TestJobList) def 
RunCommonNodeTests(): """Run a few common node tests. """ RunTestIf("node-volumes", qa_node.TestNodeVolumes) RunTestIf("node-storage", qa_node.TestNodeStorage) RunTestIf(["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand) def RunGroupListTests(): """Run tests for listing node groups. """ RunTestIf("group-list", qa_group.TestGroupList) RunTestIf("group-list", qa_group.TestGroupListFields) def RunNetworkTests(): """Run tests for network management. """ RunTestIf("network", qa_network.TestNetworkAddRemove) RunTestIf("network", qa_network.TestNetworkConnect) RunTestIf(["network", "tags"], qa_network.TestNetworkTags) def RunGroupRwTests(): """Run tests for adding/removing/renaming groups. """ RunTestIf("group-rwops", qa_group.TestGroupAddRemoveRename) RunTestIf("group-rwops", qa_group.TestGroupAddWithOptions) RunTestIf("group-rwops", qa_group.TestGroupModify) RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups) RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, qa_group.GetDefaultGroup()) def RunExportImportTests(instance, inodes): """Tries to export and import the instance. @type inodes: list of nodes @param inodes: current nodes of the instance """ # FIXME: export explicitly bails out on file based storage. other non-lvm # based storage types are untested, though. Also note that import could still # work, but is deeply embedded into the "export" case. if (qa_config.TestEnabled("instance-export") and instance.disk_template not in [constants.DT_FILE, constants.DT_SHARED_FILE]): RunTest(qa_instance.TestInstanceExportNoTarget, instance) pnode = inodes[0] expnode = qa_config.AcquireNode(exclude=pnode) try: name = RunTest(qa_instance.TestInstanceExport, instance, expnode) RunTest(qa_instance.TestBackupList, expnode) if qa_config.TestEnabled("instance-import"): newinst = qa_config.AcquireInstance() try: RunTest(qa_instance.TestInstanceImport, newinst, pnode, expnode, name) # Check if starting the instance works RunTest(qa_instance.TestInstanceStartup, newinst) RunTest(qa_instance.TestInstanceRemove, newinst) finally: newinst.Release() finally: expnode.Release() # FIXME: inter-cluster-instance-move crashes on file based instances :/ # See Issue 414. if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"]) and (instance.disk_template not in [constants.DT_FILE, constants.DT_SHARED_FILE])): newinst = qa_config.AcquireInstance() try: tnode = qa_config.AcquireNode(exclude=inodes) try: RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst, inodes, tnode) finally: tnode.Release() finally: newinst.Release() def RunDaemonTests(instance): """Test the ganeti-watcher script. """ RunTest(qa_daemon.TestPauseWatcher) RunTestIf("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart, instance) RunTestIf("instance-consecutive-failures", qa_daemon.TestInstanceConsecutiveFailures, instance) RunTest(qa_daemon.TestResumeWatcher) def RunHardwareFailureTests(instance, inodes): """Test cluster internal hardware failure recovery. 
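  Covers instance failover and migration (also through RAPI), replacing
  and recreating disks on alternative nodes and, if the instance spans at
  least two nodes, node evacuation, failover and migration. On clusters
  without enough spare nodes, the disk-recreation test may reuse instance
  nodes in different roles.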
""" RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance) RunTestIf(["instance-failover", qa_rapi.Enabled], qa_rapi.TestRapiInstanceFailover, instance) RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance) RunTestIf(["instance-migrate", qa_rapi.Enabled], qa_rapi.TestRapiInstanceMigrate, instance) if qa_config.TestEnabled("instance-replace-disks"): # We just need alternative secondary nodes, hence "- 1" othernodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes) try: RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance) RunTest(qa_instance.TestReplaceDisks, instance, inodes, othernodes) finally: qa_config.ReleaseManyNodes(othernodes) del othernodes if qa_config.TestEnabled("instance-recreate-disks"): try: acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes) othernodes = acquirednodes except qa_error.OutOfNodesError: if len(inodes) > 1: # If the cluster is not big enough, let's reuse some of the nodes, but # with different roles. In this way, we can test a DRBD instance even on # a 3-node cluster. acquirednodes = [qa_config.AcquireNode(exclude=inodes)] othernodes = acquirednodes + inodes[:-1] else: raise try: RunTest(qa_instance.TestRecreateDisks, instance, inodes, othernodes) finally: qa_config.ReleaseManyNodes(acquirednodes) if len(inodes) >= 2: RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, inodes[0], inodes[1]) RunTestIf("node-failover", qa_node.TestNodeFailover, inodes[0], inodes[1]) RunTestIf("node-migrate", qa_node.TestNodeMigrate, inodes[0], inodes[1]) def RunExclusiveStorageTests(): """Test exclusive storage.""" if not qa_config.TestEnabled("cluster-exclusive-storage"): return node = qa_config.AcquireNode() try: old_es = qa_cluster.TestSetExclStorCluster(False) qa_node.TestExclStorSingleNode(node) qa_cluster.TestSetExclStorCluster(True) qa_cluster.TestExclStorSharedPv(node) if qa_config.TestEnabled("instance-add-plain-disk"): # Make sure that the cluster doesn't have any pre-existing problem qa_cluster.AssertClusterVerify() # Create and allocate instances instance1 = qa_instance.TestInstanceAddWithPlainDisk([node]) try: instance2 = qa_instance.TestInstanceAddWithPlainDisk([node]) try: # cluster-verify checks that disks are allocated correctly qa_cluster.AssertClusterVerify() # Remove instances qa_instance.TestInstanceRemove(instance2) qa_instance.TestInstanceRemove(instance1) finally: instance2.Release() finally: instance1.Release() if qa_config.TestEnabled("instance-add-drbd-disk"): snode = qa_config.AcquireNode() try: qa_cluster.TestSetExclStorCluster(False) instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode]) try: qa_cluster.TestSetExclStorCluster(True) exp_err = [constants.CV_EINSTANCEUNSUITABLENODE] qa_cluster.AssertClusterVerify(fail=True, errors=exp_err) qa_instance.TestInstanceRemove(instance) finally: instance.Release() finally: snode.Release() qa_cluster.TestSetExclStorCluster(old_es) finally: node.Release() def _BuildSpecDict(par, mn, st, mx): return { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: {par: mn}, constants.ISPECS_MAX: {par: mx}, }], constants.ISPECS_STD: {par: st}, } def _BuildDoubleSpecDict(index, par, mn, st, mx): new_spec = { constants.ISPECS_MINMAX: [{}, {}], } if st is not None: new_spec[constants.ISPECS_STD] = {par: st} new_spec[constants.ISPECS_MINMAX][index] = { constants.ISPECS_MIN: {par: mn}, constants.ISPECS_MAX: {par: mx}, } return new_spec def TestIPolicyPlainInstance(): """Test instance policy interaction with instances""" params = 
["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"] if not qa_config.IsTemplateSupported(constants.DT_PLAIN): print "Template %s not supported" % constants.DT_PLAIN return # This test assumes that the group policy is empty (_, old_specs) = qa_cluster.TestClusterSetISpecs() # We also assume to have only one min/max bound assert len(old_specs[constants.ISPECS_MINMAX]) == 1 node = qa_config.AcquireNode() try: # Log of policy changes, list of tuples: # (full_change, incremental_change, policy_violated) history = [] instance = qa_instance.TestInstanceAddWithPlainDisk([node]) try: policyerror = [constants.CV_EINSTANCEPOLICY] for par in params: (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par) # Some specs must be multiple of 4 new_spec = _BuildSpecDict(par, imaxval + 4, imaxval + 4, imaxval + 4) history.append((None, new_spec, True)) if iminval > 0: # Some specs must be multiple of 4 if iminval >= 4: upper = iminval - 4 else: upper = iminval - 1 new_spec = _BuildSpecDict(par, 0, upper, upper) history.append((None, new_spec, True)) history.append((old_specs, None, False)) # Test with two instance specs double_specs = copy.deepcopy(old_specs) double_specs[constants.ISPECS_MINMAX] = \ double_specs[constants.ISPECS_MINMAX] * 2 (par1, par2) = params[0:2] (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1) (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2) old_minmax = old_specs[constants.ISPECS_MINMAX][0] history.extend([ (double_specs, None, False), # The first min/max limit is being violated (None, _BuildDoubleSpecDict(0, par1, imaxval1 + 4, imaxval1 + 4, imaxval1 + 4), False), # Both min/max limits are being violated (None, _BuildDoubleSpecDict(1, par2, imaxval2 + 4, None, imaxval2 + 4), True), # The second min/max limit is being violated (None, _BuildDoubleSpecDict(0, par1, old_minmax[constants.ISPECS_MIN][par1], old_specs[constants.ISPECS_STD][par1], old_minmax[constants.ISPECS_MAX][par1]), False), (old_specs, None, False), ]) # Apply the changes, and check policy violations after each change qa_cluster.AssertClusterVerify() for (new_specs, diff_specs, failed) in history: qa_cluster.TestClusterSetISpecs(new_specs=new_specs, diff_specs=diff_specs) if failed: qa_cluster.AssertClusterVerify(warnings=policyerror) else: qa_cluster.AssertClusterVerify() qa_instance.TestInstanceRemove(instance) finally: instance.Release() # Now we replay the same policy changes, and we expect that the instance # cannot be created for the cases where we had a policy violation above for (new_specs, diff_specs, failed) in history: qa_cluster.TestClusterSetISpecs(new_specs=new_specs, diff_specs=diff_specs) if failed: qa_instance.TestInstanceAddWithPlainDisk([node], fail=True) # Instance creation with no policy violation has been tested already finally: node.Release() def IsExclusiveStorageInstanceTestEnabled(): test_name = "exclusive-storage-instance-tests" if qa_config.TestEnabled(test_name): vgname = qa_config.get("vg-name", constants.DEFAULT_VG) vgscmd = utils.ShellQuoteArgs([ "vgs", "--noheadings", "-o", "pv_count", vgname, ]) nodes = qa_config.GetConfig()["nodes"] for node in nodes: try: pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd)) except Exception, e: msg = ("Cannot get the number of PVs on %s, needed by '%s': %s" % (node.primary, test_name, e)) raise qa_error.Error(msg) if pvnum < 2: raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" % (node.primary, pvnum, test_name)) res = True else: res = False return res def 
RunInstanceTests(): """Create and exercise instances.""" instance_tests = [ ("instance-add-plain-disk", constants.DT_PLAIN, qa_instance.TestInstanceAddWithPlainDisk, 1), ("instance-add-drbd-disk", constants.DT_DRBD8, qa_instance.TestInstanceAddWithDrbdDisk, 2), ("instance-add-diskless", constants.DT_DISKLESS, qa_instance.TestInstanceAddDiskless, 1), ("instance-add-file", constants.DT_FILE, qa_instance.TestInstanceAddFile, 1), ("instance-add-shared-file", constants.DT_SHARED_FILE, qa_instance.TestInstanceAddSharedFile, 1), ] for (test_name, templ, create_fun, num_nodes) in instance_tests: if (qa_config.TestEnabled(test_name) and qa_config.IsTemplateSupported(templ)): inodes = qa_config.AcquireManyNodes(num_nodes) try: instance = RunTest(create_fun, inodes) try: RunTestIf("cluster-epo", qa_cluster.TestClusterEpo) RunDaemonTests(instance) for node in inodes: RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node) if len(inodes) > 1: RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit, constants.INITIAL_NODE_GROUP_NAME, inodes[0].primary, inodes[1].primary) if qa_config.TestEnabled("instance-convert-disk"): RunTest(qa_instance.TestInstanceShutdown, instance) RunTest(qa_instance.TestInstanceConvertDiskToPlain, instance, inodes) RunTest(qa_instance.TestInstanceStartup, instance) RunTestIf("instance-modify-disks", qa_instance.TestInstanceModifyDisks, instance) RunCommonInstanceTests(instance, inodes) if qa_config.TestEnabled("instance-modify-primary"): othernode = qa_config.AcquireNode() RunTest(qa_instance.TestInstanceModifyPrimaryAndBack, instance, inodes[0], othernode) othernode.Release() RunGroupListTests() RunExportImportTests(instance, inodes) RunHardwareFailureTests(instance, inodes) RunRepairDiskSizes() RunTest(qa_instance.TestInstanceRemove, instance) finally: instance.Release() del instance finally: qa_config.ReleaseManyNodes(inodes) qa_cluster.AssertClusterVerify() def RunMonitoringTests(): if qa_config.TestEnabled("mon-collector"): RunTest(qa_monitoring.TestInstStatusCollector) def RunQa(): """Main QA body. 
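  Runs the test suite in order: environment checks, cluster setup,
  cluster/OS/tag tests, node, group and network tests, the instance tests
  (optionally repeated with exclusive storage enabled), instance policy
  and monitoring tests, and finally node removal and cluster destruction.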
""" rapi_user = "ganeti-qa" RunEnvTests() rapi_secret = SetupCluster(rapi_user) if qa_rapi.Enabled(): # Load RAPI certificate qa_rapi.Setup(rapi_user, rapi_secret) RunClusterTests() RunOsTests() RunTestIf("tags", qa_tags.TestClusterTags) RunCommonNodeTests() RunGroupListTests() RunGroupRwTests() RunNetworkTests() # The master shouldn't be readded or put offline; "delay" needs a non-master # node to test pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode()) try: RunTestIf("node-readd", qa_node.TestNodeReadd, pnode) RunTestIf("node-modify", qa_node.TestNodeModify, pnode) RunTestIf("delay", qa_cluster.TestDelay, pnode) finally: pnode.Release() # Make sure the cluster is clean before running instance tests qa_cluster.AssertClusterVerify() pnode = qa_config.AcquireNode() try: RunTestIf("tags", qa_tags.TestNodeTags, pnode) if qa_rapi.Enabled(): RunTest(qa_rapi.TestNode, pnode) if (qa_config.TestEnabled("instance-add-plain-disk") and qa_config.IsTemplateSupported(constants.DT_PLAIN)): for use_client in [True, False]: rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode, use_client) try: if qa_config.TestEnabled("instance-plain-rapi-common-tests"): RunCommonInstanceTests(rapi_instance, [pnode]) RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client) finally: rapi_instance.Release() del rapi_instance finally: pnode.Release() config_list = [ ("default-instance-tests", lambda: None, lambda _: None), (IsExclusiveStorageInstanceTestEnabled, lambda: qa_cluster.TestSetExclStorCluster(True), qa_cluster.TestSetExclStorCluster), ] for (conf_name, setup_conf_f, restore_conf_f) in config_list: if qa_config.TestEnabled(conf_name): oldconf = setup_conf_f() RunInstanceTests() restore_conf_f(oldconf) pnode = qa_config.AcquireNode() try: if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]): for shutdown in [False, True]: instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode]) try: expnode = qa_config.AcquireNode(exclude=pnode) try: if shutdown: # Stop instance before exporting and removing it RunTest(qa_instance.TestInstanceShutdown, instance) RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode) RunTest(qa_instance.TestBackupList, expnode) finally: expnode.Release() finally: instance.Release() del expnode del instance qa_cluster.AssertClusterVerify() finally: pnode.Release() RunExclusiveStorageTests() RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"], TestIPolicyPlainInstance) RunTestIf( "instance-add-restricted-by-disktemplates", qa_instance.TestInstanceCreationRestrictedByDiskTemplates) # Test removing instance with offline drbd secondary if qa_config.TestEnabled(["instance-remove-drbd-offline", "instance-add-drbd-disk"]): # Make sure the master is not put offline snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode()) try: pnode = qa_config.AcquireNode(exclude=snode) try: instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode]) set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes") set_online = lambda node: qa_node.MakeNodeOffline(node, "no") RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode, set_offline, set_online) finally: pnode.Release() finally: snode.Release() qa_cluster.AssertClusterVerify() RunMonitoringTests() RunTestIf("create-cluster", qa_node.TestNodeRemoveAll) RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy) @UsesRapiClient def main(): """Main program. 
""" parser = optparse.OptionParser(usage="%prog [options] ") parser.add_option("--yes-do-it", dest="yes_do_it", action="store_true", help="Really execute the tests") (opts, args) = parser.parse_args() if len(args) == 1: (config_file, ) = args else: parser.error("Wrong number of arguments.") if not opts.yes_do_it: print ("Executing this script irreversibly destroys any Ganeti\n" "configuration on all nodes involved. If you really want\n" "to start testing, supply the --yes-do-it option.") sys.exit(1) qa_config.Load(config_file) primary = qa_config.GetMasterNode().primary qa_utils.StartMultiplexer(primary) print ("SSH command for primary node: %s" % utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, ""))) print ("SSH command for other nodes: %s" % utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))) try: RunQa() finally: qa_utils.CloseMultiplexers() if __name__ == "__main__": main() ganeti-2.9.3/qa/qa_config.py0000644000000000000000000004565412271422343015711 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """QA configuration. """ import os from ganeti import constants from ganeti import utils from ganeti import serializer from ganeti import compat from ganeti import ht import qa_error _INSTANCE_CHECK_KEY = "instance-check" _ENABLED_HV_KEY = "enabled-hypervisors" _VCLUSTER_MASTER_KEY = "vcluster-master" _VCLUSTER_BASEDIR_KEY = "vcluster-basedir" _ENABLED_DISK_TEMPLATES_KEY = "enabled-disk-templates" #: QA configuration (L{_QaConfig}) _config = None class _QaInstance(object): __slots__ = [ "name", "nicmac", "_used", "_disk_template", ] def __init__(self, name, nicmac): """Initializes instances of this class. """ self.name = name self.nicmac = nicmac self._used = None self._disk_template = None @classmethod def FromDict(cls, data): """Creates instance object from JSON dictionary. """ nicmac = [] macaddr = data.get("nic.mac/0") if macaddr: nicmac.append(macaddr) return cls(name=data["name"], nicmac=nicmac) def __repr__(self): status = [ "%s.%s" % (self.__class__.__module__, self.__class__.__name__), "name=%s" % self.name, "nicmac=%s" % self.nicmac, "used=%s" % self._used, "disk_template=%s" % self._disk_template, ] return "<%s at %#x>" % (" ".join(status), id(self)) def Use(self): """Marks instance as being in use. """ assert not self._used assert self._disk_template is None self._used = True def Release(self): """Releases instance and makes it available again. """ assert self._used, \ ("Instance '%s' was never acquired or released more than once" % self.name) self._used = False self._disk_template = None def GetNicMacAddr(self, idx, default): """Returns MAC address for NIC. 
@type idx: int @param idx: NIC index @param default: Default value """ if len(self.nicmac) > idx: return self.nicmac[idx] else: return default def SetDiskTemplate(self, template): """Set the disk template. """ assert template in constants.DISK_TEMPLATES self._disk_template = template @property def used(self): """Returns boolean denoting whether instance is in use. """ return self._used @property def disk_template(self): """Returns the current disk template. """ return self._disk_template class _QaNode(object): __slots__ = [ "primary", "secondary", "_added", "_use_count", ] def __init__(self, primary, secondary): """Initializes instances of this class. """ self.primary = primary self.secondary = secondary self._added = False self._use_count = 0 @classmethod def FromDict(cls, data): """Creates node object from JSON dictionary. """ return cls(primary=data["primary"], secondary=data.get("secondary")) def __repr__(self): status = [ "%s.%s" % (self.__class__.__module__, self.__class__.__name__), "primary=%s" % self.primary, "secondary=%s" % self.secondary, "added=%s" % self._added, "use_count=%s" % self._use_count, ] return "<%s at %#x>" % (" ".join(status), id(self)) def Use(self): """Marks a node as being in use. """ assert self._use_count >= 0 self._use_count += 1 return self def Release(self): """Release a node (opposite of L{Use}). """ assert self.use_count > 0 self._use_count -= 1 def MarkAdded(self): """Marks node as having been added to a cluster. """ assert not self._added self._added = True def MarkRemoved(self): """Marks node as having been removed from a cluster. """ assert self._added self._added = False @property def added(self): """Returns whether a node is part of a cluster. """ return self._added @property def use_count(self): """Returns number of current uses (controlled by L{Use} and L{Release}). """ return self._use_count _RESOURCE_CONVERTER = { "instances": _QaInstance.FromDict, "nodes": _QaNode.FromDict, } def _ConvertResources((key, value)): """Converts cluster resources in configuration to Python objects. """ fn = _RESOURCE_CONVERTER.get(key, None) if fn: return (key, map(fn, value)) else: return (key, value) class _QaConfig(object): def __init__(self, data): """Initializes instances of this class. """ self._data = data #: Cluster-wide run-time value of the exclusive storage flag self._exclusive_storage = None @classmethod def Load(cls, filename): """Loads a configuration file and produces a configuration object. @type filename: string @param filename: Path to configuration file @rtype: L{_QaConfig} """ data = serializer.LoadJson(utils.ReadFile(filename)) result = cls(dict(map(_ConvertResources, data.items()))) # pylint: disable=E1103 result.Validate() return result def Validate(self): """Validates loaded configuration data. 
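    Verifies that a cluster name, at least one node and at least one
    instance are configured, that every disk entry has C{size} and
    C{growth}, that an optional instance check script actually exists,
    that only known hypervisors are enabled, and that the virtual-cluster
    options are either both set (with an absolute, normalized base
    directory) or both unset. The first violation found raises
    L{qa_error.Error}.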
""" if not self.get("name"): raise qa_error.Error("Cluster name is required") if not self.get("nodes"): raise qa_error.Error("Need at least one node") if not self.get("instances"): raise qa_error.Error("Need at least one instance") disks = self.GetDiskOptions() if disks is None: raise qa_error.Error("Config option 'disks' must exist") else: for d in disks: if d.get("size") is None or d.get("growth") is None: raise qa_error.Error("Config options `size` and `growth` must exist" " for all `disks` items") check = self.GetInstanceCheckScript() if check: try: os.stat(check) except EnvironmentError, err: raise qa_error.Error("Can't find instance check script '%s': %s" % (check, err)) enabled_hv = frozenset(self.GetEnabledHypervisors()) if not enabled_hv: raise qa_error.Error("No hypervisor is enabled") difference = enabled_hv - constants.HYPER_TYPES if difference: raise qa_error.Error("Unknown hypervisor(s) enabled: %s" % utils.CommaJoin(difference)) (vc_master, vc_basedir) = self.GetVclusterSettings() if bool(vc_master) != bool(vc_basedir): raise qa_error.Error("All or none of the config options '%s' and '%s'" " must be set" % (_VCLUSTER_MASTER_KEY, _VCLUSTER_BASEDIR_KEY)) if vc_basedir and not utils.IsNormAbsPath(vc_basedir): raise qa_error.Error("Path given in option '%s' must be absolute and" " normalized" % _VCLUSTER_BASEDIR_KEY) def __getitem__(self, name): """Returns configuration value. @type name: string @param name: Name of configuration entry """ return self._data[name] def __setitem__(self, key, value): """Sets a configuration value. """ self._data[key] = value def __delitem__(self, key): """Deletes a value from the configuration. """ del(self._data[key]) def __len__(self): """Return the number of configuration items. """ return len(self._data) def get(self, name, default=None): """Returns configuration value. @type name: string @param name: Name of configuration entry @param default: Default value """ return self._data.get(name, default) def GetMasterNode(self): """Returns the default master node for the cluster. """ return self["nodes"][0] def GetInstanceCheckScript(self): """Returns path to instance check script or C{None}. """ return self._data.get(_INSTANCE_CHECK_KEY, None) def GetEnabledHypervisors(self): """Returns list of enabled hypervisors. @rtype: list """ return self._GetStringListParameter( _ENABLED_HV_KEY, [constants.DEFAULT_ENABLED_HYPERVISOR]) def GetDefaultHypervisor(self): """Returns the default hypervisor to be used. """ return self.GetEnabledHypervisors()[0] def GetEnabledDiskTemplates(self): """Returns the list of enabled disk templates. @rtype: list """ return self._GetStringListParameter( _ENABLED_DISK_TEMPLATES_KEY, constants.DEFAULT_ENABLED_DISK_TEMPLATES) def GetEnabledStorageTypes(self): """Returns the list of enabled storage types. @rtype: list @returns: the list of storage types enabled for QA """ enabled_disk_templates = self.GetEnabledDiskTemplates() enabled_storage_types = list( set([constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[dt] for dt in enabled_disk_templates])) # Storage type 'lvm-pv' cannot be activated via a disk template, # therefore we add it if 'lvm-vg' is present. if constants.ST_LVM_VG in enabled_storage_types: enabled_storage_types.append(constants.ST_LVM_PV) return enabled_storage_types def GetDefaultDiskTemplate(self): """Returns the default disk template to be used. """ return self.GetEnabledDiskTemplates()[0] def _GetStringListParameter(self, key, default_values): """Retrieves a parameter's value that is supposed to be a list of strings. 
@rtype: list """ try: value = self._data[key] except KeyError: return default_values else: if value is None: return [] elif isinstance(value, basestring): return value.split(",") else: return value def SetExclusiveStorage(self, value): """Set the expected value of the C{exclusive_storage} flag for the cluster. """ self._exclusive_storage = bool(value) def GetExclusiveStorage(self): """Get the expected value of the C{exclusive_storage} flag for the cluster. """ value = self._exclusive_storage assert value is not None return value def IsTemplateSupported(self, templ): """Is the given disk template supported by the current configuration? """ enabled = templ in self.GetEnabledDiskTemplates() return enabled and (not self.GetExclusiveStorage() or templ in constants.DTS_EXCL_STORAGE) def IsStorageTypeSupported(self, storage_type): """Is the given storage type supported by the current configuration? This is determined by looking if at least one of the disk templates which is associated with the storage type is enabled in the configuration. """ enabled_disk_templates = self.GetEnabledDiskTemplates() if storage_type == constants.ST_LVM_PV: disk_templates = utils.GetDiskTemplatesOfStorageType(constants.ST_LVM_VG) else: disk_templates = utils.GetDiskTemplatesOfStorageType(storage_type) return bool(set(enabled_disk_templates).intersection(set(disk_templates))) def AreSpindlesSupported(self): """Are spindles supported by the current configuration? """ return self.GetExclusiveStorage() def GetVclusterSettings(self): """Returns settings for virtual cluster. """ master = self.get(_VCLUSTER_MASTER_KEY) basedir = self.get(_VCLUSTER_BASEDIR_KEY) return (master, basedir) def GetDiskOptions(self): """Return options for the disks of the instances. Get 'disks' parameter from the configuration data. If 'disks' is missing, try to create it from the legacy 'disk' and 'disk-growth' parameters. """ try: return self._data["disks"] except KeyError: pass # Legacy interface sizes = self._data.get("disk") growths = self._data.get("disk-growth") if sizes or growths: if (sizes is None or growths is None or len(sizes) != len(growths)): raise qa_error.Error("Config options 'disk' and 'disk-growth' must" " exist and have the same number of items") disks = [] for (size, growth) in zip(sizes, growths): disks.append({"size": size, "growth": growth}) return disks else: return None def Load(path): """Loads the passed configuration file. """ global _config # pylint: disable=W0603 _config = _QaConfig.Load(path) def GetConfig(): """Returns the configuration object. """ if _config is None: raise RuntimeError("Configuration not yet loaded") return _config def get(name, default=None): """Wrapper for L{_QaConfig.get}. """ return GetConfig().get(name, default=default) class Either: def __init__(self, tests): """Initializes this class. @type tests: list or string @param tests: List of test names @see: L{TestEnabled} for details """ self.tests = tests def _MakeSequence(value): """Make sequence of single argument. If the single argument is not already a list or tuple, a list with the argument as a single item is returned. """ if isinstance(value, (list, tuple)): return value else: return [value] def _TestEnabledInner(check_fn, names, fn): """Evaluate test conditions. 
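# Illustrative sketch, not part of the original source: how
# GetDiskOptions above folds the legacy "disk"/"disk-growth" options
# into the newer "disks" layout when "disks" is absent:
#
#   {"disk": ["1G", "2G"], "disk-growth": ["512M", "1G"]}
#     -> [{"size": "1G", "growth": "512M"},
#         {"size": "2G", "growth": "1G"}]
#
# If only one of the two legacy options is given, or their lengths
# differ, qa_error.Error is raised instead.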
@type check_fn: callable @param check_fn: Callback to check whether a test is enabled @type names: sequence or string @param names: Test name(s) @type fn: callable @param fn: Aggregation function @rtype: bool @return: Whether test is enabled """ names = _MakeSequence(names) result = [] for name in names: if isinstance(name, Either): value = _TestEnabledInner(check_fn, name.tests, compat.any) elif isinstance(name, (list, tuple)): value = _TestEnabledInner(check_fn, name, compat.all) elif callable(name): value = name() else: value = check_fn(name) result.append(value) return fn(result) def TestEnabled(tests, _cfg=None): """Returns True if the given tests are enabled. @param tests: A single test as a string, or a list of tests to check; can contain L{Either} for OR conditions, AND is default """ if _cfg is None: cfg = GetConfig() else: cfg = _cfg # Get settings for all tests cfg_tests = cfg.get("tests", {}) # Get default setting default = cfg_tests.get("default", True) return _TestEnabledInner(lambda name: cfg_tests.get(name, default), tests, compat.all) def GetInstanceCheckScript(*args): """Wrapper for L{_QaConfig.GetInstanceCheckScript}. """ return GetConfig().GetInstanceCheckScript(*args) def GetEnabledHypervisors(*args): """Wrapper for L{_QaConfig.GetEnabledHypervisors}. """ return GetConfig().GetEnabledHypervisors(*args) def GetDefaultHypervisor(*args): """Wrapper for L{_QaConfig.GetDefaultHypervisor}. """ return GetConfig().GetDefaultHypervisor(*args) def GetEnabledDiskTemplates(*args): """Wrapper for L{_QaConfig.GetEnabledDiskTemplates}. """ return GetConfig().GetEnabledDiskTemplates(*args) def GetEnabledStorageTypes(*args): """Wrapper for L{_QaConfig.GetEnabledStorageTypes}. """ return GetConfig().GetEnabledStorageTypes(*args) def GetDefaultDiskTemplate(*args): """Wrapper for L{_QaConfig.GetDefaultDiskTemplate}. """ return GetConfig().GetDefaultDiskTemplate(*args) def GetMasterNode(): """Wrapper for L{_QaConfig.GetMasterNode}. """ return GetConfig().GetMasterNode() def AcquireInstance(_cfg=None): """Returns an instance which isn't in use. """ if _cfg is None: cfg = GetConfig() else: cfg = _cfg # Filter out unwanted instances instances = filter(lambda inst: not inst.used, cfg["instances"]) if not instances: raise qa_error.OutOfInstancesError("No instances left") instance = instances[0] instance.Use() return instance def SetExclusiveStorage(value): """Wrapper for L{_QaConfig.SetExclusiveStorage}. """ return GetConfig().SetExclusiveStorage(value) def GetExclusiveStorage(): """Wrapper for L{_QaConfig.GetExclusiveStorage}. """ return GetConfig().GetExclusiveStorage() def IsTemplateSupported(templ): """Wrapper for L{_QaConfig.IsTemplateSupported}. """ return GetConfig().IsTemplateSupported(templ) def IsStorageTypeSupported(storage_type): """Wrapper for L{_QaConfig.IsTemplateSupported}. """ return GetConfig().IsStorageTypeSupported(storage_type) def AreSpindlesSupported(): """Wrapper for L{_QaConfig.AreSpindlesSupported}. """ return GetConfig().AreSpindlesSupported() def _NodeSortKey(node): """Returns sort key for a node. @type node: L{_QaNode} """ return (node.use_count, utils.NiceSortKey(node.primary)) def AcquireNode(exclude=None, _cfg=None): """Returns the least used node. 
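# Illustrative sketch, not part of the original source: how test-name
# expressions combine in TestEnabled() above (test names are made up).
#
#   TestEnabled("burnin")                      # single test
#   TestEnabled(["a", "b"])                    # a AND b
#   TestEnabled(Either(["a", "b"]))            # a OR b
#   TestEnabled(["a", Either(["b", "c"])])     # a AND (b OR c)
#
# Each plain name is looked up in the "tests" section of the QA
# configuration; unknown names fall back to its "default" entry, which
# is True when unset.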
""" if _cfg is None: cfg = GetConfig() else: cfg = _cfg master = cfg.GetMasterNode() # Filter out unwanted nodes # TODO: Maybe combine filters if exclude is None: nodes = cfg["nodes"][:] elif isinstance(exclude, (list, tuple)): nodes = filter(lambda node: node not in exclude, cfg["nodes"]) else: nodes = filter(lambda node: node != exclude, cfg["nodes"]) nodes = filter(lambda node: node.added or node == master, nodes) if not nodes: raise qa_error.OutOfNodesError("No nodes left") # Return node with least number of uses return sorted(nodes, key=_NodeSortKey)[0].Use() def AcquireManyNodes(num, exclude=None): """Return the least used nodes. @type num: int @param num: Number of nodes; can be 0. @type exclude: list of nodes or C{None} @param exclude: nodes to be excluded from the choice @rtype: list of nodes @return: C{num} different nodes """ nodes = [] if exclude is None: exclude = [] elif isinstance(exclude, (list, tuple)): # Don't modify the incoming argument exclude = list(exclude) else: exclude = [exclude] try: for _ in range(0, num): n = AcquireNode(exclude=exclude) nodes.append(n) exclude.append(n) except qa_error.OutOfNodesError: ReleaseManyNodes(nodes) raise return nodes def ReleaseManyNodes(nodes): for node in nodes: node.Release() def GetVclusterSettings(): """Wrapper for L{_QaConfig.GetVclusterSettings}. """ return GetConfig().GetVclusterSettings() def UseVirtualCluster(_cfg=None): """Returns whether a virtual cluster is used. @rtype: bool """ if _cfg is None: cfg = GetConfig() else: cfg = _cfg (master, _) = cfg.GetVclusterSettings() return bool(master) @ht.WithDesc("No virtual cluster") def NoVirtualCluster(): """Used to disable tests for virtual clusters. """ return not UseVirtualCluster() def GetDiskOptions(): """Wrapper for L{_QaConfig.GetDiskOptions}. """ return GetConfig().GetDiskOptions() ganeti-2.9.3/qa/qa_daemon.py0000644000000000000000000001011312244641676015701 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Daemon related QA tests. """ import time from ganeti import utils from ganeti import pathutils import qa_config import qa_utils import qa_error from qa_utils import AssertMatch, AssertCommand, StartSSH, GetCommandOutput def _InstanceRunning(name): """Checks whether an instance is running. @param name: full name of the instance """ master = qa_config.GetMasterNode() cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) + ' | grep running') ret = StartSSH(master.primary, cmd).wait() return ret == 0 def _ShutdownInstance(name): """Shuts down instance without recording state and waits for completion. 
@param name: full name of the instance """ AssertCommand(["gnt-instance", "shutdown", "--no-remember", name]) if _InstanceRunning(name): raise qa_error.Error("instance shutdown failed") def _StartInstance(name): """Starts instance and waits for completion. @param name: full name of the instance """ AssertCommand(["gnt-instance", "start", name]) if not bool(_InstanceRunning(name)): raise qa_error.Error("instance start failed") def _ResetWatcherDaemon(): """Removes the watcher daemon's state file. """ path = \ qa_utils.MakeNodePath(qa_config.GetMasterNode(), pathutils.WATCHER_GROUP_STATE_FILE % "*-*-*-*") AssertCommand(["bash", "-c", "rm -vf %s" % path]) def _RunWatcherDaemon(): """Runs the ganeti-watcher daemon on the master node. """ AssertCommand(["ganeti-watcher", "-d", "--ignore-pause", "--wait-children"]) def TestPauseWatcher(): """Tests and pauses the watcher. """ master = qa_config.GetMasterNode() AssertCommand(["gnt-cluster", "watcher", "pause", "4h"]) cmd = ["gnt-cluster", "watcher", "info"] output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) AssertMatch(output, r"^.*\bis paused\b.*") def TestResumeWatcher(): """Tests and unpauses the watcher. """ master = qa_config.GetMasterNode() AssertCommand(["gnt-cluster", "watcher", "continue"]) cmd = ["gnt-cluster", "watcher", "info"] output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) AssertMatch(output, r"^.*\bis not paused\b.*") def TestInstanceAutomaticRestart(instance): """Test automatic restart of instance by ganeti-watcher. """ inst_name = qa_utils.ResolveInstanceName(instance.name) _ResetWatcherDaemon() _ShutdownInstance(inst_name) _RunWatcherDaemon() time.sleep(5) if not _InstanceRunning(inst_name): raise qa_error.Error("Daemon didn't restart instance") AssertCommand(["gnt-instance", "info", inst_name]) def TestInstanceConsecutiveFailures(instance): """Test five consecutive instance failures. """ inst_name = qa_utils.ResolveInstanceName(instance.name) inst_was_running = bool(_InstanceRunning(inst_name)) _ResetWatcherDaemon() for should_start in ([True] * 5) + [False]: _ShutdownInstance(inst_name) _RunWatcherDaemon() time.sleep(5) if bool(_InstanceRunning(inst_name)) != should_start: if should_start: msg = "Instance not started when it should" else: msg = "Instance started when it shouldn't" raise qa_error.Error(msg) AssertCommand(["gnt-instance", "info", inst_name]) if inst_was_running: _StartInstance(inst_name) ganeti-2.9.3/qa/qa_os.py0000644000000000000000000001555412244641676015075 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """OS related QA tests. 
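# Note, not part of the original source: the watcher tests above encode
# the expected give-up behaviour. TestInstanceConsecutiveFailures shuts
# the instance down six times; ganeti-watcher is expected to restart it
# after each of the first five failures and to leave it down on the
# sixth ([True] * 5 + [False]).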
""" import os import os.path from ganeti import utils from ganeti import constants from ganeti import pathutils import qa_config import qa_utils import qa_error from qa_utils import AssertCommand, AssertIn, AssertNotIn _TEMP_OS_NAME = "TEMP-Ganeti-QA-OS" _TEMP_OS_PATH = os.path.join(pathutils.OS_SEARCH_PATH[0], _TEMP_OS_NAME) (_ALL_VALID, _ALL_INVALID, _PARTIALLY_VALID) = range(1, 4) def TestOsList(): """gnt-os list""" AssertCommand(["gnt-os", "list"]) def TestOsDiagnose(): """gnt-os diagnose""" AssertCommand(["gnt-os", "diagnose"]) def _TestOsModify(hvp_dict, fail=False): """gnt-os modify""" cmd = ["gnt-os", "modify"] for hv_name, hv_params in hvp_dict.items(): cmd.append("-H") options = [] for key, value in hv_params.items(): options.append("%s=%s" % (key, value)) cmd.append("%s:%s" % (hv_name, ",".join(options))) cmd.append(_TEMP_OS_NAME) AssertCommand(cmd, fail=fail) def _TestOsStates(os_name): """gnt-os modify, more stuff""" cmd = ["gnt-os", "modify"] for param in ["hidden", "blacklisted"]: for val in ["yes", "no"]: new_cmd = cmd + ["--%s" % param, val, os_name] AssertCommand(new_cmd) # check that double-running the command is OK AssertCommand(new_cmd) def _SetupTempOs(node, dirname, variant, valid): """Creates a temporary OS definition on the given node. """ sq = utils.ShellQuoteArgs parts = [ sq(["rm", "-rf", dirname]), sq(["mkdir", "-p", dirname]), sq(["cd", dirname]), sq(["ln", "-fs", "/bin/true", "export"]), sq(["ln", "-fs", "/bin/true", "import"]), sq(["ln", "-fs", "/bin/true", "rename"]), sq(["ln", "-fs", "/bin/true", "verify"]), ] if valid: parts.append(sq(["ln", "-fs", "/bin/true", "create"])) parts.append(sq(["echo", str(constants.OS_API_V20)]) + " >ganeti_api_version") parts.append(sq(["echo", variant]) + " >variants.list") parts.append(sq(["echo", "funny this is funny"]) + " >parameters.list") cmd = " && ".join(parts) print qa_utils.FormatInfo("Setting up %s with %s OS definition" % (node.primary, ["an invalid", "a valid"][int(valid)])) AssertCommand(cmd, node=node) def _RemoveTempOs(node, dirname): """Removes a temporary OS definition. 
""" AssertCommand(["rm", "-rf", dirname], node=node) def _TestOs(mode, rapi_cb): """Generic function for OS definition testing """ master = qa_config.GetMasterNode() name = _TEMP_OS_NAME variant = "default" fullname = "%s+%s" % (name, variant) dirname = _TEMP_OS_PATH # Ensure OS is usable cmd = ["gnt-os", "modify", "--hidden=no", "--blacklisted=no", name] AssertCommand(cmd) nodes = [] try: for i, node in enumerate(qa_config.get("nodes")): nodes.append(node) if mode == _ALL_INVALID: valid = False elif mode == _ALL_VALID: valid = True elif mode == _PARTIALLY_VALID: valid = bool(i % 2) else: raise AssertionError("Unknown mode %s" % mode) _SetupTempOs(node, dirname, variant, valid) # TODO: Use Python 2.6's itertools.permutations for (hidden, blacklisted) in [(False, False), (True, False), (False, True), (True, True)]: # Change OS' visibility cmd = ["gnt-os", "modify", "--hidden", ["no", "yes"][int(hidden)], "--blacklisted", ["no", "yes"][int(blacklisted)], name] AssertCommand(cmd) # Diagnose, checking exit status AssertCommand(["gnt-os", "diagnose"], fail=(mode != _ALL_VALID)) # Diagnose again, ignoring exit status output = qa_utils.GetCommandOutput(master.primary, "gnt-os diagnose || :") for line in output.splitlines(): if line.startswith("OS: %s [global status:" % name): break else: raise qa_error.Error("Didn't find OS '%s' in 'gnt-os diagnose'" % name) # Check info for all cmd = ["gnt-os", "info"] output = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) AssertIn("%s:" % name, output.splitlines()) # Check info for OS cmd = ["gnt-os", "info", name] output = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)).splitlines() AssertIn("%s:" % name, output) for (field, value) in [("valid", mode == _ALL_VALID), ("hidden", hidden), ("blacklisted", blacklisted)]: AssertIn(" - %s: %s" % (field, value), output) # Only valid OSes should be listed cmd = ["gnt-os", "list", "--no-headers"] output = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) if mode == _ALL_VALID and not (hidden or blacklisted): assert_fn = AssertIn else: assert_fn = AssertNotIn assert_fn(fullname, output.splitlines()) # Check via RAPI if rapi_cb: assert_fn(fullname, rapi_cb()) finally: for node in nodes: _RemoveTempOs(node, dirname) def TestOsValid(rapi_cb): """Testing valid OS definition""" return _TestOs(_ALL_VALID, rapi_cb) def TestOsInvalid(rapi_cb): """Testing invalid OS definition""" return _TestOs(_ALL_INVALID, rapi_cb) def TestOsPartiallyValid(rapi_cb): """Testing partially valid OS definition""" return _TestOs(_PARTIALLY_VALID, rapi_cb) def TestOsModifyValid(): """Testing a valid os modify invocation""" hv_dict = { constants.HT_XEN_PVM: { constants.HV_ROOT_PATH: "/dev/sda5", }, constants.HT_XEN_HVM: { constants.HV_ACPI: False, constants.HV_PAE: True, }, } return _TestOsModify(hv_dict) def TestOsModifyInvalid(): """Testing an invalid os modify invocation""" hv_dict = { "blahblahblubb": {"bar": ""}, } return _TestOsModify(hv_dict, fail=True) def TestOsStatesNonExisting(): """Testing OS states with non-existing OS""" AssertCommand(["test", "-e", _TEMP_OS_PATH], fail=True) return _TestOsStates(_TEMP_OS_NAME) ganeti-2.9.3/qa/qa_monitoring.py0000644000000000000000000000352612271422343016621 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2011, 2012, 2013 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Monitoring related QA tests. """ from ganeti import _autoconf from ganeti import constants import qa_config from qa_utils import AssertCommand from qa_instance_utils import CreateInstanceByDiskTemplate, \ RemoveInstance MON_COLLECTOR = _autoconf.PKGLIBDIR + "/mon-collector" def TestInstStatusCollector(): """Test the Xen instance status collector. """ enabled_hypervisors = qa_config.GetEnabledHypervisors() is_xen = (constants.HT_XEN_PVM in enabled_hypervisors or constants.HT_XEN_HVM in enabled_hypervisors) if not is_xen: return # Execute on master on an empty cluster AssertCommand([MON_COLLECTOR, "inst-status-xen"]) #Execute on cluster with instances node1 = qa_config.AcquireNode() node2 = qa_config.AcquireNode() template = qa_config.GetDefaultDiskTemplate() instance = CreateInstanceByDiskTemplate([node1, node2], template) AssertCommand([MON_COLLECTOR, "inst-status-xen"], node=node1) AssertCommand([MON_COLLECTOR, "inst-status-xen"], node=node2) RemoveInstance(instance) node1.Release() node2.Release() ganeti-2.9.3/qa/qa_rapi.py0000644000000000000000000006011312271422343015362 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Remote API QA tests. """ import tempfile import random import re import itertools from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import cli from ganeti import rapi from ganeti import objects from ganeti import query from ganeti import compat from ganeti import qlang from ganeti import pathutils import ganeti.rapi.client # pylint: disable=W0611 import ganeti.rapi.client_utils import qa_config import qa_utils import qa_error from qa_instance import IsFailoverSupported from qa_instance import IsMigrationSupported from qa_instance import IsDiskReplacingSupported from qa_utils import (AssertEqual, AssertIn, AssertMatch, StartLocalCommand) from qa_utils import InstanceCheck, INST_DOWN, INST_UP, FIRST_ARG _rapi_ca = None _rapi_client = None _rapi_username = None _rapi_password = None def Setup(username, password): """Configures the RAPI client. 
""" # pylint: disable=W0603 # due to global usage global _rapi_ca global _rapi_client global _rapi_username global _rapi_password _rapi_username = username _rapi_password = password master = qa_config.GetMasterNode() # Load RAPI certificate from master node cmd = ["cat", qa_utils.MakeNodePath(master, pathutils.RAPI_CERT_FILE)] # Write to temporary file _rapi_ca = tempfile.NamedTemporaryFile() _rapi_ca.write(qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))) _rapi_ca.flush() port = qa_config.get("rapi-port", default=constants.DEFAULT_RAPI_PORT) cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name, proxy="") if qa_config.UseVirtualCluster(): # TODO: Implement full support for RAPI on virtual clusters print qa_utils.FormatWarning("RAPI tests are not yet supported on" " virtual clusters and will be disabled") assert _rapi_client is None else: _rapi_client = rapi.client.GanetiRapiClient(master.primary, port=port, username=username, password=password, curl_config_fn=cfg_curl) print "RAPI protocol version: %s" % _rapi_client.GetVersion() INSTANCE_FIELDS = ("name", "os", "pnode", "snodes", "admin_state", "disk_template", "disk.sizes", "disk.spindles", "nic.ips", "nic.macs", "nic.modes", "nic.links", "beparams", "hvparams", "oper_state", "oper_ram", "oper_vcpus", "status", "tags") NODE_FIELDS = ("name", "dtotal", "dfree", "sptotal", "spfree", "mtotal", "mnode", "mfree", "pinst_cnt", "sinst_cnt", "tags") GROUP_FIELDS = compat.UniqueFrozenset([ "name", "uuid", "alloc_policy", "node_cnt", "node_list", ]) JOB_FIELDS = compat.UniqueFrozenset([ "id", "ops", "status", "summary", "opstatus", "opresult", "oplog", "received_ts", "start_ts", "end_ts", ]) LIST_FIELDS = ("id", "uri") def Enabled(): """Return whether remote API tests should be run. """ # TODO: Implement RAPI tests for virtual clusters return (qa_config.TestEnabled("rapi") and not qa_config.UseVirtualCluster()) def _DoTests(uris): # pylint: disable=W0212 # due to _SendRequest usage results = [] for uri, verify, method, body in uris: assert uri.startswith("/") print "%s %s" % (method, uri) data = _rapi_client._SendRequest(method, uri, None, body) if verify is not None: if callable(verify): verify(data) else: AssertEqual(data, verify) results.append(data) return results def _VerifyReturnsJob(data): if not isinstance(data, int): AssertMatch(data, r"^\d+$") def TestVersion(): """Testing remote API version. """ _DoTests([ ("/version", constants.RAPI_VERSION, "GET", None), ]) def TestEmptyCluster(): """Testing remote API on an empty cluster. 
""" master = qa_config.GetMasterNode() master_full = qa_utils.ResolveNodeName(master) def _VerifyInfo(data): AssertIn("name", data) AssertIn("master", data) AssertEqual(data["master"], master_full) def _VerifyNodes(data): master_entry = { "id": master_full, "uri": "/2/nodes/%s" % master_full, } AssertIn(master_entry, data) def _VerifyNodesBulk(data): for node in data: for entry in NODE_FIELDS: AssertIn(entry, node) def _VerifyGroups(data): default_group = { "name": constants.INITIAL_NODE_GROUP_NAME, "uri": "/2/groups/" + constants.INITIAL_NODE_GROUP_NAME, } AssertIn(default_group, data) def _VerifyGroupsBulk(data): for group in data: for field in GROUP_FIELDS: AssertIn(field, group) _DoTests([ ("/", None, "GET", None), ("/2/info", _VerifyInfo, "GET", None), ("/2/tags", None, "GET", None), ("/2/nodes", _VerifyNodes, "GET", None), ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None), ("/2/groups", _VerifyGroups, "GET", None), ("/2/groups?bulk=1", _VerifyGroupsBulk, "GET", None), ("/2/instances", [], "GET", None), ("/2/instances?bulk=1", [], "GET", None), ("/2/os", None, "GET", None), ]) # Test HTTP Not Found for method in ["GET", "PUT", "POST", "DELETE"]: try: _DoTests([("/99/resource/not/here/99", None, method, None)]) except rapi.client.GanetiApiError, err: AssertEqual(err.code, 404) else: raise qa_error.Error("Non-existent resource didn't return HTTP 404") # Test HTTP Not Implemented for method in ["PUT", "POST", "DELETE"]: try: _DoTests([("/version", None, method, None)]) except rapi.client.GanetiApiError, err: AssertEqual(err.code, 501) else: raise qa_error.Error("Non-implemented method didn't fail") def TestRapiQuery(): """Testing resource queries via remote API. """ # FIXME: the tests are failing if no LVM is enabled, investigate # if it is a bug in the QA or in the code if not qa_config.IsStorageTypeSupported(constants.ST_LVM_VG): return master_name = qa_utils.ResolveNodeName(qa_config.GetMasterNode()) rnd = random.Random(7818) for what in constants.QR_VIA_RAPI: if what == constants.QR_JOB: namefield = "id" elif what == constants.QR_EXPORT: namefield = "export" else: namefield = "name" all_fields = query.ALL_FIELDS[what].keys() rnd.shuffle(all_fields) # No fields, should return everything result = _rapi_client.QueryFields(what) qresult = objects.QueryFieldsResponse.FromDict(result) AssertEqual(len(qresult.fields), len(all_fields)) # One field result = _rapi_client.QueryFields(what, fields=[namefield]) qresult = objects.QueryFieldsResponse.FromDict(result) AssertEqual(len(qresult.fields), 1) # Specify all fields, order must be correct result = _rapi_client.QueryFields(what, fields=all_fields) qresult = objects.QueryFieldsResponse.FromDict(result) AssertEqual(len(qresult.fields), len(all_fields)) AssertEqual([fdef.name for fdef in qresult.fields], all_fields) # Unknown field result = _rapi_client.QueryFields(what, fields=["_unknown!"]) qresult = objects.QueryFieldsResponse.FromDict(result) AssertEqual(len(qresult.fields), 1) AssertEqual(qresult.fields[0].name, "_unknown!") AssertEqual(qresult.fields[0].kind, constants.QFT_UNKNOWN) # Try once more, this time without the client _DoTests([ ("/2/query/%s/fields" % what, None, "GET", None), ("/2/query/%s/fields?fields=name,name,%s" % (what, all_fields[0]), None, "GET", None), ]) # Try missing query argument try: _DoTests([ ("/2/query/%s" % what, None, "GET", None), ]) except rapi.client.GanetiApiError, err: AssertEqual(err.code, 400) else: raise qa_error.Error("Request missing 'fields' parameter didn't fail") def _Check(exp_fields, data): 
qresult = objects.QueryResponse.FromDict(data) AssertEqual([fdef.name for fdef in qresult.fields], exp_fields) if not isinstance(qresult.data, list): raise qa_error.Error("Query did not return a list") _DoTests([ # Specify fields in query ("/2/query/%s?fields=%s" % (what, ",".join(all_fields)), compat.partial(_Check, all_fields), "GET", None), ("/2/query/%s?fields=%s" % (what, namefield), compat.partial(_Check, [namefield]), "GET", None), # Note the spaces ("/2/query/%s?fields=%s,%%20%s%%09,%s%%20" % (what, namefield, namefield, namefield), compat.partial(_Check, [namefield] * 3), "GET", None), # PUT with fields in query ("/2/query/%s?fields=%s" % (what, namefield), compat.partial(_Check, [namefield]), "PUT", {}), ("/2/query/%s" % what, compat.partial(_Check, [namefield] * 4), "PUT", { "fields": [namefield] * 4, }), ("/2/query/%s" % what, compat.partial(_Check, all_fields), "PUT", { "fields": all_fields, }), ("/2/query/%s" % what, compat.partial(_Check, [namefield] * 4), "PUT", { "fields": [namefield] * 4 })]) def _CheckFilter(): _DoTests([ # With filter ("/2/query/%s" % what, compat.partial(_Check, all_fields), "PUT", { "fields": all_fields, "filter": [qlang.OP_TRUE, namefield], }), ]) if what == constants.QR_LOCK: # Locks can't be filtered try: _CheckFilter() except rapi.client.GanetiApiError, err: AssertEqual(err.code, 500) else: raise qa_error.Error("Filtering locks didn't fail") else: _CheckFilter() if what == constants.QR_NODE: # Test with filter (nodes, ) = _DoTests( [("/2/query/%s" % what, compat.partial(_Check, ["name", "master"]), "PUT", {"fields": ["name", "master"], "filter": [qlang.OP_TRUE, "master"], })]) qresult = objects.QueryResponse.FromDict(nodes) AssertEqual(qresult.data, [ [[constants.RS_NORMAL, master_name], [constants.RS_NORMAL, True]], ]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstance(instance): """Testing getting instance(s) info via remote API. """ def _VerifyInstance(data): for entry in INSTANCE_FIELDS: AssertIn(entry, data) def _VerifyInstancesList(data): for instance in data: for entry in LIST_FIELDS: AssertIn(entry, instance) def _VerifyInstancesBulk(data): for instance_data in data: _VerifyInstance(instance_data) _DoTests([ ("/2/instances/%s" % instance.name, _VerifyInstance, "GET", None), ("/2/instances", _VerifyInstancesList, "GET", None), ("/2/instances?bulk=1", _VerifyInstancesBulk, "GET", None), ("/2/instances/%s/activate-disks" % instance.name, _VerifyReturnsJob, "PUT", None), ("/2/instances/%s/deactivate-disks" % instance.name, _VerifyReturnsJob, "PUT", None), ]) # Test OpBackupPrepare (job_id, ) = _DoTests([ ("/2/instances/%s/prepare-export?mode=%s" % (instance.name, constants.EXPORT_MODE_REMOTE), _VerifyReturnsJob, "PUT", None), ]) result = _WaitForRapiJob(job_id)[0] AssertEqual(len(result["handshake"]), 3) AssertEqual(result["handshake"][0], constants.RIE_VERSION) AssertEqual(len(result["x509_key_name"]), 3) AssertIn("-----BEGIN CERTIFICATE-----", result["x509_ca"]) def TestNode(node): """Testing getting node(s) info via remote API. """ def _VerifyNode(data): for entry in NODE_FIELDS: AssertIn(entry, data) def _VerifyNodesList(data): for node in data: for entry in LIST_FIELDS: AssertIn(entry, node) def _VerifyNodesBulk(data): for node_data in data: _VerifyNode(node_data) _DoTests([ ("/2/nodes/%s" % node.primary, _VerifyNode, "GET", None), ("/2/nodes", _VerifyNodesList, "GET", None), ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None), ]) def _FilterTags(seq): """Removes unwanted tags from a sequence. 
""" ignore_re = qa_config.get("ignore-tags-re", None) if ignore_re: return itertools.ifilterfalse(re.compile(ignore_re).match, seq) else: return seq def TestTags(kind, name, tags): """Tests .../tags resources. """ if kind == constants.TAG_CLUSTER: uri = "/2/tags" elif kind == constants.TAG_NODE: uri = "/2/nodes/%s/tags" % name elif kind == constants.TAG_INSTANCE: uri = "/2/instances/%s/tags" % name elif kind == constants.TAG_NODEGROUP: uri = "/2/groups/%s/tags" % name elif kind == constants.TAG_NETWORK: uri = "/2/networks/%s/tags" % name else: raise errors.ProgrammerError("Unknown tag kind") def _VerifyTags(data): AssertEqual(sorted(tags), sorted(_FilterTags(data))) queryargs = "&".join("tag=%s" % i for i in tags) # Add tags (job_id, ) = _DoTests([ ("%s?%s" % (uri, queryargs), _VerifyReturnsJob, "PUT", None), ]) _WaitForRapiJob(job_id) # Retrieve tags _DoTests([ (uri, _VerifyTags, "GET", None), ]) # Remove tags (job_id, ) = _DoTests([ ("%s?%s" % (uri, queryargs), _VerifyReturnsJob, "DELETE", None), ]) _WaitForRapiJob(job_id) def _WaitForRapiJob(job_id): """Waits for a job to finish. """ def _VerifyJob(data): AssertEqual(data["id"], job_id) for field in JOB_FIELDS: AssertIn(field, data) _DoTests([ ("/2/jobs/%s" % job_id, _VerifyJob, "GET", None), ]) return rapi.client_utils.PollJob(_rapi_client, job_id, cli.StdioJobPollReportCb()) def TestRapiNodeGroups(): """Test several node group operations using RAPI. """ (group1, group2, group3) = qa_utils.GetNonexistentGroups(3) # Create a group with no attributes body = { "name": group1, } (job_id, ) = _DoTests([ ("/2/groups", _VerifyReturnsJob, "POST", body), ]) _WaitForRapiJob(job_id) # Create a group specifying alloc_policy body = { "name": group2, "alloc_policy": constants.ALLOC_POLICY_UNALLOCABLE, } (job_id, ) = _DoTests([ ("/2/groups", _VerifyReturnsJob, "POST", body), ]) _WaitForRapiJob(job_id) # Modify alloc_policy body = { "alloc_policy": constants.ALLOC_POLICY_UNALLOCABLE, } (job_id, ) = _DoTests([ ("/2/groups/%s/modify" % group1, _VerifyReturnsJob, "PUT", body), ]) _WaitForRapiJob(job_id) # Rename a group body = { "new_name": group3, } (job_id, ) = _DoTests([ ("/2/groups/%s/rename" % group2, _VerifyReturnsJob, "PUT", body), ]) _WaitForRapiJob(job_id) # Delete groups for group in [group1, group3]: (job_id, ) = _DoTests([ ("/2/groups/%s" % group, _VerifyReturnsJob, "DELETE", None), ]) _WaitForRapiJob(job_id) def TestRapiInstanceAdd(node, use_client): """Test adding a new instance via RAPI""" if not qa_config.IsTemplateSupported(constants.DT_PLAIN): return instance = qa_config.AcquireInstance() instance.SetDiskTemplate(constants.DT_PLAIN) try: disks = [{"size": utils.ParseUnit(d.get("size")), "name": str(d.get("name"))} for d in qa_config.GetDiskOptions()] nic0_mac = instance.GetNicMacAddr(0, constants.VALUE_GENERATE) nics = [{ constants.INIC_MAC: nic0_mac, }] beparams = { constants.BE_MAXMEM: utils.ParseUnit(qa_config.get(constants.BE_MAXMEM)), constants.BE_MINMEM: utils.ParseUnit(qa_config.get(constants.BE_MINMEM)), } if use_client: job_id = _rapi_client.CreateInstance(constants.INSTANCE_CREATE, instance.name, constants.DT_PLAIN, disks, nics, os=qa_config.get("os"), pnode=node.primary, beparams=beparams) else: body = { "__version__": 1, "mode": constants.INSTANCE_CREATE, "name": instance.name, "os_type": qa_config.get("os"), "disk_template": constants.DT_PLAIN, "pnode": node.primary, "beparams": beparams, "disks": disks, "nics": nics, } (job_id, ) = _DoTests([ ("/2/instances", _VerifyReturnsJob, "POST", body), ]) _WaitForRapiJob(job_id) 
return instance except: instance.Release() raise @InstanceCheck(None, INST_DOWN, FIRST_ARG) def TestRapiInstanceRemove(instance, use_client): """Test removing instance via RAPI""" # FIXME: this does not work if LVM is not enabled. Find out if this is a bug # in RAPI or in the test if not qa_config.IsStorageTypeSupported(constants.ST_LVM_VG): return if use_client: job_id = _rapi_client.DeleteInstance(instance.name) else: (job_id, ) = _DoTests([ ("/2/instances/%s" % instance.name, _VerifyReturnsJob, "DELETE", None), ]) _WaitForRapiJob(job_id) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestRapiInstanceMigrate(instance): """Test migrating instance via RAPI""" if not IsMigrationSupported(instance): print qa_utils.FormatInfo("Instance doesn't support migration, skipping" " test") return # Move to secondary node _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name)) qa_utils.RunInstanceCheck(instance, True) # And back to previous primary _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name)) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestRapiInstanceFailover(instance): """Test failing over instance via RAPI""" if not IsFailoverSupported(instance): print qa_utils.FormatInfo("Instance doesn't support failover, skipping" " test") return # Move to secondary node _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name)) qa_utils.RunInstanceCheck(instance, True) # And back to previous primary _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name)) @InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG) def TestRapiInstanceShutdown(instance): """Test stopping an instance via RAPI""" _WaitForRapiJob(_rapi_client.ShutdownInstance(instance.name)) @InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG) def TestRapiInstanceStartup(instance): """Test starting an instance via RAPI""" _WaitForRapiJob(_rapi_client.StartupInstance(instance.name)) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestRapiInstanceRenameAndBack(rename_source, rename_target): """Test renaming instance via RAPI This must leave the instance with the original name (in the non-failure case). 
""" _WaitForRapiJob(_rapi_client.RenameInstance(rename_source, rename_target)) qa_utils.RunInstanceCheck(rename_source, False) qa_utils.RunInstanceCheck(rename_target, False) _WaitForRapiJob(_rapi_client.RenameInstance(rename_target, rename_source)) qa_utils.RunInstanceCheck(rename_target, False) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestRapiInstanceReinstall(instance): """Test reinstalling an instance via RAPI""" if instance.disk_template == constants.DT_DISKLESS: print qa_utils.FormatInfo("Test not supported for diskless instances") return _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name)) # By default, the instance is started again qa_utils.RunInstanceCheck(instance, True) # Reinstall again without starting _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name, no_startup=True)) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestRapiInstanceReplaceDisks(instance): """Test replacing instance disks via RAPI""" if not IsDiskReplacingSupported(instance): print qa_utils.FormatInfo("Instance doesn't support disk replacing," " skipping test") return fn = _rapi_client.ReplaceInstanceDisks _WaitForRapiJob(fn(instance.name, mode=constants.REPLACE_DISK_AUTO, disks=[])) _WaitForRapiJob(fn(instance.name, mode=constants.REPLACE_DISK_SEC, disks="0")) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestRapiInstanceModify(instance): """Test modifying instance via RAPI""" default_hv = qa_config.GetDefaultHypervisor() def _ModifyInstance(**kwargs): _WaitForRapiJob(_rapi_client.ModifyInstance(instance.name, **kwargs)) _ModifyInstance(beparams={ constants.BE_VCPUS: 3, }) _ModifyInstance(beparams={ constants.BE_VCPUS: constants.VALUE_DEFAULT, }) if default_hv == constants.HT_XEN_PVM: _ModifyInstance(hvparams={ constants.HV_KERNEL_ARGS: "single", }) _ModifyInstance(hvparams={ constants.HV_KERNEL_ARGS: constants.VALUE_DEFAULT, }) elif default_hv == constants.HT_XEN_HVM: _ModifyInstance(hvparams={ constants.HV_BOOT_ORDER: "acn", }) _ModifyInstance(hvparams={ constants.HV_BOOT_ORDER: constants.VALUE_DEFAULT, }) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestRapiInstanceConsole(instance): """Test getting instance console information via RAPI""" result = _rapi_client.GetInstanceConsole(instance.name) console = objects.InstanceConsole.FromDict(result) AssertEqual(console.Validate(), True) AssertEqual(console.instance, qa_utils.ResolveInstanceName(instance.name)) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestRapiStoppedInstanceConsole(instance): """Test getting stopped instance's console information via RAPI""" try: _rapi_client.GetInstanceConsole(instance.name) except rapi.client.GanetiApiError, err: AssertEqual(err.code, 503) else: raise qa_error.Error("Getting console for stopped instance didn't" " return HTTP 503") def GetOperatingSystems(): """Retrieves a list of all available operating systems. """ return _rapi_client.GetOperatingSystems() def TestInterClusterInstanceMove(src_instance, dest_instance, inodes, tnode): """Test tools/move-instance""" master = qa_config.GetMasterNode() rapi_pw_file = tempfile.NamedTemporaryFile() rapi_pw_file.write(_rapi_password) rapi_pw_file.flush() dest_instance.SetDiskTemplate(src_instance.disk_template) # TODO: Run some instance tests before moving back if len(inodes) > 1: # No disk template currently requires more than 1 secondary node. If this # changes, either this test must be skipped or the script must be updated. 
assert len(inodes) == 2 snode = inodes[1] else: # instance is not redundant, but we still need to pass a node # (which will be ignored) snode = tnode pnode = inodes[0] # note: pnode:snode are the *current* nodes, so we move it first to # tnode:pnode, then back to pnode:snode for si, di, pn, sn in [(src_instance.name, dest_instance.name, tnode.primary, pnode.primary), (dest_instance.name, src_instance.name, pnode.primary, snode.primary)]: cmd = [ "../tools/move-instance", "--verbose", "--src-ca-file=%s" % _rapi_ca.name, "--src-username=%s" % _rapi_username, "--src-password-file=%s" % rapi_pw_file.name, "--dest-instance-name=%s" % di, "--dest-primary-node=%s" % pn, "--dest-secondary-node=%s" % sn, "--net=0:mac=%s" % constants.VALUE_GENERATE, master.primary, master.primary, si, ] qa_utils.RunInstanceCheck(di, False) AssertEqual(StartLocalCommand(cmd).wait(), 0) qa_utils.RunInstanceCheck(si, False) qa_utils.RunInstanceCheck(di, True) ganeti-2.9.3/qa/qa_cluster.py0000644000000000000000000012355112271422343016116 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Cluster related QA tests. """ import re import tempfile import os.path from ganeti import constants from ganeti import compat from ganeti import utils from ganeti import pathutils import qa_config import qa_daemon import qa_utils import qa_error import qa_instance from qa_utils import AssertEqual, AssertCommand, GetCommandOutput # Prefix for LVM volumes created by QA code during tests _QA_LV_PREFIX = "qa-" #: cluster verify command _CLUSTER_VERIFY = ["gnt-cluster", "verify"] def _RemoveFileFromAllNodes(filename): """Removes a file from all nodes. """ for node in qa_config.get("nodes"): AssertCommand(["rm", "-f", filename], node=node) def _CheckFileOnAllNodes(filename, content): """Verifies the content of the given file on all nodes. """ cmd = utils.ShellQuoteArgs(["cat", filename]) for node in qa_config.get("nodes"): AssertEqual(qa_utils.GetCommandOutput(node.primary, cmd), content) def _GetClusterField(field_path): """Get the value of a cluster field. @type field_path: list of strings @param field_path: Names of the groups/fields to navigate to get the desired value, e.g. 
C{["Default node parameters", "oob_program"]} @return: The effective value of the field (the actual type depends on the chosen field) """ assert isinstance(field_path, list) assert field_path ret = qa_utils.GetObjectInfo(["gnt-cluster", "info"]) for key in field_path: ret = ret[key] return ret # Cluster-verify errors (date, "ERROR", then error code) _CVERROR_RE = re.compile(r"^[\w\s:]+\s+- (ERROR|WARNING):([A-Z0-9_-]+):") def _GetCVErrorCodes(cvout): errs = set() warns = set() for l in cvout.splitlines(): m = _CVERROR_RE.match(l) if m: etype = m.group(1) ecode = m.group(2) if etype == "ERROR": errs.add(ecode) elif etype == "WARNING": warns.add(ecode) return (errs, warns) def _CheckVerifyErrors(actual, expected, etype): exp_codes = compat.UniqueFrozenset(e for (_, e, _) in expected) if not actual.issuperset(exp_codes): missing = exp_codes.difference(actual) raise qa_error.Error("Cluster-verify didn't return these expected" " %ss: %s" % (etype, utils.CommaJoin(missing))) def AssertClusterVerify(fail=False, errors=None, warnings=None): """Run cluster-verify and check the result @type fail: bool @param fail: if cluster-verify is expected to fail instead of succeeding @type errors: list of tuples @param errors: List of CV_XXX errors that are expected; if specified, all the errors listed must appear in cluster-verify output. A non-empty value implies C{fail=True}. @type warnings: list of tuples @param warnings: Same as C{errors} but for warnings. """ cvcmd = "gnt-cluster verify" mnode = qa_config.GetMasterNode() if errors or warnings: cvout = GetCommandOutput(mnode.primary, cvcmd + " --error-codes", fail=(fail or errors)) (act_errs, act_warns) = _GetCVErrorCodes(cvout) if errors: _CheckVerifyErrors(act_errs, errors, "error") if warnings: _CheckVerifyErrors(act_warns, warnings, "warning") else: AssertCommand(cvcmd, fail=fail, node=mnode) # data for testing failures due to bad keys/values for disk parameters _FAIL_PARAMS = ["nonexistent:resync-rate=1", "drbd:nonexistent=1", "drbd:resync-rate=invalid", ] def TestClusterInitDisk(): """gnt-cluster init -D""" name = qa_config.get("name") for param in _FAIL_PARAMS: AssertCommand(["gnt-cluster", "init", "-D", param, name], fail=True) def TestClusterInit(rapi_user, rapi_secret): """gnt-cluster init""" master = qa_config.GetMasterNode() rapi_users_path = qa_utils.MakeNodePath(master, pathutils.RAPI_USERS_FILE) rapi_dir = os.path.dirname(rapi_users_path) # First create the RAPI credentials fh = tempfile.NamedTemporaryFile() try: fh.write("%s %s write\n" % (rapi_user, rapi_secret)) fh.flush() tmpru = qa_utils.UploadFile(master.primary, fh.name) try: AssertCommand(["mkdir", "-p", rapi_dir]) AssertCommand(["mv", tmpru, rapi_users_path]) finally: AssertCommand(["rm", "-f", tmpru]) finally: fh.close() # Initialize cluster enabled_disk_templates = qa_config.GetEnabledDiskTemplates() cmd = [ "gnt-cluster", "init", "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4), "--enabled-hypervisors=%s" % ",".join(qa_config.GetEnabledHypervisors()), "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), ] if constants.DT_FILE in enabled_disk_templates: cmd.append( "--file-storage-dir=%s" % qa_config.get("default-file-storage-dir", pathutils.DEFAULT_FILE_STORAGE_DIR)) for spec_type in ("mem-size", "disk-size", "disk-count", "cpu-count", "nic-count"): for spec_val in ("min", "max", "std"): spec = qa_config.get("ispec_%s_%s" % (spec_type.replace("-", "_"), spec_val), None) if spec is not None: cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, 
spec)) if master.secondary: cmd.append("--secondary-ip=%s" % master.secondary) if utils.IsLvmEnabled(qa_config.GetEnabledDiskTemplates()): vgname = qa_config.get("vg-name", constants.DEFAULT_VG) if vgname: cmd.append("--vg-name=%s" % vgname) else: raise qa_error.Error("Please specify a volume group if you enable" " lvm-based disk templates in the QA.") master_netdev = qa_config.get("master-netdev", None) if master_netdev: cmd.append("--master-netdev=%s" % master_netdev) nicparams = qa_config.get("default-nicparams", None) if nicparams: cmd.append("--nic-parameters=%s" % ",".join(utils.FormatKeyValue(nicparams))) # Cluster value of the exclusive-storage node parameter e_s = qa_config.get("exclusive-storage") if e_s is not None: cmd.extend(["--node-parameters", "exclusive_storage=%s" % e_s]) else: e_s = False qa_config.SetExclusiveStorage(e_s) extra_args = qa_config.get("cluster-init-args") if extra_args: cmd.extend(extra_args) cmd.append(qa_config.get("name")) AssertCommand(cmd) cmd = ["gnt-cluster", "modify"] # hypervisor parameter modifications hvp = qa_config.get("hypervisor-parameters", {}) for k, v in hvp.items(): cmd.extend(["-H", "%s:%s" % (k, v)]) # backend parameter modifications bep = qa_config.get("backend-parameters", "") if bep: cmd.extend(["-B", bep]) if len(cmd) > 2: AssertCommand(cmd) # OS parameters osp = qa_config.get("os-parameters", {}) for k, v in osp.items(): AssertCommand(["gnt-os", "modify", "-O", v, k]) # OS hypervisor parameters os_hvp = qa_config.get("os-hvp", {}) for os_name in os_hvp: for hv, hvp in os_hvp[os_name].items(): AssertCommand(["gnt-os", "modify", "-H", "%s:%s" % (hv, hvp), os_name]) def TestClusterRename(): """gnt-cluster rename""" cmd = ["gnt-cluster", "rename", "-f"] original_name = qa_config.get("name") rename_target = qa_config.get("rename", None) if rename_target is None: print qa_utils.FormatError('"rename" entry is missing') return for data in [ cmd + [rename_target], _CLUSTER_VERIFY, cmd + [original_name], _CLUSTER_VERIFY, ]: AssertCommand(data) def TestClusterOob(): """out-of-band framework""" oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID() AssertCommand(_CLUSTER_VERIFY) AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID()]) AssertCommand(_CLUSTER_VERIFY, fail=True) AssertCommand(["touch", oob_path_exists]) AssertCommand(["chmod", "0400", oob_path_exists]) AssertCommand(["gnt-cluster", "copyfile", oob_path_exists]) try: AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program=%s" % oob_path_exists]) AssertCommand(_CLUSTER_VERIFY, fail=True) AssertCommand(["chmod", "0500", oob_path_exists]) AssertCommand(["gnt-cluster", "copyfile", oob_path_exists]) AssertCommand(_CLUSTER_VERIFY) finally: AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists]) AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program="]) def TestClusterEpo(): """gnt-cluster epo""" master = qa_config.GetMasterNode() # Assert that OOB is unavailable for all nodes result_output = GetCommandOutput(master.primary, "gnt-node list --verbose --no-headers -o" " powered") AssertEqual(compat.all(powered == "(unavail)" for powered in result_output.splitlines()), True) # Conflicting AssertCommand(["gnt-cluster", "epo", "--groups", "--all"], fail=True) # --all doesn't expect arguments AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True) # Unless --all is given master is not allowed to be in the list AssertCommand(["gnt-cluster", 
"epo", "-f", master.primary], fail=True) # This shouldn't fail AssertCommand(["gnt-cluster", "epo", "-f", "--all"]) # All instances should have been stopped now result_output = GetCommandOutput(master.primary, "gnt-instance list --no-headers -o status") # ERROR_down because the instance is stopped but not recorded as such AssertEqual(compat.all(status == "ERROR_down" for status in result_output.splitlines()), True) # Now start everything again AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"]) # All instances should have been started now result_output = GetCommandOutput(master.primary, "gnt-instance list --no-headers -o status") AssertEqual(compat.all(status == "running" for status in result_output.splitlines()), True) def TestClusterVerify(): """gnt-cluster verify""" AssertCommand(_CLUSTER_VERIFY) AssertCommand(["gnt-cluster", "verify-disks"]) def TestClusterVerifyDisksBrokenDRBD(instance, inst_nodes): """gnt-cluster verify-disks with broken DRBD""" qa_daemon.TestPauseWatcher() try: info = qa_instance.GetInstanceInfo(instance.name) snode = inst_nodes[1] for idx, minor in enumerate(info["drbd-minors"][snode.primary]): if idx % 2 == 0: break_drbd_cmd = \ "(drbdsetup %d down >/dev/null 2>&1;" \ " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \ (minor, minor) else: break_drbd_cmd = \ "(drbdsetup %d detach >/dev/null 2>&1;" \ " drbdsetup detach %d >/dev/null 2>&1) || /bin/true" % \ (minor, minor) AssertCommand(break_drbd_cmd, node=snode) verify_output = GetCommandOutput(qa_config.GetMasterNode().primary, "gnt-cluster verify-disks") activation_msg = "Activating disks for instance '%s'" % instance.name if activation_msg not in verify_output: raise qa_error.Error("gnt-cluster verify-disks did not activate broken" " DRBD disks:\n%s" % verify_output) verify_output = GetCommandOutput(qa_config.GetMasterNode().primary, "gnt-cluster verify-disks") if activation_msg in verify_output: raise qa_error.Error("gnt-cluster verify-disks wants to activate broken" " DRBD disks on second attempt:\n%s" % verify_output) AssertCommand(_CLUSTER_VERIFY) finally: qa_daemon.TestResumeWatcher() def TestJobqueue(): """gnt-debug test-jobqueue""" AssertCommand(["gnt-debug", "test-jobqueue"]) def TestDelay(node): """gnt-debug delay""" AssertCommand(["gnt-debug", "delay", "1"]) AssertCommand(["gnt-debug", "delay", "--no-master", "1"]) AssertCommand(["gnt-debug", "delay", "--no-master", "-n", node.primary, "1"]) def TestClusterReservedLvs(): """gnt-cluster reserved lvs""" # if no lvm-based templates are supported, skip the test if not qa_config.IsStorageTypeSupported(constants.ST_LVM_VG): return vgname = qa_config.get("vg-name", constants.DEFAULT_VG) lvname = _QA_LV_PREFIX + "test" lvfullname = "/".join([vgname, lvname]) for fail, cmd in [ (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]), (False, ["lvcreate", "-L1G", "-n", lvname, vgname]), (True, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", "%s,.*/other-test" % lvfullname]), (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ".*/%s.*" % _QA_LV_PREFIX]), (False, _CLUSTER_VERIFY), (False, ["gnt-cluster", "modify", "--reserved-lvs", ""]), (True, _CLUSTER_VERIFY), (False, ["lvremove", "-f", lvfullname]), (False, _CLUSTER_VERIFY), ]: AssertCommand(cmd, fail=fail) def TestClusterModifyEmpty(): """gnt-cluster modify""" AssertCommand(["gnt-cluster", "modify"], fail=True) def TestClusterModifyDisk(): """gnt-cluster modify -D""" for param in _FAIL_PARAMS: 
AssertCommand(["gnt-cluster", "modify", "-D", param], fail=True) def _GetOtherEnabledDiskTemplate(undesired_disk_templates, enabled_disk_templates): """Returns one template that is not in the undesired set. @type undesired_disk_templates: list of string @param undesired_disk_templates: a list of disk templates that we want to exclude when drawing one disk template from the list of enabled disk templates @type enabled_disk_templates: list of string @param enabled_disk_templates: list of enabled disk templates (in QA) """ desired_templates = list(set(enabled_disk_templates) - set(undesired_disk_templates)) if desired_templates: template = desired_templates[0] else: # If no desired disk template is available for QA, choose 'diskless' and # hope for the best. template = constants.ST_DISKLESS return template def TestClusterModifyFileBasedStorageDir( file_disk_template, dir_config_key, default_dir, option_name): """Tests gnt-cluster modify wrt to file-based directory options. @type file_disk_template: string @param file_disk_template: file-based disk template @type dir_config_key: string @param dir_config_key: key for the QA config to retrieve the default directory value @type default_dir: string @param default_dir: default directory, if the QA config does not specify it @type option_name: string @param option_name: name of the option of 'gnt-cluster modify' to change the directory """ enabled_disk_templates = qa_config.GetEnabledDiskTemplates() assert file_disk_template in [constants.DT_FILE, constants.DT_SHARED_FILE] if not qa_config.IsTemplateSupported(file_disk_template): return # Get some non-file-based disk template to disable file storage other_disk_template = _GetOtherEnabledDiskTemplate( utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE), enabled_disk_templates) file_storage_dir = qa_config.get(dir_config_key, default_dir) invalid_file_storage_dir = "/boot/" for fail, cmd in [ (False, ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % file_disk_template, "--ipolicy-disk-templates=%s" % file_disk_template]), (False, ["gnt-cluster", "modify", "--%s=%s" % (option_name, file_storage_dir)]), (False, ["gnt-cluster", "modify", "--%s=%s" % (option_name, invalid_file_storage_dir)]), # file storage dir is set to an inacceptable path, thus verify # should fail (True, ["gnt-cluster", "verify"]), # unsetting the storage dir while file storage is enabled # should fail (True, ["gnt-cluster", "modify", "--%s=" % option_name]), (False, ["gnt-cluster", "modify", "--%s=%s" % (option_name, file_storage_dir)]), (False, ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % other_disk_template, "--ipolicy-disk-templates=%s" % other_disk_template]), (False, ["gnt-cluster", "modify", "--%s=%s" % (option_name, invalid_file_storage_dir)]), # file storage is set to an inacceptable path, but file storage # is disabled, thus verify should not fail (False, ["gnt-cluster", "verify"]), # unsetting the file storage dir while file storage is not enabled # should be fine (False, ["gnt-cluster", "modify", "--%s=" % option_name]), # resetting everything to sane values (False, ["gnt-cluster", "modify", "--%s=%s" % (option_name, file_storage_dir), "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(enabled_disk_templates)]) ]: AssertCommand(cmd, fail=fail) def TestClusterModifyFileStorageDir(): """gnt-cluster modify --file-storage-dir=...""" TestClusterModifyFileBasedStorageDir( constants.DT_FILE, "default-file-storage-dir", 
pathutils.DEFAULT_FILE_STORAGE_DIR, "file-storage-dir") def TestClusterModifySharedFileStorageDir(): """gnt-cluster modify --shared-file-storage-dir=...""" TestClusterModifyFileBasedStorageDir( constants.DT_SHARED_FILE, "default-shared-file-storage-dir", pathutils.DEFAULT_SHARED_FILE_STORAGE_DIR, "shared-file-storage-dir") def TestClusterModifyDiskTemplates(): """gnt-cluster modify --enabled-disk-templates=...""" enabled_disk_templates = qa_config.GetEnabledDiskTemplates() default_disk_template = qa_config.GetDefaultDiskTemplate() _TestClusterModifyDiskTemplatesArguments(default_disk_template, enabled_disk_templates) _TestClusterModifyDiskTemplatesVgName(enabled_disk_templates) _RestoreEnabledDiskTemplates() nodes = qa_config.AcquireManyNodes(2) instance_template = enabled_disk_templates[0] instance = qa_instance.CreateInstanceByDiskTemplate(nodes, instance_template) _TestClusterModifyUnusedDiskTemplate(instance_template) _TestClusterModifyUsedDiskTemplate(instance_template, enabled_disk_templates) qa_instance.TestInstanceRemove(instance) _RestoreEnabledDiskTemplates() def _RestoreEnabledDiskTemplates(): """Sets the list of enabled disk templates back to the list of enabled disk templates from the QA configuration. This can be used to make sure that the tests that modify the list of disk templates do not interfere with other tests. """ enabled_disk_templates = qa_config.GetEnabledDiskTemplates() cmd = ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(enabled_disk_templates), ] if utils.IsLvmEnabled(qa_config.GetEnabledDiskTemplates()): vgname = qa_config.get("vg-name", constants.DEFAULT_VG) cmd.append("--vg-name=%s" % vgname) AssertCommand(cmd, fail=False) def _TestClusterModifyDiskTemplatesArguments(default_disk_template, enabled_disk_templates): """Tests argument handling of 'gnt-cluster modify' with respect to the parameter '--enabled-disk-templates'. This test is independent of instances. """ _RestoreEnabledDiskTemplates() # bogus templates AssertCommand(["gnt-cluster", "modify", "--enabled-disk-templates=pinkbunny"], fail=True) # duplicate entries do no harm AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s,%s" % (default_disk_template, default_disk_template), "--ipolicy-disk-templates=%s" % default_disk_template], fail=False) if constants.DT_DRBD8 in enabled_disk_templates: # interaction with --drbd-usermode-helper option drbd_usermode_helper = qa_config.get("drbd-usermode-helper", None) if not drbd_usermode_helper: drbd_usermode_helper = "/bin/true" # specifying a helper when drbd gets disabled is ok. Note that drbd still # has to be installed on the nodes in this case AssertCommand(["gnt-cluster", "modify", "--drbd-usermode-helper=%s" % drbd_usermode_helper, "--enabled-disk-templates=%s" % constants.DT_DISKLESS, "--ipolicy-disk-templates=%s" % constants.DT_DISKLESS], fail=False) # specifying a helper when drbd is re-enabled AssertCommand(["gnt-cluster", "modify", "--drbd-usermode-helper=%s" % drbd_usermode_helper, "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(enabled_disk_templates)], fail=False) def _TestClusterModifyDiskTemplatesVgName(enabled_disk_templates): """Tests argument handling of 'gnt-cluster modify' with respect to the parameters '--enabled-disk-templates' and '--vg-name'. This test is independent of instances. 
""" if not utils.IsLvmEnabled(enabled_disk_templates): # These tests only make sense if lvm is enabled for QA return # determine an LVM and a non-LVM disk template for the tests non_lvm_template = _GetOtherEnabledDiskTemplate(utils.GetLvmDiskTemplates(), enabled_disk_templates) lvm_template = list(set(enabled_disk_templates) .intersection(set(utils.GetLvmDiskTemplates())))[0] vgname = qa_config.get("vg-name", constants.DEFAULT_VG) # Clean start: unset volume group name, disable lvm storage AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % non_lvm_template, "--ipolicy-disk-templates=%s" % non_lvm_template, "--vg-name="], fail=False) # Try to enable lvm, when no volume group is given AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % lvm_template, "--ipolicy-disk-templates=%s" % lvm_template], fail=True) # Set volume group, with lvm still disabled: just a warning AssertCommand(["gnt-cluster", "modify", "--vg-name=%s" % vgname], fail=False) # Try unsetting vg name and enabling lvm at the same time AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % lvm_template, "--ipolicy-disk-templates=%s" % lvm_template, "--vg-name="], fail=True) # Enable lvm with vg name present AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % lvm_template, "--ipolicy-disk-templates=%s" % lvm_template], fail=False) # Try unsetting vg name with lvm still enabled AssertCommand(["gnt-cluster", "modify", "--vg-name="], fail=True) # Disable lvm with vg name still set AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % non_lvm_template, "--ipolicy-disk-templates=%s" % non_lvm_template, ], fail=False) # Try unsetting vg name with lvm disabled AssertCommand(["gnt-cluster", "modify", "--vg-name="], fail=False) # Set vg name and enable lvm at the same time AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % lvm_template, "--ipolicy-disk-templates=%s" % lvm_template, "--vg-name=%s" % vgname], fail=False) # Unset vg name and disable lvm at the same time AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % non_lvm_template, "--ipolicy-disk-templates=%s" % non_lvm_template, "--vg-name="], fail=False) _RestoreEnabledDiskTemplates() def _TestClusterModifyUsedDiskTemplate(instance_template, enabled_disk_templates): """Tests that disk templates that are currently in use by instances cannot be disabled on the cluster. """ # If the list of enabled disk templates contains only one template # we need to add some other templates, because the list of enabled disk # templates can only be set to a non-empty list. 
new_disk_templates = list(set(enabled_disk_templates) - set([instance_template])) if not new_disk_templates: new_disk_templates = list(set([constants.DT_DISKLESS, constants.DT_BLOCK]) - set([instance_template])) AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(new_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(new_disk_templates)], fail=True) def _TestClusterModifyUnusedDiskTemplate(instance_template): """Tests that unused disk templates can be disabled safely.""" all_disk_templates = constants.DISK_TEMPLATES if not utils.IsLvmEnabled(qa_config.GetEnabledDiskTemplates()): all_disk_templates = list(set(all_disk_templates) - set(utils.GetLvmDiskTemplates())) AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(all_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(all_disk_templates)], fail=False) new_disk_templates = [instance_template] AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(new_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(new_disk_templates)], fail=False) def TestClusterModifyBe(): """gnt-cluster modify -B""" for fail, cmd in [ # max/min mem (False, ["gnt-cluster", "modify", "-B", "maxmem=256"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]), (False, ["gnt-cluster", "modify", "-B", "minmem=256"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]), (True, ["gnt-cluster", "modify", "-B", "maxmem=a"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 256$'"]), (True, ["gnt-cluster", "modify", "-B", "minmem=a"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 256$'"]), (False, ["gnt-cluster", "modify", "-B", "maxmem=128,minmem=128"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *maxmem: 128$'"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *minmem: 128$'"]), # vcpus (False, ["gnt-cluster", "modify", "-B", "vcpus=4"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 4$'"]), (True, ["gnt-cluster", "modify", "-B", "vcpus=a"]), (False, ["gnt-cluster", "modify", "-B", "vcpus=1"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *vcpus: 1$'"]), # auto_balance (False, ["gnt-cluster", "modify", "-B", "auto_balance=False"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: False$'"]), (True, ["gnt-cluster", "modify", "-B", "auto_balance=1"]), (False, ["gnt-cluster", "modify", "-B", "auto_balance=True"]), (False, ["sh", "-c", "gnt-cluster info|grep '^ *auto_balance: True$'"]), ]: AssertCommand(cmd, fail=fail) # restore the originally requested BE parameters, if any bep = qa_config.get("backend-parameters", "") if bep: AssertCommand(["gnt-cluster", "modify", "-B", bep]) def _GetClusterIPolicy(): """Return the run-time values of the cluster-level instance policy. 
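As an illustration (not an exhaustive listing): the policy dictionary maps the textual parameter names printed by 'gnt-cluster info' (e.g. 'vcpu-ratio') to their values, while the specs dictionary uses the internal keys, i.e. constants.ISPECS_MINMAX ('minmax') and constants.ISPECS_STD ('std').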
@rtype: tuple @return: (policy, specs), where: - policy is a dictionary of the policy values, instance specs excluded - specs is a dictionary containing only the specs, using the internal format (see L{constants.IPOLICY_DEFAULTS} for an example) """ info = qa_utils.GetObjectInfo(["gnt-cluster", "info"]) policy = info["Instance policy - limits for instances"] (ret_policy, ret_specs) = qa_utils.ParseIPolicy(policy) # Sanity checks assert "minmax" in ret_specs and "std" in ret_specs assert len(ret_specs["minmax"]) > 0 assert len(ret_policy) > 0 return (ret_policy, ret_specs) def TestClusterModifyIPolicy(): """gnt-cluster modify --ipolicy-*""" basecmd = ["gnt-cluster", "modify"] (old_policy, old_specs) = _GetClusterIPolicy() for par in ["vcpu-ratio", "spindle-ratio"]: curr_val = float(old_policy[par]) test_values = [ (True, 1.0), (True, 1.5), (True, 2), (False, "a"), # Restore the old value (True, curr_val), ] for (good, val) in test_values: cmd = basecmd + ["--ipolicy-%s=%s" % (par, val)] AssertCommand(cmd, fail=not good) if good: curr_val = val # Check the affected parameter (eff_policy, eff_specs) = _GetClusterIPolicy() AssertEqual(float(eff_policy[par]), curr_val) # Check everything else AssertEqual(eff_specs, old_specs) for p in eff_policy.keys(): if p == par: continue AssertEqual(eff_policy[p], old_policy[p]) # Allowing disk templates via ipolicy requires them to be # enabled on the cluster. if not (qa_config.IsTemplateSupported(constants.DT_PLAIN) and qa_config.IsTemplateSupported(constants.DT_DRBD8)): return # Disk templates are treated slightly differently par = "disk-templates" disp_str = "allowed disk templates" curr_val = old_policy[disp_str] test_values = [ (True, constants.DT_PLAIN), (True, "%s,%s" % (constants.DT_PLAIN, constants.DT_DRBD8)), (False, "thisisnotadisktemplate"), (False, ""), # Restore the old value (True, curr_val.replace(" ", "")), ] for (good, val) in test_values: cmd = basecmd + ["--ipolicy-%s=%s" % (par, val)] AssertCommand(cmd, fail=not good) if good: curr_val = val # Check the affected parameter (eff_policy, eff_specs) = _GetClusterIPolicy() AssertEqual(eff_policy[disp_str].replace(" ", ""), curr_val) # Check everything else AssertEqual(eff_specs, old_specs) for p in eff_policy.keys(): if p == disp_str: continue AssertEqual(eff_policy[p], old_policy[p]) def TestClusterSetISpecs(new_specs=None, diff_specs=None, fail=False, old_values=None): """Change instance specs. At most one of new_specs or diff_specs can be specified. 
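For example, a diff_specs value that only changes the standard memory size, leaving all min/max bounds untouched, could look like this (illustrative values only): {constants.ISPECS_STD: {"memory-size": 512}}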
@type new_specs: dict @param new_specs: new complete specs, in the same format returned by L{_GetClusterIPolicy} @type diff_specs: dict @param diff_specs: partial specs, which can be an incomplete specification, but if min/max specs are specified, their number must match the number of the existing specs @type fail: bool @param fail: if the change is expected to fail @type old_values: tuple @param old_values: (old_policy, old_specs), as returned by L{_GetClusterIPolicy} @return: same as L{_GetClusterIPolicy} """ build_cmd = lambda opts: ["gnt-cluster", "modify"] + opts return qa_utils.TestSetISpecs( new_specs=new_specs, diff_specs=diff_specs, get_policy_fn=_GetClusterIPolicy, build_cmd_fn=build_cmd, fail=fail, old_values=old_values) def TestClusterModifyISpecs(): """gnt-cluster modify --specs-*""" params = ["memory-size", "disk-size", "disk-count", "cpu-count", "nic-count"] (cur_policy, cur_specs) = _GetClusterIPolicy() # This test assumes that there is only one min/max bound assert len(cur_specs[constants.ISPECS_MINMAX]) == 1 for par in params: test_values = [ (True, 0, 4, 12), (True, 4, 4, 12), (True, 4, 12, 12), (True, 4, 4, 4), (False, 4, 0, 12), (False, 4, 16, 12), (False, 4, 4, 0), (False, 12, 4, 4), (False, 12, 4, 0), (False, "a", 4, 12), (False, 0, "a", 12), (False, 0, 4, "a"), # This is to restore the old values (True, cur_specs[constants.ISPECS_MINMAX][0][constants.ISPECS_MIN][par], cur_specs[constants.ISPECS_STD][par], cur_specs[constants.ISPECS_MINMAX][0][constants.ISPECS_MAX][par]) ] for (good, mn, st, mx) in test_values: new_vals = { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: {par: mn}, constants.ISPECS_MAX: {par: mx} }], constants.ISPECS_STD: {par: st} } cur_state = (cur_policy, cur_specs) # We update cur_specs, as we've copied the values to restore already (cur_policy, cur_specs) = TestClusterSetISpecs( diff_specs=new_vals, fail=not good, old_values=cur_state) # Get the ipolicy command mnode = qa_config.GetMasterNode() initcmd = GetCommandOutput(mnode.primary, "gnt-cluster show-ispecs-cmd") modcmd = ["gnt-cluster", "modify"] opts = initcmd.split() assert opts[0:2] == ["gnt-cluster", "init"] for k in range(2, len(opts) - 1): if opts[k].startswith("--ipolicy-"): assert k + 2 <= len(opts) modcmd.extend(opts[k:k + 2]) # Re-apply the ipolicy (this should be a no-op) AssertCommand(modcmd) new_initcmd = GetCommandOutput(mnode.primary, "gnt-cluster show-ispecs-cmd") AssertEqual(initcmd, new_initcmd) def TestClusterInfo(): """gnt-cluster info""" AssertCommand(["gnt-cluster", "info"]) def TestClusterRedistConf(): """gnt-cluster redist-conf""" AssertCommand(["gnt-cluster", "redist-conf"]) def TestClusterGetmaster(): """gnt-cluster getmaster""" AssertCommand(["gnt-cluster", "getmaster"]) def TestClusterVersion(): """gnt-cluster version""" AssertCommand(["gnt-cluster", "version"]) def TestClusterRenewCrypto(): """gnt-cluster renew-crypto""" master = qa_config.GetMasterNode() # Conflicting options cmd = ["gnt-cluster", "renew-crypto", "--force", "--new-cluster-certificate", "--new-confd-hmac-key"] conflicting = [ ["--new-rapi-certificate", "--rapi-certificate=/dev/null"], ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"], ] for i in conflicting: AssertCommand(cmd + i, fail=True) # Invalid RAPI certificate cmd = ["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=/dev/null"] AssertCommand(cmd, fail=True) rapi_cert_backup = qa_utils.BackupFile(master.primary, pathutils.RAPI_CERT_FILE) try: # Custom RAPI certificate fh = tempfile.NamedTemporaryFile() # 
Ensure certificate doesn't cause "gnt-cluster verify" to complain validity = constants.SSL_CERT_EXPIRATION_WARN * 3 utils.GenerateSelfSignedSslCert(fh.name, validity=validity) tmpcert = qa_utils.UploadFile(master.primary, fh.name) try: AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=%s" % tmpcert]) finally: AssertCommand(["rm", "-f", tmpcert]) # Custom cluster domain secret cds_fh = tempfile.NamedTemporaryFile() cds_fh.write(utils.GenerateSecret()) cds_fh.write("\n") cds_fh.flush() tmpcds = qa_utils.UploadFile(master.primary, cds_fh.name) try: AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--cluster-domain-secret=%s" % tmpcds]) finally: AssertCommand(["rm", "-f", tmpcds]) # Normal case AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--new-cluster-certificate", "--new-confd-hmac-key", "--new-rapi-certificate", "--new-cluster-domain-secret"]) # Restore RAPI certificate AssertCommand(["gnt-cluster", "renew-crypto", "--force", "--rapi-certificate=%s" % rapi_cert_backup]) finally: AssertCommand(["rm", "-f", rapi_cert_backup]) def TestClusterBurnin(): """Burnin""" master = qa_config.GetMasterNode() options = qa_config.get("options", {}) disk_template = options.get("burnin-disk-template", constants.DT_DRBD8) parallel = options.get("burnin-in-parallel", False) check_inst = options.get("burnin-check-instances", False) do_rename = options.get("burnin-rename", "") do_reboot = options.get("burnin-reboot", True) reboot_types = options.get("reboot-types", constants.REBOOT_TYPES) # Get as many instances as we need instances = [] try: try: num = qa_config.get("options", {}).get("burnin-instances", 1) for _ in range(0, num): instances.append(qa_config.AcquireInstance()) except qa_error.OutOfInstancesError: print "Not enough instances, continuing anyway." if len(instances) < 1: raise qa_error.Error("Burnin needs at least one instance") script = qa_utils.UploadFile(master.primary, "../tools/burnin") try: disks = qa_config.GetDiskOptions() # Run burnin cmd = [script, "--os=%s" % qa_config.get("os"), "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM), "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM), "--disk-size=%s" % ",".join([d.get("size") for d in disks]), "--disk-growth=%s" % ",".join([d.get("growth") for d in disks]), "--disk-template=%s" % disk_template] if parallel: cmd.append("--parallel") cmd.append("--early-release") if check_inst: cmd.append("--http-check") if do_rename: cmd.append("--rename=%s" % do_rename) if not do_reboot: cmd.append("--no-reboot") else: cmd.append("--reboot-types=%s" % ",".join(reboot_types)) cmd += [inst.name for inst in instances] AssertCommand(cmd) finally: AssertCommand(["rm", "-f", script]) finally: for inst in instances: inst.Release() def TestClusterMasterFailover(): """gnt-cluster master-failover""" master = qa_config.GetMasterNode() failovermaster = qa_config.AcquireNode(exclude=master) cmd = ["gnt-cluster", "master-failover"] node_list_cmd = ["gnt-node", "list"] try: AssertCommand(cmd, node=failovermaster) AssertCommand(node_list_cmd, node=failovermaster) # Back to original master node AssertCommand(cmd, node=master) AssertCommand(node_list_cmd, node=master) finally: failovermaster.Release() def _NodeQueueDrainFile(node): """Returns path to queue drain file for a node. """ return qa_utils.MakeNodePath(node, pathutils.JOB_QUEUE_DRAIN_FILE) def _AssertDrainFile(node, **kwargs): """Checks for the queue drain file. 
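It runs 'test -f' against the node's pathutils.JOB_QUEUE_DRAIN_FILE; keyword arguments (e.g. fail=True to assert the file's absence instead) are forwarded to AssertCommand.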
""" AssertCommand(["test", "-f", _NodeQueueDrainFile(node)], node=node, **kwargs) def TestClusterMasterFailoverWithDrainedQueue(): """gnt-cluster master-failover with drained queue""" master = qa_config.GetMasterNode() failovermaster = qa_config.AcquireNode(exclude=master) # Ensure queue is not drained for node in [master, failovermaster]: _AssertDrainFile(node, fail=True) # Drain queue on failover master AssertCommand(["touch", _NodeQueueDrainFile(failovermaster)], node=failovermaster) cmd = ["gnt-cluster", "master-failover"] try: _AssertDrainFile(failovermaster) AssertCommand(cmd, node=failovermaster) _AssertDrainFile(master, fail=True) _AssertDrainFile(failovermaster, fail=True) # Back to original master node AssertCommand(cmd, node=master) finally: failovermaster.Release() # Ensure queue is not drained for node in [master, failovermaster]: _AssertDrainFile(node, fail=True) def TestClusterCopyfile(): """gnt-cluster copyfile""" master = qa_config.GetMasterNode() uniqueid = utils.NewUUID() # Create temporary file f = tempfile.NamedTemporaryFile() f.write(uniqueid) f.flush() f.seek(0) # Upload file to master node testname = qa_utils.UploadFile(master.primary, f.name) try: # Copy file to all nodes AssertCommand(["gnt-cluster", "copyfile", testname]) _CheckFileOnAllNodes(testname, uniqueid) finally: _RemoveFileFromAllNodes(testname) def TestClusterCommand(): """gnt-cluster command""" uniqueid = utils.NewUUID() rfile = "/tmp/gnt%s" % utils.NewUUID() rcmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid]) cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", "%s >%s" % (rcmd, rfile)]) try: AssertCommand(cmd) _CheckFileOnAllNodes(rfile, uniqueid) finally: _RemoveFileFromAllNodes(rfile) def TestClusterDestroy(): """gnt-cluster destroy""" AssertCommand(["gnt-cluster", "destroy", "--yes-do-it"]) def TestClusterRepairDiskSizes(): """gnt-cluster repair-disk-sizes""" AssertCommand(["gnt-cluster", "repair-disk-sizes"]) def TestSetExclStorCluster(newvalue): """Set the exclusive_storage node parameter at the cluster level. @type newvalue: bool @param newvalue: New value of exclusive_storage @rtype: bool @return: The old value of exclusive_storage """ es_path = ["Default node parameters", "exclusive_storage"] oldvalue = _GetClusterField(es_path) AssertCommand(["gnt-cluster", "modify", "--node-parameters", "exclusive_storage=%s" % newvalue]) effvalue = _GetClusterField(es_path) if effvalue != newvalue: raise qa_error.Error("exclusive_storage has the wrong value: %s instead" " of %s" % (effvalue, newvalue)) qa_config.SetExclusiveStorage(newvalue) return oldvalue def TestExclStorSharedPv(node): """cluster-verify reports LVs that share the same PV with exclusive_storage. """ vgname = qa_config.get("vg-name", constants.DEFAULT_VG) lvname1 = _QA_LV_PREFIX + "vol1" lvname2 = _QA_LV_PREFIX + "vol2" node_name = node.primary AssertCommand(["lvcreate", "-L1G", "-n", lvname1, vgname], node=node_name) AssertClusterVerify(fail=True, errors=[constants.CV_ENODEORPHANLV]) AssertCommand(["lvcreate", "-L1G", "-n", lvname2, vgname], node=node_name) AssertClusterVerify(fail=True, errors=[constants.CV_ENODELVM, constants.CV_ENODEORPHANLV]) AssertCommand(["lvremove", "-f", "/".join([vgname, lvname1])], node=node_name) AssertCommand(["lvremove", "-f", "/".join([vgname, lvname2])], node=node_name) AssertClusterVerify() ganeti-2.9.3/qa/qa_instance_utils.py0000644000000000000000000001532712267470014017465 0ustar00rootroot00000000000000# # # Copyright (C) 2013 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """QA utility functions for managing instances """ import operator from ganeti import utils from ganeti import constants from ganeti import pathutils import qa_config import qa_error import qa_utils from qa_utils import AssertIn, AssertCommand def RemoveInstance(instance): AssertCommand(["gnt-instance", "remove", "-f", instance.name]) def GetGenericAddParameters(inst, disk_template, force_mac=None): params = ["-B"] params.append("%s=%s,%s=%s" % (constants.BE_MINMEM, qa_config.get(constants.BE_MINMEM), constants.BE_MAXMEM, qa_config.get(constants.BE_MAXMEM))) if disk_template != constants.DT_DISKLESS: for idx, disk in enumerate(qa_config.GetDiskOptions()): size = disk.get("size") name = disk.get("name") diskparams = "%s:size=%s" % (idx, size) if name: diskparams += ",name=%s" % name if qa_config.AreSpindlesSupported(): spindles = disk.get("spindles") if spindles is None: raise qa_error.Error("'spindles' is a required parameter for disks" " when you enable exclusive storage tests") diskparams += ",spindles=%s" % spindles params.extend(["--disk", diskparams]) # Set static MAC address if configured if force_mac: nic0_mac = force_mac else: nic0_mac = inst.GetNicMacAddr(0, None) if nic0_mac: params.extend(["--net", "0:mac=%s" % nic0_mac]) return params def _CreateInstanceByDiskTemplateRaw(nodes_spec, disk_template, fail=False): """Creates an instance with the given disk template on the given node(s). Note that this function does not check if enough nodes are given for the respective disk template. @type nodes_spec: string @param nodes_spec: string specification of one node (by node name) or several nodes according to the requirements of the disk template @type disk_template: string @param disk_template: the disk template to be used by the instance @return: the created instance """ instance = qa_config.AcquireInstance() try: cmd = (["gnt-instance", "add", "--os-type=%s" % qa_config.get("os"), "--disk-template=%s" % disk_template, "--node=%s" % nodes_spec] + GetGenericAddParameters(instance, disk_template)) cmd.append(instance.name) AssertCommand(cmd, fail=fail) if not fail: CheckSsconfInstanceList(instance.name) instance.SetDiskTemplate(disk_template) return instance except: instance.Release() raise # Handle the case where creation is expected to fail assert fail instance.Release() return None def CreateInstanceDrbd8(nodes, fail=False): """Creates an instance using disk template 'drbd' on the given nodes. 
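The primary names of the given nodes are joined with ':' into the node specification passed to _CreateInstanceByDiskTemplateRaw, e.g. 'node1:node2' (illustrative names).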
@type nodes: list of nodes @param nodes: nodes to be used by the instance @return: the created instance """ assert len(nodes) > 1 return _CreateInstanceByDiskTemplateRaw( ":".join(map(operator.attrgetter("primary"), nodes)), constants.DT_DRBD8, fail=fail) def CreateInstanceByDiskTemplateOneNode(nodes, disk_template, fail=False): """Creates an instance using the given disk template for disk templates for which one given node is sufficient. These templates are for example: plain, diskless, file, sharedfile, blockdev, rados. @type nodes: list of nodes @param nodes: a list of nodes, whose first element is used to create the instance @type disk_template: string @param disk_template: the disk template to be used by the instance @return: the created instance """ assert len(nodes) > 0 return _CreateInstanceByDiskTemplateRaw(nodes[0].primary, disk_template, fail=fail) def CreateInstanceByDiskTemplate(nodes, disk_template, fail=False): """Given a disk template, this function creates an instance using the template. It uses the required number of nodes depending on the disk template. This function is intended to be used by tests that don't care about the specifics of the instance other than that it uses the given disk template. Note: If you use this function, make sure to call 'TestInstanceRemove' at the end of your tests to avoid orphaned instances hanging around and interfering with the following tests. @type nodes: list of nodes @param nodes: the list of the nodes on which the instance will be placed; it needs to have sufficiently many elements for the given disk template @type disk_template: string @param disk_template: the disk template to be used by the instance @return: the created instance """ if disk_template == constants.DT_DRBD8: return CreateInstanceDrbd8(nodes, fail=fail) elif disk_template in [constants.DT_DISKLESS, constants.DT_PLAIN, constants.DT_FILE]: return CreateInstanceByDiskTemplateOneNode(nodes, disk_template, fail=fail) else: # FIXME: This assumes that for all other disk templates, we only need one # node and no disk template specific parameters. This else-branch is # currently only used in cases where we expect failure. Extend it when # QA needs for these templates change. return CreateInstanceByDiskTemplateOneNode(nodes, disk_template, fail=fail) def _ReadSsconfInstanceList(): """Reads ssconf_instance_list from the master node. """ master = qa_config.GetMasterNode() ssconf_path = utils.PathJoin(pathutils.DATA_DIR, "ssconf_%s" % constants.SS_INSTANCE_LIST) cmd = ["cat", qa_utils.MakeNodePath(master, ssconf_path)] return qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)).splitlines() def CheckSsconfInstanceList(instance): """Checks if a certain instance is in the ssconf instance list. @type instance: string @param instance: Instance name """ AssertIn(qa_utils.ResolveInstanceName(instance), _ReadSsconfInstanceList()) ganeti-2.9.3/qa/__init__.py0000644000000000000000000000145212230001635015476 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # empty file for package definition """Ganeti QA scripts""" ganeti-2.9.3/qa/qa_job.py0000644000000000000000000001052412271422343015202 0ustar00rootroot00000000000000# # # Copyright (C) 2012, 2014 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Job-related QA tests. """ from ganeti.utils import retry from ganeti import constants from ganeti import query import functools import re import qa_config import qa_error import qa_utils from qa_utils import AssertCommand, GetCommandOutput def TestJobList(): """gnt-job list""" qa_utils.GenericQueryTest("gnt-job", query.JOB_FIELDS.keys(), namefield="id", test_unknown=False) def TestJobListFields(): """gnt-job list-fields""" qa_utils.GenericQueryFieldsTest("gnt-job", query.JOB_FIELDS.keys()) def _GetJobStatuses(): """ Invokes gnt-job list and extracts an id to status dictionary. @rtype: dict of string to string @return: A dictionary mapping job ids to matching statuses """ master = qa_config.GetMasterNode() list_output = GetCommandOutput( master.primary, "gnt-job list --no-headers --output=id,status" ) return dict(map(lambda s: s.split(), list_output.splitlines())) def _GetJobStatus(job_id): """ Retrieves the status of a job. @type job_id: string @param job_id: The job id, represented as a string. @rtype: string or None @return: The job status, or None if not present. """ return _GetJobStatuses().get(job_id, None) def _RetryingFetchJobStatus(retry_status, job_id): """ Used with C{retry.Retry}, waits for a status other than the one given. @type retry_status: string @param retry_status: The old job status, expected to change. @type job_id: string @param job_id: The job id, represented as a string. @rtype: string or None @return: The new job status, or None if none could be retrieved. """ status = _GetJobStatus(job_id) if status == retry_status: raise retry.RetryAgain() return status def TestJobCancellation(): """gnt-job cancel""" # The delay used for the first command should be large enough for the next # command and the cancellation command to complete before the first job is # done. The second delay should be small enough that not too much time is # spent waiting in the case of a failed cancel and a running command. 
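# E.g. with the defaults below, the second (1 second) job must be submitted # and cancelled well within the 10-second window opened by the first delay # (an illustrative reading of the constraint above, not a strict guarantee).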
FIRST_COMMAND_DELAY = 10.0 AssertCommand(["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)]) SECOND_COMMAND_DELAY = 1.0 master = qa_config.GetMasterNode() # Forcing tty usage does not work on buildbot, so force all output of this # command to be redirected to stdout job_id_output = GetCommandOutput( master.primary, "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY ) possible_job_ids = re.findall("JobID: ([0-9]+)", job_id_output) if len(possible_job_ids) != 1: raise qa_error.Error("Cannot parse gnt-debug delay output to find job id") job_id = possible_job_ids[0] AssertCommand(["gnt-job", "cancel", job_id]) # Now wait until the second job finishes, and expect the watch to fail due to # job cancellation AssertCommand(["gnt-job", "watch", job_id], fail=True) # Then check for job cancellation job_status = _GetJobStatus(job_id) if job_status != constants.JOB_STATUS_CANCELED: # Try and see if the job is being cancelled, and wait until the status # changes or we hit a timeout if job_status == constants.JOB_STATUS_CANCELING: retry_fn = functools.partial(_RetryingFetchJobStatus, constants.JOB_STATUS_CANCELING, job_id) try: job_status = retry.Retry(retry_fn, 2.0, 2 * FIRST_COMMAND_DELAY) except retry.RetryTimeout: # The job status remains the same pass if job_status != constants.JOB_STATUS_CANCELED: raise qa_error.Error("Job was not successfully cancelled, status " "found: %s" % job_status) ganeti-2.9.3/qa/qa_instance.py0000644000000000000000000011624012271422343016236 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Instance related QA tests. """ import os import re from ganeti import utils from ganeti import constants from ganeti import query from ganeti import pathutils import qa_config import qa_utils import qa_error from qa_utils import AssertCommand, AssertEqual from qa_utils import InstanceCheck, INST_DOWN, INST_UP, FIRST_ARG, RETURN_VALUE from qa_instance_utils import CheckSsconfInstanceList, \ CreateInstanceDrbd8, \ CreateInstanceByDiskTemplate, \ CreateInstanceByDiskTemplateOneNode, \ GetGenericAddParameters def _GetDiskStatePath(disk): return "/sys/block/%s/device/state" % disk def GetInstanceInfo(instance): """Return information about the actual state of an instance. @type instance: string @param instance: the instance name @return: a dictionary with the following keys: - "nodes": instance nodes, a list of strings - "volumes": instance volume IDs, a list of strings - "drbd-minors": DRBD minors used by the instance, a dictionary where keys are nodes, and values are lists of integers (or an empty dictionary for non-DRBD instances) - "disk-template": instance disk template - "storage-type": storage type associated with the instance disk template """ node_elem = r"([^,()]+)(?:\s+\([^)]+\))?" 
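# node_elem matches a single node name (anything without commas or # parentheses), optionally followed by a parenthesised group annotation; # see the re_nodelist examples below.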
# re_nodelist matches a list of nodes returned by gnt-instance info, e.g.: # node1.fqdn # node2.fqdn,node3.fqdn # node4.fqdn (group mygroup, group UUID 01234567-abcd-0123-4567-0123456789ab) # FIXME This works with no more than 2 secondaries re_nodelist = re.compile(node_elem + "(?:," + node_elem + ")?$") info = qa_utils.GetObjectInfo(["gnt-instance", "info", instance])[0] nodes = [] for nodeinfo in info["Nodes"]: if "primary" in nodeinfo: nodes.append(nodeinfo["primary"]) elif "secondaries" in nodeinfo: nodestr = nodeinfo["secondaries"] if nodestr: m = re_nodelist.match(nodestr) if m: nodes.extend(filter(None, m.groups())) else: nodes.append(nodestr) disk_template = info["Disk template"] if not disk_template: raise qa_error.Error("Can't get instance disk template") storage_type = constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[disk_template] re_drbdnode = re.compile(r"^([^\s,]+),\s+minor=([0-9]+)$") vols = [] drbd_min = {} for (count, diskinfo) in enumerate(info["Disks"]): (dtype, _) = diskinfo["disk/%s" % count].split(",", 1) if dtype == constants.DT_DRBD8: for child in diskinfo["child devices"]: vols.append(child["logical_id"]) for key in ["nodeA", "nodeB"]: m = re_drbdnode.match(diskinfo[key]) if not m: raise qa_error.Error("Cannot parse DRBD info: %s" % diskinfo[key]) node = m.group(1) minor = int(m.group(2)) minorlist = drbd_min.setdefault(node, []) minorlist.append(minor) elif dtype == constants.DT_PLAIN: vols.append(diskinfo["logical_id"]) assert nodes assert len(nodes) < 2 or vols return { "nodes": nodes, "volumes": vols, "drbd-minors": drbd_min, "disk-template": disk_template, "storage-type": storage_type, } def _DestroyInstanceDisks(instance): """Remove all the backend disks of an instance. This is used to simulate HW errors (dead nodes, broken disks...); the configuration of the instance is not affected. @type instance: dictionary @param instance: the instance """ info = GetInstanceInfo(instance.name) # FIXME: destruction/removal should be part of the disk class if info["storage-type"] == constants.ST_LVM_VG: vols = info["volumes"] for node in info["nodes"]: AssertCommand(["lvremove", "-f"] + vols, node=node) elif info["storage-type"] == constants.ST_FILE: # Note that this works for both file and sharedfile, and this is intended. storage_dir = qa_config.get("file-storage-dir", pathutils.DEFAULT_FILE_STORAGE_DIR) idir = os.path.join(storage_dir, instance.name) for node in info["nodes"]: AssertCommand(["rm", "-rf", idir], node=node) elif info["storage-type"] == constants.ST_DISKLESS: pass def _GetInstanceField(instance, field): """Get the value of a field of an instance. @type instance: string @param instance: Instance name @type field: string @param field: Name of the field @rtype: string """ master = qa_config.GetMasterNode() infocmd = utils.ShellQuoteArgs(["gnt-instance", "list", "--no-headers", "--units", "m", "-o", field, instance]) return qa_utils.GetCommandOutput(master.primary, infocmd).strip() def _GetBoolInstanceField(instance, field): """Get the Boolean value of a field of an instance. @type instance: string @param instance: Instance name @type field: string @param field: Name of the field @rtype: bool """ info_out = _GetInstanceField(instance, field) if info_out == "Y": return True elif info_out == "N": return False else: raise qa_error.Error("Field %s of instance %s has a non-Boolean value:" " %s" % (field, instance, info_out)) def _GetNumInstanceField(instance, field): """Get a numeric value of a field of an instance. 
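Both integer and floating-point output (as printed by 'gnt-instance list') is accepted; anything else makes the function raise qa_error.Error.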
@type instance: string @param instance: Instance name @type field: string @param field: Name of the field @rtype: int or float """ info_out = _GetInstanceField(instance, field) try: ret = int(info_out) except ValueError: try: ret = float(info_out) except ValueError: raise qa_error.Error("Field %s of instance %s has a non-numeric value:" " %s" % (field, instance, info_out)) return ret def GetInstanceSpec(instance, spec): """Return the current spec for the given parameter. @type instance: string @param instance: Instance name @type spec: string @param spec: one of the supported parameters: "memory-size", "cpu-count", "disk-count", "disk-size", "nic-count" @rtype: tuple @return: (minspec, maxspec); minspec and maxspec can be different only for memory and disk size """ specmap = { "memory-size": ["be/minmem", "be/maxmem"], "cpu-count": ["vcpus"], "disk-count": ["disk.count"], "disk-size": ["disk.size/ "], "nic-count": ["nic.count"], } # For disks, first we need the number of disks if spec == "disk-size": (numdisk, _) = GetInstanceSpec(instance, "disk-count") fields = ["disk.size/%s" % k for k in range(0, numdisk)] else: assert spec in specmap, "%s not in %s" % (spec, specmap) fields = specmap[spec] values = [_GetNumInstanceField(instance, f) for f in fields] return (min(values), max(values)) def IsFailoverSupported(instance): return instance.disk_template in constants.DTS_MIRRORED def IsMigrationSupported(instance): return instance.disk_template in constants.DTS_MIRRORED def IsDiskReplacingSupported(instance): return instance.disk_template == constants.DT_DRBD8 def IsDiskSupported(instance): return instance.disk_template != constants.DT_DISKLESS def TestInstanceAddWithPlainDisk(nodes, fail=False): """gnt-instance add -t plain""" if constants.DT_PLAIN in qa_config.GetEnabledDiskTemplates(): instance = CreateInstanceByDiskTemplateOneNode(nodes, constants.DT_PLAIN, fail=fail) if not fail: qa_utils.RunInstanceCheck(instance, True) return instance @InstanceCheck(None, INST_UP, RETURN_VALUE) def TestInstanceAddWithDrbdDisk(nodes): """gnt-instance add -t drbd""" if constants.DT_DRBD8 in qa_config.GetEnabledDiskTemplates(): return CreateInstanceDrbd8(nodes) @InstanceCheck(None, INST_UP, RETURN_VALUE) def TestInstanceAddFile(nodes): """gnt-instance add -t file""" assert len(nodes) == 1 if constants.DT_FILE in qa_config.GetEnabledDiskTemplates(): return CreateInstanceByDiskTemplateOneNode(nodes, constants.DT_FILE) @InstanceCheck(None, INST_UP, RETURN_VALUE) def TestInstanceAddSharedFile(nodes): """gnt-instance add -t sharedfile""" assert len(nodes) == 1 if constants.DT_SHARED_FILE in qa_config.GetEnabledDiskTemplates(): return CreateInstanceByDiskTemplateOneNode(nodes, constants.DT_SHARED_FILE) @InstanceCheck(None, INST_UP, RETURN_VALUE) def TestInstanceAddDiskless(nodes): """gnt-instance add -t diskless""" assert len(nodes) == 1 if constants.DT_DISKLESS in qa_config.GetEnabledDiskTemplates(): return CreateInstanceByDiskTemplateOneNode(nodes, constants.DT_DISKLESS) @InstanceCheck(None, INST_DOWN, FIRST_ARG) def TestInstanceRemove(instance): """gnt-instance remove""" AssertCommand(["gnt-instance", "remove", "-f", instance.name]) @InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG) def TestInstanceStartup(instance): """gnt-instance startup""" AssertCommand(["gnt-instance", "startup", instance.name]) @InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG) def TestInstanceShutdown(instance): """gnt-instance shutdown""" AssertCommand(["gnt-instance", "shutdown", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def 
TestInstanceReboot(instance): """gnt-instance reboot""" options = qa_config.get("options", {}) reboot_types = options.get("reboot-types", constants.REBOOT_TYPES) name = instance.name for rtype in reboot_types: AssertCommand(["gnt-instance", "reboot", "--type=%s" % rtype, name]) AssertCommand(["gnt-instance", "shutdown", name]) qa_utils.RunInstanceCheck(instance, False) AssertCommand(["gnt-instance", "reboot", name]) master = qa_config.GetMasterNode() cmd = ["gnt-instance", "list", "--no-headers", "-o", "status", name] result_output = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) AssertEqual(result_output.strip(), constants.INSTST_RUNNING) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestInstanceReinstall(instance): """gnt-instance reinstall""" if instance.disk_template == constants.DT_DISKLESS: print qa_utils.FormatInfo("Test not supported for diskless instances") return AssertCommand(["gnt-instance", "reinstall", "-f", instance.name]) # Test with a non-existent OS definition AssertCommand(["gnt-instance", "reinstall", "-f", "--os-type=NonExistantOsForQa", instance.name], fail=True) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestInstanceRenameAndBack(rename_source, rename_target): """gnt-instance rename This must leave the instance with the original name, not the target name. """ CheckSsconfInstanceList(rename_source) # first do a rename to a different actual name, expecting it to fail qa_utils.AddToEtcHosts(["meeeeh-not-exists", rename_target]) try: AssertCommand(["gnt-instance", "rename", rename_source, rename_target], fail=True) CheckSsconfInstanceList(rename_source) finally: qa_utils.RemoveFromEtcHosts(["meeeeh-not-exists", rename_target]) info = GetInstanceInfo(rename_source) # Check that instance volume tags are correctly updated. Note that this # check is lvm specific, so we skip it for non-lvm-based instances. # FIXME: This will need updating when instances are able to have # different disks living on storage pools with heterogeneous storage types. # FIXME: This check should be put inside the disk/storage classes # themselves, rather than explicitly called here. if info["storage-type"] == constants.ST_LVM_VG: # In the lvm world we can check for tags on the logical volume tags_cmd = ("lvs -o tags --noheadings %s | grep " % (" ".join(info["volumes"]), )) else: # Other storage types don't have tags, so we use an always failing command, # to make sure it never gets executed tags_cmd = "false" # and now rename instance to rename_target... 
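# (this rename is expected to succeed, now that the conflicting /etc/hosts # entry added above has been removed again)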
AssertCommand(["gnt-instance", "rename", rename_source, rename_target]) CheckSsconfInstanceList(rename_target) qa_utils.RunInstanceCheck(rename_source, False) qa_utils.RunInstanceCheck(rename_target, False) # NOTE: tags might not be the exactly as the instance name, due to # charset restrictions; hence the test might be flaky if (rename_source != rename_target and info["storage-type"] == constants.ST_LVM_VG): for node in info["nodes"]: AssertCommand(tags_cmd + rename_source, node=node, fail=True) AssertCommand(tags_cmd + rename_target, node=node, fail=False) # and back AssertCommand(["gnt-instance", "rename", rename_target, rename_source]) CheckSsconfInstanceList(rename_source) qa_utils.RunInstanceCheck(rename_target, False) if (rename_source != rename_target and info["storage-type"] == constants.ST_LVM_VG): for node in info["nodes"]: AssertCommand(tags_cmd + rename_source, node=node, fail=False) AssertCommand(tags_cmd + rename_target, node=node, fail=True) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceFailover(instance): """gnt-instance failover""" if not IsFailoverSupported(instance): print qa_utils.FormatInfo("Instance doesn't support failover, skipping" " test") return cmd = ["gnt-instance", "failover", "--force", instance.name] # failover ... AssertCommand(cmd) qa_utils.RunInstanceCheck(instance, True) # ... and back AssertCommand(cmd) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceMigrate(instance, toggle_always_failover=True): """gnt-instance migrate""" if not IsMigrationSupported(instance): print qa_utils.FormatInfo("Instance doesn't support migration, skipping" " test") return cmd = ["gnt-instance", "migrate", "--force", instance.name] af_par = constants.BE_ALWAYS_FAILOVER af_field = "be/" + constants.BE_ALWAYS_FAILOVER af_init_val = _GetBoolInstanceField(instance.name, af_field) # migrate ... AssertCommand(cmd) # TODO: Verify the choice between failover and migration qa_utils.RunInstanceCheck(instance, True) # ... 
and back (possibly with always_failover toggled) if toggle_always_failover: AssertCommand(["gnt-instance", "modify", "-B", ("%s=%s" % (af_par, not af_init_val)), instance.name]) AssertCommand(cmd) # TODO: Verify the choice between failover and migration qa_utils.RunInstanceCheck(instance, True) if toggle_always_failover: AssertCommand(["gnt-instance", "modify", "-B", ("%s=%s" % (af_par, af_init_val)), instance.name]) # TODO: Split into multiple tests AssertCommand(["gnt-instance", "shutdown", instance.name]) qa_utils.RunInstanceCheck(instance, False) AssertCommand(cmd, fail=True) AssertCommand(["gnt-instance", "migrate", "--force", "--allow-failover", instance.name]) AssertCommand(["gnt-instance", "start", instance.name]) AssertCommand(cmd) # @InstanceCheck enforces the check that the instance is running qa_utils.RunInstanceCheck(instance, True) AssertCommand(["gnt-instance", "modify", "-B", ("%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)), instance.name]) AssertCommand(cmd) qa_utils.RunInstanceCheck(instance, True) # TODO: Verify that a failover has been done instead of a migration # TODO: Verify whether the default value is restored here (not hardcoded) AssertCommand(["gnt-instance", "modify", "-B", ("%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_FALSE)), instance.name]) AssertCommand(cmd) qa_utils.RunInstanceCheck(instance, True) def TestInstanceInfo(instance): """gnt-instance info""" AssertCommand(["gnt-instance", "info", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceModify(instance): """gnt-instance modify""" default_hv = qa_config.GetDefaultHypervisor() # Assume /sbin/init exists on all systems test_kernel = "/sbin/init" test_initrd = test_kernel orig_maxmem = qa_config.get(constants.BE_MAXMEM) orig_minmem = qa_config.get(constants.BE_MINMEM) #orig_bridge = qa_config.get("bridge", "xen-br0") args = [ ["-B", "%s=128" % constants.BE_MINMEM], ["-B", "%s=128" % constants.BE_MAXMEM], ["-B", "%s=%s,%s=%s" % (constants.BE_MINMEM, orig_minmem, constants.BE_MAXMEM, orig_maxmem)], ["-B", "%s=2" % constants.BE_VCPUS], ["-B", "%s=1" % constants.BE_VCPUS], ["-B", "%s=%s" % (constants.BE_VCPUS, constants.VALUE_DEFAULT)], ["-B", "%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)], ["-B", "%s=%s" % (constants.BE_ALWAYS_FAILOVER, constants.VALUE_DEFAULT)], ["-H", "%s=%s" % (constants.HV_KERNEL_PATH, test_kernel)], ["-H", "%s=%s" % (constants.HV_KERNEL_PATH, constants.VALUE_DEFAULT)], # TODO: bridge tests #["--bridge", "xen-br1"], #["--bridge", orig_bridge], ] if default_hv == constants.HT_XEN_PVM: args.extend([ ["-H", "%s=%s" % (constants.HV_INITRD_PATH, test_initrd)], ["-H", "no_%s" % (constants.HV_INITRD_PATH, )], ["-H", "%s=%s" % (constants.HV_INITRD_PATH, constants.VALUE_DEFAULT)], ]) elif default_hv == constants.HT_XEN_HVM: args.extend([ ["-H", "%s=acn" % constants.HV_BOOT_ORDER], ["-H", "%s=%s" % (constants.HV_BOOT_ORDER, constants.VALUE_DEFAULT)], ]) for alist in args: AssertCommand(["gnt-instance", "modify"] + alist + [instance.name]) # check no-modify AssertCommand(["gnt-instance", "modify", instance.name], fail=True) # Marking offline while instance is running must fail... 
AssertCommand(["gnt-instance", "modify", "--offline", instance.name], fail=True) # ...while making it online is ok, and should work AssertCommand(["gnt-instance", "modify", "--online", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceModifyPrimaryAndBack(instance, currentnode, othernode): """gnt-instance modify --new-primary This will leave the instance on its original primary node, not other node. """ if instance.disk_template != constants.DT_FILE: print qa_utils.FormatInfo("Test only supported for the file disk template") return cluster_name = qa_config.get("name") name = instance.name current = currentnode.primary other = othernode.primary filestorage = qa_config.get("file-storage-dir", pathutils.DEFAULT_FILE_STORAGE_DIR) disk = os.path.join(filestorage, name) AssertCommand(["gnt-instance", "modify", "--new-primary=%s" % other, name], fail=True) AssertCommand(["gnt-instance", "shutdown", name]) AssertCommand(["scp", "-oGlobalKnownHostsFile=%s" % pathutils.SSH_KNOWN_HOSTS_FILE, "-oCheckHostIp=no", "-oStrictHostKeyChecking=yes", "-oHashKnownHosts=no", "-oHostKeyAlias=%s" % cluster_name, "-r", disk, "%s:%s" % (other, filestorage)], node=current) AssertCommand(["gnt-instance", "modify", "--new-primary=%s" % other, name]) AssertCommand(["gnt-instance", "startup", name]) # and back AssertCommand(["gnt-instance", "shutdown", name]) AssertCommand(["rm", "-rf", disk], node=other) AssertCommand(["gnt-instance", "modify", "--new-primary=%s" % current, name]) AssertCommand(["gnt-instance", "startup", name]) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestInstanceStoppedModify(instance): """gnt-instance modify (stopped instance)""" name = instance.name # Instance was not marked offline; try marking it online once more AssertCommand(["gnt-instance", "modify", "--online", name]) # Mark instance as offline AssertCommand(["gnt-instance", "modify", "--offline", name]) # When the instance is offline shutdown should only work with --force, # while start should never work AssertCommand(["gnt-instance", "shutdown", name], fail=True) AssertCommand(["gnt-instance", "shutdown", "--force", name]) AssertCommand(["gnt-instance", "start", name], fail=True) AssertCommand(["gnt-instance", "start", "--force", name], fail=True) # Also do offline to offline AssertCommand(["gnt-instance", "modify", "--offline", name]) # And online again AssertCommand(["gnt-instance", "modify", "--online", name]) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestInstanceConvertDiskToPlain(instance, inodes): """gnt-instance modify -t""" name = instance.name template = instance.disk_template if template != constants.DT_DRBD8: print qa_utils.FormatInfo("Unsupported template %s, skipping conversion" " test" % template) return assert len(inodes) == 2 AssertCommand(["gnt-instance", "modify", "-t", constants.DT_PLAIN, name]) AssertCommand(["gnt-instance", "modify", "-t", constants.DT_DRBD8, "-n", inodes[1].primary, name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceModifyDisks(instance): """gnt-instance modify --disk""" if not IsDiskSupported(instance): print qa_utils.FormatInfo("Instance doesn't support disks, skipping test") return disk_conf = qa_config.GetDiskOptions()[-1] size = disk_conf.get("size") name = instance.name build_cmd = lambda arg: ["gnt-instance", "modify", "--disk", arg, name] if qa_config.AreSpindlesSupported(): spindles = disk_conf.get("spindles") spindles_supported = True else: # Any number is good for spindles in this case spindles = 1 spindles_supported = False 
AssertCommand(build_cmd("add:size=%s,spindles=%s" % (size, spindles)), fail=not spindles_supported) AssertCommand(build_cmd("add:size=%s" % size), fail=spindles_supported) # Exactly one of the above commands has succeded, so we need one remove AssertCommand(build_cmd("remove")) @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG) def TestInstanceGrowDisk(instance): """gnt-instance grow-disk""" if instance.disk_template == constants.DT_DISKLESS: print qa_utils.FormatInfo("Test not supported for diskless instances") return name = instance.name disks = qa_config.GetDiskOptions() all_size = [d.get("size") for d in disks] all_grow = [d.get("growth") for d in disks] if not all_grow: # missing disk sizes but instance grow disk has been enabled, # let's set fixed/nomimal growth all_grow = ["128M" for _ in all_size] for idx, (size, grow) in enumerate(zip(all_size, all_grow)): # succeed in grow by amount AssertCommand(["gnt-instance", "grow-disk", name, str(idx), grow]) # fail in grow to the old size AssertCommand(["gnt-instance", "grow-disk", "--absolute", name, str(idx), size], fail=True) # succeed to grow to old size + 2 * growth int_size = utils.ParseUnit(size) int_grow = utils.ParseUnit(grow) AssertCommand(["gnt-instance", "grow-disk", "--absolute", name, str(idx), str(int_size + 2 * int_grow)]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceDeviceNames(instance): if instance.disk_template == constants.DT_DISKLESS: print qa_utils.FormatInfo("Test not supported for diskless instances") return name = instance.name for dev_type in ["disk", "net"]: if dev_type == "disk": options = ",size=512M" if qa_config.AreSpindlesSupported(): options += ",spindles=1" else: options = "" # succeed in adding a device named 'test_device' AssertCommand(["gnt-instance", "modify", "--%s=-1:add,name=test_device%s" % (dev_type, options), name]) # succeed in removing the 'test_device' AssertCommand(["gnt-instance", "modify", "--%s=test_device:remove" % dev_type, name]) # fail to add two devices with the same name AssertCommand(["gnt-instance", "modify", "--%s=-1:add,name=test_device%s" % (dev_type, options), "--%s=-1:add,name=test_device%s" % (dev_type, options), name], fail=True) # fail to add a device with invalid name AssertCommand(["gnt-instance", "modify", "--%s=-1:add,name=2%s" % (dev_type, options), name], fail=True) # Rename disks disks = qa_config.GetDiskOptions() disk_names = [d.get("name") for d in disks] for idx, disk_name in enumerate(disk_names): # Refer to disk by idx AssertCommand(["gnt-instance", "modify", "--disk=%s:modify,name=renamed" % idx, name]) # Refer to by name and rename to original name AssertCommand(["gnt-instance", "modify", "--disk=renamed:modify,name=%s" % disk_name, name]) if len(disks) >= 2: # fail in renaming to disks to the same name AssertCommand(["gnt-instance", "modify", "--disk=0:modify,name=same_name", "--disk=1:modify,name=same_name", name], fail=True) def TestInstanceList(): """gnt-instance list""" qa_utils.GenericQueryTest("gnt-instance", query.INSTANCE_FIELDS.keys()) def TestInstanceListFields(): """gnt-instance list-fields""" qa_utils.GenericQueryFieldsTest("gnt-instance", query.INSTANCE_FIELDS.keys()) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceConsole(instance): """gnt-instance console""" AssertCommand(["gnt-instance", "console", "--show-cmd", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestReplaceDisks(instance, curr_nodes, other_nodes): """gnt-instance replace-disks""" def buildcmd(args): cmd = ["gnt-instance", "replace-disks"] 
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestReplaceDisks(instance, curr_nodes, other_nodes):
  """gnt-instance replace-disks"""
  def buildcmd(args):
    cmd = ["gnt-instance", "replace-disks"]
    cmd.extend(args)
    cmd.append(instance.name)
    return cmd

  if not IsDiskReplacingSupported(instance):
    print qa_utils.FormatInfo("Instance doesn't support disk replacing,"
                              " skipping test")
    return

  # Currently all supported templates have one primary and one secondary node
  assert len(curr_nodes) == 2
  snode = curr_nodes[1]
  assert len(other_nodes) == 1
  othernode = other_nodes[0]

  options = qa_config.get("options", {})
  use_ialloc = options.get("use-iallocators", True)
  for data in [
    ["-p"],
    ["-s"],
    # A placeholder; the actual command choice depends on use_ialloc
    None,
    # Restore the original secondary
    ["--new-secondary=%s" % snode.primary],
    ]:
    if data is None:
      if use_ialloc:
        data = ["-I", constants.DEFAULT_IALLOCATOR_SHORTCUT]
      else:
        data = ["--new-secondary=%s" % othernode.primary]
    AssertCommand(buildcmd(data))

  AssertCommand(buildcmd(["-a"]))
  AssertCommand(["gnt-instance", "stop", instance.name])
  AssertCommand(buildcmd(["-a"]), fail=True)
  AssertCommand(["gnt-instance", "activate-disks", instance.name])
  AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
                 instance.name])
  AssertCommand(buildcmd(["-a"]))
  AssertCommand(["gnt-instance", "start", instance.name])


def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True,
                         destroy=True):
  """Execute gnt-instance recreate-disks and check the result

  @param cmdargs: Arguments (instance name excluded)
  @param instance: Instance to operate on
  @param fail: True if the command is expected to fail
  @param check: If True and fail is False, check that the disks work
  @param destroy: If True, destroy the old disks first

  """
  if destroy:
    _DestroyInstanceDisks(instance)
  AssertCommand((["gnt-instance", "recreate-disks"] + cmdargs +
                 [instance.name]), fail)
  if not fail and check:
    # Quick check that the disks are there
    AssertCommand(["gnt-instance", "activate-disks", instance.name])
    AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
                   instance.name])
    AssertCommand(["gnt-instance", "deactivate-disks", instance.name])


def _BuildRecreateDisksOpts(en_disks, with_spindles, with_growth,
                            spindles_supported):
  if with_spindles:
    if spindles_supported:
      if with_growth:
        build_spindles_opt = (lambda disk: ",spindles=%s" %
                              (disk["spindles"] + disk["spindles-growth"]))
      else:
        build_spindles_opt = (lambda disk: ",spindles=%s" % disk["spindles"])
    else:
      build_spindles_opt = (lambda _: ",spindles=1")
  else:
    build_spindles_opt = (lambda _: "")
  if with_growth:
    build_size_opt = (lambda disk: "size=%s" %
                      (utils.ParseUnit(disk["size"]) +
                       utils.ParseUnit(disk["growth"])))
  else:
    build_size_opt = (lambda disk: "size=%s" % disk["size"])
  build_disk_opt = (lambda (idx, disk): "--disk=%s:%s%s" %
                    (idx, build_size_opt(disk), build_spindles_opt(disk)))
  return map(build_disk_opt, en_disks)


@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestRecreateDisks(instance, inodes, othernodes):
  """gnt-instance recreate-disks

  @param instance: Instance to work on
  @param inodes: List of the current nodes of the instance
  @param othernodes: list/tuple of nodes where to temporarily recreate disks

  """
  options = qa_config.get("options", {})
  use_ialloc = options.get("use-iallocators", True)
  other_seq = ":".join([n.primary for n in othernodes])
  orig_seq = ":".join([n.primary for n in inodes])
  # These fail because the instance is running
  _AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
  if use_ialloc:
    _AssertRecreateDisks(["-I", "hail"], instance, fail=True, destroy=False)
  else:
    _AssertRecreateDisks(["-n", other_seq], instance, fail=True,
                         destroy=False)
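  # recreate-disks refuses to touch a running instance in every variant
  # above, so the instance is stopped before the in-place and move tests
  # that follow.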
AssertCommand(["gnt-instance", "stop", instance.name]) # Disks exist: this should fail _AssertRecreateDisks([], instance, fail=True, destroy=False) # Unsupported spindles parameters: fail if not qa_config.AreSpindlesSupported(): _AssertRecreateDisks(["--disk=0:spindles=2"], instance, fail=True, destroy=False) # Recreate disks in place _AssertRecreateDisks([], instance) # Move disks away if use_ialloc: _AssertRecreateDisks(["-I", "hail"], instance) # Move disks somewhere else _AssertRecreateDisks(["-I", constants.DEFAULT_IALLOCATOR_SHORTCUT], instance) else: _AssertRecreateDisks(["-n", other_seq], instance) # Move disks back _AssertRecreateDisks(["-n", orig_seq], instance) # Recreate resized disks # One of the two commands fails because either spindles are given when they # should not or vice versa alldisks = qa_config.GetDiskOptions() spindles_supported = qa_config.AreSpindlesSupported() disk_opts = _BuildRecreateDisksOpts(enumerate(alldisks), True, True, spindles_supported) _AssertRecreateDisks(disk_opts, instance, destroy=True, fail=not spindles_supported) disk_opts = _BuildRecreateDisksOpts(enumerate(alldisks), False, True, spindles_supported) _AssertRecreateDisks(disk_opts, instance, destroy=False, fail=spindles_supported) # Recreate the disks one by one (with the original size) for (idx, disk) in enumerate(alldisks): # Only the first call should destroy all the disk destroy = (idx == 0) # Again, one of the two commands is expected to fail disk_opts = _BuildRecreateDisksOpts([(idx, disk)], True, False, spindles_supported) _AssertRecreateDisks(disk_opts, instance, destroy=destroy, check=False, fail=not spindles_supported) disk_opts = _BuildRecreateDisksOpts([(idx, disk)], False, False, spindles_supported) _AssertRecreateDisks(disk_opts, instance, destroy=False, check=False, fail=spindles_supported) # This and InstanceCheck decoration check that the disks are working AssertCommand(["gnt-instance", "reinstall", "-f", instance.name]) AssertCommand(["gnt-instance", "start", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceExport(instance, node): """gnt-backup export -n ...""" name = instance.name # Export does not work for file-based templates, thus we skip the test if instance.disk_template in [constants.DT_FILE, constants.DT_SHARED_FILE]: return AssertCommand(["gnt-backup", "export", "-n", node.primary, name]) return qa_utils.ResolveInstanceName(name) @InstanceCheck(None, INST_DOWN, FIRST_ARG) def TestInstanceExportWithRemove(instance, node): """gnt-backup export --remove-instance""" AssertCommand(["gnt-backup", "export", "-n", node.primary, "--remove-instance", instance.name]) @InstanceCheck(INST_UP, INST_UP, FIRST_ARG) def TestInstanceExportNoTarget(instance): """gnt-backup export (without target node, should fail)""" AssertCommand(["gnt-backup", "export", instance.name], fail=True) @InstanceCheck(None, INST_DOWN, FIRST_ARG) def TestInstanceImport(newinst, node, expnode, name): """gnt-backup import""" templ = constants.DT_PLAIN if not qa_config.IsTemplateSupported(templ): return cmd = (["gnt-backup", "import", "--disk-template=%s" % templ, "--no-ip-check", "--src-node=%s" % expnode.primary, "--src-dir=%s/%s" % (pathutils.EXPORT_DIR, name), "--node=%s" % node.primary] + GetGenericAddParameters(newinst, templ, force_mac=constants.VALUE_GENERATE)) cmd.append(newinst.name) AssertCommand(cmd) newinst.SetDiskTemplate(templ) def TestBackupList(expnode): """gnt-backup list""" AssertCommand(["gnt-backup", "list", "--node=%s" % expnode.primary]) 
qa_utils.GenericQueryTest("gnt-backup", query.EXPORT_FIELDS.keys(), namefield=None, test_unknown=False) def TestBackupListFields(): """gnt-backup list-fields""" qa_utils.GenericQueryFieldsTest("gnt-backup", query.EXPORT_FIELDS.keys()) def TestRemoveInstanceOfflineNode(instance, snode, set_offline, set_online): """gnt-instance remove with an off-line node @param instance: instance @param snode: secondary node, to be set offline @param set_offline: function to call to set the node off-line @param set_online: function to call to set the node on-line """ info = GetInstanceInfo(instance.name) set_offline(snode) try: TestInstanceRemove(instance) finally: set_online(snode) # Clean up the disks on the offline node, if necessary if instance.disk_template not in constants.DTS_EXT_MIRROR: # FIXME: abstract the cleanup inside the disks if info["storage-type"] == constants.ST_LVM_VG: for minor in info["drbd-minors"][snode.primary]: # DRBD 8.3 syntax comes first, then DRBD 8.4 syntax. The 8.4 syntax # relies on the fact that we always create a resources for each minor, # and that this resources is always named resource{minor}. # As 'drbdsetup 0 down' does return success (even though that's invalid # syntax), we always have to perform both commands and ignore the # output. drbd_shutdown_cmd = \ "(drbdsetup %d down >/dev/null 2>&1;" \ " drbdsetup down resource%d >/dev/null 2>&1) || /bin/true" % \ (minor, minor) AssertCommand(drbd_shutdown_cmd, node=snode) AssertCommand(["lvremove", "-f"] + info["volumes"], node=snode) elif info["storage-type"] == constants.ST_FILE: filestorage = qa_config.get("file-storage-dir", pathutils.DEFAULT_FILE_STORAGE_DIR) disk = os.path.join(filestorage, instance.name) AssertCommand(["rm", "-rf", disk], node=snode) def TestInstanceCreationRestrictedByDiskTemplates(): """Test adding instances for disabled disk templates.""" if qa_config.TestEnabled("cluster-exclusive-storage"): # These tests are valid only for non-exclusive storage return enabled_disk_templates = qa_config.GetEnabledDiskTemplates() nodes = qa_config.AcquireManyNodes(2) # Setup the cluster with the enabled_disk_templates AssertCommand( ["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(enabled_disk_templates)], fail=False) # Test instance creation for enabled disk templates for disk_template in enabled_disk_templates: instance = CreateInstanceByDiskTemplate(nodes, disk_template, fail=False) TestInstanceRemove(instance) instance.Release() # Test that instance creation fails for disabled disk templates disabled_disk_templates = list(constants.DISK_TEMPLATES - set(enabled_disk_templates)) for disk_template in disabled_disk_templates: instance = CreateInstanceByDiskTemplate(nodes, disk_template, fail=True) # Test instance creation for after disabling enabled disk templates if (len(enabled_disk_templates) > 1): # Partition the disk templates, enable them separately and check if the # disabled ones cannot be used by instances. 
middle = len(enabled_disk_templates) / 2 templates1 = enabled_disk_templates[:middle] templates2 = enabled_disk_templates[middle:] for (enabled, disabled) in [(templates1, templates2), (templates2, templates1)]: AssertCommand(["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(enabled), "--ipolicy-disk-templates=%s" % ",".join(enabled)], fail=False) for disk_template in disabled: CreateInstanceByDiskTemplate(nodes, disk_template, fail=True) elif (len(enabled_disk_templates) == 1): # If only one disk template is enabled in the QA config, we have to enable # some other templates in order to test if the disabling the only enabled # disk template prohibits creating instances of that template. other_disk_templates = list( set([constants.DT_DISKLESS, constants.DT_BLOCK]) - set(enabled_disk_templates)) AssertCommand(["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(other_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(other_disk_templates)], fail=False) CreateInstanceByDiskTemplate(nodes, enabled_disk_templates[0], fail=True) else: raise qa_error.Error("Please enable at least one disk template" " in your QA setup.") # Restore initially enabled disk templates AssertCommand(["gnt-cluster", "modify", "--enabled-disk-templates=%s" % ",".join(enabled_disk_templates), "--ipolicy-disk-templates=%s" % ",".join(enabled_disk_templates)], fail=False) ganeti-2.9.3/qa/qa_group.py0000644000000000000000000002400712271422343015565 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """QA tests for node groups. """ from ganeti import constants from ganeti import query from ganeti import utils import qa_config import qa_utils from qa_utils import AssertCommand, AssertEqual, GetCommandOutput def GetDefaultGroup(): """Returns the default node group. """ groups = qa_config.get("groups", {}) return groups.get("group-with-nodes", constants.INITIAL_NODE_GROUP_NAME) def TestGroupAddRemoveRename(): """gnt-group add/remove/rename""" existing_group_with_nodes = GetDefaultGroup() (group1, group2, group3) = qa_utils.GetNonexistentGroups(3) AssertCommand(["gnt-group", "add", group1]) AssertCommand(["gnt-group", "add", group2]) AssertCommand(["gnt-group", "add", group2], fail=True) AssertCommand(["gnt-group", "add", existing_group_with_nodes], fail=True) AssertCommand(["gnt-group", "rename", group1, group2], fail=True) AssertCommand(["gnt-group", "rename", group1, group3]) try: AssertCommand(["gnt-group", "rename", existing_group_with_nodes, group1]) AssertCommand(["gnt-group", "remove", group2]) AssertCommand(["gnt-group", "remove", group3]) AssertCommand(["gnt-group", "remove", group1], fail=True) finally: # Try to ensure idempotency re groups that already existed. 
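    # (if the rename of existing_group_with_nodes to group1 above succeeded,
    # this rename restores the group's original name)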
AssertCommand(["gnt-group", "rename", group1, existing_group_with_nodes]) def TestGroupAddWithOptions(): """gnt-group add with options""" (group1, ) = qa_utils.GetNonexistentGroups(1) AssertCommand(["gnt-group", "add", "--alloc-policy", "notvalid", group1], fail=True) AssertCommand(["gnt-group", "add", "--alloc-policy", "last_resort", "--node-parameters", "oob_program=/bin/true", group1]) AssertCommand(["gnt-group", "remove", group1]) def _GetGroupIPolicy(groupname): """Return the run-time values of the cluster-level instance policy. @type groupname: string @param groupname: node group name @rtype: tuple @return: (policy, specs), where: - policy is a dictionary of the policy values, instance specs excluded - specs is a dictionary containing only the specs, using the internal format (see L{constants.IPOLICY_DEFAULTS} for an example), but without the standard values """ info = qa_utils.GetObjectInfo(["gnt-group", "info", groupname]) assert len(info) == 1 policy = info[0]["Instance policy"] (ret_policy, ret_specs) = qa_utils.ParseIPolicy(policy) # Sanity checks assert "minmax" in ret_specs assert len(ret_specs["minmax"]) > 0 assert len(ret_policy) > 0 return (ret_policy, ret_specs) def _TestGroupSetISpecs(groupname, new_specs=None, diff_specs=None, fail=False, old_values=None): """Change instance specs on a group. At most one of new_specs or diff_specs can be specified. @type groupname: string @param groupname: group name @type new_specs: dict @param new_specs: new complete specs, in the same format returned by L{_GetGroupIPolicy} @type diff_specs: dict @param diff_specs: partial specs, it can be an incomplete specifications, but if min/max specs are specified, their number must match the number of the existing specs @type fail: bool @param fail: if the change is expected to fail @type old_values: tuple @param old_values: (old_policy, old_specs), as returned by L{_GetGroupIPolicy} @return: same as L{_GetGroupIPolicy} """ build_cmd = lambda opts: ["gnt-group", "modify"] + opts + [groupname] get_policy = lambda: _GetGroupIPolicy(groupname) return qa_utils.TestSetISpecs( new_specs=new_specs, diff_specs=diff_specs, get_policy_fn=get_policy, build_cmd_fn=build_cmd, fail=fail, old_values=old_values) def _TestGroupModifyISpecs(groupname): # This test is built on the assumption that the default ipolicy holds for # the node group under test old_values = _GetGroupIPolicy(groupname) samevals = dict((p, 4) for p in constants.ISPECS_PARAMETERS) base_specs = { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: samevals, constants.ISPECS_MAX: samevals, }], } mod_values = _TestGroupSetISpecs(groupname, new_specs=base_specs, old_values=old_values) for par in constants.ISPECS_PARAMETERS: # First make sure that the test works with good values good_specs = { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: {par: 8}, constants.ISPECS_MAX: {par: 8}, }], } mod_values = _TestGroupSetISpecs(groupname, diff_specs=good_specs, old_values=mod_values) bad_specs = { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: {par: 8}, constants.ISPECS_MAX: {par: 4}, }], } _TestGroupSetISpecs(groupname, diff_specs=bad_specs, fail=True, old_values=mod_values) AssertCommand(["gnt-group", "modify", "--ipolicy-bounds-specs", "default", groupname]) AssertEqual(_GetGroupIPolicy(groupname), old_values) # Get the ipolicy command (from the cluster config) mnode = qa_config.GetMasterNode() addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([ "gnt-group", "show-ispecs-cmd", "--include-defaults", groupname, ])) modcmd = ["gnt-group", 
"modify"] opts = addcmd.split() assert opts[0:2] == ["gnt-group", "add"] for k in range(2, len(opts) - 1): if opts[k].startswith("--ipolicy-"): assert k + 2 <= len(opts) modcmd.extend(opts[k:k + 2]) modcmd.append(groupname) # Apply the ipolicy to the group and verify the result AssertCommand(modcmd) new_addcmd = GetCommandOutput(mnode.primary, utils.ShellQuoteArgs([ "gnt-group", "show-ispecs-cmd", groupname, ])) AssertEqual(addcmd, new_addcmd) def _TestGroupModifyIPolicy(groupname): _TestGroupModifyISpecs(groupname) # We assume that the default ipolicy holds (old_policy, old_specs) = _GetGroupIPolicy(groupname) for (par, setval, iname, expval) in [ ("vcpu-ratio", 1.5, None, 1.5), ("spindle-ratio", 1.5, None, 1.5), ("disk-templates", constants.DT_PLAIN, "allowed disk templates", constants.DT_PLAIN) ]: if not iname: iname = par build_cmdline = lambda val: ["gnt-group", "modify", "--ipolicy-" + par, str(val), groupname] AssertCommand(build_cmdline(setval)) (new_policy, new_specs) = _GetGroupIPolicy(groupname) AssertEqual(new_specs, old_specs) for (p, val) in new_policy.items(): if p == iname: AssertEqual(val, expval) else: AssertEqual(val, old_policy[p]) AssertCommand(build_cmdline("default")) (new_policy, new_specs) = _GetGroupIPolicy(groupname) AssertEqual(new_specs, old_specs) AssertEqual(new_policy, old_policy) def TestGroupModify(): """gnt-group modify""" # This tests assumes LVM to be enabled, thus it should skip if # this is not the case if not qa_config.IsStorageTypeSupported(constants.ST_LVM_VG): return (group1, ) = qa_utils.GetNonexistentGroups(1) AssertCommand(["gnt-group", "add", group1]) try: _TestGroupModifyIPolicy(group1) AssertCommand(["gnt-group", "modify", "--alloc-policy", "unallocable", "--node-parameters", "oob_program=/bin/false", group1]) AssertCommand(["gnt-group", "modify", "--alloc-policy", "notvalid", group1], fail=True) AssertCommand(["gnt-group", "modify", "--node-parameters", "spindle_count=10", group1]) if qa_config.TestEnabled("htools"): AssertCommand(["hbal", "-L", "-G", group1]) AssertCommand(["gnt-group", "modify", "--node-parameters", "spindle_count=default", group1]) finally: AssertCommand(["gnt-group", "remove", group1]) def TestGroupList(): """gnt-group list""" qa_utils.GenericQueryTest("gnt-group", query.GROUP_FIELDS.keys()) def TestGroupListFields(): """gnt-group list-fields""" qa_utils.GenericQueryFieldsTest("gnt-group", query.GROUP_FIELDS.keys()) def TestAssignNodesIncludingSplit(orig_group, node1, node2): """gnt-group assign-nodes --force Expects node1 and node2 to be primary and secondary for a common instance. """ assert node1 != node2 (other_group, ) = qa_utils.GetNonexistentGroups(1) master_node = qa_config.GetMasterNode().primary def AssertInGroup(group, nodes): real_output = GetCommandOutput(master_node, "gnt-node list --no-headers -o group " + utils.ShellQuoteArgs(nodes)) AssertEqual(real_output.splitlines(), [group] * len(nodes)) AssertInGroup(orig_group, [node1, node2]) AssertCommand(["gnt-group", "add", other_group]) try: AssertCommand(["gnt-group", "assign-nodes", other_group, node1, node2]) AssertInGroup(other_group, [node1, node2]) # This should fail because moving node1 to orig_group would leave their # common instance split between orig_group and other_group. 
AssertCommand(["gnt-group", "assign-nodes", orig_group, node1], fail=True) AssertInGroup(other_group, [node1, node2]) AssertCommand(["gnt-group", "assign-nodes", "--force", orig_group, node1]) AssertInGroup(orig_group, [node1]) AssertInGroup(other_group, [node2]) AssertCommand(["gnt-group", "assign-nodes", orig_group, node2]) AssertInGroup(orig_group, [node1, node2]) finally: AssertCommand(["gnt-group", "remove", other_group]) ganeti-2.9.3/qa/qa_tags.py0000644000000000000000000000420112271422343015361 0ustar00rootroot00000000000000# # # Copyright (C) 2007 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Tags related QA tests. """ from ganeti import constants import qa_rapi from qa_utils import AssertCommand _TEMP_TAG_NAMES = ["TEMP-Ganeti-QA-Tag%d" % i for i in range(3)] _TEMP_TAG_RE = r'^TEMP-Ganeti-QA-Tag\d+$' _KIND_TO_COMMAND = { constants.TAG_CLUSTER: "gnt-cluster", constants.TAG_NODE: "gnt-node", constants.TAG_INSTANCE: "gnt-instance", constants.TAG_NODEGROUP: "gnt-group", constants.TAG_NETWORK: "gnt-network", } def _TestTags(kind, name): """Generic function for add-tags. """ def cmdfn(subcmd): cmd = [_KIND_TO_COMMAND[kind], subcmd] if kind != constants.TAG_CLUSTER: cmd.append(name) return cmd for cmd in [ cmdfn("add-tags") + _TEMP_TAG_NAMES, cmdfn("list-tags"), ["gnt-cluster", "search-tags", _TEMP_TAG_RE], cmdfn("remove-tags") + _TEMP_TAG_NAMES, ]: AssertCommand(cmd) if qa_rapi.Enabled(): qa_rapi.TestTags(kind, name, _TEMP_TAG_NAMES) def TestClusterTags(): """gnt-cluster tags""" _TestTags(constants.TAG_CLUSTER, "") def TestNodeTags(node): """gnt-node tags""" _TestTags(constants.TAG_NODE, node.primary) def TestGroupTags(group): """gnt-group tags""" _TestTags(constants.TAG_NODEGROUP, group) def TestInstanceTags(instance): """gnt-instance tags""" _TestTags(constants.TAG_INSTANCE, instance.name) def TestNetworkTags(network): """gnt-network tags""" _TestTags(constants.TAG_NETWORK, network) ganeti-2.9.3/qa/qa_node.py0000644000000000000000000003754312271422343015367 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Node-related QA tests. 
""" from ganeti import utils from ganeti import constants from ganeti import query from ganeti import serializer import qa_config import qa_error import qa_utils from qa_utils import AssertCommand, AssertEqual def _NodeAdd(node, readd=False): if not readd and node.added: raise qa_error.Error("Node %s already in cluster" % node.primary) elif readd and not node.added: raise qa_error.Error("Node %s not yet in cluster" % node.primary) cmd = ["gnt-node", "add", "--no-ssh-key-check"] if node.secondary: cmd.append("--secondary-ip=%s" % node.secondary) if readd: cmd.append("--readd") cmd.append(node.primary) AssertCommand(cmd) if readd: assert node.added else: node.MarkAdded() def _NodeRemove(node): AssertCommand(["gnt-node", "remove", node.primary]) node.MarkRemoved() def MakeNodeOffline(node, value): """gnt-node modify --offline=value""" # value in ["yes", "no"] AssertCommand(["gnt-node", "modify", "--offline", value, node.primary]) def TestNodeAddAll(): """Adding all nodes to cluster.""" master = qa_config.GetMasterNode() for node in qa_config.get("nodes"): if node != master: _NodeAdd(node, readd=False) def MarkNodeAddedAll(): """Mark all nodes as added. This is useful if we don't create the cluster ourselves (in qa). """ master = qa_config.GetMasterNode() for node in qa_config.get("nodes"): if node != master: node.MarkAdded() def TestNodeRemoveAll(): """Removing all nodes from cluster.""" master = qa_config.GetMasterNode() for node in qa_config.get("nodes"): if node != master: _NodeRemove(node) def TestNodeReadd(node): """gnt-node add --readd""" _NodeAdd(node, readd=True) def TestNodeInfo(): """gnt-node info""" AssertCommand(["gnt-node", "info"]) def TestNodeVolumes(): """gnt-node volumes""" AssertCommand(["gnt-node", "volumes"]) def TestNodeStorage(): """gnt-node storage""" master = qa_config.GetMasterNode() # FIXME: test all storage_types in constants.STORAGE_TYPES # as soon as they are implemented. 
enabled_storage_types = qa_config.GetEnabledStorageTypes() testable_storage_types = list(set(enabled_storage_types).intersection( set([constants.ST_FILE, constants.ST_LVM_VG, constants.ST_LVM_PV]))) for storage_type in testable_storage_types: cmd = ["gnt-node", "list-storage", "--storage-type", storage_type] # Test simple list AssertCommand(cmd) # Test all storage fields cmd = ["gnt-node", "list-storage", "--storage-type", storage_type, "--output=%s" % ",".join(list(constants.VALID_STORAGE_FIELDS) + [constants.SF_NODE, constants.SF_TYPE])] AssertCommand(cmd) # Get list of valid storage devices cmd = ["gnt-node", "list-storage", "--storage-type", storage_type, "--output=node,name,allocatable", "--separator=|", "--no-headers"] output = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) # Test with up to two devices testdevcount = 2 for line in output.splitlines()[:testdevcount]: (node_name, st_name, st_allocatable) = line.split("|") # Dummy modification without any changes cmd = ["gnt-node", "modify-storage", node_name, storage_type, st_name] AssertCommand(cmd) # Make sure we end up with the same value as before if st_allocatable.lower() == "y": test_allocatable = ["no", "yes"] else: test_allocatable = ["yes", "no"] fail = (constants.SF_ALLOCATABLE not in constants.MODIFIABLE_STORAGE_FIELDS.get(storage_type, [])) for i in test_allocatable: AssertCommand(["gnt-node", "modify-storage", "--allocatable", i, node_name, storage_type, st_name], fail=fail) # Verify list output cmd = ["gnt-node", "list-storage", "--storage-type", storage_type, "--output=name,allocatable", "--separator=|", "--no-headers", node_name] listout = qa_utils.GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) for line in listout.splitlines(): (vfy_name, vfy_allocatable) = line.split("|") if vfy_name == st_name and not fail: AssertEqual(vfy_allocatable, i[0].upper()) else: AssertEqual(vfy_allocatable, st_allocatable) # Test repair functionality fail = (constants.SO_FIX_CONSISTENCY not in constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])) AssertCommand(["gnt-node", "repair-storage", node_name, storage_type, st_name], fail=fail) def TestNodeFailover(node, node2): """gnt-node failover""" if qa_utils.GetNodeInstances(node2, secondaries=False): raise qa_error.UnusableNodeError("Secondary node has at least one" " primary instance. This test requires" " it to have no primary instances.") # Fail over to secondary node AssertCommand(["gnt-node", "failover", "-f", node.primary]) # ... and back again. AssertCommand(["gnt-node", "failover", "-f", node2.primary]) def TestNodeMigrate(node, node2): """gnt-node migrate""" if qa_utils.GetNodeInstances(node2, secondaries=False): raise qa_error.UnusableNodeError("Secondary node has at least one" " primary instance. This test requires" " it to have no primary instances.") # Migrate to secondary node AssertCommand(["gnt-node", "migrate", "-f", node.primary]) # ... and back again. AssertCommand(["gnt-node", "migrate", "-f", node2.primary]) def TestNodeEvacuate(node, node2): """gnt-node evacuate""" node3 = qa_config.AcquireNode(exclude=[node, node2]) try: if qa_utils.GetNodeInstances(node3, secondaries=True): raise qa_error.UnusableNodeError("Evacuation node has at least one" " secondary instance. This test requires" " it to have no secondary instances.") # Evacuate all secondary instances AssertCommand(["gnt-node", "evacuate", "-f", "--new-secondary=%s" % node3.primary, node2.primary]) # ... and back again. 
AssertCommand(["gnt-node", "evacuate", "-f", "--new-secondary=%s" % node2.primary, node3.primary]) finally: node3.Release() def TestNodeModify(node): """gnt-node modify""" # make sure enough master candidates will be available by disabling the # master candidate role first with --auto-promote AssertCommand(["gnt-node", "modify", "--master-candidate=no", "--auto-promote", node.primary]) # now it's save to force-remove the master candidate role for flag in ["master-candidate", "drained", "offline"]: for value in ["yes", "no"]: AssertCommand(["gnt-node", "modify", "--force", "--%s=%s" % (flag, value), node.primary]) AssertCommand(["gnt-node", "modify", "--master-candidate=yes", node.primary]) # Test setting secondary IP address AssertCommand(["gnt-node", "modify", "--secondary-ip=%s" % node.secondary, node.primary]) def _CreateOobScriptStructure(): """Create a simple OOB handling script and its structure.""" master = qa_config.GetMasterNode() data_path = qa_utils.UploadData(master.primary, "") verify_path = qa_utils.UploadData(master.primary, "") exit_code_path = qa_utils.UploadData(master.primary, "") oob_script = (("#!/bin/bash\n" "echo \"$@\" > %s\n" "cat %s\n" "exit $(< %s)\n") % (utils.ShellQuote(verify_path), utils.ShellQuote(data_path), utils.ShellQuote(exit_code_path))) oob_path = qa_utils.UploadData(master.primary, oob_script, mode=0700) return [oob_path, verify_path, data_path, exit_code_path] def _UpdateOobFile(path, data): """Updates the data file with data.""" master = qa_config.GetMasterNode() qa_utils.UploadData(master.primary, data, filename=path) def _AssertOobCall(verify_path, expected_args): """Assert the OOB call was performed with expetected args.""" master = qa_config.GetMasterNode() verify_output_cmd = utils.ShellQuoteArgs(["cat", verify_path]) output = qa_utils.GetCommandOutput(master.primary, verify_output_cmd, tty=False) AssertEqual(expected_args, output.strip()) def TestOutOfBand(): """gnt-node power""" master = qa_config.GetMasterNode() node = qa_config.AcquireNode(exclude=master) master_name = master.primary node_name = node.primary full_node_name = qa_utils.ResolveNodeName(node) (oob_path, verify_path, data_path, exit_code_path) = _CreateOobScriptStructure() try: AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program=%s" % oob_path]) # No data, exit 0 _UpdateOobFile(exit_code_path, "0") AssertCommand(["gnt-node", "power", "on", node_name]) _AssertOobCall(verify_path, "power-on %s" % full_node_name) AssertCommand(["gnt-node", "power", "-f", "off", node_name]) _AssertOobCall(verify_path, "power-off %s" % full_node_name) # Power off on master without options should fail AssertCommand(["gnt-node", "power", "-f", "off", master_name], fail=True) # With force master it should still fail AssertCommand(["gnt-node", "power", "-f", "--ignore-status", "off", master_name], fail=True) # Verify we can't transform back to online when not yet powered on AssertCommand(["gnt-node", "modify", "-O", "no", node_name], fail=True) # Now reset state AssertCommand(["gnt-node", "modify", "-O", "no", "--node-powered", "yes", node_name]) AssertCommand(["gnt-node", "power", "-f", "cycle", node_name]) _AssertOobCall(verify_path, "power-cycle %s" % full_node_name) # Those commands should fail as they expect output which isn't provided yet # But they should have called the oob helper nevermind AssertCommand(["gnt-node", "power", "status", node_name], fail=True) _AssertOobCall(verify_path, "power-status %s" % full_node_name) AssertCommand(["gnt-node", "health", node_name], 
fail=True) _AssertOobCall(verify_path, "health %s" % full_node_name) AssertCommand(["gnt-node", "health"], fail=True) # Correct Data, exit 0 _UpdateOobFile(data_path, serializer.DumpJson({"powered": True})) AssertCommand(["gnt-node", "power", "status", node_name]) _AssertOobCall(verify_path, "power-status %s" % full_node_name) _UpdateOobFile(data_path, serializer.DumpJson([["temp", "OK"], ["disk0", "CRITICAL"]])) AssertCommand(["gnt-node", "health", node_name]) _AssertOobCall(verify_path, "health %s" % full_node_name) AssertCommand(["gnt-node", "health"]) # Those commands should fail as they expect no data regardless of exit 0 AssertCommand(["gnt-node", "power", "on", node_name], fail=True) _AssertOobCall(verify_path, "power-on %s" % full_node_name) try: AssertCommand(["gnt-node", "power", "-f", "off", node_name], fail=True) _AssertOobCall(verify_path, "power-off %s" % full_node_name) finally: AssertCommand(["gnt-node", "modify", "-O", "no", node_name]) AssertCommand(["gnt-node", "power", "-f", "cycle", node_name], fail=True) _AssertOobCall(verify_path, "power-cycle %s" % full_node_name) # Data, exit 1 (all should fail) _UpdateOobFile(exit_code_path, "1") AssertCommand(["gnt-node", "power", "on", node_name], fail=True) _AssertOobCall(verify_path, "power-on %s" % full_node_name) try: AssertCommand(["gnt-node", "power", "-f", "off", node_name], fail=True) _AssertOobCall(verify_path, "power-off %s" % full_node_name) finally: AssertCommand(["gnt-node", "modify", "-O", "no", node_name]) AssertCommand(["gnt-node", "power", "-f", "cycle", node_name], fail=True) _AssertOobCall(verify_path, "power-cycle %s" % full_node_name) AssertCommand(["gnt-node", "power", "status", node_name], fail=True) _AssertOobCall(verify_path, "power-status %s" % full_node_name) AssertCommand(["gnt-node", "health", node_name], fail=True) _AssertOobCall(verify_path, "health %s" % full_node_name) AssertCommand(["gnt-node", "health"], fail=True) # No data, exit 1 (all should fail) _UpdateOobFile(data_path, "") AssertCommand(["gnt-node", "power", "on", node_name], fail=True) _AssertOobCall(verify_path, "power-on %s" % full_node_name) try: AssertCommand(["gnt-node", "power", "-f", "off", node_name], fail=True) _AssertOobCall(verify_path, "power-off %s" % full_node_name) finally: AssertCommand(["gnt-node", "modify", "-O", "no", node_name]) AssertCommand(["gnt-node", "power", "-f", "cycle", node_name], fail=True) _AssertOobCall(verify_path, "power-cycle %s" % full_node_name) AssertCommand(["gnt-node", "power", "status", node_name], fail=True) _AssertOobCall(verify_path, "power-status %s" % full_node_name) AssertCommand(["gnt-node", "health", node_name], fail=True) _AssertOobCall(verify_path, "health %s" % full_node_name) AssertCommand(["gnt-node", "health"], fail=True) # Different OOB script for node verify_path2 = qa_utils.UploadData(master.primary, "") oob_script = ("#!/bin/sh\n" "echo \"$@\" > %s\n") % verify_path2 oob_path2 = qa_utils.UploadData(master.primary, oob_script, mode=0700) try: AssertCommand(["gnt-node", "modify", "--node-parameters", "oob_program=%s" % oob_path2, node_name]) AssertCommand(["gnt-node", "power", "on", node_name]) _AssertOobCall(verify_path2, "power-on %s" % full_node_name) finally: AssertCommand(["gnt-node", "modify", "--node-parameters", "oob_program=default", node_name]) AssertCommand(["rm", "-f", oob_path2, verify_path2]) finally: AssertCommand(["gnt-cluster", "modify", "--node-parameters", "oob_program="]) AssertCommand(["rm", "-f", oob_path, verify_path, data_path, exit_code_path]) def 
TestNodeList(): """gnt-node list""" qa_utils.GenericQueryTest("gnt-node", query.NODE_FIELDS.keys()) def TestNodeListFields(): """gnt-node list-fields""" qa_utils.GenericQueryFieldsTest("gnt-node", query.NODE_FIELDS.keys()) def TestNodeListDrbd(node): """gnt-node list-drbd""" AssertCommand(["gnt-node", "list-drbd", node.primary]) def _BuildSetESCmd(action, value, node_name): cmd = ["gnt-node"] if action == "add": cmd.extend(["add", "--readd"]) else: cmd.append("modify") cmd.extend(["--node-parameters", "exclusive_storage=%s" % value, node_name]) return cmd def TestExclStorSingleNode(node): """gnt-node add/modify cannot change the exclusive_storage flag. """ for action in ["add", "modify"]: for value in (True, False, "default"): AssertCommand(_BuildSetESCmd(action, value, node.primary), fail=True) ganeti-2.9.3/qa/qa_utils.py0000644000000000000000000006207112271422343015574 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utilities for QA tests. """ import copy import operator import os import random import re import subprocess import sys import tempfile import yaml try: import functools except ImportError, err: raise ImportError("Python 2.5 or higher is required: %s" % err) from ganeti import utils from ganeti import compat from ganeti import constants from ganeti import ht from ganeti import pathutils from ganeti import vcluster import qa_config import qa_error _INFO_SEQ = None _WARNING_SEQ = None _ERROR_SEQ = None _RESET_SEQ = None _MULTIPLEXERS = {} #: Unique ID per QA run _RUN_UUID = utils.NewUUID() #: Path to the QA query output log file _QA_OUTPUT = pathutils.GetLogFilename("qa-output") (INST_DOWN, INST_UP) = range(500, 502) (FIRST_ARG, RETURN_VALUE) = range(1000, 1002) def _SetupColours(): """Initializes the colour constants. """ # pylint: disable=W0603 # due to global usage global _INFO_SEQ, _WARNING_SEQ, _ERROR_SEQ, _RESET_SEQ # Don't use colours if stdout isn't a terminal if not sys.stdout.isatty(): return try: import curses except ImportError: # Don't use colours if curses module can't be imported return curses.setupterm() _RESET_SEQ = curses.tigetstr("op") setaf = curses.tigetstr("setaf") _INFO_SEQ = curses.tparm(setaf, curses.COLOR_GREEN) _WARNING_SEQ = curses.tparm(setaf, curses.COLOR_YELLOW) _ERROR_SEQ = curses.tparm(setaf, curses.COLOR_RED) _SetupColours() def AssertIn(item, sequence): """Raises an error when item is not in sequence. """ if item not in sequence: raise qa_error.Error("%r not in %r" % (item, sequence)) def AssertNotIn(item, sequence): """Raises an error when item is in sequence. """ if item in sequence: raise qa_error.Error("%r in %r" % (item, sequence)) def AssertEqual(first, second): """Raises an error when values aren't equal. 
""" if not first == second: raise qa_error.Error("%r == %r" % (first, second)) def AssertMatch(string, pattern): """Raises an error when string doesn't match regexp pattern. """ if not re.match(pattern, string): raise qa_error.Error("%r doesn't match /%r/" % (string, pattern)) def _GetName(entity, fn): """Tries to get name of an entity. @type entity: string or dict @param fn: Function retrieving name from entity """ if isinstance(entity, basestring): result = entity else: result = fn(entity) if not ht.TNonEmptyString(result): raise Exception("Invalid name '%s'" % result) return result def _AssertRetCode(rcode, fail, cmdstr, nodename): """Check the return value from a command and possibly raise an exception. """ if fail and rcode == 0: raise qa_error.Error("Command '%s' on node %s was expected to fail but" " didn't" % (cmdstr, nodename)) elif not fail and rcode != 0: raise qa_error.Error("Command '%s' on node %s failed, exit code %s" % (cmdstr, nodename, rcode)) def AssertCommand(cmd, fail=False, node=None, log_cmd=True): """Checks that a remote command succeeds. @param cmd: either a string (the command to execute) or a list (to be converted using L{utils.ShellQuoteArgs} into a string) @type fail: boolean @param fail: if the command is expected to fail instead of succeeding @param node: if passed, it should be the node on which the command should be executed, instead of the master node (can be either a dict or a string) @param log_cmd: if False, the command won't be logged (simply passed to StartSSH) @return: the return code of the command @raise qa_error.Error: if the command fails when it shouldn't or vice versa """ if node is None: node = qa_config.GetMasterNode() nodename = _GetName(node, operator.attrgetter("primary")) if isinstance(cmd, basestring): cmdstr = cmd else: cmdstr = utils.ShellQuoteArgs(cmd) rcode = StartSSH(nodename, cmdstr, log_cmd=log_cmd).wait() _AssertRetCode(rcode, fail, cmdstr, nodename) return rcode def AssertRedirectedCommand(cmd, fail=False, node=None, log_cmd=True): """Executes a command with redirected output. The log will go to the qa-output log file in the ganeti log directory on the node where the command is executed. The fail and node parameters are passed unchanged to AssertCommand. @param cmd: the command to be executed, as a list; a string is not supported """ if not isinstance(cmd, list): raise qa_error.Error("Non-list passed to AssertRedirectedCommand") ofile = utils.ShellQuote(_QA_OUTPUT) cmdstr = utils.ShellQuoteArgs(cmd) AssertCommand("echo ---- $(date) %s ---- >> %s" % (cmdstr, ofile), fail=False, node=node, log_cmd=False) return AssertCommand(cmdstr + " >> %s" % ofile, fail=fail, node=node, log_cmd=log_cmd) def GetSSHCommand(node, cmd, strict=True, opts=None, tty=None): """Builds SSH command to be executed. 
@type node: string @param node: node the command should run on @type cmd: string @param cmd: command to be executed in the node; if None or empty string, no command will be executed @type strict: boolean @param strict: whether to enable strict host key checking @type opts: list @param opts: list of additional options @type tty: boolean or None @param tty: if we should use tty; if None, will be auto-detected """ args = ["ssh", "-oEscapeChar=none", "-oBatchMode=yes", "-lroot"] if tty is None: tty = sys.stdout.isatty() if tty: args.append("-t") if strict: tmp = "yes" else: tmp = "no" args.append("-oStrictHostKeyChecking=%s" % tmp) args.append("-oClearAllForwardings=yes") args.append("-oForwardAgent=yes") if opts: args.extend(opts) if node in _MULTIPLEXERS: spath = _MULTIPLEXERS[node][0] args.append("-oControlPath=%s" % spath) args.append("-oControlMaster=no") (vcluster_master, vcluster_basedir) = \ qa_config.GetVclusterSettings() if vcluster_master: args.append(vcluster_master) args.append("%s/%s/cmd" % (vcluster_basedir, node)) if cmd: # For virtual clusters the whole command must be wrapped using the "cmd" # script, as that script sets a number of environment variables. If the # command contains shell meta characters the whole command needs to be # quoted. args.append(utils.ShellQuote(cmd)) else: args.append(node) if cmd: args.append(cmd) return args def StartLocalCommand(cmd, _nolog_opts=False, log_cmd=True, **kwargs): """Starts a local command. """ if log_cmd: if _nolog_opts: pcmd = [i for i in cmd if not i.startswith("-")] else: pcmd = cmd print "Command: %s" % utils.ShellQuoteArgs(pcmd) return subprocess.Popen(cmd, shell=False, **kwargs) def StartSSH(node, cmd, strict=True, log_cmd=True): """Starts SSH. """ return StartLocalCommand(GetSSHCommand(node, cmd, strict=strict), _nolog_opts=True, log_cmd=log_cmd) def StartMultiplexer(node): """Starts a multiplexer command. @param node: the node for which to open the multiplexer """ if node in _MULTIPLEXERS: return # Note: yes, we only need mktemp, since we'll remove the file anyway sname = tempfile.mktemp(prefix="ganeti-qa-multiplexer.") utils.RemoveFile(sname) opts = ["-N", "-oControlPath=%s" % sname, "-oControlMaster=yes"] print "Created socket at %s" % sname child = StartLocalCommand(GetSSHCommand(node, None, opts=opts)) _MULTIPLEXERS[node] = (sname, child) def CloseMultiplexers(): """Closes all current multiplexers and cleans up. """ for node in _MULTIPLEXERS.keys(): (sname, child) = _MULTIPLEXERS.pop(node) utils.KillProcess(child.pid, timeout=10, waitpid=True) utils.RemoveFile(sname) def GetCommandOutput(node, cmd, tty=None, fail=False): """Returns the output of a command executed on the given node. @type node: string @param node: node the command should run on @type cmd: string @param cmd: command to be executed in the node (cannot be empty or None) @type tty: bool or None @param tty: if we should use tty; if None, it will be auto-detected @type fail: bool @param fail: whether the command is expected to fail """ assert cmd p = StartLocalCommand(GetSSHCommand(node, cmd, tty=tty), stdout=subprocess.PIPE) rcode = p.wait() _AssertRetCode(rcode, fail, cmd, node) return p.stdout.read() def GetObjectInfo(infocmd): """Get and parse information about a Ganeti object. @type infocmd: list of strings @param infocmd: command to be executed, e.g. ["gnt-cluster", "info"] @return: the information parsed, appropriately stored in dictionaries, lists... 
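      For example, GetObjectInfo(["gnt-node", "info", "node1"]) returns the
      parsed YAML printed by "gnt-node info" (the node name is illustrative).
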
""" master = qa_config.GetMasterNode() cmdline = utils.ShellQuoteArgs(infocmd) info_out = GetCommandOutput(master.primary, cmdline) return yaml.load(info_out) def UploadFile(node, src): """Uploads a file to a node and returns the filename. Caller needs to remove the returned file on the node when it's not needed anymore. """ # Make sure nobody else has access to it while preserving local permissions mode = os.stat(src).st_mode & 0700 cmd = ('tmp=$(mktemp --tmpdir gnt.XXXXXX) && ' 'chmod %o "${tmp}" && ' '[[ -f "${tmp}" ]] && ' 'cat > "${tmp}" && ' 'echo "${tmp}"') % mode f = open(src, "r") try: p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=f, stdout=subprocess.PIPE) AssertEqual(p.wait(), 0) # Return temporary filename return p.stdout.read().strip() finally: f.close() def UploadData(node, data, mode=0600, filename=None): """Uploads data to a node and returns the filename. Caller needs to remove the returned file on the node when it's not needed anymore. """ if filename: tmp = "tmp=%s" % utils.ShellQuote(filename) else: tmp = ('tmp=$(mktemp --tmpdir gnt.XXXXXX) && ' 'chmod %o "${tmp}"') % mode cmd = ("%s && " "[[ -f \"${tmp}\" ]] && " "cat > \"${tmp}\" && " "echo \"${tmp}\"") % tmp p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE) p.stdin.write(data) p.stdin.close() AssertEqual(p.wait(), 0) # Return temporary filename return p.stdout.read().strip() def BackupFile(node, path): """Creates a backup of a file on the node and returns the filename. Caller needs to remove the returned file on the node when it's not needed anymore. """ vpath = MakeNodePath(node, path) cmd = ("tmp=$(mktemp .gnt.XXXXXX --tmpdir=$(dirname %s)) && " "[[ -f \"$tmp\" ]] && " "cp %s $tmp && " "echo $tmp") % (utils.ShellQuote(vpath), utils.ShellQuote(vpath)) # Return temporary filename result = GetCommandOutput(node, cmd).strip() print "Backup filename: %s" % result return result def ResolveInstanceName(instance): """Gets the full name of an instance. @type instance: string @param instance: Instance name """ info = GetObjectInfo(["gnt-instance", "info", instance]) return info[0]["Instance name"] def ResolveNodeName(node): """Gets the full name of a node. """ info = GetObjectInfo(["gnt-node", "info", node.primary]) return info[0]["Node name"] def GetNodeInstances(node, secondaries=False): """Gets a list of instances on a node. """ master = qa_config.GetMasterNode() node_name = ResolveNodeName(node) # Get list of all instances cmd = ["gnt-instance", "list", "--separator=:", "--no-headers", "--output=name,pnode,snodes"] output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)) instances = [] for line in output.splitlines(): (name, pnode, snodes) = line.split(":", 2) if ((not secondaries and pnode == node_name) or (secondaries and node_name in snodes.split(","))): instances.append(name) return instances def _SelectQueryFields(rnd, fields): """Generates a list of fields for query tests. """ # Create copy for shuffling fields = list(fields) rnd.shuffle(fields) # Check all fields yield fields yield sorted(fields) # Duplicate fields yield fields + fields # Check small groups of fields while fields: yield [fields.pop() for _ in range(rnd.randint(2, 10)) if fields] def _List(listcmd, fields, names): """Runs a list command. 
""" master = qa_config.GetMasterNode() cmd = [listcmd, "list", "--separator=|", "--no-headers", "--output", ",".join(fields)] if names: cmd.extend(names) return GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd)).splitlines() def GenericQueryTest(cmd, fields, namefield="name", test_unknown=True): """Runs a number of tests on query commands. @param cmd: Command name @param fields: List of field names """ rnd = random.Random(hash(cmd)) fields = list(fields) rnd.shuffle(fields) # Test a number of field combinations for testfields in _SelectQueryFields(rnd, fields): AssertRedirectedCommand([cmd, "list", "--output", ",".join(testfields)]) if namefield is not None: namelist_fn = compat.partial(_List, cmd, [namefield]) # When no names were requested, the list must be sorted names = namelist_fn(None) AssertEqual(names, utils.NiceSort(names)) # When requesting specific names, the order must be kept revnames = list(reversed(names)) AssertEqual(namelist_fn(revnames), revnames) randnames = list(names) rnd.shuffle(randnames) AssertEqual(namelist_fn(randnames), randnames) if test_unknown: # Listing unknown items must fail AssertCommand([cmd, "list", "this.name.certainly.does.not.exist"], fail=True) # Check exit code for listing unknown field AssertEqual(AssertRedirectedCommand([cmd, "list", "--output=field/does/not/exist"], fail=True), constants.EXIT_UNKNOWN_FIELD) def GenericQueryFieldsTest(cmd, fields): master = qa_config.GetMasterNode() # Listing fields AssertRedirectedCommand([cmd, "list-fields"]) AssertRedirectedCommand([cmd, "list-fields"] + fields) # Check listed fields (all, must be sorted) realcmd = [cmd, "list-fields", "--separator=|", "--no-headers"] output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(realcmd)).splitlines() AssertEqual([line.split("|", 1)[0] for line in output], utils.NiceSort(fields)) # Check exit code for listing unknown field AssertEqual(AssertCommand([cmd, "list-fields", "field/does/not/exist"], fail=True), constants.EXIT_UNKNOWN_FIELD) def _FormatWithColor(text, seq): if not seq: return text return "%s%s%s" % (seq, text, _RESET_SEQ) FormatWarning = lambda text: _FormatWithColor(text, _WARNING_SEQ) FormatError = lambda text: _FormatWithColor(text, _ERROR_SEQ) FormatInfo = lambda text: _FormatWithColor(text, _INFO_SEQ) def AddToEtcHosts(hostnames): """Adds hostnames to /etc/hosts. @param hostnames: List of hostnames first used A records, all other CNAMEs """ master = qa_config.GetMasterNode() tmp_hosts = UploadData(master.primary, "", mode=0644) data = [] for localhost in ("::1", "127.0.0.1"): data.append("%s %s" % (localhost, " ".join(hostnames))) try: AssertCommand("{ cat %s && echo -e '%s'; } > %s && mv %s %s" % (utils.ShellQuote(pathutils.ETC_HOSTS), "\\n".join(data), utils.ShellQuote(tmp_hosts), utils.ShellQuote(tmp_hosts), utils.ShellQuote(pathutils.ETC_HOSTS))) except Exception: AssertCommand(["rm", "-f", tmp_hosts]) raise def RemoveFromEtcHosts(hostnames): """Remove hostnames from /etc/hosts. 
@param hostnames: List of hostnames first used A records, all other CNAMEs """ master = qa_config.GetMasterNode() tmp_hosts = UploadData(master.primary, "", mode=0644) quoted_tmp_hosts = utils.ShellQuote(tmp_hosts) sed_data = " ".join(hostnames) try: AssertCommand((r"sed -e '/^\(::1\|127\.0\.0\.1\)\s\+%s/d' %s > %s" r" && mv %s %s") % (sed_data, utils.ShellQuote(pathutils.ETC_HOSTS), quoted_tmp_hosts, quoted_tmp_hosts, utils.ShellQuote(pathutils.ETC_HOSTS))) except Exception: AssertCommand(["rm", "-f", tmp_hosts]) raise def RunInstanceCheck(instance, running): """Check if instance is running or not. """ instance_name = _GetName(instance, operator.attrgetter("name")) script = qa_config.GetInstanceCheckScript() if not script: return master_node = qa_config.GetMasterNode() # Build command to connect to master node master_ssh = GetSSHCommand(master_node.primary, "--") if running: running_shellval = "1" running_text = "" else: running_shellval = "" running_text = "not " print FormatInfo("Checking if instance '%s' is %srunning" % (instance_name, running_text)) args = [script, instance_name] env = { "PATH": constants.HOOKS_PATH, "RUN_UUID": _RUN_UUID, "MASTER_SSH": utils.ShellQuoteArgs(master_ssh), "INSTANCE_NAME": instance_name, "INSTANCE_RUNNING": running_shellval, } result = os.spawnve(os.P_WAIT, script, args, env) if result != 0: raise qa_error.Error("Instance check failed with result %s" % result) def _InstanceCheckInner(expected, instarg, args, result): """Helper function used by L{InstanceCheck}. """ if instarg == FIRST_ARG: instance = args[0] elif instarg == RETURN_VALUE: instance = result else: raise Exception("Invalid value '%s' for instance argument" % instarg) if expected in (INST_DOWN, INST_UP): RunInstanceCheck(instance, (expected == INST_UP)) elif expected is not None: raise Exception("Invalid value '%s'" % expected) def InstanceCheck(before, after, instarg): """Decorator to check instance status before and after test. @param before: L{INST_DOWN} if instance must be stopped before test, L{INST_UP} if instance must be running before test, L{None} to not check. @param after: L{INST_DOWN} if instance must be stopped after test, L{INST_UP} if instance must be running after test, L{None} to not check. @param instarg: L{FIRST_ARG} to use first argument to test as instance (a dictionary), L{RETURN_VALUE} to use return value (disallows pre-checks) """ def decorator(fn): @functools.wraps(fn) def wrapper(*args, **kwargs): _InstanceCheckInner(before, instarg, args, NotImplemented) result = fn(*args, **kwargs) _InstanceCheckInner(after, instarg, args, result) return result return wrapper return decorator def GetNonexistentGroups(count): """Gets group names which shouldn't exist on the cluster. @param count: Number of groups to get @rtype: integer """ return GetNonexistentEntityNames(count, "groups", "group") def GetNonexistentEntityNames(count, name_config, name_prefix): """Gets entity names which shouldn't exist on the cluster. The actualy names can refer to arbitrary entities (for example groups, networks). @param count: Number of names to get @rtype: integer @param name_config: name of the leaf in the config containing this entity's configuration, including a 'inexistent-' element @rtype: string @param name_prefix: prefix of the entity's names, used to compose the default values; for example for groups, the prefix is 'group' and the generated names are then group1, group2, ... 
@rtype: string """ entities = qa_config.get(name_config, {}) default = [name_prefix + str(i) for i in range(count)] assert count <= len(default) name_config_inexistent = "inexistent-" + name_config candidates = entities.get(name_config_inexistent, default)[:count] if len(candidates) < count: raise Exception("At least %s non-existent %s are needed" % (count, name_config)) return candidates def MakeNodePath(node, path): """Builds an absolute path for a virtual node. @type node: string or L{qa_config._QaNode} @param node: Node @type path: string @param path: Path without node-specific prefix """ (_, basedir) = qa_config.GetVclusterSettings() if isinstance(node, basestring): name = node else: name = node.primary if basedir: assert path.startswith("/") return "%s%s" % (vcluster.MakeNodeRoot(basedir, name), path) else: return path def _GetParameterOptions(specs): """Helper to build policy options.""" values = ["%s=%s" % (par, val) for (par, val) in specs.items()] return ",".join(values) def TestSetISpecs(new_specs=None, diff_specs=None, get_policy_fn=None, build_cmd_fn=None, fail=False, old_values=None): """Change instance specs for an object. At most one of new_specs or diff_specs can be specified. @type new_specs: dict @param new_specs: new complete specs, in the same format returned by L{ParseIPolicy}. @type diff_specs: dict @param diff_specs: partial specs, it can be an incomplete specifications, but if min/max specs are specified, their number must match the number of the existing specs @type get_policy_fn: function @param get_policy_fn: function that returns the current policy as in L{ParseIPolicy} @type build_cmd_fn: function @param build_cmd_fn: function that return the full command line from the options alone @type fail: bool @param fail: if the change is expected to fail @type old_values: tuple @param old_values: (old_policy, old_specs), as returned by L{ParseIPolicy} @return: same as L{ParseIPolicy} """ assert get_policy_fn is not None assert build_cmd_fn is not None assert new_specs is None or diff_specs is None if old_values: (old_policy, old_specs) = old_values else: (old_policy, old_specs) = get_policy_fn() if diff_specs: new_specs = copy.deepcopy(old_specs) if constants.ISPECS_MINMAX in diff_specs: AssertEqual(len(new_specs[constants.ISPECS_MINMAX]), len(diff_specs[constants.ISPECS_MINMAX])) for (new_minmax, diff_minmax) in zip(new_specs[constants.ISPECS_MINMAX], diff_specs[constants.ISPECS_MINMAX]): for (key, parvals) in diff_minmax.items(): for (par, val) in parvals.items(): new_minmax[key][par] = val for (par, val) in diff_specs.get(constants.ISPECS_STD, {}).items(): new_specs[constants.ISPECS_STD][par] = val if new_specs: cmd = [] if (diff_specs is None or constants.ISPECS_MINMAX in diff_specs): minmax_opt_items = [] for minmax in new_specs[constants.ISPECS_MINMAX]: minmax_opts = [] for key in ["min", "max"]: keyopt = _GetParameterOptions(minmax[key]) minmax_opts.append("%s:%s" % (key, keyopt)) minmax_opt_items.append("/".join(minmax_opts)) cmd.extend([ "--ipolicy-bounds-specs", "//".join(minmax_opt_items) ]) if diff_specs is None: std_source = new_specs else: std_source = diff_specs std_opt = _GetParameterOptions(std_source.get("std", {})) if std_opt: cmd.extend(["--ipolicy-std-specs", std_opt]) AssertCommand(build_cmd_fn(cmd), fail=fail) # Check the new state (eff_policy, eff_specs) = get_policy_fn() AssertEqual(eff_policy, old_policy) if fail: AssertEqual(eff_specs, old_specs) else: AssertEqual(eff_specs, new_specs) else: (eff_policy, eff_specs) = (old_policy, old_specs) 
return (eff_policy, eff_specs) def ParseIPolicy(policy): """Parse and split an instance policy. @type policy: dict @param policy: policy, as returned by L{GetObjectInfo} @rtype: tuple @return: (policy, specs), where: - policy is a dictionary of the policy values, instance specs excluded - specs is a dictionary containing only the specs, using the internal format (see L{constants.IPOLICY_DEFAULTS} for an example) """ ret_specs = {} ret_policy = {} for (key, val) in policy.items(): if key == "bounds specs": ret_specs[constants.ISPECS_MINMAX] = [] for minmax in val: ret_minmax = {} for key in minmax: keyparts = key.split("/", 1) assert len(keyparts) > 1 ret_minmax[keyparts[0]] = minmax[key] ret_specs[constants.ISPECS_MINMAX].append(ret_minmax) elif key == constants.ISPECS_STD: ret_specs[key] = val else: ret_policy[key] = val return (ret_policy, ret_specs) ganeti-2.9.3/qa/qa_error.py0000644000000000000000000000206012230001635015555 0ustar00rootroot00000000000000# # # Copyright (C) 2007 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Error definitions for QA. """ class Error(Exception): """An error occurred during Q&A testing. """ pass class OutOfNodesError(Error): """Out of nodes. """ pass class OutOfInstancesError(Error): """Out of instances. """ pass class UnusableNodeError(Error): """Unusable node. """ pass ganeti-2.9.3/qa/qa_env.py0000644000000000000000000000444612244641676015232 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Cluster environment related QA tests. """ from ganeti import utils import qa_config from qa_utils import AssertCommand def TestSshConnection(): """Test SSH connection. """ for node in qa_config.get("nodes"): AssertCommand("exit", node=node) def TestGanetiCommands(): """Test availability of Ganeti commands.
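All commands are invoked with C{--version} only, so this merely verifies that the Ganeti binaries are installed and executable on each node.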
""" cmds = (["gnt-backup", "--version"], ["gnt-cluster", "--version"], ["gnt-debug", "--version"], ["gnt-instance", "--version"], ["gnt-job", "--version"], ["gnt-node", "--version"], ["gnt-os", "--version"], ["ganeti-masterd", "--version"], ["ganeti-noded", "--version"], ["ganeti-rapi", "--version"], ["ganeti-watcher", "--version"], ["ganeti-confd", "--version"], ["ganeti-luxid", "--version"], ) cmd = " && ".join([utils.ShellQuoteArgs(i) for i in cmds]) for node in qa_config.get("nodes"): AssertCommand(cmd, node=node) def TestIcmpPing(): """ICMP ping each node. """ nodes = qa_config.get("nodes") pingprimary = pingsecondary = "fping" if qa_config.get("primary_ip_version") == 6: pingprimary = "fping6" pricmd = [pingprimary, "-e"] seccmd = [pingsecondary, "-e"] for i in nodes: pricmd.append(i.primary) if i.secondary: seccmd.append(i.secondary) pristr = utils.ShellQuoteArgs(pricmd) if seccmd: cmdall = "%s && %s" % (pristr, utils.ShellQuoteArgs(seccmd)) else: cmdall = pristr for node in nodes: AssertCommand(cmdall, node=node) ganeti-2.9.3/qa/qa-sample.json0000644000000000000000000001466712271422343016164 0ustar00rootroot00000000000000{ "# Note:": null, "# This file is stored in the JSON format and does not support": null, "# comments. As a work-around, comments are keys starting with a hash": null, "# sign (#).": null, "name": "xen-test", "# Name used for renaming cluster": null, "rename": "xen-test-rename", "# Virtual cluster": null, "#vcluster-master": "xen-vcluster", "#vcluster-basedir": "/srv/ganeti/vcluster", "enabled-hypervisors": "xen-pvm", "# Dict of hypervisor name and parameters (like on the cmd line)": null, "hypervisor-parameters": {}, "# Backend parameters (like on the cmd line)": null, "backend-parameters": "", "# Dict of OS name and parameters (like on the cmd line)": null, "os-parameters": {}, "# Dict of OS name and value dict of hypervisor parameters": null, "os-hvp": {}, "primary_ip_version": 4, "# Name of the LVM group for the cluster": null, "vg-name": "xenvg", "# Cluster-level value of the exclusive-storage flag": null, "exclusive-storage": null, "# Only enable disk templates that the QA machines can actually use.": null, "enabled-disk-templates": [ "plain", "drbd", "diskless" ], "# Default file storage directories": null, "default-file-storage-dir": "/srv/ganeti/file-storage", "default-shared-file-storage-dir": "/srv/ganeti/shared-file-storage", "# Additional arguments for initializing cluster": null, "cluster-init-args": [], "# Network interface for master role": null, "#master-netdev": "xen-br0", "# Default network interface parameters": null, "#default-nicparams": { "mode": "bridged", "link": "xen-br0" }, "os": "debian-etch", "maxmem": "1024M", "minmem": "512M", "# Instance policy specs": null, "#ispec_cpu_count_max": null, "#ispec_cpu_count_min": null, "#ispec_cpu_count_std": null, "#ispec_disk_count_max": null, "#ispec_disk_count_min": null, "#ispec_disk_count_std": null, "#ispec_disk_size_max": null, "ispec_disk_size_min": 512, "#ispec_disk_size_std": null, "ispec_mem_size_max": 1024, "#ispec_mem_size_min": null, "#ispec_mem_size_std": null, "#ispec_nic_count_max": null, "#ispec_nic_count_min": null, "#ispec_nic_count_std": null, "# Lists of disks": null, "disks": [ { "size": "1G", "spindles": 2, "name": "disk0", "growth": "2G", "spindles-growth": 1 }, { "size": "512M", "spindles": 1, "name": "disk1", "growth": "768M", "spindles-growth": 0 } ], "# Script to check instance status": null, "instance-check": null, "# Regular expression to ignore existing tags": null, 
"ignore-tags-re": null, "nodes": [ { "# Master node": null, "primary": "xen-test-0", "secondary": "192.0.2.1" }, { "primary": "xen-test-1", "secondary": "192.0.2.2" } ], "instances": [ { "name": "xen-test-inst1", "# Static MAC address": null, "#nic.mac/0": "AA:00:00:11:11:11" }, { "name": "xen-test-inst2", "# Static MAC address": null, "#nic.mac/0": "AA:00:00:22:22:22" } ], "groups": { "group-with-nodes": "default", "inexistent-groups": [ "group1", "group2", "group3" ] }, "networks": { "inexistent-networks": [ "network1", "network2", "network3" ] }, "tests": { "# Whether tests are enabled or disabled by default": null, "default": true, "env": true, "os": true, "tags": true, "rapi": true, "test-jobqueue": true, "delay": true, "create-cluster": true, "cluster-verify": true, "cluster-info": true, "cluster-burnin": true, "cluster-command": true, "cluster-copyfile": true, "cluster-master-failover": true, "cluster-renew-crypto": true, "cluster-destroy": true, "cluster-rename": true, "cluster-reserved-lvs": true, "cluster-modify": true, "cluster-oob": true, "cluster-epo": true, "cluster-redist-conf": true, "cluster-repair-disk-sizes": true, "cluster-exclusive-storage": true, "cluster-instance-policy": true, "haskell-confd": true, "htools": true, "group-list": true, "group-rwops": true, "network": false, "node-list": true, "node-info": true, "node-volumes": true, "node-readd": true, "node-storage": true, "node-modify": true, "node-oob": true, "# These tests need at least three nodes": null, "node-evacuate": false, "node-migrate": false, "# This test needs at least two nodes": null, "node-failover": false, "instance-add-plain-disk": true, "instance-add-file": true, "instance-add-shared-file": true, "instance-add-drbd-disk": true, "instance-add-diskless": true, "instance-add-restricted-by-disktemplates": true, "instance-convert-disk": true, "instance-plain-rapi-common-tests": true, "instance-remove-drbd-offline": true, "instance-export": true, "instance-failover": true, "instance-grow-disk": true, "instance-import": true, "instance-info": true, "instance-list": true, "instance-migrate": true, "instance-modify": true, "instance-modify-primary": true, "instance-modify-disks": false, "instance-reboot": true, "instance-reinstall": true, "instance-rename": true, "instance-shutdown": true, "instance-device-names": true, "job-list": true, "# cron/ganeti-watcher should be disabled for these tests": null, "instance-automatic-restart": false, "instance-consecutive-failures": false, "# This test might fail with certain hypervisor types, depending": null, "# on whether they support the `gnt-instance console' command.": null, "instance-console": false, "# Disabled by default because they take rather long": null, "instance-replace-disks": false, "instance-recreate-disks": false, "# Whether to test the tools/move-instance utility": null, "inter-cluster-instance-move": false, "# Run instance tests with different cluster configurations": null, "default-instance-tests": true, "exclusive-storage-instance-tests": false, "mon-collector": true }, "options": { "burnin-instances": 2, "burnin-disk-template": "drbd", "burnin-in-parallel": false, "burnin-check-instances": false, "burnin-rename": "xen-test-rename", "burnin-reboot": true, "reboot-types": ["soft", "hard", "full"], "use-iallocators": false }, "# vim: set syntax=javascript :": null } ganeti-2.9.3/test/0000755000000000000000000000000012271445544013761 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/0000755000000000000000000000000012271445545014673 
5ustar00rootroot00000000000000ganeti-2.9.3/test/data/bdev-drbd-8.4-no-disk-params.txt0000644000000000000000000000120312267470014022405 0ustar00rootroot00000000000000resource resource0 { options { } net { cram-hmac-alg "md5"; shared-secret "shared_secret_123"; after-sb-0pri discard-zero-changes; after-sb-1pri consensus; } _remote_host { address ipv4 192.0.2.2:11000; } _this_host { address ipv4 192.0.2.1:11000; volume 0 { device minor 0; disk "/dev/xenvg/test.data"; meta-disk "/dev/xenvg/test.meta"; disk { } } } } ganeti-2.9.3/test/data/proc_meminfo.txt0000644000000000000000000000222212244641676020112 0ustar00rootroot00000000000000MemTotal: 7870584 kB MemFree: 1549012 kB Buffers: 227328 kB Cached: 4647780 kB SwapCached: 2988 kB Active: 2787880 kB Inactive: 3160328 kB Active(anon): 1951852 kB Inactive(anon): 2007648 kB Active(file): 836028 kB Inactive(file): 1152680 kB Unevictable: 36 kB Mlocked: 32 kB SwapTotal: 8077308 kB SwapFree: 8029968 kB Dirty: 84 kB Writeback: 0 kB AnonPages: 1070164 kB Mapped: 194876 kB Shmem: 2886396 kB Slab: 226092 kB SReclaimable: 169140 kB SUnreclaim: 56952 kB KernelStack: 4232 kB PageTables: 40184 kB NFS_Unstable: 0 kB Bounce: 0 kB WritebackTmp: 0 kB CommitLimit: 12012600 kB Committed_AS: 7618508 kB VmallocTotal: 34359738367 kB VmallocUsed: 374800 kB VmallocChunk: 34359352560 kB HardwareCorrupted: 0 kB AnonHugePages: 0 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 2048 kB DirectMap4k: 96448 kB DirectMap2M: 7983104 kB ganeti-2.9.3/test/data/sys_drbd_usermode_helper.txt0000644000000000000000000000001212230001635022457 0ustar00rootroot00000000000000/bin/true ganeti-2.9.3/test/data/proc_drbd84_sync.txt0000644000000000000000000000111212267470014020567 0ustar00rootroot00000000000000version: 8.4.2 (api:1/proto:86-101) GIT-hash: 7ad5f850d711223713d6dcadc3dd48860321070c build by root@example.com, 2013-04-10 07:45:25 0: cs:StandAlone ro:Primary/Unknown ds:UpToDate/DUnknown r----- ns:0 nr:0 dw:33318 dr:730 al:15 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:d oos:1048320 3: cs:Unconfigured 5: cs:SyncSource ro:Secondary/Secondary ds:UpToDate/Inconsistent C r---n- ns:716992 nr:0 dw:0 dr:719432 al:0 bm:43 lo:0 pe:33 ua:18 ap:0 ep:1 wo:f oos:335744 [============>.......] sync'ed: 68.5% (335744/1048576)K finish: 0:00:05 speed: 64,800 (64,800) K/sec ganeti-2.9.3/test/data/vgs-missing-pvs-2.02.02.txt0000644000000000000000000000045712230001635021274 0ustar00rootroot00000000000000 Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. Couldn't find all physical volumes for volume group xenvg. Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. Couldn't find all physical volumes for volume group xenvg. 
Volume group xenvg not found ganeti-2.9.3/test/data/proc_drbd83.txt0000644000000000000000000000225512230001635017530 0ustar00rootroot00000000000000version: 8.3.1 (api:88/proto:86-89) GIT-hash: fd40f4a8f9104941537d1afc8521e584a6d3003c build by phil@fat-tyre, 2009-03-27 12:19:49 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r---- ns:140978 nr:0 dw:9906 dr:131533 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 1: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r--- ns:0 nr:140980 dw:140980 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 2: cs:Unconfigured 4: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C r---- ns:140978 nr:0 dw:9906 dr:131534 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 5: cs:Connected ro:Primary/Secondary ds:UpToDate/Diskless C r---- ns:140978 nr:0 dw:9906 dr:131533 al:19 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 6: cs:Connected ro:Secondary/Primary ds:Diskless/UpToDate C r--- ns:0 nr:140978 dw:140978 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 7: cs:WFConnection ro:Secondary/Unknown ds:UpToDate/DUnknown C r--- ns:0 nr:140978 dw:140978 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 8: cs:StandAlone ro:Secondary/Unknown ds:UpToDate/DUnknown r--- ns:0 nr:140978 dw:140978 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 ganeti-2.9.3/test/data/bdev-drbd-8.4.txt0000644000000000000000000000140212267470014017563 0ustar00rootroot00000000000000resource resource0 { options { } net { cram-hmac-alg "md5"; shared-secret "shared_secret_123"; after-sb-0pri discard-zero-changes; after-sb-1pri consensus; } _remote_host { address ipv4 192.0.2.2:11000; } _this_host { address ipv4 192.0.2.1:11000; volume 0 { device minor 0; disk "/dev/xenvg/test.data"; meta-disk "/dev/xenvg/test.meta" [ 0 ]; disk { size 2097152s; # bytes resync-rate 61440k; # bytes/second } } } } ganeti-2.9.3/test/data/cert2.pem0000644000000000000000000000227612244641676016427 0ustar00rootroot00000000000000-----BEGIN PRIVATE KEY----- MIIBUwIBADANBgkqhkiG9w0BAQEFAASCAT0wggE5AgEAAkEAt8OZYvvi8noVPlpR /SrHcya9ne7RG5DjvMssksUqyGriUs/WGnpZlL4nz+BcLFGwNNntoxqR30Tjk47S cmSBRQIDAQABAkAqTP5MCMuPIYcuWUAyVNygpzRS3JyKCepClUpnZreYdo4sUQE3 /AM7xeb92R06iZ3f9/MPrbaMKTWRh3uCyfKBAiEA5TxdacnVxdS8+ZLyys4p/C1s iajrarBb/j+NIAnsdnECIQDNOCDO7Jq/iN5qE4Vbi/3zmnP1Ca5aBo+KJ/hhSjRq FQIgIBpWEqybbXsfg+waaGB67MAHxTeM0IImP/LydpwtK2ECIB3SrlHj6Ik1Jr1b oOGw8nLYW0mc4o2KrolxTZM16XARAiBKW3aSjY5UrnoEqa8pAeiO8LJaRj73Epmr zC89IuLZfg== -----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- MIIB0zCCAX2gAwIBAgIJAKrAqGX6UolVMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV BAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBX aWRnaXRzIFB0eSBMdGQwHhcNMTIxMDE5MTQ1NjA4WhcNMTIxMDIwMTQ1NjA4WjBF MQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50 ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALfD mWL74vJ6FT5aUf0qx3MmvZ3u0RuQ47zLLJLFKshq4lLP1hp6WZS+J8/gXCxRsDTZ 7aMakd9E45OO0nJkgUUCAwEAAaNQME4wHQYDVR0OBBYEFA1Fc/GIVtd6nMocrSsA e5bxmVhMMB8GA1UdIwQYMBaAFA1Fc/GIVtd6nMocrSsAe5bxmVhMMAwGA1UdEwQF MAMBAf8wDQYJKoZIhvcNAQEFBQADQQCTUwzDGU+IJTQ3PIJrA3fHMyKbBvc4Rkvi ZNFsmgsidWhb+5APlPjtlS7rXlonNHBzDoGb4RNArtxhEx+rBcAE -----END CERTIFICATE----- ganeti-2.9.3/test/data/xen-xm-list-4.0.1-four-instances.txt0000644000000000000000000000061412244641676023221 0ustar00rootroot00000000000000Name ID Mem VCPUs State Time(s) Domain-0 0 1023 1 r----- 154706.1 server01.example.com 1 1024 1 -b---- 167643.2 web3106215069.example.com 3 4096 1 -b---- 466690.9 testinstance.example.com 2 2048 2 r----- 244443.0 
ganeti-2.9.3/test/data/kvm_0.9.1_help_boot_test.txt0000644000000000000000000001361412244641676022060 0ustar00rootroot00000000000000QEMU PC emulator version 0.9.1 (kvm-72), Copyright (c) 2003-2008 Fabrice Bellard usage: qemu [options] [disk_image] 'disk_image' is a raw hard image image for IDE hard disk 0 Standard options: -M machine select emulated machine (-M ? for list) -cpu cpu select CPU (-cpu ? for list) -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] -fakeopt [,cache=on|off][,format=f][,boot=on|off] use 'file' as a drive image -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [a|c|d|n] boot on floppy (a), hard disk (c), CD-ROM (d), or network (n) -snapshot write to temporary files instead of disk image files -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -no-fd-bootchk disable boot signature checking for floppy disks -m megs set virtual RAM size to megs MB [default=128] -smp n set the number of CPUs to 'n' [default=1] -nographic disable graphical output and redirect serial I/Os to console -portrait rotate graphical output 90 deg left (only PXA LCD) -k language use keyboard layout (for example "fr" for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? 
to get the list of supported cards use -soundhw all to enable all of them -localtime set the real time clock to local time [default=utc] -full-screen start in full screen -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -name string set the name of the guest Network options: -net nic[,vlan=n][,macaddr=addr][,model=type] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,hostname=host] connect the user mode network stack to VLAN 'n' and send hostname 'host' to DHCP clients -net tap[,vlan=n][,fd=h][,ifname=name][,script=file][,downscript=dfile] connect the host TAP network interface to VLAN 'n' and use the network scripts 'file' (default=/etc/kvm/kvm-ifup) and 'dfile' (default=/etc/kvm/kvm-ifdown); use '[down]script=no' to disable script execution; use 'fd=h' to connect to an already opened TAP interface -net socket[,vlan=n][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,fd=h][,mcast=maddr:port] connect the vlan 'n' to multicast maddr and port -net none use it alone to have zero network devices; if no -net option is provided, the default is '-net nic -net user' -tftp dir allow tftp access to files in dir [-net user] -bootp file advertise file in BOOTP replies -smb dir allow SMB access to files in 'dir' [-net user] -redir [tcp|udp]:host-port:[guest-host]:guest-port redirect TCP or UDP connections from host to guest [-net user] Linux boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk Debug/Expert options: -monitor dev redirect the monitor to char device 'dev' -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -pidfile file Write PID to 'file' -S freeze CPU at startup (use 'c' to start execution) -s wait gdb connection to port -p port set gdb connection port [default=1234] -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually qemu can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -no-kvm disable KVM hardware virtualization -no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC -no-kvm-pit disable KVM kernel mode PIT -std-vga simulate a standard VGA card with VESA Bochs Extensions (default is CL-GD5446 PCI VGA) -no-acpi disable ACPI -curses use a curses/ncurses interface instead of SDL -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -vnc display start a VNC server on display -daemonize daemonize QEMU after initializing -tdf inject timer interrupts that got lost -kvm-shadow-memory megs set the amount of shadow pages to be allocated -mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? 
-startdate select initial date of the clock -icount [N|auto] Enable virtual instruction counter with 2^N clock ticks per instruction During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/kvm_1.0_help.txt0000644000000000000000000003272512230001635017607 0ustar00rootroot00000000000000QEMU emulator version 1.0 (qemu-kvm-1.0 Debian 1.0+dfsg-2), Copyright (c) 2003-2008 Fabrice Bellard usage: qemu [options] [disk_image] 'disk_image' is a raw hard disk image for IDE hard disk 0 Standard options: -h or -help display this help and exit -version display version information and exit -machine [type=]name[,prop[=value][,...]] selects emulated machine (-machine ? for list) property accel=accel1[:accel2[:...]] selects accelerator supported accelerators are kvm, xen, tcg (default: tcg) -cpu cpu select CPU (-cpu ? for list) -smp n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets] set the number of CPUs to 'n' [default=1] maxcpus= maximum number of total cpus, including offline CPUs for hotplug, etc cores= number of CPU cores on one socket threads= number of threads on one CPU core sockets= number of discrete sockets in the system -numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node] -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] [,cache=writethrough|writeback|none|directsync|unsafe][,format=f] [,serial=s][,addr=A][,id=name][,aio=threads|native] [,readonly=on|off] use 'file' as a drive image -set group.id.arg=value set parameter for item of type i.e. -set drive.$id.file=/path/to/image -global driver.property=value set a global default for a driver property -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [order=drives][,once=drives][,menu=on|off] [,splash=sp_name][,splash-time=sp_time] 'drives': floppy (a), hard disk (c), CD-ROM (d), network (n) 'sp_name': the file's name that would be passed to bios as logo picture, if menu=on 'sp_time': the period that splash picture last if menu=on, unit is ms -snapshot write to temporary files instead of disk image files -m megs set virtual RAM size to megs MB [default=128] -mem-path FILE provide backing storage for guest RAM -mem-prealloc preallocate guest memory (use with -mem-path) -k language use keyboard layout (for example 'fr' for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? to get the list of supported cards use -soundhw all to enable all of them -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -device driver[,prop[=value][,...]] add device (based on driver) prop=value,... sets driver properties use -device ? to print all possible drivers use -device driver,? 
to print all possible properties File system options: -fsdev fsdriver,id=id,path=path,[security_model={mapped|passthrough|none}] [,writeout=immediate][,readonly] Virtual File system pass-through options: -virtfs local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none] [,writeout=immediate][,readonly] -virtfs_synth Create synthetic file system image -name string1[,process=string2] set the name of the guest string1 sets the window title and string2 the process name (on Linux) -uuid %08x-%04x-%04x-%04x-%012x specify machine UUID Display options: -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off] [,window_close=on|off]|curses|none| vnc=[,] select display type -nographic disable graphical output and redirect serial I/Os to console -curses use a curses/ncurses interface instead of SDL -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -ctrl-grab use Right-Ctrl to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -sdl enable SDL -spice enable spice -portrait rotate graphical output 90 deg left (only PXA LCD) -rotate rotate graphical output some deg left (only PXA LCD) -vga [std|cirrus|vmware|qxl|xenfb|none] select video card type -full-screen start in full screen -g WxH[xDEPTH] Set the initial graphical resolution and depth -vnc display start a VNC server on display i386 target only: -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -no-fd-bootchk disable boot signature checking for floppy disks -no-acpi disable ACPI -no-hpet disable HPET -balloon none disable balloon device -balloon virtio[,addr=str] enable virtio balloon device (default) -acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...] 
ACPI table description -smbios file=binary load SMBIOS entry from binary file -smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d] specify SMBIOS type 0 fields -smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str] specify SMBIOS type 1 fields Network options: -net nic[,vlan=n][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,name=str][,net=addr[/mask]][,host=addr][,restrict=on|off] [,hostname=host][,dhcpstart=addr][,dns=addr][,tftp=dir][,bootfile=f] [,hostfwd=rule][,guestfwd=rule][,smb=dir[,smbserver=addr]] connect the user mode network stack to VLAN 'n', configure its DHCP server and enabled optional services -net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off] connect the host TAP network interface to VLAN 'n' and use the network scripts 'file' (default=/etc/kvm/kvm-ifup) and 'dfile' (default=/etc/kvm/kvm-ifdown) use '[down]script=no' to disable script execution use 'fd=h' to connect to an already opened TAP interface use 'sndbuf=nbytes' to limit the size of the send buffer (the default is disabled 'sndbuf=0' to enable flow control set 'sndbuf=1048576') use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition use vhost=on to enable experimental in kernel accelerator (only has effect for virtio guests which use MSIX) use vhostforce=on to force vhost on for non-MSIX virtio guests use 'vhostfd=h' to connect to an already opened vhost net device -net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]] connect the vlan 'n' to multicast maddr and port use 'localaddr=addr' to specify the host address to send packets from -net vde[,vlan=n][,name=str][,sock=socketpath][,port=n][,group=groupname][,mode=octalmode] connect the vlan 'n' to port 'n' of a vde switch running on host and listening for incoming connections on 'socketpath'. Use group 'groupname' and mode 'octalmode' to change default ownership and permissions for communication port. -net dump[,vlan=n][,file=f][,len=n] dump traffic on vlan 'n' to file 'f' (max n bytes per packet) -net none use it alone to have zero network devices. If no -net option is provided, the default is '-net nic -net user' -netdev [user|tap|vde|socket],id=str[,option][,option][,...] 
Character device options: -chardev null,id=id[,mux=on|off] -chardev socket,id=id[,host=host],port=host[,to=to][,ipv4][,ipv6][,nodelay] [,server][,nowait][,telnet][,mux=on|off] (tcp) -chardev socket,id=id,path=path[,server][,nowait][,telnet],[mux=on|off] (unix) -chardev udp,id=id[,host=host],port=port[,localaddr=localaddr] [,localport=localport][,ipv4][,ipv6][,mux=on|off] -chardev msmouse,id=id[,mux=on|off] -chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]] [,mux=on|off] -chardev file,id=id,path=path[,mux=on|off] -chardev pipe,id=id,path=path[,mux=on|off] -chardev pty,id=id[,mux=on|off] -chardev stdio,id=id[,mux=on|off][,signal=on|off] -chardev braille,id=id[,mux=on|off] -chardev tty,id=id,path=path[,mux=on|off] -chardev parport,id=id,path=path[,mux=on|off] -chardev spicevmc,id=id,name=name[,debug=debug] Bluetooth(R) options: -bt hci,null dumb bluetooth HCI - doesn't respond to commands -bt hci,host[:id] use host's HCI with the given name -bt hci[,vlan=n] emulate a standard HCI in virtual scatternet 'n' -bt vhci[,vlan=n] add host computer to virtual scatternet 'n' using VHCI -bt device:dev[,vlan=n] emulate a bluetooth device 'dev' in scatternet 'n' Linux/Multiboot boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk Debug/Expert options: -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -monitor dev redirect the monitor to char device 'dev' -qmp dev like -monitor but opens in 'control' mode -mon chardev=[name][,mode=readline|control][,default] -debugcon dev redirect the debug console to char device 'dev' -pidfile file write PID to 'file' -singlestep always run in singlestep mode -S freeze CPU at startup (use 'c' to start execution) -gdb dev wait for gdb connection on 'dev' -s shorthand for -gdb tcp::1234 -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -D logfile output log to logfile (instead of the default /tmp/qemu.log) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually qemu can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -bios file set the filename for the BIOS -enable-kvm enable KVM full virtualization support -xen-domid id specify xen guest domain id -xen-create create domain using xen hypercalls, bypassing xend warning: should not be used when xend is in use -xen-attach attach to existing xen domain xend will use this when starting qemu -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -daemonize daemonize QEMU after initializing -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? 
-rtc [base=utc|localtime|date][,clock=host|vm][,driftfix=none|slew] set the RTC base and clock, enable drift fix for clock ticks (x86 only) -icount [N|auto] enable virtual instruction counter with 2^N clock ticks per instruction -watchdog i6300esb|ib700 enable virtual hardware watchdog [default=none] -watchdog-action reset|shutdown|poweroff|pause|debug|none action when watchdog fires [default=reset] -echr chr set terminal escape character instead of ctrl-a -virtioconsole c set virtio console -show-cursor show cursor -tb-size n set TB size -incoming p prepare for incoming migration, listen on port p -nodefaults don't create default devices -chroot dir chroot to dir just before starting the VM -runas user change to user id user just before starting the VM -prom-env variable=value set OpenBIOS nvram variables -semihosting semihosting mode -old-param old param mode -readconfig -writeconfig read/write config file -nodefconfig do not load default config files at startup -trace [events=][,file=] specify tracing options -no-kvm disable KVM hardware virtualization -no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC -no-kvm-pit disable KVM kernel mode PIT -no-kvm-pit-reinjection disable KVM kernel mode PIT interrupt reinjection -tdf enable guest time drift compensation -kvm-shadow-memory MEGABYTES allocate MEGABYTES for kvm mmu shadowing During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/proc_drbd83_sync.txt0000644000000000000000000000155012230001635020561 0ustar00rootroot00000000000000version: 8.3.1 (api:88/proto:86-89) GIT-hash: fd40f4a8f9104941537d1afc8521e584a6d3003c build by phil@fat-tyre, 2009-03-27 12:19:49 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r---- ns:140978 nr:0 dw:9906 dr:131533 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 1: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r--- ns:0 nr:140980 dw:140980 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 2: cs:Unconfigured 3: cs:SyncTarget ro:Primary/Secondary ds:Inconsistent/UpToDate C r---- ns:0 nr:178176 dw:178176 dr:0 al:104 bm:42 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:346112 [=====>..............] 
sync'ed: 34.9% (346112/524288)M finish: 0:00:05 speed: 59,392 (59,392) K/sec 4: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C r---- ns:140978 nr:0 dw:9906 dr:131534 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 ganeti-2.9.3/test/data/ip-addr-show-lo-oneline-ipv6.txt0000644000000000000000000000012212230001635022625 0ustar00rootroot000000000000001: lo inet6 ::1/128 scope host \ valid_lft forever preferred_lft forever ganeti-2.9.3/test/data/bdev-drbd-8.0.txt0000644000000000000000000000165412230001635017555 0ustar00rootroot00000000000000disk { size 0s _is_default; # bytes on-io-error detach; fencing dont-care _is_default; } net { timeout 60 _is_default; # 1/10 seconds max-epoch-size 16384; max-buffers 16384; unplug-watermark 128 _is_default; connect-int 10 _is_default; # seconds ping-int 10 _is_default; # seconds sndbuf-size 8388608; # bytes ko-count 0 _is_default; after-sb-0pri disconnect _is_default; after-sb-1pri disconnect _is_default; after-sb-2pri disconnect _is_default; rr-conflict disconnect _is_default; ping-timeout 5 _is_default; # 1/10 seconds } syncer { rate 30720k; # bytes/second after -1 _is_default; al-extents 257; } protocol A; _this_host { device "/dev/drbd63"; disk "/dev/xenvg/test.data"; meta-disk "/dev/xenvg/test.meta" [ 0 ]; address 192.0.2.1:11000; } _remote_host { address 192.0.2.2:11000; } ganeti-2.9.3/test/data/proc_drbd84.txt0000644000000000000000000000157312267470014017546 0ustar00rootroot00000000000000version: 8.4.2 (api:1/proto:86-101) GIT-hash: 7ad5f850d711223713d6dcadc3dd48860321070c build by root@example.com, 2013-04-10 07:45:25 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----- ns:1048576 nr:0 dw:0 dr:1048776 al:0 bm:64 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 1: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r----- ns:0 nr:1048576 dw:1048576 dr:0 al:0 bm:64 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 2: cs:Unconfigured 4: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C r----- ns:0 nr:0 dw:0 dr:200 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:1048320 6: cs:Connected ro:Secondary/Primary ds:Diskless/UpToDate C r----- ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 8: cs:StandAlone ro:Secondary/Unknown ds:UpToDate/DUnknown r----- ns:0 nr:0 dw:0 dr:200 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:1048320 ganeti-2.9.3/test/data/htools/0000755000000000000000000000000012271445545016203 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/htools/hail-change-group.json0000644000000000000000000003163312267470014022370 0ustar00rootroot00000000000000{ "cluster_tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "nodegroups": { "uuid-group-1": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": "preferred", "networks": [], "tags": [], "name": "default" }, "uuid-group-2": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, 
"max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": "preferred", "networks": [], "tags": [], "name": "empty" } }, "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "cpu-count": 8, "disk-count": 16, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "cluster_name": "cluster", "instances": { "instance14": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance13": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:9c", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance18": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 128, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:55:94:93", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 8192, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance19": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:15:92:6f", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance2": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:73:20:3e", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance3": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 }, { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 384, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:ec:e8:a2", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance4": { "disks": [ { "spindles": 2, "mode": "rw", "size": 2048 } ], "disk_space_total": 2176, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, 
"mac": "aa:00:00:62:b0:76", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node4", "node3" ], "os": "instance-debootstrap" }, "instance8": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:3f:6d:e3", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance9": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [ "test:test" ], "nics": [ { "ip": null, "mac": "aa:00:00:10:d2:01", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "instance-debootstrap" }, "instance20": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:db:2a:6d", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" } }, "version": 2, "nodes": { "node1": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.1", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31389, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.1", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node3": { "total_disk": 1377304, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.3", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31234, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1373336, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.3", "i_pri_memory": 2432, "free_spindles": 6, "total_spindles": 12, "vm_capable": true, "offline": false }, "node4": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.4", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 22914, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1371520, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.4", "i_pri_memory": 23552, "free_spindles": 0, "total_spindles": 12, "vm_capable": true, "offline": false }, "node10": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-2", "secondary_ip": "192.168.2.10", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31746, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1376640, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.10", "i_pri_memory": 
23552, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node11": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-2", "secondary_ip": "192.168.2.11", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31746, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1376640, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.11", "i_pri_memory": 23552, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false } }, "request": { "instances": [ "instance14" ], "target_groups": [], "type": "change-group" } } ganeti-2.9.3/test/data/htools/hroller-nonredundant.data0000644000000000000000000000232312267470014023174 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| node-01-000|91552|0|91424|3100|1052|16|M|fake-uuid-01|1 node-01-001|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-002|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-003|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-004|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-005|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 inst-00|128|1024|1|running|Y|node-01-000||plain||1 inst-01|128|1024|1|running|Y|node-01-000||plain||1 inst-10|128|1024|1|running|Y|node-01-001||plain||1 inst-11|128|1024|1|running|Y|node-01-001||plain||1 inst-20|128|1024|1|running|Y|node-01-002||plain||1 inst-21|128|1024|1|running|Y|node-01-002||plain||1 inst-30|128|1024|1|running|Y|node-01-003||plain||1 inst-31|128|1024|1|running|Y|node-01-003||plain||1 inst-40|128|1024|1|running|Y|node-01-004||plain||1 inst-41|128|1024|1|running|Y|node-01-004||plain||1 inst-50|128|1024|1|running|Y|node-01-005||plain||1 inst-51|128|1024|1|running|Y|node-01-005||plain||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/multiple-tags.data0000644000000000000000000000152412267470014021621 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| node-01-001|91552|0|91424|953674|953674|16|M|fake-uuid-01|1|red node-01-002|91552|0|91296|953674|953674|16|N|fake-uuid-01|1|blue node-01-003|91552|0|91296|953674|953674|16|N|fake-uuid-01|1| node-01-004|91552|0|91296|953674|953674|16|N|fake-uuid-01|1|blue,red node-01-005|91552|0|91296|953674|953674|16|N|fake-uuid-01|1|red node-01-006|91552|0|91296|953674|953674|16|N|fake-uuid-01|1|blue new-0|128|1152|1|running|Y|node-01-001|node-01-002|drbd||1 new-1|128|1152|1|running|Y|node-01-003|node-01-004|drbd||1 new-1|128|1152|1|running|Y|node-01-005|node-01-006|drbd||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/rapi/0000755000000000000000000000000012271445545017136 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/htools/rapi/instances.json0000644000000000000000000004317212267470014022021 0ustar00rootroot00000000000000[ { "disk_usage": 256, "oper_vcpus": 1, "serial_no": 7, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", 
"kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": true, "disk_template": "drbd", "mtime": 1330349951.511833, "nic.modes": [ "bridged" ], "oper_ram": 128, "pnode": "node3", "nic.bridges": [ "xen-br0" ], "status": "running", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [ "node4" ], "nic.macs": [ "aa:00:00:73:20:3e" ], "name": "instance2", "network_port": null, "ctime": 1327334413.084552, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "4b9ff2a2-3399-4141-b4e1-cde418b1dfec", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "up", "nic.links": [ "xen-br0" ], "os": "debian-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 384, "oper_vcpus": null, "serial_no": 6, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "plain", "mtime": 1325681489.4059889, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "" }, "tags": [], "nic.ips": [ null ], "snodes": [], "nic.macs": [ "aa:00:00:ec:e8:a2" ], "name": "instance3", "network_port": null, "ctime": 1312272250.96, "custom_beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "maxmem": 128, "spindle_use": 1 }, "custom_nicparams": [ { "link": "xen-br0", "mode": "bridged" } ], "uuid": "3cecca87-eae7-476c-847c-818a28764989", "disk.sizes": [ 256, 128 ], "disk.spindles": [ null, null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "debian-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 2176, "oper_vcpus": null, "serial_no": 23, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "drbd", "mtime": 1325681487.384176, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [ "service-group:dns" ], "nic.ips": [ null ], "snodes": [ "node3" ], "nic.macs": [ "aa:00:00:62:b0:76" ], "name": "instance4", "network_port": null, "ctime": 1274885795.4000001, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "33f4c063-bb65-41b2-af29-d8a631201bd7", "disk.sizes": [ 2048 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "lenny-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 256, "oper_vcpus": null, "serial_no": 9, "hvparams": { "spice_password_file": "", "spice_use_tls": false, "spice_use_vdagent": true, "nic_type": "paravirtual", "vnc_bind_address": "0.0.0.0", "cdrom2_image_path": "", "usb_mouse": "", 
"spice_streaming_video": "", "use_chroot": false, "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH", "migration_downtime": 30, "floppy_image_path": "", "security_model": "none", "cdrom_image_path": "", "spice_ip_version": 0, "vhost_net": false, "cpu_mask": "all", "disk_cache": "default", "kernel_path": "/boot/vmlinuz-2.6.38-gg426-generic", "initrd_path": "/boot/initrd.img-2.6.38-gg426-generic", "spice_jpeg_wan_compression": "", "vnc_tls": false, "cdrom_disk_type": "", "use_localtime": false, "security_domain": "", "serial_console": false, "spice_bind": "", "spice_zlib_glz_wan_compression": "", "kvm_flag": "", "vnc_password_file": "", "disk_type": "paravirtual", "vnc_x509_verify": false, "spice_image_compression": "", "spice_playback_compression": true, "kernel_args": "ro", "root_path": "/dev/vda1", "vnc_x509_path": "", "acpi": true, "keymap": "", "boot_order": "disk", "mem_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "plain", "mtime": 1325681492.191576, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [], "nic.macs": [ "aa:00:00:3f:6d:e3" ], "name": "instance8", "network_port": 12111, "ctime": 1311771325.6600001, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "1ea53cc3-cc69-43da-b261-f22ac47896ea", "disk.sizes": [ 256 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "debian-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 256, "oper_vcpus": null, "serial_no": 31, "hvparams": { "root_path": "/dev/sda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-2.6-xenU", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "drbd", "mtime": 1325681490.685926, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node3", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": { "root_path": "/dev/sda1", "kernel_args": "ro", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "kernel_path": "/boot/vmlinuz-2.6-xenU", "initrd_path": "" }, "tags": [ "gogu:test" ], "nic.ips": [ null ], "snodes": [ "node4" ], "nic.macs": [ "aa:00:00:10:d2:01" ], "name": "instance9", "network_port": null, "ctime": 1271937489.76, "custom_beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "maxmem": 128, "spindle_use": 1 }, "custom_nicparams": [ {} ], "uuid": "4927ac66-a3c5-45c6-be39-97e2b119557e", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "lenny-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 512, "oper_vcpus": null, "serial_no": 11, "hvparams": { "root_path": "/dev/sda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-2.6-xenU", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "plain", "mtime": 1325681493.0002201, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br1" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], 
"snodes": [], "nic.macs": [ "aa:00:00:7f:8c:9c" ], "name": "instance13", "network_port": null, "ctime": 1305129727.7, "custom_beparams": {}, "custom_nicparams": [ { "link": "xen-br1" } ], "uuid": "b864e453-f072-41fe-9973-7673c2161e34", "disk.sizes": [ 512 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br1" ], "os": "busybox", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 256, "oper_vcpus": null, "serial_no": 11, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "drbd", "mtime": 1325681493.8268771, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node3", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [ "node4" ], "nic.macs": [ "aa:00:00:eb:0b:a5" ], "name": "instance14", "network_port": null, "ctime": 1312285580.27, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "e9dae1c9-b4cb-4f11-b0e9-65931a6b3524", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "debian-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 128, "oper_vcpus": null, "serial_no": 9, "hvparams": { "root_path": "/dev/sda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-2.6-xenU", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "plain", "mtime": 1325681491.0986331, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [], "nic.macs": [ "aa:00:00:55:94:93" ], "name": "instance18", "network_port": null, "ctime": 1297176343.1700001, "custom_beparams": { "minmem": 8192, "maxmem": 8192 }, "custom_nicparams": [ {} ], "uuid": "2f14bc3b-8448-4b2f-a592-d7a216244b22", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "busybox", "beparams": { "auto_balance": true, "minmem": 8192, "vcpus": 1, "always_failover": false, "maxmem": 8192, "spindle_use": 1 } }, { "disk_usage": 256, "oper_vcpus": null, "serial_no": 10, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "drbd", "mtime": 1325681491.5785329, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node3", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [ "node4" ], "nic.macs": [ "aa:00:00:15:92:6f" ], "name": "instance19", "network_port": null, "ctime": 1312464490.7, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "624c1844-82a2-474e-bdaf-1bafa820fdcf", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "debian-image", 
"beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 512, "oper_vcpus": null, "serial_no": 14, "hvparams": { "spice_password_file": "", "spice_use_tls": false, "spice_use_vdagent": true, "nic_type": "paravirtual", "vnc_bind_address": "0.0.0.0", "cdrom2_image_path": "", "usb_mouse": "", "spice_streaming_video": "", "use_chroot": false, "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH", "migration_downtime": 30, "floppy_image_path": "", "security_model": "none", "cdrom_image_path": "", "spice_ip_version": 0, "vhost_net": false, "cpu_mask": "all", "disk_cache": "default", "kernel_path": "/boot/vmlinuz-2.6.38-gg426-generic", "initrd_path": "/boot/initrd.img-2.6.38-gg426-generic", "spice_jpeg_wan_compression": "", "vnc_tls": false, "cdrom_disk_type": "", "use_localtime": false, "security_domain": "", "serial_console": false, "spice_bind": "", "spice_zlib_glz_wan_compression": "", "kvm_flag": "", "vnc_password_file": "", "disk_type": "paravirtual", "vnc_x509_verify": false, "spice_image_compression": "", "spice_playback_compression": true, "kernel_args": "ro", "root_path": "/dev/vda1", "vnc_x509_path": "", "acpi": true, "keymap": "", "boot_order": "disk", "mem_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "plain", "mtime": 1325681494.699162, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node4", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [], "nic.macs": [ "aa:00:00:db:2a:6d" ], "name": "instance20", "network_port": 12107, "ctime": 1305208955.75, "custom_beparams": {}, "custom_nicparams": [ { "link": "xen-br0" } ], "uuid": "4f65c14d-be87-4303-a8dc-ba1b86e2a3b3", "disk.sizes": [ 512 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "lenny-image+default", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } }, { "disk_usage": 256, "oper_vcpus": null, "serial_no": 10, "hvparams": { "root_path": "/dev/xvda1", "kernel_args": "ro", "blockdev_prefix": "sd", "use_bootloader": false, "bootloader_args": "", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-ganetixenu-2.6.38", "initrd_path": "", "reboot_behavior": "reboot" }, "oper_state": false, "disk_template": "drbd", "mtime": 1325681489.0591741, "nic.modes": [ "bridged" ], "oper_ram": null, "pnode": "node3", "nic.bridges": [ "xen-br0" ], "status": "ADMIN_down", "custom_hvparams": {}, "tags": [], "nic.ips": [ null ], "snodes": [ "node4" ], "nic.macs": [ "aa:00:00:cb:96:c1" ], "name": "instance21", "network_port": null, "ctime": 1312552008.1199999, "custom_beparams": {}, "custom_nicparams": [ {} ], "uuid": "6f2f7824-8392-408e-ac54-c938f4fb0638", "disk.sizes": [ 128 ], "disk.spindles": [ null ], "admin_state": "down", "nic.links": [ "xen-br0" ], "os": "debian-image", "beparams": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128, "spindle_use": 1 } } ] ganeti-2.9.3/test/data/htools/rapi/groups.json0000644000000000000000000000222312244641676021352 0ustar00rootroot00000000000000[ { "uuid": "uuid-group-1", "tags": [], "ipolicy": { "std": { "cpu-count": 1, "nic-count": 1, "disk-size": 1024, "memory-size": 128, "disk-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "cpu-count": 1, "nic-count": 1, "disk-size": 1024, "memory-size": 128, "disk-count": 1, "spindle-use": 1 }, "max": { 
"cpu-count": 8, "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "disk-count": 16, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "node_cnt": 4, "serial_no": 15, "node_list": [ "node1", "node2", "node3", "node4" ], "ctime": null, "mtime": 1325251614.671967, "alloc_policy": "preferred", "name": "default" } ] ganeti-2.9.3/test/data/htools/rapi/info.json0000644000000000000000000000655612244641676021003 0ustar00rootroot00000000000000{ "maintain_node_health": true, "hvparams": { "xen-pvm": { "use_bootloader": false, "migration_mode": "live", "kernel_args": "ro", "migration_port": 8002, "bootloader_args": "", "root_path": "/dev/sda1", "blockdev_prefix": "sd", "bootloader_path": "", "cpu_mask": "all", "kernel_path": "/boot/vmlinuz-2.6-xenU", "initrd_path": "", "reboot_behavior": "reboot" }, "xen-hvm": { "nic_type": "rtl8139", "use_localtime": false, "migration_mode": "non-live", "boot_order": "cd", "migration_port": 8002, "cpu_mask": "all", "vnc_bind_address": "0.0.0.0", "reboot_behavior": "reboot", "blockdev_prefix": "hd", "cdrom_image_path": "", "device_model": "/usr/lib/xen/bin/qemu-dm", "pae": true, "vnc_password_file": "/etc/ganeti/vnc-cluster-password", "disk_type": "paravirtual", "kernel_path": "/usr/lib/xen/boot/hvmloader", "acpi": true } }, "default_hypervisor": "xen-pvm", "uid_pool": [], "prealloc_wipe_disks": false, "primary_ip_version": 4, "mtime": 1331075221.432734, "os_hvp": { "instance-debootstrap": { "xen-pvm": { "root_path": "/dev/xvda1", "kernel_path": "/boot/vmlinuz-2.6.38" } } }, "osparams": { "debootstrap": { "dhcp": "no", "partition_style": "none", "packages": "ssh" } }, "shared_file_storage_dir": "", "master_netmask": 32, "uuid": "1616c1cc-f793-499c-b1c5-48264c2d2976", "use_external_mip_script": false, "export_version": 0, "hidden_os": [ "lenny" ], "os_api_version": 20, "master": "node4", "nicparams": { "default": { "link": "xen-br0", "mode": "bridged" } }, "protocol_version": 2050000, "config_version": 2050000, "software_version": "2.5.0~rc5", "tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "candidate_pool_size": 3, "file_storage_dir": "/srv/ganeti/file-storage", "blacklisted_os": [], "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "reserved_lvs": [ "xenvg/test" ], "drbd_usermode_helper": "/bin/true", "default_iallocator": "hail", "ctime": 1271079848.3199999, "name": "cluster", "master_netdev": "xen-br0", "ndparams": { "spindle_count": 1, "oob_program": null }, "architecture": [ "64bit", "x86_64" ], "volume_group_name": "xenvg", "beparams": { "default": { "auto_balance": true, "minmem": 128, "vcpus": 1, "always_failover": false, "maxmem": 128 } } } ganeti-2.9.3/test/data/htools/rapi/nodes.json0000644000000000000000000000703712267470014021142 0ustar00rootroot00000000000000[ { "cnodes": 2, "cnos": 1, "csockets": 2, "ctime": 1324472016.2968869, "ctotal": 4, "dfree": 1377280, "drained": false, "dtotal": 1377280, 
"group.uuid": "uuid-group-1", "master_candidate": true, "master_capable": true, "mfree": 31389, "mnode": 1017, "mtime": 1331075221.432734, "mtotal": 32763, "name": "node1", "offline": false, "pinst_cnt": 0, "pinst_list": [], "pip": "192.168.1.1", "role": "C", "serial_no": 3, "sinst_cnt": 0, "sinst_list": [], "sip": "192.168.1.2", "spfree": 0, "sptotal": 0, "tags": [], "uuid": "7750ef3d-450f-4724-9d3d-8726d6335417", "vm_capable": true, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false } }, { "cnodes": 2, "cnos": 1, "csockets": 2, "ctime": 1324472016.2968869, "ctotal": 4, "dfree": 1376640, "drained": false, "dtotal": 1377280, "group.uuid": "uuid-group-1", "master_candidate": true, "master_capable": true, "mfree": 31746, "mnode": 1017, "mtime": 1331075221.432734, "mtotal": 32763, "name": "node2", "offline": false, "pinst_cnt": 0, "pinst_list": [], "pip": "192.168.1.2", "role": "C", "serial_no": 3, "sinst_cnt": 0, "sinst_list": [], "sip": "192.168.2.2", "spfree": 0, "sptotal": 0, "tags": [], "uuid": "7750ef3d-450f-4724-9d3d-8726d6335417", "vm_capable": true, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false } }, { "cnodes": 2, "cnos": 1, "dfree": 1373336, "drained": false, "dtotal": 1377304, "mfree": 31234, "mtime": 1331075172.0123219, "pip": "192.168.1.3", "serial_no": 129, "sinst_cnt": 1, "sip": "192.168.2.3", "uuid": "2c7acf04-599d-4707-aba4-bf07a2685f63", "sinst_list": [ "instance4" ], "csockets": 2, "role": "C", "ctotal": 4, "offline": false, "vm_capable": true, "pinst_cnt": 5, "mtotal": 32763, "tags": [], "group.uuid": "uuid-group-1", "master_capable": true, "name": "node3", "master_candidate": true, "ctime": 1271425438.5, "mnode": 1017, "spfree": 0, "sptotal": 0, "pinst_list": [ "instance14", "instance19", "instance2", "instance21", "instance9" ], "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false } }, { "cnodes": 2, "cnos": 1, "dfree": 1371520, "drained": false, "dtotal": 1377280, "mfree": 31746, "mtime": 1318339824.54, "pip": "192.168.1.4", "serial_no": 8, "sinst_cnt": 5, "sip": "192.168.2.4", "uuid": "f25357c1-7fee-4471-b8a9-c7f28669e439", "sinst_list": [ "instance2", "instance21", "instance14", "instance9", "instance19" ], "csockets": 2, "role": "M", "ctotal": 4, "offline": false, "vm_capable": true, "pinst_cnt": 7, "mtotal": 32763, "tags": [], "group.uuid": "uuid-group-1", "master_capable": true, "name": "node4", "master_candidate": true, "ctime": 1309185898.51, "mnode": 1017, "spfree": 0, "sptotal": 0, "pinst_list": [ "instance20", "instance3", "instance15", "instance4", "instance13", "instance8", "instance18" ], "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false } } ] ganeti-2.9.3/test/data/htools/hspace-tiered-mixed.data0000644000000000000000000000141412267470014022651 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| group-02|fake-uuid-02|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|10 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|10 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-02|8||N|8 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-02|8||N|8 |129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 
group-02|129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hspace-tiered-exclusive.data0000644000000000000000000000113312267470014023550 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|10 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|9 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|8||Y|8 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|8||Y|8 |129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hbal-split-insts.data0000644000000000000000000002067112244641676022244 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| group-02|fake-uuid-02|preferred|| node-01-001|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-002|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-003|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-004|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-005|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-006|98304|0|96256|8388608|8355840|16|N|fake-uuid-01|1 node-01-007|98304|0|96256|8388608|8355840|16|N|fake-uuid-02|1 node-01-008|98304|0|96256|8388608|8355840|16|N|fake-uuid-02|1 new-0|128|1024|1|running|Y|node-01-008|node-01-007|drbd||1 new-1|128|1024|1|running|Y|node-01-006|node-01-005|drbd||1 new-2|128|1024|1|running|Y|node-01-004|node-01-003|drbd||1 new-3|128|1024|1|running|Y|node-01-002|node-01-001|drbd||1 new-4|128|1024|1|running|Y|node-01-007|node-01-008|drbd||1 new-5|128|1024|1|running|Y|node-01-005|node-01-006|drbd||1 new-6|128|1024|1|running|Y|node-01-003|node-01-004|drbd||1 new-7|128|1024|1|running|Y|node-01-001|node-01-002|drbd||1 new-8|128|1024|1|running|Y|node-01-008|node-01-006|drbd||1 new-9|128|1024|1|running|Y|node-01-007|node-01-005|drbd||1 new-10|128|1024|1|running|Y|node-01-004|node-01-002|drbd||1 new-11|128|1024|1|running|Y|node-01-003|node-01-001|drbd||1 new-12|128|1024|1|running|Y|node-01-006|node-01-008|drbd||1 new-13|128|1024|1|running|Y|node-01-005|node-01-007|drbd||1 new-14|128|1024|1|running|Y|node-01-002|node-01-004|drbd||1 new-15|128|1024|1|running|Y|node-01-001|node-01-003|drbd||1 new-16|128|1024|1|running|Y|node-01-008|node-01-005|drbd||1 new-17|128|1024|1|running|Y|node-01-007|node-01-006|drbd||1 new-18|128|1024|1|running|Y|node-01-004|node-01-001|drbd||1 new-19|128|1024|1|running|Y|node-01-003|node-01-002|drbd||1 new-20|128|1024|1|running|Y|node-01-006|node-01-007|drbd||1 new-21|128|1024|1|running|Y|node-01-005|node-01-008|drbd||1 new-22|128|1024|1|running|Y|node-01-002|node-01-003|drbd||1 new-23|128|1024|1|running|Y|node-01-001|node-01-004|drbd||1 new-24|128|1024|1|running|Y|node-01-008|node-01-004|drbd||1 new-25|128|1024|1|running|Y|node-01-007|node-01-003|drbd||1 new-26|128|1024|1|running|Y|node-01-006|node-01-002|drbd||1 new-27|128|1024|1|running|Y|node-01-005|node-01-001|drbd||1 new-28|128|1024|1|running|Y|node-01-004|node-01-008|drbd||1 new-29|128|1024|1|running|Y|node-01-003|node-01-007|drbd||1 new-30|128|1024|1|running|Y|node-01-002|node-01-006|drbd||1 new-31|128|1024|1|running|Y|node-01-001|node-01-005|drbd||1 new-32|128|1024|1|running|Y|node-01-008|node-01-003|drbd||1 
new-33|128|1024|1|running|Y|node-01-007|node-01-004|drbd||1 new-34|128|1024|1|running|Y|node-01-006|node-01-001|drbd||1 new-35|128|1024|1|running|Y|node-01-005|node-01-002|drbd||1 new-36|128|1024|1|running|Y|node-01-004|node-01-007|drbd||1 new-37|128|1024|1|running|Y|node-01-003|node-01-008|drbd||1 new-38|128|1024|1|running|Y|node-01-002|node-01-005|drbd||1 new-39|128|1024|1|running|Y|node-01-001|node-01-006|drbd||1 new-40|128|1024|1|running|Y|node-01-008|node-01-002|drbd||1 new-41|128|1024|1|running|Y|node-01-007|node-01-001|drbd||1 new-42|128|1024|1|running|Y|node-01-006|node-01-004|drbd||1 new-43|128|1024|1|running|Y|node-01-005|node-01-003|drbd||1 new-44|128|1024|1|running|Y|node-01-004|node-01-006|drbd||1 new-45|128|1024|1|running|Y|node-01-003|node-01-005|drbd||1 new-46|128|1024|1|running|Y|node-01-002|node-01-008|drbd||1 new-47|128|1024|1|running|Y|node-01-001|node-01-007|drbd||1 new-48|128|1024|1|running|Y|node-01-008|node-01-001|drbd||1 new-49|128|1024|1|running|Y|node-01-007|node-01-002|drbd||1 new-50|128|1024|1|running|Y|node-01-006|node-01-003|drbd||1 new-51|128|1024|1|running|Y|node-01-005|node-01-004|drbd||1 new-52|128|1024|1|running|Y|node-01-004|node-01-005|drbd||1 new-53|128|1024|1|running|Y|node-01-003|node-01-006|drbd||1 new-54|128|1024|1|running|Y|node-01-002|node-01-007|drbd||1 new-55|128|1024|1|running|Y|node-01-001|node-01-008|drbd||1 new-56|128|1024|1|running|Y|node-01-008|node-01-007|drbd||1 new-57|128|1024|1|running|Y|node-01-006|node-01-005|drbd||1 new-58|128|1024|1|running|Y|node-01-004|node-01-003|drbd||1 new-59|128|1024|1|running|Y|node-01-002|node-01-001|drbd||1 new-60|128|1024|1|running|Y|node-01-007|node-01-008|drbd||1 new-61|128|1024|1|running|Y|node-01-005|node-01-006|drbd||1 new-62|128|1024|1|running|Y|node-01-003|node-01-004|drbd||1 new-63|128|1024|1|running|Y|node-01-001|node-01-002|drbd||1 new-64|128|1024|1|running|Y|node-01-008|node-01-006|drbd||1 new-65|128|1024|1|running|Y|node-01-007|node-01-005|drbd||1 new-66|128|1024|1|running|Y|node-01-004|node-01-002|drbd||1 new-67|128|1024|1|running|Y|node-01-003|node-01-001|drbd||1 new-68|128|1024|1|running|Y|node-01-006|node-01-008|drbd||1 new-69|128|1024|1|running|Y|node-01-005|node-01-007|drbd||1 new-70|128|1024|1|running|Y|node-01-002|node-01-004|drbd||1 new-71|128|1024|1|running|Y|node-01-001|node-01-003|drbd||1 new-72|128|1024|1|running|Y|node-01-008|node-01-005|drbd||1 new-73|128|1024|1|running|Y|node-01-007|node-01-006|drbd||1 new-74|128|1024|1|running|Y|node-01-004|node-01-001|drbd||1 new-75|128|1024|1|running|Y|node-01-003|node-01-002|drbd||1 new-76|128|1024|1|running|Y|node-01-006|node-01-007|drbd||1 new-77|128|1024|1|running|Y|node-01-005|node-01-008|drbd||1 new-78|128|1024|1|running|Y|node-01-002|node-01-003|drbd||1 new-79|128|1024|1|running|Y|node-01-001|node-01-004|drbd||1 new-80|128|1024|1|running|Y|node-01-008|node-01-004|drbd||1 new-81|128|1024|1|running|Y|node-01-007|node-01-003|drbd||1 new-82|128|1024|1|running|Y|node-01-006|node-01-002|drbd||1 new-83|128|1024|1|running|Y|node-01-005|node-01-001|drbd||1 new-84|128|1024|1|running|Y|node-01-004|node-01-008|drbd||1 new-85|128|1024|1|running|Y|node-01-003|node-01-007|drbd||1 new-86|128|1024|1|running|Y|node-01-002|node-01-006|drbd||1 new-87|128|1024|1|running|Y|node-01-001|node-01-005|drbd||1 new-88|128|1024|1|running|Y|node-01-008|node-01-003|drbd||1 new-89|128|1024|1|running|Y|node-01-007|node-01-004|drbd||1 new-90|128|1024|1|running|Y|node-01-006|node-01-001|drbd||1 new-91|128|1024|1|running|Y|node-01-005|node-01-002|drbd||1 
new-92|128|1024|1|running|Y|node-01-004|node-01-007|drbd||1 new-93|128|1024|1|running|Y|node-01-003|node-01-008|drbd||1 new-94|128|1024|1|running|Y|node-01-002|node-01-005|drbd||1 new-95|128|1024|1|running|Y|node-01-001|node-01-006|drbd||1 new-96|128|1024|1|running|Y|node-01-008|node-01-002|drbd||1 new-97|128|1024|1|running|Y|node-01-007|node-01-001|drbd||1 new-98|128|1024|1|running|Y|node-01-006|node-01-004|drbd||1 new-99|128|1024|1|running|Y|node-01-005|node-01-003|drbd||1 new-100|128|1024|1|running|Y|node-01-004|node-01-006|drbd||1 new-101|128|1024|1|running|Y|node-01-003|node-01-005|drbd||1 new-102|128|1024|1|running|Y|node-01-002|node-01-008|drbd||1 new-103|128|1024|1|running|Y|node-01-001|node-01-007|drbd||1 new-104|128|1024|1|running|Y|node-01-008|node-01-001|drbd||1 new-105|128|1024|1|running|Y|node-01-007|node-01-002|drbd||1 new-106|128|1024|1|running|Y|node-01-006|node-01-003|drbd||1 new-107|128|1024|1|running|Y|node-01-005|node-01-004|drbd||1 new-108|128|1024|1|running|Y|node-01-004|node-01-005|drbd||1 new-109|128|1024|1|running|Y|node-01-003|node-01-006|drbd||1 new-110|128|1024|1|running|Y|node-01-002|node-01-007|drbd||1 new-111|128|1024|1|running|Y|node-01-001|node-01-008|drbd||1 new-112|128|1024|1|running|Y|node-01-008|node-01-007|drbd||1 new-113|128|1024|1|running|Y|node-01-006|node-01-005|drbd||1 new-114|128|1024|1|running|Y|node-01-004|node-01-003|drbd||1 new-115|128|1024|1|running|Y|node-01-002|node-01-001|drbd||1 new-116|128|1024|1|running|Y|node-01-007|node-01-008|drbd||1 new-117|128|1024|1|running|Y|node-01-005|node-01-006|drbd||1 new-118|128|1024|1|running|Y|node-01-003|node-01-004|drbd||1 new-119|128|1024|1|running|Y|node-01-001|node-01-002|drbd||1 new-120|128|1024|1|running|Y|node-01-008|node-01-006|drbd||1 new-121|128|1024|1|running|Y|node-01-007|node-01-005|drbd||1 new-122|128|1024|1|running|Y|node-01-004|node-01-002|drbd||1 new-123|128|1024|1|running|Y|node-01-003|node-01-001|drbd||1 new-124|128|1024|1|running|Y|node-01-006|node-01-008|drbd||1 new-125|128|1024|1|running|Y|node-01-005|node-01-007|drbd||1 new-126|128|1024|1|running|Y|node-01-002|node-01-004|drbd||1 new-127|128|1024|1|running|Y|node-01-001|node-01-003|drbd||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 group-02|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 ganeti-2.9.3/test/data/htools/hroller-online.data0000644000000000000000000000130012267470014021753 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| node-01-001|91552|0|91424|953674|953674|16|N|fake-uuid-01|1 node-01-002|91552|0|91296|953674|953674|16|N|fake-uuid-01|1 node-01-003|91552|0|91296|953674|953674|16|M|fake-uuid-01|1 node-01-004|91552|0|91296|953674|953674|16|N|fake-uuid-01|1 new-0|128|1152|1|running|Y|node-01-001|node-01-002|drbd||1 new-1|128|1152|1|running|Y|node-01-003|node-01-002|drbd||1 new-2|128|1152|1|running|Y|node-01-004|node-01-003|drbd||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/multiple-master.data0000644000000000000000000000072412244641676022170 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| 
node-01-001|91552|0|91424|953674|953674|16|M|fake-uuid-01|1 node-01-002|91552|0|91296|953674|953674|16|N|fake-uuid-01|1 node-01-003|91552|0|91296|953674|953674|16|M|fake-uuid-01|1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-alloc-restricted-network.json0000644000000000000000000001455112267470014024740 0ustar00rootroot00000000000000{ "cluster_tags": [], "instances": { "instance1": { "disks": [ { "mode": "rw", "size": 1024 } ], "disk_space_total": 1024, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node1_1", "node1_2" ], "os": "debian-image" }, "instance2": { "disks": [ { "mode": "rw", "size": 1024 } ], "disk_space_total": 1024, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node1_2", "node1_1" ], "os": "debian-image" }, "instance3": { "disks": [ { "mode": "rw", "size": 1024 } ], "disk_space_total": 1024, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node1_1", "node1_2" ], "os": "debian-image" } }, "ipolicy": { "max": { "disk-size": 2048 }, "min": { "disk-size": 1024 } }, "nodegroups": { "uuid-group-1": { "alloc_policy": "last_resort", "ipolicy": { "disk-templates": [ "drbd" ], "minmax": [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "Group 1", "networks": ["uuid-net-1-1", "uuid-net-1-2"], "tags": [] }, "uuid-group-2": { "alloc_policy": "preferred", "ipolicy": { "disk-templates": [ "drbd" ], "minmax": [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "Group 2", "networks": ["uuid-net-2-1", "uuid-net-2-2", "uuid-net-2-3"], "tags": [] } }, "nodes": { "node1_1": { "drained": false, "free_disk": 4096, "free_memory": 3840, "free_spindles": 0, "group": "uuid-group-1", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 4096, "total_spindles": 0 }, "node1_2": { "drained": false, "free_disk": 4096, "free_memory": 3968, "free_spindles": 0, "group": "uuid-group-1", "ndparams": { "spindle_count": 1, 
"exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 32763, "total_spindles": 0 }, "node2_1": { "drained": false, "free_disk": 7168, "free_memory": 4096, "free_spindles": 0, "group": "uuid-group-2", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 4096, "total_spindles": 0 }, "node2_2": { "drained": false, "free_disk": 7168, "free_memory": 4096, "free_spindles": 0, "group": "uuid-group-2", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 4096, "total_spindles": 0 } }, "request": { "disk_space_total": 3072, "disk_template": "drbd", "disks": [ { "size": 1536 }, { "size": 1536 } ], "memory": 1024, "name": "instance1", "required_nodes": 2, "spindle_use": 2, "nics":[ { "mac":"aa:00:00:85:f3:a7", "network":"uuid-net-1-1", "nicparams":{} }, { "mac":"aa:00:00:85:f3:a8", "network":"uuid-net-1-2", "nicparams":{} } ], "tags": [], "type": "allocate", "vcpus": 1 }, "version": 2 } ganeti-2.9.3/test/data/htools/hspace-tiered-dualspec-exclusive.data0000644000000000000000000000125312267470014025351 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|10 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|10||Y|9 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|8||Y|8 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|8||Y|8 |63488,2,522240,1,1,2|129024,4,1047552,1,1,4;131072,4,1048576,16,8,4;63488,2,522240,1,1,2;65536,2,524288,16,8,2|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|63488,2,522240,1,1,2|129024,4,1047552,1,1,4;131072,4,1048576,16,8,4;63488,2,522240,1,1,2;65536,2,524288,16,8,2|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hroller-nodegroups.data0000644000000000000000000000152212267470014022662 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| group-02|fake-uuid-02|preferred| node-01-000|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-001|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-01-002|91552|0|91424|3100|1052|16|N|fake-uuid-01|1 node-02-000|91552|0|91552|3100|3100|16|M|fake-uuid-02|1 inst-00|128|1024|1|running|Y|node-01-000||plain||1 inst-01|128|1024|1|running|Y|node-01-000||plain||1 inst-10|128|1024|1|running|Y|node-01-001||plain||1 inst-11|128|1024|1|running|Y|node-01-001||plain||1 inst-20|128|1024|1|running|Y|node-01-002||plain||1 inst-21|128|1024|1|running|Y|node-01-002||plain||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-alloc-invalid-network.json0000644000000000000000000000730212267470014024212 0ustar00rootroot00000000000000{ "cluster_tags": [], "instances": {}, "ipolicy": { "max": { "disk-size": 2048 }, "min": { "disk-size": 1024 } }, "nodegroups": { "uuid-group-1": { "alloc_policy": "preferred", "ipolicy": { "disk-templates": [ "drbd" ], "minmax": [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { 
"cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "Group 1", "networks": ["uuid-net-1-1", "uuid-net-1-2"], "tags": [] }, "uuid-group-2": { "alloc_policy": "preferred", "ipolicy": { "disk-templates": [ "file" ], "minmax": [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "Group 2", "networks": ["uuid-net-2-1", "uuid-net-2-2", "uuid-net-2-3"], "tags": [] } }, "nodes": { "node1_1": { "drained": false, "free_disk": 7168, "free_memory": 4096, "free_spindles": 0, "group": "uuid-group-1", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 4096, "total_spindles": 0 }, "node2_1": { "drained": false, "free_disk": 7168, "free_memory": 4096, "free_spindles": 0, "group": "uuid-group-2", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 7168, "total_memory": 4096, "total_spindles": 0 } }, "request": { "disk_space_total": 1536, "disk_template": "file", "disks": [ { "size": 1536 } ], "memory": 1024, "name": "instance1", "required_nodes": 1, "spindle_use": 2, "nics":[ { "mac":"aa:00:00:85:f3:a7", "network":"uuid-net-1-1", "nicparams":{} }, { "mac":"aa:00:00:85:f3:a8", "network":"uuid-net-1-2", "nicparams":{} } ], "tags": [], "type": "allocate", "vcpus": 1 }, "version": 2 } ganeti-2.9.3/test/data/htools/hspace-tiered-vcpu.data0000644000000000000000000000111612267470014022517 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1||N|1|2 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1||N|1|2 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1||N|1|3 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1||N|1|4 |30720,2,64512,1,1,1|30720,2,64512,1,1,1;32768,4,65536,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|1.0|32.0 group-01|30720,2,64512,1,1,1|30720,2,64512,1,1,1;32768,4,65536,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|1.0|32.0 ganeti-2.9.3/test/data/htools/hail-alloc-invalid-twodisks.json0000644000000000000000000000371712267470014024376 0ustar00rootroot00000000000000{ "cluster_tags": [], "instances": {}, "ipolicy": { "max": { "disk-size": 2048 }, "min": { "disk-size": 1024 } }, "nodegroups": { "uuid-group-1": { "alloc_policy": "preferred", "ipolicy": { "disk-templates": [ "file" ], "minmax" : [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "default", "networks": [], "tags": [] } }, 
"nodes": { "node1": { "drained": false, "free_disk": 1377280, "free_memory": 31389, "free_spindles": 12, "group": "uuid-group-1", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 1377280, "total_memory": 32763, "total_spindles": 12 } }, "request": { "disk_space_total": 1536, "disk_template": "file", "disks": [ { "spindles": 1, "size": 768 }, { "spindles": 1, "size": 768 } ], "memory": 1024, "name": "instance1", "required_nodes": 1, "spindle_use": 2, "tags": [], "type": "allocate", "vcpus": 1, "nics": [] }, "version": 2 } ganeti-2.9.3/test/data/htools/clean-nonzero-score.data0000644000000000000000000000133012244641676022721 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|91552|0|91424|953674|953674|16|N|fake-uuid-01|1 node-01-002|91552|0|91296|953674|953674|16|N|fake-uuid-01|1 node-01-003|91552|0|91296|953674|953674|16|N|fake-uuid-01|1 new-0|128|1024|1|running|Y|node-01-003||diskless||1 new-1|128|1024|1|running|Y|node-01-002||diskless||1 new-2|128|1024|1|running|Y|node-01-001||diskless||1 new-3|128|1024|1|running|Y|node-01-003||diskless||1 new-4|128|1024|1|running|Y|node-01-002||diskless||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-node-evac.json0000644000000000000000000002604212267470014021650 0ustar00rootroot00000000000000{ "cluster_tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "nodegroups": { "uuid-group-1": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": "preferred", "networks": [], "tags": [], "name": "default" } }, "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "min": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "cpu-count": 8, "disk-count": 16, "spindle-use": 8 }, "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "cluster_name": "cluster", "instances": { "instance14": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance13": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:9c", "link": "xen-br1", "mode": 
"bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance18": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 128, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:55:94:93", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 8192, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance19": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:15:92:6f", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance2": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:73:20:3e", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance3": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 }, { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 384, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:ec:e8:a2", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance4": { "disks": [ { "spindles": 2, "mode": "rw", "size": 2048 } ], "disk_space_total": 2176, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:62:b0:76", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node4", "node3" ], "os": "instance-debootstrap" }, "instance8": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:3f:6d:e3", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance9": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [ "test:test" ], "nics": [ { "ip": null, "mac": "aa:00:00:10:d2:01", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "instance-debootstrap" }, "instance20": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:db:2a:6d", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" } }, "version": 2, "nodes": { "node1": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": 
"192.168.2.1", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31389, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.1", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node2": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.2", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31746, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1376640, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.2", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node3": { "total_disk": 1377304, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.3", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31234, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1373336, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.3", "i_pri_memory": 2432, "free_spindles": 6, "total_spindles": 12, "vm_capable": true, "offline": false }, "node4": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.4", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 22914, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1371520, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.4", "i_pri_memory": 23552, "free_spindles": 0, "total_spindles": 12, "vm_capable": true, "offline": false } }, "request": { "evac_mode": "all", "instances": [ "instance2" ], "type": "node-evacuate" } } ganeti-2.9.3/test/data/htools/hail-invalid-reloc.json0000644000000000000000000000036112230001635022520 0ustar00rootroot00000000000000{ "cluster_tags": [], "nodegroups": {}, "nodes": {}, "instances": {}, "request": { "relocate_from": [ "node4" ], "required_nodes": "aaa", "type": "relocate", "name": 0, "disk_space_total": "aaa" } } ganeti-2.9.3/test/data/htools/hspace-tiered.data0000644000000000000000000000110412244641676021552 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1 |129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|129024,4,1047552,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-alloc-twodisks.json0000644000000000000000000000372012267470014022744 0ustar00rootroot00000000000000{ "cluster_tags": [], "instances": {}, "ipolicy": { "max": { "disk-size": 2048 }, "min": { "disk-size": 1024 } }, "nodegroups": { "uuid-group-1": { "alloc_policy": "preferred", "ipolicy": { "disk-templates": [ "file" ], 
"minmax": [ { "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048, "memory-size": 12800, "nic-count": 8, "spindle-use": 8 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 4.0 }, "name": "default", "networks": [], "tags": [] } }, "nodes": { "node1": { "drained": false, "free_disk": 1377280, "free_memory": 31389, "free_spindles": 12, "group": "uuid-group-1", "ndparams": { "spindle_count": 1, "exclusive_storage": false }, "offline": false, "reserved_memory": 1017, "reserved_cpus": 1, "total_cpus": 4, "total_disk": 1377280, "total_memory": 32763, "total_spindles": 12 } }, "request": { "disk_space_total": 3072, "disk_template": "file", "disks": [ { "spindles": 1, "size": 1536 }, { "spindles": 1, "size": 1536 } ], "memory": 1024, "name": "instance1", "required_nodes": 1, "spindle_use": 2, "tags": [], "type": "allocate", "vcpus": 1, "nics": [] }, "version": 2 } ganeti-2.9.3/test/data/htools/hroller-full.data0000644000000000000000000000214612267470014021442 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| node-00|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-01|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-10|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-11|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-20|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-21|91552|0|91424|3102|1052|16|N|fake-uuid-01|1 node-30|91553|0|91424|3102|1053|16|N|fake-uuid-01|1 node-31|91553|0|91424|3102|1053|16|M|fake-uuid-01|1 inst-00|128|1024|1|running|Y|node-00|node-01|drbd||1 inst-00|128|1024|1|running|Y|node-01|node-00|drbd||1 inst-10|128|1024|1|running|Y|node-10|node-11|drbd||1 inst-11|128|1024|1|running|Y|node-11|node-10|drbd||1 inst-20|128|1024|1|running|Y|node-20|node-21|drbd||1 inst-21|128|1024|1|running|Y|node-21|node-20|drbd||1 inst-30|128|1024|1|running|Y|node-30|node-31|drbd||1 inst-31|128|1024|1|running|Y|node-31|node-30|drbd||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/empty-cluster.data0000644000000000000000000000044012244641676021654 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-reloc-drbd.json0000644000000000000000000002612212267470014022023 0ustar00rootroot00000000000000{ "cluster_tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "nodegroups": { "uuid-group-1": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": 
"preferred", "networks": [], "tags": [], "name": "default" } }, "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "cpu-count": 8, "disk-count": 16, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "cluster_name": "cluster", "instances": { "instance14": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance13": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:9c", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance18": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 128, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:55:94:93", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 8192, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance19": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:15:92:6f", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance2": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:73:20:3e", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance3": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 }, { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 384, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:ec:e8:a2", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance4": { "disks": [ { "spindles": 2, "mode": "rw", "size": 2048 } ], "disk_space_total": 2176, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:62:b0:76", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node4", "node3" ], "os": "instance-debootstrap" }, "instance8": { "disks": [ { 
"spindles": 1, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:3f:6d:e3", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance9": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [ "test:test" ], "nics": [ { "ip": null, "mac": "aa:00:00:10:d2:01", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "instance-debootstrap" }, "instance20": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:db:2a:6d", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" } }, "version": 2, "nodes": { "node1": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.1", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31389, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.1", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node2": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.2", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31746, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1376640, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.2", "i_pri_memory": 0, "free_spindles": 11, "total_spindles": 12, "vm_capable": true, "offline": false }, "node3": { "total_disk": 1377304, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.3", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31234, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1373336, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.3", "i_pri_memory": 2432, "free_spindles": 6, "total_spindles": 12, "vm_capable": true, "offline": false }, "node4": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.4", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 22914, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1371520, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.4", "i_pri_memory": 23552, "free_spindles": 0, "total_spindles": 12, "vm_capable": true, "offline": false } }, "request": { "relocate_from": [ "node4" ], "required_nodes": 1, "type": "relocate", "name": "instance14", "disk_space_total": 256 } } 
ganeti-2.9.3/test/data/htools/missing-resources.data0000644000000000000000000000055712244641676022531 0ustar00rootroot00000000000000default|fake-uuid-01|preferred|| node1|1024|0|1024|95367|95367|4|N|fake-uuid-01|1 node2|1024|0|0|95367|0|4|N|fake-uuid-01|1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,8|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 default|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,8|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 ganeti-2.9.3/test/data/htools/hail-alloc-spindles.json0000644000000000000000000002152712267470014022723 0ustar00rootroot00000000000000{ "cluster_tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "nodegroups": { "uuid-group-1": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 2 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": "preferred", "networks": [], "tags": [], "name": "group1" }, "uuid-group-2": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 2 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 2 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 3 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "alloc_policy": "preferred", "networks": [], "tags": [], "name": "group2" } }, "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "cpu-count": 8, "disk-count": 16, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "cluster_name": "cluster", "instances": { "instance1": { "disks": [ { "spindles": 1, "mode": "rw", "size": 650000 } ], "disk_space_total": 650000, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:91", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "plain", "memory": 1024, "nodes": [ "node1" ], "os": "instance-debootstrap" }, "instance2": { "disks": [ { "spindles": 2, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:92", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "plain", "memory": 1024, "nodes": [ "node2" ], "os": "instance-debootstrap" }, "instance3": { "disks": [ { "spindles": 1, "mode": "rw", "size": 650000 } ], "disk_space_total": 650000, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": 
"aa:00:00:7f:8c:93", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "plain", "memory": 1024, "nodes": [ "node3" ], "os": "instance-debootstrap" }, "instance4": { "disks": [ { "spindles": 2, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:94", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "plain", "memory": 1024, "nodes": [ "node4" ], "os": "instance-debootstrap" } }, "version": 2, "nodes": { "node1": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.1", "i_pri_up_memory": 1024, "tags": [], "master_candidate": true, "free_memory": 30722, "ndparams": { "spindle_count": 2, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 687280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.1", "i_pri_memory": 1024, "free_spindles": 1, "total_spindles": 2, "vm_capable": true, "offline": false }, "node2": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.2", "i_pri_up_memory": 1024, "tags": [], "master_candidate": true, "free_memory": 30722, "ndparams": { "spindle_count": 2, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377024, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.2", "i_pri_memory": 1024, "free_spindles": 0, "total_spindles": 2, "vm_capable": true, "offline": false }, "node3": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-2", "secondary_ip": "192.168.2.3", "i_pri_up_memory": 1024, "tags": [], "master_candidate": true, "free_memory": 30722, "ndparams": { "spindle_count": 2, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 687280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.3", "i_pri_memory": 1204, "free_spindles": 1, "total_spindles": 2, "vm_capable": true, "offline": false }, "node4": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-2", "secondary_ip": "192.168.2.4", "i_pri_up_memory": 1024, "tags": [], "master_candidate": true, "free_memory": 30722, "ndparams": { "spindle_count": 2, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377024, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.4", "i_pri_memory": 1024, "free_spindles": 0, "total_spindles": 2, "vm_capable": true, "offline": false } }, "request": { "disks": [ { "spindles": 1, "mode": "rw", "size": 1024 } ], "required_nodes": 1, "name": "instance10", "tags": [], "hypervisor": "xen-pvm", "disk_space_total": 1024, "nics": [ { "ip": null, "mac": "00:11:22:33:44:55", "bridge": null } ], "vcpus": 1, "spindle_use": 3, "os": "instance-debootstrap", "disk_template": "plain", "memory": 1024, "type": "allocate" } } ganeti-2.9.3/test/data/htools/hail-alloc-drbd.json0000644000000000000000000002703012267470014022010 0ustar00rootroot00000000000000{ "cluster_tags": [ "htools:iextags:test", "htools:iextags:service-group" ], "nodegroups": { "uuid-group-1": { "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "disk-count": 1, "memory-size": 128, "cpu-count": 1, 
"spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 128, "disk-count": 1, "memory-size": 128, "cpu-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "disk-count": 16, "memory-size": 32768, "cpu-count": 8, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "networks": [], "alloc_policy": "preferred", "tags": [], "name": "default" } }, "ipolicy": { "std": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "minmax": [ { "min": { "nic-count": 1, "disk-size": 1024, "memory-size": 128, "cpu-count": 1, "disk-count": 1, "spindle-use": 1 }, "max": { "nic-count": 8, "disk-size": 1048576, "memory-size": 32768, "cpu-count": 8, "disk-count": 16, "spindle-use": 8 } } ], "vcpu-ratio": 4.0, "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "spindle-ratio": 32.0 }, "enabled_hypervisors": [ "xen-pvm", "xen-hvm" ], "cluster_name": "cluster", "instances": { "instance14": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:eb:0b:a5", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "admin_state": "down", "disk_template": "drbd", "spindle_use": 1, "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance13": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:7f:8c:9c", "link": "xen-br1", "mode": "bridged", "bridge": "xen-br1" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance18": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 128, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:55:94:93", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 8192, "nodes": [ "node4" ], "os": "instance-debootstrap" }, "instance19": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:15:92:6f", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance2": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:73:20:3e", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "up", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "debian-image" }, "instance3": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 }, { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 384, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:ec:e8:a2", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, 
"instance4": { "disks": [ { "spindles": 2, "mode": "rw", "size": 2048 } ], "disk_space_total": 2176, "hypervisor": "xen-pvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:62:b0:76", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node4", "node3" ], "os": "instance-debootstrap" }, "instance8": { "disks": [ { "spindles": 1, "mode": "rw", "size": 256 } ], "disk_space_total": 256, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:3f:6d:e3", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "debian-image" }, "instance9": { "disks": [ { "spindles": 1, "mode": "rw", "size": 128 } ], "disk_space_total": 256, "hypervisor": "xen-pvm", "tags": [ "test:test" ], "nics": [ { "ip": null, "mac": "aa:00:00:10:d2:01", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "drbd", "memory": 128, "nodes": [ "node3", "node4" ], "os": "instance-debootstrap" }, "instance20": { "disks": [ { "spindles": 1, "mode": "rw", "size": 512 } ], "disk_space_total": 512, "hypervisor": "kvm", "tags": [], "nics": [ { "ip": null, "mac": "aa:00:00:db:2a:6d", "link": "xen-br0", "mode": "bridged", "bridge": "xen-br0" } ], "vcpus": 1, "spindle_use": 1, "admin_state": "down", "disk_template": "plain", "memory": 128, "nodes": [ "node4" ], "os": "instance-debootstrap" } }, "version": 2, "nodes": { "node1": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.1", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31389, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1377280, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.1", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node2": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.2", "i_pri_up_memory": 0, "tags": [], "master_candidate": true, "free_memory": 31746, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1376640, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.2", "i_pri_memory": 0, "free_spindles": 12, "total_spindles": 12, "vm_capable": true, "offline": false }, "node3": { "total_disk": 1377304, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.3", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 31234, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, "reserved_cpus": 1, "master_capable": true, "free_disk": 1373336, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.3", "i_pri_memory": 2432, "free_spindles": 6, "total_spindles": 12, "vm_capable": true, "offline": false }, "node4": { "total_disk": 1377280, "total_cpus": 4, "group": "uuid-group-1", "secondary_ip": "192.168.2.4", "i_pri_up_memory": 128, "tags": [], "master_candidate": true, "free_memory": 22914, "ndparams": { "spindle_count": 1, "oob_program": null, "exclusive_storage": false }, "reserved_memory": 1017, 
"reserved_cpus": 1, "master_capable": true, "free_disk": 1371520, "drained": false, "total_memory": 32763, "primary_ip": "192.168.1.4", "i_pri_memory": 23552, "free_spindles": 0, "total_spindles": 12, "vm_capable": true, "offline": false } }, "request": { "disks": [ { "spindles": 1, "mode": "rw", "size": 1024 } ], "required_nodes": 2, "name": "instance1", "tags": [], "hypervisor": "xen-pvm", "disk_space_total": 1024, "nics": [ { "ip": null, "mac": "00:11:22:33:44:55", "bridge": null } ], "vcpus": 1, "spindle_use": 1, "os": "instance-debootstrap", "disk_template": "drbd", "memory": 1024, "type": "allocate" } } ganeti-2.9.3/test/data/htools/hspace-tiered-ipolicy.data0000644000000000000000000000074512244641676023232 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred| node-01-001|2000|200|1800|4300|4300|8|N|fake-uuid-01 node-01-002|2000|4|1996|3900|3900|8|N|fake-uuid-01 node-01-003|2000|4|1996|3900|3900|8|N|fake-uuid-01 node-01-004|2000|4|1996|3900|3900|8|N|fake-uuid-01 |936,4,1064,1,1,1|900,4,2200,1,1,1;1000,4,2600,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|900,4,2200,1,1,1|900,4,2200,1,1,1;1000,4,2600,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/n1-failure.data0000644000000000000000000000256512244641676021014 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| group-02|fake-uuid-02|unallocable|| node-01-001|256|0|0|7629394|7625298|16|N|fake-uuid-01|1 node-01-002|256|0|0|7629394|7625298|16|N|fake-uuid-01|1 node-01-003|256|0|0|7629394|7625298|16|N|fake-uuid-01|1 node-01-004|256|0|0|7629394|7625298|16|N|fake-uuid-01|1 node-02-001|65536|0|65536|7629394|7629394|16|N|fake-uuid-01|1 node-02-002|65536|0|65536|7629394|7629394|16|N|fake-uuid-01|1 node-02-003|65536|0|65536|7629394|7629394|16|N|fake-uuid-01|1 node-02-004|65536|0|65536|7629394|7629394|16|N|fake-uuid-01|1 new-0|128|1024|1|running|Y|node-01-004|node-01-003|drbd||1 new-1|128|1024|1|running|Y|node-01-002|node-01-001|drbd||1 new-2|128|1024|1|running|Y|node-01-003|node-01-001|drbd||1 new-3|128|1024|1|running|Y|node-01-001|node-01-004|drbd||1 new-4|128|1024|1|running|Y|node-01-002|node-01-004|drbd||1 new-5|128|1024|1|running|Y|node-01-003|node-01-002|drbd||1 new-6|128|1024|1|running|Y|node-01-004|node-01-002|drbd||1 new-7|128|1024|1|running|Y|node-01-001|node-01-003|drbd||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-02|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/invalid-node.data0000644000000000000000000000066312244641676021417 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|1024|0|1024|95367|95367|4|N|fake-uuid-01|1 node-01-002|1024|0|896|95367|94343|4|N|fake-uuid-01|1 new-0|128|1024|1|running|Y|no-such-node||plain| |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,8|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,8|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 ganeti-2.9.3/test/data/htools/hbal-excl-tags.data0000644000000000000000000000077412244641676021644 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01|16384|0|14336|409600|153600|16|N|fake-uuid-01|1 
node-02|16384|0|13312|409600|153600|16|N|fake-uuid-01|1 dns1|1024|51200|1|running|Y|node-01|node-02|drbd|service-group:dns,foo|1 dns2|1024|51200|1|running|Y|node-01|node-02|drbd|service-group:dns,foo|1 ftp1|1024|51200|1|running|Y|node-02|node-01|drbd|test:ftp,bar|1 ftp2|1024|51200|1|running|Y|node-02|node-01|drbd|test:ftp,bar|1 admin|1024|51200|1|running|Y|node-02|node-01|drbd|foo|1 htools:iextags:service-group ganeti-2.9.3/test/data/htools/hspace-tiered-dualspec.data0000644000000000000000000000123012244641676023350 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1 node-01-002|262144|65536|196608|2097152|2097152|8|N|fake-uuid-01|1 node-01-003|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1 node-01-004|262144|1024|261120|2097152|2097152|8|N|fake-uuid-01|1 |63488,2,522240,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12;63488,2,522240,1,1,1;65536,2,524288,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|63488,2,522240,1,1,1|129024,4,1047552,1,1,1;131072,4,1048576,16,8,12;63488,2,522240,1,1,1;65536,2,524288,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/hspace-tiered-resourcetypes.data0000644000000000000000000000074612244641676024477 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|2000|200|1700|5000|5000|8|N|fake-uuid-01 node-01-002|2000|4|1996|5000|4900|8|N|fake-uuid-01 node-01-003|2000|4|1996|5000|5000|8|N|fake-uuid-01 node-01-004|2000|4|1996|5000|5000|8|N|fake-uuid-01 |900,4,2200,1,1,1|900,4,2000,1,1,1;1000,4,2600,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 group-01|900,4,2200,1,1,1|900,4,2000,1,1,1;1000,4,2600,16,8,12|plain,diskless,file,sharedfile,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/htools/common-suffix.data0000644000000000000000000000072212244641676021634 0ustar00rootroot00000000000000default|fake-uuid-01|preferred|| node1.example.com|1024|0|1024|95367|95367|4|N|fake-uuid-01|1 node2.example.com|1024|0|896|95367|94343|4|N|fake-uuid-01|1 instance1.example.com|128|1024|1|running|Y|node2.example.com||plain| |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,1|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 default|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,1|diskless,file,sharedfile,plain,blockdev,drbd,rbd|4.0|32.0 ganeti-2.9.3/test/data/htools/unique-reboot-order.data0000644000000000000000000000124612267470014022742 0ustar00rootroot00000000000000group-01|fake-uuid-01|preferred|| node-01-001|91552|0|91424|3500|1196|16|M|fake-uuid-01|1 node-01-002|91552|0|91296|3500|1196|16|N|fake-uuid-01|1 node-01-003|91552|0|91296|3500|1196|16|N|fake-uuid-01|1 new-0|128|1152|1|running|Y|node-01-001|node-01-002|drbd||1 new-1|128|1152|1|running|Y|node-01-002|node-01-003|drbd||1 nonred-0|128|1152|1|running|Y|node-01-001||plain||1 nonred-1|128|1152|1|running|Y|node-01-003||plain||1 |128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 group-01|128,1,1024,1,1,1|128,1,1024,1,1,1;32768,8,1048576,16,8,12|diskless,file,sharedfile,plain,blockdev,drbd,rbd,ext|4.0|32.0 ganeti-2.9.3/test/data/cluster_config_2.7.json0000644000000000000000000003321712244641676021173 0ustar00rootroot00000000000000{ "cluster": { "beparams": { "default": { "always_failover": false, "auto_balance": true, "maxmem": 128, "minmem": 128, "spindle_use": 1, "vcpus": 1 } }, "blacklisted_os": [], 
"candidate_pool_size": 10, "cluster_name": "cluster.name.example.com", "ctime": 1343869045.604884, "default_iallocator": "hail", "disk_state_static": {}, "diskparams": { "blockdev": {}, "diskless": {}, "drbd": { "c-delay-target": 1, "c-fill-target": 200, "c-max-rate": 2048, "c-min-rate": 1024, "c-plan-ahead": 1, "data-stripes": 2, "disk-barriers": "bf", "disk-custom": "", "dynamic-resync": false, "meta-barriers": true, "meta-stripes": 2, "metavg": "xenvg", "net-custom": "", "resync-rate": 1024 }, "ext": {}, "file": {}, "plain": { "stripes": 2 }, "rbd": { "pool": "rbd" }, "sharedfile": {} }, "drbd_usermode_helper": "/bin/true", "enabled_hypervisors": [ "xen-pvm" ], "file_storage_dir": "", "hidden_os": [], "highest_used_port": 32105, "hv_state_static": { "xen-pvm": { "cpu_node": 1, "cpu_total": 1, "mem_hv": 0, "mem_node": 0, "mem_total": 0 } }, "hvparams": { "chroot": { "init_script": "/ganeti-chroot" }, "fake": {}, "kvm": { "acpi": true, "boot_order": "disk", "cdrom2_image_path": "", "cdrom_disk_type": "", "cdrom_image_path": "", "cpu_cores": 0, "cpu_mask": "all", "cpu_sockets": 0, "cpu_threads": 0, "cpu_type": "", "disk_cache": "default", "disk_type": "paravirtual", "floppy_image_path": "", "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-kvmU", "keymap": "", "kvm_extra": "", "kvm_flag": "", "kvm_path": "/usr/bin/kvm", "machine_version": "", "mem_path": "", "migration_bandwidth": 4, "migration_downtime": 30, "migration_mode": "live", "migration_port": 4041, "nic_type": "paravirtual", "reboot_behavior": "reboot", "root_path": "/dev/vda1", "security_domain": "", "security_model": "none", "serial_console": true, "serial_speed": 38400, "soundhw": "", "spice_bind": "", "spice_image_compression": "", "spice_ip_version": 0, "spice_jpeg_wan_compression": "", "spice_password_file": "", "spice_playback_compression": true, "spice_streaming_video": "", "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH", "spice_use_tls": false, "spice_use_vdagent": true, "spice_zlib_glz_wan_compression": "", "usb_devices": "", "usb_mouse": "", "use_chroot": false, "use_localtime": false, "vga": "", "vhost_net": false, "vnc_bind_address": "", "vnc_password_file": "", "vnc_tls": false, "vnc_x509_path": "", "vnc_x509_verify": false }, "lxc": { "cpu_mask": "" }, "xen-hvm": { "acpi": true, "blockdev_prefix": "hd", "boot_order": "cd", "cdrom_image_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "device_model": "/usr/lib/xen/bin/qemu-dm", "disk_type": "paravirtual", "kernel_path": "/usr/lib/xen/boot/hvmloader", "migration_mode": "non-live", "migration_port": 8082, "nic_type": "rtl8139", "pae": true, "pci_pass": "", "reboot_behavior": "reboot", "use_localtime": false, "vnc_bind_address": "0.0.0.0", "vnc_password_file": "/your/vnc-cluster-password" }, "xen-pvm": { "blockdev_prefix": "sd", "bootloader_args": "", "bootloader_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-xenU", "migration_mode": "live", "migration_port": 8082, "reboot_behavior": "reboot", "root_path": "/dev/xvda1", "use_bootloader": false } }, "ipolicy": { "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "max": { "cpu-count": 8, "disk-count": 16, "disk-size": 1048576, "memory-size": 32768, "nic-count": 8, "spindle-use": 12 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "spindle-ratio": 32.0, "std": { "cpu-count": 1, 
"disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 1.0 }, "mac_prefix": "aa:bb:cc", "maintain_node_health": false, "master_ip": "192.0.2.87", "master_netdev": "eth0", "master_netmask": 32, "master_node": "node1.example.com", "modify_etc_hosts": true, "modify_ssh_setup": true, "mtime": 1361964122.79471, "ndparams": { "exclusive_storage": false, "oob_program": "", "spindle_count": 1 }, "nicparams": { "default": { "link": "br974", "mode": "bridged" } }, "os_hvp": { "TEMP-Ganeti-QA-OS": { "xen-hvm": { "acpi": false, "pae": true }, "xen-pvm": { "root_path": "/dev/sda5" } } }, "osparams": {}, "prealloc_wipe_disks": false, "primary_ip_family": 2, "reserved_lvs": [], "rsahostkeypub": "YOURKEY", "serial_no": 3189, "shared_file_storage_dir": "/srv/ganeti/shared-file-storage", "tags": [ "mytag" ], "tcpudp_port_pool": [ 32101, 32102, 32103, 32104, 32105 ], "uid_pool": [], "use_external_mip_script": false, "uuid": "dddf8c12-f2d8-4718-a35b-7804daf12a3f", "volume_group_name": "xenvg" }, "ctime": 1343869045.605523, "instances": { "instance1.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1363620258.608976, "disk_template": "drbd", "disks": [ { "children": [ { "dev_type": "lvm", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "size": 1024 }, { "dev_type": "lvm", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "size": 128 } ], "dev_type": "drbd8", "iv_name": "disk/0", "logical_id": [ "node1.example.com", "node3.example.com", 32100, 0, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "mode": "rw", "params": {}, "physical_id": [ "198.51.100.82", 32100, "198.51.100.84", 32100, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "size": 1024 } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1363620320.874901, "name": "instance1.example.com", "nics": [ { "mac": "aa:bb:cc:b2:6e:0b", "nicparams": {} } ], "os": "busybox", "osparams": {}, "primary_node": "node1.example.com", "serial_no": 2, "uuid": "6c078d22-3eb6-4780-857d-81772e09eef1" }, "instance2.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1355186880.451181, "disk_template": "plain", "disks": [ { "dev_type": "lvm", "iv_name": "disk/0", "logical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "mode": "rw", "params": {}, "physical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "size": 102400 } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1355186898.307642, "name": "instance2.example.com", "nics": [ { "mac": "aa:bb:cc:56:83:fb", "nicparams": {} } ], "os": "debian-image", "osparams": {}, "primary_node": "node3.example.com", "serial_no": 2, "tags": [], "uuid": "8fde9f6d-e1f1-4850-9e9c-154966f622f5" }, "instance3.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1354038435.343601, "disk_template": "plain", "disks": [ { "dev_type": "lvm", "iv_name": "disk/0", "logical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "mode": "rw", "params": {}, "physical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "size": 1280 } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1354224585.700732, "name": "instance3.example.com", "nics": [ { "mac": "aa:bb:cc:5e:5c:75", "nicparams": {} } ], "os": "debian-image", "osparams": {}, "primary_node": "node2.example.com", 
"serial_no": 4, "tags": [], "uuid": "4e091bdc-e205-4ed7-8a47-0c9130a6619f" } }, "mtime": 1361984633.373014, "networks": { "99f0128a-1c84-44da-90b9-9581ea00c075": { "ext_reservations": "1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", "name": "a network", "network": "203.0.113.0/24", "reservations": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "serial_no": 1, "uuid": "99f0128a-1c84-44da-90b9-9581ea00c075" } }, "nodegroups": { "5244a46d-7506-4e14-922d-02b58153dde1": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": { "max": {}, "min": {}, "std": {} }, "mtime": 1361963775.575009, "name": "default", "ndparams": {}, "networks": {}, "serial_no": 125, "tags": [], "uuid": "5244a46d-7506-4e14-922d-02b58153dde1" }, "6c0a8916-b719-45ad-95dd-82192b1e473f": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": { "disk-templates": [ "plain" ], "max": { "nic-count": 18, "spindle-use": 14 }, "min": { "cpu-count": 2, "disk-count": 2 }, "spindle-ratio": 5.2, "std": {}, "vcpu-ratio": 3.14 }, "mtime": 1361963775.575009, "name": "another", "ndparams": { "exclusive_storage": true }, "networks": {}, "serial_no": 125, "tags": [], "uuid": "6c0a8916-b719-45ad-95dd-82192b1e473f" } }, "nodes": { "node1.example.com": { "ctime": 1349722460.022264, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1359986533.353329, "name": "node1.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.82", "secondary_ip": "198.51.100.82", "serial_no": 197, "tags": [], "uuid": "9a12d554-75c0-4cb1-8064-103365145db0", "vm_capable": true }, "node2.example.com": { "ctime": 1343869045.604884, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1358348755.779906, "name": "node2.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.83", "secondary_ip": "198.51.100.83", "serial_no": 6, "tags": [], "uuid": "2ae3d962-2dad-44f2-bdb1-85f77107f907", "vm_capable": true }, "node3.example.com": { "ctime": 1343869205.934807, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1353019704.885368, "name": "node3.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.84", "secondary_ip": "198.51.100.84", "serial_no": 2, "tags": [], "uuid": "41f9c238-173c-4120-9e41-04ad379b647a", "vm_capable": true } }, "serial_no": 7624, "version": 2070000 } ganeti-2.9.3/test/data/ovfdata/0000755000000000000000000000000012271445545016317 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/ovfdata/second_disk.vmdk0000644000000000000000000020000012230001635021436 0ustar00rootroot00000000000000KDMVd€€ # Disk DescriptorFile version=1 CID=4e54f410 parentCID=ffffffff createType="monolithicSparse" # Extent description RW 100 SPARSE "second_disk.vmdk" # The Disk Data Base #DDB ddb.virtualHWVersion = "4" ddb.geometry.cylinders = "0" ddb.geometry.heads = "16" ddb.geometry.sectors = "63" ddb.adapterType = "ide" 
ganeti-2.9.3/test/data/ovfdata/wrong_extension.ovd0000644000000000000000000001206012230001635022237 0ustar00rootroot00000000000000 Virtual disk information The list of logical networks The bridged network A virtual machine AyertiennaSUSE.x86_64-0.0.2 The kind of installed guest operating system Virtual hardware requirements Virtual Hardware Family 0 AyertiennaSUSE.x86_64-0.0.2 vmx-04 hertz * 10^6 Number of Virtual CPUs 1 virtual CPU(s) 1 3 1 byte * 2^20 Memory Size 512MB of memory 2 4 512 0 USB Controller usb 3 23 0 SCSI Controller scsiController0 4 lsilogic 6 0 IDE Controller ideController0 5 5 0 false Floppy Drive floppy0 6 14 0 false cdrom1 7 5 15 0 disk1 ovf:/disk/vmdisk1 8 4 17 2 true bridged E1000 ethernet adapter on "bridged" ethernet0 9 E1000 10 ganeti-2.9.3/test/data/ovfdata/wrong_config.ini0000644000000000000000000000002012230001635021450 0ustar00rootroot00000000000000It's just wrong ganeti-2.9.3/test/data/ovfdata/no_os.ini0000644000000000000000000000055612230001635020122 0ustar00rootroot00000000000000[instance] disk0_dump = rawdisk.raw nic0_mode = bridged name = ganeti-test-xen hypervisor = xen-pvm disk_count = 1 nic0_mac = aa:00:00:d8:2c:1e nic_count = 1 nic0_link = xen-br0 nic0_ip = None disk0_ivname = disk/0 disk0_size = 0 [hypervisor] root-path = /dev/sda kernel_args = ro [export] version = 0 [os] [backend] auto_balance = False vcpus = 1 memory = 2048 ganeti-2.9.3/test/data/ovfdata/wrong_manifest.mf0000644000000000000000000000020112230001635021635 0ustar00rootroot00000000000000SHA1(new_disk.vmdk)= 0500304662fb8a6a7925b5a43bc0e05d6a03720d SHA1(wrong_manifest.ovf)= 0500304662fb8a6a7965b5a43bc0e05d6a03720d ganeti-2.9.3/test/data/ovfdata/wrong_ova.ova0000644000000000000000000001206012230001635021005 0ustar00rootroot00000000000000 Virtual disk information The list of logical networks The bridged network A virtual machine AyertiennaSUSE.x86_64-0.0.2 The kind of installed guest operating system Virtual hardware requirements Virtual Hardware Family 0 AyertiennaSUSE.x86_64-0.0.2 vmx-04 hertz * 10^6 Number of Virtual CPUs 1 virtual CPU(s) 1 3 1 byte * 2^20 Memory Size 512MB of memory 2 4 512 0 USB Controller usb 3 23 0 SCSI Controller scsiController0 4 lsilogic 6 0 IDE Controller ideController0 5 5 0 false Floppy Drive floppy0 6 14 0 false cdrom1 7 5 15 0 disk1 ovf:/disk/vmdisk1 8 4 17 2 true bridged E1000 ethernet adapter on "bridged" ethernet0 9 E1000 10 ganeti-2.9.3/test/data/ovfdata/virtualbox.ovf0000644000000000000000000001026112230001635021211 0ustar00rootroot00000000000000 List of the virtual disks used in the package Logical networks used in the package Logical network used by this appliance. 
A virtual machine The kind of installed guest operating system Ubuntu Virtual hardware requirements for a virtual machine Virtual Hardware Family 0 Ubuntu-freshly-created virtualbox-2.2 1 virtual CPU 1 virtual CPU Number of virtual CPUs 1 3 1 2048 MB of memory 2048 MB of memory Memory Size 2 4 MegaBytes 2048 ideController0 ideController0 IDE Controller 3 5 PIIX4 1 Ethernet adapter on 'NAT' Ethernet adapter on 'NAT' 5 10 PCNet32 true bridged disk1 disk1 Disk Image 7 17 /disk/vmdisk1 3 0 disk1 disk1 Disk Image 9 17 /disk/vmdisk1 3 0 ganeti-2.9.3/test/data/ovfdata/rawdisk.raw0000644000000000000000000001200012230001635020446 0ustar00rootroot00000000000000ganeti-2.9.3/test/data/ovfdata/new_disk.vmdk0000644000000000000000000020000012230001635020754 0ustar00rootroot00000000000000KDMVd€€ # Disk DescriptorFile version=1 CID=4e54f404 parentCID=ffffffff createType="monolithicSparse" # Extent description RW 100 SPARSE "new_disk.vmdk" # The Disk Data Base #DDB ddb.virtualHWVersion = "4" ddb.geometry.cylinders = "0" ddb.geometry.heads = "16" ddb.geometry.sectors = "63" ddb.adapterType = "ide" ganeti-2.9.3/test/data/ovfdata/ganeti.ovf0000644000000000000000000001000512230001635020255 0ustar00rootroot00000000000000 List of the virtual disks used in the package 0 False plain lenny-image bridged aa:00:00:d8:2c:1e none xen-br0 xen-pvm /dev/sda ro Logical networks used in the package Logical network used by this appliance. A virtual machine ganeti-test-xen The kind of installed guest operating system Ubuntu Virtual hardware requirements for a virtual machine Virtual Hardware Family 0 Ubuntu-freshly-created virtualbox-2.2 1 virtual CPU 1 virtual CPU Number of virtual CPUs 1 3 1 2048 MB of memory 2048 MB of memory Memory Size 2 4 MegaBytes 2048 Ethernet adapter on 'NAT' Ethernet adapter on 'NAT' 5 10 PCNet32 true disk1 disk1 Disk Image 7 17 /disk/vmdisk1 3 0 ganeti-2.9.3/test/data/ovfdata/wrong_manifest.ovf0000644000000000000000000001040312230001635022032 0ustar00rootroot00000000000000 List of the virtual disks used in the package 0 False lenny-image bridged aa:00:00:d8:2c:1e None xen-br0 xen-pvm /dev/sda ro Logical networks used in the package Logical network used by this appliance. A virtual machine The kind of installed guest operating system Ubuntu Virtual hardware requirements for a virtual machine Virtual Hardware Family 0 Ubuntu-freshly-created virtualbox-2.2 1 virtual CPU 1 virtual CPU Number of virtual CPUs 1 3 1 2048 MB of memory 2048 MB of memory Memory Size 2 4 MegaBytes 2048 ideController0 ideController0 IDE Controller 3 5 PIIX4 1 Ethernet adapter on 'NAT' Ethernet adapter on 'NAT' 5 10 PCNet32 true bridged network disk1 disk1 Disk Image 7 17 /disk/vmdisk1 3 0 ganeti-2.9.3/test/data/ovfdata/unsafe_path.ini0000644000000000000000000000060512230001635021275 0ustar00rootroot00000000000000[instance] disk0_dump = other/rawdisk.raw nic0_mode = bridged name = ganeti-test-xen hypervisor = xen-pvm disk_count = 1 nic0_mac = aa:00:00:d8:2c:1e nic_count = 1 nic0_link = xen-br0 nic0_ip = None disk0_ivname = disk/0 disk0_size = 0 [hypervisor] root-path = /dev/sda kernel_args = ro [export] version = 0 os = lenny-image [os] [backend] auto_balance = False vcpus = 1 memory = 2048 ganeti-2.9.3/test/data/ovfdata/no_disk_in_ref.ovf0000644000000000000000000001016512230001635021765 0ustar00rootroot00000000000000 List of the virtual disks used in the package Logical networks used in the package Logical network used by this appliance. 
A virtual machine The kind of installed guest operating system Ubuntu Virtual hardware requirements for a virtual machine Virtual Hardware Family 0 Ubuntu-freshly-created virtualbox-2.2 1 virtual CPU 1 virtual CPU Number of virtual CPUs 1 3 1 2048 MB of memory 2048 MB of memory Memory Size 2 4 MegaBytes 2048 ideController0 ideController0 IDE Controller 3 5 PIIX4 1 Ethernet adapter on 'NAT' Ethernet adapter on 'NAT' 5 10 PCNet32 true NAT disk1 disk1 Disk Image 7 17 /disk/vmdisk1 3 0 disk1 disk1 Disk Image 9 17 /disk/vmdisk1 3 0 ganeti-2.9.3/test/data/ovfdata/wrong_xml.ovf0000644000000000000000000000713312230001635021032 0ustar00rootroot00000000000000 Virtual disk information The list of logical networks The bridged network A virtual machine AyertiennaSUSE.x86_64-0.0.2 The kind of installed guest operating system Virtual hardware requirements Virtual Hardware Family 0 AyertiennaSUSE.x86_64-0.0.2 vmx-04 hertz * 10^6 Number of Virtual CPUs 1 virtual CPU(s) 1 3 1 byte * 2^20 Memory Size 512MB of memory 2 4 512 0 SCSI Controller scsiController0 4 lsilogic 6 0 IDE Controller ideController0 5 5 0 disk1 ovf:/disk/vmdisk1 8 4 17 ganeti-2.9.3/test/data/ovfdata/gzip_disk.ovf0000644000000000000000000001004212230001635020772 0ustar00rootroot00000000000000 List of the virtual disks used in the package 0 False plain lenny-image bridged aa:00:00:d8:2c:1e none xen-br0 xen-pvm /dev/sda ro Logical networks used in the package Logical network used by this appliance. A virtual machine ganeti-test-xen The kind of installed guest operating system Ubuntu Virtual hardware requirements for a virtual machine Virtual Hardware Family 0 Ubuntu-freshly-created virtualbox-2.2 1 virtual CPU 1 virtual CPU Number of virtual CPUs 1 3 1 2048 MB of memory 2048 MB of memory Memory Size 2 4 MegaBytes 2048 Ethernet adapter on 'NAT' Ethernet adapter on 'NAT' 5 10 PCNet32 true disk1 disk1 Disk Image 7 17 /disk/vmdisk1 3 0 ganeti-2.9.3/test/data/ovfdata/ova.ova0000644000000000000000000001026212230001635017573 0ustar00rootroot00000000000000‹œsSNíÚˆ¥óâÇñq¹¿{­$ùI’#Iš93;;ví®Öîbr×b-W’ÎÎ93{Ú™sÖ9gví•\I’$I’$I’$I’$I’$I’$I’$I’ä÷9fwf,wƽ~·ßý½^yÏî>çyžsžï÷<Ï9sŽ©R­Òª®¬ïìûÝ Åê¡¡¾¡UëGGÆòïâÈðÐpgyûïÅÑÕ}ÅâêáU16ºªo¨¸ª82ÚWúýÒA³ÍV©Q(ô•¦J­ÒÌl£´¯¹»tˆõjõÚD½ö;ŽÒ¿Çºs¯Ÿ™.ì­4šÕzm}qåPÿ¹V¬ÛRÛ[™®ï©òÄX»àÖBÖ_;]ªM­ï¯Ôvlï,¨5×÷ïjµö¬lNìªÌ”š+Ë3­É•õÆÔ`ö0Xéín°Ø[}m–.w“F©Yþåmöí¬Ì îkNTg‹ƒù9Ð]cpxpÓøÖk/«4ë³‰ÊÆééúD©•£Ù^iµªµ©Í™õ¹;ØÛüíwpEµÑš-MoßßlUf±ïë›Õ»Þ·oßÊ}#çù?ø×­ÙÞÙ×@µ–gcm¢2·ÕT­u`«©Î™Ú¿aE¡°î²Êd¥QÉŠÍö?³àüêtw®v5*ØZeßµåjs÷ʽ3åÝýª9´É¬Uìììbpá>ÖmÎêÛ+í¡éís¼6Yßð—j³U¨OZ»*…½Ýc,´÷Ü,Ì6+åBµÖ¹eOibwiª²n°³Mwóö;÷Ü^}<÷žÇ’¿»§ýP.k?Ôîcê.«7fJ­ôwf_©QY9QŸlî©LT'«ÝÙk¶lå®ÖÌôiÍ=¥F³2wX‹Žc]†pí¡[xtíåWtŸØãå Cë.8¸ÒÆÙVý¼Òt{Z6œ_šnVº«Î_|påËKSÍ Ý:=x˶=•F©ý¬è>Eº·ôn»¸4SÙ0]©ÕöTg:ãx`iwƒ¿¸‡îö•Ö¾zc÷Â}V':ƒZË^Ö÷7ê³­J¹nÞ:[ëåʆjyªRîÞGgÉ¢•6nÚX.7*Íæ†RiíÐPû¿òÙk‡'Ö{tÞ 7¿dnùÅõZoåƒË®û—jm÷†ësIÙÙèMFgÉÜ! 
öŽiþ€,8ìÎN.ÜŸ1Ú[mÖ?ßö¾÷ìY<¶½U.)5²¨•ù_ô¸õzk`O©µkÃ`¹²w°Y.u÷ppùÂõwWµÊôµ¥Ff¿Qï®;Ù‚Z|¯Ý¥ ¢»ìgOáu½£?Ô9[ŸÊi2]¨u×øÇ§joW¿ü|Y·¹ÒœhT÷tîiÑî»{ß¹?{¯6 ¥={¦«í“beNÅyõoÞ”øÇüƒZp=pݺ¸²o ¹«Ú>=æ.ówã+ÓLi"«-:¸öTw/ž­J³5'Bîûàɵè¼ê=œw¾fäà0tv{yFqwµVn_;gz:05[i_)çöUhvv6ÿ‘,Æ;gk­ÙCŽÒ¡QïÖÞ]Xj”Ûׯ7Î=ÄÞ:…]½• Êu³ÕFe¦Rk5 ¹ÎJ¿6fÙÍ«T´_×n™îì¢3xs÷1÷@ ç—fªÓû× þlÍE;ïÍáøæöewñ¢E+/xBŒ—³Ë¼T½Ñ˜Ì•d×ôþ‰F¥Ôj_Ä~u£_Û÷å9é6ôÆdgýúá•ÇÚ[gµçð¢kùøÂ!k¿aY»©ÔÚâßtÉŽuƒ n[´Éü±;Ôf‡ÛÎMóŸLÏÎì¬4ÚÏÒy»hööñ³gÝÁ}Ì›ŒboíCÎOç–¹7Vqé­¾`ᢠz£yél)³ÒÚà./?0ÆãKáá¡Ug¶ž×>à™ÊL½±é£üK›.q¤·v¶)l¯þ­²¬á^Þð®ZÂð|ƒ»£Vm5óЦJçíÏu¯·íâÛÿÁì´æ_6AÕreS½ÖjÔsÁl¿È/uv¹Ý§f|ó–ÂÁ—5;#Ë›Ñ%ÌÎÜmÛgwvn¾d|ü¯‹guî¶ÅÛ{×4w¾,zµŒi˜Ì¯U{ö/cün°Ä?¿³Qas£ºwy'Ūå {qIgEÞ¬çW‹êÄÁ§ÿ†Éî{ù_¼ýÐã¿­–÷l9ü C çáÀòåÏÇ–¼-ËûÃV¡TÎ’ö»V8ãâ—Ÿ±ôúG»ø•9›7Ì£Ëù¡ßòŒß”w}#ÃK|ÎbZZÙeÌZNýZï ÒÜh\²ü©Ê£­•—>-óW_âi³½½IaSÞS-ë¤Y½Ìêßr±ªÔšõZõºâÈXñ·Oß?yÖüËκÎgKŸÊù«/q*;€ŒwŸ_ÆTŽ-ó,[ÂT^Xo¶ænÏï±y`ƒ½Ocz/XaÑÆ‹†~áˆÿþ—ljr£>³Œ™Z°þ§jÓæË¶mý /Vg/s²–rÞýóW½ß{ÆÖ þò/¡n›ûÝhÝà܇ÉVü»?íf±ŸÿN÷ñkßÿ¬*æÎ÷?Ãc££#£cíïV}ÿó¿á¢Í[¯8<Þ×þÙwXûÇß{·Öûó¿çVþC_ßñ½¿žpD÷Ï¿ÿ¹·`EáèsðÆi…ÎÛ”¹W»z£ýMÒŠ¹oüŠ+6o^¿jÕXqõhixÅžÎ+B{ÑdÏŠîoí—±õýÍVþ1³-¯™3Õ¿UÊý+VœVØr}+›Ê_MW\ve¡¸zlll¸¸º°ý’—mßRèï}œ·íŠó»_Yµ7mÎZî<¸œ™…óJÍJaÅi›7Ÿ·bE¹¼seï3­ ¯ì}gSX_è_Õß¹©÷KP÷AUË•îÒÙÙjyeçë•õý#;‹å‰ryõ@idçØÀªòÙ#k&‹“åÕc“«'Ï.Œ­š·U÷¸×÷õ âÇœy›ÍÔ˾°Zß?:<¼³²srçÀP¹42°jõš‘RqÍØÀäÐÈÙÅ‘‘³+c£ÅŸÝçÂ],ýþ'ê33‡Üï5à?Ò é¤tJ:-‘ÎJƒi$¥sÒ¹iSº ]”¶¥íéÊtuº6M¤©Ôþÿˆë©™ö¥ÒMé–t[º#Ý•îI÷¥ÒCé‘ôXz"=•žIÏ¥ÒKé•ôZz#½•ÞIï¥ÒGé“ôYú"}•¾IߥÒOéðÃúúþ”ŽJǤãÒ é¤tJ:-‘ÎJƒi$¥sÒ¹iSº ]”¶¥íéÊtuº6M¤©´;ÕS3íK7¤›Ò-é¶tGº+Ý“îK¤‡Ò#é±ôDz*=“žK/¤—Ò+éµôFz+½“ÞK¤Ò'é³ôEú*}“¾K?¤ŸÒáÈñ§£Ò1é¸tB:)’NKg¤³Ò`IcéœtnÚ”.H¥mi{º2]®Mi*íNõÔLûÒ é¦tKº-Ý‘îJ÷¤ûÒé¡ôHz,=‘žJϤçÒ é¥ôJz-½‘ÞJï¤÷Òé£ôIú,}‘¾JߤïÒé§tøá9þtT:&—NH'¥SÒiéŒtVL#i,“ÎM›Ò颴-mOW¦«Óµi"M¥Ý©žši_º!Ý”nI·¥;Ò]éžt_z =”I¥'ÒSé™ô\z!½”^I¯¥7Ò[éô^ú }”>IŸ¥/ÒWé›ô]ú!ý”?"ÇŸŽJÇ¤ãŽø]D€ÿh'Ñ×wr:5žÎL©˜FÓš´>mL[ÒxÚš.M;ÒUéšTJ•TM3éº4›ö§ÓÍéÖt{º3ÝîM÷§ÓÃéÑôxz2=žMϧÓËéÕôzz3½ÞMï§ÓÇéÓôyú2}¾Mß§Óaìëûc:2ŽMǧÓÉéÔtz:3 ¤bMkÒú´1mIãikº4íHW¥kR)UR5ͤëÒlÚŸnL7§[ÓíéÎtwº7ÝŸL§GÓãéÉôtz6=Ÿ^L/§WÓëéÍôvz7½Ÿ>L§OÓçéËôuú6}Ÿ~L‡ýWŽ?™ŽNǦãÓ‰éätj:=™R1¦5i}Ú˜¶¤ñ´5]šv¤«Ò5©”*©šfÒui6íO7¦›Ó­éötgº;Ý›îO¦‡Ó£éñôdz:=›žO/¦—Ó«éõôfz;½›ÞO¦Ó§éóôeú:}›¾O?¦Ãþ”ãOG¦£Ó±éøtb:9šNOg¦TL£iMZŸ6¦-i”ó˜nbU—«åx˜¾ÿ”Gñíè~4¥ë¼ŠËf7ußI§UÌ›x³]Çqo±Z®æe3+§“ve{iÚW¿›vO(ºÚ·¦×wa8„É×wד«Ð[Æ_ߊ¶‰óÍ¢xØï¹™Å®­¼ÉÃe^Çö³ì2M‹âûù¦¬šŸùüãÝí¡Ë0½Qoýˆ«Elªíùt;/—E›ïÂÁQ8‹y±†GI§í'ØgoY^äë&V»î¦Ë¢½Ö©ÞËS7À?õúÔ ð¨ü^Xt(ganeti-2.9.3/test/data/ovfdata/ganeti.mf0000644000000000000000000000017112230001635020070 0ustar00rootroot00000000000000SHA1(ganeti.ovf)= d298200d9044c54b0fde13efaa90e564badc5961 SHA1(new_disk.vmdk)= 711c48f14c934228b8e117d036c913cdb9d63305 ganeti-2.9.3/test/data/ovfdata/empty.ini0000644000000000000000000000005712230001635020137 0ustar00rootroot00000000000000[instance] [hypervisor] [export] [os] [backend]ganeti-2.9.3/test/data/ovfdata/other/0000755000000000000000000000000012271445545017440 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/ovfdata/other/rawdisk.raw0000644000000000000000000001200012230001635021567 0ustar00rootroot00000000000000ganeti-2.9.3/test/data/ovfdata/no_ovf.ova0000644000000000000000000000260112230001635020272 0ustar00rootroot00000000000000‹®MNí˜moÛ6€ó¹¿‚Ó€¡fK”mÙõm^ZKÛÕI1ìC Y¢c"é’”÷×ïHQŽå8nd´耢yo|îx²ºœ]â‹° ޾x AÛ;òÚ~Ðiuá·|Ï7ëðoß ¼#Œ¿…´`·ƒvçyß)Ÿ’dR…¡£ð:Taš‰p)ov`œEœMDJ?Rܦ Z!)gÇnz"pÒ˜²ëcçêò¼Ñsþ>üÔh¼"ŒˆP‘MVèÃÅ2ñÅTqž ¿é51z:Éh7üÀý޳ßЕ$¢àïˤhJúÈ÷0nx½î^âNû}Œ›]Üé¶ý sÆ$ás‚é²o¼âcgÓ­ƒ a&™Ró¾ëÊhFÒP6ãTM›\\»’K¬[õ~DÓ‡M–’ºK :.v#ž¦œvà­j(Êø±±hÚÈ5\ß=]|zO$ÏDD^$ B5¥ §ÐE`³ö¿\.›ð µhBâ6œNl­,Ïæ* “ñJ*’îHäVÒR"Ë–qêÃ¥vÿ¾øsl|5(ƒ+Æ"â Ÿ 4xO¦D@‡©aáœ&¦ú3A€õ‹ŠÆÂñÕø¬yÛ >í†ýå7b*o07¾qŒ…“MÁç’~!ÇN†ˆßÅ­ÀqM@·qp NÆ$Òlm#6åC{V¤c +"5ü®ÙÎ5µ­ …ó0¢j7&èà¶SZ¼+Þ£ Zu²RýŠü¾—kê 
º±á(úHù¢>É{`ãHyU›2EÄ4„“¹rN":¥yXéjFÍ™J“Ÿ¥$LßÎáþž8w;çó,ÑWylù0™ŸÃdö{³-Hƒ7D-¹ØÅírFPB¥B|Š~ $ˆåÚ²Äκ0ñY˜B؉ ñ5¤”+h¼DF‚ÎMí×*þ ­ …Ü«kÝæißOsPêáu×,Rgó/Ж? £e¤œ9d;ÜÓ—WkäÊoçzHÂE)nIe¸×rÌlãòr52“$h¿ÊˆTwÖ`o(‹5Xs…’P\kMÄ‹ Hš(¥tÝÝ)Ø]Ëãu(bÝG¥Íí»0³JHÏ$%L•Š yŒâôÀéŸ%FÙp)¼!Ñy˜Òd5pïin9ÙÁ1:zV{ciK¹TçQ .á:±¿n{-÷ÐÅ.ÒÛ†×ÞåÅl„Ü2¢Á¨ÌK¿.ú[#c8ƒ¤¿ÀÈÀÞÇ`àîTÙò°y5Þdé„Ý8û“wWÒú¹w‡î|lÖ¯¯Ø>•Ϭõîr™­Ú`«½³\f§xÑR-«^ZÜ2°'ù+ ¡>jµ±½¾¦>ªÊ|cJWG~AR.VHÏÓjœ;Ø¿x©K••0ûÕ0·À éUm†ú &’8÷èDZ R׬íóÆWã—è„3%8ŒBQ s&'•жª¡õ÷µðcz±:ñÉxt0IzgëUBÓ~4šq61IÍOƒ-FÅö>°Áç::=;+É¡T;Õ®SK•+ù–½ƒ·4SÛÐÖëÛf™âú·rt7.‡&@a¾cÿsøšš¯Ð© ‹Š£tj,«aªaÇûfèÿû&£(<Å•àu¿ Ïf^të WììosóÀæ›ìë ^s©Š$‡ºÎ®6tí7µ/)=L²÷X’í $»ß–¤x[*‘UèJ¡Ì~–Ø/@k»±±g€œaÏóQð>Q‡s¥ 3ôË猫߭Ïü¡Ú„)œV›1Ï+¿0Íx[âm£‡J>pþ\ïß-·øo¹á“ÿúÿ+k©¥–Zj©¥–Zj©¥–Zj©¥–Zj©&ÿ^(ÐA(ganeti-2.9.3/test/data/ovfdata/corrupted_resources.ovf0000644000000000000000000001003512230001635023112 0ustar00rootroot00000000000000 Virtual disk information The list of logical networks The bridged network A virtual machine AyertiennaSUSE.x86_64-0.0.2 The kind of installed guest operating system Virtual hardware requirements Virtual Hardware Family 0 AyertiennaSUSE.x86_64-0.0.2 vmx-04 hertz * 10^6 Number of Virtual CPUs 1 virtual CPU(s) 1 3 1 byte * 2^20 Memory Size 512MB of memory 2 4 512 0 SCSI Controller scsiController0 4 lsilogic 6 0 IDE Controller ideController0 5 5 0 disk1 ovf:/disk/vmdisk1 8 4 17 2 true bridged E1000 ethernet adapter on "bridged" ethernet0 9 E1000 10 ganeti-2.9.3/test/data/ovfdata/config.ini0000644000000000000000000000061512244641676020272 0ustar00rootroot00000000000000[instance] disk0_dump = rawdisk.raw nic0_mode = routed name = ganeti-test-xen hypervisor = xen-pvm disk_count = 1 nic0_mac = aa:00:00:d8:2c:1e nic_count = 1 nic0_link = br0 nic0_ip = None nic0_network = test disk0_ivname = disk/0 disk0_size = 0 [hypervisor] root-path = /dev/sda kernel_args = ro [export] version = 0 os = lenny-image [os] [backend] auto_balance = False vcpus = 1 memory = 512 ganeti-2.9.3/test/data/ovfdata/no_disk.ini0000644000000000000000000000050012230001635020420 0ustar00rootroot00000000000000[instance] disk0_dump = iamnothere.raw nic0_mode = nic name = ganeti-test-xen disk_count = 1 nic0_mac = aa:00:00:d8:2c:1e nic_count = 1 nic0_link = xen-br0 nic0_ip = None disk0_ivname = disk/0 disk0_size = 0 [hypervisor] root-path = /dev/sda kernel_args = ro [export] version = 0 [os] [backend] auto_balance = False ganeti-2.9.3/test/data/ovfdata/empty.ovf0000644000000000000000000000131212230001635020145 0ustar00rootroot00000000000000 A virtual machine ganeti-2.9.3/test/data/qa-minimal-nodes-instances-only.json0000644000000000000000000000162212244641676023671 0ustar00rootroot00000000000000{ "name": "xen-test-qa-minimal-nodes-instances-only", "# Lists of disks": null, "disks": [ { "size": "1G", "growth": "2G" }, { "size": "512M", "growth": "768M" } ], "enabled-disk-templates": [ "plain", "drbd", "diskless" ], "nodes": [ { "# Master node": null, "primary": "xen-test-0", "secondary": "192.0.2.1" }, { "primary": "xen-test-1", "secondary": "192.0.2.2" }, { "primary": "xen-test-2", "secondary": "192.0.2.3" }, { "primary": "xen-test-3", "secondary": "192.0.2.4" } ], "instances": [ { "name": "xen-test-inst1", "nic.mac/0": "AA:00:00:11:11:11" }, { "name": "xen-test-inst2", "nic.mac/0": "AA:00:00:22:22:22" } ], "tests": { "default": false }, "# vim: set syntax=javascript :": null } ganeti-2.9.3/test/data/cluster_config_2.8.json0000644000000000000000000003505312267470014021163 0ustar00rootroot00000000000000{ "cluster": { "beparams": { "default": { "always_failover": false, "auto_balance": true, 
"maxmem": 128, "minmem": 128, "spindle_use": 1, "vcpus": 1 } }, "blacklisted_os": [], "candidate_pool_size": 10, "cluster_name": "cluster.name.example.com", "ctime": 1343869045.604884, "default_iallocator": "hail", "disk_state_static": {}, "diskparams": { "blockdev": {}, "diskless": {}, "drbd": { "c-delay-target": 1, "c-fill-target": 200, "c-max-rate": 2048, "c-min-rate": 1024, "c-plan-ahead": 1, "data-stripes": 2, "disk-barriers": "bf", "disk-custom": "", "dynamic-resync": false, "meta-barriers": true, "meta-stripes": 2, "metavg": "xenvg", "net-custom": "", "resync-rate": 1024 }, "ext": {}, "file": {}, "plain": { "stripes": 2 }, "rbd": { "pool": "rbd" }, "sharedfile": {} }, "drbd_usermode_helper": "/bin/true", "enabled_disk_templates": [ "drbd", "plain", "file", "sharedfile" ], "enabled_hypervisors": [ "xen-pvm" ], "file_storage_dir": "", "hidden_os": [], "highest_used_port": 32105, "hv_state_static": { "xen-pvm": { "cpu_node": 1, "cpu_total": 1, "mem_hv": 0, "mem_node": 0, "mem_total": 0 } }, "hvparams": { "chroot": { "init_script": "/ganeti-chroot" }, "fake": {}, "kvm": { "acpi": true, "boot_order": "disk", "cdrom2_image_path": "", "cdrom_disk_type": "", "cdrom_image_path": "", "cpu_cores": 0, "cpu_mask": "all", "cpu_sockets": 0, "cpu_threads": 0, "cpu_type": "", "disk_cache": "default", "disk_type": "paravirtual", "floppy_image_path": "", "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-kvmU", "keymap": "", "kvm_extra": "", "kvm_flag": "", "kvm_path": "/usr/bin/kvm", "machine_version": "", "mem_path": "", "migration_bandwidth": 4, "migration_downtime": 30, "migration_mode": "live", "migration_port": 4041, "nic_type": "paravirtual", "reboot_behavior": "reboot", "root_path": "/dev/vda1", "security_domain": "", "security_model": "none", "serial_console": true, "serial_speed": 38400, "soundhw": "", "spice_bind": "", "spice_image_compression": "", "spice_ip_version": 0, "spice_jpeg_wan_compression": "", "spice_password_file": "", "spice_playback_compression": true, "spice_streaming_video": "", "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH", "spice_use_tls": false, "spice_use_vdagent": true, "spice_zlib_glz_wan_compression": "", "usb_devices": "", "usb_mouse": "", "use_chroot": false, "use_localtime": false, "vga": "", "vhost_net": false, "vnc_bind_address": "", "vnc_password_file": "", "vnc_tls": false, "vnc_x509_path": "", "vnc_x509_verify": false }, "lxc": { "cpu_mask": "" }, "xen-hvm": { "acpi": true, "blockdev_prefix": "hd", "boot_order": "cd", "cdrom_image_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "device_model": "/usr/lib/xen/bin/qemu-dm", "disk_type": "paravirtual", "kernel_path": "/usr/lib/xen/boot/hvmloader", "migration_mode": "non-live", "migration_port": 8082, "nic_type": "rtl8139", "pae": true, "pci_pass": "", "reboot_behavior": "reboot", "use_localtime": false, "vnc_bind_address": "0.0.0.0", "vnc_password_file": "/your/vnc-cluster-password" }, "xen-pvm": { "blockdev_prefix": "sd", "bootloader_args": "", "bootloader_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-xenU", "migration_mode": "live", "migration_port": 8082, "reboot_behavior": "reboot", "root_path": "/dev/xvda1", "use_bootloader": false } }, "ipolicy": { "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "minmax": [ { "max": { "cpu-count": 8, "disk-count": 16, "disk-size": 1048576, "memory-size": 32768, "nic-count": 8, "spindle-use": 12 }, "min": { 
"cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 1.0 }, "mac_prefix": "aa:bb:cc", "maintain_node_health": false, "master_ip": "192.0.2.87", "master_netdev": "eth0", "master_netmask": 32, "master_node": "node1.example.com", "modify_etc_hosts": true, "modify_ssh_setup": true, "mtime": 1361964122.79471, "ndparams": { "exclusive_storage": false, "oob_program": "", "spindle_count": 1 }, "nicparams": { "default": { "link": "br974", "mode": "bridged" } }, "os_hvp": { "TEMP-Ganeti-QA-OS": { "xen-hvm": { "acpi": false, "pae": true }, "xen-pvm": { "root_path": "/dev/sda5" } } }, "osparams": {}, "prealloc_wipe_disks": false, "primary_ip_family": 2, "reserved_lvs": [], "rsahostkeypub": "YOURKEY", "serial_no": 3189, "shared_file_storage_dir": "/srv/ganeti/shared-file-storage", "tags": [ "mytag" ], "tcpudp_port_pool": [ 32101, 32102, 32103, 32104, 32105 ], "uid_pool": [], "use_external_mip_script": false, "uuid": "dddf8c12-f2d8-4718-a35b-7804daf12a3f", "volume_group_name": "xenvg" }, "ctime": 1343869045.605523, "instances": { "instance1.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1363620258.608976, "disk_template": "drbd", "disks": [ { "children": [ { "dev_type": "lvm", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "size": 1024 }, { "dev_type": "lvm", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "size": 128 } ], "dev_type": "drbd8", "iv_name": "disk/0", "logical_id": [ "node1.example.com", "node3.example.com", 32100, 0, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "mode": "rw", "params": {}, "physical_id": [ "198.51.100.82", 32100, "198.51.100.84", 32100, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "size": 1024, "uuid": "77ced3a5-6756-49ae-8d1f-274e27664c05" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1363620320.874901, "name": "instance1.example.com", "nics": [ { "mac": "aa:bb:cc:b2:6e:0b", "nicparams": {}, "uuid": "2c953d72-fac4-4aa9-a225-4131bb271791" } ], "os": "busybox", "osparams": {}, "primary_node": "node1.example.com", "serial_no": 2, "uuid": "6c078d22-3eb6-4780-857d-81772e09eef1" }, "instance2.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1355186880.451181, "disk_template": "plain", "disks": [ { "dev_type": "lvm", "iv_name": "disk/0", "logical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "mode": "rw", "params": {}, "physical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "size": 102400, "uuid": "79acf611-be58-4334-9fe4-4f2b73ae8abb" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1355186898.307642, "name": "instance2.example.com", "nics": [ { "mac": "aa:bb:cc:56:83:fb", "nicparams": {}, "uuid": "1cf95562-e676-4fd0-8214-e8b84a2f7bd1" } ], "os": "debian-image", "osparams": {}, "primary_node": "node3.example.com", "serial_no": 2, "tags": [], "uuid": "8fde9f6d-e1f1-4850-9e9c-154966f622f5" }, "instance3.example.com": { "admin_state": "up", "beparams": {}, "ctime": 1354038435.343601, "disk_template": "plain", "disks": [ { "dev_type": "lvm", "iv_name": "disk/0", "logical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "mode": 
"rw", "params": {}, "physical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "size": 1280, "uuid": "150bd154-8e23-44d1-b762-5065ae5a507b" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1354224585.700732, "name": "instance3.example.com", "nics": [ { "mac": "aa:bb:cc:5e:5c:75", "nicparams": {}, "uuid": "1ab090c1-e017-406c-afb4-fc285cb43e31" } ], "os": "debian-image", "osparams": {}, "primary_node": "node2.example.com", "serial_no": 4, "tags": [], "uuid": "4e091bdc-e205-4ed7-8a47-0c9130a6619f" } }, "mtime": 1367352404.758083, "networks": { "99f0128a-1c84-44da-90b9-9581ea00c075": { "ext_reservations": "1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", "name": "a network", "network": "203.0.113.0/24", "reservations": "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "serial_no": 1, "uuid": "99f0128a-1c84-44da-90b9-9581ea00c075" } }, "nodegroups": { "5244a46d-7506-4e14-922d-02b58153dde1": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": {}, "mtime": 1361963775.575009, "name": "default", "ndparams": {}, "networks": {}, "serial_no": 125, "tags": [], "uuid": "5244a46d-7506-4e14-922d-02b58153dde1" }, "6c0a8916-b719-45ad-95dd-82192b1e473f": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": { "disk-templates": [ "plain" ], "minmax": [ { "max": { "cpu-count": 8, "disk-count": 16, "disk-size": 1048576, "memory-size": 32768, "nic-count": 18, "spindle-use": 14 }, "min": { "cpu-count": 2, "disk-count": 2, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 5.2, "vcpu-ratio": 3.14 }, "mtime": 1361963775.575009, "name": "another", "ndparams": { "exclusive_storage": true }, "networks": {}, "serial_no": 125, "tags": [], "uuid": "6c0a8916-b719-45ad-95dd-82192b1e473f" } }, "nodes": { "node1.example.com": { "ctime": 1349722460.022264, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1359986533.353329, "name": "node1.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.82", "secondary_ip": "198.51.100.82", "serial_no": 197, "tags": [], "uuid": "9a12d554-75c0-4cb1-8064-103365145db0", "vm_capable": true }, "node2.example.com": { "ctime": 1343869045.604884, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1358348755.779906, "name": "node2.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.83", "secondary_ip": "198.51.100.83", "serial_no": 6, "tags": [], "uuid": "2ae3d962-2dad-44f2-bdb1-85f77107f907", "vm_capable": true }, "node3.example.com": { "ctime": 1343869205.934807, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1353019704.885368, "name": "node3.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.84", "secondary_ip": "198.51.100.84", "serial_no": 2, "tags": [], "uuid": "41f9c238-173c-4120-9e41-04ad379b647a", "vm_capable": true } }, "serial_no": 7625, "version": 2080000 } 
ganeti-2.9.3/test/data/kvm_0.9.1_help.txt0000644000000000000000000001360312230001635017750 0ustar00rootroot00000000000000QEMU PC emulator version 0.9.1 (kvm-72), Copyright (c) 2003-2008 Fabrice Bellard usage: qemu [options] [disk_image] 'disk_image' is a raw hard image image for IDE hard disk 0 Standard options: -M machine select emulated machine (-M ? for list) -cpu cpu select CPU (-cpu ? for list) -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] [,cache=on|off][,format=f][,boot=on|off] use 'file' as a drive image -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [a|c|d|n] boot on floppy (a), hard disk (c), CD-ROM (d), or network (n) -snapshot write to temporary files instead of disk image files -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -no-fd-bootchk disable boot signature checking for floppy disks -m megs set virtual RAM size to megs MB [default=128] -smp n set the number of CPUs to 'n' [default=1] -nographic disable graphical output and redirect serial I/Os to console -portrait rotate graphical output 90 deg left (only PXA LCD) -k language use keyboard layout (for example "fr" for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? 
to get the list of supported cards use -soundhw all to enable all of them -localtime set the real time clock to local time [default=utc] -full-screen start in full screen -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -name string set the name of the guest Network options: -net nic[,vlan=n][,macaddr=addr][,model=type] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,hostname=host] connect the user mode network stack to VLAN 'n' and send hostname 'host' to DHCP clients -net tap[,vlan=n][,fd=h][,ifname=name][,script=file][,downscript=dfile] connect the host TAP network interface to VLAN 'n' and use the network scripts 'file' (default=/etc/kvm/kvm-ifup) and 'dfile' (default=/etc/kvm/kvm-ifdown); use '[down]script=no' to disable script execution; use 'fd=h' to connect to an already opened TAP interface -net socket[,vlan=n][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,fd=h][,mcast=maddr:port] connect the vlan 'n' to multicast maddr and port -net none use it alone to have zero network devices; if no -net option is provided, the default is '-net nic -net user' -tftp dir allow tftp access to files in dir [-net user] -bootp file advertise file in BOOTP replies -smb dir allow SMB access to files in 'dir' [-net user] -redir [tcp|udp]:host-port:[guest-host]:guest-port redirect TCP or UDP connections from host to guest [-net user] Linux boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk Debug/Expert options: -monitor dev redirect the monitor to char device 'dev' -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -pidfile file Write PID to 'file' -S freeze CPU at startup (use 'c' to start execution) -s wait gdb connection to port -p port set gdb connection port [default=1234] -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually qemu can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -no-kvm disable KVM hardware virtualization -no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC -no-kvm-pit disable KVM kernel mode PIT -std-vga simulate a standard VGA card with VESA Bochs Extensions (default is CL-GD5446 PCI VGA) -no-acpi disable ACPI -curses use a curses/ncurses interface instead of SDL -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -vnc display start a VNC server on display -daemonize daemonize QEMU after initializing -tdf inject timer interrupts that got lost -kvm-shadow-memory megs set the amount of shadow pages to be allocated -mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? 
-startdate select initial date of the clock -icount [N|auto] Enable virtual instruction counter with 2^N clock ticks per instruction During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/bdev-drbd-8.3.txt0000644000000000000000000000174312230001635017557 0ustar00rootroot00000000000000disk { size 0s _is_default; # bytes on-io-error detach; fencing dont-care _is_default; max-bio-bvecs 0 _is_default; } net { timeout 60 _is_default; # 1/10 seconds max-epoch-size 2048 _is_default; max-buffers 2048 _is_default; unplug-watermark 128 _is_default; connect-int 10 _is_default; # seconds ping-int 10 _is_default; # seconds sndbuf-size 131070 _is_default; # bytes ko-count 0 _is_default; after-sb-0pri discard-zero-changes; after-sb-1pri consensus; after-sb-2pri disconnect _is_default; rr-conflict disconnect _is_default; ping-timeout 5 _is_default; # 1/10 seconds } syncer { rate 61440k; # bytes/second after -1 _is_default; al-extents 257; } protocol C; _this_host { device minor 0; disk "/dev/xenvg/test.data"; meta-disk "/dev/xenvg/test.meta" [ 0 ]; address ipv4 192.0.2.1:11000; } _remote_host { address ipv4 192.0.2.2:11000; } ganeti-2.9.3/test/data/proc_diskstats.txt0000644000000000000000000000277612267470014020476 0ustar00rootroot00000000000000 1 0 ram0 0 0 0 0 0 0 0 0 0 0 0 1 1 ram1 0 0 0 0 0 0 0 0 0 0 0 1 2 ram2 0 0 0 0 0 0 0 0 0 0 0 1 3 ram3 0 0 0 0 0 0 0 0 0 0 0 1 4 ram4 0 0 0 0 0 0 0 0 0 0 0 1 5 ram5 0 0 0 0 0 0 0 0 0 0 0 1 6 ram6 0 0 0 0 0 0 0 0 0 0 0 1 7 ram7 0 0 0 0 0 0 0 0 0 0 0 1 8 ram8 0 0 0 0 0 0 0 0 0 0 0 1 9 ram9 0 0 0 0 0 0 0 0 0 0 0 1 10 ram10 0 0 0 0 0 0 0 0 0 0 0 1 11 ram11 0 0 0 0 0 0 0 0 0 0 0 1 12 ram12 0 0 0 0 0 0 0 0 0 0 0 1 13 ram13 0 0 0 0 0 0 0 0 0 0 0 1 14 ram14 0 0 0 0 0 0 0 0 0 0 0 1 15 ram15 0 0 0 0 0 0 0 0 0 0 0 7 0 loop0 0 0 0 0 0 0 0 0 0 0 0 7 1 loop1 0 0 0 0 0 0 0 0 0 0 0 7 2 loop2 0 0 0 0 0 0 0 0 0 0 0 7 3 loop3 0 0 0 0 0 0 0 0 0 0 0 7 4 loop4 0 0 0 0 0 0 0 0 0 0 0 7 5 loop5 0 0 0 0 0 0 0 0 0 0 0 7 6 loop6 0 0 0 0 0 0 0 0 0 0 0 7 7 loop7 0 0 0 0 0 0 0 0 0 0 0 8 0 sda 89502 4833 4433387 89244 519115 62738 16059726 465120 0 149148 554564 8 1 sda1 505 2431 8526 132 478 174 124358 8500 0 340 8632 8 2 sda2 2 0 4 4 0 0 0 0 0 4 4 8 5 sda5 88802 2269 4422249 89032 453703 62564 15935368 396244 0 90064 485500 252 0 dm-0 90978 0 4420002 158632 582226 0 15935368 5592012 0 167688 5750652 252 1 dm-1 88775 0 4402378 157204 469594 0 15136008 4910424 0 164556 5067640 252 2 dm-2 1956 0 15648 1052 99920 0 799360 682492 0 4516 683552 8 16 sdb 0 0 0 0 0 0 0 0 0 0 0 ganeti-2.9.3/test/data/bdev-drbd-net-ip4.txt0000644000000000000000000000144312230001635020524 0ustar00rootroot00000000000000net { timeout 60 _is_default; # 1/10 seconds max-epoch-size 2048 _is_default; max-buffers 2048 _is_default; unplug-watermark 128 _is_default; connect-int 10 _is_default; # seconds ping-int 10 _is_default; # seconds sndbuf-size 131070 _is_default; # bytes ko-count 0 _is_default; after-sb-0pri disconnect _is_default; after-sb-1pri disconnect _is_default; after-sb-2pri disconnect _is_default; rr-conflict disconnect _is_default; ping-timeout 5 _is_default; # 1/10 seconds } syncer { rate 250k _is_default; # bytes/second after -1 _is_default; al-extents 127 _is_default; } protocol C; _this_host { device "/dev/drbd59"; address 192.0.2.1:11002; } _remote_host { address 192.0.2.2:11002; } 
ganeti-2.9.3/test/data/bdev-drbd-net-ip6.txt0000644000000000000000000000161312230001635020525 0ustar00rootroot00000000000000net { timeout 60 _is_default; # 1/10 seconds max-epoch-size 2048 _is_default; max-buffers 2048 _is_default; unplug-watermark 128 _is_default; connect-int 10 _is_default; # seconds ping-int 10 _is_default; # seconds sndbuf-size 0 _is_default; # bytes rcvbuf-size 0 _is_default; # bytes ko-count 0 _is_default; cram-hmac-alg "md5"; shared-secret "a6526cb6118297c9c82c7003924e236ceac0d867"; after-sb-0pri discard-zero-changes; after-sb-1pri consensus; after-sb-2pri disconnect _is_default; rr-conflict disconnect _is_default; ping-timeout 5 _is_default; # 1/10 seconds } syncer { rate 61440k; # bytes/second after -1 _is_default; al-extents 257; } protocol C; _this_host { device minor 0; address ipv6 [2001:db8:65::1]:11048; } _remote_host { address ipv6 [2001:db8:66::1]:11048; } ganeti-2.9.3/test/data/ip-addr-show-lo.txt0000644000000000000000000000035512230001635020324 0ustar00rootroot000000000000001: lo: mtu 16436 qdisc noqueue state UNKNOWN link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo inet6 ::1/128 scope host valid_lft forever preferred_lft forever ganeti-2.9.3/test/data/proc_cpuinfo.txt0000644000000000000000000000732012244641676020127 0ustar00rootroot00000000000000processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 58 model name : Intel(R) Core(TM) i5-3320M CPU @ 2.60GHz stepping : 9 microcode : 0x13 cpu MHz : 1200.000 cache size : 3072 KB physical id : 0 siblings : 4 core id : 0 cpu cores : 2 apicid : 0 initial apicid : 0 fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms bogomips : 5188.22 clflush size : 64 cache_alignment : 64 address sizes : 36 bits physical, 48 bits virtual power management: processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 58 model name : Intel(R) Core(TM) i5-3320M CPU @ 2.60GHz stepping : 9 microcode : 0x13 cpu MHz : 1200.000 cache size : 3072 KB physical id : 0 siblings : 4 core id : 0 cpu cores : 2 apicid : 1 initial apicid : 1 fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms bogomips : 5188.22 clflush size : 64 cache_alignment : 64 address sizes : 36 bits physical, 48 bits virtual power management: processor : 2 vendor_id : GenuineIntel cpu family : 6 model : 58 model name : Intel(R) Core(TM) i5-3320M CPU @ 2.60GHz stepping : 9 microcode : 0x13 cpu MHz : 1200.000 cache size : 3072 KB physical id : 0 siblings : 4 core id : 1 cpu cores : 2 apicid : 2 initial apicid : 2 fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags 
: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms bogomips : 5188.22 clflush size : 64 cache_alignment : 64 address sizes : 36 bits physical, 48 bits virtual power management: processor : 3 vendor_id : GenuineIntel cpu family : 6 model : 58 model name : Intel(R) Core(TM) i5-3320M CPU @ 2.60GHz stepping : 9 microcode : 0x13 cpu MHz : 1200.000 cache size : 3072 KB physical id : 0 siblings : 4 core id : 1 cpu cores : 2 apicid : 3 initial apicid : 3 fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms bogomips : 5188.22 clflush size : 64 cache_alignment : 64 address sizes : 36 bits physical, 48 bits virtual power management: ganeti-2.9.3/test/data/ip-addr-show-dummy0.txt0000644000000000000000000000040412230001635021120 0ustar00rootroot000000000000007: dummy0: mtu 1500 qdisc noop state DOWN link/ether 06:d2:06:24:99:dc brd ff:ff:ff:ff:ff:ff inet 192.0.2.1/32 scope global dummy0 inet6 2001:db8:85a3::8a2e:370:7334/128 scope global valid_lft forever preferred_lft forever ganeti-2.9.3/test/data/kvm_0.15.90_help.txt0000644000000000000000000003206412230001635020117 0ustar00rootroot00000000000000QEMU emulator version 0.15.90, Copyright (c) 2003-2008 Fabrice Bellard usage: qemu [options] [disk_image] 'disk_image' is a raw hard disk image for IDE hard disk 0 Standard options: -h or -help display this help and exit -version display version information and exit -machine [type=]name[,prop[=value][,...]] selects emulated machine (-machine ? for list) property accel=accel1[:accel2[:...]] selects accelerator supported accelerators are kvm, xen, tcg (default: tcg) -cpu cpu select CPU (-cpu ? for list) -smp n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets] set the number of CPUs to 'n' [default=1] maxcpus= maximum number of total cpus, including offline CPUs for hotplug, etc cores= number of CPU cores on one socket threads= number of threads on one CPU core sockets= number of discrete sockets in the system -numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node] -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] [,cache=writethrough|writeback|none|directsync|unsafe][,format=f] [,serial=s][,addr=A][,id=name][,aio=threads|native] [,readonly=on|off] use 'file' as a drive image -set group.id.arg=value set parameter for item of type i.e. 
-set drive.$id.file=/path/to/image -global driver.property=value set a global default for a driver property -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [order=drives][,once=drives][,menu=on|off] [,splash=sp_name][,splash-time=sp_time] 'drives': floppy (a), hard disk (c), CD-ROM (d), network (n) 'sp_name': the file's name that would be passed to bios as logo picture, if menu=on 'sp_time': the period that splash picture last if menu=on, unit is ms -snapshot write to temporary files instead of disk image files -m megs set virtual RAM size to megs MB [default=128] -mem-path FILE provide backing storage for guest RAM -mem-prealloc preallocate guest memory (use with -mem-path) -k language use keyboard layout (for example 'fr' for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? to get the list of supported cards use -soundhw all to enable all of them -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -device driver[,prop[=value][,...]] add device (based on driver) prop=value,... sets driver properties use -device ? to print all possible drivers use -device driver,? to print all possible properties File system options: -fsdev fsdriver,id=id,path=path,[security_model={mapped|passthrough|none}] [,writeout=immediate][,readonly] Virtual File system pass-through options: -virtfs local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none] [,writeout=immediate][,readonly] -virtfs_synth Create synthetic file system image -name string1[,process=string2] set the name of the guest string1 sets the window title and string2 the process name (on Linux) -uuid %08x-%04x-%04x-%04x-%012x specify machine UUID Display options: -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off] [,window_close=on|off]|curses|none| vnc=[,] select display type -nographic disable graphical output and redirect serial I/Os to console -curses use a curses/ncurses interface instead of SDL -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -ctrl-grab use Right-Ctrl to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -sdl enable SDL -spice enable spice -portrait rotate graphical output 90 deg left (only PXA LCD) -rotate rotate graphical output some deg left (only PXA LCD) -vga [std|cirrus|vmware|qxl|xenfb|none] select video card type -full-screen start in full screen -g WxH[xDEPTH] Set the initial graphical resolution and depth -vnc display start a VNC server on display i386 target only: -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -no-fd-bootchk disable boot signature checking for floppy disks -no-acpi disable ACPI -no-hpet disable HPET -balloon none disable balloon device -balloon virtio[,addr=str] enable virtio balloon device (default) -acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...] 
ACPI table description -smbios file=binary load SMBIOS entry from binary file -smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d] specify SMBIOS type 0 fields -smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str] specify SMBIOS type 1 fields Network options: -net nic[,vlan=n][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,name=str][,net=addr[/mask]][,host=addr][,restrict=on|off] [,hostname=host][,dhcpstart=addr][,dns=addr][,tftp=dir][,bootfile=f] [,hostfwd=rule][,guestfwd=rule][,smb=dir[,smbserver=addr]] connect the user mode network stack to VLAN 'n', configure its DHCP server and enabled optional services -net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off] connect the host TAP network interface to VLAN 'n' and use the network scripts 'file' (default=/etc/qemu-ifup) and 'dfile' (default=/etc/qemu-ifdown) use '[down]script=no' to disable script execution use 'fd=h' to connect to an already opened TAP interface use 'sndbuf=nbytes' to limit the size of the send buffer (the default is disabled 'sndbuf=0' to enable flow control set 'sndbuf=1048576') use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition use vhost=on to enable experimental in kernel accelerator (only has effect for virtio guests which use MSIX) use vhostforce=on to force vhost on for non-MSIX virtio guests use 'vhostfd=h' to connect to an already opened vhost net device -net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]] connect the vlan 'n' to multicast maddr and port use 'localaddr=addr' to specify the host address to send packets from -net vde[,vlan=n][,name=str][,sock=socketpath][,port=n][,group=groupname][,mode=octalmode] connect the vlan 'n' to port 'n' of a vde switch running on host and listening for incoming connections on 'socketpath'. Use group 'groupname' and mode 'octalmode' to change default ownership and permissions for communication port. -net dump[,vlan=n][,file=f][,len=n] dump traffic on vlan 'n' to file 'f' (max n bytes per packet) -net none use it alone to have zero network devices. If no -net option is provided, the default is '-net nic -net user' -netdev [user|tap|vde|socket],id=str[,option][,option][,...] 
Character device options: -chardev null,id=id[,mux=on|off] -chardev socket,id=id[,host=host],port=host[,to=to][,ipv4][,ipv6][,nodelay] [,server][,nowait][,telnet][,mux=on|off] (tcp) -chardev socket,id=id,path=path[,server][,nowait][,telnet],[mux=on|off] (unix) -chardev udp,id=id[,host=host],port=port[,localaddr=localaddr] [,localport=localport][,ipv4][,ipv6][,mux=on|off] -chardev msmouse,id=id[,mux=on|off] -chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]] [,mux=on|off] -chardev file,id=id,path=path[,mux=on|off] -chardev pipe,id=id,path=path[,mux=on|off] -chardev pty,id=id[,mux=on|off] -chardev stdio,id=id[,mux=on|off][,signal=on|off] -chardev braille,id=id[,mux=on|off] -chardev tty,id=id,path=path[,mux=on|off] -chardev parport,id=id,path=path[,mux=on|off] -chardev spicevmc,id=id,name=name[,debug=debug] Bluetooth(R) options: -bt hci,null dumb bluetooth HCI - doesn't respond to commands -bt hci,host[:id] use host's HCI with the given name -bt hci[,vlan=n] emulate a standard HCI in virtual scatternet 'n' -bt vhci[,vlan=n] add host computer to virtual scatternet 'n' using VHCI -bt device:dev[,vlan=n] emulate a bluetooth device 'dev' in scatternet 'n' Linux/Multiboot boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk Debug/Expert options: -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -monitor dev redirect the monitor to char device 'dev' -qmp dev like -monitor but opens in 'control' mode -mon chardev=[name][,mode=readline|control][,default] -debugcon dev redirect the debug console to char device 'dev' -pidfile file write PID to 'file' -singlestep always run in singlestep mode -S freeze CPU at startup (use 'c' to start execution) -gdb dev wait for gdb connection on 'dev' -s shorthand for -gdb tcp::1234 -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -D logfile output log to logfile (instead of the default /tmp/qemu.log) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually qemu can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -bios file set the filename for the BIOS -enable-kvm enable KVM full virtualization support -xen-domid id specify xen guest domain id -xen-create create domain using xen hypercalls, bypassing xend warning: should not be used when xend is in use -xen-attach attach to existing xen domain xend will use this when starting qemu -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -daemonize daemonize QEMU after initializing -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? 
-rtc [base=utc|localtime|date][,clock=host|vm][,driftfix=none|slew] set the RTC base and clock, enable drift fix for clock ticks (x86 only) -icount [N|auto] enable virtual instruction counter with 2^N clock ticks per instruction -watchdog i6300esb|ib700 enable virtual hardware watchdog [default=none] -watchdog-action reset|shutdown|poweroff|pause|debug|none action when watchdog fires [default=reset] -echr chr set terminal escape character instead of ctrl-a -virtioconsole c set virtio console -show-cursor show cursor -tb-size n set TB size -incoming p prepare for incoming migration, listen on port p -nodefaults don't create default devices -chroot dir chroot to dir just before starting the VM -runas user change to user id user just before starting the VM -prom-env variable=value set OpenBIOS nvram variables -semihosting semihosting mode -old-param old param mode -readconfig -writeconfig read/write config file -nodefconfig do not load default config files at startup -trace [events=][,file=] specify tracing options During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/proc_drbd80-emptyline.txt0000644000000000000000000000101612267470014021536 0ustar00rootroot00000000000000version: 8.0.12 (api:86/proto:86) GIT-hash: 5c9f89594553e32adb87d9638dce591782f947e3 build by root@node1.example.com, 2009-05-22 12:47:52 0: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r--- ns:78728316 nr:0 dw:77675644 dr:1277039 al:254 bm:270 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:65657 misses:135 starving:0 dirty:0 changed:135 act_log: used:0/257 hits:11378843 misses:254 starving:0 dirty:0 changed:254 1: cs:Unconfigured 2: cs:Unconfigured 5: cs:Unconfigured 6: cs:Unconfigured ganeti-2.9.3/test/data/cluster_config_2.9.json0000644000000000000000000004652112267470014021166 0ustar00rootroot00000000000000{ "cluster": { "beparams": { "default": { "always_failover": false, "auto_balance": true, "maxmem": 128, "minmem": 128, "spindle_use": 1, "vcpus": 1 } }, "blacklisted_os": [], "candidate_pool_size": 10, "cluster_name": "cluster.name.example.com", "ctime": 1343869045.604884, "default_iallocator": "hail", "disk_state_static": {}, "diskparams": { "blockdev": {}, "diskless": {}, "drbd": { "c-delay-target": 1, "c-fill-target": 200, "c-max-rate": 2048, "c-min-rate": 1024, "c-plan-ahead": 1, "data-stripes": 2, "disk-barriers": "bf", "disk-custom": "", "dynamic-resync": false, "meta-barriers": true, "meta-stripes": 2, "metavg": "xenvg", "net-custom": "", "resync-rate": 1024 }, "ext": {}, "file": {}, "plain": { "stripes": 2 }, "rbd": { "pool": "rbd" }, "sharedfile": {} }, "drbd_usermode_helper": "/bin/true", "enabled_disk_templates": [ "drbd", "plain", "file", "sharedfile" ], "enabled_hypervisors": [ "xen-pvm" ], "file_storage_dir": "", "hidden_os": [], "highest_used_port": 32105, "hv_state_static": { "xen-pvm": { "cpu_node": 1, "cpu_total": 1, "mem_hv": 0, "mem_node": 0, "mem_total": 0 } }, "hvparams": { "chroot": { "init_script": "/ganeti-chroot" }, "fake": {}, "kvm": { "acpi": true, "boot_order": "disk", "cdrom2_image_path": "", "cdrom_disk_type": "", "cdrom_image_path": "", "cpu_cores": 0, "cpu_mask": "all", "cpu_sockets": 0, "cpu_threads": 0, "cpu_type": "", "disk_cache": "default", "disk_type": "paravirtual", "floppy_image_path": "", "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-kvmU", "keymap": "", 
"kvm_extra": "", "kvm_flag": "", "kvm_path": "/usr/bin/kvm", "machine_version": "", "mem_path": "", "migration_bandwidth": 4, "migration_downtime": 30, "migration_mode": "live", "migration_port": 4041, "nic_type": "paravirtual", "reboot_behavior": "reboot", "root_path": "/dev/vda1", "security_domain": "", "security_model": "none", "serial_console": true, "serial_speed": 38400, "soundhw": "", "spice_bind": "", "spice_image_compression": "", "spice_ip_version": 0, "spice_jpeg_wan_compression": "", "spice_password_file": "", "spice_playback_compression": true, "spice_streaming_video": "", "spice_tls_ciphers": "HIGH:-DES:-3DES:-EXPORT:-ADH", "spice_use_tls": false, "spice_use_vdagent": true, "spice_zlib_glz_wan_compression": "", "usb_devices": "", "usb_mouse": "", "use_chroot": false, "use_localtime": false, "vga": "", "vhost_net": false, "vnc_bind_address": "", "vnc_password_file": "", "vnc_tls": false, "vnc_x509_path": "", "vnc_x509_verify": false }, "lxc": { "cpu_mask": "" }, "xen-hvm": { "acpi": true, "blockdev_prefix": "hd", "boot_order": "cd", "cdrom_image_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "device_model": "/usr/lib/xen/bin/qemu-dm", "disk_type": "paravirtual", "kernel_path": "/usr/lib/xen/boot/hvmloader", "migration_mode": "non-live", "migration_port": 8082, "nic_type": "rtl8139", "pae": true, "pci_pass": "", "reboot_behavior": "reboot", "use_localtime": false, "vif_script": "", "vnc_bind_address": "0.0.0.0", "vnc_password_file": "/your/vnc-cluster-password", "xen_cmd": "xm" }, "xen-pvm": { "blockdev_prefix": "sd", "bootloader_args": "", "bootloader_path": "", "cpu_cap": 0, "cpu_mask": "all", "cpu_weight": 256, "initrd_path": "", "kernel_args": "ro", "kernel_path": "/boot/vmlinuz-xenU", "migration_mode": "live", "migration_port": 8082, "reboot_behavior": "reboot", "root_path": "/dev/xvda1", "use_bootloader": false, "vif_script": "", "xen_cmd": "xm" } }, "ipolicy": { "disk-templates": [ "sharedfile", "diskless", "plain", "blockdev", "drbd", "file", "rbd" ], "minmax": [ { "max": { "cpu-count": 8, "disk-count": 16, "disk-size": 1048576, "memory-size": 32768, "nic-count": 8, "spindle-use": 12 }, "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 32.0, "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 }, "vcpu-ratio": 1.0 }, "mac_prefix": "aa:bb:cc", "maintain_node_health": false, "master_ip": "192.0.2.87", "master_netdev": "eth0", "master_netmask": 32, "master_node": "9a12d554-75c0-4cb1-8064-103365145db0", "modify_etc_hosts": true, "modify_ssh_setup": true, "mtime": 1361964122.79471, "ndparams": { "exclusive_storage": false, "oob_program": "", "spindle_count": 1 }, "nicparams": { "default": { "link": "br974", "mode": "bridged" } }, "os_hvp": { "TEMP-Ganeti-QA-OS": { "xen-hvm": { "acpi": false, "pae": true }, "xen-pvm": { "root_path": "/dev/sda5" } } }, "osparams": {}, "prealloc_wipe_disks": false, "primary_ip_family": 2, "reserved_lvs": [], "rsahostkeypub": "YOURKEY", "serial_no": 3189, "shared_file_storage_dir": "/srv/ganeti/shared-file-storage", "tags": [ "mytag" ], "tcpudp_port_pool": [ 32101, 32102, 32103, 32104, 32105 ], "uid_pool": [], "use_external_mip_script": false, "uuid": "dddf8c12-f2d8-4718-a35b-7804daf12a3f", "volume_group_name": "xenvg" }, "ctime": 1343869045.605523, "instances": { "4e091bdc-e205-4ed7-8a47-0c9130a6619f": { "admin_state": "up", "beparams": {}, "ctime": 1354038435.343601, 
"disk_template": "plain", "disks": [ { "dev_type": "plain", "iv_name": "disk/0", "logical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "mode": "rw", "params": {}, "physical_id": [ "xenvg", "b27a576a-13f7-4f07-885c-63fcad4fdfcc.disk0" ], "size": 1280, "uuid": "150bd154-8e23-44d1-b762-5065ae5a507b" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1354224585.700732, "name": "instance3.example.com", "nics": [ { "mac": "aa:bb:cc:5e:5c:75", "nicparams": {}, "uuid": "1ab090c1-e017-406c-afb4-fc285cb43e31" } ], "os": "debian-image", "osparams": {}, "primary_node": "2ae3d962-2dad-44f2-bdb1-85f77107f907", "serial_no": 4, "tags": [], "uuid": "4e091bdc-e205-4ed7-8a47-0c9130a6619f" }, "6c078d22-3eb6-4780-857d-81772e09eef1": { "admin_state": "up", "beparams": {}, "ctime": 1363620258.608976, "disk_template": "drbd", "disks": [ { "children": [ { "dev_type": "plain", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_data" ], "size": 1024 }, { "dev_type": "plain", "logical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "params": {}, "physical_id": [ "xenvg", "5c390722-6a7a-4bb4-9cef-98d896a8e6b1.disk0_meta" ], "size": 128 } ], "dev_type": "drbd", "iv_name": "disk/0", "logical_id": [ "9a12d554-75c0-4cb1-8064-103365145db0", "41f9c238-173c-4120-9e41-04ad379b647a", 32100, 0, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "mode": "rw", "params": {}, "physical_id": [ "198.51.100.82", 32100, "198.51.100.84", 32100, 0, "d3c3fd475fcbaf5fd177fb245ac43b71247ada38" ], "size": 1024, "uuid": "77ced3a5-6756-49ae-8d1f-274e27664c05" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1363620320.874901, "name": "instance1.example.com", "nics": [ { "mac": "aa:bb:cc:b2:6e:0b", "nicparams": {}, "uuid": "2c953d72-fac4-4aa9-a225-4131bb271791" } ], "os": "busybox", "osparams": {}, "primary_node": "9a12d554-75c0-4cb1-8064-103365145db0", "serial_no": 2, "uuid": "6c078d22-3eb6-4780-857d-81772e09eef1" }, "8fde9f6d-e1f1-4850-9e9c-154966f622f5": { "admin_state": "up", "beparams": {}, "ctime": 1355186880.451181, "disk_template": "plain", "disks": [ { "dev_type": "plain", "iv_name": "disk/0", "logical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "mode": "rw", "params": {}, "physical_id": [ "xenvg", "3e559cd7-1024-4294-a923-a9fd13182b2f.disk0" ], "size": 102400, "uuid": "79acf611-be58-4334-9fe4-4f2b73ae8abb" } ], "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1355186898.307642, "name": "instance2.example.com", "nics": [ { "mac": "aa:bb:cc:56:83:fb", "nicparams": {}, "uuid": "1cf95562-e676-4fd0-8214-e8b84a2f7bd1" } ], "os": "debian-image", "osparams": {}, "primary_node": "41f9c238-173c-4120-9e41-04ad379b647a", "serial_no": 2, "tags": [], "uuid": "8fde9f6d-e1f1-4850-9e9c-154966f622f5" } }, "mtime": 1367352404.758083, "networks": { "99f0128a-1c84-44da-90b9-9581ea00c075": { "ext_reservations": "1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", "name": "a network", "network": "203.0.113.0/24", "reservations": 
"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "serial_no": 1, "uuid": "99f0128a-1c84-44da-90b9-9581ea00c075" } }, "nodegroups": { "5244a46d-7506-4e14-922d-02b58153dde1": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": {}, "mtime": 1361963775.575009, "name": "default", "ndparams": {}, "networks": {}, "serial_no": 125, "tags": [], "uuid": "5244a46d-7506-4e14-922d-02b58153dde1" }, "6c0a8916-b719-45ad-95dd-82192b1e473f": { "alloc_policy": "preferred", "diskparams": {}, "ipolicy": { "disk-templates": [ "plain" ], "minmax": [ { "max": { "cpu-count": 8, "disk-count": 16, "disk-size": 1048576, "memory-size": 32768, "nic-count": 18, "spindle-use": 14 }, "min": { "cpu-count": 2, "disk-count": 2, "disk-size": 1024, "memory-size": 128, "nic-count": 1, "spindle-use": 1 } } ], "spindle-ratio": 5.2, "vcpu-ratio": 3.14 }, "mtime": 1361963775.575009, "name": "another", "ndparams": { "exclusive_storage": true }, "networks": {}, "serial_no": 125, "tags": [], "uuid": "6c0a8916-b719-45ad-95dd-82192b1e473f" } }, "nodes": { "2ae3d962-2dad-44f2-bdb1-85f77107f907": { "ctime": 1343869045.604884, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1358348755.779906, "name": "node2.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.83", "secondary_ip": "198.51.100.83", "serial_no": 6, "tags": [], "uuid": "2ae3d962-2dad-44f2-bdb1-85f77107f907", "vm_capable": true }, "41f9c238-173c-4120-9e41-04ad379b647a": { "ctime": 1343869205.934807, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1353019704.885368, "name": "node3.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.84", "secondary_ip": "198.51.100.84", "serial_no": 2, "tags": [], "uuid": "41f9c238-173c-4120-9e41-04ad379b647a", "vm_capable": true }, "9a12d554-75c0-4cb1-8064-103365145db0": { "ctime": 1349722460.022264, "drained": false, "group": "5244a46d-7506-4e14-922d-02b58153dde1", "master_candidate": true, "master_capable": true, "mtime": 1359986533.353329, "name": "node1.example.com", "ndparams": {}, "offline": false, "powered": true, "primary_ip": "192.0.2.82", "secondary_ip": "198.51.100.82", "serial_no": 197, "tags": [], "uuid": "9a12d554-75c0-4cb1-8064-103365145db0", "vm_capable": true } }, "serial_no": 7625, "version": 2090000 } ganeti-2.9.3/test/data/kvm_0.12.5_help.txt0000644000000000000000000002607512230001635020035 0ustar00rootroot00000000000000QEMU PC emulator version 0.12.5 (qemu-kvm-0.12.5), Copyright (c) 2003-2008 Fabrice Bellard usage: qemu [options] [disk_image] 'disk_image' is a raw hard image image for IDE hard disk 0 Standard options: -h or -help display this help and exit -version display version information and exit -M machine select emulated machine (-M ? for list) -cpu cpu select CPU (-cpu ? for list) -smp n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets] set the number of CPUs to 'n' [default=1] maxcpus= maximum number of total cpus, including offline CPUs for hotplug etc. 
cores= number of CPU cores on one socket threads= number of threads on one CPU core sockets= number of discrete sockets in the system -numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node] -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] [,cache=writethrough|writeback|none][,format=f][,serial=s] [,addr=A][,id=name][,aio=threads|native] [,boot=on|off] use 'file' as a drive image -set group.id.arg=value set parameter for item of type i.e. -set drive.$id.file=/path/to/image -global driver.property=value set a global default for a driver property -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [order=drives][,once=drives][,menu=on|off] 'drives': floppy (a), hard disk (c), CD-ROM (d), network (n) -snapshot write to temporary files instead of disk image files -m megs set virtual RAM size to megs MB [default=128] -k language use keyboard layout (for example 'fr' for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? to get the list of supported cards use -soundhw all to enable all of them -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -device driver[,options] add device -name string1[,process=string2] set the name of the guest string1 sets the window title and string2 the process name (on Linux) -uuid %08x-%04x-%04x-%04x-%012x specify machine UUID Display options: -nographic disable graphical output and redirect serial I/Os to console -curses use a curses/ncurses interface instead of SDL -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -ctrl-grab use Right-Ctrl to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -sdl enable SDL -portrait rotate graphical output 90 deg left (only PXA LCD) -vga [std|cirrus|vmware|xenfb|none] select video card type -full-screen start in full screen -vnc display start a VNC server on display i386 target only: -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -no-fd-bootchk disable boot signature checking for floppy disks -no-acpi disable ACPI -no-hpet disable HPET -balloon none disable balloon device -balloon virtio[,addr=str] enable virtio balloon device (default) -acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,data=file1[:file2]...] 
ACPI table description -smbios file=binary Load SMBIOS entry from binary file -smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d] Specify SMBIOS type 0 fields -smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str] Specify SMBIOS type 1 fields Network options: -net nic[,vlan=n][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,name=str][,net=addr[/mask]][,host=addr][,restrict=y|n] [,hostname=host][,dhcpstart=addr][,dns=addr][,tftp=dir][,bootfile=f] [,hostfwd=rule][,guestfwd=rule][,smb=dir[,smbserver=addr]] connect the user mode network stack to VLAN 'n', configure its DHCP server and enabled optional services -net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off] connect the host TAP network interface to VLAN 'n' and use the network scripts 'file' (default=/etc/kvm/kvm-ifup) and 'dfile' (default=/etc/kvm/kvm-ifdown); use '[down]script=no' to disable script execution; use 'fd=h' to connect to an already opened TAP interface use 'sndbuf=nbytes' to limit the size of the send buffer; the default of 'sndbuf=1048576' can be disabled using 'sndbuf=0' use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag; use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition -net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port] connect the vlan 'n' to multicast maddr and port -net vde[,vlan=n][,name=str][,sock=socketpath][,port=n][,group=groupname][,mode=octalmode] connect the vlan 'n' to port 'n' of a vde switch running on host and listening for incoming connections on 'socketpath'. Use group 'groupname' and mode 'octalmode' to change default ownership and permissions for communication port. -net dump[,vlan=n][,file=f][,len=n] dump traffic on vlan 'n' to file 'f' (max n bytes per packet) -net none use it alone to have zero network devices; if no -net option is provided, the default is '-net nic -net user' -netdev [user|tap|vde|socket],id=str[,option][,option][,...] 
Character device options: -chardev null,id=id -chardev socket,id=id[,host=host],port=host[,to=to][,ipv4][,ipv6][,nodelay] [,server][,nowait][,telnet] (tcp) -chardev socket,id=id,path=path[,server][,nowait][,telnet] (unix) -chardev udp,id=id[,host=host],port=port[,localaddr=localaddr] [,localport=localport][,ipv4][,ipv6] -chardev msmouse,id=id -chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]] -chardev file,id=id,path=path -chardev pipe,id=id,path=path -chardev pty,id=id -chardev stdio,id=id,[,signal=on|off] -chardev braille,id=id -chardev tty,id=id,path=path -chardev parport,id=id,path=path Bluetooth(R) options: -bt hci,null dumb bluetooth HCI - doesn't respond to commands -bt hci,host[:id] use host's HCI with the given name -bt hci[,vlan=n] emulate a standard HCI in virtual scatternet 'n' -bt vhci[,vlan=n] add host computer to virtual scatternet 'n' using VHCI -bt device:dev[,vlan=n] emulate a bluetooth device 'dev' in scatternet 'n' Linux/Multiboot boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk Debug/Expert options: -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -monitor dev redirect the monitor to char device 'dev' -qmp dev like -monitor but opens in 'control' mode. -mon chardev=[name][,mode=readline|control][,default] -pidfile file write PID to 'file' -singlestep always run in singlestep mode -S freeze CPU at startup (use 'c' to start execution) -gdb dev wait for gdb connection on 'dev' -s shorthand for -gdb tcp::1234 -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually qemu can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -bios file set the filename for the BIOS -enable-kvm enable KVM full virtualization support -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -daemonize daemonize QEMU after initializing -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? -rtc [base=utc|localtime|date][,clock=host|vm][,driftfix=none|slew] set the RTC base and clock, enable drift fix for clock ticks -icount [N|auto] enable virtual instruction counter with 2^N clock ticks per instruction -watchdog i6300esb|ib700 enable virtual hardware watchdog [default=none] -watchdog-action reset|shutdown|poweroff|pause|debug|none action when watchdog fires [default=reset] -echr chr set terminal escape character instead of ctrl-a -virtioconsole c set virtio console -show-cursor show cursor -tb-size n set TB size -incoming uri wait on uri for incoming migration -nodefaults don't create default devices. -chroot dir Chroot to dir just before starting the VM. -runas user Change to user id user just before starting the VM. -readconfig -writeconfig read/write config file -no-kvm disable KVM hardware virtualization -no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC -no-kvm-pit disable KVM kernel mode PIT -no-kvm-pit-reinjection disable KVM kernel mode PIT interrupt reinjection -pcidevice host=bus:dev.func[,dma=none][,name=string] expose a PCI device to the guest OS. 
dma=none: don't perform any dma translations (default is to use an iommu) 'string' is used in log output. -enable-nesting enable support for running a VM inside the VM (AMD only) -nvram FILE provide ia64 nvram contents -tdf enable guest time drift compensation -kvm-shadow-memory MEGABYTES allocate MEGABYTES for kvm mmu shadowing -mem-path FILE provide backing storage for guest RAM -mem-prealloc preallocate guest memory (use with -mempath) During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/proc_drbd83_sync_want.txt0000644000000000000000000000064712244641676021644 0ustar00rootroot00000000000000version: 8.3.11 (api:88/proto:86-96) srcversion: 2D876214BAAD53B31ADC1D6 0: cs:SyncTarget ro:Secondary/Primary ds:Inconsistent/UpToDate C r----- ns:0 nr:460288 dw:460160 dr:0 al:0 bm:28 lo:2 pe:4 ua:1 ap:0 ep:1 wo:f oos:588416 [=======>............] sync'ed: 44.4% (588416/1048576)K finish: 0:00:08 speed: 65,736 (65,736) want: 61,440 K/sec 1: cs:Unconfigured 2: cs:Unconfigured 3: cs:Unconfigured ganeti-2.9.3/test/data/xen-xm-list-4.0.1-dom0-only.txt0000644000000000000000000000023712244641676022100 0ustar00rootroot00000000000000Name ID Mem VCPUs State Time(s) Domain-0 0 1023 1 r----- 121152.6 ganeti-2.9.3/test/data/vgreduce-removemissing-2.02.66-fail.txt0000644000000000000000000000640612230001635023634 0ustar00rootroot00000000000000 Couldn't find device with uuid bHRa26-svpL-ihJX-e0S4-2HNz-wAAi-AlBFtl. WARNING: Partial LV 4ba7abfa-8459-43b6-b00f-c016244980f0.disk0 needs to be repaired or removed. WARNING: Partial LV e972960d-4e35-46b2-9cda-7029916b28c1.disk0_data needs to be repaired or removed. WARNING: Partial LV e972960d-4e35-46b2-9cda-7029916b28c1.disk0_meta needs to be repaired or removed. WARNING: Partial LV 4fa40b51-dd4d-4fd9-aef1-35cc3a0f1f11.disk0_data needs to be repaired or removed. WARNING: Partial LV 4fa40b51-dd4d-4fd9-aef1-35cc3a0f1f11.disk0_meta needs to be repaired or removed. WARNING: Partial LV 0a184b34-1270-4f1a-94df-86da2167cfee.disk0_data needs to be repaired or removed. WARNING: Partial LV 0a184b34-1270-4f1a-94df-86da2167cfee.disk0_meta needs to be repaired or removed. WARNING: Partial LV 7e49c8a9-9c65-4e76-810e-bd3d7a1d97a9.disk0_data needs to be repaired or removed. WARNING: Partial LV 7e49c8a9-9c65-4e76-810e-bd3d7a1d97a9.disk0_meta needs to be repaired or removed. WARNING: Partial LV 290a3fd4-c035-4fbe-9a18-f5a0889bd45d.disk0_data needs to be repaired or removed. WARNING: Partial LV 290a3fd4-c035-4fbe-9a18-f5a0889bd45d.disk0_meta needs to be repaired or removed. WARNING: Partial LV c579be32-c041-4f1b-ae3e-c58aac9c2593.disk0_data needs to be repaired or removed. WARNING: Partial LV c579be32-c041-4f1b-ae3e-c58aac9c2593.disk0_meta needs to be repaired or removed. WARNING: Partial LV 47524563-3788-4a89-a61f-4274134dea73.disk0_data needs to be repaired or removed. WARNING: Partial LV 47524563-3788-4a89-a61f-4274134dea73.disk0_meta needs to be repaired or removed. WARNING: Partial LV ede9f706-a0dc-4202-96f2-1728240bbf05.disk0_data needs to be repaired or removed. WARNING: Partial LV ede9f706-a0dc-4202-96f2-1728240bbf05.disk0_meta needs to be repaired or removed. WARNING: Partial LV 731d9f1b-3f2f-4860-85b3-217a36b9c48e.disk1_data needs to be repaired or removed. WARNING: Partial LV 731d9f1b-3f2f-4860-85b3-217a36b9c48e.disk1_meta needs to be repaired or removed. 
WARNING: Partial LV f449ccfd-4e6b-42d6-9a52-838371988ab5.disk0_data needs to be repaired or removed. WARNING: Partial LV f449ccfd-4e6b-42d6-9a52-838371988ab5.disk0_meta needs to be repaired or removed. WARNING: Partial LV 69bb4f61-fd0c-4c89-a57f-5285ae99b3bd.disk0_data needs to be repaired or removed. WARNING: Partial LV 9c29c24a-97ed-4fc7-b479-7a3385365a71.disk0 needs to be repaired or removed. WARNING: Partial LV a919d93e-0f51-4e4d-9018-e25ee7d5b36b.disk0 needs to be repaired or removed. WARNING: Partial LV d2501e6b-56a4-43b6-8856-471e5d49e892.disk0_data needs to be repaired or removed. WARNING: Partial LV d2501e6b-56a4-43b6-8856-471e5d49e892.disk0_meta needs to be repaired or removed. WARNING: Partial LV 31a1f85a-ecc8-40c0-88aa-e694626906a3.disk0 needs to be repaired or removed. WARNING: Partial LV d124d70a-4776-4e00-bf0d-43511c29c534.disk0_data needs to be repaired or removed. WARNING: Partial LV d124d70a-4776-4e00-bf0d-43511c29c534.disk0_meta needs to be repaired or removed. WARNING: Partial LV f73b4499-34ec-4f70-a543-e43152a8644a.disk0 needs to be repaired or removed. WARNING: There are still partial LVs in VG xenvg. To remove them unconditionally use: vgreduce --removemissing --force. Proceeding to remove empty missing PVs. ganeti-2.9.3/test/data/xen-xm-uptime-4.0.1.txt0000644000000000000000000000024112244641676020607 0ustar00rootroot00000000000000Name ID Uptime Domain-0 0 98 days, 2:27:44 instance1.example.com 119 15 days, 20:57:07 ganeti-2.9.3/test/data/ip-addr-show-lo-oneline.txt0000644000000000000000000000037112230001635021751 0ustar00rootroot000000000000001: lo: mtu 16436 qdisc noqueue state UNKNOWN \ link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 1: lo inet 127.0.0.1/8 scope host lo 1: lo inet6 ::1/128 scope host \ valid_lft forever preferred_lft forever ganeti-2.9.3/test/data/proc_drbd8.txt0000644000000000000000000000357612230001635017454 0ustar00rootroot00000000000000version: 8.0.12 (api:86/proto:86) GIT-hash: 5c9f89594553e32adb87d9638dce591782f947e3 build by XXX 0: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r--- ns:4375577 nr:0 dw:4446279 dr:674 al:1067 bm:69 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:793749 misses:1067 starving:0 dirty:0 changed:1067 1: cs:Connected st:Secondary/Primary ds:UpToDate/UpToDate C r--- ns:738320 nr:0 dw:738320 dr:554400 al:67 bm:0 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:92464 misses:67 starving:0 dirty:0 changed:67 2: cs:Unconfigured 4: cs:WFConnection st:Primary/Unknown ds:UpToDate/DUnknown C r--- ns:738320 nr:0 dw:738320 dr:554400 al:67 bm:0 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:92464 misses:67 starving:0 dirty:0 changed:67 5: cs:Connected st:Primary/Secondary ds:UpToDate/Diskless C r--- ns:4375581 nr:0 dw:4446283 dr:674 al:1069 bm:69 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:793750 misses:1069 starving:0 dirty:0 changed:1069 6: cs:Connected st:Secondary/Primary ds:Diskless/UpToDate C r--- ns:0 nr:4375581 dw:5186925 dr:327 al:75 bm:214 lo:0 pe:0 ua:0 ap:0 7: cs:WFConnection st:Secondary/Unknown ds:UpToDate/DUnknown C r--- ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0 8: cs:StandAlone st:Secondary/Unknown ds:UpToDate/DUnknown 
r--- ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:0 misses:0 starving:0 dirty:0 changed:0 act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0 ganeti-2.9.3/test/data/lvs_lv.txt0000644000000000000000000000100012267470014016722 0ustar00rootroot00000000000000 nhasjL-cnZi-uqLS-WRLj-tkXI-nvCB-n0o2lj;df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_data;-wi-ao;-1;-1;253;0;1073741824B;1;originstname+instance1.example.com;;uZgXit-eiRr-vRqe-xpEo-e9nU-mTuR-9nfVIU;xenvg;linear;0B;0;1073741824B;;/dev/sda5:0-15;/dev/sda5(0) 5fW5mE-SBSs-GSU0-KZDg-hnwb-sZOC-zZt736;df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_meta;-wi-ao;-1;-1;253;1;134217728B;1;originstname+instance1.example.com;;uZgXit-eiRr-vRqe-xpEo-e9nU-mTuR-9nfVIU;xenvg;linear;0B;0;134217728B;;/dev/sda5:16-17;/dev/sda5(16) ganeti-2.9.3/test/data/bdev-rbd/0000755000000000000000000000000012271445545016360 5ustar00rootroot00000000000000ganeti-2.9.3/test/data/bdev-rbd/json_output_empty.txt0000644000000000000000000000000312244641676022724 0ustar00rootroot00000000000000{} ganeti-2.9.3/test/data/bdev-rbd/json_output_extra_matches.txt0000644000000000000000000000066612244641676024434 0ustar00rootroot00000000000000{"4":{"pool":"rbd","name":"d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0","snap":"-","device":"\/dev\/rbd4"},"1":{"pool":"rbd","name":"b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0","snap":"-","device":"\/dev\/rbd1"},"2":{"pool":"rbd","name":"abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0","snap":"-","device":"\/dev\/rbd2"},"3":{"pool":"rbd","name":"d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0","snap":"-","device":"\/dev\/rbd3"}} ganeti-2.9.3/test/data/bdev-rbd/plain_output_old_no_matches.txt0000644000000000000000000000023412244641676024704 0ustar00rootroot00000000000000id pool image snap device 1 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 ganeti-2.9.3/test/data/bdev-rbd/plain_output_new_extra_matches.txt0000644000000000000000000000054312244641676025431 0ustar00rootroot00000000000000id pool image snap device 4 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd4 1 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 3 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd3 ganeti-2.9.3/test/data/bdev-rbd/output_invalid.txt0000644000000000000000000000002412244641676022166 0ustar00rootroot00000000000000invalid rbd output ganeti-2.9.3/test/data/bdev-rbd/plain_output_old_extra_matches.txt0000644000000000000000000000043612244641676025417 0ustar00rootroot00000000000000id pool image snap device 4 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd4 1 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 3 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd3 ganeti-2.9.3/test/data/bdev-rbd/plain_output_new_no_matches.txt0000644000000000000000000000032512244641676024720 0ustar00rootroot00000000000000id pool image snap device 1 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 ganeti-2.9.3/test/data/bdev-rbd/plain_output_old_empty.txt0000644000000000000000000000003212244641676023716 0ustar00rootroot00000000000000id pool image snap device ganeti-2.9.3/test/data/bdev-rbd/plain_output_old_ok.txt0000644000000000000000000000033512244641676023177 0ustar00rootroot00000000000000id pool 
image snap device 1 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 3 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd3 ganeti-2.9.3/test/data/bdev-rbd/json_output_ok.txt0000644000000000000000000000051112244641676022203 0ustar00rootroot00000000000000{"1":{"pool":"rbd","name":"b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0","snap":"-","device":"\/dev\/rbd1"},"2":{"pool":"rbd","name":"abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0","snap":"-","device":"\/dev\/rbd2"},"3":{"pool":"rbd","name":"d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0","snap":"-","device":"\/dev\/rbd3"}} ganeti-2.9.3/test/data/bdev-rbd/json_output_no_matches.txt0000644000000000000000000000033412244641676023715 0ustar00rootroot00000000000000{"1":{"pool":"rbd","name":"b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0","snap":"-","device":"\/dev\/rbd1"},"2":{"pool":"rbd","name":"abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0","snap":"-","device":"\/dev\/rbd2"}} ganeti-2.9.3/test/data/bdev-rbd/plain_output_new_ok.txt0000644000000000000000000000032512244641676023211 0ustar00rootroot000000000000001 rbd b9e31bb3-4d4f-4a2c-bc63-207a0bc4b287.rbd.disk0 - /dev/rbd1 2 rbd abe7957a-ec96-490f-9c08-53b1c51cecf0.rbd.disk0 - /dev/rbd2 3 rbd d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0 - /dev/rbd3 ganeti-2.9.3/test/data/NEWS_OK.txt0000644000000000000000000000175112244641676016610 0ustar00rootroot00000000000000News ==== Version 2.8.0 beta1 ------------------- *(unreleased)* Incompatible/important changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Stuff New features ~~~~~~~~~~~~ - More stuff Version 2.7.0 rc2 ----------------- *(Released Fri, 24 May 2013)* Incompatible/important changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Other stuff Since rc1: - Many bugfixes Version 2.7.0 rc1 ----------------- *(Released Fri, 3 May 2013)* - Things Version 2.7.0 beta1 ------------------- *(Released Wed, 6 Feb 2013)* This was the first beta release of the 2.7 series. All important changes are listed in the latest 2.7 entry. Version 2.6.2 ------------- *(Released Fri, 21 Dec 2012)* Hic sunt pink bunnies. Version 2.6.1 ------------- *(Released Fri, 12 Oct 2012)* Team members come, team members go. Version 2.6.0 ------------- *(Released Fri, 27 Jul 2012)* Many things happened before this point. .. vim: set textwidth=72 syntax=rst : .. Local Variables: .. mode: rst .. fill-column: 72 .. 
End: ganeti-2.9.3/test/data/instance-minor-pairing.txt0000644000000000000000000000014212244641676022011 0ustar00rootroot00000000000000[["machine1.example.com",0,"instance1.example.com","disk/0","secondary", "machine2.example.com"]] ganeti-2.9.3/test/data/cert1.pem0000644000000000000000000000145612230001635016401 0ustar00rootroot00000000000000-----BEGIN CERTIFICATE----- MIICKzCCAdWgAwIBAgIJALdZsXwXOtW7MA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX aWRnaXRzIFB0eSBMdGQwHhcNMTAwMjIzMTAxMjQ3WhcNMTAwMzAyMTAxMjQ3WjBF MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50 ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALIL AmF7Hay9WuhREpRqG2KPCFNbjVGeZ6cS/1FImhHCw40JWDElQJp4lprIly7mkp+7 seIEa7/kf0y9iy0o7s0CAwEAAaOBpzCBpDAdBgNVHQ4EFgQUBKWDVk2Hp9jW+hiD wuuecaBB0W0wdQYDVR0jBG4wbIAUBKWDVk2Hp9jW+hiDwuuecaBB0W2hSaRHMEUx CzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRl cm5ldCBXaWRnaXRzIFB0eSBMdGSCCQC3WbF8FzrVuzAMBgNVHRMEBTADAQH/MA0G CSqGSIb3DQEBBQUAA0EAg7hwCEhY2+MmQYXqe8szmgkXe73qv+i2XyZGytUcdaB/ sd2ydbMLIZlWHD5Zb6xBVDVJpLttduW0cK9daFvElQ== -----END CERTIFICATE----- ganeti-2.9.3/test/data/proc_drbd83_sync_krnl2.6.39.txt0000644000000000000000000000154412230001635022272 0ustar00rootroot00000000000000version: 8.3.1 (api:88/proto:86-89) GIT-hash: fd40f4a8f9104941537d1afc8521e584a6d3003c build by phil@fat-tyre, 2009-03-27 12:19:49 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r---- ns:140978 nr:0 dw:9906 dr:131533 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 1: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r--- ns:0 nr:140980 dw:140980 dr:0 al:0 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0 2: cs:Unconfigured 3: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent A r----- ns:373888 nr:0 dw:0 dr:374088 al:0 bm:22 lo:7 pe:27 ua:7 ap:0 ep:1 wo:f oos:15358208 [>....................] sync'ed: 2.4% (14996/15360)Mfinish: 0:04:08 speed: 61,736 (61,736) K/sec 4: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C r---- ns:140978 nr:0 dw:9906 dr:131534 al:27 bm:8 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0 ganeti-2.9.3/test/data/ip-addr-show-lo-ipv4.txt0000644000000000000000000000014712230001635021203 0ustar00rootroot000000000000001: lo: mtu 16436 qdisc noqueue state UNKNOWN inet 127.0.0.1/8 scope host lo ganeti-2.9.3/test/data/vgreduce-removemissing-2.02.66-ok.txt0000644000000000000000000000016312230001635023324 0ustar00rootroot00000000000000 Couldn't find device with uuid NzfYON-F7ky-1Szf-aGf1-v8Xa-Bt1W-8V3bou. 
Wrote out consistent volume group xenvg ganeti-2.9.3/test/data/xen-xm-info-4.0.1.txt0000644000000000000000000000226512244641676020247 0ustar00rootroot00000000000000host : host.example.com release : 3.2.0 version : #1 SMP Tue Jan 1 00:00:00 UTC 2013 machine : x86_64 nr_cpus : 4 nr_nodes : 1 cores_per_socket : 2 threads_per_core : 1 cpu_mhz : 2800 hw_caps : bfebfbff:20100800:00000000:00000940:0004e3bd:00000000:00000001:00000000 virt_caps : total_memory : 16378 free_memory : 8004 node_to_cpu : node0:0-3 node_to_memory : node0:8004 node_to_dma32_mem : node0:2985 max_node_id : 0 xen_major : 4 xen_minor : 0 xen_extra : .1 xen_caps : xen-3.0-x86_64 xen-3.0-x86_32p xen_scheduler : credit xen_pagesize : 4096 platform_params : virt_start=0xffff800000000000 xen_changeset : unavailable xen_commandline : placeholder dom0_mem=1024M com1=115200,8n1 console=com1 cc_compiler : gcc version 4.4.5 (Debian 4.4.5-8) cc_compile_by : user cc_compile_domain : example.com cc_compile_date : Tue Jan 1 00:00:00 UTC 2013 xend_config_format : 4 ganeti-2.9.3/test/data/bdev-drbd-disk.txt0000644000000000000000000000053712230001635020201 0ustar00rootroot00000000000000disk { size 0s _is_default; # bytes on-io-error detach; fencing dont-care _is_default; } syncer { rate 250k _is_default; # bytes/second after -1 _is_default; al-extents 257; } _this_host { device "/dev/drbd58"; disk "/dev/xenvg/test.data"; meta-disk "/dev/xenvg/test.meta" [ 0 ]; } ganeti-2.9.3/test/data/NEWS_previous_unreleased.txt0000644000000000000000000000173212244641676022361 0ustar00rootroot00000000000000News ==== Version 2.8.0 beta1 ------------------- *(unreleased)* Incompatible/important changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Stuff New features ~~~~~~~~~~~~ - More stuff Version 2.7.0 rc2 ----------------- *(Released Fri, 24 May 2013)* Incompatible/important changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Other stuff Since rc1: - Many bugfixes Version 2.7.0 rc1 ----------------- *(Released Fri, 3 May 2013)* - Things Version 2.7.0 beta1 ------------------- *(Released Wed, 6 Feb 2013)* This was the first beta release of the 2.7 series. All important changes are listed in the latest 2.7 entry. Version 2.6.2 ------------- *(Released Fri, 21 Dec 2012)* Hic sunt pink bunnies. Version 2.6.1 ------------- *(unreleased)* Team members come, team members go. Version 2.6.0 ------------- *(Released Fri, 27 Jul 2012)* Many things happened before this point. .. vim: set textwidth=72 syntax=rst : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/test/data/ip-addr-show-lo-ipv6.txt0000644000000000000000000000016412230001635021204 0ustar00rootroot000000000000001: lo: mtu 16436 inet6 ::1/128 scope host valid_lft forever preferred_lft forever ganeti-2.9.3/test/data/vgreduce-removemissing-2.02.02.txt0000644000000000000000000000072012230001635022702 0ustar00rootroot00000000000000 Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. Couldn't find all physical volumes for volume group xenvg. Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. Couldn't find all physical volumes for volume group xenvg. Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. Couldn't find device with uuid 'gg4cmC-4lrT-EN1v-39OA-6S2b-6eEI-wWlJJJ'. 
Wrote out consistent volume group xenvg ganeti-2.9.3/test/data/ip-addr-show-lo-oneline-ipv4.txt0000644000000000000000000000005012230001635022623 0ustar00rootroot000000000000001: lo inet 127.0.0.1/8 scope host lo ganeti-2.9.3/test/data/proc_drbd80-emptyversion.txt0000644000000000000000000000075312267470014022303 0ustar00rootroot00000000000000GIT-hash: 5c9f89594553e32adb87d9638dce591782f947e3 build by root@node1.example.com, 2009-05-22 12:47:52 0: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r--- ns:78728316 nr:0 dw:77675644 dr:1277039 al:254 bm:270 lo:0 pe:0 ua:0 ap:0 resync: used:0/61 hits:65657 misses:135 starving:0 dirty:0 changed:135 act_log: used:0/257 hits:11378843 misses:254 starving:0 dirty:0 changed:254 1: cs:Unconfigured 2: cs:Unconfigured 5: cs:Unconfigured 6: cs:Unconfigured ganeti-2.9.3/test/data/xen-xm-list-long-4.0.1.txt0000644000000000000000000001066012244641676021222 0ustar00rootroot00000000000000(domain (domid 0) (cpu_weight 2048) (cpu_cap 0) (bootloader ) (on_crash restart) (uuid 00000000-0000-0000-0000-000000000000) (bootloader_args ) (vcpus 24) (name Domain-0) (cpus ((0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) ) ) (on_reboot restart) (on_poweroff destroy) (maxmem 16777215) (memory 1023) (shadow_memory 0) (features ) (on_xend_start ignore) (on_xend_stop ignore) (cpu_time 184000.41332) (online_vcpus 1) (image (linux (kernel ) (superpages 0) (nomigrate 0) (tsc_mode 0))) (status 2) (state r-----) ) (domain (domid 119) (cpu_weight 256) (cpu_cap 0) (bootloader ) (on_crash restart) (uuid e430b4b8-dc91-9390-dfe0-b83c138ea0aa) (bootloader_args ) (vcpus 1) (description ) (name instance1.example.com) (cpus (())) (on_reboot restart) (on_poweroff destroy) (maxmem 128) (memory 128) (shadow_memory 0) (features ) (on_xend_start ignore) (on_xend_stop ignore) (start_time 1357749308.05) (cpu_time 24.116146647) (online_vcpus 1) (image (linux (kernel /boot/vmlinuz-ganetixenu) (args 'root=/dev/xvda1 ro') (superpages 0) (videoram 4) (pci ()) (nomigrate 0) (tsc_mode 0) (notes (HV_START_LOW 18446603336221196288) 
(FEATURES '!writable_page_tables|pae_pgdir_above_4gb') (VIRT_BASE 18446744071562067968) (GUEST_VERSION 2.6) (PADDR_OFFSET 0) (GUEST_OS linux) (HYPERCALL_PAGE 18446744071578849280) (LOADER generic) (SUSPEND_CANCEL 1) (PAE_MODE yes) (ENTRY 18446744071592116736) (XEN_VERSION xen-3.0) ) ) ) (status 2) (state -b----) (store_mfn 8836555) (console_mfn 8735251) (device (vif (bridge xen-br0) (mac aa:00:00:30:8d:9d) (script /etc/xen/scripts/vif-bridge) (uuid f57c4758-cf0a-8227-6d13-fe26ece82d75) (backend 0) ) ) (device (console (protocol vt100) (location 2) (uuid 7695737a-ffc2-4e0d-7f6d-734143b8afc4) ) ) (device (vbd (protocol x86_64-abi) (uuid 409e1ff8-435a-4704-80bb-4bfe800d932e) (bootable 1) (dev sda:disk) (uname phy:/var/run/ganeti/instance-disks/instance1.example.com:0 ) (mode w) (backend 0) (VDI ) ) ) ) ganeti-2.9.3/test/data/vgs-missing-pvs-2.02.66.txt0000644000000000000000000000016012230001635021275 0ustar00rootroot00000000000000 Couldn't find device with uuid bHRa26-svpL-ihJX-e0S4-2HNz-wAAi-AlBFtl. xenvg 2 52 0 wz-pn- 1.31t 1.07t ganeti-2.9.3/test/data/kvm_1.1.2_help.txt0000644000000000000000000003462412244641676017774 0ustar00rootroot00000000000000QEMU emulator version 1.1.2 (qemu-kvm-1.1.2+dfsg-2~bpo60+1, Debian), Copyright (c) 2003-2008 Fabrice Bellard usage: kvm [options] [disk_image] 'disk_image' is a raw hard disk image for IDE hard disk 0 Standard options: -h or -help display this help and exit -version display version information and exit -machine [type=]name[,prop[=value][,...]] selects emulated machine (-machine ? for list) property accel=accel1[:accel2[:...]] selects accelerator supported accelerators are kvm, xen, tcg (default: tcg) kernel_irqchip=on|off controls accelerated irqchip support kvm_shadow_mem=size of KVM shadow MMU -cpu cpu select CPU (-cpu ? for list) -smp n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets] set the number of CPUs to 'n' [default=1] maxcpus= maximum number of total cpus, including offline CPUs for hotplug, etc cores= number of CPU cores on one socket threads= number of threads on one CPU core sockets= number of discrete sockets in the system -numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node] -fda/-fdb file use 'file' as floppy disk 0/1 image -hda/-hdb file use 'file' as IDE hard disk 0/1 image -hdc/-hdd file use 'file' as IDE hard disk 2/3 image -cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master) -drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i] [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off] [,cache=writethrough|writeback|none|directsync|unsafe][,format=f] [,serial=s][,addr=A][,id=name][,aio=threads|native] [,readonly=on|off][,copy-on-read=on|off] [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]] use 'file' as a drive image -set group.id.arg=value set parameter for item of type i.e. 
-set drive.$id.file=/path/to/image -global driver.prop=value set a global default for a driver property -mtdblock file use 'file' as on-board Flash memory image -sd file use 'file' as SecureDigital card image -pflash file use 'file' as a parallel flash image -boot [order=drives][,once=drives][,menu=on|off] [,splash=sp_name][,splash-time=sp_time] 'drives': floppy (a), hard disk (c), CD-ROM (d), network (n) 'sp_name': the file's name that would be passed to bios as logo picture, if menu=on 'sp_time': the period that splash picture last if menu=on, unit is ms -snapshot write to temporary files instead of disk image files -m megs set virtual RAM size to megs MB [default=128] -mem-path FILE provide backing storage for guest RAM -mem-prealloc preallocate guest memory (use with -mem-path) -k language use keyboard layout (for example 'fr' for French) -audio-help print list of audio drivers and their options -soundhw c1,... enable audio support and only specified sound cards (comma separated list) use -soundhw ? to get the list of supported cards use -soundhw all to enable all of them -balloon none disable balloon device -balloon virtio[,addr=str] enable virtio balloon device (default) -usb enable the USB driver (will be the default soon) -usbdevice name add the host or guest USB device 'name' -device driver[,prop[=value][,...]] add device (based on driver) prop=value,... sets driver properties use -device ? to print all possible drivers use -device driver,? to print all possible properties File system options: -fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}] [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd] Virtual File system pass-through options: -virtfs local,path=path,mount_tag=tag,security_model=[mapped-xattr|mapped-file|passthrough|none] [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd] -virtfs_synth Create synthetic file system image -name string1[,process=string2] set the name of the guest string1 sets the window title and string2 the process name (on Linux) -uuid %08x-%04x-%04x-%04x-%012x specify machine UUID Display options: -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off] [,window_close=on|off]|curses|none| vnc=[,] select display type -nographic disable graphical output and redirect serial I/Os to console -curses use a curses/ncurses interface instead of SDL -no-frame open SDL window without a frame and window decorations -alt-grab use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt) -ctrl-grab use Right-Ctrl to grab mouse (instead of Ctrl-Alt) -no-quit disable SDL window close capability -sdl enable SDL -spice enable spice -portrait rotate graphical output 90 deg left (only PXA LCD) -rotate rotate graphical output some deg left (only PXA LCD) -vga [std|cirrus|vmware|qxl|xenfb|none] select video card type -full-screen start in full screen -vnc display start a VNC server on display i386 target only: -win2k-hack use it when installing Windows 2000 to avoid a disk full bug -no-fd-bootchk disable boot signature checking for floppy disks -no-acpi disable ACPI -no-hpet disable HPET -acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...] 
ACPI table description -smbios file=binary load SMBIOS entry from binary file -smbios type=0[,vendor=str][,version=str][,date=str][,release=%d.%d] specify SMBIOS type 0 fields -smbios type=1[,manufacturer=str][,product=str][,version=str][,serial=str] [,uuid=uuid][,sku=str][,family=str] specify SMBIOS type 1 fields Network options: -net nic[,vlan=n][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v] create a new Network Interface Card and connect it to VLAN 'n' -net user[,vlan=n][,name=str][,net=addr[/mask]][,host=addr][,restrict=on|off] [,hostname=host][,dhcpstart=addr][,dns=addr][,tftp=dir][,bootfile=f] [,hostfwd=rule][,guestfwd=rule][,smb=dir[,smbserver=addr]] connect the user mode network stack to VLAN 'n', configure its DHCP server and enabled optional services -net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h][,vhostforce=on|off] connect the host TAP network interface to VLAN 'n' use network scripts 'file' (default=/etc/kvm/kvm-ifup) to configure it and 'dfile' (default=/etc/kvm/kvm-ifdown) to deconfigure it use '[down]script=no' to disable script execution use network helper 'helper' (default=/usr/lib/qemu-bridge-helper) to configure it use 'fd=h' to connect to an already opened TAP interface use 'sndbuf=nbytes' to limit the size of the send buffer (the default is disabled 'sndbuf=0' to enable flow control set 'sndbuf=1048576') use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition use vhost=on to enable experimental in kernel accelerator (only has effect for virtio guests which use MSIX) use vhostforce=on to force vhost on for non-MSIX virtio guests use 'vhostfd=h' to connect to an already opened vhost net device -net bridge[,vlan=n][,name=str][,br=bridge][,helper=helper] connects a host TAP network interface to a host bridge device 'br' (default=br0) using the program 'helper' (default=/usr/lib/qemu-bridge-helper) -net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port] connect the vlan 'n' to another VLAN using a socket connection -net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]] connect the vlan 'n' to multicast maddr and port use 'localaddr=addr' to specify the host address to send packets from -net socket[,vlan=n][,name=str][,fd=h][,udp=host:port][,localaddr=host:port] connect the vlan 'n' to another VLAN using an UDP tunnel -net vde[,vlan=n][,name=str][,sock=socketpath][,port=n][,group=groupname][,mode=octalmode] connect the vlan 'n' to port 'n' of a vde switch running on host and listening for incoming connections on 'socketpath'. Use group 'groupname' and mode 'octalmode' to change default ownership and permissions for communication port. -net dump[,vlan=n][,file=f][,len=n] dump traffic on vlan 'n' to file 'f' (max n bytes per packet) -net none use it alone to have zero network devices. If no -net option is provided, the default is '-net nic -net user' -netdev [user|tap|bridge|vde|socket],id=str[,option][,option][,...] 
Character device options: -chardev null,id=id[,mux=on|off] -chardev socket,id=id[,host=host],port=host[,to=to][,ipv4][,ipv6][,nodelay] [,server][,nowait][,telnet][,mux=on|off] (tcp) -chardev socket,id=id,path=path[,server][,nowait][,telnet],[mux=on|off] (unix) -chardev udp,id=id[,host=host],port=port[,localaddr=localaddr] [,localport=localport][,ipv4][,ipv6][,mux=on|off] -chardev msmouse,id=id[,mux=on|off] -chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]] [,mux=on|off] -chardev file,id=id,path=path[,mux=on|off] -chardev pipe,id=id,path=path[,mux=on|off] -chardev pty,id=id[,mux=on|off] -chardev stdio,id=id[,mux=on|off][,signal=on|off] -chardev braille,id=id[,mux=on|off] -chardev tty,id=id,path=path[,mux=on|off] -chardev parport,id=id,path=path[,mux=on|off] -chardev spicevmc,id=id,name=name[,debug=debug] -iscsi [user=user][,password=password] [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE [,initiator-name=iqn] iSCSI session parameters Bluetooth(R) options: -bt hci,null dumb bluetooth HCI - doesn't respond to commands -bt hci,host[:id] use host's HCI with the given name -bt hci[,vlan=n] emulate a standard HCI in virtual scatternet 'n' -bt vhci[,vlan=n] add host computer to virtual scatternet 'n' using VHCI -bt device:dev[,vlan=n] emulate a bluetooth device 'dev' in scatternet 'n' Linux/Multiboot boot specific: -kernel bzImage use 'bzImage' as kernel image -append cmdline use 'cmdline' as kernel command line -initrd file use 'file' as initial ram disk -dtb file use 'file' as device tree image Debug/Expert options: -serial dev redirect the serial port to char device 'dev' -parallel dev redirect the parallel port to char device 'dev' -monitor dev redirect the monitor to char device 'dev' -qmp dev like -monitor but opens in 'control' mode -mon chardev=[name][,mode=readline|control][,default] -debugcon dev redirect the debug console to char device 'dev' -pidfile file write PID to 'file' -singlestep always run in singlestep mode -S freeze CPU at startup (use 'c' to start execution) -gdb dev wait for gdb connection on 'dev' -s shorthand for -gdb tcp::1234 -d item1,... output log to /tmp/qemu.log (use -d ? for a list of log items) -D logfile output log to logfile (instead of the default /tmp/qemu.log) -hdachs c,h,s[,t] force hard disk 0 physical geometry and the optional BIOS translation (t=none or lba) (usually QEMU can guess them) -L path set the directory for the BIOS, VGA BIOS and keymaps -bios file set the filename for the BIOS -enable-kvm enable KVM full virtualization support -xen-domid id specify xen guest domain id -xen-create create domain using xen hypercalls, bypassing xend warning: should not be used when xend is in use -xen-attach attach to existing xen domain xend will use this when starting QEMU -no-reboot exit instead of rebooting -no-shutdown stop before shutdown -loadvm [tag|id] start right away with a saved state (loadvm in monitor) -daemonize daemonize QEMU after initializing -option-rom rom load a file, rom, into the option ROM space -clock force the use of the given methods for timer alarm. To see what timers are available use -clock ? 
-rtc [base=utc|localtime|date][,clock=host|rt|vm][,driftfix=none|slew] set the RTC base and clock, enable drift fix for clock ticks (x86 only) -icount [N|auto] enable virtual instruction counter with 2^N clock ticks per instruction -watchdog i6300esb|ib700 enable virtual hardware watchdog [default=none] -watchdog-action reset|shutdown|poweroff|pause|debug|none action when watchdog fires [default=reset] -echr chr set terminal escape character instead of ctrl-a -virtioconsole c set virtio console -show-cursor show cursor -tb-size n set TB size -incoming p prepare for incoming migration, listen on port p -nodefaults don't create default devices -chroot dir chroot to dir just before starting the VM -runas user change to user id user just before starting the VM -readconfig -writeconfig read/write config file -nodefconfig do not load default config files at startup -no-user-config do not load user-provided config files at startup -trace [events=][,file=] specify tracing options -qtest CHR specify tracing options -qtest-log LOG specify tracing options -no-kvm disable KVM hardware virtualization -no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC -no-kvm-pit disable KVM kernel mode PIT -no-kvm-pit-reinjection disable KVM kernel mode PIT interrupt reinjection During emulation, the following keys are useful: ctrl-alt-f toggle full screen ctrl-alt-n switch to virtual console 'n' ctrl-alt toggle mouse and keyboard grab When using -nographic, press 'ctrl-a h' to get some help. ganeti-2.9.3/test/data/instance-prim-sec.txt0000644000000000000000000000376312267470014020760 0ustar00rootroot00000000000000[[{"admin_state": "up", "beparams": {}, "ctime": 1372838883.9710441, "disk_template": "drbd", "disks": [ { "children": [ { "dev_type": "plain", "logical_id": [ "xenvg", "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_data" ], "params": {}, "physical_id": [ "xenvg", "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_data" ], "size": 1024, "uuid": "eaff6322-1bfb-4d59-b306-4535730917cc" }, { "dev_type": "plain", "logical_id": [ "xenvg", "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_meta" ], "params": {}, "physical_id": [ "xenvg", "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_meta" ], "size": 128, "uuid": "bf512e95-2a49-4cb3-8d1f-30a503f6bf1b" } ], "dev_type": "drbd", "iv_name": "disk/0", "logical_id": [ "60e687a0-21fc-4577-997f-ccd08925fa65", "c739c7f3-79d8-4e20-ac68-662e16577d2e", 11000, 0, 0, "9bdb15fb7ab6bb4610a313d654ed4d0d2433713e" ], "mode": "rw", "params": {}, "physical_id": [ "172.16.241.3", 11000, "172.16.241.2", 11000, 0, "9bdb15fb7ab6bb4610a313d654ed4d0d2433713e" ], "size": 1024, "uuid": "5d61e205-bf89-4ba8-a319-589b7bb7419e" } ], "disks_active": true, "hvparams": {}, "hypervisor": "xen-pvm", "mtime": 1372838946.2599809, "name": "instance1.example.com", "nics": [ { "mac": "aa:00:00:1d:ba:63", "nicparams": {}, "uuid": "7b7f4249-fab8-4b3f-b446-d7a2aff37644" } ], "os": "busybox", "osparams": {}, "primary_node": "60e687a0-21fc-4577-997f-ccd08925fa65", "serial_no": 2, "uuid": "aec390cb-5eae-44e6-bcc2-ec14d31347f0" }], []] ganeti-2.9.3/test/autotools/0000755000000000000000000000000012271445544016012 5ustar00rootroot00000000000000ganeti-2.9.3/test/autotools/autotools-check-news.test0000644000000000000000000000121312244641676022772 0ustar00rootroot00000000000000# Test a correct NEWS file against a stable release RELEASE=2.6.2 ./autotools/check-news < $TESTDATA_DIR/NEWS_OK.txt >>>= 0 # Test a correct NEWS file against an alpha release RELEASE=2.8.0~alpha ./autotools/check-news < $TESTDATA_DIR/NEWS_OK.txt >>>= 0 # Test 
a NEWS file with previous unreleased versions against a stable release RELEASE=2.6.2 ./autotools/check-news < $TESTDATA_DIR/NEWS_previous_unreleased.txt >>>2/Unreleased version after current release 2.6.2/ >>>= !0 # Test a NEWS file with previous unreleased versions against an alpha release RELEASE=2.8.0~alpha ./autotools/check-news < $TESTDATA_DIR/NEWS_previous_unreleased.txt >>>= 0 ganeti-2.9.3/test/hs/0000755000000000000000000000000012271445545014374 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/htest.hs0000644000000000000000000001011512271422343016046 0ustar00rootroot00000000000000{-| Unittest runner for ganeti-htools. -} {- Copyright (C) 2009, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Main(main) where import Data.Monoid (mappend) import Test.Framework import System.Environment (getArgs) import System.Log.Logger import Test.Ganeti.TestImports () import Test.Ganeti.Attoparsec import Test.Ganeti.BasicTypes import Test.Ganeti.Common import Test.Ganeti.Confd.Utils import Test.Ganeti.Confd.Types import Test.Ganeti.Daemon import Test.Ganeti.Errors import Test.Ganeti.HTools.Backend.Simu import Test.Ganeti.HTools.Backend.Text import Test.Ganeti.HTools.CLI import Test.Ganeti.HTools.Cluster import Test.Ganeti.HTools.Container import Test.Ganeti.HTools.Graph import Test.Ganeti.HTools.Instance import Test.Ganeti.HTools.Loader import Test.Ganeti.HTools.Node import Test.Ganeti.HTools.PeerMap import Test.Ganeti.HTools.Types import Test.Ganeti.Hypervisor.Xen.XmParser import Test.Ganeti.JSON import Test.Ganeti.Jobs import Test.Ganeti.JQueue import Test.Ganeti.Luxi import Test.Ganeti.Network import Test.Ganeti.Objects import Test.Ganeti.OpCodes import Test.Ganeti.Query.Filter import Test.Ganeti.Query.Language import Test.Ganeti.Query.Network import Test.Ganeti.Query.Query import Test.Ganeti.Rpc import Test.Ganeti.Runtime import Test.Ganeti.Ssconf import Test.Ganeti.Storage.Diskstats.Parser import Test.Ganeti.Storage.Drbd.Parser import Test.Ganeti.Storage.Drbd.Types import Test.Ganeti.Storage.Lvm.LVParser import Test.Ganeti.THH import Test.Ganeti.Types import Test.Ganeti.Utils -- | Our default test options, overriding the built-in test-framework -- ones (but not the supplied command line parameters). defOpts :: TestOptions defOpts = TestOptions { topt_seed = Nothing , topt_maximum_generated_tests = Just 500 , topt_maximum_unsuitable_generated_tests = Just 5000 , topt_maximum_test_size = Nothing , topt_maximum_test_depth = Nothing , topt_timeout = Nothing }
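-- A sketch of how these defaults combine with user options in 'main' below -- (the field-wise behaviour is that of test-framework's 'Monoid' instance -- for 'TestOptions'): in -- -- > defOpts `mappend` cliOpts -- -- an option passed explicitly on the command line, e.g. -- --maximum-generated-tests=100, wins over the 'Just 500' above, while any -- field the user leaves unset falls back to these defaults. -- | All our defined tests.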
allTests :: [Test] allTests = [ testBasicTypes , testAttoparsec , testCommon , testConfd_Types , testConfd_Utils , testDaemon , testBlock_Diskstats_Parser , testBlock_Drbd_Parser , testBlock_Drbd_Types , testErrors , testHTools_Backend_Simu , testHTools_Backend_Text , testHTools_CLI , testHTools_Cluster , testHTools_Container , testHTools_Graph , testHTools_Instance , testHTools_Loader , testHTools_Node , testHTools_PeerMap , testHTools_Types , testHypervisor_Xen_XmParser , testJSON , testJobs , testJQueue , testLuxi , testNetwork , testObjects , testOpCodes , testQuery_Filter , testQuery_Language , testQuery_Network , testQuery_Query , testRpc , testRuntime , testSsconf , testStorage_Lvm_LVParser , testTHH , testTypes , testUtils ] -- | Main function. Note we don't use defaultMain since we want to -- explicitly control our test sizes (and override the default). main :: IO () main = do ropts <- getArgs >>= interpretArgsOrExit let opts = maybe defOpts (defOpts `mappend`) $ ropt_test_options ropts -- silence the logging system, so that tests can execute I/O actions -- which create logs without polluting stderr -- FIXME: improve this by allowing tests to use logging if needed updateGlobalLogger rootLoggerName (setLevel EMERGENCY) defaultMainWithOpts allTests (ropts { ropt_test_options = Just opts }) ganeti-2.9.3/test/hs/cli-tests-defs.sh0000644000000000000000000000241612244641676017564 0ustar00rootroot00000000000000# # Copyright (C) 2012 Google Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is a shell testing configuration fragment. HBINARY=${HBINARY:-./test/hs/hpc-htools} export TESTDATA_DIR=${TOP_SRCDIR:-.}/test/data/htools export PYTESTDATA_DIR=${TOP_SRCDIR:-.}/test/data hbal() { HTOOLS=hbal $HBINARY "$@" } hscan() { HTOOLS=hscan $HBINARY "$@" } hail() { HTOOLS=hail $HBINARY "$@" } hspace() { HTOOLS=hspace $HBINARY "$@" } hinfo() { HTOOLS=hinfo $HBINARY "$@" } hcheck() { HTOOLS=hcheck $HBINARY "$@" } hroller() { HTOOLS=hroller $HBINARY "$@" } ALL_ROLES="hbal hscan hail hspace hinfo hcheck hroller"
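# Usage sketch (wrappers as defined above): after sourcing this fragment a # test can simply run, e.g., # hbal -t cluster.data # which expands to: HTOOLS=hbal ./test/hs/hpc-htools -t cluster.data # i.e. the HTOOLS variable tells the multi-role hpc-htools binary which of # the $ALL_ROLES personalities to assume (otherwise it dispatches on its # program name, as the installed symlinks do). ganeti-2.9.3/test/hs/hpc-mon-collector.hs0000644000000000000000000000203612244641676020261 0ustar00rootroot00000000000000{-| Main binary for all stand-alone data collectors -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.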
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Main (main) where import Ganeti.Common import Ganeti.DataCollectors.CLI (genericOptions, defaultOptions) import Ganeti.DataCollectors.Program (personalities) -- | Simple main function. main :: IO () main = genericMainCmds defaultOptions personalities genericOptions ganeti-2.9.3/test/hs/offline-test.sh0000744000000000000000000000760612267470014017333 0ustar00rootroot00000000000000#!/bin/bash # Copyright (C) 2012 Google Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is an offline testing script for most/all of the htools # programs, checking basic command line functionality. # Optional argument that specifies the test files to run. If not # specified, then all tests are run. # # For example, a value of 'balancing' runs the file # 'shelltests/htools-balancing.test'. Multiple files can be specified # using shell notation, for example, '{balancing,basic}'. TESTS=${1:-*} set -e set -o pipefail . $(dirname $0)/cli-tests-defs.sh echo Running offline htools tests export T=`mktemp -d` trap 'rm -rf $T' EXIT trap 'echo FAIL to build test files' ERR echo Using $T as temporary dir echo -n Generating hspace simulation data for hinfo and hbal... # this cluster spec should be fine ./test/hs/hspace --simu p,4,8T,64g,16 -S $T/simu-onegroup \ --disk-template drbd -l 8 -v -v -v >/dev/null 2>&1 echo OK echo -n Generating hinfo and hbal test files for multi-group... ./test/hs/hspace --simu p,4,8T,64g,16 --simu p,4,8T,64g,16 \ -S $T/simu-twogroups --disk-template drbd -l 8 >/dev/null 2>&1 echo OK echo -n Generating test files for rebalancing... # we generate a cluster with two node groups, one with unallocable # policy, then we change all nodes from this group to the allocable # one, and we check for rebalancing FROOT="$T/simu-rebal-orig" ./test/hs/hspace --simu u,4,8T,64g,16 --simu p,4,8T,64g,16 \ -S $FROOT --disk-template drbd -l 8 >/dev/null 2>&1 for suffix in standard tiered; do RELOC="$T/simu-rebal-merged.$suffix" # this relocates the nodes sed -re 's/^(node-.*|fake-uuid-)-02(|.*)/\1-01\2/' \ < $FROOT.$suffix > $RELOC done export BACKEND_BAL_STD="-t$T/simu-rebal-merged.standard" export BACKEND_BAL_TIER="-t$T/simu-rebal-merged.tiered" echo OK # For various tests export BACKEND_DYNU="-t $T/simu-onegroup.standard" export BACKEND_EXCL="-t $T/simu-onegroup.standard" echo -n Generating data files for IAllocator checks... 
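# The loop below derives one request per evacuation mode from the generic # hail-node-evac.json fixture: the '"evac_mode": "all"' field is rewritten to # the respective mode, and the '"spindles": N,' entries (only meaningful with # exclusive storage) are stripped.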
for evac_mode in primary-only secondary-only all; do sed -e 's/"evac_mode": "all"/"evac_mode": "'${evac_mode}'"/' \ -e 's/"spindles": [0-9]\+,//' \ < $TESTDATA_DIR/hail-node-evac.json \ > $T/hail-node-evac.json.$evac_mode done for bf in hail-alloc-drbd hail-alloc-invalid-twodisks hail-alloc-twodisks \ hail-change-group hail-node-evac hail-reloc-drbd hail-alloc-spindles; do f=$bf.json sed -e 's/"exclusive_storage": false/"exclusive_storage": true/' \ < $TESTDATA_DIR/$f > $T/$f.excl-stor sed -e 's/"exclusive_storage": false/"exclusive_storage": true/' \ -e 's/"spindles": [0-9]\+,//' \ < $TESTDATA_DIR/$f > $T/$f.fail-excl-stor done echo OK echo -n Checking file-based RAPI... mkdir -p $T/hscan export RAPI_URL="file://$TESTDATA_DIR/rapi" ./test/hs/hscan -d $T/hscan/ -p -v -v $RAPI_URL >/dev/null 2>&1 # check that the file parsing is correct, i.e. hscan saves correct text # files, and is idempotent (rapi+text == rapi); more is tested in # shelltest later RAPI_TXT="$(ls $T/hscan/*.data|head -n1)" ./test/hs/hinfo -p --print-instances -m $RAPI_URL > $T/hscan/direct.hinfo 2>&1 ./test/hs/hinfo -p --print-instances -t $RAPI_TXT > $T/hscan/fromtext.hinfo 2>&1 echo OK echo Running shelltest... shelltest $SHELLTESTARGS \ ${TOP_SRCDIR:-.}/test/hs/shelltests/htools-$TESTS.testganeti-2.9.3/test/hs/shelltests/0000755000000000000000000000000012271445545016566 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/shelltests/htools-hail.test0000644000000000000000000001247512267470014021715 0ustar00rootroot00000000000000# test that on invalid files it can't parse the request ./test/hs/hail /dev/null >>>2 /Invalid JSON/ >>>= !0 # another invalid example echo '[]' | ./test/hs/hail - >>>2 /Unable to read JSObject/ >>>= !0 # empty dict echo '{}' | ./test/hs/hail - >>>2 /key 'request' not found/ >>>= !0 echo '{"request": 0}' | ./test/hs/hail - >>>2 /key 'request'/ >>>= !0 ./test/hs/hail $TESTDATA_DIR/hail-invalid-reloc.json >>>2 /key 'name': Unable to read String/ >>>= !0 # and now start the real tests ./test/hs/hail $TESTDATA_DIR/hail-alloc-drbd.json >>> /"success":true,.*,"result":\["node2","node1"\]/ >>>= 0 ./test/hs/hail $TESTDATA_DIR/hail-reloc-drbd.json >>> /"success":true,.*,"result":\["node1"\]/ >>>= 0 ./test/hs/hail $TESTDATA_DIR/hail-node-evac.json >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 ./test/hs/hail $TESTDATA_DIR/hail-change-group.json >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 # check that hail correctly applies the disk policy on a per-disk basis ./test/hs/hail $TESTDATA_DIR/hail-alloc-twodisks.json >>> /"success":true,.*,"result":\["node1"\]/ >>>= 0 ./test/hs/hail $TESTDATA_DIR/hail-alloc-invalid-twodisks.json >>> /"success":false,.*FailDisk: 1/ >>>= 0 # check that hail honors network requirements ./test/hs/hail $TESTDATA_DIR/hail-alloc-restricted-network.json >>> /"success":true,"info":"Request successful: Selected group: Group 1.*/ >>>= 0 # check that hail fails if no nodegroup can meet network and disk template requirements ./test/hs/hail $TESTDATA_DIR/hail-alloc-invalid-network.json >>> /"success":false,/ >>>= 0 # check that hail succeeds with the same test data, but with the network restrictions removed cat $TESTDATA_DIR/hail-alloc-invalid-network.json | grep -v -e '"network":"uuid-net-1-."' | ./test/hs/hail - >>> /"success":true,"info":"Request successful: Selected group: Group 2.*/ >>>= 0 # Run some of the tests above, with exclusive storage enabled
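# (the .excl-stor inputs used below are derived by offline-test.sh, which # copies the corresponding $TESTDATA_DIR requests and flips # '"exclusive_storage": false' to true, so the same requests are replayed # against an exclusive-storage cluster)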
./test/hs/hail $T/hail-alloc-drbd.json.excl-stor >>> /"success":true,.*,"result":\["node2","node1"\]/ >>>= 0 ./test/hs/hail $T/hail-reloc-drbd.json.excl-stor >>> /"success":true,.*,"result":\["node1"\]/ >>>= 0 ./test/hs/hail $T/hail-node-evac.json.excl-stor >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 ./test/hs/hail $T/hail-change-group.json.excl-stor >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 ./test/hs/hail $T/hail-alloc-twodisks.json.excl-stor >>> /"success":true,.*,"result":\["node1"\]/ >>>= 0 ./test/hs/hail $T/hail-alloc-invalid-twodisks.json.excl-stor >>> /"success":false,.*FailDisk: 1"/ >>>= 0 # Same tests with exclusive storage enabled, but no spindles info in instances ./test/hs/hail $T/hail-alloc-drbd.json.fail-excl-stor >>> /"success":false,.*FailSpindles: 12"/ >>>= 0 ./test/hs/hail $T/hail-reloc-drbd.json.fail-excl-stor >>> /"success":false,.*FailSpindles/ >>>= 0 ./test/hs/hail $T/hail-node-evac.json.fail-excl-stor >>> /"success":true,"info":"Request successful: 1 instances failed to move and 0 were moved successfully",.*FailSpindles/ >>>= 0 ./test/hs/hail $T/hail-change-group.json.fail-excl-stor >>> /"success":true,"info":"Request successful: 1 instances failed to move and 0 were moved successfully",.*FailSpindles: 2"/ >>>= 0 ./test/hs/hail $T/hail-alloc-twodisks.json.fail-excl-stor >>> /"success":false,.*FailSpindles: 1"/ >>>= 0 # check that hail can use the simu backend ./test/hs/hail --simu p,8,8T,16g,16 $TESTDATA_DIR/hail-alloc-drbd.json >>> /"success":true,/ >>>= 0 # check that hail can use the text backend ./test/hs/hail -t $T/simu-rebal-merged.standard $TESTDATA_DIR/hail-alloc-drbd.json >>> /"success":true,/ >>>= 0
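# (mechanism checked next, as implied by the file names: -S $T/hail-alloc # makes hail save the cluster state before and after processing the request, # as $T/hail-alloc.pre-ialloc and $T/hail-alloc.post-ialloc, so a plain diff # can prove that the allocation really changed the cluster) # check that hail pre/post saved state differs after allocation ./test/hs/hail -v -v -v -p $TESTDATA_DIR/hail-alloc-drbd.json -S $T/hail-alloc >/dev/null 2>&1 && ! diff -q $T/hail-alloc.pre-ialloc $T/hail-alloc.post-ialloc >>> /Files .* and .* differ/ >>>= 0 # check that hail pre/post saved state differs after relocation ./test/hs/hail -v -v -v -p $TESTDATA_DIR/hail-reloc-drbd.json -S $T/hail-reloc >/dev/null 2>&1 && !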
diff -q $T/hail-reloc.pre-ialloc $T/hail-reloc.post-ialloc >>> /Files .* and .* differ/ >>>= 0 # evac tests ./test/hs/hail $T/hail-node-evac.json.primary-only >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 ./test/hs/hail $T/hail-node-evac.json.secondary-only >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 ./test/hs/hail $T/hail-node-evac.json.all >>> /"success":true,"info":"Request successful: 0 instances failed to move and 1 were moved successfully"/ >>>= 0 # Check interaction between policies and spindles ./test/hs/hail $TESTDATA_DIR/hail-alloc-spindles.json >>> /"success":true,"info":"Request successful: Selected group: group2,.*FailSpindles: 2,.*"result":\["node4"\]/ >>>= 0 ./test/hs/hail $T/hail-alloc-spindles.json.excl-stor >>> /"success":true,"info":"Request successful: Selected group: group1,.*FailSpindles: 2",.*"result":\["node1"\]/ >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-hspace.test0000644000000000000000000000574412267470014022240 0ustar00rootroot00000000000000# test that hspace machine readable output looks correct ./test/hs/hspace --simu p,4,8T,64g,16 --machine-readable --disk-template drbd -l 8 >>> /^HTS_OK=1/ >>>= 0 # test again via a file and shell parsing ./test/hs/hspace --simu p,4,8T,64g,16 --machine-readable --disk-template drbd -l 8 > $T/capacity && sh -c ". $T/capacity && test x\$HTS_OK = x1" >>>= 0 # standard & tiered allocation, using shell parsing to do multiple checks ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered.data > $T/capacity && sh -c ". $T/capacity && test \"\${HTS_TSPEC}\" = '131072,1048576,4,12=4 129984,1048320,4,12=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 6" >>>=0 # again, but with a policy containing two min/max spec pairs ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-dualspec.data > $T/capacity && sh -c ". $T/capacity && test \"\${HTS_TSPEC}\" = '131072,1048576,4,12=4 129984,1048320,4,12=2 65472,524288,2,12=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 14" >>>2 >>>=0 # With exclusive storage ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-exclusive.data > $T/capacity && sh -c ". $T/capacity && test \"\${HTS_TSPEC}\" = '131072,1048576,4,10=1 131072,1048576,4,9=1 131072,1048576,4,8=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 6 && test \"\${HTS_TRL_SPN_FREE}\" = 0 && test \"\${HTS_FIN_SPN_FREE}\" = 29" >>>=0 # With exclusive storage and a policy containing two min/max spec pairs ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-dualspec-exclusive.data > $T/capacity && sh -c ". $T/capacity && test \"\${HTS_TSPEC}\" = '131072,1048576,4,4=4 129984,1048320,4,4=2 65472,524288,2,2=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 14 && test \"\${HTS_TRL_SPN_FREE}\" = 7 && test \"\${HTS_FIN_SPN_FREE}\" = 7" >>>2 >>>=0
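# (a reading of the spindle keys checked below, inferred from their names: # with exclusive storage hspace also accounts for physical spindles, # HTS_TRL_SPN_FREE being the free spindles left after the tiered-allocation # phase and HTS_FIN_SPN_FREE the final count after standard allocation) # Mixed cluster, half with exclusive storage ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-mixed.data > $T/capacity && sh -c ".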
$T/capacity && test \"\${HTS_TSPEC}\" = '131072,1048576,4,12=2 131072,1048576,4,10=2 129984,1048320,4,10=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 6 && test \"\${HTS_TRL_SPN_FREE}\" = 0 && test \"\${HTS_FIN_SPN_FREE}\" = 18" >>>=0 # Verify that instance policy for disks is adhered to ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-ipolicy.data >>>/HTS_TRL_INST_CNT=4/ >>>=0 # ...and instance positioning in human-readable form ./test/hs/hspace -pname,pcnt -t $TESTDATA_DIR/hspace-tiered-ipolicy.data >>>2/Tiered allocation status: Name pcnt node-01-001 1 node-01-002 1 node-01-003 1 node-01-004 1/ >>>=0 ./test/hs/hspace -pname,pcnt -t $TESTDATA_DIR/hspace-tiered-resourcetypes.data >>>2/Tiered allocation status: Name pcnt node-01-001 1 node-01-002 2 node-01-003 2 node-01-004 2/ >>>=0 # VCPU-dominated allocation ./test/hs/hspace --machine-readable -t $TESTDATA_DIR/hspace-tiered-vcpu.data > $T/capacity && sh -c ". $T/capacity && test \"\${HTS_TSPEC}\" = '32768,65536,4,12=4 32768,65536,2,12=2' && test \"\${HTS_ALLOC_INSTANCES}\" = 10" >>>=0 ganeti-2.9.3/test/hs/shelltests/htools-dynutil.test0000644000000000000000000000113112244641676022464 0ustar00rootroot00000000000000echo a > $T/dynu; ./test/hs/hbal -U $T/dynu $BACKEND_DYNU >>>2 /Cannot parse line/ >>>= !0 echo a b c d e f g h > $T/dynu; ./test/hs/hbal -U $T/dynu $BACKEND_DYNU >>>2 /Cannot parse line/ >>>= !0 echo inst cpu mem dsk net >$T/dynu; ./test/hs/hbal -U $T/dynu $BACKEND_DYNU >>>2 /cannot parse string '(cpu|mem|dsk|net)'/ >>>= !0 # unknown instances are currently just ignored echo no-such-inst 2 2 2 2 > $T/dynu; ./test/hs/hbal -U $T/dynu $BACKEND_DYNU >>>= 0 # new-0 is the name of the first instance allocated by hspace echo new-0 2 2 2 2 > $T/dynu; ./test/hs/hbal -U $T/dynu $BACKEND_DYNU >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-invalid.test0000644000000000000000000000300012244641676022417 0ustar00rootroot00000000000000# invalid option test ./test/hs/hail --no-such-option >>>= 2 # invalid option test ./test/hs/hbal --no-such-option >>>= 2 # invalid option test ./test/hs/hspace --no-such-option >>>= 2 # invalid option test ./test/hs/hscan --no-such-option >>>= 2 # invalid option test ./test/hs/hinfo --no-such-option >>>= 2 # invalid option test ./test/hs/hcheck --no-such-option >>>= 2 # invalid option test ./test/hs/hroller --no-such-option >>>= 2 # extra arguments ./test/hs/hspace unexpected-argument >>>2 Error: This program doesn't take any arguments. >>>=1 ./test/hs/hbal unexpected-argument >>>2 Error: This program doesn't take any arguments. >>>=1 ./test/hs/hinfo unexpected-argument >>>2 Error: This program doesn't take any arguments. >>>=1 ./test/hs/hcheck unexpected-argument >>>2 Error: This program doesn't take any arguments. >>>=1 ./test/hs/hroller unexpected-argument >>>2 Error: This program doesn't take any arguments. 
>>>=1 # hroller should notice the absence of a master node ./test/hs/hroller -t$TESTDATA_DIR/empty-cluster.data >>>2/Error: No master node found/ >>>=1 # hroller fails to build a graph for an empty cluster ./test/hs/hroller -f -t$TESTDATA_DIR/empty-cluster.data >>>2/Error: Cannot create node graph/ >>>=1 # hroller should reject a configuration with more than one master, # even with -f ./test/hs/hroller -f -t$TESTDATA_DIR/multiple-master.data >>>2/Error: Found more than one master node/ >>>=1 # hbal doesn't accept invalid priority ./test/hs/hbal --priority=abc >>>2/Unknown priority/ >>>=1 ganeti-2.9.3/test/hs/shelltests/htools-single-group.test0000644000000000000000000000200512244641676023410 0ustar00rootroot00000000000000# standard single-group tests ./test/hs/hinfo -v -v -p --print-instances -t$T/simu-onegroup.standard >>>= 0 ./test/hs/hbal -v -v -p --print-instances -t$T/simu-onegroup.standard >>>= 0 # hbal should not be able to balance ./test/hs/hbal -t$T/simu-onegroup.standard >>> /(Nothing to do, exiting|No solution found)/ >>>= 0 # tiered single-group tests ./test/hs/hinfo -v -v -p --print-instances -t$T/simu-onegroup.tiered >>>= 0 ./test/hs/hbal -v -v -p --print-instances -t$T/simu-onegroup.tiered >>>= 0 # hbal should not be able to balance ./test/hs/hbal -t$T/simu-onegroup.tiered >>> /(Nothing to do, exiting|No solution found)/ >>>= 0 # hcheck should not find reason to rebalance ./test/hs/hcheck -t$T/simu-onegroup.tiered --machine-readable >>> /HCHECK_INIT_CLUSTER_NEED_REBALANCE=0/ >>>= 0 # hroller should be able to print the solution ./test/hs/hroller -t$T/simu-onegroup.tiered >>>= 0 # hroller should be able to print the solution, in verbose mode as well ./test/hs/hroller -t$T/simu-onegroup.tiered -v -v >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-excl.test0000644000000000000000000000072512244641676021737 0ustar00rootroot00000000000000./test/hs/hbal $BACKEND_EXCL --exclude-instances no-such-instance >>>2 /Unknown instance/ >>>= !0 ./test/hs/hbal $BACKEND_EXCL --select-instances no-such-instances >>>2 /Unknown instance/ >>>= !0 ./test/hs/hbal $BACKEND_EXCL --exclude-instances new-0 --select-instances new-1 >>>= 0 # Test exclusion tags too (both from the command line and cluster tags). ./test/hs/hbal -t $TESTDATA_DIR/hbal-excl-tags.data --exclusion-tags test >>> /Cluster score improved/ >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-hroller.test0000644000000000000000000000476312267470014022450 0ustar00rootroot00000000000000./test/hs/hroller --no-headers --ignore-non-redundant -t $TESTDATA_DIR/unique-reboot-order.data >>> node-01-002 node-01-003,node-01-001 >>>= 0 ./test/hs/hroller --no-headers --skip-non-redundant -t $TESTDATA_DIR/unique-reboot-order.data >>> node-01-002 >>>= 0 ./test/hs/hroller --no-headers -t $TESTDATA_DIR/unique-reboot-order.data >>>/^node-01-00. node-01-00. node-01-001$/ >>>= 0 ./test/hs/hroller --ignore-non-redundant -O node-01-002 --no-headers -t $TESTDATA_DIR/unique-reboot-order.data >>> node-01-003,node-01-001 >>>= 0 ./test/hs/hroller --ignore-non-redundant -O node-01-003 --no-headers -t $TESTDATA_DIR/unique-reboot-order.data >>> node-01-002 node-01-001 >>>= 0 ./test/hs/hroller --node-tags=red --no-headers -t $TESTDATA_DIR/multiple-tags.data >>>/^node-01-00[45],node-01-00[45],node-01-001$/ >>>= 0 ./test/hs/hroller --node-tags=blue --no-headers -t $TESTDATA_DIR/multiple-tags.data >>>/^node-01-00[246],node-01-00[246],node-01-00[246]$/ >>>= 0 ./test/hs/hroller --no-headers --offline-maintenance -t $TESTDATA_DIR/hroller-online.data >>>/node-01-00.,node-01-00. 
node-01-001,node-01-003/ >>>= 0 ./test/hs/hroller --no-headers -t $TESTDATA_DIR/hroller-online.data >>>/node-01-00.,node-01-00. node-01-002 node-01-003/ >>>= 0 ./test/hs/hroller --no-headers -t $TESTDATA_DIR/hroller-nonredundant.data >>>/^node-01-00.,node-01-00. node-01-00.,node-01-00. node-01-00.,node-01-000$/ >>>= 0 ./test/hs/hroller --skip-non-redundant -t $TESTDATA_DIR/hroller-nonredundant.data >>>2 Error: Cannot create node graph >>>=1 ./test/hs/hroller --no-headers --ignore-non-redundant -t $TESTDATA_DIR/hroller-nonredundant.data >>>/^node-01-00.,node-01-00.,node-01-00.,node-01-00.,node-01-00.,node-01-000$/ >>>= 0 ./test/hs/hroller --no-headers -t $TESTDATA_DIR/hroller-nodegroups.data >>>/^node-01-00. node-01-00. node-01-00.,node-02-000$/ >>>= 0 ./test/hs/hroller --no-headers -t $TESTDATA_DIR/hroller-full.data >>>/^node-..,node-..,node-..,node-.. node-..,node-..,node-..,node-31$/ >>>= 0 ./test/hs/hroller --no-headers --full-evacuation -t $TESTDATA_DIR/hroller-full.data >>>/^node-..,node-.. node-..,node-.. node-..,node-.. node-..,node-31$/ >>>= 0 ./test/hs/hroller --no-headers --full-evacuation --one-step-only --print-moves -t $TESTDATA_DIR/hroller-full.data >>>/^node-.. node-.. inst-.. node-.. node-.. inst-.. node-.. node-.. inst-.. node-.. node-.. inst-.. node-.. node-..$/ >>>= 0 ./test/hs/hroller --full-evacuation -t $TESTDATA_DIR/unique-reboot-order.data >>>2 Error: Not enough capacity to move secondaries >>>=1 ganeti-2.9.3/test/hs/shelltests/htools-balancing.test0000644000000000000000000000765412271422343022716 0ustar00rootroot00000000000000### std tests # test basic parsing ./test/hs/hinfo -v -v -p --print-instances $BACKEND_BAL_STD >>>= 0 ./test/hs/hbal -v -v -v -p --print-instances $BACKEND_BAL_STD -G group-01 >>> !/(Nothing to do, exiting|No solution found)/ >>>2 !/(Nothing to do, exiting|No solution found)/ >>>= 0 # test command output ./test/hs/hbal $BACKEND_BAL_STD -G group-01 -C -S $T/simu-rebal.standard >>> /gnt-instance (failover|migrate|replace-disks)/ >>>= 0 # test that correct priorities are accepted ./test/hs/hbal $BACKEND_BAL_STD -G group-01 -C -S $T/simu-rebal.standard --prio low >>> /gnt-instance (failover|migrate|replace-disks)/ >>>= 0 # test that hbal won't execute rebalances when using the text backend ./test/hs/hbal $BACKEND_BAL_STD -G group-01 -X >>>2 Error: hbal: Execution of commands possible only on LUXI >>>= !0 # test that hbal won't execute any moves if we request an absurdly-high # minimum-improvement ./test/hs/hbal $BACKEND_BAL_STD -G group-01 -C --min-gain 10000 --min-gain-limit 10000 >>>/No solution found/ >>>= 0 # test saving commands ./test/hs/hbal $BACKEND_BAL_STD -G group-01 -C$T/rebal-cmds.standard >>>= 0 # and now check the file (depends on previous test) cat $T/rebal-cmds.standard >>> /gnt-instance (failover|migrate|replace-disks)/ >>>= 0 # state saved before rebalancing should be identical; depends on the # previous test diff -u $T/simu-rebal-merged.standard $T/simu-rebal.standard.original >>> >>>= 0 # no double rebalance; depends on previous test ./test/hs/hbal -t $T/simu-rebal.standard.balanced -G group-01 >>> /(Nothing to do, exiting|No solution found)/ >>>= 0 # hcheck sees no reason to rebalance after rebalancing was already done ./test/hs/hcheck -t$T/simu-rebal.standard.balanced --machine-readable >>> /HCHECK_INIT_CLUSTER_NEED_REBALANCE=0/ >>>= 0 ### now tiered tests # test basic parsing ./test/hs/hinfo -v -v -p --print-instances $BACKEND_BAL_TIER >>>= 0 ./test/hs/hbal -v -v -v -p --print-instances $BACKEND_BAL_TIER -G group-01 >>> 
!/(Nothing to do, exiting|No solution found)/ >>>2 !/(Nothing to do, exiting|No solution found)/ >>>= 0 # test command output ./test/hs/hbal $BACKEND_BAL_TIER -G group-01 -C -S $T/simu-rebal.tiered >>> /gnt-instance (failover|migrate|replace-disks)/ >>>= 0 # test saving commands ./test/hs/hbal $BACKEND_BAL_TIER -G group-01 -C$T/rebal-cmds.tiered >>>= 0 # and now check the file (depends on previous test) cat $T/rebal-cmds.tiered >>> /gnt-instance (failover|migrate|replace-disks)/ >>>= 0 # state saved before rebalancing should be identical; depends on the # previous test diff -u $T/simu-rebal-merged.tiered $T/simu-rebal.tiered.original >>> >>>= 0 # no double rebalance; depends on previous test ./test/hs/hbal -t $T/simu-rebal.tiered.balanced -G group-01 >>> /(Nothing to do, exiting|No solution found)/ >>>= 0 ### now some other custom tests # n+1 bad instances are reported as such ./test/hs/hbal -t$TESTDATA_DIR/n1-failure.data -G group-01 >>>/Initial check done: 4 bad nodes, 8 bad instances./ >>>=0 # same test again, different message check (shelltest can't test multiple # messages via regexp) ./test/hs/hbal -t$TESTDATA_DIR/n1-failure.data -G group-01 >>>/Cluster is not N\+1 happy, continuing but no guarantee that the cluster will end N\+1 happy./ >>>2 >>>=0 # and hcheck should report this as needs rebalancing ./test/hs/hcheck -t$TESTDATA_DIR/n1-failure.data >>>/Cluster needs rebalancing./ >>>= 1 # ... unless we request no-simulation mode ./test/hs/hcheck -t$TESTDATA_DIR/n1-failure.data --no-simulation >>>/Running in no-simulation mode./ >>>= 0 # and a clean cluster should be reported as such ./test/hs/hcheck $BACKEND_BAL_STD >>>/No need to rebalance cluster, no problems found./ >>>= 0 # ... and even one with non-zero score ./test/hs/hcheck -t $TESTDATA_DIR/clean-nonzero-score.data >>>/No need to rebalance cluster, no problems found./ >>>= 0 # hbal should work on empty groups as well ./test/hs/hbal -t$TESTDATA_DIR/n1-failure.data -G group-02 >>>/Group size 0 nodes, 0 instances/ >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-mon-collector.test0000644000000000000000000002234012267470014023545 0ustar00rootroot00000000000000# Test that mon-collector won't run without specifying a personality ./test/hs/hpc-mon-collector >>>= !0 # Test that standard options are accepted, both at top level # and subcommands level ./test/hs/hpc-mon-collector --help >>>= 0 ./test/hs/hpc-mon-collector --help-completion >>>= 0 ./test/hs/hpc-mon-collector --version >>>= 0 ./test/hs/hpc-mon-collector drbd --help >>>= 0 ./test/hs/hpc-mon-collector drbd --help-completion >>>= 0 ./test/hs/hpc-mon-collector drbd --version >>>= 0 # Test that the drbd collector fails parsing /dev/null ./test/hs/hpc-mon-collector drbd --drbd-status=/dev/null --drbd-pairing=/dev/null >>>2/Malformed JSON/ >>>= !0 # Test that a non-existent file is correctly reported ./test/hs/hpc-mon-collector drbd --drbd-status=/dev/no-such-file --drbd-pairing=/dev/no-such-file >>>2/Error: reading from file: .* does not exist/ >>>= !0 # Test that arguments are rejected ./test/hs/hpc-mon-collector drbd /dev/null >>>2/takes exactly zero arguments/ >>>= !0 # Test that a standard test file is parsed correctly ./test/hs/hpc-mon-collector drbd --drbd-status=$PYTESTDATA_DIR/proc_drbd83.txt --drbd-pairing=$PYTESTDATA_DIR/instance-minor-pairing.txt >>>=0
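# (the /dev/zero case guards against unbounded reads: the collector must # give up after a limited amount of input, and the stderr expected below is # the parser's bounded dump of the zero bytes it did consume before failing) # Test that the drbd collector fails parsing /dev/zero, but is not # stuck forever printing \NUL chars ./test/hs/hpc-mon-collector drbd --drbd-status=/dev/zero --drbd-pairing=$PYTESTDATA_DIR/instance-minor-pairing.txt >>>2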
Error: "\NUL\NUL\NUL\NUL[... long run of \NUL bytes elided ...]\NUL\NUL" [] Failed reading: versionInfo >>>= !0 # Tests for diskstats ./test/hs/hpc-mon-collector diskstats --help >>>= 0 ./test/hs/hpc-mon-collector diskstats --help-completion >>>= 0 ./test/hs/hpc-mon-collector diskstats --version >>>= 0 # Test that the diskstats collector fails parsing a non-diskstats file ./test/hs/hpc-mon-collector diskstats -f /dev/zero >>>2/Failed reading/ >>>= !0 # Test that a non-existent file is correctly reported ./test/hs/hpc-mon-collector diskstats --file=/proc/no-such-file >>>2/Error: reading from file: .* does not exist/ >>>= !0 # Test that arguments are rejected ./test/hs/hpc-mon-collector diskstats /dev/null >>>2/takes exactly zero arguments/ >>>= !0
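# A note on the format used by these shelltest files (inferred from the
# stanzas themselves): a plain line is a command to run; ">>>" and ">>>2"
# introduce the expected stdout and stderr (a /.../ payload is matched as
# a regular expression, anything else literally); and ">>>=" matches the
# exit code, where "!0" accepts any non-zero status. For example, the
# stanza
#
#   ./test/hs/hpc-mon-collector diskstats --version
#   >>>= 0
#
# asserts only that the version query exits successfully.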
# Test that a standard test file is parsed correctly ./test/hs/hpc-mon-collector diskstats -f $PYTESTDATA_DIR/proc_diskstats.txt >>>= 0 # Tests for lv ./test/hs/hpc-mon-collector lv --help >>>= 0 ./test/hs/hpc-mon-collector lv --help-completion >>>= 0 ./test/hs/hpc-mon-collector lv --version >>>= 0 # Test that the lv collector fails parsing non-lv data ./test/hs/hpc-mon-collector lv -f $PYTESTDATA_DIR/proc_diskstats.txt >>>= !0 # Test that lv correctly reports a non-existent file ./test/hs/hpc-mon-collector lv --file=/proc/no-such-file >>>2/Error: reading from file: .* does not exist/ >>>= !0 # Test that lv rejects arguments ./test/hs/hpc-mon-collector lv /dev/null >>>2/takes exactly zero arguments/ >>>= !0 # Test that lv correctly parses a standard test file ./test/hs/hpc-mon-collector lv -f $PYTESTDATA_DIR/lvs_lv.txt -i $PYTESTDATA_DIR/instance-prim-sec.txt >>>/"instance":"instance1.example.com"/ >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-no-backend.test0000644000000000000000000000056512244641676023007 0ustar00rootroot00000000000000# hail no input file ./test/hs/hail >>>= 1 # hbal no backend ./test/hs/hbal >>>= 1 # hspace no backend ./test/hs/hspace >>>= 1 # hinfo no backend ./test/hs/hinfo >>>= 1 # hroller no backend ./test/hs/hroller >>>= 1 # hbal multiple backends ./test/hs/hbal -t /dev/null -m localhost >>>2 Error: Only one of the rapi, luxi, and data files options should be given. >>>= 1 ganeti-2.9.3/test/hs/shelltests/htools-rapi.test0000644000000000000000000000041012244641676021726 0ustar00rootroot00000000000000# test loading data via RAPI ./test/hs/hinfo -v -v -p --print-instances -m $RAPI_URL >>>= 0 ./test/hs/hbal -v -v -p --print-instances -m $RAPI_URL >>>= 0 # this compares generated files from hscan diff -u $T/hscan/direct.hinfo $T/hscan/fromtext.hinfo >>> >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-multi-group.test0000644000000000000000000000264612244641676023274 0ustar00rootroot00000000000000# standard multi-group tests ./test/hs/hinfo -v -v -p --print-instances -t$T/simu-twogroups.standard >>>= 0 ./test/hs/hbal -t$T/simu-twogroups.standard >>>= !0 # hbal should not be able to balance ./test/hs/hbal -t$T/simu-twogroups.standard >>>2 /Found multiple node groups/ >>>= !0 # but hbal should be able to balance one node group ./test/hs/hbal -t$T/simu-twogroups.standard -G group-01 >>>= 0 # and it should not find an invalid group ./test/hs/hbal -t$T/simu-twogroups.standard -G no-such-group >>>= !0 # tiered allocs multi-group tests ./test/hs/hinfo -v -v -p --print-instances -t$T/simu-twogroups.tiered >>>= 0 ./test/hs/hbal -t$T/simu-twogroups.tiered >>>= !0 # hbal should not be able to balance ./test/hs/hbal -t$T/simu-twogroups.tiered >>>2 /Found multiple node groups/ >>>= !0 # but hbal should be able to balance one node group ./test/hs/hbal -t$T/simu-twogroups.tiered -G group-01 >>>= 0 # and it should not find an invalid group ./test/hs/hbal -t$T/simu-twogroups.tiered -G no-such-group >>>= !0 # hcheck should be able to run with multiple groups ./test/hs/hcheck -t$T/simu-twogroups.tiered --machine-readable >>> /HCHECK_OK=1/ >>>= 0 # hcheck should be able to improve a group with split instances, and also # warn us about them ./test/hs/hbal -t $TESTDATA_DIR/hbal-split-insts.data -G group-01 -O node-01-001 -v >>> /Cluster score improved from .* to .*/ >>>2/Found instances belonging to multiple node groups:/ >>>= 0 ganeti-2.9.3/test/hs/shelltests/htools-text-backend.test0000644000000000000000000000152312244641676023352 0ustar00rootroot00000000000000# missing resources test ./test/hs/hbal -t
$TESTDATA_DIR/missing-resources.data >>>2 /node node2 is missing .* ram and .* disk/ >>>= 0 ./test/hs/hinfo -t $TESTDATA_DIR/missing-resources.data >>>2 /node node2 is missing .* ram and .* disk/ >>>= 0 # common suffix test ./test/hs/hbal -t $TESTDATA_DIR/common-suffix.data -v -v >>>/Stripping common suffix of '\.example\.com' from names/ >>>= 0 ./test/hs/hinfo -t $TESTDATA_DIR/common-suffix.data -v -v >>>/Stripping common suffix of '\.example\.com' from names/ >>>= 0 # invalid node test ./test/hs/hbal -t $TESTDATA_DIR/invalid-node.data >>>2 /Unknown node '.*' for instance new-0/ >>>= !0 ./test/hs/hspace -t $TESTDATA_DIR/invalid-node.data >>>2 /Unknown node '.*' for instance new-0/ >>>= !0 ./test/hs/hinfo -t $TESTDATA_DIR/invalid-node.data >>>2 /Unknown node '.*' for instance new-0/ >>>= !0 ganeti-2.9.3/test/hs/shelltests/htools-basic.test0000644000000000000000000000136312244641676022064 0ustar00rootroot00000000000000# help/version tests ./test/hs/hail --version >>>= 0 ./test/hs/hail --help >>>= 0 ./test/hs/hail --help-completion >>>= 0 ./test/hs/hbal --version >>>= 0 ./test/hs/hbal --help >>>= 0 ./test/hs/hbal --help-completion >>>= 0 ./test/hs/hspace --version >>>= 0 ./test/hs/hspace --help >>>= 0 ./test/hs/hspace --help-completion >>>= 0 ./test/hs/hscan --version >>>= 0 ./test/hs/hscan --help >>>= 0 ./test/hs/hscan --help-completion >>>= 0 ./test/hs/hinfo --version >>>= 0 ./test/hs/hinfo --help >>>= 0 ./test/hs/hinfo --help-completion >>>= 0 ./test/hs/hcheck --version >>>= 0 ./test/hs/hcheck --help >>>= 0 ./test/hs/hcheck --help-completion >>>= 0 ./test/hs/hroller --version >>>= 0 ./test/hs/hroller --help >>>= 0 ./test/hs/hroller --help-completion >>>= 0 ganeti-2.9.3/test/hs/Test/0000755000000000000000000000000012271445544015312 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/0000755000000000000000000000000012271445545016522 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Luxi.hs0000644000000000000000000001343512271422343017774 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-} module Test.Ganeti.Luxi (testLuxi) where import Test.HUnit import Test.QuickCheck import Test.QuickCheck.Monadic (monadicIO, run, stop) import Data.List import Control.Applicative import Control.Concurrent (forkIO) import Control.Exception (bracket) import System.Directory (getTemporaryDirectory, removeFile) import System.IO (hClose, openTempFile) import qualified Text.JSON as J import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Query.Language (genFilter) import Test.Ganeti.OpCodes () import Ganeti.BasicTypes import qualified Ganeti.Luxi as Luxi {-# ANN module "HLint: ignore Use camelCase" #-} -- * Luxi tests $(genArbitrary ''Luxi.LuxiReq) instance Arbitrary Luxi.LuxiOp where arbitrary = do lreq <- arbitrary case lreq of Luxi.ReqQuery -> Luxi.Query <$> arbitrary <*> genFields <*> genFilter Luxi.ReqQueryFields -> Luxi.QueryFields <$> arbitrary <*> genFields Luxi.ReqQueryNodes -> Luxi.QueryNodes <$> listOf genFQDN <*> genFields <*> arbitrary Luxi.ReqQueryGroups -> Luxi.QueryGroups <$> arbitrary <*> arbitrary <*> arbitrary Luxi.ReqQueryNetworks -> Luxi.QueryNetworks <$> arbitrary <*> arbitrary <*> arbitrary Luxi.ReqQueryInstances -> Luxi.QueryInstances <$> listOf genFQDN <*> genFields <*> arbitrary Luxi.ReqQueryJobs -> Luxi.QueryJobs <$> arbitrary <*> genFields Luxi.ReqQueryExports -> Luxi.QueryExports <$> listOf genFQDN <*> arbitrary Luxi.ReqQueryConfigValues -> Luxi.QueryConfigValues <$> genFields Luxi.ReqQueryClusterInfo -> pure Luxi.QueryClusterInfo Luxi.ReqQueryTags -> Luxi.QueryTags <$> arbitrary Luxi.ReqSubmitJob -> Luxi.SubmitJob <$> resize maxOpCodes arbitrary Luxi.ReqSubmitManyJobs -> Luxi.SubmitManyJobs <$> resize maxOpCodes arbitrary Luxi.ReqWaitForJobChange -> Luxi.WaitForJobChange <$> arbitrary <*> genFields <*> pure J.JSNull <*> pure J.JSNull <*> arbitrary Luxi.ReqArchiveJob -> Luxi.ArchiveJob <$> arbitrary Luxi.ReqAutoArchiveJobs -> Luxi.AutoArchiveJobs <$> arbitrary <*> arbitrary Luxi.ReqCancelJob -> Luxi.CancelJob <$> arbitrary Luxi.ReqChangeJobPriority -> Luxi.ChangeJobPriority <$> arbitrary <*> arbitrary Luxi.ReqSetDrainFlag -> Luxi.SetDrainFlag <$> arbitrary Luxi.ReqSetWatcherPause -> Luxi.SetWatcherPause <$> arbitrary -- | Simple check that encoding/decoding of LuxiOp works. prop_CallEncoding :: Luxi.LuxiOp -> Property prop_CallEncoding op = (Luxi.validateCall (Luxi.buildCall op) >>= Luxi.decodeCall) ==? Ok op -- | Helper to get a temporary file name. getTempFileName :: IO FilePath getTempFileName = do tempdir <- getTemporaryDirectory (fpath, handle) <- openTempFile tempdir "luxitest" _ <- hClose handle removeFile fpath return fpath -- | Server ping-pong helper. luxiServerPong :: Luxi.Client -> IO () luxiServerPong c = do msg <- Luxi.recvMsgExt c case msg of Luxi.RecvOk m -> Luxi.sendMsg c m >> luxiServerPong c _ -> return () -- | Client ping-pong helper. luxiClientPong :: Luxi.Client -> [String] -> IO [String] luxiClientPong c = mapM (\m -> Luxi.sendMsg c m >> Luxi.recvMsg c) -- | Monadic check that, given a server socket, we can connect via a -- client to it, and that we can send a list of arbitrary messages and -- get back what we sent. prop_ClientServer :: [[DNSChar]] -> Property prop_ClientServer dnschars = monadicIO $ do let msgs = map (map dnsGetChar) dnschars fpath <- run getTempFileName -- we need to create the server first, otherwise (if we do it in the -- forked thread) the client could try to connect to it before it's -- ready server <- run $ Luxi.getServer False fpath -- fork the server responder _ <- run .
forkIO $ bracket (Luxi.acceptClient server) (\c -> Luxi.closeClient c >> Luxi.closeServer fpath server) luxiServerPong replies <- run $ bracket (Luxi.getClient fpath) Luxi.closeClient (`luxiClientPong` msgs) stop $ replies ==? msgs -- | Check that Python and Haskell define the same Luxi requests list. case_AllDefined :: Assertion case_AllDefined = do py_stdout <- runPython "from ganeti import luxi\n\ \print '\\n'.join(luxi.REQ_ALL)" "" >>= checkPythonResult let py_ops = sort $ lines py_stdout hs_ops = Luxi.allLuxiCalls extra_py = py_ops \\ hs_ops extra_hs = hs_ops \\ py_ops assertBool ("Luxi calls missing from Haskell code:\n" ++ unlines extra_py) (null extra_py) assertBool ("Extra Luxi calls in the Haskell code:\n" ++ unlines extra_hs) (null extra_hs) testSuite "Luxi" [ 'prop_CallEncoding , 'prop_ClientServer , 'case_AllDefined ] ganeti-2.9.3/test/hs/Test/Ganeti/Common.hs0000644000000000000000000001546612271422343020311 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for the 'Ganeti.Common' module. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Common ( testCommon , checkOpt , passFailOpt , checkEarlyExit ) where import Test.QuickCheck hiding (Result) import Test.HUnit import qualified System.Console.GetOpt as GetOpt import System.Exit import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.BasicTypes import Ganeti.Common import Ganeti.HTools.Program.Main (personalities) {-# ANN module "HLint: ignore Use camelCase" #-} -- | Helper to check for correct parsing of an option. checkOpt :: (StandardOptions b) => (a -> Maybe String) -- ^ Converts the value into a cmdline form -> b -- ^ The default options -> (String -> c) -- ^ Fail test function -> (String -> d -> d -> c) -- ^ Check for equality function -> (a -> d) -- ^ Transforms the value to a compare val -> (a, GenericOptType b, b -> d) -- ^ Triple of value, the -- option, function to -- extract the set value -- from the options -> c checkOpt repr defaults failfn eqcheck valfn (val, opt@(GetOpt.Option _ longs _ _, _), fn) = case longs of [] -> failfn "no long options?" cmdarg:_ -> case parseOptsInner defaults ["--" ++ cmdarg ++ maybe "" ("=" ++) (repr val)] "prog" [opt] [] of Left e -> failfn $ "Failed to parse option '" ++ cmdarg ++ "': " ++ show e Right (options, _) -> eqcheck ("Wrong value in option " ++ cmdarg ++ "?") (valfn val) (fn options) -- | Helper to check for correct and incorrect parsing of an option.
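--
-- A minimal usage sketch (here @defaults@ stands for whatever
-- 'StandardOptions' value the caller uses, and @oSomeOpt@ is a
-- hypothetical numeric option, not one defined in this module):
--
-- > passFailOpt defaults assertFailure (return ())
-- >   (oSomeOpt, "not-a-number", "42")
--
-- i.e. the malformed value must be rejected and the well-formed one
-- accepted.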
passFailOpt :: (StandardOptions b) => b -- ^ The default options -> (String -> c) -- ^ Fail test function -> c -- ^ Pass function -> (GenericOptType b, String, String) -- ^ The option, a failing value and a passing value -> c passFailOpt defaults failfn passfn (opt@(GetOpt.Option _ longs _ _, _), bad, good) = let first_opt = case longs of [] -> error "no long options?" x:_ -> x prefix = "--" ++ first_opt ++ "=" good_cmd = prefix ++ good bad_cmd = prefix ++ bad in case (parseOptsInner defaults [bad_cmd] "prog" [opt] [], parseOptsInner defaults [good_cmd] "prog" [opt] []) of (Left _, Right _) -> passfn (Right _, Right _) -> failfn $ "Command line '" ++ bad_cmd ++ "' succeeded when it shouldn't" (Left _, Left _) -> failfn $ "Command line '" ++ good_cmd ++ "' failed when it shouldn't" (Right _, Left _) -> failfn $ "Command line '" ++ bad_cmd ++ "' succeeded when it shouldn't, while command line '" ++ good_cmd ++ "' failed when it shouldn't" -- | Helper to test that a given option is accepted and triggers an early exit. checkEarlyExit :: (StandardOptions a) => a -> String -> [GenericOptType a] -> [ArgCompletion] -> Assertion checkEarlyExit defaults name options arguments = mapM_ (\param -> case parseOptsInner defaults [param] name options arguments of Left (code, _) -> assertEqual ("Program " ++ name ++ " returns invalid code " ++ show code ++ " for option " ++ param) ExitSuccess code _ -> assertFailure $ "Program " ++ name ++ " doesn't consider option " ++ param ++ " as an early-exit one" ) ["-h", "--help", "-V", "--version"] -- | Test parseYesNo. prop_parse_yes_no :: Bool -> Bool -> String -> Property prop_parse_yes_no def testval val = forAll (elements [val, "yes", "no"]) $ \actual_val -> if testval then parseYesNo def Nothing ==? Ok def else let result = parseYesNo def (Just actual_val) in if actual_val `elem` ["yes", "no"] then result ==? Ok (actual_val == "yes") else property $ isBad result -- | Check that formatCmdUsage works similarly to Python _FormatUsage. case_formatCommands :: Assertion case_formatCommands = assertEqual "proper wrap for HTools Main" resCmdTest (formatCommands personalities) where resCmdTest :: [String] resCmdTest = [ " hail - Ganeti IAllocator plugin that implements the instance\ \ placement and" , " movement using the same algorithm as hbal(1)" , " harep - auto-repair tool that detects certain kind of problems\ \ with instances" , " and applies the allowed set of solutions" , " hbal - cluster balancer that looks at the current state of\ \ the cluster and" , " computes a series of steps designed to bring the\ \ cluster into a" , " better state" , " hcheck - cluster checker; prints information about cluster's\ \ health and checks" , " whether a rebalance done using hbal would help" , " hinfo - cluster information printer; it prints information\ \ about the current" , " cluster state and its residing nodes/instances" , " hroller - cluster rolling maintenance helper; it helps\ \ scheduling node reboots" , " in a manner that doesn't conflict with the instances'\ \ topology" , " hscan - tool for scanning clusters via RAPI and saving their\ \ data in the" , " input format used by hbal(1) and hspace(1)" , " hspace - computes how many additional instances can be fit on a\ \ cluster, while" , " maintaining N+1 status."
] testSuite "Common" [ 'prop_parse_yes_no , 'case_formatCommands ] ganeti-2.9.3/test/hs/Test/Ganeti/Rpc.hs0000644000000000000000000000753512271422343017603 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Rpc (testRpc) where import Test.QuickCheck import Test.QuickCheck.Monadic (monadicIO, run, stop) import Control.Applicative import qualified Data.Map as Map import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Objects () import qualified Ganeti.Rpc as Rpc import qualified Ganeti.Objects as Objects import qualified Ganeti.Types as Types import qualified Ganeti.JSON as JSON import Ganeti.Types instance Arbitrary Rpc.RpcCallAllInstancesInfo where arbitrary = Rpc.RpcCallAllInstancesInfo <$> arbitrary instance Arbitrary Rpc.RpcCallInstanceList where arbitrary = Rpc.RpcCallInstanceList <$> arbitrary instance Arbitrary Rpc.RpcCallNodeInfo where arbitrary = Rpc.RpcCallNodeInfo <$> genStorageUnitMap <*> genHvSpecs genStorageUnit :: Gen StorageUnit genStorageUnit = do storage_type <- arbitrary storage_key <- genName storage_es <- arbitrary return $ addParamsToStorageUnit storage_es (SURaw storage_type storage_key) genStorageUnits :: Gen [StorageUnit] genStorageUnits = do num_storage_units <- choose (0, 5) vectorOf num_storage_units genStorageUnit genStorageUnitMap :: Gen (Map.Map String [StorageUnit]) genStorageUnitMap = do num_nodes <- choose (0,5) node_uuids <- vectorOf num_nodes genName storage_units_list <- vectorOf num_nodes genStorageUnits return $ Map.fromList (zip node_uuids storage_units_list) -- | Generate hypervisor specifications to be used for the NodeInfo call genHvSpecs :: Gen [ (Types.Hypervisor, Objects.HvParams) ] genHvSpecs = do numhv <- choose (0, 5) hvs <- vectorOf numhv arbitrary hvparams <- vectorOf numhv genHvParams let specs = zip hvs hvparams return specs -- FIXME: Generate more interesting hvparams -- | Generate Hvparams genHvParams :: Gen Objects.HvParams genHvParams = return $ JSON.GenericContainer Map.empty -- | Monadic check that, for an offline node and a call that does not -- accept offline nodes, we get an OfflineNodeError response. -- FIXME: We need a way of generalizing this, running it for -- every call manually will soon get problematic prop_noffl_request_allinstinfo :: Rpc.RpcCallAllInstancesInfo -> Property prop_noffl_request_allinstinfo call = forAll (arbitrary `suchThat` Objects.nodeOffline) $ \node -> monadicIO $ do res <- run $ Rpc.executeRpcCall [node] call stop $ res ==?
[(node, Left Rpc.OfflineNodeError)] prop_noffl_request_instlist :: Rpc.RpcCallInstanceList -> Property prop_noffl_request_instlist call = forAll (arbitrary `suchThat` Objects.nodeOffline) $ \node -> monadicIO $ do res <- run $ Rpc.executeRpcCall [node] call stop $ res ==? [(node, Left Rpc.OfflineNodeError)] prop_noffl_request_nodeinfo :: Rpc.RpcCallNodeInfo -> Property prop_noffl_request_nodeinfo call = forAll (arbitrary `suchThat` Objects.nodeOffline) $ \node -> monadicIO $ do res <- run $ Rpc.executeRpcCall [node] call stop $ res ==? [(node, Left Rpc.OfflineNodeError)] testSuite "Rpc" [ 'prop_noffl_request_allinstinfo , 'prop_noffl_request_instlist , 'prop_noffl_request_nodeinfo ] ganeti-2.9.3/test/hs/Test/Ganeti/Objects.hs0000644000000000000000000005276212271422343020452 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, TypeSynonymInstances, FlexibleInstances, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Objects ( testObjects , Node(..) , genConfigDataWithNetworks , genDisk , genDiskWithChildren , genEmptyCluster , genInst , genInstWithNets , genValidNetwork , genBitStringMaxLen ) where import Test.QuickCheck import qualified Test.HUnit as HUnit import Control.Applicative import Control.Monad import Data.Char import qualified Data.List as List import qualified Data.Map as Map import Data.Maybe (fromMaybe) import qualified Data.Set as Set import GHC.Exts (IsString(..)) import qualified Text.JSON as J import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Types () import qualified Ganeti.Constants as C import Ganeti.Network import Ganeti.Objects as Objects import Ganeti.JSON import Ganeti.Types -- * Arbitrary instances $(genArbitrary ''PartialNDParams) instance Arbitrary Node where arbitrary = Node <$> genFQDN <*> genFQDN <*> genFQDN <*> arbitrary <*> arbitrary <*> arbitrary <*> genFQDN <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> genFQDN <*> arbitrary <*> (Set.fromList <$> genTags) $(genArbitrary ''BlockDriver) $(genArbitrary ''DiskMode) instance Arbitrary DiskLogicalId where arbitrary = oneof [ LIDPlain <$> arbitrary <*> arbitrary , LIDDrbd8 <$> genFQDN <*> genFQDN <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary , LIDFile <$> arbitrary <*> arbitrary , LIDBlockDev <$> arbitrary <*> arbitrary , LIDRados <$> arbitrary <*> arbitrary ] -- | 'Disk' 'arbitrary' instance. Since we don't test disk hierarchy -- properties, we only generate disks with no children (FIXME), as -- generating recursive datastructures is a bit more work. 
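--
-- One possible way to lift that restriction later (a sketch only, not
-- what the instance below actually does) would be to reuse
-- 'genDiskWithChildren' from this module, which already produces one
-- level of children:
--
-- > arbitrary = choose (0, 3) >>= genDiskWithChildren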
instance Arbitrary Disk where arbitrary = Disk <$> arbitrary <*> pure [] <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary -- FIXME: we should generate proper values, >=0, etc., but this is -- hard for partial ones, where all must be wrapped in a 'Maybe' $(genArbitrary ''PartialBeParams) $(genArbitrary ''AdminState) $(genArbitrary ''PartialNicParams) $(genArbitrary ''PartialNic) instance Arbitrary Instance where arbitrary = Instance -- name <$> genFQDN -- primary node <*> genFQDN -- OS <*> genFQDN -- hypervisor <*> arbitrary -- hvparams -- FIXME: add non-empty hvparams when they're a proper type <*> pure (GenericContainer Map.empty) -- beparams <*> arbitrary -- osparams <*> pure (GenericContainer Map.empty) -- admin_state <*> arbitrary -- nics <*> arbitrary -- disks <*> vectorOf 5 genDisk -- disk template <*> arbitrary -- disks active <*> arbitrary -- network port <*> arbitrary -- ts <*> arbitrary <*> arbitrary -- uuid <*> arbitrary -- serial <*> arbitrary -- tags <*> (Set.fromList <$> genTags) -- | Generates an instance that is connected to the given networks -- and possibly some other networks genInstWithNets :: [String] -> Gen Instance genInstWithNets nets = do plain_inst <- arbitrary enhanceInstWithNets plain_inst nets -- | Generates an instance that is connected to some networks genInst :: Gen Instance genInst = genInstWithNets [] -- | Enhances a given instance with network information, by connecting it to the -- given networks and possibly some other networks enhanceInstWithNets :: Instance -> [String] -> Gen Instance enhanceInstWithNets inst nets = do mac <- arbitrary ip <- arbitrary nicparams <- arbitrary name <- arbitrary uuid <- arbitrary -- generate some more networks than the given ones num_more_nets <- choose (0,3) more_nets <- vectorOf num_more_nets genName let genNic net = PartialNic mac ip nicparams net name uuid partial_nics = map (genNic . Just) (List.nub (nets ++ more_nets)) new_inst = inst { instNics = partial_nics } return new_inst genDiskWithChildren :: Int -> Gen Disk genDiskWithChildren num_children = do logicalid <- arbitrary children <- vectorOf num_children (genDiskWithChildren 0) ivname <- genName size <- arbitrary mode <- arbitrary name <- genMaybe genName spindles <- arbitrary uuid <- genName let disk = Disk logicalid children ivname size mode name spindles uuid return disk genDisk :: Gen Disk genDisk = genDiskWithChildren 3 -- | FIXME: This generates completely random data, without normal -- validation rules. $(genArbitrary ''PartialISpecParams) -- | FIXME: This generates completely random data, without normal -- validation rules. $(genArbitrary ''PartialIPolicy) $(genArbitrary ''FilledISpecParams) $(genArbitrary ''MinMaxISpecs) $(genArbitrary ''FilledIPolicy) $(genArbitrary ''IpFamily) $(genArbitrary ''FilledNDParams) $(genArbitrary ''FilledNicParams) $(genArbitrary ''FilledBeParams) -- | No real arbitrary instance for 'ClusterHvParams' yet. instance Arbitrary ClusterHvParams where arbitrary = return $ GenericContainer Map.empty -- | No real arbitrary instance for 'OsHvParams' yet. instance Arbitrary OsHvParams where arbitrary = return $ GenericContainer Map.empty instance Arbitrary ClusterNicParams where arbitrary = (GenericContainer . Map.singleton C.ppDefault) <$> arbitrary instance Arbitrary OsParams where arbitrary = (GenericContainer . Map.fromList) <$> arbitrary instance Arbitrary ClusterOsParams where arbitrary = (GenericContainer . 
Map.fromList) <$> arbitrary instance Arbitrary ClusterBeParams where arbitrary = (GenericContainer . Map.fromList) <$> arbitrary instance Arbitrary TagSet where arbitrary = Set.fromList <$> genTags $(genArbitrary ''Cluster) instance Arbitrary Network where arbitrary = genValidNetwork -- | Generates a network instance with minimum netmasks of /24. Generating -- bigger networks slows down the tests, because long bit strings are generated -- for the reservations. genValidNetwork :: Gen Objects.Network genValidNetwork = do -- generate netmask for the IPv4 network netmask <- fromIntegral <$> choose (24::Int, 30) name <- genName >>= mkNonEmpty mac_prefix <- genMaybe genName net <- arbitrary net6 <- genMaybe genIp6Net gateway <- genMaybe arbitrary gateway6 <- genMaybe genIp6Addr res <- liftM Just (genBitString $ netmask2NumHosts netmask) ext_res <- liftM Just (genBitString $ netmask2NumHosts netmask) uuid <- arbitrary ctime <- arbitrary mtime <- arbitrary let n = Network name mac_prefix (Ip4Network net netmask) net6 gateway gateway6 res ext_res uuid ctime mtime 0 Set.empty return n -- | Generate an arbitrary string consisting of '0' and '1' of the given length. genBitString :: Int -> Gen String genBitString len = vectorOf len (elements "01") -- | Generate an arbitrary string consisting of '0' and '1' of the maximum given -- length. genBitStringMaxLen :: Int -> Gen String genBitStringMaxLen maxLen = choose (0, maxLen) >>= genBitString -- | Generator for config data with an empty cluster (no instances), -- with N defined nodes. genEmptyCluster :: Int -> Gen ConfigData genEmptyCluster ncount = do nodes <- vector ncount version <- arbitrary grp <- arbitrary let guuid = groupUuid grp nodes' = zipWith (\n idx -> let newname = nodeName n ++ "-" ++ show idx in (newname, n { nodeGroup = guuid, nodeName = newname})) nodes [(1::Int)..] nodemap = Map.fromList nodes' contnodes = if Map.size nodemap /= ncount then error ("Inconsistent node map, duplicates in" ++ " node name list? Names: " ++ show (map fst nodes')) else GenericContainer nodemap continsts = GenericContainer Map.empty networks = GenericContainer Map.empty let contgroups = GenericContainer $ Map.singleton guuid grp serial <- arbitrary cluster <- resize 8 arbitrary let c = ConfigData version cluster contnodes contgroups continsts networks serial return c -- | FIXME: make an even simpler base version of creating a cluster. -- | Generates config data with a couple of networks. 
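--
-- In a property this is typically chained after 'genEmptyCluster', e.g.
-- (a sketch with a hypothetical property name, mirroring
-- 'prop_Config_serialisation' below):
--
-- > prop_ConfigWithNets_serialisation :: Property
-- > prop_ConfigWithNets_serialisation =
-- >   forAll (genEmptyCluster 3 >>= genConfigDataWithNetworks)
-- >          testSerialisation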
genConfigDataWithNetworks :: ConfigData -> Gen ConfigData genConfigDataWithNetworks old_cfg = do num_nets <- choose (0, 3) -- generate a list of network names (no duplicates) net_names <- genUniquesList num_nets genName >>= mapM mkNonEmpty -- generate a random list of networks (possibly with duplicate names) nets <- vectorOf num_nets genValidNetwork -- use unique names for the networks let nets_unique = map ( \(name, net) -> net { networkName = name } ) (zip net_names nets) net_map = GenericContainer $ Map.fromList (map (\n -> (networkUuid n, n)) nets_unique) new_cfg = old_cfg { configNetworks = net_map } return new_cfg -- * Test properties -- | Tests that fillDict behaves correctly prop_fillDict :: [(Int, Int)] -> [(Int, Int)] -> Property prop_fillDict defaults custom = let d_map = Map.fromList defaults d_keys = map fst defaults c_map = Map.fromList custom c_keys = map fst custom in conjoin [ printTestCase "Empty custom filling" (fillDict d_map Map.empty [] == d_map) , printTestCase "Empty defaults filling" (fillDict Map.empty c_map [] == c_map) , printTestCase "Delete all keys" (fillDict d_map c_map (d_keys++c_keys) == Map.empty) ] -- | Test that the serialisation of 'DiskLogicalId', which is -- implemented manually, is idempotent. Since we don't have a -- standalone JSON instance for DiskLogicalId (it's a data type that -- expands over two fields in a JSObject), we test this by actually -- testing entire Disk serialisations. So this tests two things at -- once, basically. prop_Disk_serialisation :: Disk -> Property prop_Disk_serialisation = testSerialisation -- | Check that node serialisation is idempotent. prop_Node_serialisation :: Node -> Property prop_Node_serialisation = testSerialisation -- | Check that instance serialisation is idempotent. prop_Inst_serialisation :: Instance -> Property prop_Inst_serialisation = testSerialisation -- | Check that network serialisation is idempotent. prop_Network_serialisation :: Network -> Property prop_Network_serialisation = testSerialisation -- | Check config serialisation. prop_Config_serialisation :: Property prop_Config_serialisation = forAll (choose (0, maxNodes `div` 4) >>= genEmptyCluster) testSerialisation -- | Custom HUnit test to check the correspondence between Haskell-generated -- networks and their Python decoded, validated and re-encoded version. -- For the technical background of this unit test, check the documentation -- of "case_py_compat_types" of test/hs/Test/Ganeti/Opcodes.hs casePyCompatNetworks :: HUnit.Assertion casePyCompatNetworks = do let num_networks = 500::Int networks <- genSample (vectorOf num_networks genValidNetwork) let networks_with_properties = map getNetworkProperties networks serialized = J.encode networks -- check for non-ASCII fields, usually due to 'arbitrary :: String' mapM_ (\net -> when (any (not . isAscii) (J.encode net)) .
HUnit.assertFailure $ "Network has non-ASCII fields: " ++ show net ) networks py_stdout <- runPython "from ganeti import network\n\ \from ganeti import objects\n\ \from ganeti import serializer\n\ \import sys\n\ \net_data = serializer.Load(sys.stdin.read())\n\ \decoded = [objects.Network.FromDict(n) for n in net_data]\n\ \encoded = []\n\ \for net in decoded:\n\ \ a = network.AddressPool(net)\n\ \ encoded.append((a.GetFreeCount(), a.GetReservedCount(), \\\n\ \ net.ToDict()))\n\ \print serializer.Dump(encoded)" serialized >>= checkPythonResult let deserialised = J.decode py_stdout::J.Result [(Int, Int, Network)] decoded <- case deserialised of J.Ok ops -> return ops J.Error msg -> HUnit.assertFailure ("Unable to decode networks: " ++ msg) -- this already raised an exception, but we need it -- for proper types >> fail "Unable to decode networks" HUnit.assertEqual "Mismatch in number of returned networks" (length decoded) (length networks_with_properties) mapM_ (uncurry (HUnit.assertEqual "Different result after encoding/decoding") ) $ zip decoded networks_with_properties -- | Creates a tuple of the given network combined with some of its properties -- to be compared against the same properties generated by the Python code. getNetworkProperties :: Network -> (Int, Int, Network) getNetworkProperties net = let maybePool = createAddressPool net in case maybePool of (Just pool) -> (getFreeCount pool, getReservedCount pool, net) Nothing -> (-1, -1, net) -- | Tests the compatibility between Haskell-serialized node groups and their -- python-decoded and encoded version. casePyCompatNodegroups :: HUnit.Assertion casePyCompatNodegroups = do let num_groups = 500::Int groups <- genSample (vectorOf num_groups genNodeGroup) let serialized = J.encode groups -- check for non-ASCII fields, usually due to 'arbitrary :: String' mapM_ (\group -> when (any (not . isAscii) (J.encode group)) . HUnit.assertFailure $ "Node group has non-ASCII fields: " ++ show group ) groups py_stdout <- runPython "from ganeti import objects\n\ \from ganeti import serializer\n\ \import sys\n\ \group_data = serializer.Load(sys.stdin.read())\n\ \decoded = [objects.NodeGroup.FromDict(g) for g in group_data]\n\ \encoded = [g.ToDict() for g in decoded]\n\ \print serializer.Dump(encoded)" serialized >>= checkPythonResult let deserialised = J.decode py_stdout::J.Result [NodeGroup] decoded <- case deserialised of J.Ok ops -> return ops J.Error msg -> HUnit.assertFailure ("Unable to decode node groups: " ++ msg) -- this already raised an exception, but we need it -- for proper types >> fail "Unable to decode node groups" HUnit.assertEqual "Mismatch in number of returned node groups" (length decoded) (length groups) mapM_ (uncurry (HUnit.assertEqual "Different result after encoding/decoding") ) $ zip decoded groups -- | Generates a node group with up to 3 networks. -- | FIXME: This still generates mostly random data, without normal -- validation rules. genNodeGroup :: Gen NodeGroup genNodeGroup = do name <- genFQDN members <- pure [] ndparams <- arbitrary alloc_policy <- arbitrary ipolicy <- arbitrary diskparams <- pure (GenericContainer Map.empty) num_networks <- choose (0, 3) net_uuid_list <- vectorOf num_networks (arbitrary::Gen String) nic_param_list <- vectorOf num_networks (arbitrary::Gen PartialNicParams) net_map <- pure (GenericContainer .
Map.fromList $ zip net_uuid_list nic_param_list) -- timestamp fields ctime <- arbitrary mtime <- arbitrary uuid <- genFQDN `suchThat` (/= name) serial <- arbitrary tags <- Set.fromList <$> genTags let group = NodeGroup name members ndparams alloc_policy ipolicy diskparams net_map ctime mtime uuid serial tags return group instance Arbitrary NodeGroup where arbitrary = genNodeGroup $(genArbitrary ''Ip4Address) $(genArbitrary ''Ip4Network) -- | Helper to compute absolute value of an IPv4 address. ip4AddrValue :: Ip4Address -> Integer ip4AddrValue (Ip4Address a b c d) = fromIntegral a * (2^(24::Integer)) + fromIntegral b * (2^(16::Integer)) + fromIntegral c * (2^(8::Integer)) + fromIntegral d -- | Tests that any difference between IPv4 consecutive addresses is 1. prop_nextIp4Address :: Ip4Address -> Property prop_nextIp4Address ip4 = ip4AddrValue (nextIp4Address ip4) ==? ip4AddrValue ip4 + 1 -- | IsString instance for 'Ip4Address', to help write the tests. instance IsString Ip4Address where fromString s = fromMaybe (error $ "Failed to parse address from " ++ s) (readIp4Address s) -- | Tests a few simple cases of IPv4 next address. caseNextIp4Address :: HUnit.Assertion caseNextIp4Address = do HUnit.assertEqual "" "0.0.0.1" $ nextIp4Address "0.0.0.0" HUnit.assertEqual "" "0.0.0.0" $ nextIp4Address "255.255.255.255" HUnit.assertEqual "" "1.2.3.5" $ nextIp4Address "1.2.3.4" HUnit.assertEqual "" "1.3.0.0" $ nextIp4Address "1.2.255.255" HUnit.assertEqual "" "1.2.255.63" $ nextIp4Address "1.2.255.62" -- | Tests the compatibility between Haskell-serialized instances and their -- python-decoded and encoded version. -- Note: this can be enhanced with logical validations on the decoded objects casePyCompatInstances :: HUnit.Assertion casePyCompatInstances = do let num_inst = 500::Int instances <- genSample (vectorOf num_inst genInst) let serialized = J.encode instances -- check for non-ASCII fields, usually due to 'arbitrary :: String' mapM_ (\inst -> when (any (not . isAscii) (J.encode inst)) . 
HUnit.assertFailure $ "Instance has non-ASCII fields: " ++ show inst ) instances py_stdout <- runPython "from ganeti import objects\n\ \from ganeti import serializer\n\ \import sys\n\ \inst_data = serializer.Load(sys.stdin.read())\n\ \decoded = [objects.Instance.FromDict(i) for i in inst_data]\n\ \encoded = [i.ToDict() for i in decoded]\n\ \print serializer.Dump(encoded)" serialized >>= checkPythonResult let deserialised = J.decode py_stdout::J.Result [Instance] decoded <- case deserialised of J.Ok ops -> return ops J.Error msg -> HUnit.assertFailure ("Unable to decode instances: " ++ msg) -- this already raised an exception, but we need it -- for proper types >> fail "Unable to decode instances" HUnit.assertEqual "Mismatch in number of returned instances" (length decoded) (length instances) mapM_ (uncurry (HUnit.assertEqual "Different result after encoding/decoding") ) $ zip decoded instances -- | Tests that the logical ID is correctly found in a plain disk caseIncludeLogicalIdPlain :: HUnit.Assertion caseIncludeLogicalIdPlain = let vg_name = "xenvg" :: String lv_name = "1234sdf-qwef-2134-asff-asd2-23145d.data" :: String d = Disk (LIDPlain vg_name lv_name) [] "diskname" 1000 DiskRdWr Nothing Nothing "asdfgr-1234-5123-daf3-sdfw-134f43" in HUnit.assertBool "Unable to detect that plain Disk includes logical ID" $ includesLogicalId vg_name lv_name d -- | Tests that the logical ID is correctly found in a DRBD disk caseIncludeLogicalIdDrbd :: HUnit.Assertion caseIncludeLogicalIdDrbd = let vg_name = "xenvg" :: String lv_name = "1234sdf-qwef-2134-asff-asd2-23145d.data" :: String d = Disk (LIDDrbd8 "node1.example.com" "node2.example.com" 2000 1 5 "secret") [ Disk (LIDPlain "onevg" "onelv") [] "disk1" 1000 DiskRdWr Nothing Nothing "145145-asdf-sdf2-2134-asfd-534g2x" , Disk (LIDPlain vg_name lv_name) [] "disk2" 1000 DiskRdWr Nothing Nothing "6gd3sd-423f-ag2j-563b-dg34-gj3fse" ] "diskname" 1000 DiskRdWr Nothing Nothing "asdfgr-1234-5123-daf3-sdfw-134f43" in HUnit.assertBool "Unable to detect that DRBD Disk includes logical ID" $ includesLogicalId vg_name lv_name d -- | Tests that the logical ID is correctly NOT found in a plain disk caseNotIncludeLogicalIdPlain :: HUnit.Assertion caseNotIncludeLogicalIdPlain = let vg_name = "xenvg" :: String lv_name = "1234sdf-qwef-2134-asff-asd2-23145d.data" :: String d = Disk (LIDPlain "othervg" "otherlv") [] "diskname" 1000 DiskRdWr Nothing Nothing "asdfgr-1234-5123-daf3-sdfw-134f43" in HUnit.assertBool "Wrongly detected that plain Disk includes a foreign logical ID" $ not (includesLogicalId vg_name lv_name d) testSuite "Objects" [ 'prop_fillDict , 'prop_Disk_serialisation , 'prop_Inst_serialisation , 'prop_Network_serialisation , 'prop_Node_serialisation , 'prop_Config_serialisation , 'casePyCompatNetworks , 'casePyCompatNodegroups , 'casePyCompatInstances , 'prop_nextIp4Address , 'caseNextIp4Address , 'caseIncludeLogicalIdPlain , 'caseIncludeLogicalIdDrbd , 'caseNotIncludeLogicalIdPlain ] ganeti-2.9.3/test/hs/Test/Ganeti/TestHTools.hs0000644000000000000000000001166612267470014021132 0ustar00rootroot00000000000000{-# OPTIONS_GHC -fno-warn-orphans #-} {-| Common functionality for htools-related unittests. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.TestHTools ( nullIPolicy , defGroup , defGroupList , defGroupAssoc , createInstance , makeSmallCluster , setInstanceSmallerThanNode ) where import qualified Data.Map as Map import Test.Ganeti.TestCommon import qualified Ganeti.Constants as C import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Loader as Loader import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Types as Types -- * Helpers -- | Null iPolicy, and by null we mean very liberal. nullIPolicy :: Types.IPolicy nullIPolicy = Types.IPolicy { Types.iPolicyMinMaxISpecs = [Types.MinMaxISpecs { Types.minMaxISpecsMinSpec = Types.ISpec { Types.iSpecMemorySize = 0 , Types.iSpecCpuCount = 0 , Types.iSpecDiskSize = 0 , Types.iSpecDiskCount = 0 , Types.iSpecNicCount = 0 , Types.iSpecSpindleUse = 0 } , Types.minMaxISpecsMaxSpec = Types.ISpec { Types.iSpecMemorySize = maxBound , Types.iSpecCpuCount = maxBound , Types.iSpecDiskSize = maxBound , Types.iSpecDiskCount = C.maxDisks , Types.iSpecNicCount = C.maxNics , Types.iSpecSpindleUse = maxBound } }] , Types.iPolicyStdSpec = Types.ISpec { Types.iSpecMemorySize = Types.unitMem , Types.iSpecCpuCount = Types.unitCpu , Types.iSpecDiskSize = Types.unitDsk , Types.iSpecDiskCount = 1 , Types.iSpecNicCount = 1 , Types.iSpecSpindleUse = 1 } , Types.iPolicyDiskTemplates = [minBound..maxBound] , Types.iPolicyVcpuRatio = maxVcpuRatio -- somewhat random value, high -- enough to not impact us , Types.iPolicySpindleRatio = maxSpindleRatio } -- | Default group definition. defGroup :: Group.Group defGroup = flip Group.setIdx 0 $ Group.create "default" Types.defaultGroupID Types.AllocPreferred [] nullIPolicy [] -- | Default group, as a (singleton) 'Group.List'. defGroupList :: Group.List defGroupList = Container.fromList [(Group.idx defGroup, defGroup)] -- | Default group, as a string map. defGroupAssoc :: Map.Map String Types.Gdx defGroupAssoc = Map.singleton (Group.uuid defGroup) (Group.idx defGroup) -- | Create an instance given its spec. createInstance :: Int -> Int -> Int -> Instance.Instance createInstance mem dsk vcpus = Instance.create "inst-unnamed" mem dsk [Instance.Disk dsk Nothing] vcpus Types.Running [] True (-1) (-1) Types.DTDrbd8 1 [] -- | Create a small cluster by repeating a node spec. makeSmallCluster :: Node.Node -> Int -> Node.List makeSmallCluster node count = let origname = Node.name node origalias = Node.alias node nodes = map (\idx -> node { Node.name = origname ++ "-" ++ show idx , Node.alias = origalias ++ "-" ++ show idx }) [1..count] fn = flip Node.buildPeers Container.empty namelst = map (\n -> (Node.name n, fn n)) nodes (_, nlst) = Loader.assignIndices namelst in nlst -- | Update an instance to be smaller than a node. 
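--
-- For example (illustrative spec values only):
--
-- > setInstanceSmallerThanNode node (createInstance 1024 10240 2)
--
-- returns that instance shrunk (memory, disk and vcpus halved against
-- the node's available resources) so that it is guaranteed to fit on
-- @node@.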
setInstanceSmallerThanNode :: Node.Node -> Instance.Instance -> Instance.Instance setInstanceSmallerThanNode node inst = let new_dsk = Node.availDisk node `div` 2 in inst { Instance.mem = Node.availMem node `div` 2 , Instance.dsk = new_dsk , Instance.vcpus = Node.availCpu node `div` 2 , Instance.disks = [Instance.Disk new_dsk (if Node.exclStorage node then Just $ Node.fSpindles node `div` 2 else Nothing)] } ganeti-2.9.3/test/hs/Test/Ganeti/Types.hs0000644000000000000000000003477312271422343020167 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for 'Ganeti.Types'. -} {- Copyright (C) 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Types ( testTypes , AllocPolicy(..) , DiskTemplate(..) , allDiskTemplates , InstanceStatus(..) , NonEmpty(..) , Hypervisor(..) , JobId(..) ) where import Data.List (sort) import Test.QuickCheck as QuickCheck hiding (Result) import Test.HUnit import qualified Text.JSON as J import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.BasicTypes import qualified Ganeti.Constants as C import Ganeti.Types as Types import Ganeti.JSON {-# ANN module "HLint: ignore Use camelCase" #-} -- * Arbitrary instance instance (Arbitrary a, Ord a, Num a, Show a) => Arbitrary (Types.Positive a) where arbitrary = do (QuickCheck.Positive i) <- arbitrary Types.mkPositive i instance (Arbitrary a, Ord a, Num a, Show a) => Arbitrary (Types.NonNegative a) where arbitrary = do (QuickCheck.NonNegative i) <- arbitrary Types.mkNonNegative i instance (Arbitrary a, Ord a, Num a, Show a) => Arbitrary (Types.Negative a) where arbitrary = do (QuickCheck.Positive i) <- arbitrary Types.mkNegative $ negate i instance (Arbitrary a) => Arbitrary (Types.NonEmpty a) where arbitrary = do QuickCheck.NonEmpty lst <- arbitrary Types.mkNonEmpty lst $(genArbitrary ''AllocPolicy) -- | Valid disk templates (depending on configure options). allDiskTemplates :: [DiskTemplate] allDiskTemplates = [minBound..maxBound]::[DiskTemplate] -- | Custom 'Arbitrary' instance for 'DiskTemplate', which needs to -- handle the case of file storage being disabled at configure time. instance Arbitrary DiskTemplate where arbitrary = elements allDiskTemplates $(genArbitrary ''InstanceStatus) $(genArbitrary ''MigrationMode) $(genArbitrary ''VerifyOptionalChecks) $(genArbitrary ''DdmSimple) $(genArbitrary ''DdmFull) $(genArbitrary ''CVErrorCode) $(genArbitrary ''Hypervisor) $(genArbitrary ''OobCommand) -- | Valid storage types. allStorageTypes :: [StorageType] allStorageTypes = [minBound..maxBound]::[StorageType] -- | Custom 'Arbitrary' instance for 'StorageType', which needs to -- handle the case of file storage being disabled at configure time. 
instance Arbitrary StorageType where arbitrary = elements allStorageTypes $(genArbitrary ''NodeEvacMode) $(genArbitrary ''FileDriver) $(genArbitrary ''InstCreateMode) $(genArbitrary ''RebootType) $(genArbitrary ''ExportMode) $(genArbitrary ''IAllocatorTestDir) $(genArbitrary ''IAllocatorMode) $(genArbitrary ''NICMode) $(genArbitrary ''JobStatus) $(genArbitrary ''FinalizedJobStatus) instance Arbitrary JobId where arbitrary = do (Positive i) <- arbitrary makeJobId i $(genArbitrary ''JobIdDep) $(genArbitrary ''JobDependency) $(genArbitrary ''OpSubmitPriority) $(genArbitrary ''OpStatus) $(genArbitrary ''ELogType) -- * Properties prop_AllocPolicy_serialisation :: AllocPolicy -> Property prop_AllocPolicy_serialisation = testSerialisation -- | Test 'AllocPolicy' ordering is as expected. case_AllocPolicy_order :: Assertion case_AllocPolicy_order = assertEqual "sort order" [ Types.AllocPreferred , Types.AllocLastResort , Types.AllocUnallocable ] [minBound..maxBound] prop_DiskTemplate_serialisation :: DiskTemplate -> Property prop_DiskTemplate_serialisation = testSerialisation prop_InstanceStatus_serialisation :: InstanceStatus -> Property prop_InstanceStatus_serialisation = testSerialisation -- | Tests building non-negative numbers. prop_NonNeg_pass :: QuickCheck.NonNegative Int -> Property prop_NonNeg_pass (QuickCheck.NonNegative i) = case mkNonNegative i of Bad msg -> failTest $ "Fail to build non-negative: " ++ msg Ok nn -> fromNonNegative nn ==? i -- | Tests building non-negative numbers. prop_NonNeg_fail :: QuickCheck.Positive Int -> Property prop_NonNeg_fail (QuickCheck.Positive i) = case mkNonNegative (negate i)::Result (Types.NonNegative Int) of Bad _ -> passTest Ok nn -> failTest $ "Built non-negative number '" ++ show nn ++ "' from negative value " ++ show i -- | Tests building positive numbers. prop_Positive_pass :: QuickCheck.Positive Int -> Property prop_Positive_pass (QuickCheck.Positive i) = case mkPositive i of Bad msg -> failTest $ "Fail to build positive: " ++ msg Ok nn -> fromPositive nn ==? i -- | Tests building positive numbers. prop_Positive_fail :: QuickCheck.NonNegative Int -> Property prop_Positive_fail (QuickCheck.NonNegative i) = case mkPositive (negate i)::Result (Types.Positive Int) of Bad _ -> passTest Ok nn -> failTest $ "Built positive number '" ++ show nn ++ "' from negative or zero value " ++ show i -- | Tests building negative numbers. prop_Neg_pass :: QuickCheck.Positive Int -> Property prop_Neg_pass (QuickCheck.Positive i) = case mkNegative i' of Bad msg -> failTest $ "Fail to build negative: " ++ msg Ok nn -> fromNegative nn ==? i' where i' = negate i -- | Tests building negative numbers. prop_Neg_fail :: QuickCheck.NonNegative Int -> Property prop_Neg_fail (QuickCheck.NonNegative i) = case mkNegative i::Result (Types.Negative Int) of Bad _ -> passTest Ok nn -> failTest $ "Built negative number '" ++ show nn ++ "' from non-negative value " ++ show i -- | Tests building non-empty lists. prop_NonEmpty_pass :: QuickCheck.NonEmptyList String -> Property prop_NonEmpty_pass (QuickCheck.NonEmpty xs) = case mkNonEmpty xs of Bad msg -> failTest $ "Fail to build non-empty list: " ++ msg Ok nn -> fromNonEmpty nn ==? xs -- | Tests that building a non-empty list from an empty list fails. case_NonEmpty_fail :: Assertion case_NonEmpty_fail = assertEqual "building non-empty list from an empty list" (Bad "Received empty value for non-empty list") (mkNonEmpty ([]::[Int])) -- | Tests migration mode serialisation.
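--
-- (Like the other @_serialisation@ properties in this module, this relies
-- on 'testSerialisation' from Test.Ganeti.TestCommon, which checks that a
-- value survives a JSON encode/decode round trip unchanged.)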
prop_MigrationMode_serialisation :: MigrationMode -> Property prop_MigrationMode_serialisation = testSerialisation -- | Tests verify optional checks serialisation. prop_VerifyOptionalChecks_serialisation :: VerifyOptionalChecks -> Property prop_VerifyOptionalChecks_serialisation = testSerialisation -- | Tests 'DdmSimple' serialisation. prop_DdmSimple_serialisation :: DdmSimple -> Property prop_DdmSimple_serialisation = testSerialisation -- | Tests 'DdmFull' serialisation. prop_DdmFull_serialisation :: DdmFull -> Property prop_DdmFull_serialisation = testSerialisation -- | Tests 'CVErrorCode' serialisation. prop_CVErrorCode_serialisation :: CVErrorCode -> Property prop_CVErrorCode_serialisation = testSerialisation -- | Tests equivalence with Python, based on Constants.hs code. case_CVErrorCode_pyequiv :: Assertion case_CVErrorCode_pyequiv = do let all_py_codes = sort C.cvAllEcodesStrings all_hs_codes = sort $ map Types.cVErrorCodeToRaw [minBound..maxBound] assertEqual "for CVErrorCode equivalence" all_py_codes all_hs_codes -- | Test 'Hypervisor' serialisation. prop_Hypervisor_serialisation :: Hypervisor -> Property prop_Hypervisor_serialisation = testSerialisation -- | Test 'OobCommand' serialisation. prop_OobCommand_serialisation :: OobCommand -> Property prop_OobCommand_serialisation = testSerialisation -- | Test 'StorageType' serialisation. prop_StorageType_serialisation :: StorageType -> Property prop_StorageType_serialisation = testSerialisation -- | Test 'NodeEvacMode' serialisation. prop_NodeEvacMode_serialisation :: NodeEvacMode -> Property prop_NodeEvacMode_serialisation = testSerialisation -- | Test 'FileDriver' serialisation. prop_FileDriver_serialisation :: FileDriver -> Property prop_FileDriver_serialisation = testSerialisation -- | Test 'InstCreate' serialisation. prop_InstCreateMode_serialisation :: InstCreateMode -> Property prop_InstCreateMode_serialisation = testSerialisation -- | Test 'RebootType' serialisation. prop_RebootType_serialisation :: RebootType -> Property prop_RebootType_serialisation = testSerialisation -- | Test 'ExportMode' serialisation. prop_ExportMode_serialisation :: ExportMode -> Property prop_ExportMode_serialisation = testSerialisation -- | Test 'IAllocatorTestDir' serialisation. prop_IAllocatorTestDir_serialisation :: IAllocatorTestDir -> Property prop_IAllocatorTestDir_serialisation = testSerialisation -- | Test 'IAllocatorMode' serialisation. prop_IAllocatorMode_serialisation :: IAllocatorMode -> Property prop_IAllocatorMode_serialisation = testSerialisation -- | Tests equivalence with Python, based on Constants.hs code. case_IAllocatorMode_pyequiv :: Assertion case_IAllocatorMode_pyequiv = do let all_py_codes = sort C.validIallocatorModes all_hs_codes = sort $ map Types.iAllocatorModeToRaw [minBound..maxBound] assertEqual "for IAllocatorMode equivalence" all_py_codes all_hs_codes -- | Test 'NICMode' serialisation. prop_NICMode_serialisation :: NICMode -> Property prop_NICMode_serialisation = testSerialisation -- | Test 'OpStatus' serialisation. prop_OpStatus_serialization :: OpStatus -> Property prop_OpStatus_serialization = testSerialisation -- | Test 'JobStatus' serialisation. prop_JobStatus_serialization :: JobStatus -> Property prop_JobStatus_serialization = testSerialisation -- | Test 'JobStatus' ordering is as expected. 
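--
-- Since the 'Ord' instance is derived, it follows the constructor
-- declaration order; this case pins that order down so that an
-- accidental reordering of the constructors is caught.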
case_JobStatus_order :: Assertion case_JobStatus_order = assertEqual "sort order" [ Types.JOB_STATUS_QUEUED , Types.JOB_STATUS_WAITING , Types.JOB_STATUS_CANCELING , Types.JOB_STATUS_RUNNING , Types.JOB_STATUS_CANCELED , Types.JOB_STATUS_SUCCESS , Types.JOB_STATUS_ERROR ] [minBound..maxBound] -- | Tests equivalence with Python, based on Constants.hs code. case_NICMode_pyequiv :: Assertion case_NICMode_pyequiv = do let all_py_codes = sort C.nicValidModes all_hs_codes = sort $ map Types.nICModeToRaw [minBound..maxBound] assertEqual "for NICMode equivalence" all_py_codes all_hs_codes -- | Test 'FinalizedJobStatus' serialisation. prop_FinalizedJobStatus_serialisation :: FinalizedJobStatus -> Property prop_FinalizedJobStatus_serialisation = testSerialisation -- | Tests equivalence with Python, based on Constants.hs code. case_FinalizedJobStatus_pyequiv :: Assertion case_FinalizedJobStatus_pyequiv = do let all_py_codes = sort C.jobsFinalized all_hs_codes = sort $ map Types.finalizedJobStatusToRaw [minBound..maxBound] assertEqual "for FinalizedJobStatus equivalence" all_py_codes all_hs_codes -- | Tests JobId serialisation (both from string and ints). prop_JobId_serialisation :: JobId -> Property prop_JobId_serialisation jid = conjoin [ testSerialisation jid , (J.readJSON . J.showJSON . show $ fromJobId jid) ==? J.Ok jid , case (fromJVal . J.showJSON . negate $ fromJobId jid)::Result JobId of Bad _ -> passTest Ok jid' -> failTest $ "Parsed negative job id as id " ++ show (fromJobId jid') ] -- | Tests that fractional job IDs are not accepted. prop_JobId_fractional :: Property prop_JobId_fractional = forAll (arbitrary `suchThat` (\d -> fromIntegral (truncate d::Int) /= d)) $ \d -> case J.readJSON (J.showJSON (d::Double)) of J.Error _ -> passTest J.Ok jid -> failTest $ "Parsed fractional value " ++ show d ++ " as job id " ++ show (fromJobId jid) -- | Tests that a job ID is not parseable from \"bad\" JSON values. case_JobId_BadTypes :: Assertion case_JobId_BadTypes = do let helper jsval = case J.readJSON jsval of J.Error _ -> return () J.Ok jid -> assertFailure $ "Parsed " ++ show jsval ++ " as job id " ++ show (fromJobId jid) helper J.JSNull helper (J.JSBool True) helper (J.JSBool False) helper (J.JSArray []) -- | Test 'JobDependency' serialisation. prop_JobDependency_serialisation :: JobDependency -> Property prop_JobDependency_serialisation = testSerialisation -- | Test 'OpSubmitPriority' serialisation. prop_OpSubmitPriority_serialisation :: OpSubmitPriority -> Property prop_OpSubmitPriority_serialisation = testSerialisation -- | Tests string formatting for 'OpSubmitPriority'. prop_OpSubmitPriority_string :: OpSubmitPriority -> Property prop_OpSubmitPriority_string prio = parseSubmitPriority (fmtSubmitPriority prio) ==? Just prio -- | Test 'ELogType' serialisation. 
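-- A fixed-value companion to prop_OpSubmitPriority_string above, added
-- as an example and not registered in the testSuite list below. It
-- assumes the OpPrioNormal constructor from Ganeti.Types and the
-- Maybe-returning parseSubmitPriority that the property itself uses.
case_OpSubmitPriority_roundtrip :: Assertion
case_OpSubmitPriority_roundtrip =
  assertEqual "fmt/parse round-trip for OpPrioNormal"
    (Just Types.OpPrioNormal)
    (parseSubmitPriority (fmtSubmitPriority Types.OpPrioNormal))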
prop_ELogType_serialisation :: ELogType -> Property prop_ELogType_serialisation = testSerialisation testSuite "Types" [ 'prop_AllocPolicy_serialisation , 'case_AllocPolicy_order , 'prop_DiskTemplate_serialisation , 'prop_InstanceStatus_serialisation , 'prop_NonNeg_pass , 'prop_NonNeg_fail , 'prop_Positive_pass , 'prop_Positive_fail , 'prop_Neg_pass , 'prop_Neg_fail , 'prop_NonEmpty_pass , 'case_NonEmpty_fail , 'prop_MigrationMode_serialisation , 'prop_VerifyOptionalChecks_serialisation , 'prop_DdmSimple_serialisation , 'prop_DdmFull_serialisation , 'prop_CVErrorCode_serialisation , 'case_CVErrorCode_pyequiv , 'prop_Hypervisor_serialisation , 'prop_OobCommand_serialisation , 'prop_StorageType_serialisation , 'prop_NodeEvacMode_serialisation , 'prop_FileDriver_serialisation , 'prop_InstCreateMode_serialisation , 'prop_RebootType_serialisation , 'prop_ExportMode_serialisation , 'prop_IAllocatorTestDir_serialisation , 'prop_IAllocatorMode_serialisation , 'case_IAllocatorMode_pyequiv , 'prop_NICMode_serialisation , 'prop_OpStatus_serialization , 'prop_JobStatus_serialization , 'case_JobStatus_order , 'case_NICMode_pyequiv , 'prop_FinalizedJobStatus_serialisation , 'case_FinalizedJobStatus_pyequiv , 'prop_JobId_serialisation , 'prop_JobId_fractional , 'case_JobId_BadTypes , 'prop_JobDependency_serialisation , 'prop_OpSubmitPriority_serialisation , 'prop_OpSubmitPriority_string , 'prop_ELogType_serialisation ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/0000755000000000000000000000000012271445545017732 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Cluster.hs0000644000000000000000000004027612271422343021707 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Cluster (testHTools_Cluster) where import Test.QuickCheck hiding (Result) import qualified Data.IntMap as IntMap import Data.Maybe import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.TestHTools import Test.Ganeti.HTools.Instance ( genInstanceSmallerThanNode , genInstanceMaybeBiggerThanNode ) import Test.Ganeti.HTools.Node (genOnlineNode, genNode) import Ganeti.BasicTypes import qualified Ganeti.HTools.Backend.IAlloc as IAlloc import qualified Ganeti.HTools.Cluster as Cluster import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Types as Types {-# ANN module "HLint: ignore Use camelCase" #-} -- * Helpers -- | Make a small cluster, both nodes and instances. 
makeSmallEmptyCluster :: Node.Node -> Int -> Instance.Instance -> (Node.List, Instance.List, Instance.Instance) makeSmallEmptyCluster node count inst = (makeSmallCluster node count, Container.empty, setInstanceSmallerThanNode node inst) -- | Checks if a node is "big" enough. isNodeBig :: Int -> Node.Node -> Bool isNodeBig size node = Node.availDisk node > size * Types.unitDsk && Node.availMem node > size * Types.unitMem && Node.availCpu node > size * Types.unitCpu canBalance :: Cluster.Table -> Bool -> Bool -> Bool -> Bool canBalance tbl dm im evac = isJust $ Cluster.tryBalance tbl dm im evac 0 0 -- | Assigns a new fresh instance to a cluster; this is not -- allocation, so no resource checks are done. assignInstance :: Node.List -> Instance.List -> Instance.Instance -> Types.Idx -> Types.Idx -> (Node.List, Instance.List) assignInstance nl il inst pdx sdx = let pnode = Container.find pdx nl snode = Container.find sdx nl maxiidx = if Container.null il then 0 else fst (Container.findMax il) + 1 inst' = inst { Instance.idx = maxiidx, Instance.pNode = pdx, Instance.sNode = sdx } pnode' = Node.setPri pnode inst' snode' = Node.setSec snode inst' nl' = Container.addTwo pdx pnode' sdx snode' nl il' = Container.add maxiidx inst' il in (nl', il') -- | Checks if an instance is mirrored. isMirrored :: Instance.Instance -> Bool isMirrored = (/= Types.MirrorNone) . Instance.mirrorType -- | Returns the possible change node types for a disk template. evacModeOptions :: Types.MirrorType -> [Types.EvacMode] evacModeOptions Types.MirrorNone = [] evacModeOptions Types.MirrorInternal = [minBound..maxBound] -- DRBD can do all evacModeOptions Types.MirrorExternal = [Types.ChangePrimary, Types.ChangeAll] -- * Test cases -- | Check that the cluster score is close to zero for a homogeneous -- cluster. prop_Score_Zero :: Node.Node -> Property prop_Score_Zero node = forAll (choose (1, 1024)) $ \count -> (not (Node.offline node) && not (Node.failN1 node) && (count > 0) && (Node.tDsk node > 0) && (Node.tMem node > 0) && (Node.tSpindles node > 0) && (Node.tCpu node > 0)) ==> let fn = Node.buildPeers node Container.empty nlst = replicate count fn score = Cluster.compCVNodes nlst -- we can't say == 0 here as the floating point errors accumulate; -- this should be much lower than the default score in CLI.hs in score <= 1e-12 -- | Check that cluster stats are sane. prop_CStats_sane :: Property prop_CStats_sane = forAll (choose (1, 1024)) $ \count -> forAll genOnlineNode $ \node -> let fn = Node.buildPeers node Container.empty nlst = zip [1..] $ replicate count fn::[(Types.Ndx, Node.Node)] nl = Container.fromList nlst cstats = Cluster.totalResources nl in Cluster.csAdsk cstats >= 0 && Cluster.csAdsk cstats <= Cluster.csFdsk cstats -- | Check that one instance is allocated correctly on an empty cluster, -- without rebalances needed. prop_Alloc_sane :: Instance.Instance -> Property prop_Alloc_sane inst = forAll (choose (5, 20)) $ \count -> forAll genOnlineNode $ \node -> let (nl, il, inst') = makeSmallEmptyCluster node count inst reqnodes = Instance.requiredNodes $ Instance.diskTemplate inst in case Cluster.genAllocNodes defGroupList nl reqnodes True >>= Cluster.tryAlloc nl il inst' of Bad msg -> failTest msg Ok as -> case Cluster.asSolution as of Nothing -> failTest "Failed to allocate, empty solution" Just (xnl, xi, _, cv) -> let il' = Container.add (Instance.idx xi) xi il tbl = Cluster.Table xnl il' cv [] in printTestCase "Cluster can be balanced after allocation" (not (canBalance tbl True True False)) .&&. 
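-- second conjunct of prop_Alloc_sane: the score stored in the
-- allocation solution must agree with an independent recomputation
-- over the resulting node list, i.e. tryAlloc may not report a stale
-- or miscomputed cluster score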
printTestCase "Solution score differs from actual node list:" (Cluster.compCV xnl ==? cv) -- | Checks that on a 2-5 node cluster, we can allocate a random -- instance spec via tiered allocation (whatever the original instance -- spec), on either one or two nodes. Furthermore, we test that -- computed allocation statistics are correct. prop_CanTieredAlloc :: Property prop_CanTieredAlloc = forAll (choose (2, 5)) $ \count -> forAll (genOnlineNode `suchThat` isNodeBig 4) $ \node -> forAll (genInstanceMaybeBiggerThanNode node) $ \inst -> let nl = makeSmallCluster node count il = Container.empty rqnodes = Instance.requiredNodes $ Instance.diskTemplate inst allocnodes = Cluster.genAllocNodes defGroupList nl rqnodes True in case allocnodes >>= \allocnodes' -> Cluster.tieredAlloc nl il (Just 5) inst allocnodes' [] [] of Bad msg -> failTest $ "Failed to tiered alloc: " ++ msg Ok (_, nl', il', ixes, cstats) -> let (ai_alloc, ai_pool, ai_unav) = Cluster.computeAllocationDelta (Cluster.totalResources nl) (Cluster.totalResources nl') all_nodes fn = sum $ map fn (Container.elems nl) all_res fn = sum $ map fn [ai_alloc, ai_pool, ai_unav] in conjoin [ printTestCase "No instances allocated" $ not (null ixes) , IntMap.size il' ==? length ixes , length ixes ==? length cstats , all_res Types.allocInfoVCpus ==? all_nodes Node.hiCpu , all_res Types.allocInfoNCpus ==? all_nodes Node.tCpu , all_res Types.allocInfoMem ==? truncate (all_nodes Node.tMem) , all_res Types.allocInfoDisk ==? truncate (all_nodes Node.tDsk) ] -- | Helper function to create a cluster with the given range of nodes -- and allocate an instance on it. genClusterAlloc :: Int -> Node.Node -> Instance.Instance -> Result (Node.List, Instance.List, Instance.Instance) genClusterAlloc count node inst = let nl = makeSmallCluster node count reqnodes = Instance.requiredNodes $ Instance.diskTemplate inst in case Cluster.genAllocNodes defGroupList nl reqnodes True >>= Cluster.tryAlloc nl Container.empty inst of Bad msg -> Bad $ "Can't allocate: " ++ msg Ok as -> case Cluster.asSolution as of Nothing -> Bad "Empty solution?" Just (xnl, xi, _, _) -> let xil = Container.add (Instance.idx xi) xi Container.empty in Ok (xnl, xil, xi) -- | Checks that on a 4-8 node cluster, once we allocate an instance, -- we can also relocate it. prop_AllocRelocate :: Property prop_AllocRelocate = forAll (choose (4, 8)) $ \count -> forAll (genOnlineNode `suchThat` isNodeBig 4) $ \node -> forAll (genInstanceSmallerThanNode node `suchThat` isMirrored) $ \inst -> case genClusterAlloc count node inst of Bad msg -> failTest msg Ok (nl, il, inst') -> case IAlloc.processRelocate defGroupList nl il (Instance.idx inst) 1 [(if Instance.diskTemplate inst' == Types.DTDrbd8 then Instance.sNode else Instance.pNode) inst'] of Ok _ -> passTest Bad msg -> failTest $ "Failed to relocate: " ++ msg -- | Helper property checker for the result of a nodeEvac or -- changeGroup operation. check_EvacMode :: Group.Group -> Instance.Instance -> Result (Node.List, Instance.List, Cluster.EvacSolution) -> Property check_EvacMode grp inst result = case result of Bad msg -> failTest $ "Couldn't evacuate/change group:" ++ msg Ok (_, _, es) -> let moved = Cluster.esMoved es failed = Cluster.esFailed es opcodes = not . null $ Cluster.esOpCodes es in conjoin [ failmsg ("'failed' not empty: " ++ show failed) (null failed) , failmsg "'opcodes' is null" opcodes , case moved of [(idx', gdx, _)] -> failmsg "invalid instance moved" (idx == idx') .&&. 
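-- ...and the single moved instance must have landed in the node
-- group that the evacuation or group change was asked to target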
failmsg "wrong target group" (gdx == Group.idx grp) v -> failmsg ("invalid solution: " ++ show v) False ] where failmsg :: String -> Bool -> Property failmsg msg = printTestCase ("Failed to evacuate: " ++ msg) idx = Instance.idx inst -- | Checks that on a 4-8 node cluster, once we allocate an instance, -- we can also node-evacuate it. prop_AllocEvacuate :: Property prop_AllocEvacuate = forAll (choose (4, 8)) $ \count -> forAll (genOnlineNode `suchThat` isNodeBig 4) $ \node -> forAll (genInstanceSmallerThanNode node `suchThat` isMirrored) $ \inst -> case genClusterAlloc count node inst of Bad msg -> failTest msg Ok (nl, il, inst') -> conjoin . map (\mode -> check_EvacMode defGroup inst' $ Cluster.tryNodeEvac defGroupList nl il mode [Instance.idx inst']) . evacModeOptions . Instance.mirrorType $ inst' -- | Checks that on a 4-8 node cluster with two node groups, once we -- allocate an instance on the first node group, we can also change -- its group. prop_AllocChangeGroup :: Property prop_AllocChangeGroup = forAll (choose (4, 8)) $ \count -> forAll (genOnlineNode `suchThat` isNodeBig 4) $ \node -> forAll (genInstanceSmallerThanNode node `suchThat` isMirrored) $ \inst -> case genClusterAlloc count node inst of Bad msg -> failTest msg Ok (nl, il, inst') -> -- we need to add a second node group and nodes to the cluster let nl2 = Container.elems $ makeSmallCluster node count grp2 = Group.setIdx defGroup (Group.idx defGroup + 1) maxndx = maximum . map Node.idx $ nl2 nl3 = map (\n -> n { Node.group = Group.idx grp2 , Node.idx = Node.idx n + maxndx }) nl2 nl4 = Container.fromList . map (\n -> (Node.idx n, n)) $ nl3 gl' = Container.add (Group.idx grp2) grp2 defGroupList nl' = IntMap.union nl nl4 in check_EvacMode grp2 inst' $ Cluster.tryChangeGroup gl' nl' il [] [Instance.idx inst'] -- | Check that allocating multiple instances on a cluster, then -- adding an empty node, results in a valid rebalance. prop_AllocBalance :: Property prop_AllocBalance = forAll (genNode (Just 5) (Just 128)) $ \node -> forAll (choose (3, 5)) $ \count -> not (Node.offline node) && not (Node.failN1 node) ==> let nl = makeSmallCluster node count hnode = snd $ IntMap.findMax nl nl' = IntMap.deleteMax nl il = Container.empty allocnodes = Cluster.genAllocNodes defGroupList nl' 2 True i_templ = createInstance Types.unitMem Types.unitDsk Types.unitCpu in case allocnodes >>= \allocnodes' -> Cluster.iterateAlloc nl' il (Just 5) i_templ allocnodes' [] [] of Bad msg -> failTest $ "Failed to allocate: " ++ msg Ok (_, _, _, [], _) -> failTest "Failed to allocate: no instances" Ok (_, xnl, il', _, _) -> let ynl = Container.add (Node.idx hnode) hnode xnl cv = Cluster.compCV ynl tbl = Cluster.Table ynl il' cv [] in printTestCase "Failed to rebalance" $ canBalance tbl True True False -- | Checks consistency. prop_CheckConsistency :: Node.Node -> Instance.Instance -> Bool prop_CheckConsistency node inst = let nl = makeSmallCluster node 3 (node1, node2, node3) = case Container.elems nl of [a, b, c] -> (a, b, c) l -> error $ "Invalid node list out of makeSmallCluster/3: " ++ show l node3' = node3 { Node.group = 1 } nl' = Container.add (Node.idx node3') node3' nl inst1 = Instance.setBoth inst (Node.idx node1) (Node.idx node2) inst2 = Instance.setBoth inst (Node.idx node1) Node.noSecondary inst3 = Instance.setBoth inst (Node.idx node1) (Node.idx node3) ccheck = Cluster.findSplitInstances nl' . Container.fromList in null (ccheck [(0, inst1)]) && null (ccheck [(0, inst2)]) && (not . 
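-- inst3 straddles two node groups (node3 was moved to group 1
-- above), so findSplitInstances must flag it as split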
null $ ccheck [(0, inst3)]) -- | For now, we only test that we don't lose instances during the split. prop_SplitCluster :: Node.Node -> Instance.Instance -> Property prop_SplitCluster node inst = forAll (choose (0, 100)) $ \icnt -> let nl = makeSmallCluster node 2 (nl', il') = foldl (\(ns, is) _ -> assignInstance ns is inst 0 1) (nl, Container.empty) [1..icnt] gni = Cluster.splitCluster nl' il' in sum (map (Container.size . snd . snd) gni) == icnt && all (\(guuid, (nl'', _)) -> all ((== guuid) . Node.group) (Container.elems nl'')) gni -- | Helper function to check if we can allocate an instance on a -- given node list. Successful allocation is denoted by 'Nothing', -- otherwise the 'Just' value will contain the error message. canAllocOn :: Node.List -> Int -> Instance.Instance -> Maybe String canAllocOn nl reqnodes inst = case Cluster.genAllocNodes defGroupList nl reqnodes True >>= Cluster.tryAlloc nl Container.empty inst of Bad msg -> Just $ "Can't allocate: " ++ msg Ok as -> case Cluster.asSolution as of Nothing -> Just $ "No allocation solution; failures: " ++ show (Cluster.collapseFailures $ Cluster.asFailures as) Just _ -> Nothing -- | Checks that allocation obeys minimum and maximum instance -- policies. The unittest generates a random node, duplicates it /count/ -- times, and generates a random instance that can be allocated on -- this mini-cluster; it then checks that after applying a policy that -- the instance doesn't fits, the allocation fails. prop_AllocPolicy :: Property prop_AllocPolicy = forAll genOnlineNode $ \node -> forAll (choose (5, 20)) $ \count -> forAll (genInstanceSmallerThanNode node) $ \inst -> forAll (arbitrary `suchThat` (isBad . flip (Instance.instMatchesPolicy inst) (Node.exclStorage node))) $ \ipol -> let rqn = Instance.requiredNodes $ Instance.diskTemplate inst node' = Node.setPolicy ipol node nl = makeSmallCluster node' count in printTestCase "Allocation check:" (isNothing (canAllocOn (makeSmallCluster node count) rqn inst)) .&&. printTestCase "Policy failure check:" (isJust $ canAllocOn nl rqn inst) testSuite "HTools/Cluster" [ 'prop_Score_Zero , 'prop_CStats_sane , 'prop_Alloc_sane , 'prop_CanTieredAlloc , 'prop_AllocRelocate , 'prop_AllocEvacuate , 'prop_AllocChangeGroup , 'prop_AllocBalance , 'prop_CheckConsistency , 'prop_SplitCluster , 'prop_AllocPolicy ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Graph.hs0000644000000000000000000001571712244641676021345 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for Ganeti.Htools.Graph -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-} module Test.Ganeti.HTools.Graph (testHTools_Graph) where import Test.QuickCheck import Test.HUnit import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.HTools.Graph import qualified Data.Graph as Graph import qualified Data.IntMap as IntMap {-# ANN module "HLint: ignore Use camelCase" #-} data TestableGraph = TestableGraph Graph.Graph deriving (Show) data TestableClique = TestableClique Graph.Graph deriving (Show) -- | Generate node bounds and edges for an undirected graph. -- A graph is undirected if for every (a, b) edge there is a -- corresponding (b, a) one. undirEdges :: Gen (Graph.Bounds, [Graph.Edge]) undirEdges = sized undirEdges' where undirEdges' 0 = return ((0, 0), []) undirEdges' n = do maxv <- choose (1, n) edges <- listOf1 $ do i <- choose (0, maxv) j <- choose (0, maxv) `suchThat` (/= i) return [(i, j), (j, i)] return ((0, maxv), concat edges) -- | Generate node bounds and edges for a clique. -- In a clique all nodes are directly connected to each other. cliqueEdges :: Gen (Graph.Bounds, [Graph.Edge]) cliqueEdges = sized cliqueEdges' where cliqueEdges' 0 = return ((0, 0), []) cliqueEdges' n = do maxv <- choose (0, n) let edges = [(x, y) | x <- [0..maxv], y <- [0..maxv], x /= y] return ((0, maxv), edges) instance Arbitrary TestableGraph where arbitrary = do (mybounds, myedges) <- undirEdges return . TestableGraph $ Graph.buildG mybounds myedges instance Arbitrary TestableClique where arbitrary = do (mybounds, myedges) <- cliqueEdges return . TestableClique $ Graph.buildG mybounds myedges -- | Check that the empty vertex color map is empty. case_emptyVertColorMapNull :: Assertion case_emptyVertColorMapNull = assertBool "" $ IntMap.null emptyVertColorMap -- | Check that the empty vertex color map is zero in size. case_emptyVertColorMapEmpty :: Assertion case_emptyVertColorMapEmpty = assertEqual "" 0 $ IntMap.size emptyVertColorMap -- | Check if each two consecutive elements on a list -- respect a given condition. anyTwo :: (a -> a -> Bool) -> [a] -> Bool anyTwo _ [] = True anyTwo _ [_] = True anyTwo op (x:y:xs) = (x `op` y) && anyTwo op (y:xs) -- | Check order of vertices returned by verticesByDegreeAsc. prop_verticesByDegreeAscAsc :: TestableGraph -> Bool prop_verticesByDegreeAscAsc (TestableGraph g) = anyTwo (<=) (degrees asc) where degrees = map (length . neighbors g) asc = verticesByDegreeAsc g -- | Check order of vertices returned by verticesByDegreeDesc. prop_verticesByDegreeDescDesc :: TestableGraph -> Bool prop_verticesByDegreeDescDesc (TestableGraph g) = anyTwo (>=) (degrees desc) where degrees = map (length . neighbors g) desc = verticesByDegreeDesc g -- | Check that our generated graphs are colorable prop_isColorableTestableGraph :: TestableGraph -> Bool prop_isColorableTestableGraph (TestableGraph g) = isColorable g -- | Check that our generated graphs are colorable prop_isColorableTestableClique :: TestableClique -> Bool prop_isColorableTestableClique (TestableClique g) = isColorable g -- | Check that the given algorithm colors a clique with the same number of -- colors as the vertices number. prop_colorClique :: (Graph.Graph -> VertColorMap) -> TestableClique -> Property prop_colorClique alg (TestableClique g) = numvertices ==? numcolors where numcolors = (IntMap.size . colorVertMap) $ alg g numvertices = length (Graph.vertices g) -- | Specific check for the LF algorithm. prop_colorLFClique :: TestableClique -> Property prop_colorLFClique = prop_colorClique colorLF -- | Specific check for the Dsatur algorithm. 
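-- A worked example, added for illustration and not registered in the
-- testSuite list below: colouring the triangle K3 by hand. A proper
-- colouring of a 3-vertex clique needs exactly three colours, which is
-- the general fact prop_colorClique checks; only Graph.buildG and the
-- colouring API already used above are assumed.
case_colorLFTriangle :: Assertion
case_colorLFTriangle =
  let k3 = Graph.buildG (0, 2) [(a, b) | a <- [0..2], b <- [0..2], a /= b]
  in assertEqual "three colours for K3" 3
       ((IntMap.size . colorVertMap . colorLF) k3)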
prop_colorDsaturClique :: TestableClique -> Property prop_colorDsaturClique = prop_colorClique colorDsatur -- | Specific check for the Dcolor algorithm. prop_colorDcolorClique :: TestableClique -> Property prop_colorDcolorClique = prop_colorClique colorDcolor -- Check that all nodes are colored. prop_colorAllNodes :: (Graph.Graph -> VertColorMap) -> TestableGraph -> Property prop_colorAllNodes alg (TestableGraph g) = numvertices ==? numcolored where numcolored = IntMap.fold ((+) . length) 0 vcMap vcMap = colorVertMap $ alg g numvertices = length (Graph.vertices g) -- | Specific check for the LF algorithm. prop_colorLFAllNodes :: TestableGraph -> Property prop_colorLFAllNodes = prop_colorAllNodes colorLF -- | Specific check for the Dsatur algorithm. prop_colorDsaturAllNodes :: TestableGraph -> Property prop_colorDsaturAllNodes = prop_colorAllNodes colorDsatur -- | Specific check for the Dcolor algorithm. prop_colorDcolorAllNodes :: TestableGraph -> Property prop_colorDcolorAllNodes = prop_colorAllNodes colorDcolor -- | Check that no two vertices sharing the same edge have the same color. prop_colorProper :: (Graph.Graph -> VertColorMap) -> TestableGraph -> Bool prop_colorProper alg (TestableGraph g) = all isEdgeOk $ Graph.edges g where isEdgeOk :: Graph.Edge -> Bool isEdgeOk (v1, v2) = color v1 /= color v2 color v = cMap IntMap.! v cMap = alg g -- | Specific check for the LF algorithm. prop_colorLFProper :: TestableGraph -> Bool prop_colorLFProper = prop_colorProper colorLF -- | Specific check for the Dsatur algorithm. prop_colorDsaturProper :: TestableGraph -> Bool prop_colorDsaturProper = prop_colorProper colorDsatur -- | Specific check for the Dcolor algorithm. prop_colorDcolorProper :: TestableGraph -> Bool prop_colorDcolorProper = prop_colorProper colorDcolor -- | List of tests for the Graph module. testSuite "HTools/Graph" [ 'case_emptyVertColorMapNull , 'case_emptyVertColorMapEmpty , 'prop_verticesByDegreeAscAsc , 'prop_verticesByDegreeDescDesc , 'prop_colorLFClique , 'prop_colorDsaturClique , 'prop_colorDcolorClique , 'prop_colorLFAllNodes , 'prop_colorDsaturAllNodes , 'prop_colorDcolorAllNodes , 'prop_colorLFProper , 'prop_colorDsaturProper , 'prop_colorDcolorProper , 'prop_isColorableTestableGraph , 'prop_isColorableTestableClique ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Types.hs0000644000000000000000000001617612271422343021374 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, FlexibleInstances, TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Types ( testHTools_Types , Types.AllocPolicy(..) , Types.DiskTemplate(..) , Types.FailMode(..) , Types.EvacMode(..) , Types.ISpec(..) , Types.IPolicy(..) 
, nullIPolicy ) where import Test.QuickCheck hiding (Result) import Test.HUnit import Control.Applicative import Data.List (sort) import Control.Monad (replicateM) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.TestHTools import Test.Ganeti.Types (allDiskTemplates) import Ganeti.BasicTypes import qualified Ganeti.Constants as C import qualified Ganeti.HTools.Types as Types {-# ANN module "HLint: ignore Use camelCase" #-} -- * Helpers -- * Arbitrary instance $(genArbitrary ''Types.FailMode) $(genArbitrary ''Types.EvacMode) instance Arbitrary a => Arbitrary (Types.OpResult a) where arbitrary = arbitrary >>= \c -> if c then Ok <$> arbitrary else Bad <$> arbitrary instance Arbitrary Types.ISpec where arbitrary = do mem_s <- arbitrary::Gen (NonNegative Int) dsk_c <- arbitrary::Gen (NonNegative Int) dsk_s <- arbitrary::Gen (NonNegative Int) cpu_c <- arbitrary::Gen (NonNegative Int) nic_c <- arbitrary::Gen (NonNegative Int) su <- arbitrary::Gen (NonNegative Int) return Types.ISpec { Types.iSpecMemorySize = fromIntegral mem_s , Types.iSpecCpuCount = fromIntegral cpu_c , Types.iSpecDiskSize = fromIntegral dsk_s , Types.iSpecDiskCount = fromIntegral dsk_c , Types.iSpecNicCount = fromIntegral nic_c , Types.iSpecSpindleUse = fromIntegral su } -- | Generates an ispec bigger than the given one. genBiggerISpec :: Types.ISpec -> Gen Types.ISpec genBiggerISpec imin = do mem_s <- choose (Types.iSpecMemorySize imin, maxBound) dsk_c <- choose (Types.iSpecDiskCount imin, maxBound) dsk_s <- choose (Types.iSpecDiskSize imin, maxBound) cpu_c <- choose (Types.iSpecCpuCount imin, maxBound) nic_c <- choose (Types.iSpecNicCount imin, maxBound) su <- choose (Types.iSpecSpindleUse imin, maxBound) return Types.ISpec { Types.iSpecMemorySize = fromIntegral mem_s , Types.iSpecCpuCount = fromIntegral cpu_c , Types.iSpecDiskSize = fromIntegral dsk_s , Types.iSpecDiskCount = fromIntegral dsk_c , Types.iSpecNicCount = fromIntegral nic_c , Types.iSpecSpindleUse = fromIntegral su } genMinMaxISpecs :: Gen Types.MinMaxISpecs genMinMaxISpecs = do imin <- arbitrary imax <- genBiggerISpec imin return Types.MinMaxISpecs { Types.minMaxISpecsMinSpec = imin , Types.minMaxISpecsMaxSpec = imax } instance Arbitrary Types.MinMaxISpecs where arbitrary = genMinMaxISpecs genMinMaxStdISpecs :: Gen (Types.MinMaxISpecs, Types.ISpec) genMinMaxStdISpecs = do imin <- arbitrary istd <- genBiggerISpec imin imax <- genBiggerISpec istd return (Types.MinMaxISpecs { Types.minMaxISpecsMinSpec = imin , Types.minMaxISpecsMaxSpec = imax }, istd) genIPolicySpecs :: Gen ([Types.MinMaxISpecs], Types.ISpec) genIPolicySpecs = do num_mm <- choose (1, 6) -- 6 is just an arbitrary limit std_compl <- choose (1, num_mm) mm_head <- replicateM (std_compl - 1) genMinMaxISpecs (mm_middle, istd) <- genMinMaxStdISpecs mm_tail <- replicateM (num_mm - std_compl) genMinMaxISpecs return (mm_head ++ (mm_middle : mm_tail), istd) instance Arbitrary Types.IPolicy where arbitrary = do (iminmax, istd) <- genIPolicySpecs num_tmpl <- choose (0, length allDiskTemplates) dts <- genUniquesList num_tmpl arbitrary vcpu_ratio <- choose (1.0, maxVcpuRatio) spindle_ratio <- choose (1.0, maxSpindleRatio) return Types.IPolicy { Types.iPolicyMinMaxISpecs = iminmax , Types.iPolicyStdSpec = istd , Types.iPolicyDiskTemplates = dts , Types.iPolicyVcpuRatio = vcpu_ratio , Types.iPolicySpindleRatio = spindle_ratio } -- * Test cases prop_ISpec_serialisation :: Types.ISpec -> Property prop_ISpec_serialisation = testSerialisation prop_IPolicy_serialisation :: Types.IPolicy -> 
Property prop_IPolicy_serialisation = testSerialisation prop_EvacMode_serialisation :: Types.EvacMode -> Property prop_EvacMode_serialisation = testSerialisation prop_opToResult :: Types.OpResult Int -> Property prop_opToResult op = case op of Bad _ -> printTestCase ("expected bad but got " ++ show r) $ isBad r Ok v -> case r of Bad msg -> failTest ("expected Ok but got Bad " ++ msg) Ok v' -> v ==? v' where r = Types.opToResult op prop_eitherToResult :: Either String Int -> Bool prop_eitherToResult ei = case ei of Left _ -> isBad r Right v -> case r of Bad _ -> False Ok v' -> v == v' where r = eitherToResult ei -- | Test 'AutoRepairType' ordering is as expected and consistent with Python -- codebase. case_AutoRepairType_sort :: Assertion case_AutoRepairType_sort = do let expected = [ Types.ArFixStorage , Types.ArMigrate , Types.ArFailover , Types.ArReinstall ] all_hs_raw = map Types.autoRepairTypeToRaw [minBound..maxBound] assertEqual "Haskell order" expected [minBound..maxBound] assertEqual "consistent with Python" C.autoRepairAllTypes all_hs_raw -- | Test 'AutoRepairResult' type is equivalent with Python codebase. case_AutoRepairResult_pyequiv :: Assertion case_AutoRepairResult_pyequiv = do let all_py_results = sort C.autoRepairAllResults all_hs_results = sort $ map Types.autoRepairResultToRaw [minBound..maxBound] assertEqual "for AutoRepairResult equivalence" all_py_results all_hs_results testSuite "HTools/Types" [ 'prop_ISpec_serialisation , 'prop_IPolicy_serialisation , 'prop_EvacMode_serialisation , 'prop_opToResult , 'prop_eitherToResult , 'case_AutoRepairType_sort , 'case_AutoRepairResult_pyequiv ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/PeerMap.hs0000644000000000000000000000507212244641676021626 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.PeerMap (testHTools_PeerMap) where import Test.QuickCheck import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import qualified Ganeti.HTools.PeerMap as PeerMap -- | Make sure add is idempotent. prop_addIdempotent :: PeerMap.PeerMap -> PeerMap.Key -> PeerMap.Elem -> Property prop_addIdempotent pmap key em = fn (fn puniq) ==? fn puniq where fn = PeerMap.add key em puniq = PeerMap.accumArray const pmap -- | Make sure remove is idempotent. prop_removeIdempotent :: PeerMap.PeerMap -> PeerMap.Key -> Property prop_removeIdempotent pmap key = fn (fn puniq) ==? fn puniq where fn = PeerMap.remove key puniq = PeerMap.accumArray const pmap -- | Make sure a missing item returns 0. prop_findMissing :: PeerMap.PeerMap -> PeerMap.Key -> Property prop_findMissing pmap key = PeerMap.find key (PeerMap.remove key puniq) ==? 0 where puniq = PeerMap.accumArray const pmap -- | Make sure an added item is found. 
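-- Added illustration, not part of the original list of tests: the most
-- recent add for a key wins. This assumes PeerMap.add overwrites an
-- existing key rather than keeping the old element, consistent with
-- the idempotence property above.
prop_addOverwrite :: PeerMap.PeerMap -> PeerMap.Key -> PeerMap.Elem
                  -> PeerMap.Elem -> Property
prop_addOverwrite pmap key e1 e2 =
  PeerMap.find key (PeerMap.add key e2 (PeerMap.add key e1 puniq)) ==? e2
  where puniq = PeerMap.accumArray const pmap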
prop_addFind :: PeerMap.PeerMap -> PeerMap.Key -> PeerMap.Elem -> Property prop_addFind pmap key em = PeerMap.find key (PeerMap.add key em puniq) ==? em where puniq = PeerMap.accumArray const pmap -- | Manual check that maxElem returns the maximum indeed, or 0 for null. prop_maxElem :: PeerMap.PeerMap -> Property prop_maxElem pmap = PeerMap.maxElem puniq ==? if null puniq then 0 else (maximum . snd . unzip) puniq where puniq = PeerMap.accumArray const pmap -- | List of tests for the PeerMap module. testSuite "HTools/PeerMap" [ 'prop_addIdempotent , 'prop_removeIdempotent , 'prop_maxElem , 'prop_addFind , 'prop_findMissing ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Loader.hs0000644000000000000000000000675512244641676021514 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Loader (testHTools_Loader) where import Test.QuickCheck import qualified Data.IntMap as IntMap import qualified Data.Map as Map import Data.List import System.Time (ClockTime(..)) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.HTools.Node () import qualified Ganeti.BasicTypes as BasicTypes import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Loader as Loader import qualified Ganeti.HTools.Node as Node prop_lookupNode :: [(String, Int)] -> String -> String -> Property prop_lookupNode ktn inst node = Loader.lookupNode nl inst node ==? Map.lookup node nl where nl = Map.fromList ktn prop_lookupInstance :: [(String, Int)] -> String -> Property prop_lookupInstance kti inst = Loader.lookupInstance il inst ==? Map.lookup inst il where il = Map.fromList kti prop_assignIndices :: Property prop_assignIndices = -- generate nodes with unique names forAll (arbitrary `suchThat` (\nodes -> let names = map Node.name nodes in length names == length (nub names))) $ \nodes -> let (nassoc, kt) = Loader.assignIndices (map (\n -> (Node.name n, n)) nodes) in Map.size nassoc == length nodes && Container.size kt == length nodes && (null nodes || maximum (IntMap.keys kt) == length nodes - 1) -- | Checks that the number of primary instances recorded on the nodes -- is zero. prop_mergeData :: [Node.Node] -> Bool prop_mergeData ns = let na = Container.fromList $ map (\n -> (Node.idx n, n)) ns in case Loader.mergeData [] [] [] [] (TOD 0 0) (Loader.emptyCluster {Loader.cdNodes = na}) of BasicTypes.Bad _ -> False BasicTypes.Ok (Loader.ClusterData _ nl il _ _) -> let nodes = Container.elems nl instances = Container.elems il in (sum . map (length . Node.pList)) nodes == 0 && null instances -- | Check that compareNameComponent on equal strings works. 
prop_compareNameComponent_equal :: String -> Bool prop_compareNameComponent_equal s = BasicTypes.compareNameComponent s s == BasicTypes.LookupResult BasicTypes.ExactMatch s -- | Check that compareNameComponent on prefix strings works. prop_compareNameComponent_prefix :: NonEmptyList Char -> String -> Bool prop_compareNameComponent_prefix (NonEmpty s1) s2 = BasicTypes.compareNameComponent (s1 ++ "." ++ s2) s1 == BasicTypes.LookupResult BasicTypes.PartialMatch s1 testSuite "HTools/Loader" [ 'prop_lookupNode , 'prop_lookupInstance , 'prop_assignIndices , 'prop_mergeData , 'prop_compareNameComponent_equal , 'prop_compareNameComponent_prefix ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Instance.hs0000644000000000000000000001717312267470014022035 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Instance ( testHTools_Instance , genInstanceSmallerThanNode , genInstanceMaybeBiggerThanNode , genInstanceOnNodeList , genInstanceList , Instance.Instance(..) ) where import Control.Monad (liftM) import Test.QuickCheck hiding (Result) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.HTools.Types () import Ganeti.BasicTypes import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Loader as Loader import qualified Ganeti.HTools.Types as Types -- * Arbitrary instances -- | Generates a random instance with maximum disk/mem/cpu values. genInstanceSmallerThan :: Int -> Int -> Int -> Maybe Int -> Gen Instance.Instance genInstanceSmallerThan lim_mem lim_dsk lim_cpu lim_spin = do name <- genFQDN mem <- choose (0, lim_mem) dsk <- choose (0, lim_dsk) run_st <- arbitrary pn <- arbitrary sn <- arbitrary vcpus <- choose (0, lim_cpu) dt <- arbitrary spindles <- case lim_spin of Nothing -> genMaybe $ choose (0, maxSpindles) Just ls -> liftM Just $ choose (0, ls) let disk = Instance.Disk dsk spindles return $ Instance.create name mem dsk [disk] vcpus run_st [] True pn sn dt 1 [] -- | Generates an instance smaller than a node. genInstanceSmallerThanNode :: Node.Node -> Gen Instance.Instance genInstanceSmallerThanNode node = genInstanceSmallerThan (Node.availMem node `div` 2) (Node.availDisk node `div` 2) (Node.availCpu node `div` 2) (if Node.exclStorage node then Just $ Node.fSpindles node `div` 2 else Nothing) -- | Generates an instance possibly bigger than a node. 
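-- Sanity illustration for the generator above, added as an example and
-- not registered in the testSuite list below: genInstanceSmallerThan
-- draws every resource from (0, limit), so fixed limits must bound the
-- generated instance.
prop_genSmallerThanLimits :: Property
prop_genSmallerThanLimits =
  forAll (genInstanceSmallerThan 1024 4096 8 Nothing) $ \inst ->
    conjoin [ Instance.mem inst <= 1024
            , Instance.dsk inst <= 4096
            , Instance.vcpus inst <= 8
            ]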
genInstanceMaybeBiggerThanNode :: Node.Node -> Gen Instance.Instance genInstanceMaybeBiggerThanNode node = genInstanceSmallerThan (Node.availMem node + Types.unitMem * 2) (Node.availDisk node + Types.unitDsk * 3) (Node.availCpu node + Types.unitCpu * 4) (if Node.exclStorage node then Just $ Node.fSpindles node + Types.unitSpindle * 5 else Nothing) -- | Generates an instance with nodes on a node list. -- The following rules are respected: -- 1. The instance is never bigger than its primary node -- 2. If possible the instance has different pnode and snode -- 3. Else disk templates which require secondary nodes are disabled genInstanceOnNodeList :: Node.List -> Gen Instance.Instance genInstanceOnNodeList nl = do let nsize = Container.size nl pnode <- choose (0, nsize-1) let (snodefilter, dtfilter) = if nsize >= 2 then ((/= pnode), const True) else (const True, not . Instance.hasSecondary) snode <- choose (0, nsize-1) `suchThat` snodefilter i <- genInstanceSmallerThanNode (Container.find pnode nl) `suchThat` dtfilter return $ i { Instance.pNode = pnode, Instance.sNode = snode } -- | Generates an instance list given an instance generator. genInstanceList :: Gen Instance.Instance -> Gen Instance.List genInstanceList igen = fmap (snd . Loader.assignIndices) names_instances where names_instances = (fmap . map) (\n -> (Instance.name n, n)) $ listOf igen -- let's generate a random instance instance Arbitrary Instance.Instance where arbitrary = genInstanceSmallerThan maxMem maxDsk maxCpu Nothing -- * Test cases -- Simple instance tests, we only have setter/getters prop_creat :: Instance.Instance -> Property prop_creat inst = Instance.name inst ==? Instance.alias inst prop_setIdx :: Instance.Instance -> Types.Idx -> Property prop_setIdx inst idx = Instance.idx (Instance.setIdx inst idx) ==? idx prop_setName :: Instance.Instance -> String -> Bool prop_setName inst name = Instance.name newinst == name && Instance.alias newinst == name where newinst = Instance.setName inst name prop_setAlias :: Instance.Instance -> String -> Bool prop_setAlias inst name = Instance.name newinst == Instance.name inst && Instance.alias newinst == name where newinst = Instance.setAlias inst name prop_setPri :: Instance.Instance -> Types.Ndx -> Property prop_setPri inst pdx = Instance.pNode (Instance.setPri inst pdx) ==? pdx prop_setSec :: Instance.Instance -> Types.Ndx -> Property prop_setSec inst sdx = Instance.sNode (Instance.setSec inst sdx) ==? sdx prop_setBoth :: Instance.Instance -> Types.Ndx -> Types.Ndx -> Bool prop_setBoth inst pdx sdx = Instance.pNode si == pdx && Instance.sNode si == sdx where si = Instance.setBoth inst pdx sdx prop_shrinkMG :: Instance.Instance -> Property prop_shrinkMG inst = Instance.mem inst >= 2 * Types.unitMem ==> case Instance.shrinkByType inst Types.FailMem of Ok inst' -> Instance.mem inst' ==? Instance.mem inst - Types.unitMem Bad msg -> failTest msg prop_shrinkMF :: Instance.Instance -> Property prop_shrinkMF inst = forAll (choose (0, 2 * Types.unitMem - 1)) $ \mem -> let inst' = inst { Instance.mem = mem} in isBad $ Instance.shrinkByType inst' Types.FailMem prop_shrinkCG :: Instance.Instance -> Property prop_shrinkCG inst = Instance.vcpus inst >= 2 * Types.unitCpu ==> case Instance.shrinkByType inst Types.FailCPU of Ok inst' -> Instance.vcpus inst' ==? 
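-- a successful CPU shrink must reduce the vcpu count by exactly one
-- allocation unit (Types.unitCpu)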
Instance.vcpus inst - Types.unitCpu Bad msg -> failTest msg prop_shrinkCF :: Instance.Instance -> Property prop_shrinkCF inst = forAll (choose (0, 2 * Types.unitCpu - 1)) $ \vcpus -> let inst' = inst { Instance.vcpus = vcpus } in isBad $ Instance.shrinkByType inst' Types.FailCPU prop_shrinkDG :: Instance.Instance -> Property prop_shrinkDG inst = Instance.dsk inst >= 2 * Types.unitDsk ==> case Instance.shrinkByType inst Types.FailDisk of Ok inst' -> Instance.dsk inst' ==? Instance.dsk inst - Types.unitDsk Bad msg -> failTest msg prop_shrinkDF :: Instance.Instance -> Property prop_shrinkDF inst = forAll (choose (0, 2 * Types.unitDsk - 1)) $ \dsk -> let inst' = inst { Instance.dsk = dsk , Instance.disks = [Instance.Disk dsk Nothing] } in isBad $ Instance.shrinkByType inst' Types.FailDisk prop_setMovable :: Instance.Instance -> Bool -> Property prop_setMovable inst m = Instance.movable inst' ==? m where inst' = Instance.setMovable inst m testSuite "HTools/Instance" [ 'prop_creat , 'prop_setIdx , 'prop_setName , 'prop_setAlias , 'prop_setPri , 'prop_setSec , 'prop_setBoth , 'prop_shrinkMG , 'prop_shrinkMF , 'prop_shrinkCG , 'prop_shrinkCF , 'prop_shrinkDG , 'prop_shrinkDF , 'prop_setMovable ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Node.hs0000644000000000000000000004322712267470014021155 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Node ( testHTools_Node , Node.Node(..) , setInstanceSmallerThanNode , genNode , genOnlineNode , genNodeList , genUniqueNodeList ) where import Test.QuickCheck import Test.HUnit import Control.Monad import qualified Data.Map as Map import qualified Data.Graph as Graph import Data.List import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.TestHTools import Test.Ganeti.HTools.Instance ( genInstanceSmallerThanNode , genInstanceList , genInstanceOnNodeList) import Ganeti.BasicTypes import qualified Ganeti.HTools.Loader as Loader import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Types as Types import qualified Ganeti.HTools.Graph as HGraph {-# ANN module "HLint: ignore Use camelCase" #-} -- * Arbitrary instances -- | Generates an arbitrary node based on sizing information. genNode :: Maybe Int -- ^ Minimum node size in terms of units -> Maybe Int -- ^ Maximum node size (when Nothing, bounded -- just by the max... 
constants) -> Gen Node.Node genNode min_multiplier max_multiplier = do let (base_mem, base_dsk, base_cpu, base_spindles) = case min_multiplier of Just mm -> (mm * Types.unitMem, mm * Types.unitDsk, mm * Types.unitCpu, mm) Nothing -> (0, 0, 0, 0) (top_mem, top_dsk, top_cpu, top_spindles) = case max_multiplier of Just mm -> (mm * Types.unitMem, mm * Types.unitDsk, mm * Types.unitCpu, mm) Nothing -> (maxMem, maxDsk, maxCpu, maxSpindles) name <- genFQDN mem_t <- choose (base_mem, top_mem) mem_f <- choose (base_mem, mem_t) mem_n <- choose (0, mem_t - mem_f) dsk_t <- choose (base_dsk, top_dsk) dsk_f <- choose (base_dsk, dsk_t) cpu_t <- choose (base_cpu, top_cpu) cpu_n <- choose (base_cpu, cpu_t) offl <- arbitrary spindles <- choose (base_spindles, top_spindles) let n = Node.create name (fromIntegral mem_t) mem_n mem_f (fromIntegral dsk_t) dsk_f (fromIntegral cpu_t) cpu_n offl spindles 0 0 False n' = Node.setPolicy nullIPolicy n return $ Node.buildPeers n' Container.empty -- | Helper function to generate a sane node. genOnlineNode :: Gen Node.Node genOnlineNode = arbitrary `suchThat` (\n -> not (Node.offline n) && not (Node.failN1 n) && Node.availDisk n > 0 && Node.availMem n > 0 && Node.availCpu n > 0 && Node.tSpindles n > 0) -- | Generate a node with exclusive storage enabled. genExclStorNode :: Gen Node.Node genExclStorNode = do n <- genOnlineNode fs <- choose (Types.unitSpindle, Node.tSpindles n) let pd = fromIntegral fs / fromIntegral (Node.tSpindles n)::Double return n { Node.exclStorage = True , Node.fSpindles = fs , Node.pDsk = pd } -- | Generate a node with exclusive storage possibly enabled. genMaybeExclStorNode :: Gen Node.Node genMaybeExclStorNode = oneof [genOnlineNode, genExclStorNode] -- and a random node instance Arbitrary Node.Node where arbitrary = genNode Nothing Nothing -- | Node list generator. -- Given a node generator, create a random length node list. Note that "real" -- clusters always have at least one node, so we don't generate empty node -- lists here. genNodeList :: Gen Node.Node -> Gen Node.List genNodeList ngen = fmap (snd . Loader.assignIndices) names_nodes where names_nodes = (fmap . map) (\n -> (Node.name n, n)) nodes nodes = listOf1 ngen `suchThat` ((\ns -> ns == nub ns) . map Node.name) -- | Node list generator where node names are unique genUniqueNodeList :: Gen Node.Node -> Gen (Node.List, Types.NameAssoc) genUniqueNodeList ngen = (do nl <- genNodeList ngen let na = (fst . Loader.assignIndices) $ map (\n -> (Node.name n, n)) (Container.elems nl) return (nl, na)) `suchThat` (\(nl, na) -> Container.size nl == Map.size na) -- | Generate a node list, an instance list, and a node graph. -- We choose instances with nodes contained in the node list. genNodeGraph :: Gen (Maybe Graph.Graph, Node.List, Instance.List) genNodeGraph = do nl <- genNodeList genOnlineNode `suchThat` ((2<=).Container.size) il <- genInstanceList (genInstanceOnNodeList nl) return (Node.mkNodeGraph nl il, nl, il) -- * Test cases prop_setAlias :: Node.Node -> String -> Bool prop_setAlias node name = Node.name newnode == Node.name node && Node.alias newnode == name where newnode = Node.setAlias node name prop_setOffline :: Node.Node -> Bool -> Property prop_setOffline node status = Node.offline newnode ==? status where newnode = Node.setOffline node status prop_setXmem :: Node.Node -> Int -> Property prop_setXmem node xm = Node.xMem newnode ==? 
xm where newnode = Node.setXmem node xm prop_setMcpu :: Node.Node -> Double -> Property prop_setMcpu node mc = Types.iPolicyVcpuRatio (Node.iPolicy newnode) ==? mc where newnode = Node.setMcpu node mc prop_setFmemGreater :: Node.Node -> Int -> Property prop_setFmemGreater node new_mem = not (Node.failN1 node) && (Node.rMem node >= 0) && (new_mem > Node.rMem node) ==> not (Node.failN1 (Node.setFmem node new_mem)) prop_setFmemExact :: Node.Node -> Property prop_setFmemExact node = not (Node.failN1 node) && (Node.rMem node >= 0) ==> not (Node.failN1 (Node.setFmem node (Node.rMem node))) -- Check if adding an instance that consumes exactly all reserved -- memory does not raise an N+1 error prop_addPri_NoN1Fail :: Property prop_addPri_NoN1Fail = forAll genMaybeExclStorNode $ \node -> forAll (genInstanceSmallerThanNode node) $ \inst -> let inst' = inst { Instance.mem = Node.fMem node - Node.rMem node } in (Node.addPri node inst' /=? Bad Types.FailN1) -- | Check that an instance add with too high memory or disk will be -- rejected. prop_addPriFM :: Node.Node -> Instance.Instance -> Property prop_addPriFM node inst = Instance.mem inst >= Node.fMem node && not (Node.failN1 node) && not (Instance.isOffline inst) ==> (Node.addPri node inst'' ==? Bad Types.FailMem) where inst' = setInstanceSmallerThanNode node inst inst'' = inst' { Instance.mem = Instance.mem inst } -- | Check that adding a primary instance with too much disk fails -- with type FailDisk. prop_addPriFD :: Node.Node -> Instance.Instance -> Property prop_addPriFD node inst = forAll (elements Instance.localStorageTemplates) $ \dt -> Instance.dsk inst >= Node.fDsk node && not (Node.failN1 node) ==> let inst' = setInstanceSmallerThanNode node inst inst'' = inst' { Instance.dsk = Instance.dsk inst , Instance.diskTemplate = dt } in (Node.addPri node inst'' ==? Bad Types.FailDisk) -- | Check if an instance exceeds a spindles limit or has no spindles set. hasInstTooManySpindles :: Instance.Instance -> Int -> Bool hasInstTooManySpindles inst sp_lim = case Instance.getTotalSpindles inst of Just s -> s > sp_lim Nothing -> True -- | Check that adding a primary instance with too many spindles fails -- with type FailSpindles (when exclusive storage is enabled). prop_addPriFS :: Instance.Instance -> Property prop_addPriFS inst = forAll genExclStorNode $ \node -> forAll (elements Instance.localStorageTemplates) $ \dt -> hasInstTooManySpindles inst (Node.fSpindles node) && not (Node.failN1 node) ==> let inst' = setInstanceSmallerThanNode node inst inst'' = inst' { Instance.disks = Instance.disks inst , Instance.diskTemplate = dt } in (Node.addPri node inst'' ==? Bad Types.FailSpindles) -- | Check that adding a primary instance with too many VCPUs fails -- with type FailCPU. prop_addPriFC :: Property prop_addPriFC = forAll (choose (1, maxCpu)) $ \extra -> forAll genMaybeExclStorNode $ \node -> forAll (arbitrary `suchThat` Instance.notOffline) $ \inst -> let inst' = setInstanceSmallerThanNode node inst inst'' = inst' { Instance.vcpus = Node.availCpu node + extra } in case Node.addPri node inst'' of Bad Types.FailCPU -> passTest v -> failTest $ "Expected OpFail FailCPU, but got " ++ show v -- | Check that an instance add with too high memory or disk will be -- rejected. 
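-- The guard below spells out the three independent reasons a
-- secondary add can fail on an otherwise sane (non-N+1-failing) node:
-- not enough reservable memory (checked for online instances only),
-- not enough free disk, or too few free spindles when exclusive
-- storage is enabled. Any one of them must make addSec return Bad.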
prop_addSec :: Node.Node -> Instance.Instance -> Int -> Property prop_addSec node inst pdx = ((Instance.mem inst >= (Node.fMem node - Node.rMem node) && not (Instance.isOffline inst)) || Instance.dsk inst >= Node.fDsk node || (Node.exclStorage node && hasInstTooManySpindles inst (Node.fSpindles node))) && not (Node.failN1 node) ==> isBad (Node.addSec node inst pdx) -- | Check that an offline instance with reasonable disk size but -- extra mem/cpu can always be added. prop_addOfflinePri :: NonNegative Int -> NonNegative Int -> Property prop_addOfflinePri (NonNegative extra_mem) (NonNegative extra_cpu) = forAll genMaybeExclStorNode $ \node -> forAll (genInstanceSmallerThanNode node) $ \inst -> let inst' = inst { Instance.runSt = Types.StatusOffline , Instance.mem = Node.availMem node + extra_mem , Instance.vcpus = Node.availCpu node + extra_cpu } in case Node.addPri node inst' of Ok _ -> passTest v -> failTest $ "Expected OpGood, but got: " ++ show v -- | Check that an offline instance with reasonable disk size but -- extra mem/cpu can always be added. prop_addOfflineSec :: NonNegative Int -> NonNegative Int -> Types.Ndx -> Property prop_addOfflineSec (NonNegative extra_mem) (NonNegative extra_cpu) pdx = forAll genMaybeExclStorNode $ \node -> forAll (genInstanceSmallerThanNode node) $ \inst -> let inst' = inst { Instance.runSt = Types.StatusOffline , Instance.mem = Node.availMem node + extra_mem , Instance.vcpus = Node.availCpu node + extra_cpu , Instance.diskTemplate = Types.DTDrbd8 } in case Node.addSec node inst' pdx of Ok _ -> passTest v -> failTest $ "Expected OpGood/OpGood, but got: " ++ show v -- | Checks for memory reservation changes. prop_rMem :: Instance.Instance -> Property prop_rMem inst = not (Instance.isOffline inst) ==> forAll (genMaybeExclStorNode `suchThat` ((> Types.unitMem) . Node.fMem)) $ \node -> -- ab = auto_balance, nb = non-auto_balance -- we use -1 as the primary node of the instance let inst' = inst { Instance.pNode = -1, Instance.autoBalance = True , Instance.diskTemplate = Types.DTDrbd8 } inst_ab = setInstanceSmallerThanNode node inst' inst_nb = inst_ab { Instance.autoBalance = False } -- now we have the two instances, identical except the -- autoBalance attribute orig_rmem = Node.rMem node inst_idx = Instance.idx inst_ab node_add_ab = Node.addSec node inst_ab (-1) node_add_nb = Node.addSec node inst_nb (-1) node_del_ab = liftM (`Node.removeSec` inst_ab) node_add_ab node_del_nb = liftM (`Node.removeSec` inst_nb) node_add_nb in case (node_add_ab, node_add_nb, node_del_ab, node_del_nb) of (Ok a_ab, Ok a_nb, Ok d_ab, Ok d_nb) -> printTestCase "Consistency checks failed" $ Node.rMem a_ab > orig_rmem && Node.rMem a_ab - orig_rmem == Instance.mem inst_ab && Node.rMem a_nb == orig_rmem && Node.rMem d_ab == orig_rmem && Node.rMem d_nb == orig_rmem && -- this is not related to rMem, but as good a place to -- test as any inst_idx `elem` Node.sList a_ab && inst_idx `notElem` Node.sList d_ab x -> failTest $ "Failed to add/remove instances: " ++ show x -- | Check mdsk setting. prop_setMdsk :: Node.Node -> SmallRatio -> Bool prop_setMdsk node mx = Node.loDsk node' >= 0 && fromIntegral (Node.loDsk node') <= Node.tDsk node && Node.availDisk node' >= 0 && Node.availDisk node' <= Node.fDsk node' && fromIntegral (Node.availDisk node') <= Node.tDsk node' && Node.mDsk node' == mx' where node' = Node.setMdsk node mx' SmallRatio mx' = mx -- Check tag maps prop_tagMaps_idempotent :: Property prop_tagMaps_idempotent = forAll genTags $ \tags -> Node.delTags (Node.addTags m tags) tags ==? 
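-- deleting the very tags that were just added must give back the
-- original (empty) map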
    m
  where m = Map.empty

prop_tagMaps_reject :: Property
prop_tagMaps_reject =
  forAll (genTags `suchThat` (not . null)) $ \tags ->
  let m = Node.addTags Map.empty tags
  in all (\t -> Node.rejectAddTags m [t]) tags

prop_showField :: Node.Node -> Property
prop_showField node =
  forAll (elements Node.defaultFields) $ \ field ->
  fst (Node.showHeader field) /= Types.unknownField &&
  Node.showField node field /= Types.unknownField

prop_computeGroups :: [Node.Node] -> Bool
prop_computeGroups nodes =
  let ng = Node.computeGroups nodes
      onlyuuid = map fst ng
  in length nodes == sum (map (length . snd) ng) &&
     all (\(guuid, ns) -> all ((== guuid) . Node.group) ns) ng &&
     length (nub onlyuuid) == length onlyuuid &&
     (null nodes || not (null ng))

-- | Check idempotence of add/remove operations.
prop_addPri_idempotent :: Property
prop_addPri_idempotent =
  forAll genMaybeExclStorNode $ \node ->
  forAll (genInstanceSmallerThanNode node) $ \inst ->
  case Node.addPri node inst of
    Ok node' -> Node.removePri node' inst ==? node
    _ -> failTest "Can't add instance"

prop_addSec_idempotent :: Property
prop_addSec_idempotent =
  forAll genMaybeExclStorNode $ \node ->
  forAll (genInstanceSmallerThanNode node) $ \inst ->
  let pdx = Node.idx node + 1
      inst' = Instance.setPri inst pdx
      inst'' = inst' { Instance.diskTemplate = Types.DTDrbd8 }
  in case Node.addSec node inst'' pdx of
       Ok node' -> Node.removeSec node' inst'' ==? node
       _ -> failTest "Can't add instance"

-- | Check that no graph is created on an empty node list.
case_emptyNodeList :: Assertion
case_emptyNodeList =
  assertEqual "" Nothing $ Node.mkNodeGraph emptynodes emptyinstances
    where emptynodes = Container.empty :: Node.List
          emptyinstances = Container.empty :: Instance.List

-- | Check that the number of vertices of a nodegraph is equal to the
-- number of nodes in the original node list.
prop_numVertices :: Property
prop_numVertices =
  forAll genNodeGraph $ \(graph, nl, _) ->
    (fmap numvertices graph ==? Just (Container.size nl))
  where numvertices = length . Graph.vertices

-- | Check that the number of edges of a nodegraph is equal to twice
-- the number of instances with secondary nodes in the original
-- instance list.
prop_numEdges :: Property
prop_numEdges =
  forAll genNodeGraph $ \(graph, _, il) ->
    (fmap numedges graph ==? Just (numwithsec il * 2))
  where numedges = length . Graph.edges
        numwithsec = length . filter Instance.hasSecondary . Container.elems

-- | Check that a node graph is colorable.
prop_nodeGraphIsColorable :: Property
prop_nodeGraphIsColorable =
  forAll genNodeGraph $ \(graph, _, _) ->
    fmap HGraph.isColorable graph ==? Just True

-- | Check that each instance with a secondary node is an edge in the
-- resulting nodegraph.
prop_instanceIsEdge :: Property
prop_instanceIsEdge =
  forAll genNodeGraph $ \(graph, _, il) ->
    fmap (\g -> all (`isEdgeOn` g) (iwithsec il)) graph ==? Just True
  where i `isEdgeOn` g = iEdges i `intersect` Graph.edges g == iEdges i
        iEdges i = [ (Instance.pNode i, Instance.sNode i)
                   , (Instance.sNode i, Instance.pNode i) ]
        iwithsec = filter Instance.hasSecondary . Container.elems

-- | Check that each edge in the resulting nodegraph is an instance.
prop_edgeIsInstance :: Property
prop_edgeIsInstance =
  forAll genNodeGraph $ \(graph, _, il) ->
    fmap (all (`isInstanceIn` il) . Graph.edges) graph ==? Just True
  where e `isInstanceIn` il = any (`hasNodes` e) (Container.elems il)
        i `hasNodes` (v1, v2) =
          Instance.allNodes i `elem` permutations [v1, v2]
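-- An illustrative aside, not part of the original test list: properties
-- can also encode weaker corollaries of one another.  This hypothetical
-- example only checks that the edge count is even, which follows from
-- prop_numEdges above (each instance with a secondary contributes a
-- symmetric pair of edges); it reuses only generators and combinators
-- already defined in this module.
prop_numEdgesEven :: Property
prop_numEdgesEven =
  forAll genNodeGraph $ \(graph, _, _) ->
    -- edges come in (pnode, snode)/(snode, pnode) pairs, so the count
    -- of a successfully built graph must be even
    fmap (even . length . Graph.edges) graph ==? Just True

-- Such properties can also be exercised one at a time from GHCi (e.g.
-- "quickCheck prop_numEdges"), independently of the generated suite.

-- | List of tests for the Node module.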
testSuite "HTools/Node" [ 'prop_setAlias , 'prop_setOffline , 'prop_setMcpu , 'prop_setFmemGreater , 'prop_setFmemExact , 'prop_setXmem , 'prop_addPriFM , 'prop_addPriFD , 'prop_addPriFS , 'prop_addPriFC , 'prop_addPri_NoN1Fail , 'prop_addSec , 'prop_addOfflinePri , 'prop_addOfflineSec , 'prop_rMem , 'prop_setMdsk , 'prop_tagMaps_idempotent , 'prop_tagMaps_reject , 'prop_showField , 'prop_computeGroups , 'prop_addPri_idempotent , 'prop_addSec_idempotent , 'case_emptyNodeList , 'prop_numVertices , 'prop_numEdges , 'prop_nodeGraphIsColorable , 'prop_edgeIsInstance , 'prop_instanceIsEdge ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Container.hs0000644000000000000000000000627112244641676022221 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Container (testHTools_Container) where import Test.QuickCheck import Data.Maybe import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.TestHTools import Test.Ganeti.HTools.Node (genNode) import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Node as Node -- we silence the following due to hlint bug fixed in later versions {-# ANN prop_addTwo "HLint: ignore Avoid lambda" #-} prop_addTwo :: [Container.Key] -> Int -> Int -> Bool prop_addTwo cdata i1 i2 = fn i1 i2 cont == fn i2 i1 cont && fn i1 i2 cont == fn i1 i2 (fn i1 i2 cont) where cont = foldl (\c x -> Container.add x x c) Container.empty cdata fn x1 x2 = Container.addTwo x1 x1 x2 x2 prop_nameOf :: Node.Node -> Property prop_nameOf node = let nl = makeSmallCluster node 1 in case Container.elems nl of [] -> failTest "makeSmallCluster 1 returned empty cluster?" _:_:_ -> failTest "makeSmallCluster 1 returned >1 node?" fnode:_ -> Container.nameOf nl (Node.idx fnode) ==? Node.name fnode -- | We test that in a cluster, given a random node, we can find it by -- its name and alias, as long as all names and aliases are unique, -- and that we fail to find a non-existing name. prop_findByName :: Property prop_findByName = forAll (genNode (Just 1) Nothing) $ \node -> forAll (choose (1, 20)) $ \ cnt -> forAll (choose (0, cnt - 1)) $ \ fidx -> forAll (genUniquesList (cnt * 2) arbitrary) $ \ allnames -> forAll (arbitrary `suchThat` (`notElem` allnames)) $ \ othername -> let names = zip (take cnt allnames) (drop cnt allnames) nl = makeSmallCluster node cnt nodes = Container.elems nl nodes' = map (\((name, alias), nn) -> (Node.idx nn, nn { Node.name = name, Node.alias = alias })) $ zip names nodes nl' = Container.fromList nodes' target = snd (nodes' !! fidx) in conjoin [ Container.findByName nl' (Node.name target) ==? Just target , Container.findByName nl' (Node.alias target) ==? 
                 Just target
             , printTestCase "Found non-existing name"
               (isNothing (Container.findByName nl' othername))
             ]

testSuite "HTools/Container"
            [ 'prop_addTwo
            , 'prop_nameOf
            , 'prop_findByName
            ]
ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Backend/0000755000000000000000000000000012271445545021261 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Backend/Simu.hs0000644000000000000000000000722412244641676022542 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-| Unittests for ganeti-htools.

-}

{-

Copyright (C) 2009, 2010, 2011, 2012 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Test.Ganeti.HTools.Backend.Simu (testHTools_Backend_Simu) where

import Test.QuickCheck hiding (Result)

import Control.Monad
import qualified Data.IntMap as IntMap
import Text.Printf (printf)

import Test.Ganeti.TestHelper
import Test.Ganeti.TestCommon

import Ganeti.BasicTypes
import qualified Ganeti.Constants as C
import qualified Ganeti.HTools.Backend.Simu as Simu
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Group as Group
import qualified Ganeti.HTools.Loader as Loader
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Types as Types

-- | Generates a tuple of specs for simulation.
genSimuSpec :: Gen (String, Int, Int, Int, Int)
genSimuSpec = do
  pol <- elements [C.allocPolicyPreferred,
                   C.allocPolicyLastResort, C.allocPolicyUnallocable,
                   "p", "a", "u"]
  -- should be reasonable (nodes/group), bigger values only complicate
  -- the display of failed tests, and we don't care (in this particular
  -- test) about big node groups
  nodes <- choose (0, 20)
  dsk <- choose (0, maxDsk)
  mem <- choose (0, maxMem)
  cpu <- choose (0, maxCpu)
  return (pol, nodes, dsk, mem, cpu)

-- | Checks that given a set of correct specs, we can load them
-- successfully, and that at high-level the values look right.
prop_Load :: Property
prop_Load =
  forAll (choose (0, 10)) $ \ngroups ->
  forAll (replicateM ngroups genSimuSpec) $ \specs ->
  let strspecs = map (\(p, n, d, m, c) ->
                        printf "%s,%d,%d,%d,%d" p n d m c::String) specs
      totnodes = sum $ map (\(_, n, _, _, _) -> n) specs
      mdc_in = concatMap (\(_, n, d, m, c) ->
                            replicate n (fromIntegral m, fromIntegral d,
                                         fromIntegral c,
                                         fromIntegral m, fromIntegral d))
               specs :: [(Double, Double, Double, Int, Int)]
  in case Simu.parseData strspecs of
       Bad msg -> failTest $ "Failed to load specs: " ++ msg
       Ok (Loader.ClusterData gl nl il tags ipol) ->
         let nodes = map snd $ IntMap.toAscList nl
             nidx = map Node.idx nodes
             mdc_out = map (\n -> (Node.tMem n, Node.tDsk n, Node.tCpu n,
                                   Node.fMem n, Node.fDsk n)) nodes
         in conjoin [ Container.size gl ==? ngroups
                    , Container.size nl ==? totnodes
                    , Container.size il ==? 0
                    , length tags ==? 0
                    , ipol ==? Types.defIPolicy
                    , nidx ==? [1..totnodes]
                    , mdc_in ==? mdc_out
                    , map Group.iPolicy (Container.elems gl) ==?
replicate ngroups Types.defIPolicy ] testSuite "HTools/Backend/Simu" [ 'prop_Load ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/Backend/Text.hs0000644000000000000000000002173212267470014022540 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.Backend.Text (testHTools_Backend_Text) where import Test.QuickCheck import qualified Data.Map as Map import Data.List import Data.Maybe import System.Time (ClockTime(..)) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.TestHTools import Test.Ganeti.HTools.Instance (genInstanceSmallerThanNode, genInstanceOnNodeList) import Test.Ganeti.HTools.Node (genNode, genOnlineNode, genUniqueNodeList) import Ganeti.BasicTypes import qualified Ganeti.HTools.Backend.Text as Text import qualified Ganeti.HTools.Cluster as Cluster import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Loader as Loader import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Types as Types import qualified Ganeti.Utils as Utils -- * Instance text loader tests prop_Load_Instance :: String -> Int -> Int -> Int -> Types.InstanceStatus -> NonEmptyList Char -> String -> NonNegative Int -> NonNegative Int -> Bool -> Types.DiskTemplate -> Int -> Property prop_Load_Instance name mem dsk vcpus status (NonEmpty pnode) snode (NonNegative pdx) (NonNegative sdx) autobal dt su = pnode /= snode && pdx /= sdx ==> let vcpus_s = show vcpus dsk_s = show dsk mem_s = show mem su_s = show su status_s = Types.instanceStatusToRaw status ndx = if null snode then [(pnode, pdx)] else [(pnode, pdx), (snode, sdx)] nl = Map.fromList ndx tags = "" sbal = if autobal then "Y" else "N" sdt = Types.diskTemplateToRaw dt inst = Text.loadInst nl [name, mem_s, dsk_s, vcpus_s, status_s, sbal, pnode, snode, sdt, tags, su_s] fail1 = Text.loadInst nl [name, mem_s, dsk_s, vcpus_s, status_s, sbal, pnode, pnode, tags] in case inst of Bad msg -> failTest $ "Failed to load instance: " ++ msg Ok (_, i) -> printTestCase "Mismatch in some field while\ \ loading the instance" $ Instance.name i == name && Instance.vcpus i == vcpus && Instance.mem i == mem && Instance.pNode i == pdx && Instance.sNode i == (if null snode then Node.noSecondary else sdx) && Instance.autoBalance i == autobal && Instance.spindleUse i == su && isBad fail1 prop_Load_InstanceFail :: [(String, Int)] -> [String] -> Property prop_Load_InstanceFail ktn fields = length fields < 10 || length fields > 12 ==> case Text.loadInst nl fields of Ok _ -> failTest "Managed to load instance from invalid data" Bad msg -> printTestCase ("Unrecognised error message: " ++ msg) $ 
"Invalid/incomplete instance data: '" `isPrefixOf` msg where nl = Map.fromList ktn genInstanceNodes :: Gen (Instance.Instance, Node.List, Types.NameAssoc) genInstanceNodes = do (nl, na) <- genUniqueNodeList genOnlineNode inst <- genInstanceOnNodeList nl return (inst, nl, na) prop_InstanceLSIdempotent :: Property prop_InstanceLSIdempotent = forAll genInstanceNodes $ \(inst, nl, assoc) -> (Text.loadInst assoc . Utils.sepSplit '|' . Text.serializeInstance nl) inst ==? Ok (Instance.name inst, inst) prop_Load_Node :: String -> Int -> Int -> Int -> Int -> Int -> Int -> Bool -> Bool prop_Load_Node name tm nm fm td fd tc fo = let conv v = if v < 0 then "?" else show v tm_s = conv tm nm_s = conv nm fm_s = conv fm td_s = conv td fd_s = conv fd tc_s = conv tc fo_s = if fo then "Y" else "N" any_broken = any (< 0) [tm, nm, fm, td, fd, tc] gid = Group.uuid defGroup in case Text.loadNode defGroupAssoc [name, tm_s, nm_s, fm_s, td_s, fd_s, tc_s, fo_s, gid] of Nothing -> False Just (name', node) -> if fo || any_broken then Node.offline node else Node.name node == name' && name' == name && Node.alias node == name && Node.tMem node == fromIntegral tm && Node.nMem node == nm && Node.fMem node == fm && Node.tDsk node == fromIntegral td && Node.fDsk node == fd && Node.tCpu node == fromIntegral tc prop_Load_NodeFail :: [String] -> Property prop_Load_NodeFail fields = length fields /= 8 ==> isNothing $ Text.loadNode Map.empty fields prop_NodeLSIdempotent :: Property prop_NodeLSIdempotent = forAll (genNode (Just 1) Nothing) $ \node -> -- override failN1 to what loadNode returns by default let n = Node.setPolicy Types.defIPolicy $ node { Node.failN1 = True, Node.offline = False } in (Text.loadNode defGroupAssoc. Utils.sepSplit '|' . Text.serializeNode defGroupList) n ==? Just (Node.name n, n) prop_ISpecIdempotent :: Types.ISpec -> Property prop_ISpecIdempotent ispec = case Text.loadISpec "dummy" . Utils.sepSplit ',' . Text.serializeISpec $ ispec of Bad msg -> failTest $ "Failed to load ispec: " ++ msg Ok ispec' -> ispec ==? ispec' prop_MultipleMinMaxISpecsIdempotent :: [Types.MinMaxISpecs] -> Property prop_MultipleMinMaxISpecsIdempotent minmaxes = case Text.loadMultipleMinMaxISpecs "dummy" . Utils.sepSplit ';' . Text.serializeMultipleMinMaxISpecs $ minmaxes of Bad msg -> failTest $ "Failed to load min/max ispecs: " ++ msg Ok minmaxes' -> minmaxes ==? minmaxes' prop_IPolicyIdempotent :: Types.IPolicy -> Property prop_IPolicyIdempotent ipol = case Text.loadIPolicy . Utils.sepSplit '|' $ Text.serializeIPolicy owner ipol of Bad msg -> failTest $ "Failed to load ispec: " ++ msg Ok res -> (owner, ipol) ==? res where owner = "dummy" -- | This property, while being in the text tests, does more than just -- test end-to-end the serialisation and loading back workflow; it -- also tests the Loader.mergeData and the actual -- Cluster.iterateAlloc (for well-behaving w.r.t. instance -- allocations, not for the business logic). As such, it's a quite -- complex and slow test, and that's the reason we restrict it to -- small cluster sizes. 
prop_CreateSerialise :: Property prop_CreateSerialise = forAll genTags $ \ctags -> forAll (choose (1, 20)) $ \maxiter -> forAll (choose (2, 10)) $ \count -> forAll genOnlineNode $ \node -> forAll (genInstanceSmallerThanNode node) $ \inst -> let nl = makeSmallCluster node count reqnodes = Instance.requiredNodes $ Instance.diskTemplate inst in case Cluster.genAllocNodes defGroupList nl reqnodes True >>= \allocn -> Cluster.iterateAlloc nl Container.empty (Just maxiter) inst allocn [] [] of Bad msg -> failTest $ "Failed to allocate: " ++ msg Ok (_, _, _, [], _) -> printTestCase "Failed to allocate: no allocations" False Ok (_, nl', il', _, _) -> let cdata = Loader.ClusterData defGroupList nl' il' ctags Types.defIPolicy saved = Text.serializeCluster cdata in case Text.parseData saved >>= Loader.mergeData [] [] [] [] (TOD 0 0) of Bad msg -> failTest $ "Failed to load/merge: " ++ msg Ok (Loader.ClusterData gl2 nl2 il2 ctags2 cpol2) -> conjoin [ ctags ==? ctags2 , Types.defIPolicy ==? cpol2 , il' ==? il2 , defGroupList ==? gl2 , nl' ==? nl2 ] testSuite "HTools/Backend/Text" [ 'prop_Load_Instance , 'prop_Load_InstanceFail , 'prop_InstanceLSIdempotent , 'prop_Load_Node , 'prop_Load_NodeFail , 'prop_NodeLSIdempotent , 'prop_ISpecIdempotent , 'prop_MultipleMinMaxISpecsIdempotent , 'prop_IPolicyIdempotent , 'prop_CreateSerialise ] ganeti-2.9.3/test/hs/Test/Ganeti/HTools/CLI.hs0000644000000000000000000001111012267470014020661 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.HTools.CLI (testHTools_CLI) where import Test.HUnit import Test.QuickCheck import Control.Monad import Data.List import Text.Printf (printf) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Common import Ganeti.BasicTypes import Ganeti.HTools.CLI as CLI import qualified Ganeti.HTools.Program.Main as Program import qualified Ganeti.HTools.Types as Types {-# ANN module "HLint: ignore Use camelCase" #-} -- | Test correct parsing. prop_parseISpec :: String -> Int -> Int -> Int -> Maybe Int -> Property prop_parseISpec descr dsk mem cpu spn = let (str, spn') = case spn of Nothing -> (printf "%d,%d,%d" dsk mem cpu::String, 1) Just spn'' -> (printf "%d,%d,%d,%d" dsk mem cpu spn''::String, spn'') in parseISpecString descr str ==? Ok (Types.RSpec cpu mem dsk spn') -- | Test parsing failure due to wrong section count. prop_parseISpecFail :: String -> Property prop_parseISpecFail descr = forAll (choose (0,100) `suchThat` (not . 
flip elem [3, 4])) $ \nelems -> forAll (replicateM nelems arbitrary) $ \values -> let str = intercalate "," $ map show (values::[Int]) in case parseISpecString descr str of Ok v -> failTest $ "Expected failure, got " ++ show v _ -> passTest -- | Test a few string arguments. prop_string_arg :: String -> Property prop_string_arg argument = let args = [ (oDataFile, optDataFile) , (oDynuFile, optDynuFile) , (oSaveCluster, optSaveCluster) , (oPrintCommands, optShowCmds) , (genOLuxiSocket "", optLuxi) , (oIAllocSrc, optIAllocSrc) ] in conjoin $ map (\(o, opt) -> checkOpt Just defaultOptions failTest (const (==?)) Just (argument, o, opt)) args -- | Test a few positive arguments. prop_numeric_arg :: Positive Double -> Property prop_numeric_arg (Positive argument) = let args = [ (oMaxCpu, optMcpu) , (oMinDisk, Just . optMdsk) , (oMinGain, Just . optMinGain) , (oMinGainLim, Just . optMinGainLim) , (oMinScore, Just . optMinScore) ] in conjoin $ map (\(x, y) -> checkOpt (Just . show) defaultOptions failTest (const (==?)) Just (argument, x, y)) args -- | Test a few boolean arguments. case_bool_arg :: Assertion case_bool_arg = mapM_ (checkOpt (const Nothing) defaultOptions assertFailure assertEqual id) [ (False, oDiskMoves, optDiskMoves) , (False, oInstMoves, optInstMoves) , (True, oEvacMode, optEvacMode) , (True, oExecJobs, optExecJobs) , (True, oNoHeaders, optNoHeaders) , (True, oNoSimulation, optNoSimulation) ] -- | Tests a few invalid arguments. case_wrong_arg :: Assertion case_wrong_arg = mapM_ (passFailOpt defaultOptions assertFailure (return ())) [ (oSpindleUse, "-1", "1") , (oSpindleUse, "a", "1") , (oMaxCpu, "-1", "1") , (oMinDisk, "a", "1") , (oMinGainLim, "a", "1") , (oMaxSolLength, "x", "10") , (oStdSpec, "no-such-spec", "1,1,1") , (oTieredSpec, "no-such-spec", "1,1,1") ] -- | Test that all binaries support some common options. case_stdopts :: Assertion case_stdopts = mapM_ (\(name, (_, o, a, _)) -> do o' <- o checkEarlyExit defaultOptions name (o' ++ genericOpts) a) Program.personalities testSuite "HTools/CLI" [ 'prop_parseISpec , 'prop_parseISpecFail , 'prop_string_arg , 'prop_numeric_arg , 'case_bool_arg , 'case_wrong_arg , 'case_stdopts ] ganeti-2.9.3/test/hs/Test/Ganeti/TestImports.hs.in0000644000000000000000000000022512244641676021762 0ustar00rootroot00000000000000-- Hey Emacs, this is a -*- haskell -*- file {-| Auto-generated file importing all production modules. -} module Test.Ganeti.TestImports () where ganeti-2.9.3/test/hs/Test/Ganeti/TestCommon.hs0000644000000000000000000002744412271422343021150 0ustar00rootroot00000000000000{-| Unittest helpers for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.TestCommon ( maxMem , maxDsk , maxCpu , maxSpindles , maxVcpuRatio , maxSpindleRatio , maxNodes , maxOpCodes , (==?) , (/=?) 
  , failTest
  , passTest
  , pythonCmd
  , runPython
  , checkPythonResult
  , DNSChar(..)
  , genName
  , genFQDN
  , genUUID
  , genMaybe
  , genTags
  , genFields
  , genUniquesList
  , SmallRatio(..)
  , genSetHelper
  , genSet
  , genIp4AddrStr
  , genIp4Addr
  , genIp4NetWithNetmask
  , genIp4Net
  , genIp6Addr
  , genIp6Net
  , netmask2NumHosts
  , testSerialisation
  , resultProp
  , readTestData
  , genSample
  , testParser
  , genPropParser
  , genNonNegative
  ) where

import Control.Applicative
import Control.Exception (catchJust)
import Control.Monad
import Data.Attoparsec.Text (Parser, parseOnly)
import Data.List
import Data.Text (pack)
import Data.Word
import qualified Data.Set as Set
import System.Environment (getEnv)
import System.Exit (ExitCode(..))
import System.IO.Error (isDoesNotExistError)
import System.Process (readProcessWithExitCode)
import qualified Test.HUnit as HUnit
import Test.QuickCheck
import Test.QuickCheck.Monadic
import qualified Text.JSON as J
import Numeric

import qualified Ganeti.BasicTypes as BasicTypes
import Ganeti.Types

-- * Constants

-- | Maximum memory (1TiB, somewhat random value).
maxMem :: Int
maxMem = 1024 * 1024

-- | Maximum disk (8TiB, somewhat random value).
maxDsk :: Int
maxDsk = 1024 * 1024 * 8

-- | Max CPUs (1024, somewhat random value).
maxCpu :: Int
maxCpu = 1024

-- | Max spindles (1024, somewhat random value).
maxSpindles :: Int
maxSpindles = 1024

-- | Max vcpu ratio (random value).
maxVcpuRatio :: Double
maxVcpuRatio = 1024.0

-- | Max spindle ratio (random value).
maxSpindleRatio :: Double
maxSpindleRatio = 1024.0

-- | Max nodes, used just to limit arbitrary instances for smaller
-- opcode definitions (e.g. list of nodes in OpTestDelay).
maxNodes :: Int
maxNodes = 32

-- | Max opcodes or jobs in a submit job and submit many jobs.
maxOpCodes :: Int
maxOpCodes = 16

-- * Helper functions

-- | Checks for equality with proper annotation. The first argument is
-- the computed value, the second one the expected value.
(==?) :: (Show a, Eq a) => a -> a -> Property
(==?) x y = printTestCase
            ("Expected equality, but got mismatch\nexpected: " ++
             show y ++ "\n but got: " ++ show x) (x == y)
infix 3 ==?

-- | Checks for inequality with proper annotation. The first argument
-- is the computed value, the second one the expected (not equal)
-- value.
(/=?) :: (Show a, Eq a) => a -> a -> Property
(/=?) x y = printTestCase
            ("Expected inequality, but got equality: '" ++
             show x ++ "'.") (x /= y)
infix 3 /=?

-- | Show a message and fail the test.
failTest :: String -> Property
failTest msg = printTestCase msg False

-- | A 'True' property.
passTest :: Property
passTest = property True

-- | Return the python binary to use. If the PYTHON environment
-- variable is defined, use its value, otherwise use just \"python\".
pythonCmd :: IO String
pythonCmd = catchJust (guard . isDoesNotExistError)
                      (getEnv "PYTHON") (const (return "python"))

-- | Run Python with an expression, returning the exit code, standard
-- output and error.
runPython :: String -> String -> IO (ExitCode, String, String)
runPython expr stdin = do
  py_binary <- pythonCmd
  readProcessWithExitCode py_binary ["-c", expr] stdin

-- | Check python exit code, and fail via HUnit assertions if
-- non-zero. Otherwise, return the standard output.
checkPythonResult :: (ExitCode, String, String) -> IO String
checkPythonResult (py_code, py_stdout, py_stderr) = do
  HUnit.assertEqual ("python exited with error: " ++ py_stderr)
       ExitSuccess py_code
  return py_stdout
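-- Illustrative usage (not part of the original module): 'runPython' and
-- 'checkPythonResult' compose naturally inside an HUnit assertion.  A
-- minimal sketch with a hypothetical test name, echoing stdin through
-- the interpreter and checking the result:
case_pythonEcho_example :: HUnit.Assertion
case_pythonEcho_example = do
  -- run a trivial Python expression; checkPythonResult fails the test
  -- on a non-zero exit code and otherwise returns stdout
  out <- runPython "import sys; sys.stdout.write(sys.stdin.read())" "hello"
         >>= checkPythonResult
  HUnit.assertEqual "echoed data" "hello" out

-- * Arbitrary instances

-- | Defines a DNS name.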
newtype DNSChar = DNSChar { dnsGetChar::Char }

instance Arbitrary DNSChar where
  arbitrary = liftM DNSChar $ elements (['a'..'z'] ++ ['0'..'9'] ++ "_-")

instance Show DNSChar where
  show = show . dnsGetChar

-- | Generates a single name component.
genName :: Gen String
genName = do
  n <- choose (1, 16)
  dn <- vector n
  return (map dnsGetChar dn)

-- | Generates an entire FQDN.
genFQDN :: Gen String
genFQDN = do
  ncomps <- choose (1, 4)
  names <- vectorOf ncomps genName
  return $ intercalate "." names

-- | Generates a UUID-like string.
--
-- Only to be used for QuickCheck testing. For obtaining actual UUIDs use
-- the newUUID function in Ganeti.Utils
genUUID :: Gen String
genUUID = do
  c1 <- vector 6
  c2 <- vector 4
  c3 <- vector 4
  c4 <- vector 4
  c5 <- vector 4
  c6 <- vector 4
  c7 <- vector 6
  return $ map dnsGetChar c1 ++ "-" ++ map dnsGetChar c2 ++ "-" ++
           map dnsGetChar c3 ++ "-" ++ map dnsGetChar c4 ++ "-" ++
           map dnsGetChar c5 ++ "-" ++ map dnsGetChar c6 ++ "-" ++
           map dnsGetChar c7

-- | Combinator that generates a 'Maybe' using a sub-combinator.
genMaybe :: Gen a -> Gen (Maybe a)
genMaybe subgen = frequency [ (1, pure Nothing), (3, Just <$> subgen) ]

-- | Defines a tag type.
newtype TagChar = TagChar { tagGetChar :: Char }

-- | All valid tag chars. This doesn't need to match _exactly_
-- Ganeti's own tag regex, just enough for it to be close.
tagChar :: String
tagChar = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ ".+*/:@-"

instance Arbitrary TagChar where
  arbitrary = liftM TagChar $ elements tagChar

-- | Generates a tag.
genTag :: Gen [TagChar]
genTag = do
  -- the correct value would be C.maxTagLen, but that's way too
  -- verbose in unittests, and at the moment I don't see any possible
  -- bugs with longer tags and the way we use tags in htools
  n <- choose (1, 10)
  vector n

-- | Generates a list of tags (correctly upper bounded).
genTags :: Gen [String]
genTags = do
  -- the correct value would be C.maxTagsPerObj, but per the comment
  -- in genTag, we don't use tags enough in htools to warrant testing
  -- such big values
  n <- choose (0, 10::Int)
  tags <- mapM (const genTag) [1..n]
  return $ map (map tagGetChar) tags

-- | Generates a fields list. This uses the same character set as a
-- DNS name (just for simplicity).
genFields :: Gen [String]
genFields = do
  n <- choose (1, 32)
  vectorOf n genName

-- | Generates a list of a given size with non-duplicate elements.
genUniquesList :: (Eq a, Arbitrary a, Ord a) => Int -> Gen a -> Gen [a]
genUniquesList cnt generator = do
  set <- foldM (\set _ -> do
                  newelem <- generator `suchThat` (`Set.notMember` set)
                  return (Set.insert newelem set)) Set.empty [1..cnt]
  return $ Set.toList set

newtype SmallRatio = SmallRatio Double deriving Show

instance Arbitrary SmallRatio where
  arbitrary = liftM SmallRatio $ choose (0, 1)

-- | Helper for 'genSet', declared separately due to type constraints.
genSetHelper :: (Ord a) => [a] -> Maybe Int -> Gen (Set.Set a)
genSetHelper candidates size = do
  size' <- case size of
             Nothing -> choose (0, length candidates)
             Just s | s > length candidates ->
                        error $ "Invalid size " ++ show s ++
                                ", maximum is " ++ show (length candidates)
                    | otherwise -> return s
  foldM (\set _ -> do
           newelem <- elements candidates `suchThat` (`Set.notMember` set)
           return (Set.insert newelem set)) Set.empty [1..size']

-- | Generates a set of arbitrary elements.
genSet :: (Ord a, Bounded a, Enum a) => Maybe Int -> Gen (Set.Set a)
genSet = genSetHelper [minBound..maxBound]
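-- Illustrative sketch (not part of the original module): the generators
-- above compose with 'forAll' in the usual QuickCheck way.  This
-- hypothetical property checks that 'genUniquesList' really produces
-- duplicate-free lists, using only names defined in this file:
prop_uniquesListUnique_example :: Property
prop_uniquesListUnique_example =
  forAll (choose (0, 10)) $ \cnt ->
  forAll (genUniquesList cnt genName) $ \lst ->
    -- 'nub' removing nothing means all elements were distinct
    length lst ==? length (nub lst)

-- | Generate an arbitrary IPv4 address in textual form (non-empty).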
genIp4Addr :: Gen NonEmptyString
genIp4Addr = genIp4AddrStr >>= mkNonEmpty

-- | Generate an arbitrary IPv4 address in textual form.
genIp4AddrStr :: Gen String
genIp4AddrStr = do
  a <- choose (1::Int, 255)
  b <- choose (0::Int, 255)
  c <- choose (0::Int, 255)
  d <- choose (0::Int, 255)
  return $ intercalate "." (map show [a, b, c, d])

-- | Generates an arbitrary IPv4 address with a given netmask in textual form.
genIp4NetWithNetmask :: Int -> Gen NonEmptyString
genIp4NetWithNetmask netmask = do
  ip <- genIp4AddrStr
  mkNonEmpty $ ip ++ "/" ++ show netmask

-- | Generate an arbitrary IPv4 network in textual form.
genIp4Net :: Gen NonEmptyString
genIp4Net = do
  netmask <- choose (8::Int, 30)
  genIp4NetWithNetmask netmask

-- | Helper function to compute the number of hosts in a network
-- given the netmask. (For IPv4 only.)
netmask2NumHosts :: Word8 -> Int
netmask2NumHosts n = 2^(32-n)

-- | Generates an arbitrary IPv6 network address in textual form.
-- The generated address is not simplified, e.g. an address like
-- "2607:f0d0:1002:0051:0000:0000:0000:0004" does not become
-- "2607:f0d0:1002:51::4"
genIp6Addr :: Gen String
genIp6Addr = do
  rawIp <- vectorOf 8 $ choose (0::Integer, 65535)
  return $ intercalate ":" (map (`showHex` "") rawIp)

-- | Generates an arbitrary IPv6 network in textual form.
genIp6Net :: Gen String
genIp6Net = do
  netmask <- choose (8::Int, 126)
  ip <- genIp6Addr
  return $ ip ++ "/" ++ show netmask

-- * Helper functions

-- | Checks for serialisation idempotence.
testSerialisation :: (Eq a, Show a, J.JSON a) => a -> Property
testSerialisation a =
  case J.readJSON (J.showJSON a) of
    J.Error msg -> failTest $ "Failed to deserialise: " ++ msg
    J.Ok a' -> a ==? a'

-- | Result to PropertyM IO.
resultProp :: (Show a) => BasicTypes.GenericResult a b -> PropertyM IO b
resultProp (BasicTypes.Bad err) = stop . failTest $ show err
resultProp (BasicTypes.Ok val) = return val

-- | Return the source directory of Ganeti.
getSourceDir :: IO FilePath
getSourceDir = catchJust (guard . isDoesNotExistError)
            (getEnv "TOP_SRCDIR")
            (const (return "."))

-- | Returns the path of a file in the test data directory, given its name.
testDataFilename :: String -> String -> IO FilePath
testDataFilename datadir name = do
  src <- getSourceDir
  return $ src ++ datadir ++ name

-- | Returns the content of the specified haskell test data file.
readTestData :: String -> IO String
readTestData filename = do
  name <- testDataFilename "/test/data/" filename
  readFile name

-- | Generate arbitrary values in the IO monad. This is a simple
-- wrapper over 'sample''.
genSample :: Gen a -> IO a
genSample gen = do
  values <- sample' gen
  case values of
    [] -> error "sample' returned an empty list of values??"
    x:_ -> return x

-- | Function for testing whether a file is parsed correctly.
testParser :: (Show a, Eq a) => Parser a -> String -> a -> HUnit.Assertion
testParser parser fileName expectedContent = do
  fileContent <- readTestData fileName
  case parseOnly parser $ pack fileContent of
    Left msg -> HUnit.assertFailure $ "Parsing failed: " ++ msg
    Right obtained -> HUnit.assertEqual fileName expectedContent obtained

-- | Generate a property test for parsers.
genPropParser :: (Show a, Eq a) => Parser a -> String -> a -> Property
genPropParser parser s expected =
  case parseOnly parser $ pack s of
    Left msg -> failTest $ "Parsing failed: " ++ msg
    Right obtained -> expected ==?
obtained -- | Generate an arbitrary non negative integer number genNonNegative :: Gen Int genNonNegative = fmap fromIntegral (arbitrary::Gen (Test.QuickCheck.NonNegative Int)) ganeti-2.9.3/test/hs/Test/Ganeti/TestHelper.hs0000644000000000000000000001154412244641676021145 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-| Unittest helpers for TemplateHaskell components. -} {- Copyright (C) 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.TestHelper ( testSuite , genArbitrary ) where import Control.Applicative import Data.List (stripPrefix, isPrefixOf) import Data.Maybe (fromMaybe) import Test.Framework import Test.Framework.Providers.HUnit import Test.Framework.Providers.QuickCheck2 import Test.HUnit (Assertion) import Test.QuickCheck import Language.Haskell.TH -- | Test property prefix. propPrefix :: String propPrefix = "prop_" -- | Test case prefix. casePrefix :: String casePrefix = "case_" -- | Test case prefix without underscore. case2Pfx :: String case2Pfx = "case" -- | Tries to drop a prefix from a string. simplifyName :: String -> String -> String simplifyName pfx string = fromMaybe string (stripPrefix pfx string) -- | Builds a test from a QuickCheck property. runProp :: Testable prop => String -> prop -> Test runProp = testProperty . simplifyName propPrefix -- | Builds a test for a HUnit test case. runCase :: String -> Assertion -> Test runCase = testCase . simplifyName casePrefix -- | Runs the correct test provider for a given test, based on its -- name (not very nice, but...). run :: Name -> Q Exp run name = let str = nameBase name nameE = varE name strE = litE (StringL str) in case () of _ | propPrefix `isPrefixOf` str -> [| runProp $strE $nameE |] | casePrefix `isPrefixOf` str -> [| runCase $strE $nameE |] | case2Pfx `isPrefixOf` str -> [| (testCase . simplifyName case2Pfx) $strE $nameE |] | otherwise -> fail $ "Unsupported test function name '" ++ str ++ "'" -- | Convert slashes in a name to underscores. mapSlashes :: String -> String mapSlashes = map (\c -> if c == '/' then '_' else c) -- | Builds a test suite. testSuite :: String -> [Name] -> Q [Dec] testSuite tsname tdef = do let fullname = mkName $ "test" ++ mapSlashes tsname tests <- mapM run tdef sigtype <- [t| Test |] body <- [| testGroup $(litE $ stringL tsname) $(return $ ListE tests) |] return [ SigD fullname sigtype , ValD (VarP fullname) (NormalB body) [] ] -- | Builds an arbitrary value for a given constructor. This doesn't -- use the actual types of the fields, since we expect arbitrary -- instances for all of the types anyway, we only care about the -- number of fields. 
mkConsArbitrary :: (Name, [a]) -> Exp
mkConsArbitrary (name, types) =
  let infix_arb a = InfixE (Just a) (VarE '(<*>)) (Just (VarE 'arbitrary))
      constr = AppE (VarE 'pure) (ConE name)
  in foldl (\a _ -> infix_arb a) constr types

-- | Extracts the name and the types from a constructor.
conInfo :: Con -> (Name, [Type])
conInfo (NormalC name t)     = (name, map snd t)
conInfo (RecC name t)        = (name, map (\(_, _, x) -> x) t)
conInfo (InfixC t1 name t2)  = (name, [snd t1, snd t2])
conInfo (ForallC _ _ subcon) = conInfo subcon

-- | Builds an arbitrary instance for a regular data type (i.e. not Bounded).
mkRegularArbitrary :: Name -> [Con] -> Q [Dec]
mkRegularArbitrary name cons = do
  expr <- case cons of
            [] -> fail "Can't make Arbitrary instance for an empty data type"
            [x] -> return $ mkConsArbitrary (conInfo x)
            xs -> appE (varE 'oneof) $
                  listE (map (return . mkConsArbitrary . conInfo) xs)
  return [InstanceD [] (AppT (ConT ''Arbitrary) (ConT name))
          [ValD (VarP 'arbitrary) (NormalB expr) []]]

-- | Builds a default Arbitrary instance for a type. This requires
-- that all members are of types that already have Arbitrary
-- instances, and that the arbitrary instances are well behaved
-- (w.r.t. recursive data structures, or similar concerns). In that
-- sense, this is not appropriate for all data types, just those that
-- are simple but very repetitive or have many simple fields.
genArbitrary :: Name -> Q [Dec]
genArbitrary name = do
  r <- reify name
  case r of
    TyConI (DataD _ _ _ cons _) ->
      mkRegularArbitrary name cons
    TyConI (NewtypeD _ _ _ con _) ->
      mkRegularArbitrary name [con]
    TyConI (TySynD _ _ (ConT tn)) -> genArbitrary tn
    _ -> fail $ "Invalid type in call to genArbitrary for " ++ show name
         ++ ", type " ++ show r
ganeti-2.9.3/test/hs/Test/Ganeti/Runtime.hs0000644000000000000000000001076112271422343020475 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-| Unittests for "Ganeti.Runtime".

-}

{-

Copyright (C) 2013 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Test.Ganeti.Runtime (testRuntime) where

import Test.HUnit
import qualified Text.JSON as J

import Test.Ganeti.TestHelper
import Test.Ganeti.TestCommon

import Ganeti.Runtime

{-# ANN module "HLint: ignore Use camelCase" #-}
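-- Small illustrative note (not from the original file): the
-- [minBound..maxBound] idiom used throughout the tests below
-- enumerates every constructor of the GanetiDaemon type, so newly
-- added daemons are covered automatically.  A minimal sketch of the
-- same idiom, with a hypothetical name, relying only on the type
-- being Bounded and Enum as the tests below already assume:
allDaemons_example :: [GanetiDaemon]
allDaemons_example = [minBound .. maxBound]

-- | Tests the compatibility between Haskell and Python log files.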
case_LogFiles :: Assertion
case_LogFiles = do
  let daemons = [minBound..maxBound]::[GanetiDaemon]
      dnames = map daemonName daemons
  dfiles <- mapM daemonLogFile daemons
  let serialized = J.encode dnames
  py_stdout <-
    runPython "from ganeti import constants\n\
              \from ganeti import serializer\n\
              \import sys\n\
              \daemons = serializer.Load(sys.stdin.read())\n\
              \logfiles = [constants.DAEMONS_LOGFILES[d] for d in daemons]\n\
              \print serializer.Dump(logfiles)" serialized
    >>= checkPythonResult
  let deserialised = J.decode py_stdout::J.Result [String]
  decoded <- case deserialised of
               J.Ok ops -> return ops
               J.Error msg ->
                 assertFailure ("Unable to decode log files: " ++ msg)
                 -- this already raised an exception, but we need it
                 -- for proper types
                 >> fail "Unable to decode log files"
  assertEqual "Mismatch in number of returned log files"
    (length decoded) (length daemons)
  mapM_ (uncurry (assertEqual "Different result after encoding/decoding")
        ) $ zip decoded dfiles

-- | Tests the compatibility between Haskell and Python users.
case_UsersGroups :: Assertion
case_UsersGroups = do
  -- note: we don't have here a programmatic way to list all users, so
  -- we hardcode some parts of the two (hs/py) lists
  let daemons = [minBound..maxBound]::[GanetiDaemon]
      users = map daemonUser daemons
      groups = map daemonGroup $
               map DaemonGroup daemons ++ map ExtraGroup [minBound..maxBound]
  py_stdout <-
    runPython "from ganeti import constants\n\
              \from ganeti import serializer\n\
              \import sys\n\
              \users = [constants.MASTERD_USER,\n\
              \         constants.NODED_USER,\n\
              \         constants.RAPI_USER,\n\
              \         constants.CONFD_USER,\n\
              \         constants.LUXID_USER,\n\
              \         constants.MOND_USER,\n\
              \         ]\n\
              \groups = [constants.MASTERD_GROUP,\n\
              \          constants.NODED_GROUP,\n\
              \          constants.RAPI_GROUP,\n\
              \          constants.CONFD_GROUP,\n\
              \          constants.LUXID_GROUP,\n\
              \          constants.MOND_GROUP,\n\
              \          constants.DAEMONS_GROUP,\n\
              \          constants.ADMIN_GROUP,\n\
              \          ]\n\
              \encoded = (users, groups)\n\
              \print serializer.Dump(encoded)" ""
    >>= checkPythonResult
  let deserialised = J.decode py_stdout::J.Result ([String], [String])
  (py_users, py_groups) <-
    case deserialised of
      J.Ok ops -> return ops
      J.Error msg ->
        assertFailure ("Unable to decode users/groups: " ++ msg)
        -- this already raised an exception, but we need it for proper
        -- types
        >> fail "Unable to decode users/groups"
  assertEqual "Mismatch in number of returned users"
    (length py_users) (length users)
  assertEqual "Mismatch in number of returned groups"
    (length py_groups) (length groups)
  mapM_ (uncurry (assertEqual "Different result for users")
        ) $ zip py_users users
  mapM_ (uncurry (assertEqual "Different result for groups")
        ) $ zip py_groups groups

testSuite "Runtime"
          [ 'case_LogFiles
          , 'case_UsersGroups
          ]
ganeti-2.9.3/test/hs/Test/Ganeti/Ssconf.hs0000644000000000000000000000241212244641676020313 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-| Unittests for ganeti-htools.

-}

{-

Copyright (C) 2009, 2010, 2011, 2012 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Ssconf (testSsconf) where import Test.QuickCheck import Data.List import Test.Ganeti.TestHelper import qualified Ganeti.Ssconf as Ssconf -- * Ssconf tests $(genArbitrary ''Ssconf.SSKey) prop_filename :: Ssconf.SSKey -> Property prop_filename key = printTestCase "Key doesn't start with correct prefix" $ Ssconf.sSFilePrefix `isPrefixOf` Ssconf.keyToFilename "" key testSuite "Ssconf" [ 'prop_filename ] ganeti-2.9.3/test/hs/Test/Ganeti/JSON.hs0000644000000000000000000000533012270501740017615 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.JSON (testJSON) where import Data.List import Test.QuickCheck import qualified Text.JSON as J import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import qualified Ganeti.BasicTypes as BasicTypes import qualified Ganeti.JSON as JSON prop_toArray :: [Int] -> Property prop_toArray intarr = let arr = map J.showJSON intarr in case JSON.toArray (J.JSArray arr) of BasicTypes.Ok arr' -> arr ==? arr' BasicTypes.Bad err -> failTest $ "Failed to parse array: " ++ err prop_toArrayFail :: Int -> String -> Bool -> Property prop_toArrayFail i s b = -- poor man's instance Arbitrary JSValue forAll (elements [J.showJSON i, J.showJSON s, J.showJSON b]) $ \item -> case JSON.toArray item::BasicTypes.Result [J.JSValue] of BasicTypes.Bad _ -> passTest BasicTypes.Ok result -> failTest $ "Unexpected parse, got " ++ show result arrayMaybeToJson :: (J.JSON a) => [Maybe a] -> String -> JSON.JSRecord arrayMaybeToJson xs k = [(k, J.JSArray $ map sh xs)] where sh x = case x of Just v -> J.showJSON v Nothing -> J.JSNull prop_arrayMaybeFromObj :: String -> [Maybe Int] -> String -> Property prop_arrayMaybeFromObj t xs k = case JSON.tryArrayMaybeFromObj t (arrayMaybeToJson xs k) k of BasicTypes.Ok xs' -> xs' ==? xs BasicTypes.Bad e -> failTest $ "Parsing failing, got: " ++ show e prop_arrayMaybeFromObjFail :: String -> String -> Property prop_arrayMaybeFromObjFail t k = case JSON.tryArrayMaybeFromObj t [] k of BasicTypes.Ok r -> fail $ "Unexpected result, got: " ++ show (r::[Maybe Int]) BasicTypes.Bad e -> conjoin [ Data.List.isInfixOf t e ==? True , Data.List.isInfixOf k e ==? True ] testSuite "JSON" [ 'prop_toArray , 'prop_toArrayFail , 'prop_arrayMaybeFromObj , 'prop_arrayMaybeFromObjFail ] ganeti-2.9.3/test/hs/Test/Ganeti/Utils.hs0000644000000000000000000003250412271422343020151 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, CPP #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. 
-} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Utils (testUtils) where import Test.QuickCheck hiding (Result) import Test.HUnit import Data.Char (isSpace) import qualified Data.Either as Either import Data.List import System.Time import qualified Text.JSON as J #ifndef NO_REGEX_PCRE import Text.Regex.PCRE #endif import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.BasicTypes import qualified Ganeti.Constants as C import qualified Ganeti.JSON as JSON import Ganeti.Utils -- | Helper to generate a small string that doesn't contain commas. genNonCommaString :: Gen String genNonCommaString = do size <- choose (0, 20) -- arbitrary max size vectorOf size (arbitrary `suchThat` (/=) ',') -- | If the list is not just an empty element, and if the elements do -- not contain commas, then join+split should be idempotent. prop_commaJoinSplit :: Property prop_commaJoinSplit = forAll (choose (0, 20)) $ \llen -> forAll (vectorOf llen genNonCommaString `suchThat` (/=) [""]) $ \lst -> sepSplit ',' (commaJoin lst) ==? lst -- | Split and join should always be idempotent. prop_commaSplitJoin :: String -> Property prop_commaSplitJoin s = commaJoin (sepSplit ',' s) ==? s -- | fromObjWithDefault, we test using the Maybe monad and an integer -- value. prop_fromObjWithDefault :: Integer -> String -> Bool prop_fromObjWithDefault def_value random_key = -- a missing key will be returned with the default JSON.fromObjWithDefault [] random_key def_value == Just def_value && -- a found key will be returned as is, not with default JSON.fromObjWithDefault [(random_key, J.showJSON def_value)] random_key (def_value+1) == Just def_value -- | Test that functional if' behaves like the syntactic sugar if. prop_if'if :: Bool -> Int -> Int -> Gen Prop prop_if'if cnd a b = if' cnd a b ==? if cnd then a else b -- | Test basic select functionality prop_select :: Int -- ^ Default result -> [Int] -- ^ List of False values -> [Int] -- ^ List of True values -> Gen Prop -- ^ Test result prop_select def lst1 lst2 = select def (flist ++ tlist) ==? expectedresult where expectedresult = defaultHead def lst2 flist = zip (repeat False) lst1 tlist = zip (repeat True) lst2 {-# ANN prop_select_undefd "HLint: ignore Use alternative" #-} -- | Test basic select functionality with undefined default prop_select_undefd :: [Int] -- ^ List of False values -> NonEmptyList Int -- ^ List of True values -> Gen Prop -- ^ Test result prop_select_undefd lst1 (NonEmpty lst2) = -- head is fine as NonEmpty "guarantees" a non-empty list, but not -- via types select undefined (flist ++ tlist) ==? 
head lst2 where flist = zip (repeat False) lst1 tlist = zip (repeat True) lst2 {-# ANN prop_select_undefv "HLint: ignore Use alternative" #-} -- | Test basic select functionality with undefined list values prop_select_undefv :: [Int] -- ^ List of False values -> NonEmptyList Int -- ^ List of True values -> Gen Prop -- ^ Test result prop_select_undefv lst1 (NonEmpty lst2) = -- head is fine as NonEmpty "guarantees" a non-empty list, but not -- via types select undefined cndlist ==? head lst2 where flist = zip (repeat False) lst1 tlist = zip (repeat True) lst2 cndlist = flist ++ tlist ++ [undefined] prop_parseUnit :: NonNegative Int -> Property prop_parseUnit (NonNegative n) = conjoin [ parseUnit (show n) ==? (Ok n::Result Int) , parseUnit (show n ++ "m") ==? (Ok n::Result Int) , parseUnit (show n ++ "M") ==? (Ok (truncate n_mb)::Result Int) , parseUnit (show n ++ "g") ==? (Ok (n*1024)::Result Int) , parseUnit (show n ++ "G") ==? (Ok (truncate n_gb)::Result Int) , parseUnit (show n ++ "t") ==? (Ok (n*1048576)::Result Int) , parseUnit (show n ++ "T") ==? (Ok (truncate n_tb)::Result Int) , printTestCase "Internal error/overflow?" (n_mb >=0 && n_gb >= 0 && n_tb >= 0) , property (isBad (parseUnit (show n ++ "x")::Result Int)) ] where n_mb = (fromIntegral n::Rational) * 1000 * 1000 / 1024 / 1024 n_gb = n_mb * 1000 n_tb = n_gb * 1000 {-# ANN case_niceSort_static "HLint: ignore Use camelCase" #-} case_niceSort_static :: Assertion case_niceSort_static = do assertEqual "empty list" [] $ niceSort [] assertEqual "punctuation" [",", "."] $ niceSort [",", "."] assertEqual "decimal numbers" ["0.1", "0.2"] $ niceSort ["0.1", "0.2"] assertEqual "various numbers" ["0,099", "0.1", "0.2", "0;099"] $ niceSort ["0;099", "0,099", "0.1", "0.2"] assertEqual "simple concat" ["0000", "a0", "a1", "a2", "a20", "a99", "b00", "b10", "b70"] $ niceSort ["a0", "a1", "a99", "a20", "a2", "b10", "b70", "b00", "0000"] assertEqual "ranges" ["A", "Z", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", "a20-3", "a99-3", "a99-10", "b"] $ niceSort ["a0-0", "a1-0", "a99-10", "a20-3", "a0-4", "a99-3", "a09-2", "Z", "a9-1", "A", "b"] assertEqual "large" ["3jTwJPtrXOY22bwL2YoW", "Eegah9ei", "KOt7vn1dWXi", "KVQqLPDjcPjf8T3oyzjcOsfkb", "WvNJd91OoXvLzdEiEXa6", "Z8Ljf1Pf5eBfNg171wJR", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", "cPRi0lM7HLnSuWA2G9", "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx", "uHXAyYYftCSG1o7qcCqe", "xij88brTulHYAv8IEOyU", "xpIUJeVT1Rp"] $ niceSort ["Eegah9ei", "xij88brTulHYAv8IEOyU", "3jTwJPtrXOY22bwL2YoW", "Z8Ljf1Pf5eBfNg171wJR", "WvNJd91OoXvLzdEiEXa6", "uHXAyYYftCSG1o7qcCqe", "xpIUJeVT1Rp", "KOt7vn1dWXi", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", "cPRi0lM7HLnSuWA2G9", "KVQqLPDjcPjf8T3oyzjcOsfkb", "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx"] -- | Tests single-string behaviour of 'niceSort'. prop_niceSort_single :: Property prop_niceSort_single = forAll genName $ \name -> conjoin [ printTestCase "single string" $ [name] ==? niceSort [name] , printTestCase "single plus empty" $ ["", name] ==? niceSort [name, ""] ] -- | Tests some generic 'niceSort' properties. Note that the last test -- must add a non-digit prefix; a digit one might change ordering. prop_niceSort_generic :: Property prop_niceSort_generic = forAll (resize 20 arbitrary) $ \names -> let n_sorted = niceSort names in conjoin [ printTestCase "length" $ length names ==? length n_sorted , printTestCase "same strings" $ sort names ==? sort n_sorted , printTestCase "idempotence" $ n_sorted ==? 
niceSort n_sorted , printTestCase "static prefix" $ n_sorted ==? map tail (niceSort $ map (" "++) names) ] -- | Tests that niceSorting numbers is identical to actual sorting -- them (in numeric form). prop_niceSort_numbers :: Property prop_niceSort_numbers = forAll (listOf (arbitrary::Gen (NonNegative Int))) $ \numbers -> map show (sort numbers) ==? niceSort (map show numbers) -- | Tests that 'niceSort' and 'niceSortKey' are equivalent. prop_niceSortKey_equiv :: Property prop_niceSortKey_equiv = forAll (resize 20 arbitrary) $ \names -> forAll (vectorOf (length names) (arbitrary::Gen Int)) $ \numbers -> let n_sorted = niceSort names in conjoin [ printTestCase "key id" $ n_sorted ==? niceSortKey id names , printTestCase "key rev" $ niceSort (map reverse names) ==? map reverse (niceSortKey reverse names) , printTestCase "key snd" $ n_sorted ==? map snd (niceSortKey snd $ zip numbers names) ] -- | Tests 'rStripSpace'. prop_rStripSpace :: NonEmptyList Char -> Property prop_rStripSpace (NonEmpty str) = forAll (resize 50 $ listOf1 (arbitrary `suchThat` isSpace)) $ \whitespace -> conjoin [ printTestCase "arb. string last char is not space" $ case rStripSpace str of [] -> True xs -> not . isSpace $ last xs , printTestCase "whitespace suffix is stripped" $ rStripSpace str ==? rStripSpace (str ++ whitespace) , printTestCase "whitespace reduced to null" $ rStripSpace whitespace ==? "" , printTestCase "idempotent on empty strings" $ rStripSpace "" ==? "" ] #ifndef NO_REGEX_PCRE {-# ANN case_new_uuid "HLint: ignore Use camelCase" #-} -- | Tests that the newUUID function produces valid UUIDs. case_new_uuid :: Assertion case_new_uuid = do uuid <- newUUID assertBool "newUUID" $ uuid =~ C.uuidRegex #endif prop_clockTimeToString :: Integer -> Integer -> Property prop_clockTimeToString ts pico = clockTimeToString (TOD ts pico) ==? show ts -- | Test normal operation for 'chompPrefix'. -- -- Any random prefix of a string must be stripped correctly, including the empty -- prefix, and the whole string. prop_chompPrefix_normal :: String -> Property prop_chompPrefix_normal str = forAll (choose (0, length str)) $ \size -> chompPrefix (take size str) str ==? (Just $ drop size str) -- | Test that 'chompPrefix' correctly allows the last char (the separator) to -- be absent if the string terminates there. prop_chompPrefix_last :: Property prop_chompPrefix_last = forAll (choose (1, 20)) $ \len -> forAll (vectorOf len arbitrary) $ \pfx -> chompPrefix pfx pfx ==? Just "" .&&. chompPrefix pfx (init pfx) ==? Just "" -- | Test that chompPrefix on the empty string always returns Nothing for -- prefixes of length 2 or more. prop_chompPrefix_empty_string :: Property prop_chompPrefix_empty_string = forAll (choose (2, 20)) $ \len -> forAll (vectorOf len arbitrary) $ \pfx -> chompPrefix pfx "" ==? Nothing -- | Test 'chompPrefix' returns Nothing when the prefix doesn't match. prop_chompPrefix_nothing :: Property prop_chompPrefix_nothing = forAll (choose (1, 20)) $ \len -> forAll (vectorOf len arbitrary) $ \pfx -> forAll (arbitrary `suchThat` (\s -> not (pfx `isPrefixOf` s) && s /= init pfx)) $ \str -> chompPrefix pfx str ==? Nothing -- | Tests 'trim'. prop_trim :: NonEmptyList Char -> Property prop_trim (NonEmpty str) = forAll (listOf1 $ elements " \t\n\r\f") $ \whitespace -> forAll (choose (0, length whitespace)) $ \n -> let (preWS, postWS) = splitAt n whitespace in conjoin [ printTestCase "arb. string first and last char are not space" $ case trim str of [] -> True xs -> (not . isSpace . head) xs && (not . isSpace . 
last) xs , printTestCase "whitespace is stripped" $ trim str ==? trim (preWS ++ str ++ postWS) , printTestCase "whitespace reduced to null" $ trim whitespace ==? "" , printTestCase "idempotent on empty strings" $ trim "" ==? "" ] -- | Tests 'splitEithers' and 'recombineEithers'. prop_splitRecombineEithers :: [Either Int Int] -> Property prop_splitRecombineEithers es = conjoin [ printTestCase "only lefts are mapped correctly" $ splitEithers (map Left lefts) ==? (reverse lefts, emptylist, falses) , printTestCase "only rights are mapped correctly" $ splitEithers (map Right rights) ==? (emptylist, reverse rights, trues) , printTestCase "recombination is no-op" $ recombineEithers splitleft splitright trail ==? Ok es , printTestCase "fail on too long lefts" $ isBad (recombineEithers (0:splitleft) splitright trail) , printTestCase "fail on too long rights" $ isBad (recombineEithers splitleft (0:splitright) trail) , printTestCase "fail on too long trail" $ isBad (recombineEithers splitleft splitright (True:trail)) ] where (lefts, rights) = Either.partitionEithers es falses = map (const False) lefts trues = map (const True) rights (splitleft, splitright, trail) = splitEithers es emptylist = []::[Int] -- | Test list for the Utils module. testSuite "Utils" [ 'prop_commaJoinSplit , 'prop_commaSplitJoin , 'prop_fromObjWithDefault , 'prop_if'if , 'prop_select , 'prop_select_undefd , 'prop_select_undefv , 'prop_parseUnit , 'case_niceSort_static , 'prop_niceSort_single , 'prop_niceSort_generic , 'prop_niceSort_numbers , 'prop_niceSortKey_equiv , 'prop_rStripSpace , 'prop_trim #ifndef NO_REGEX_PCRE , 'case_new_uuid #endif , 'prop_clockTimeToString , 'prop_chompPrefix_normal , 'prop_chompPrefix_last , 'prop_chompPrefix_empty_string , 'prop_chompPrefix_nothing , 'prop_splitRecombineEithers ] ganeti-2.9.3/test/hs/Test/Ganeti/Daemon.hs0000644000000000000000000000475412244641676020276 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Daemon (testDaemon) where import Test.QuickCheck hiding (Result) import Test.HUnit import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Common import Ganeti.Common import Ganeti.Daemon as Daemon {-# ANN module "HLint: ignore Use camelCase" #-} -- | Test a few string arguments. prop_string_arg :: String -> Property prop_string_arg argument = let args = [ (argument, oBindAddress, optBindAddress) ] in conjoin $ map (checkOpt Just defaultOptions failTest (const (==?)) Just) args -- | Test a few integer arguments (only one for now). prop_numeric_arg :: Int -> Property prop_numeric_arg argument = checkOpt (Just . show) defaultOptions failTest (const (==?)) (Just . fromIntegral) (argument, oPort 0, optPort) -- | Test a few boolean arguments. 
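-- A descriptive note on the triples below (the names are the real ones
-- from this module; the exact command-line spelling of each flag lives
-- in 'Ganeti.Daemon' and is not repeated here): each entry reads as
-- (expected value, option, accessor), so for instance
-- (True, oDebug, optDebug) asserts that supplying the debug flag
-- leaves 'optDebug' set to True, with 'checkOpt' synthesising the
-- actual argument-parsing call.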
case_bool_arg :: Assertion case_bool_arg = mapM_ (checkOpt (const Nothing) defaultOptions assertFailure assertEqual id) [ (False, oNoDaemonize, optDaemonize) , (True, oDebug, optDebug) , (True, oNoUserChecks, optNoUserChecks) ] -- | Tests a few invalid arguments. case_wrong_arg :: Assertion case_wrong_arg = mapM_ (passFailOpt defaultOptions assertFailure (return ())) [ (oSyslogUsage, "foo", "yes") , (oPort 0, "x", "10") ] -- | Test that the option list supports some common options. case_stdopts :: Assertion case_stdopts = checkEarlyExit defaultOptions "prog" [oShowHelp, oShowVer] [] testSuite "Daemon" [ 'prop_string_arg , 'prop_numeric_arg , 'case_bool_arg , 'case_wrong_arg , 'case_stdopts ] ganeti-2.9.3/test/hs/Test/Ganeti/Query/0000755000000000000000000000000012271445545017627 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Query/Language.hs0000644000000000000000000001300412244641676021707 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Query.Language ( testQuery_Language , genFilter , genJSValue ) where import Test.QuickCheck import Control.Applicative import Control.Arrow (second) import Text.JSON import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.Query.Language -- | Custom 'Filter' generator (top-level), which enforces a -- (sane) limit on the depth of the generated filters. genFilter :: Gen (Filter FilterField) genFilter = choose (0, 10) >>= genFilter' -- | Custom generator for filters that correctly halves the state of -- the generators at each recursive step, per the QuickCheck -- documentation, in order not to run out of memory. genFilter' :: Int -> Gen (Filter FilterField) genFilter' 0 = oneof [ pure EmptyFilter , TrueFilter <$> genName , EQFilter <$> genName <*> value , LTFilter <$> genName <*> value , GTFilter <$> genName <*> value , LEFilter <$> genName <*> value , GEFilter <$> genName <*> value , RegexpFilter <$> genName <*> arbitrary , ContainsFilter <$> genName <*> value ] where value = oneof [ QuotedString <$> genName , NumericValue <$> arbitrary ] genFilter' n = oneof [ AndFilter <$> vectorOf n'' (genFilter' n') , OrFilter <$> vectorOf n'' (genFilter' n') , NotFilter <$> genFilter' n' ] where n' = n `div` 2 -- sub-filter generator size n'' = max n' 2 -- but we don't want empty or 1-element lists, -- so use this for and/or filter list length $(genArbitrary ''QueryTypeOp) $(genArbitrary ''QueryTypeLuxi) $(genArbitrary ''ItemType) instance Arbitrary FilterRegex where arbitrary = genName >>= mkRegex -- a name should be a good regex $(genArbitrary ''ResultStatus) $(genArbitrary ''FieldType) $(genArbitrary ''FieldDefinition) -- | Generates an arbitrary JSValue. 
We do this via a function and not -- via an Arbitrary instance since that would require us to define an -- Arbitrary instance for JSValue, which can be recursive, entering the usual -- problems with that; so we only generate the base types, not the -- recursive ones, and not 'JSNull', which we can't use in a -- 'RSNormal' 'ResultEntry'. genJSValue :: Gen JSValue genJSValue = oneof [ JSBool <$> arbitrary , JSRational <$> pure False <*> arbitrary , JSString <$> (toJSString <$> arbitrary) , (JSArray . map showJSON) <$> (arbitrary::Gen [Int]) , JSObject . toJSObject . map (second showJSON) <$> (arbitrary::Gen [(String, Int)]) ] -- | Generates a 'ResultEntry' value. genResultEntry :: Gen ResultEntry genResultEntry = do rs <- arbitrary rv <- case rs of RSNormal -> Just <$> genJSValue _ -> pure Nothing return $ ResultEntry rs rv $(genArbitrary ''QueryFieldsResult) -- | Tests that serialisation/deserialisation of filters is -- idempotent. prop_filter_serialisation :: Property prop_filter_serialisation = forAll genFilter testSerialisation -- | Tests that filter regexes are serialised correctly. prop_filterregex_instances :: FilterRegex -> Property prop_filterregex_instances rex = printTestCase "failed JSON encoding" (testSerialisation rex) -- | Tests 'ResultStatus' serialisation. prop_resultstatus_serialisation :: ResultStatus -> Property prop_resultstatus_serialisation = testSerialisation -- | Tests 'FieldType' serialisation. prop_fieldtype_serialisation :: FieldType -> Property prop_fieldtype_serialisation = testSerialisation -- | Tests 'FieldDef' serialisation. prop_fielddef_serialisation :: FieldDefinition -> Property prop_fielddef_serialisation = testSerialisation -- | Tests 'ResultEntry' serialisation. Needed especially as this is -- done manually, and not via buildObject (different serialisation -- format). prop_resultentry_serialisation :: Property prop_resultentry_serialisation = forAll genResultEntry testSerialisation -- | Tests 'QueryFieldsResult' serialisation. We use a made-up maximum limit of -- 20 for the generator, since otherwise the lists become too long and -- we don't care so much about list length but rather structure. prop_fieldsresult_serialisation :: Property prop_fieldsresult_serialisation = forAll (resize 20 arbitrary::Gen QueryFieldsResult) testSerialisation -- | Tests 'ItemType' serialisation. prop_itemtype_serialisation :: ItemType -> Property prop_itemtype_serialisation = testSerialisation testSuite "Query/Language" [ 'prop_filter_serialisation , 'prop_filterregex_instances , 'prop_resultstatus_serialisation , 'prop_fieldtype_serialisation , 'prop_fielddef_serialisation , 'prop_resultentry_serialisation , 'prop_fieldsresult_serialisation , 'prop_itemtype_serialisation ] ganeti-2.9.3/test/hs/Test/Ganeti/Query/Query.hs0000644000000000000000000003642512244641676021275 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, BangPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Query.Query (testQuery_Query) where import Test.HUnit (Assertion, assertEqual) import Test.QuickCheck hiding (Result) import Test.QuickCheck.Monadic import Data.Function (on) import Data.List import qualified Data.Map as Map import Data.Maybe import qualified Data.Set as Set import Text.JSON (JSValue(..), showJSON) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Objects (genEmptyCluster) import Ganeti.BasicTypes import Ganeti.Errors import Ganeti.JSON import Ganeti.Objects import Ganeti.Query.Filter import qualified Ganeti.Query.Group as Group import Ganeti.Query.Language import qualified Ganeti.Query.Node as Node import Ganeti.Query.Query import qualified Ganeti.Query.Job as Job import Ganeti.Utils (sepSplit) {-# ANN module "HLint: ignore Use camelCase" #-} -- * Helpers -- | Checks if a list of field definitions contains unknown fields. hasUnknownFields :: [FieldDefinition] -> Bool hasUnknownFields = (QFTUnknown `notElem`) . map fdefKind -- * Test cases -- ** Node queries -- | Tests that querying any existing fields, via either query or -- queryFields, will not return unknown fields. prop_queryNode_noUnknown :: Property prop_queryNode_noUnknown = forAll (choose (0, maxNodes) >>= genEmptyCluster) $ \cluster -> forAll (elements (Map.keys Node.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cluster False (Query (ItemTypeOpCode QRNode) [field] EmptyFilter)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields (ItemTypeOpCode QRNode) [field]) stop $ conjoin [ printTestCase ("Got unknown fields via query (" ++ show fdefs ++ ")") (hasUnknownFields fdefs) , printTestCase ("Got unknown result status via query (" ++ show fdata ++ ")") (all (all ((/= RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got unknown fields via query fields (" ++ show fdefs'++ ")") (hasUnknownFields fdefs') ] -- | Tests that an unknown field is returned as such. prop_queryNode_Unknown :: Property prop_queryNode_Unknown = forAll (choose (0, maxNodes) >>= genEmptyCluster) $ \cluster -> forAll (arbitrary `suchThat` (`notElem` Map.keys Node.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cluster False (Query (ItemTypeOpCode QRNode) [field] EmptyFilter)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields (ItemTypeOpCode QRNode) [field]) stop $ conjoin [ printTestCase ("Got known fields via query (" ++ show fdefs ++ ")") (not $ hasUnknownFields fdefs) , printTestCase ("Got /= ResultUnknown result status via query (" ++ show fdata ++ ")") (all (all ((== RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got a Just in a result value (" ++ show fdata ++ ")") (all (all (isNothing . rentryValue)) fdata) , printTestCase ("Got known fields via query fields (" ++ show fdefs' ++ ")") (not $ hasUnknownFields fdefs') ] -- | Checks that a result type is conforming to a field definition. 
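-- As a concrete illustration of the conformance check that follows
-- (a sketch with invented field values; the constructors are the real
-- ones from 'Ganeti.Query.Language'): given
--   fdef = FieldDefinition "name" "Name" QFTText "Node name"
-- an entry 'ResultEntry RSNormal (Just (JSString ...))' conforms,
-- an entry 'ResultEntry RSNormal (Just (JSBool True))' is a type
-- mismatch, and 'ResultEntry RSNormal Nothing' is always rejected.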
checkResultType :: FieldDefinition -> ResultEntry -> Property checkResultType _ (ResultEntry RSNormal Nothing) = failTest "Nothing result in RSNormal field" checkResultType _ (ResultEntry _ Nothing) = passTest checkResultType fdef (ResultEntry RSNormal (Just v)) = case (fdefKind fdef, v) of (QFTText , JSString {}) -> passTest (QFTBool , JSBool {}) -> passTest (QFTNumber , JSRational {}) -> passTest (QFTTimestamp , JSRational {}) -> passTest (QFTUnit , JSRational {}) -> passTest (QFTOther , _) -> passTest -- meh, QFT not precise... (kind, _) -> failTest $ "Type mismatch, field definition says " ++ show kind ++ " but returned value is " ++ show v ++ " for field '" ++ fdefName fdef ++ "'" checkResultType _ (ResultEntry r (Just _)) = failTest $ "Just result in " ++ show r ++ " field" -- | Tests that, when querying any existing field, the following three -- properties hold: RSNormal corresponds to a Just value, any other -- status corresponds to Nothing, and for an RSNormal entry carrying a value, -- the type of the value corresponds to the type of the field as -- declared in the FieldDefinition. prop_queryNode_types :: Property prop_queryNode_types = forAll (choose (0, maxNodes)) $ \numnodes -> forAll (genEmptyCluster numnodes) $ \cfg -> forAll (elements (Map.keys Node.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cfg False (Query (ItemTypeOpCode QRNode) [field] EmptyFilter)) >>= resultProp stop $ conjoin [ printTestCase ("Inconsistent result entries (" ++ show fdata ++ ")") (conjoin $ map (conjoin . zipWith checkResultType fdefs) fdata) , printTestCase "Wrong field definitions length" (length fdefs ==? 1) , printTestCase "Wrong field result rows length" (all ((== 1) . length) fdata) , printTestCase "Wrong number of result rows" (length fdata ==? numnodes) ] -- | Test that queryFields with empty fields list returns all node fields. case_queryNode_allfields :: Assertion case_queryNode_allfields = do fdefs <- case queryFields (QueryFields (ItemTypeOpCode QRNode) []) of Bad msg -> fail $ "Error in query all fields: " ++ formatError msg Ok (QueryFieldsResult v) -> return v let field_sort = compare `on` fdefName assertEqual "Mismatch in all fields list" (sortBy field_sort . map (\(f, _, _) -> f) $ Map.elems Node.fieldsMap) (sortBy field_sort fdefs) -- | Check if cluster node names are unique (first elems). areNodeNamesSane :: ConfigData -> Bool areNodeNamesSane cfg = let fqdns = map nodeName . Map.elems . fromContainer $ configNodes cfg names = map (head . sepSplit '.') fqdns in length names == length (nub names) -- | Check that the nodes reported by a name filter are sane. prop_queryNode_filter :: Property prop_queryNode_filter = forAll (choose (1, maxNodes)) $ \nodes -> forAll (genEmptyCluster nodes `suchThat` areNodeNamesSane) $ \cluster -> monadicIO $ do let node_list = map nodeName . Map.elems . fromContainer $ configNodes cluster count <- pick $ choose (1, nodes) fqdn_set <- pick . genSetHelper node_list $ Just count let fqdns = Set.elems fqdn_set names = map (head . sepSplit '.') fqdns flt = makeSimpleFilter "name" $ map Left names QueryResult _ fdata <- run (query cluster False (Query (ItemTypeOpCode QRNode) ["name"] flt)) >>= resultProp stop $ conjoin [ printTestCase "Invalid node names" $ map (map rentryValue) fdata ==? 
map (\f -> [Just (showJSON f)]) fqdns ] -- ** Group queries prop_queryGroup_noUnknown :: Property prop_queryGroup_noUnknown = forAll (choose (0, maxNodes) >>= genEmptyCluster) $ \cluster -> forAll (elements (Map.keys Group.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cluster False (Query (ItemTypeOpCode QRGroup) [field] EmptyFilter)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields (ItemTypeOpCode QRGroup) [field]) stop $ conjoin [ printTestCase ("Got unknown fields via query (" ++ show fdefs ++ ")") (hasUnknownFields fdefs) , printTestCase ("Got unknown result status via query (" ++ show fdata ++ ")") (all (all ((/= RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got unknown fields via query fields (" ++ show fdefs' ++ ")") (hasUnknownFields fdefs') ] prop_queryGroup_Unknown :: Property prop_queryGroup_Unknown = forAll (choose (0, maxNodes) >>= genEmptyCluster) $ \cluster -> forAll (arbitrary `suchThat` (`notElem` Map.keys Group.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cluster False (Query (ItemTypeOpCode QRGroup) [field] EmptyFilter)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields (ItemTypeOpCode QRGroup) [field]) stop $ conjoin [ printTestCase ("Got known fields via query (" ++ show fdefs ++ ")") (not $ hasUnknownFields fdefs) , printTestCase ("Got /= ResultUnknown result status via query (" ++ show fdata ++ ")") (all (all ((== RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got a Just in a result value (" ++ show fdata ++ ")") (all (all (isNothing . rentryValue)) fdata) , printTestCase ("Got known fields via query fields (" ++ show fdefs' ++ ")") (not $ hasUnknownFields fdefs') ] prop_queryGroup_types :: Property prop_queryGroup_types = forAll (choose (0, maxNodes)) $ \numnodes -> forAll (genEmptyCluster numnodes) $ \cfg -> forAll (elements (Map.keys Group.fieldsMap)) $ \field -> monadicIO $ do QueryResult fdefs fdata <- run (query cfg False (Query (ItemTypeOpCode QRGroup) [field] EmptyFilter)) >>= resultProp stop $ conjoin [ printTestCase ("Inconsistent result entries (" ++ show fdata ++ ")") (conjoin $ map (conjoin . zipWith checkResultType fdefs) fdata) , printTestCase "Wrong field definitions length" (length fdefs ==? 1) , printTestCase "Wrong field result rows length" (all ((== 1) . length) fdata) ] case_queryGroup_allfields :: Assertion case_queryGroup_allfields = do fdefs <- case queryFields (QueryFields (ItemTypeOpCode QRGroup) []) of Bad msg -> fail $ "Error in query all fields: " ++ formatError msg Ok (QueryFieldsResult v) -> return v let field_sort = compare `on` fdefName assertEqual "Mismatch in all fields list" (sortBy field_sort . map (\(f, _, _) -> f) $ Map.elems Group.fieldsMap) (sortBy field_sort fdefs) -- | Check that the node count reported by a group list is sane. -- -- FIXME: also verify the node list, etc. prop_queryGroup_nodeCount :: Property prop_queryGroup_nodeCount = forAll (choose (0, maxNodes)) $ \nodes -> forAll (genEmptyCluster nodes) $ \cluster -> monadicIO $ do QueryResult _ fdata <- run (query cluster False (Query (ItemTypeOpCode QRGroup) ["node_cnt"] EmptyFilter)) >>= resultProp stop $ conjoin [ printTestCase "Invalid node count" $ map (map rentryValue) fdata ==? [[Just (showJSON nodes)]] ] -- ** Job queries -- | Tests that querying any existing fields, via either query or -- queryFields, will not return unknown fields. 
This uses 'undefined' -- for config, as job queries shouldn't use the configuration, and an -- explicit filter as otherwise non-live queries wouldn't return any -- result rows. prop_queryJob_noUnknown :: Property prop_queryJob_noUnknown = forAll (listOf (arbitrary::Gen (Positive Integer))) $ \ids -> forAll (elements (Map.keys Job.fieldsMap)) $ \field -> monadicIO $ do let qtype = ItemTypeLuxi QRJob flt = makeSimpleFilter (nameField qtype) $ map (\(Positive i) -> Right i) ids QueryResult fdefs fdata <- run (query undefined False (Query qtype [field] flt)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields qtype [field]) stop $ conjoin [ printTestCase ("Got unknown fields via query (" ++ show fdefs ++ ")") (hasUnknownFields fdefs) , printTestCase ("Got unknown result status via query (" ++ show fdata ++ ")") (all (all ((/= RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got unknown fields via query fields (" ++ show fdefs'++ ")") (hasUnknownFields fdefs') ] -- | Tests that an unknown field is returned as such. prop_queryJob_Unknown :: Property prop_queryJob_Unknown = forAll (listOf (arbitrary::Gen (Positive Integer))) $ \ids -> forAll (arbitrary `suchThat` (`notElem` Map.keys Job.fieldsMap)) $ \field -> monadicIO $ do let qtype = ItemTypeLuxi QRJob flt = makeSimpleFilter (nameField qtype) $ map (\(Positive i) -> Right i) ids QueryResult fdefs fdata <- run (query undefined False (Query qtype [field] flt)) >>= resultProp QueryFieldsResult fdefs' <- resultProp $ queryFields (QueryFields qtype [field]) stop $ conjoin [ printTestCase ("Got known fields via query (" ++ show fdefs ++ ")") (not $ hasUnknownFields fdefs) , printTestCase ("Got /= ResultUnknown result status via query (" ++ show fdata ++ ")") (all (all ((== RSUnknown) . rentryStatus)) fdata) , printTestCase ("Got a Just in a result value (" ++ show fdata ++ ")") (all (all (isNothing . rentryValue)) fdata) , printTestCase ("Got known fields via query fields (" ++ show fdefs' ++ ")") (not $ hasUnknownFields fdefs') ] -- ** Misc other tests -- | Tests that requested names checking behaves as expected. prop_getRequestedNames :: Property prop_getRequestedNames = forAll genName $ \node1 -> let chk = getRequestedNames . Query (ItemTypeOpCode QRNode) [] q_node1 = QuotedString node1 eq_name = EQFilter "name" eq_node1 = eq_name q_node1 in conjoin [ printTestCase "empty filter" $ chk EmptyFilter ==? [] , printTestCase "and filter" $ chk (AndFilter [eq_node1]) ==? [] , printTestCase "simple equality" $ chk eq_node1 ==? [node1] , printTestCase "non-name field" $ chk (EQFilter "foo" q_node1) ==? [] , printTestCase "non-simple filter" $ chk (OrFilter [ eq_node1 , LTFilter "foo" q_node1]) ==? [] ] testSuite "Query/Query" [ 'prop_queryNode_noUnknown , 'prop_queryNode_Unknown , 'prop_queryNode_types , 'prop_queryNode_filter , 'case_queryNode_allfields , 'prop_queryGroup_noUnknown , 'prop_queryGroup_Unknown , 'prop_queryGroup_types , 'case_queryGroup_allfields , 'prop_queryGroup_nodeCount , 'prop_queryJob_noUnknown , 'prop_queryJob_Unknown , 'prop_getRequestedNames ] ganeti-2.9.3/test/hs/Test/Ganeti/Query/Network.hs0000644000000000000000000000567312244641676021632 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for Network Queries. -} {- Copyright (C) 2013 Google Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Query.Network ( testQuery_Network ) where import Ganeti.JSON import Ganeti.Objects import Ganeti.Query.Network import Ganeti.Types import Test.Ganeti.Objects import Test.Ganeti.TestCommon import Test.Ganeti.TestHelper import Test.QuickCheck import qualified Data.Map as Map import Data.Maybe instance Arbitrary ConfigData where arbitrary = genEmptyCluster 0 >>= genConfigDataWithNetworks -- | Check if looking up a valid network ID of a nodegroup yields -- a non-Nothing result. prop_getGroupConnection :: NodeGroup -> Property prop_getGroupConnection group = let net_keys = (Map.keys . fromContainer . groupNetworks) group in True ==? all (\nk -> isJust (getGroupConnection nk group)) net_keys -- | Checks if looking up an ID of a non-existing network in a node group -- yields 'Nothing'. prop_getGroupConnection_notFound :: NodeGroup -> String -> Property prop_getGroupConnection_notFound group uuid = let net_keys = (Map.keys . fromContainer . groupNetworks) group in notElem uuid net_keys ==> isNothing (getGroupConnection uuid group) -- | Checks whether actually connected instances are identified as such. prop_instIsConnected :: ConfigData -> Property prop_instIsConnected cfg = let nets = (fromContainer . configNetworks) cfg net_keys = Map.keys nets net_names = map (fromNonEmpty . networkName) (Map.elems nets) in forAll (genInstWithNets net_names) $ \inst -> True ==? all (\nk -> instIsConnected cfg nk inst) net_keys -- | Tests whether instances that are not connected to a network are -- correctly classified as such. prop_instIsConnected_notFound :: ConfigData -> String -> Property prop_instIsConnected_notFound cfg network_uuid = let nets = (fromContainer . configNetworks) cfg net_keys = Map.keys nets net_names = map (fromNonEmpty . networkName) (Map.elems nets) in notElem network_uuid net_keys ==> forAll (genInstWithNets net_names) $ \inst -> not (instIsConnected cfg network_uuid inst) testSuite "Query_Network" [ 'prop_getGroupConnection , 'prop_getGroupConnection_notFound , 'prop_instIsConnected , 'prop_instIsConnected_notFound ] ganeti-2.9.3/test/hs/Test/Ganeti/Query/Filter.hs0000644000000000000000000001736212244641676021424 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Query.Filter (testQuery_Filter) where import Test.QuickCheck hiding (Result) import Test.QuickCheck.Monadic import qualified Data.Map as Map import Data.List import Text.JSON (showJSON) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Objects (genEmptyCluster) import Ganeti.BasicTypes import Ganeti.JSON import Ganeti.Objects import Ganeti.Query.Filter import Ganeti.Query.Language import Ganeti.Query.Query import Ganeti.Utils (niceSort) -- * Helpers -- | Run a query and check that we got a specific response. checkQueryResults :: ConfigData -> Query -> String -> [[ResultEntry]] -> Property checkQueryResults cfg qr descr expected = monadicIO $ do result <- run (query cfg False qr) >>= resultProp stop $ printTestCase ("Inconsistent results in " ++ descr) (qresData result ==? expected) -- | Makes a node name query, given a filter. makeNodeQuery :: Filter FilterField -> Query makeNodeQuery = Query (ItemTypeOpCode QRNode) ["name"] -- | Checks if a given operation failed. expectBadQuery :: ConfigData -> Query -> String -> Property expectBadQuery cfg qr descr = monadicIO $ do result <- run (query cfg False qr) case result of Bad _ -> return () Ok a -> stop . failTest $ "Expected failure in " ++ descr ++ " but got " ++ show a -- | A helper to construct a list of results from an expected names list. namesToResult :: [String] -> [[ResultEntry]] namesToResult = map ((:[]) . ResultEntry RSNormal . Just . showJSON) -- | Generates a cluster and returns its node names too. genClusterNames :: Int -> Int -> Gen (ConfigData, [String]) genClusterNames min_nodes max_nodes = do numnodes <- choose (min_nodes, max_nodes) cfg <- genEmptyCluster numnodes return (cfg, niceSort . Map.keys . fromContainer $ configNodes cfg) -- * Test cases -- | Tests single node filtering: eq should return it, and (lt and gt) -- should fail. prop_node_single_filter :: Property prop_node_single_filter = forAll (genClusterNames 1 maxNodes) $ \(cfg, allnodes) -> forAll (elements allnodes) $ \nname -> let fvalue = QuotedString nname buildflt n = n "name" fvalue expsingle = namesToResult [nname] othernodes = nname `delete` allnodes expnot = namesToResult othernodes test_query = checkQueryResults cfg . makeNodeQuery in conjoin [ test_query (buildflt EQFilter) "single-name 'EQ' filter" expsingle , test_query (NotFilter (buildflt EQFilter)) "single-name 'NOT EQ' filter" expnot , test_query (AndFilter [buildflt LTFilter, buildflt GTFilter]) "single-name 'AND [LT,GT]' filter" [] , test_query (AndFilter [buildflt LEFilter, buildflt GEFilter]) "single-name 'And [LE,GE]' filter" expsingle ] -- | Tests node filtering based on name equality: many 'OrFilter' -- should return all results combined, many 'AndFilter' together -- should return nothing. Note that we need at least 2 nodes so that -- the 'AndFilter' case breaks. prop_node_many_filter :: Property prop_node_many_filter = forAll (genClusterNames 2 maxNodes) $ \(cfg, nnames) -> let eqfilter = map (EQFilter "name" . QuotedString) nnames alln = namesToResult nnames test_query = checkQueryResults cfg . 
makeNodeQuery num_zero = NumericValue 0 in conjoin [ test_query (OrFilter eqfilter) "all nodes 'Or' name filter" alln , test_query (AndFilter eqfilter) "all nodes 'And' name filter" [] -- this next test works only because genEmptyCluster generates a -- cluster with no instances , test_query (EQFilter "pinst_cnt" num_zero) "pinst_cnt 'Eq' 0" alln , test_query (GTFilter "sinst_cnt" num_zero) "sinst_cnt 'GT' 0" [] ] -- | Tests name ordering consistency: requesting a 'simple filter' -- results in identical name ordering as the wanted names, requesting -- a more complex filter results in a niceSort-ed order. prop_node_name_ordering :: Property prop_node_name_ordering = forAll (genClusterNames 2 6) $ \(cfg, nnames) -> forAll (elements (subsequences nnames)) $ \sorted_nodes -> forAll (elements (permutations sorted_nodes)) $ \chosen_nodes -> let orfilter = OrFilter $ map (EQFilter "name" . QuotedString) chosen_nodes alln = namesToResult chosen_nodes all_sorted = namesToResult $ niceSort chosen_nodes test_query = checkQueryResults cfg . makeNodeQuery in conjoin [ test_query orfilter "simple filter/requested" alln , test_query (AndFilter [orfilter]) "complex filter/sorted" all_sorted ] -- | Tests node regex filtering. This is a very basic test :( prop_node_regex_filter :: Property prop_node_regex_filter = forAll (genClusterNames 0 maxNodes) $ \(cfg, nnames) -> case mkRegex ".*"::Result FilterRegex of Bad msg -> failTest $ "Can't build regex?! Error: " ++ msg Ok rx -> checkQueryResults cfg (makeNodeQuery (RegexpFilter "name" rx)) "rows for all nodes regexp filter" $ namesToResult nnames -- | Tests that ill-typed filters are rejected. This is a very basic test :( prop_node_bad_filter :: String -> Int -> Property prop_node_bad_filter rndname rndint = forAll (genClusterNames 1 maxNodes) $ \(cfg, _) -> let test_query = expectBadQuery cfg . makeNodeQuery string_value = QuotedString rndname numeric_value = NumericValue $ fromIntegral rndint in case mkRegex ".*"::Result FilterRegex of Bad msg -> failTest $ "Can't build regex?! Error: " ++ msg Ok rx -> conjoin [ test_query (RegexpFilter "offline" rx) "regex filter against boolean field" , test_query (EQFilter "name" numeric_value) "numeric value eq against string field" , test_query (TrueFilter "name") "true filter against string field" , test_query (EQFilter "offline" string_value) "quoted string eq against boolean field" , test_query (ContainsFilter "name" string_value) "quoted string in non-list field" , test_query (ContainsFilter "name" numeric_value) "numeric value in non-list field" ] -- | Tests 'makeSimpleFilter'. prop_makeSimpleFilter :: Property prop_makeSimpleFilter = forAll (resize 10 $ listOf1 genName) $ \names -> forAll (resize 10 $ listOf1 arbitrary) $ \ids -> forAll genName $ \namefield -> conjoin [ printTestCase "test expected names" $ makeSimpleFilter namefield (map Left names) ==? OrFilter (map (EQFilter namefield . QuotedString) names) , printTestCase "test expected IDs" $ makeSimpleFilter namefield (map Right ids) ==? OrFilter (map (EQFilter namefield . NumericValue) ids) , printTestCase "test empty names" $ makeSimpleFilter namefield [] ==? EmptyFilter ] testSuite "Query/Filter" [ 'prop_node_single_filter , 'prop_node_many_filter , 'prop_node_name_ordering , 'prop_node_regex_filter , 'prop_node_bad_filter , 'prop_makeSimpleFilter ] ganeti-2.9.3/test/hs/Test/Ganeti/JQueue.hs0000644000000000000000000002710012271422343020245 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-| Unittests for the job queue functionality. 
-} {- Copyright (C) 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.JQueue (testJQueue) where import Control.Applicative import Control.Monad (when) import Data.Char (isAscii) import Data.List (nub, sort) import System.Directory import System.FilePath import System.IO.Temp import System.Posix.Files import Test.HUnit import Test.QuickCheck as QuickCheck import Test.QuickCheck.Monadic import Text.JSON import Test.Ganeti.TestCommon import Test.Ganeti.TestHelper import Test.Ganeti.Types () import Test.Ganeti.OpCodes import Ganeti.BasicTypes import qualified Ganeti.Constants as C import Ganeti.JQueue import Ganeti.OpCodes import Ganeti.Path import Ganeti.Types as Types {-# ANN module "HLint: ignore Use camelCase" #-} -- * Helpers -- | noTimestamp in Just form. justNoTs :: Maybe Timestamp justNoTs = Just noTimestamp -- | Generates a simple queued opcode. genQueuedOpCode :: Gen QueuedOpCode genQueuedOpCode = QueuedOpCode <$> pure (ValidOpCode $ wrapOpCode OpClusterQuery) <*> arbitrary <*> pure JSNull <*> pure [] <*> choose (C.opPrioLowest, C.opPrioHighest) <*> pure justNoTs <*> pure justNoTs <*> pure justNoTs -- | Generates a static, empty job. emptyJob :: (Monad m) => m QueuedJob emptyJob = do jid0 <- makeJobId 0 return $ QueuedJob jid0 [] justNoTs justNoTs justNoTs -- | Generates a job ID. genJobId :: Gen JobId genJobId = do p <- arbitrary::Gen (Types.NonNegative Int) makeJobId $ fromNonNegative p -- * Test cases -- | Tests default priority value. case_JobPriorityDef :: Assertion case_JobPriorityDef = do ej <- emptyJob assertEqual "for default priority" C.opPrioDefault $ calcJobPriority ej -- | Test arbitrary priorities. prop_JobPriority :: Property prop_JobPriority = forAll (listOf1 (genQueuedOpCode `suchThat` (not . opStatusFinalized . qoStatus))) $ \ops -> do jid0 <- makeJobId 0 let job = QueuedJob jid0 ops justNoTs justNoTs justNoTs calcJobPriority job ==? minimum (map qoPriority ops) -- | Tests default job status. case_JobStatusDef :: Assertion case_JobStatusDef = do ej <- emptyJob assertEqual "for job status" JOB_STATUS_SUCCESS $ calcJobStatus ej -- | Test some job status properties. prop_JobStatus :: Property prop_JobStatus = forAll genJobId $ \jid -> forAll genQueuedOpCode $ \op -> let job1 = QueuedJob jid [op] justNoTs justNoTs justNoTs st1 = calcJobStatus job1 op_succ = op { qoStatus = OP_STATUS_SUCCESS } op_err = op { qoStatus = OP_STATUS_ERROR } op_cnl = op { qoStatus = OP_STATUS_CANCELING } op_cnd = op { qoStatus = OP_STATUS_CANCELED } -- computes status for a job with an added opcode before st_pre_op pop = calcJobStatus (job1 { qjOps = pop:qjOps job1 }) -- computes status for a job with an added opcode after st_post_op pop = calcJobStatus (job1 { qjOps = qjOps job1 ++ [pop] }) in conjoin [ printTestCase "pre-success doesn't change status" (st_pre_op op_succ ==? 
st1) , printTestCase "post-success doesn't change status" (st_post_op op_succ ==? st1) , printTestCase "pre-error is error" (st_pre_op op_err ==? JOB_STATUS_ERROR) , printTestCase "pre-canceling is canceling" (st_pre_op op_cnl ==? JOB_STATUS_CANCELING) , printTestCase "pre-canceled is canceled" (st_pre_op op_cnd ==? JOB_STATUS_CANCELED) ] -- | Tests job status equivalence with Python. Very similar to OpCodes test. case_JobStatusPri_py_equiv :: Assertion case_JobStatusPri_py_equiv = do let num_jobs = 2000::Int jobs <- genSample (vectorOf num_jobs $ do num_ops <- choose (1, 5) ops <- vectorOf num_ops genQueuedOpCode jid <- genJobId return $ QueuedJob jid ops justNoTs justNoTs justNoTs) let serialized = encode jobs -- check for non-ASCII fields, usually due to 'arbitrary :: String' mapM_ (\job -> when (any (not . isAscii) (encode job)) . assertFailure $ "Job has non-ASCII fields: " ++ show job ) jobs py_stdout <- runPython "from ganeti import jqueue\n\ \from ganeti import serializer\n\ \import sys\n\ \job_data = serializer.Load(sys.stdin.read())\n\ \decoded = [jqueue._QueuedJob.Restore(None, o, False, False)\n\ \ for o in job_data]\n\ \encoded = [(job.CalcStatus(), job.CalcPriority())\n\ \ for job in decoded]\n\ \print serializer.Dump(encoded)" serialized >>= checkPythonResult let deserialised = decode py_stdout::Text.JSON.Result [(String, Int)] decoded <- case deserialised of Text.JSON.Ok jobs' -> return jobs' Error msg -> assertFailure ("Unable to decode jobs: " ++ msg) -- this already raised an exception, but we need it -- for proper types >> fail "Unable to decode jobs" assertEqual "Mismatch in number of returned jobs" (length decoded) (length jobs) mapM_ (\(py_sp, job) -> let hs_sp = (jobStatusToRaw $ calcJobStatus job, calcJobPriority job) in assertEqual ("Different result after encoding/decoding for " ++ show job) py_sp hs_sp ) $ zip decoded jobs -- | Tests listing of Job ids. prop_ListJobIDs :: Property prop_ListJobIDs = monadicIO $ do let extractJobIDs jIDs = do either_jobs <- jIDs case either_jobs of Right j -> return j Left e -> fail $ show e isLeft e = case e of Left _ -> True _ -> False jobs <- pick $ resize 10 (listOf1 genJobId `suchThat` (\l -> l == nub l)) (e, f, g) <- run . withSystemTempDirectory "jqueue-test." $ \tempdir -> do empty_dir <- extractJobIDs $ getJobIDs [tempdir] mapM_ (\jid -> writeFile (tempdir </> jobFileName jid) "") jobs full_dir <- extractJobIDs $ getJobIDs [tempdir] invalid_dir <- getJobIDs [tempdir </> "no-such-dir"] return (empty_dir, sortJobIDs full_dir, invalid_dir) stop $ conjoin [ printTestCase "empty directory" $ e ==? [] , printTestCase "directory with valid names" $ f ==? sortJobIDs jobs , printTestCase "invalid directory" $ isLeft g ] -- | Tests loading jobs from disk. prop_LoadJobs :: Property prop_LoadJobs = monadicIO $ do ops <- pick $ resize 5 (listOf1 genQueuedOpCode) jid <- pick genJobId let job = QueuedJob jid ops justNoTs justNoTs justNoTs job_s = encode job -- check that jobs in the right directories are parsed correctly (missing, current, archived, missing_current, broken) <- run . withSystemTempDirectory "jqueue-test." 
$ \tempdir -> do let load a = loadJobFromDisk tempdir a jid live_path = liveJobFile tempdir jid arch_path = archivedJobFile tempdir jid createDirectory $ tempdir </> jobQueueArchiveSubDir createDirectory $ dropFileName arch_path -- missing job missing <- load True writeFile live_path job_s -- this should exist current <- load False removeFile live_path writeFile arch_path job_s -- this should exist (archived) archived <- load True -- this should be missing missing_current <- load False removeFile arch_path writeFile live_path "invalid job" broken <- load True return (missing, current, archived, missing_current, broken) stop $ conjoin [ missing ==? noSuchJob , current ==? Ganeti.BasicTypes.Ok (job, False) , archived ==? Ganeti.BasicTypes.Ok (job, True) , missing_current ==? noSuchJob , printTestCase "broken job" (isBad broken) ] -- | Tests computing job directories. Creates random directories, -- files and stale symlinks in a directory, and checks that we return -- \"the right thing\". prop_DetermineDirs :: Property prop_DetermineDirs = monadicIO $ do count <- pick $ choose (2, 10) nums <- pick $ genUniquesList count (arbitrary::Gen (QuickCheck.Positive Int)) let (valid, invalid) = splitAt (count `div` 2) $ map (\(QuickCheck.Positive i) -> show i) nums (tempdir, non_arch, with_arch, invalid_root) <- run . withSystemTempDirectory "jqueue-test." $ \tempdir -> do let arch_dir = tempdir </> jobQueueArchiveSubDir createDirectory arch_dir mapM_ (createDirectory . (arch_dir </>)) valid mapM_ (\p -> writeFile (arch_dir </> p) "") invalid mapM_ (\p -> createSymbolicLink "/dev/null/no/such/file" (arch_dir </> p <.> "missing")) invalid non_arch <- determineJobDirectories tempdir False with_arch <- determineJobDirectories tempdir True invalid_root <- determineJobDirectories (tempdir </> "no-such-subdir") True return (tempdir, non_arch, with_arch, invalid_root) let arch_dir = tempdir </> jobQueueArchiveSubDir stop $ conjoin [ non_arch ==? [tempdir] , sort with_arch ==? sort (tempdir:map (arch_dir </>) valid) , invalid_root ==? [tempdir </> "no-such-subdir"] ] -- | Tests the JSON serialisation for 'InputOpCode'. prop_InputOpCode :: MetaOpCode -> Int -> Property prop_InputOpCode meta i = conjoin [ readJSON (showJSON valid) ==? Text.JSON.Ok valid , readJSON (showJSON invalid) ==? Text.JSON.Ok invalid ] where valid = ValidOpCode meta invalid = InvalidOpCode (showJSON i) -- | Tests 'extractOpSummary'. prop_extractOpSummary :: MetaOpCode -> Int -> Property prop_extractOpSummary meta i = conjoin [ printTestCase "valid opcode" $ extractOpSummary (ValidOpCode meta) ==? summary , printTestCase "invalid opcode, correct object" $ extractOpSummary (InvalidOpCode jsobj) ==? summary , printTestCase "invalid opcode, empty object" $ extractOpSummary (InvalidOpCode emptyo) ==? invalid , printTestCase "invalid opcode, object with invalid OP_ID" $ extractOpSummary (InvalidOpCode invobj) ==? invalid , printTestCase "invalid opcode, not jsobject" $ extractOpSummary (InvalidOpCode jsinval) ==? 
invalid ] where summary = opSummary (metaOpCode meta) jsobj = showJSON $ toJSObject [("OP_ID", showJSON ("OP_" ++ summary))] emptyo = showJSON $ toJSObject ([]::[(String, JSValue)]) invobj = showJSON $ toJSObject [("OP_ID", showJSON False)] jsinval = showJSON i invalid = "INVALID_OP" testSuite "JQueue" [ 'case_JobPriorityDef , 'prop_JobPriority , 'case_JobStatusDef , 'prop_JobStatus , 'case_JobStatusPri_py_equiv , 'prop_ListJobIDs , 'prop_LoadJobs , 'prop_DetermineDirs , 'prop_InputOpCode , 'prop_extractOpSummary ] ganeti-2.9.3/test/hs/Test/Ganeti/Storage/0000755000000000000000000000000012271445544020125 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Drbd/0000755000000000000000000000000012271445545021001 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Drbd/Types.hs0000644000000000000000000001217612267470014022442 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for the types representing DRBD status -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Storage.Drbd.Types (testBlock_Drbd_Types) where import Test.QuickCheck import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Text.JSON import Text.Printf import Ganeti.JSON import Ganeti.Storage.Drbd.Types {-# ANN module "HLint: ignore Use camelCase" #-} {-# ANN module "HLint: ignore Use string literal" #-} -- * Arbitrary instances $(genArbitrary ''ConnState) $(genArbitrary ''Role) $(genArbitrary ''DiskState) $(genArbitrary ''SizeUnit) $(genArbitrary ''TimeUnit) -- | Natural numbers generator. natural :: Gen Int natural = choose (0, maxBound :: Int) -- | Generator of percentages. percent :: Gen Double percent = choose (0 :: Double, 100 :: Double) -- | Generator of write order flags. wOrderFlag :: Gen Char wOrderFlag = elements ['b', 'f', 'd', 'n'] -- | Property for testing the JSON serialization of a DeviceInfo. 
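-- For orientation, the serialised form verified below is a JSON
-- object shaped roughly as follows (a sketch; the field names are
-- those passed to 'makeObj' below, the values are invented):
--   { "minor": 0, "connectionState": "Connected",
--     "localRole": "Primary", "remoteRole": "Secondary",
--     "localState": "UpToDate", "remoteState": "UpToDate",
--     "replicationProtocol": "C", "ioFlags": "r----",
--     "perfIndicators": { ... }, "instance": null }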
prop_DeviceInfo :: Property prop_DeviceInfo = do minor <- natural state <- arbitrary locRole <- arbitrary remRole <- arbitrary locState <- arbitrary remState <- arbitrary alg <- choose ('A','C') ns <- natural nr <- natural dw <- natural dr <- natural al <- natural bm <- natural lc <- natural pe <- natural ua <- natural ap <- natural ep <- genMaybe natural wo <- genMaybe wOrderFlag oos <- genMaybe natural inst <- genMaybe arbitrary let obtained = showJSON $ DeviceInfo minor state (LocalRemote locRole remRole) (LocalRemote locState remState) alg "r----" perfInd Nothing Nothing Nothing inst perfInd = PerfIndicators ns nr dw dr al bm lc pe ua ap ep wo oos expected = makeObj [ ("minor", showJSON minor) , ("connectionState", showJSON state) , ("localRole", showJSON locRole) , ("remoteRole", showJSON remRole) , ("localState", showJSON locState) , ("remoteState", showJSON remState) , ("replicationProtocol", showJSON alg) , ("ioFlags", showJSON "r----") , ("perfIndicators", showJSON perfInd) , ("instance", maybe JSNull showJSON inst) ] obtained ==? expected -- | Property for testing the JSON serialization of a PerfIndicators. prop_PerfIndicators :: Property prop_PerfIndicators = do ns <- natural nr <- natural dw <- natural dr <- natural al <- natural bm <- natural lc <- natural pe <- natural ua <- natural ap <- natural ep <- genMaybe natural wo <- genMaybe wOrderFlag oos <- genMaybe natural let expected = showJSON $ PerfIndicators ns nr dw dr al bm lc pe ua ap ep wo oos obtained = optFieldsToObj [ Just ("networkSend", showJSON ns) , Just ("networkReceive", showJSON nr) , Just ("diskWrite", showJSON dw) , Just ("diskRead", showJSON dr) , Just ("activityLog", showJSON al) , Just ("bitMap", showJSON bm) , Just ("localCount", showJSON lc) , Just ("pending", showJSON pe) , Just ("unacknowledged", showJSON ua) , Just ("applicationPending", showJSON ap) , optionalJSField "epochs" ep , optionalJSField "writeOrder" wo , optionalJSField "outOfSync" oos ] obtained ==? expected -- | Function for testing the JSON serialization of a SyncStatus. prop_SyncStatus :: Property prop_SyncStatus = do perc <- percent numer <- natural denom <- natural sizeU1 <- arbitrary h <- choose (0, 23) m <- choose (0, 59) s <- choose (0, 59) sp <- natural wa <- genMaybe natural sizeU2 <- arbitrary timeU <- arbitrary let obtained = showJSON $ SyncStatus perc numer denom sizeU1 (Time h m s) sp wa sizeU2 timeU expected = optFieldsToObj [ Just ("percentage", showJSON perc) , Just ("progress", showJSON $ show numer ++ "/" ++ show denom) , Just ("progressUnit", showJSON sizeU1) , Just ("timeToFinish", showJSON (printf "%02d:%02d:%02d" h m s :: String)) , Just ("speed", showJSON sp) , optionalJSField "want" wa , Just ("speedUnit", showJSON $ show sizeU2 ++ "/" ++ show timeU) ] obtained ==? expected testSuite "Block/Drbd/Types" [ 'prop_DeviceInfo , 'prop_PerfIndicators , 'prop_SyncStatus ] ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Drbd/Parser.hs0000644000000000000000000004276612267470014022602 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-| Unittests for the DRBD Parser -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Storage.Drbd.Parser (testBlock_Drbd_Parser) where import Test.QuickCheck as QuickCheck hiding (Result) import Test.HUnit import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import qualified Data.Attoparsec.Text as A import Data.List (intercalate) import Data.Text (pack) import Ganeti.Storage.Drbd.Parser (drbdStatusParser, commaIntParser) import Ganeti.Storage.Drbd.Types {-# ANN module "HLint: ignore Use camelCase" #-} -- | Test a DRBD 8.0 file with an empty line inside. case_drbd80_emptyline :: Assertion case_drbd80_emptyline = testParser (drbdStatusParser []) "proc_drbd80-emptyline.txt" $ DRBDStatus ( VersionInfo (Just "8.0.12") (Just "86") (Just "86") Nothing (Just "5c9f89594553e32adb87d9638dce591782f947e3") (Just "root@node1.example.com, 2009-05-22 12:47:52") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r---" (PerfIndicators 78728316 0 77675644 1277039 254 270 0 0 0 0 Nothing Nothing Nothing) Nothing (Just $ AdditionalInfo 0 61 65657 135 0 0 135) (Just $ AdditionalInfo 0 257 11378843 254 0 0 254) Nothing, UnconfiguredDevice 1, UnconfiguredDevice 2, UnconfiguredDevice 5, UnconfiguredDevice 6 ] -- | Test a DRBD 8.0 file with an empty version. case_drbd80_emptyversion :: Assertion case_drbd80_emptyversion = testParser (drbdStatusParser []) "proc_drbd80-emptyversion.txt" $ DRBDStatus ( VersionInfo Nothing Nothing Nothing Nothing (Just "5c9f89594553e32adb87d9638dce591782f947e3") (Just "root@node1.example.com, 2009-05-22 12:47:52") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r---" (PerfIndicators 78728316 0 77675644 1277039 254 270 0 0 0 0 Nothing Nothing Nothing) Nothing (Just $ AdditionalInfo 0 61 65657 135 0 0 135) (Just $ AdditionalInfo 0 257 11378843 254 0 0 254) Nothing, UnconfiguredDevice 1, UnconfiguredDevice 2, UnconfiguredDevice 5, UnconfiguredDevice 6 ] -- | Test a DRBD 8.4 file with an ongoing synchronization. case_drbd84_sync :: Assertion case_drbd84_sync = testParser (drbdStatusParser []) "proc_drbd84_sync.txt" $ DRBDStatus ( VersionInfo (Just "8.4.2") (Just "1") (Just "86-101") Nothing (Just "7ad5f850d711223713d6dcadc3dd48860321070c") (Just "root@example.com, 2013-04-10 07:45:25") ) [ DeviceInfo 0 StandAlone (LocalRemote Primary Unknown) (LocalRemote UpToDate DUnknown) ' ' "r-----" (PerfIndicators 0 0 33318 730 15 0 0 0 0 0 (Just 1) (Just 'd') (Just 1048320)) Nothing Nothing Nothing Nothing, UnconfiguredDevice 3, DeviceInfo 5 SyncSource (LocalRemote Secondary Secondary) (LocalRemote UpToDate Inconsistent) 'C' "r---n-" (PerfIndicators 716992 0 0 719432 0 43 0 33 18 0 (Just 1) (Just 'f') (Just 335744)) (Just $ SyncStatus 68.5 335744 1048576 KiloByte (Time 0 0 5) 64800 Nothing KiloByte Second) Nothing Nothing Nothing ] -- | Test a DRBD 8.4 file. 
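-- For reference, the fixture files parsed by these cases mirror
-- /proc/drbd; an 8.4-format device stanza looks roughly like this
-- (a sketch reconstructed from the expected values below, not copied
-- verbatim from proc_drbd84.txt):
--   0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
--       ns:1048576 nr:0 dw:0 dr:1048776 al:0 bm:64 lo:0 pe:0 ua:0
--       ap:0 ep:1 wo:f oos:0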
case_drbd84 :: Assertion case_drbd84 = testParser (drbdStatusParser []) "proc_drbd84.txt" $ DRBDStatus ( VersionInfo (Just "8.4.2") (Just "1") (Just "86-101") Nothing (Just "7ad5f850d711223713d6dcadc3dd48860321070c") (Just "root@example.com, 2013-04-10 07:45:25") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r-----" (PerfIndicators 1048576 0 0 1048776 0 64 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 1 Connected (LocalRemote Secondary Primary) (LocalRemote UpToDate UpToDate) 'C' "r-----" (PerfIndicators 0 1048576 1048576 0 0 64 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, UnconfiguredDevice 2, DeviceInfo 4 WFConnection (LocalRemote Primary Unknown) (LocalRemote UpToDate DUnknown) 'C' "r-----" (PerfIndicators 0 0 0 200 0 0 0 0 0 0 (Just 1) (Just 'f') (Just 1048320)) Nothing Nothing Nothing Nothing, DeviceInfo 6 Connected (LocalRemote Secondary Primary) (LocalRemote Diskless UpToDate) 'C' "r-----" (PerfIndicators 0 0 0 0 0 0 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 8 StandAlone (LocalRemote Secondary Unknown) (LocalRemote UpToDate DUnknown) ' ' "r-----" (PerfIndicators 0 0 0 200 0 0 0 0 0 0 (Just 1) (Just 'f') (Just 1048320)) Nothing Nothing Nothing Nothing ] -- | Test a DRBD 8.3 file with a NULL character inside. case_drbd83_sync_krnl2_6_39 :: Assertion case_drbd83_sync_krnl2_6_39 = testParser (drbdStatusParser []) "proc_drbd83_sync_krnl2.6.39.txt" $ DRBDStatus ( VersionInfo (Just "8.3.1") (Just "88") (Just "86-89") Nothing (Just "fd40f4a8f9104941537d1afc8521e584a6d3003c") (Just "phil@fat-tyre, 2009-03-27 12:19:49") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r----" (PerfIndicators 140978 0 9906 131533 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 1 Connected (LocalRemote Secondary Primary) (LocalRemote UpToDate UpToDate) 'C' "r---" (PerfIndicators 0 140980 140980 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, UnconfiguredDevice 2, DeviceInfo 3 SyncSource (LocalRemote Primary Secondary) (LocalRemote UpToDate Inconsistent) 'A' "r-----" (PerfIndicators 373888 0 0 374088 0 22 7 27 7 0 (Just 1) (Just 'f') (Just 15358208)) (Just $ SyncStatus 2.4 14996 15360 MegaByte (Time 0 4 8) 61736 Nothing KiloByte Second) Nothing Nothing Nothing, DeviceInfo 4 WFConnection (LocalRemote Primary Unknown) (LocalRemote UpToDate DUnknown) 'C' "r----" (PerfIndicators 140978 0 9906 131534 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing ] -- | Test a DRBD 8.3 file with an ongoing synchronization. 
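-- Synchronisation cases additionally parse a progress line; in
-- /proc/drbd it looks roughly like this (a sketch matching the
-- 'SyncStatus' expected below: 34.9% of 346112/524288M done, at
-- 59392 K/sec with 0:00:05 left to finish):
--   [======>.............] sync'ed: 34.9% (346112/524288)M
--   finish: 0:00:05 speed: 59,392 (59,392) K/sec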
case_drbd83_sync :: Assertion case_drbd83_sync = testParser (drbdStatusParser []) "proc_drbd83_sync.txt" $ DRBDStatus ( VersionInfo (Just "8.3.1") (Just "88") (Just "86-89") Nothing (Just "fd40f4a8f9104941537d1afc8521e584a6d3003c") (Just "phil@fat-tyre, 2009-03-27 12:19:49") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r----" (PerfIndicators 140978 0 9906 131533 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 1 Connected (LocalRemote Secondary Primary) (LocalRemote UpToDate UpToDate) 'C' "r---" (PerfIndicators 0 140980 140980 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, UnconfiguredDevice 2, DeviceInfo 3 SyncTarget (LocalRemote Primary Secondary) (LocalRemote Inconsistent UpToDate) 'C' "r----" (PerfIndicators 0 178176 178176 0 104 42 0 0 0 0 (Just 1) (Just 'b') (Just 346112)) (Just $ SyncStatus 34.9 346112 524288 MegaByte (Time 0 0 5) 59392 Nothing KiloByte Second) Nothing Nothing Nothing, DeviceInfo 4 WFConnection (LocalRemote Primary Unknown) (LocalRemote UpToDate DUnknown) 'C' "r----" (PerfIndicators 140978 0 9906 131534 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing ] -- | Test a DRBD 8.3 file not from git sources, with an ongoing synchronization -- and the "want" field case_drbd83_sync_want :: Assertion case_drbd83_sync_want = testParser (drbdStatusParser []) "proc_drbd83_sync_want.txt" $ DRBDStatus ( VersionInfo (Just "8.3.11") (Just "88") (Just "86-96") (Just "2D876214BAAD53B31ADC1D6") Nothing Nothing ) [ DeviceInfo 0 SyncTarget (LocalRemote Secondary Primary) (LocalRemote Inconsistent UpToDate) 'C' "r-----" (PerfIndicators 0 460288 460160 0 0 28 2 4 1 0 (Just 1) (Just 'f') (Just 588416)) (Just $ SyncStatus 44.4 588416 1048576 KiloByte (Time 0 0 8) 65736 (Just 61440) KiloByte Second) Nothing Nothing Nothing, UnconfiguredDevice 1, UnconfiguredDevice 2, UnconfiguredDevice 3 ] -- | Test a DRBD 8.3 file. 
case_drbd83 :: Assertion case_drbd83 = testParser (drbdStatusParser []) "proc_drbd83.txt" $ DRBDStatus ( VersionInfo (Just "8.3.1") (Just "88") (Just "86-89") Nothing (Just "fd40f4a8f9104941537d1afc8521e584a6d3003c") (Just "phil@fat-tyre, 2009-03-27 12:19:49") ) [ DeviceInfo 0 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate UpToDate) 'C' "r----" (PerfIndicators 140978 0 9906 131533 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 1 Connected (LocalRemote Secondary Primary) (LocalRemote UpToDate UpToDate) 'C' "r---" (PerfIndicators 0 140980 140980 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, UnconfiguredDevice 2, DeviceInfo 4 WFConnection (LocalRemote Primary Unknown) (LocalRemote UpToDate DUnknown) 'C' "r----" (PerfIndicators 140978 0 9906 131534 27 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 5 Connected (LocalRemote Primary Secondary) (LocalRemote UpToDate Diskless) 'C' "r----" (PerfIndicators 140978 0 9906 131533 19 8 0 0 0 0 (Just 1) (Just 'b') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 6 Connected (LocalRemote Secondary Primary) (LocalRemote Diskless UpToDate) 'C' "r---" (PerfIndicators 0 140978 140978 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 7 WFConnection (LocalRemote Secondary Unknown) (LocalRemote UpToDate DUnknown) 'C' "r---" (PerfIndicators 0 140978 140978 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing, DeviceInfo 8 StandAlone (LocalRemote Secondary Unknown) (LocalRemote UpToDate DUnknown) ' ' "r---" (PerfIndicators 0 140978 140978 0 0 8 0 0 0 0 (Just 1) (Just 'f') (Just 0)) Nothing Nothing Nothing Nothing ] -- | Test a DRBD 8.0 file with a missing device. 
case_drbd8 :: Assertion
case_drbd8 = testParser (drbdStatusParser []) "proc_drbd8.txt" $
  DRBDStatus
    ( VersionInfo (Just "8.0.12") (Just "86") (Just "86") Nothing
        (Just "5c9f89594553e32adb87d9638dce591782f947e3") (Just "XXX")
    )
    [ DeviceInfo 0 Connected (LocalRemote Primary Secondary)
        (LocalRemote UpToDate UpToDate) 'C' "r---"
        (PerfIndicators 4375577 0 4446279 674 1067 69 0 0 0 0
           Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 793749 1067 0 0 1067)
        Nothing
    , DeviceInfo 1 Connected (LocalRemote Secondary Primary)
        (LocalRemote UpToDate UpToDate) 'C' "r---"
        (PerfIndicators 738320 0 738320 554400 67 0 0 0 0 0
           Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 92464 67 0 0 67)
        Nothing
    , UnconfiguredDevice 2
    , DeviceInfo 4 WFConnection (LocalRemote Primary Unknown)
        (LocalRemote UpToDate DUnknown) 'C' "r---"
        (PerfIndicators 738320 0 738320 554400 67 0 0 0 0 0
           Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 92464 67 0 0 67)
        Nothing
    , DeviceInfo 5 Connected (LocalRemote Primary Secondary)
        (LocalRemote UpToDate Diskless) 'C' "r---"
        (PerfIndicators 4375581 0 4446283 674 1069 69 0 0 0 0
           Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 793750 1069 0 0 1069)
        Nothing
    , DeviceInfo 6 Connected (LocalRemote Secondary Primary)
        (LocalRemote Diskless UpToDate) 'C' "r---"
        (PerfIndicators 0 4375581 5186925 327 75 214 0 0 0 0
           Nothing Nothing Nothing)
        Nothing Nothing Nothing Nothing
    , DeviceInfo 7 WFConnection (LocalRemote Secondary Unknown)
        (LocalRemote UpToDate DUnknown) 'C' "r---"
        (PerfIndicators 0 0 0 0 0 0 0 0 0 0 Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 0 0 0 0 0)
        Nothing
    , DeviceInfo 8 StandAlone (LocalRemote Secondary Unknown)
        (LocalRemote UpToDate DUnknown) ' ' "r---"
        (PerfIndicators 0 0 0 0 0 0 0 0 0 0 Nothing Nothing Nothing)
        Nothing
        (Just $ AdditionalInfo 0 61 0 0 0 0 0)
        (Just $ AdditionalInfo 0 257 0 0 0 0 0)
        Nothing
    ]

-- | Function for splitting a list into chunks of a given size.
-- FIXME: an equivalent function exists in Data.List.Split, but it seems
-- pointless to add this package as a dependency just for this single
-- use. In case it is ever added, just remove this function definition
-- and use the one from the package.
splitEvery :: Int -> [e] -> [[e]]
splitEvery i l = map (take i) (splitter l (:) [])
  where splitter [] _ n = n
        splitter li c n = li `c` splitter (drop i li) c n

-- | Function for testing whether a single comma-separated integer is
-- parsed correctly.
testCommaInt :: String -> Int -> Assertion
testCommaInt numString expectedResult =
  case A.parseOnly commaIntParser $ pack numString of
    Left msg -> assertFailure $ "Parsing failed: " ++ msg
    Right obtained -> assertEqual numString expectedResult obtained

-- | Generate a property test for CommaInt numbers in a given range.
gen_prop_CommaInt :: Int -> Int -> Property
gen_prop_CommaInt minVal maxVal =
  forAll (choose (minVal, maxVal)) $ \i ->
    case A.parseOnly commaIntParser $ pack (generateCommaInt i) of
      Left msg -> failTest $ "Parsing failed: " ++ msg
      Right obtained -> i ==? obtained
  where generateCommaInt x =
          ((reverse . intercalate ",") . splitEvery 3) . reverse $ show x

-- | Test if <4 digit integers are recognized correctly.
prop_commaInt_noCommas :: Property
prop_commaInt_noCommas = gen_prop_CommaInt 0 999
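-- Editor's note: a worked example of the @generateCommaInt@ pipeline above
-- (a sketch for illustration; @commaGroupingExample@ is not part of the
-- original test suite). For 1234567: 'show' gives "1234567", 'reverse'
-- gives "7654321", @splitEvery 3@ gives ["765","432","1"], and the
-- 'intercalate' plus final 'reverse' yield "1,234,567".
commaGroupingExample :: String
commaGroupingExample =
  reverse . intercalate "," . splitEvery 3 . reverse $ show (1234567 :: Int)
-- commaGroupingExample == "1,234,567"

-- | Test if integers with 1 comma are recognized correctly.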
prop_commaInt_1Comma :: Property
prop_commaInt_1Comma = gen_prop_CommaInt 1000 999999

-- | Test if integers with multiple commas are recognized correctly.
prop_commaInt_multipleCommas :: Property
prop_commaInt_multipleCommas = gen_prop_CommaInt 1000000 (maxBound :: Int)

-- | Test whether the parser is actually able to behave as intended with
-- numbers without commas. That is, if a number with more than 3 digits
-- is parsed, only up to the first 3 digits are considered (because they
-- are a valid commaInt), and the rest is ignored.
-- e.g.: parse "1234" = 123
prop_commaInt_max3WithoutComma :: Property
prop_commaInt_max3WithoutComma =
  forAll (choose (0, maxBound :: Int)) $ \i ->
    case A.parseOnly commaIntParser $ pack (show i) of
      Left msg -> failTest $ "Parsing failed: " ++ msg
      Right obtained ->
        obtained < 1000 .&&.
        getFirst3Digits i ==? obtained
  where getFirst3Digits x =
          if x >= 1000
            then getFirst3Digits $ x `div` 10
            else x

-- | Test if non-triplets are handled correctly (they are assumed NOT to be
-- part of the number).
case_commaInt_non_triplet :: Assertion
case_commaInt_non_triplet = testCommaInt "61,736,12" 61736

testSuite "Block/Drbd/Parser"
          [ 'case_drbd80_emptyline
          , 'case_drbd80_emptyversion
          , 'case_drbd84_sync
          , 'case_drbd84
          , 'case_drbd83_sync_krnl2_6_39
          , 'case_drbd83_sync
          , 'case_drbd83_sync_want
          , 'case_drbd83
          , 'case_drbd8
          , 'case_commaInt_non_triplet
          , 'prop_commaInt_noCommas
          , 'prop_commaInt_1Comma
          , 'prop_commaInt_multipleCommas
          , 'prop_commaInt_max3WithoutComma
          ]
ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Lvm/0000755000000000000000000000000012271445545020664 5ustar00rootroot00000000000000
ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Lvm/LVParser.hs0000644000000000000000000001007412267470014022712 0ustar00rootroot00000000000000
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-| Unittests for the LV Parser -}

{-

Copyright (C) 2013 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Test.Ganeti.Storage.Lvm.LVParser (testStorage_Lvm_LVParser) where

import Test.QuickCheck as QuickCheck hiding (Result)
import Test.HUnit

import Test.Ganeti.TestHelper
import Test.Ganeti.TestCommon

import Control.Applicative ((<$>), (<*>))
import Data.List (intercalate)

import Ganeti.Storage.Lvm.LVParser
import Ganeti.Storage.Lvm.Types

{-# ANN module "HLint: ignore Use camelCase" #-}
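-- Editor's note: for context, one semicolon-separated input record as
-- expected by @lvParser@ looks roughly as below. This is hand-assembled
-- from the fields of the first 'LVInfo' in the test that follows,
-- mirroring the 'serializeLVInfo' layout; it is not a verbatim quote of
-- the lvs_lv.txt fixture.
sampleLvsRecord :: String
sampleLvsRecord =
  "nhasjL-cnZi-uqLS-WRLj-tkXI-nvCB-n0o2lj;\
  \df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_data;-wi-ao;-1;-1;253;0;\
  \1073741824B;1;originstname+instance1.example.com;;\
  \uZgXit-eiRr-vRqe-xpEo-e9nU-mTuR-9nfVIU;xenvg;linear;0B;0;1073741824B;;\
  \/dev/sda5:0-15;/dev/sda5(0)"

-- | Test parsing an LV @lvs@ output.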
case_lvs_lv :: Assertion case_lvs_lv = testParser lvParser "lvs_lv.txt" [ LVInfo "nhasjL-cnZi-uqLS-WRLj-tkXI-nvCB-n0o2lj" "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_data" "-wi-ao" (negate 1) (negate 1) 253 0 1073741824 1 "originstname+instance1.example.com" "" "uZgXit-eiRr-vRqe-xpEo-e9nU-mTuR-9nfVIU" "xenvg" "linear" 0 0 1073741824 "" "/dev/sda5:0-15" "/dev/sda5(0)" Nothing , LVInfo "5fW5mE-SBSs-GSU0-KZDg-hnwb-sZOC-zZt736" "df9ff3f6-a833-48ff-8bd5-bff2eaeab759.disk0_meta" "-wi-ao" (negate 1) (negate 1) 253 1 134217728 1 "originstname+instance1.example.com" "" "uZgXit-eiRr-vRqe-xpEo-e9nU-mTuR-9nfVIU" "xenvg" "linear" 0 0 134217728 "" "/dev/sda5:16-17" "/dev/sda5(16)" Nothing ] -- | Serialize a LVInfo in the same format that is output by @lvs@. -- The "instance" field is not serialized because it's not provided by @lvs@ -- so it is not part of this test. serializeLVInfo :: LVInfo -> String serializeLVInfo l = intercalate ";" [ lviUuid l , lviName l , lviAttr l , show $ lviMajor l , show $ lviMinor l , show $ lviKernelMajor l , show $ lviKernelMinor l , show (lviSize l) ++ "B" , show $ lviSegCount l , lviTags l , lviModules l , lviVgUuid l , lviVgName l , lviSegtype l , show (lviSegStart l) ++ "B" , show $ lviSegStartPe l , show (lviSegSize l) ++ "B" , lviSegTags l , lviSegPeRanges l , lviDevices l ] ++ "\n" -- | Serialize a list of LVInfo in the same format that is output by @lvs@. serializeLVInfos :: [LVInfo] -> String serializeLVInfos = concatMap serializeLVInfo -- | Arbitrary instance for LVInfo. -- The instance is always Nothing because it is not part of the parsed data: -- it is added afterwards from a different source. instance Arbitrary LVInfo where arbitrary = LVInfo <$> genUUID -- uuid <*> genName -- name <*> genName -- attr <*> arbitrary -- major <*> arbitrary -- minor <*> arbitrary -- kernel_major <*> arbitrary -- kernel_minor <*> genNonNegative -- size <*> arbitrary -- seg_cont <*> genName -- tags <*> genName -- modules <*> genUUID -- vg_uuid <*> genName -- vg_name <*> genName -- segtype <*> genNonNegative -- seg_start <*> arbitrary -- seg_start_pe <*> genNonNegative -- seg_size <*> genName -- seg_tags <*> genName -- seg_pe_ranges <*> genName -- devices <*> return Nothing -- instance -- | Test if a randomly generated LV lvs output is properly parsed. prop_parse_lvs_lv :: [LVInfo] -> Property prop_parse_lvs_lv expected = genPropParser lvParser (serializeLVInfos expected) expected testSuite "Storage/Lvm/LVParser" [ 'case_lvs_lv, 'prop_parse_lvs_lv ] ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Diskstats/0000755000000000000000000000000012271445545022077 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Storage/Diskstats/Parser.hs0000644000000000000000000001103112267470014023655 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for the @/proc/diskstats@ parser -} {- Copyright (C) 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Storage.Diskstats.Parser (testBlock_Diskstats_Parser) where import Test.QuickCheck as QuickCheck hiding (Result) import Test.HUnit import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Control.Applicative ((<*>), (<$>)) import qualified Data.Attoparsec.Text as A import Data.Text (pack) import Text.Printf import Ganeti.Storage.Diskstats.Parser (diskstatsParser) import Ganeti.Storage.Diskstats.Types {-# ANN module "HLint: ignore Use camelCase" #-} -- | Test a diskstats. case_diskstats :: Assertion case_diskstats = testParser diskstatsParser "proc_diskstats.txt" [ Diskstats 1 0 "ram0" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 1 "ram1" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 2 "ram2" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 3 "ram3" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 4 "ram4" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 5 "ram5" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 6 "ram6" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 7 "ram7" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 8 "ram8" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 9 "ram9" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 10 "ram10" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 11 "ram11" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 12 "ram12" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 13 "ram13" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 14 "ram14" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 1 15 "ram15" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 0 "loop0" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 1 "loop1" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 2 "loop2" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 3 "loop3" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 4 "loop4" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 5 "loop5" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 6 "loop6" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 7 7 "loop7" 0 0 0 0 0 0 0 0 0 0 0 , Diskstats 8 0 "sda" 89502 4833 4433387 89244 519115 62738 16059726 465120 0 149148 554564 , Diskstats 8 1 "sda1" 505 2431 8526 132 478 174 124358 8500 0 340 8632 , Diskstats 8 2 "sda2" 2 0 4 4 0 0 0 0 0 4 4 , Diskstats 8 5 "sda5" 88802 2269 4422249 89032 453703 62564 15935368 396244 0 90064 485500 , Diskstats 252 0 "dm-0" 90978 0 4420002 158632 582226 0 15935368 5592012 0 167688 5750652 , Diskstats 252 1 "dm-1" 88775 0 4402378 157204 469594 0 15136008 4910424 0 164556 5067640 , Diskstats 252 2 "dm-2" 1956 0 15648 1052 99920 0 799360 682492 0 4516 683552 , Diskstats 8 16 "sdb" 0 0 0 0 0 0 0 0 0 0 0 ] -- | The instance for generating arbitrary Diskstats instance Arbitrary Diskstats where arbitrary = Diskstats <$> genNonNegative <*> genNonNegative <*> genName <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative <*> genNonNegative -- | Serialize a list of Diskstats in a parsable way serializeDiskstatsList :: [Diskstats] -> String serializeDiskstatsList = concatMap serializeDiskstats -- | Serialize a Diskstats in a parsable way serializeDiskstats :: Diskstats -> String serializeDiskstats ds = printf "\t%d\t%d %s %d %d %d %d %d %d %d %d %d %d %d\n" (dsMajor ds) (dsMinor ds) (dsName ds) (dsReadsNum ds) (dsMergedReads ds) (dsSecRead ds) (dsTimeRead ds) (dsWrites ds) (dsMergedWrites ds) (dsSecWritten ds) (dsTimeWrite ds) (dsIos ds) (dsTimeIO ds) (dsWIOmillis ds) -- | Test whether an arbitrary Diskstats is parsed correctly prop_diskstats :: [Diskstats] -> Property prop_diskstats dsList = case 
A.parseOnly diskstatsParser $ pack (serializeDiskstatsList dsList) of Left msg -> failTest $ "Parsing failed: " ++ msg Right obtained -> dsList ==? obtained testSuite "Block/Diskstats/Parser" [ 'case_diskstats, 'prop_diskstats ] ganeti-2.9.3/test/hs/Test/Ganeti/OpCodes.hs0000644000000000000000000006262312271422343020412 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.OpCodes ( testOpCodes , OpCodes.OpCode(..) ) where import Test.HUnit as HUnit import Test.QuickCheck as QuickCheck import Control.Applicative import Control.Monad import Data.Char import Data.List import qualified Data.Map as Map import qualified Text.JSON as J import Text.Printf (printf) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Test.Ganeti.Types () import Test.Ganeti.Query.Language import Ganeti.BasicTypes import qualified Ganeti.Constants as C import qualified Ganeti.OpCodes as OpCodes import Ganeti.Types import Ganeti.OpParams import Ganeti.JSON {-# ANN module "HLint: ignore Use camelCase" #-} -- * Arbitrary instances instance Arbitrary OpCodes.TagObject where arbitrary = oneof [ OpCodes.TagInstance <$> genFQDN , OpCodes.TagNode <$> genFQDN , OpCodes.TagGroup <$> genFQDN , OpCodes.TagNetwork <$> genFQDN , pure OpCodes.TagCluster ] $(genArbitrary ''OpCodes.ReplaceDisksMode) $(genArbitrary ''DiskAccess) instance Arbitrary OpCodes.DiskIndex where arbitrary = choose (0, C.maxDisks - 1) >>= OpCodes.mkDiskIndex instance Arbitrary INicParams where arbitrary = INicParams <$> genMaybe genNameNE <*> genMaybe genName <*> genMaybe genNameNE <*> genMaybe genNameNE <*> genMaybe genNameNE instance Arbitrary IDiskParams where arbitrary = IDiskParams <$> arbitrary <*> arbitrary <*> genMaybe genNameNE <*> genMaybe genNameNE <*> genMaybe genNameNE <*> genMaybe genNameNE instance Arbitrary RecreateDisksInfo where arbitrary = oneof [ pure RecreateDisksAll , RecreateDisksIndices <$> arbitrary , RecreateDisksParams <$> arbitrary ] instance Arbitrary DdmOldChanges where arbitrary = oneof [ DdmOldIndex <$> arbitrary , DdmOldMod <$> arbitrary ] instance (Arbitrary a) => Arbitrary (SetParamsMods a) where arbitrary = oneof [ pure SetParamsEmpty , SetParamsDeprecated <$> arbitrary , SetParamsNew <$> arbitrary ] instance Arbitrary ExportTarget where arbitrary = oneof [ ExportTargetLocal <$> genNodeNameNE , ExportTargetRemote <$> pure [] ] instance Arbitrary OpCodes.OpCode where arbitrary = do op_id <- elements OpCodes.allOpIDs case op_id of "OP_TEST_DELAY" -> OpCodes.OpTestDelay <$> arbitrary <*> arbitrary <*> genNodeNamesNE <*> return Nothing <*> arbitrary "OP_INSTANCE_REPLACE_DISKS" -> OpCodes.OpInstanceReplaceDisks <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> 
genDiskIndices <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNameNE "OP_INSTANCE_FAILOVER" -> OpCodes.OpInstanceFailover <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> genMaybe genNodeNameNE <*> return Nothing <*> arbitrary <*> genMaybe genNameNE <*> arbitrary "OP_INSTANCE_MIGRATE" -> OpCodes.OpInstanceMigrate <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> genMaybe genNodeNameNE <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> genMaybe genNameNE <*> arbitrary "OP_TAGS_GET" -> OpCodes.OpTagsGet <$> arbitrary <*> arbitrary "OP_TAGS_SEARCH" -> OpCodes.OpTagsSearch <$> genNameNE "OP_TAGS_SET" -> OpCodes.OpTagsSet <$> arbitrary <*> genTags "OP_TAGS_DEL" -> OpCodes.OpTagsSet <$> arbitrary <*> genTags "OP_CLUSTER_POST_INIT" -> pure OpCodes.OpClusterPostInit "OP_CLUSTER_DESTROY" -> pure OpCodes.OpClusterDestroy "OP_CLUSTER_QUERY" -> pure OpCodes.OpClusterQuery "OP_CLUSTER_VERIFY" -> OpCodes.OpClusterVerify <$> arbitrary <*> arbitrary <*> genSet Nothing <*> genSet Nothing <*> arbitrary <*> genMaybe genNameNE "OP_CLUSTER_VERIFY_CONFIG" -> OpCodes.OpClusterVerifyConfig <$> arbitrary <*> arbitrary <*> genSet Nothing <*> arbitrary "OP_CLUSTER_VERIFY_GROUP" -> OpCodes.OpClusterVerifyGroup <$> genNameNE <*> arbitrary <*> arbitrary <*> genSet Nothing <*> genSet Nothing <*> arbitrary "OP_CLUSTER_VERIFY_DISKS" -> pure OpCodes.OpClusterVerifyDisks "OP_GROUP_VERIFY_DISKS" -> OpCodes.OpGroupVerifyDisks <$> genNameNE "OP_CLUSTER_REPAIR_DISK_SIZES" -> OpCodes.OpClusterRepairDiskSizes <$> genNodeNamesNE "OP_CLUSTER_CONFIG_QUERY" -> OpCodes.OpClusterConfigQuery <$> genFieldsNE "OP_CLUSTER_RENAME" -> OpCodes.OpClusterRename <$> genNameNE "OP_CLUSTER_SET_PARAMS" -> OpCodes.OpClusterSetParams <$> arbitrary <*> emptyMUD <*> emptyMUD <*> arbitrary <*> genMaybe (listOf1 arbitrary >>= mkNonEmpty) <*> genMaybe genEmptyContainer <*> emptyMUD <*> genMaybe genEmptyContainer <*> genMaybe genEmptyContainer <*> genMaybe genEmptyContainer <*> genMaybe arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> emptyMUD <*> emptyMUD <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> genMaybe (genName >>= mkNonEmpty) <*> genMaybe (genName >>= mkNonEmpty) "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf "OP_CLUSTER_ACTIVATE_MASTER_IP" -> pure OpCodes.OpClusterActivateMasterIp "OP_CLUSTER_DEACTIVATE_MASTER_IP" -> pure OpCodes.OpClusterDeactivateMasterIp "OP_QUERY" -> OpCodes.OpQuery <$> arbitrary <*> arbitrary <*> arbitrary <*> genFilter "OP_QUERY_FIELDS" -> OpCodes.OpQueryFields <$> arbitrary <*> arbitrary "OP_OOB_COMMAND" -> OpCodes.OpOobCommand <$> genNodeNamesNE <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> (arbitrary `suchThat` (>0)) "OP_NODE_REMOVE" -> OpCodes.OpNodeRemove <$> genNodeNameNE <*> return Nothing "OP_NODE_ADD" -> OpCodes.OpNodeAdd <$> genNodeNameNE <*> emptyMUD <*> emptyMUD <*> genMaybe genName <*> genMaybe genNameNE <*> arbitrary <*> genMaybe genNameNE <*> arbitrary <*> arbitrary <*> emptyMUD "OP_NODE_QUERY" -> OpCodes.OpNodeQuery <$> genFieldsNE <*> genNamesNE <*> arbitrary "OP_NODE_QUERYVOLS" -> OpCodes.OpNodeQueryvols <$> arbitrary <*> genNodeNamesNE "OP_NODE_QUERY_STORAGE" -> OpCodes.OpNodeQueryStorage <$> arbitrary <*> arbitrary <*> genNodeNamesNE <*> genNameNE "OP_NODE_MODIFY_STORAGE" -> OpCodes.OpNodeModifyStorage <$> genNodeNameNE <*> return Nothing <*> arbitrary <*> genNameNE <*> 
pure emptyJSObject "OP_REPAIR_NODE_STORAGE" -> OpCodes.OpRepairNodeStorage <$> genNodeNameNE <*> return Nothing <*> arbitrary <*> genNameNE <*> arbitrary "OP_NODE_SET_PARAMS" -> OpCodes.OpNodeSetParams <$> genNodeNameNE <*> return Nothing <*> arbitrary <*> emptyMUD <*> emptyMUD <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> genMaybe genNameNE <*> emptyMUD <*> arbitrary "OP_NODE_POWERCYCLE" -> OpCodes.OpNodePowercycle <$> genNodeNameNE <*> return Nothing <*> arbitrary "OP_NODE_MIGRATE" -> OpCodes.OpNodeMigrate <$> genNodeNameNE <*> return Nothing <*> arbitrary <*> arbitrary <*> genMaybe genNodeNameNE <*> return Nothing <*> arbitrary <*> arbitrary <*> genMaybe genNameNE "OP_NODE_EVACUATE" -> OpCodes.OpNodeEvacuate <$> arbitrary <*> genNodeNameNE <*> return Nothing <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNameNE <*> arbitrary "OP_INSTANCE_CREATE" -> OpCodes.OpInstanceCreate <$> genFQDN <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> pure emptyJSObject <*> arbitrary <*> arbitrary <*> arbitrary <*> genMaybe genNameNE <*> pure emptyJSObject <*> arbitrary <*> genMaybe genNameNE <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> pure emptyJSObject <*> genMaybe genNameNE <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe (pure []) <*> genMaybe genNodeNameNE <*> arbitrary <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNodeNameNE <*> genMaybe genNameNE <*> arbitrary <*> arbitrary <*> (genTags >>= mapM mkNonEmpty) "OP_INSTANCE_MULTI_ALLOC" -> OpCodes.OpInstanceMultiAlloc <$> genMaybe genNameNE <*> pure [] <*> arbitrary "OP_INSTANCE_REINSTALL" -> OpCodes.OpInstanceReinstall <$> genFQDN <*> return Nothing <*> arbitrary <*> genMaybe genNameNE <*> genMaybe (pure emptyJSObject) "OP_INSTANCE_REMOVE" -> OpCodes.OpInstanceRemove <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary "OP_INSTANCE_RENAME" -> OpCodes.OpInstanceRename <$> genFQDN <*> return Nothing <*> genNodeNameNE <*> arbitrary <*> arbitrary "OP_INSTANCE_STARTUP" -> OpCodes.OpInstanceStartup <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> pure emptyJSObject <*> pure emptyJSObject <*> arbitrary <*> arbitrary "OP_INSTANCE_SHUTDOWN" -> OpCodes.OpInstanceShutdown <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary "OP_INSTANCE_REBOOT" -> OpCodes.OpInstanceReboot <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary "OP_INSTANCE_MOVE" -> OpCodes.OpInstanceMove <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> genNodeNameNE <*> return Nothing <*> arbitrary "OP_INSTANCE_CONSOLE" -> OpCodes.OpInstanceConsole <$> genFQDN <*> return Nothing "OP_INSTANCE_ACTIVATE_DISKS" -> OpCodes.OpInstanceActivateDisks <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary "OP_INSTANCE_DEACTIVATE_DISKS" -> OpCodes.OpInstanceDeactivateDisks <$> genFQDN <*> return Nothing <*> arbitrary "OP_INSTANCE_RECREATE_DISKS" -> OpCodes.OpInstanceRecreateDisks <$> genFQDN <*> return Nothing <*> arbitrary <*> genNodeNamesNE <*> return Nothing <*> genMaybe genNameNE "OP_INSTANCE_QUERY" -> OpCodes.OpInstanceQuery <$> genFieldsNE <*> genNamesNE <*> arbitrary "OP_INSTANCE_QUERY_DATA" -> OpCodes.OpInstanceQueryData <$> arbitrary <*> genNodeNamesNE <*> arbitrary "OP_INSTANCE_SET_PARAMS" -> OpCodes.OpInstanceSetParams <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> pure 
emptyJSObject <*> arbitrary <*> pure emptyJSObject <*> arbitrary <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNodeNameNE <*> return Nothing <*> genMaybe genNameNE <*> pure emptyJSObject <*> arbitrary <*> arbitrary <*> arbitrary "OP_INSTANCE_GROW_DISK" -> OpCodes.OpInstanceGrowDisk <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary "OP_INSTANCE_CHANGE_GROUP" -> OpCodes.OpInstanceChangeGroup <$> genFQDN <*> return Nothing <*> arbitrary <*> genMaybe genNameNE <*> genMaybe (resize maxNodes (listOf genNameNE)) "OP_GROUP_ADD" -> OpCodes.OpGroupAdd <$> genNameNE <*> arbitrary <*> emptyMUD <*> genMaybe genEmptyContainer <*> emptyMUD <*> emptyMUD <*> emptyMUD "OP_GROUP_ASSIGN_NODES" -> OpCodes.OpGroupAssignNodes <$> genNameNE <*> arbitrary <*> genNodeNamesNE <*> return Nothing "OP_GROUP_QUERY" -> OpCodes.OpGroupQuery <$> genFieldsNE <*> genNamesNE "OP_GROUP_SET_PARAMS" -> OpCodes.OpGroupSetParams <$> genNameNE <*> arbitrary <*> emptyMUD <*> genMaybe genEmptyContainer <*> emptyMUD <*> emptyMUD <*> emptyMUD "OP_GROUP_REMOVE" -> OpCodes.OpGroupRemove <$> genNameNE "OP_GROUP_RENAME" -> OpCodes.OpGroupRename <$> genNameNE <*> genNameNE "OP_GROUP_EVACUATE" -> OpCodes.OpGroupEvacuate <$> genNameNE <*> arbitrary <*> genMaybe genNameNE <*> genMaybe genNamesNE "OP_OS_DIAGNOSE" -> OpCodes.OpOsDiagnose <$> genFieldsNE <*> genNamesNE "OP_EXT_STORAGE_DIAGNOSE" -> OpCodes.OpOsDiagnose <$> genFieldsNE <*> genNamesNE "OP_BACKUP_QUERY" -> OpCodes.OpBackupQuery <$> arbitrary <*> genNodeNamesNE "OP_BACKUP_PREPARE" -> OpCodes.OpBackupPrepare <$> genFQDN <*> return Nothing <*> arbitrary "OP_BACKUP_EXPORT" -> OpCodes.OpBackupExport <$> genFQDN <*> return Nothing <*> arbitrary <*> arbitrary <*> return Nothing <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> genMaybe (pure []) <*> genMaybe genNameNE "OP_BACKUP_REMOVE" -> OpCodes.OpBackupRemove <$> genFQDN <*> return Nothing "OP_TEST_ALLOCATOR" -> OpCodes.OpTestAllocator <$> arbitrary <*> arbitrary <*> genNameNE <*> pure [] <*> pure [] <*> arbitrary <*> genMaybe genNameNE <*> (genTags >>= mapM mkNonEmpty) <*> arbitrary <*> arbitrary <*> genMaybe genNameNE <*> arbitrary <*> genMaybe genNodeNamesNE <*> arbitrary <*> genMaybe genNamesNE <*> arbitrary <*> arbitrary "OP_TEST_JQUEUE" -> OpCodes.OpTestJqueue <$> arbitrary <*> arbitrary <*> resize 20 (listOf genFQDN) <*> arbitrary "OP_TEST_DUMMY" -> OpCodes.OpTestDummy <$> pure J.JSNull <*> pure J.JSNull <*> pure J.JSNull <*> pure J.JSNull "OP_NETWORK_ADD" -> OpCodes.OpNetworkAdd <$> genNameNE <*> genIp4Net <*> genMaybe genIp4Addr <*> pure Nothing <*> pure Nothing <*> genMaybe genMacPrefix <*> genMaybe (listOf genIp4Addr) <*> arbitrary <*> (genTags >>= mapM mkNonEmpty) "OP_NETWORK_REMOVE" -> OpCodes.OpNetworkRemove <$> genNameNE <*> arbitrary "OP_NETWORK_SET_PARAMS" -> OpCodes.OpNetworkSetParams <$> genNameNE <*> genMaybe genIp4Addr <*> pure Nothing <*> pure Nothing <*> genMaybe genMacPrefix <*> genMaybe (listOf genIp4Addr) <*> genMaybe (listOf genIp4Addr) "OP_NETWORK_CONNECT" -> OpCodes.OpNetworkConnect <$> genNameNE <*> genNameNE <*> arbitrary <*> genNameNE <*> arbitrary "OP_NETWORK_DISCONNECT" -> OpCodes.OpNetworkDisconnect <$> genNameNE <*> genNameNE "OP_NETWORK_QUERY" -> OpCodes.OpNetworkQuery <$> genFieldsNE <*> genNamesNE <*> arbitrary "OP_RESTRICTED_COMMAND" -> OpCodes.OpRestrictedCommand <$> arbitrary <*> genNodeNamesNE <*> return Nothing <*> genNameNE _ -> fail $ "Undefined arbitrary for opcode " ++ op_id -- | Generates one element of a reason trail 
genReasonElem :: Gen ReasonElem
genReasonElem = (,,) <$> genFQDN <*> genFQDN <*> arbitrary

-- | Generates a reason trail.
genReasonTrail :: Gen ReasonTrail
genReasonTrail = do
  size <- choose (0, 10)
  vectorOf size genReasonElem

instance Arbitrary OpCodes.CommonOpParams where
  arbitrary = OpCodes.CommonOpParams <$> arbitrary <*> arbitrary <*>
                arbitrary <*> resize 5 arbitrary <*> genMaybe genName <*>
                genReasonTrail

-- * Helper functions

-- | Empty JSObject.
emptyJSObject :: J.JSObject J.JSValue
emptyJSObject = J.toJSObject []

-- | Empty maybe unchecked dictionary.
emptyMUD :: Gen (Maybe (J.JSObject J.JSValue))
emptyMUD = genMaybe $ pure emptyJSObject

-- | Generates an empty container.
genEmptyContainer :: (Ord a) => Gen (GenericContainer a b)
genEmptyContainer = pure . GenericContainer $ Map.fromList []

-- | Generates a list of disk indices.
genDiskIndices :: Gen [DiskIndex]
genDiskIndices = do
  cnt <- choose (0, C.maxDisks)
  genUniquesList cnt arbitrary

-- | Generates a list of node names.
genNodeNames :: Gen [String]
genNodeNames = resize maxNodes (listOf genFQDN)

-- | Generates a list of node names in non-empty string type.
genNodeNamesNE :: Gen [NonEmptyString]
genNodeNamesNE = genNodeNames >>= mapM mkNonEmpty

-- | Gets a node name in non-empty type.
genNodeNameNE :: Gen NonEmptyString
genNodeNameNE = genFQDN >>= mkNonEmpty

-- | Gets a name (non-fqdn) in non-empty type.
genNameNE :: Gen NonEmptyString
genNameNE = genName >>= mkNonEmpty

-- | Gets a list of names (non-fqdn) in non-empty type.
genNamesNE :: Gen [NonEmptyString]
genNamesNE = resize maxNodes (listOf genNameNE)

-- | Returns a list of non-empty fields.
genFieldsNE :: Gen [NonEmptyString]
genFieldsNE = genFields >>= mapM mkNonEmpty

-- | Generate a 3-byte MAC prefix.
genMacPrefix :: Gen NonEmptyString
genMacPrefix = do
  octets <- vectorOf 3 $ choose (0::Int, 255)
  mkNonEmpty . intercalate ":" $ map (printf "%02x") octets

-- | Arbitrary instance for MetaOpCode, defined here due to TH ordering.
$(genArbitrary ''OpCodes.MetaOpCode)

-- | Small helper to check for a failed JSON deserialisation.
isJsonError :: J.Result a -> Bool
isJsonError (J.Error _) = True
isJsonError _ = False

-- * Test cases

-- | Check that opcode serialization is idempotent.
prop_serialization :: OpCodes.OpCode -> Property
prop_serialization = testSerialisation

-- | Check that Python and Haskell defined the same opcode list.
case_AllDefined :: HUnit.Assertion
case_AllDefined = do
  let py_ops = sort C.opcodesOpIds
      hs_ops = sort OpCodes.allOpIDs
      extra_py = py_ops \\ hs_ops
      extra_hs = hs_ops \\ py_ops
  HUnit.assertBool ("Missing OpCodes from the Haskell code:\n" ++
                    unlines extra_py) (null extra_py)
  HUnit.assertBool ("Extra OpCodes in the Haskell code:\n" ++
                    unlines extra_hs) (null extra_hs)

-- | Custom HUnit test case that forks a Python process and checks
-- correspondence between Haskell-generated OpCodes and their Python
-- decoded, validated and re-encoded version.
--
-- Note that we have a strange beast here: since launching Python is
-- expensive, we don't do this via a usual QuickProperty, since that's
-- slow (I've tested it, and it's indeed quite slow). Rather, we use a
-- single HUnit assertion, and in it we manually use QuickCheck to
-- generate 100 opcodes times the number of defined opcodes, which
-- then we pass in bulk to Python. The drawbacks to this method are
-- twofold: we cannot control the number of generated opcodes, since
-- HUnit assertions don't get access to the test options, and for the
-- same reason we can't run a repeatable seed.
-- We should probably find a better way to do this, for example by having
-- a separately-launched Python process (if it is not running, the tests
-- would be skipped).
case_py_compat_types :: HUnit.Assertion
case_py_compat_types = do
  let num_opcodes = length OpCodes.allOpIDs * 100
  opcodes <- genSample (vectorOf num_opcodes
                        (arbitrary::Gen OpCodes.MetaOpCode))
  let with_sum = map (\o -> (OpCodes.opSummary $
                             OpCodes.metaOpCode o, o)) opcodes
      serialized = J.encode opcodes
  -- check for non-ASCII fields, usually due to 'arbitrary :: String'
  mapM_ (\op -> when (any (not . isAscii) (J.encode op)) .
                HUnit.assertFailure $
                  "OpCode has non-ASCII fields: " ++ show op
        ) opcodes
  py_stdout <-
    runPython "from ganeti import opcodes\n\
              \import sys\n\
              \from ganeti import serializer\n\
              \op_data = serializer.Load(sys.stdin.read())\n\
              \decoded = [opcodes.OpCode.LoadOpCode(o) for o in op_data]\n\
              \for op in decoded:\n\
              \ op.Validate(True)\n\
              \encoded = [(op.Summary(), op.__getstate__())\n\
              \ for op in decoded]\n\
              \print serializer.Dump(encoded)" serialized
    >>= checkPythonResult
  let deserialised =
        J.decode py_stdout::J.Result [(String, OpCodes.MetaOpCode)]
  decoded <- case deserialised of
               J.Ok ops -> return ops
               J.Error msg ->
                 HUnit.assertFailure ("Unable to decode opcodes: " ++ msg)
                 -- this already raised an exception, but we need it
                 -- for proper types
                 >> fail "Unable to decode opcodes"
  HUnit.assertEqual "Mismatch in number of returned opcodes"
    (length decoded) (length with_sum)
  mapM_ (uncurry (HUnit.assertEqual "Different result after encoding/decoding")
        ) $ zip decoded with_sum

-- | Custom HUnit test case that forks a Python process and checks
-- correspondence between Haskell OpCodes fields and their Python
-- equivalent.
case_py_compat_fields :: HUnit.Assertion
case_py_compat_fields = do
  let hs_fields = sort $ map (\op_id -> (op_id, OpCodes.allOpFields op_id))
                         OpCodes.allOpIDs
  py_stdout <-
    runPython "from ganeti import opcodes\n\
              \import sys\n\
              \from ganeti import serializer\n\
              \fields = [(k, sorted([p[0] for p in v.OP_PARAMS]))\n\
              \ for k, v in opcodes.OP_MAPPING.items()]\n\
              \print serializer.Dump(fields)" ""
    >>= checkPythonResult
  let deserialised = J.decode py_stdout::J.Result [(String, [String])]
  py_fields <- case deserialised of
                 J.Ok v -> return $ sort v
                 J.Error msg ->
                   HUnit.assertFailure ("Unable to decode op fields: " ++ msg)
                   -- this already raised an exception, but we need it
                   -- for proper types
                   >> fail "Unable to decode op fields"
  HUnit.assertEqual "Mismatch in number of returned opcodes"
    (length hs_fields) (length py_fields)
  HUnit.assertEqual "Mismatch in defined OP_IDs"
    (map fst hs_fields) (map fst py_fields)
  mapM_ (\((py_id, py_flds), (hs_id, hs_flds)) -> do
           HUnit.assertEqual "Mismatch in OP_ID" py_id hs_id
           HUnit.assertEqual ("Mismatch in fields for " ++ hs_id)
             py_flds hs_flds
        ) $ zip py_fields hs_fields

-- | Checks that setOpComment works correctly.
prop_setOpComment :: OpCodes.MetaOpCode -> String -> Property
prop_setOpComment op comment =
  let (OpCodes.MetaOpCode common _) = OpCodes.setOpComment comment op
  in OpCodes.opComment common ==? Just comment

-- | Tests wrong tag object building (cluster takes only jsnull, the
-- others take a string, so we test the opposites).
case_TagObject_fail :: Assertion
case_TagObject_fail =
  mapM_ (\(t, j) -> assertEqual (show t ++ "/" ++ J.encode j) Nothing $
                    tagObjectFrom t j)
    [ (TagTypeCluster, J.showJSON "abc")
    , (TagTypeInstance, J.JSNull)
    , (TagTypeNode, J.JSNull)
    , (TagTypeGroup, J.JSNull)
    , (TagTypeNetwork, J.JSNull)
    ]
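-- Editor's note: the complementary positive cases, as a hedged sketch
-- only (not part of the original suite and not wired into testSuite).
-- The expected Just results below follow from the comment above, i.e.
-- that the cluster tag takes only JSNull and the others take a string;
-- the source itself only tests the failing combinations.
case_TagObject_ok_sketch :: Assertion
case_TagObject_ok_sketch = do
  assertEqual "cluster/null" (Just OpCodes.TagCluster)
    (tagObjectFrom TagTypeCluster J.JSNull)
  assertEqual "node/string" (Just (OpCodes.TagNode "node1.example.com"))
    (tagObjectFrom TagTypeNode (J.showJSON "node1.example.com"))

-- | Tests wrong (negative) disk index.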
prop_mkDiskIndex_fail :: QuickCheck.Positive Int -> Property prop_mkDiskIndex_fail (Positive i) = case mkDiskIndex (negate i) of Bad msg -> printTestCase "error message " $ "Invalid value" `isPrefixOf` msg Ok v -> failTest $ "Succeeded to build disk index '" ++ show v ++ "' from negative value " ++ show (negate i) -- | Tests a few invalid 'readRecreateDisks' cases. case_readRecreateDisks_fail :: Assertion case_readRecreateDisks_fail = do assertBool "null" $ isJsonError (J.readJSON J.JSNull::J.Result RecreateDisksInfo) assertBool "string" $ isJsonError (J.readJSON (J.showJSON "abc")::J.Result RecreateDisksInfo) -- | Tests a few invalid 'readDdmOldChanges' cases. case_readDdmOldChanges_fail :: Assertion case_readDdmOldChanges_fail = do assertBool "null" $ isJsonError (J.readJSON J.JSNull::J.Result DdmOldChanges) assertBool "string" $ isJsonError (J.readJSON (J.showJSON "abc")::J.Result DdmOldChanges) -- | Tests a few invalid 'readExportTarget' cases. case_readExportTarget_fail :: Assertion case_readExportTarget_fail = do assertBool "null" $ isJsonError (J.readJSON J.JSNull::J.Result ExportTarget) assertBool "int" $ isJsonError (J.readJSON (J.showJSON (5::Int))::J.Result ExportTarget) testSuite "OpCodes" [ 'prop_serialization , 'case_AllDefined , 'case_py_compat_types , 'case_py_compat_fields , 'prop_setOpComment , 'case_TagObject_fail , 'prop_mkDiskIndex_fail , 'case_readRecreateDisks_fail , 'case_readDdmOldChanges_fail , 'case_readExportTarget_fail ] ganeti-2.9.3/test/hs/Test/Ganeti/Attoparsec.hs0000644000000000000000000000403012244641676021163 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-| Unittests for Attoparsec support for unicode -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Attoparsec (testAttoparsec) where import Test.HUnit import Test.Ganeti.TestHelper import qualified Data.Attoparsec.Text as A import Data.Attoparsec.Text (Parser) import Data.Text (pack, unpack) -- | Unicode test string, first part. part1 :: String part1 = "äßĉ" -- | Unicode test string, second part. part2 :: String part2 = "ðèق" -- | Simple parser able to split a string in two parts, name and -- value, separated by a '=' sign. simpleParser :: Parser (String, String) simpleParser = do n <- A.takeTill (\c -> A.isHorizontalSpace c || c == '=') A.skipWhile A.isHorizontalSpace _ <- A.char '=' A.skipWhile A.isHorizontalSpace v <- A.takeTill A.isEndOfLine return (unpack n, unpack v) {-# ANN case_unicodeParsing "HLint: ignore Use camelCase" #-} -- | Tests whether a Unicode string is still Unicode after being -- parsed. 
case_unicodeParsing :: Assertion
case_unicodeParsing =
  case A.parseOnly simpleParser text of
    Right (name, value) -> do
      assertEqual "name part" part1 name
      assertEqual "value part" part2 value
    Left msg -> assertFailure $ "Failed to parse: " ++ msg
  where text = Data.Text.pack $ part1 ++ " = \t" ++ part2

testSuite "Attoparsec" [ 'case_unicodeParsing ]
ganeti-2.9.3/test/hs/Test/Ganeti/Hypervisor/0000755000000000000000000000000012271445544020673 5ustar00rootroot00000000000000
ganeti-2.9.3/test/hs/Test/Ganeti/Hypervisor/Xen/0000755000000000000000000000000012271445545021426 5ustar00rootroot00000000000000
ganeti-2.9.3/test/hs/Test/Ganeti/Hypervisor/Xen/XmParser.hs0000644000000000000000000001676412271422343023530 0ustar00rootroot00000000000000
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

{-| Unittests for @xm list --long@ parser -}

{-

Copyright (C) 2013 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Test.Ganeti.Hypervisor.Xen.XmParser
  ( testHypervisor_Xen_XmParser
  ) where

import Test.HUnit
import Test.QuickCheck as QuickCheck hiding (Result)

import Test.Ganeti.TestHelper
import Test.Ganeti.TestCommon

import Control.Monad (liftM)
import qualified Data.Attoparsec.Text as A
import Data.Text (pack)
import Data.Char
import qualified Data.Map as Map
import Text.Printf

import Ganeti.Hypervisor.Xen.Types
import Ganeti.Hypervisor.Xen.XmParser

{-# ANN module "HLint: ignore Use camelCase" #-}

-- * Arbitraries

-- | Generator for 'LispConfig'.
--
-- A completely arbitrary configuration would contain too many lists
-- and its size would be too big to be actually parsable in reasonable
-- time. This generator builds a random Config that is still of a
-- reasonable size, and it also avoids generating strings that might
-- be interpreted as numbers.
genConfig :: Int -> Gen LispConfig
genConfig 0 =
  -- only terminal values for size 0
  frequency [ (5, liftM LCString (genName `suchThat` (not . canBeNumber)))
            , (5, liftM LCDouble arbitrary)
            ]
genConfig n =
  -- for size greater than 0, allow "some" lists
  frequency [ (5, liftM LCString (resize n genName
                                  `suchThat` (not . canBeNumber)))
            , (5, liftM LCDouble arbitrary)
            , (1, liftM LCList (choose (1, n) >>=
                                (\n' -> vectorOf n' (genConfig $ n `div` n'))))
            ]

-- | Arbitrary instance for 'LispConfig' using 'genConfig'.
instance Arbitrary LispConfig where
  arbitrary = sized genConfig

-- | Determines conservatively whether a string could be a number.
canBeNumber :: String -> Bool
canBeNumber [] = False
canBeNumber (c:[]) = canBeNumberChar c
canBeNumber (c:xs) = canBeNumberChar c && canBeNumber xs

-- | Determines whether a char can be part of the string representation of a
-- number (even in scientific notation).
canBeNumberChar :: Char -> Bool
canBeNumberChar c = isDigit c || (c `elem` "eE-")
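-- Editor's note: a few concrete data points for @canBeNumber@ above
-- (an illustrative sketch, not part of the original suite). Since
-- 'canBeNumberChar' accepts digits plus the characters "eE-", a string
-- like "1e-5" is conservatively treated as a possible number, while a
-- decimal point or ordinary letters rule a string out.
canBeNumberExamples :: [(String, Bool)]
canBeNumberExamples =
  [ ("1e-5", True)   -- every char is a digit or in "eE-"
  , ("3.14", False)  -- '.' is not in the accepted set
  , ("r1d2", False)  -- 'r' and 'd' cannot appear in a number
  ]

-- | Generates an arbitrary @xm uptime@ output line.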
instance Arbitrary UptimeInfo where
  arbitrary = do
    name <- genFQDN
    NonNegative idNum <- arbitrary :: Gen (NonNegative Int)
    NonNegative days <- arbitrary :: Gen (NonNegative Int)
    hours <- choose (0, 23) :: Gen Int
    mins <- choose (0, 59) :: Gen Int
    secs <- choose (0, 59) :: Gen Int
    let uptime :: String
        uptime =
          if days /= 0
            then printf "%d days, %d:%d:%d" days hours mins secs
            else printf "%d:%d:%d" hours mins secs
    return $ UptimeInfo name idNum uptime

-- * Helper functions for tests

-- | Function for testing whether a domain configuration is parsed correctly.
testDomain :: String -> Map.Map String Domain -> Assertion
testDomain fileName expectedContent = do
  fileContent <- readTestData fileName
  case A.parseOnly xmListParser $ pack fileContent of
    Left msg -> assertFailure $ "Parsing failed: " ++ msg
    Right obtained -> assertEqual fileName expectedContent obtained

-- | Function for testing whether an @xm uptime@ output (stored in a file)
-- is parsed correctly.
testUptimeInfo :: String -> Map.Map Int UptimeInfo -> Assertion
testUptimeInfo fileName expectedContent = do
  fileContent <- readTestData fileName
  case A.parseOnly xmUptimeParser $ pack fileContent of
    Left msg -> assertFailure $ "Parsing failed: " ++ msg
    Right obtained -> assertEqual fileName expectedContent obtained

-- | Computes the relative error of two 'Double' numbers.
--
-- This is the \"relative error\" algorithm in
-- http:\/\/randomascii.wordpress.com\/2012\/02\/25\/
-- comparing-floating-point-numbers-2012-edition (URL split due to too
-- long line).
relativeError :: Double -> Double -> Double
relativeError d1 d2 =
  let delta = abs $ d1 - d2
      a1 = abs d1
      a2 = abs d2
      greatest = max a1 a2
  in if delta == 0
       then 0
       else delta / greatest

-- | Determines whether two LispConfig are equal, with the exception of Double
-- values, that just need to be \"almost equal\".
--
-- Meant mainly for testing purposes, given that Double values may be slightly
-- rounded during parsing.
isAlmostEqual :: LispConfig -> LispConfig -> Property
isAlmostEqual (LCList c1) (LCList c2) =
  (length c1 ==? length c2) .&&.
  conjoin (zipWith isAlmostEqual c1 c2)
isAlmostEqual (LCString s1) (LCString s2) = s1 ==? s2
isAlmostEqual (LCDouble d1) (LCDouble d2) = printTestCase msg $ rel <= 1e-12
  where rel = relativeError d1 d2
        msg = "Relative error " ++ show rel ++ " not smaller than 1e-12\n" ++
              "expected: " ++ show d2 ++ "\n but got: " ++ show d1
isAlmostEqual a b =
  failTest $ "Comparing different types: '" ++ show a ++ "' with '" ++
             show b ++ "'"

-- | Function to serialize LispConfigs in such a way that they can be rebuilt
-- again by the lispConfigParser.
serializeConf :: LispConfig -> String
serializeConf (LCList c) = "(" ++ unwords (map serializeConf c) ++ ")"
serializeConf (LCString s) = s
serializeConf (LCDouble d) = show d

-- | Function to serialize UptimeInfos in such a way that they can be rebuilt
-- again by the uptimeLineParser.
serializeUptime :: UptimeInfo -> String
serializeUptime (UptimeInfo name idNum uptime) =
  printf "%s\t%d\t%s" name idNum uptime

-- | Test whether a randomly generated config can be parsed.
-- Implicitly, this also tests that the Show instance of Config is correct.
prop_config :: LispConfig -> Property
prop_config conf =
  case A.parseOnly lispConfigParser . pack . serializeConf $ conf of
    Left msg -> failTest $ "Parsing failed: " ++ msg
    Right obtained -> printTestCase "Failing almost equal check" $
                      isAlmostEqual obtained conf
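-- Editor's note: a quick numeric illustration of @relativeError@ above
-- (sketch only, not part of the original suite): identical values give 0,
-- and 100 vs. 101 gives |100 - 101| / max 100 101 = 1/101, roughly
-- 9.9e-3, far above the 1e-12 tolerance used by 'isAlmostEqual'.
relativeErrorExamples :: [Double]
relativeErrorExamples =
  [ relativeError 1.0 1.0  -- == 0
  , relativeError 100 101  -- == 1 / 101, about 9.9e-3
  ]

-- | Test whether a randomly generated UptimeInfo text line can be parsed.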
prop_uptimeInfo :: UptimeInfo -> Property prop_uptimeInfo uInfo = case A.parseOnly uptimeLineParser . pack . serializeUptime $ uInfo of Left msg -> failTest $ "Parsing failed: " ++ msg Right obtained -> obtained ==? uInfo -- | Test a Xen 4.0.1 @xm list --long@ output. case_xen401list :: Assertion case_xen401list = testDomain "xen-xm-list-long-4.0.1.txt" $ Map.fromList [ ("Domain-0", Domain 0 "Domain-0" 184000.41332 ActualRunning Nothing) , ("instance1.example.com", Domain 119 "instance1.example.com" 24.116146647 ActualBlocked Nothing) ] -- | Test a Xen 4.0.1 @xm uptime@ output. case_xen401uptime :: Assertion case_xen401uptime = testUptimeInfo "xen-xm-uptime-4.0.1.txt" $ Map.fromList [ (0, UptimeInfo "Domain-0" 0 "98 days, 2:27:44") , (119, UptimeInfo "instance1.example.com" 119 "15 days, 20:57:07") ] testSuite "Hypervisor/Xen/XmParser" [ 'prop_config , 'prop_uptimeInfo , 'case_xen401list , 'case_xen401uptime ] ganeti-2.9.3/test/hs/Test/Ganeti/Network.hs0000644000000000000000000001072112244641676020513 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, TypeSynonymInstances, FlexibleInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Test.Ganeti.Network ( testNetwork , genBitStringMaxLen ) where import Test.QuickCheck import Ganeti.Network as Network import Ganeti.Objects as Objects import Test.Ganeti.Objects ( genBitStringMaxLen , genValidNetwork ) import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import qualified Data.Vector.Unboxed as V -- * Generators and arbitrary instances -- | Generates address pools. The size of the network is intentionally -- decoupled from the size of the bit vectors, to avoid slowing down -- the tests by generating unnecessary bit strings. genAddressPool :: Int -> Gen AddressPool genAddressPool maxLenBitVec = do -- Generating networks with netmask of minimum /24 to avoid too long -- bit strings being generated. net <- genValidNetwork lenBitVec <- choose (0, maxLenBitVec) res <- genBitVector lenBitVec ext_res <- genBitVector lenBitVec return AddressPool { network = net , reservations = res , extReservations = ext_res } -- | Generates an arbitrary bit vector of the given length. genBitVector :: Int -> Gen (V.Vector Bool) genBitVector len = do boolList <- vector len::Gen [Bool] return $ V.fromList boolList instance Arbitrary AddressPool where arbitrary = genAddressPool ((2::Int)^(8::Int)) -- * Test cases -- | Check the mapping of bit strings to bit vectors prop_bitStringToBitVector :: Property prop_bitStringToBitVector = forAll (genBitStringMaxLen 256) $ \bs -> let bitList = V.toList $ Network.bitStringToBitVector bs bitCharList = Prelude.zip bitList bs in Prelude.all checkBit bitCharList -- | Check whether an element of a bit vector is consistent with an element -- of a bit string (containing '0' and '1' characters). checkBit :: (Bool, Char) -> Bool checkBit (False, '0') = True checkBit (True, '1') = True checkBit _ = False -- | Check creation of an address pool when a network is given. prop_createAddressPool :: Objects.Network -> Property prop_createAddressPool n = let valid = networkIsValid n in case createAddressPool n of Just _ -> True ==? valid Nothing -> False ==? valid -- | Check that the address pool's properties are calculated correctly. 
prop_addressPoolProperties :: AddressPool -> Property prop_addressPoolProperties a = conjoin [ printTestCase ("Not all reservations are included in 'allReservations' of " ++ "address pool:" ++ show a) (allReservationsSubsumesInternal a) , printTestCase ("Not all external reservations are covered by 'allReservations' " ++ "of address pool: " ++ show a) (allReservationsSubsumesExternal a) , printTestCase ("The counts of free and reserved addresses do not add up for " ++ "address pool: " ++ show a) (checkCounts a) , printTestCase ("'isFull' wrongly classified the status of the address pool: " ++ show a) (checkIsFull a) , printTestCase ("Network map is inconsistent with reservations of address pool: " ++ show a) (checkGetMap a) ] -- | Check that all internally reserved ips are included in 'allReservations'. allReservationsSubsumesInternal :: AddressPool -> Bool allReservationsSubsumesInternal a = bitVectorSubsumes (allReservations a) (reservations a) -- | Check that all externally reserved ips are included in 'allReservations'. allReservationsSubsumesExternal :: AddressPool -> Bool allReservationsSubsumesExternal a = bitVectorSubsumes (allReservations a) (extReservations a) -- | Checks if one bit vector subsumes the other one. bitVectorSubsumes :: V.Vector Bool -> V.Vector Bool -> Bool bitVectorSubsumes v1 v2 = V.and $ V.zipWith (\a b -> not b || a) v1 v2 -- | Check that the counts of free and reserved ips add up. checkCounts :: AddressPool -> Bool checkCounts a = let res = reservations a in V.length res == getFreeCount a + getReservedCount a -- | Check that the detection of a full network works correctly. checkIsFull :: AddressPool -> Bool checkIsFull a = isFull a == V.notElem False (allReservations a) -- | Check that the map representation of the network corresponds to the -- network's reservations. checkGetMap :: AddressPool -> Bool checkGetMap a = allReservations a == V.fromList (Prelude.map (== 'X') (getMap a)) testSuite "Network" [ 'prop_bitStringToBitVector , 'prop_createAddressPool , 'prop_addressPoolProperties ] ganeti-2.9.3/test/hs/Test/Ganeti/BasicTypes.hs0000644000000000000000000001221512244641676021130 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell, FlexibleInstances, TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-} module Test.Ganeti.BasicTypes (testBasicTypes) where import Test.QuickCheck hiding (Result) import Test.QuickCheck.Function import Control.Applicative import Control.Monad import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.BasicTypes -- Since we actually want to test these, don't tell us not to use them :) {-# ANN module "HLint: ignore Functor law" #-} {-# ANN module "HLint: ignore Monad law, left identity" #-} {-# ANN module "HLint: ignore Monad law, right identity" #-} {-# ANN module "HLint: ignore Use >=>" #-} {-# ANN module "HLint: ignore Use ." #-} -- * Arbitrary instances instance (Arbitrary a) => Arbitrary (Result a) where arbitrary = oneof [ Bad <$> arbitrary , Ok <$> arbitrary ] -- * Test cases -- | Tests the functor identity law: -- -- > fmap id == id prop_functor_id :: Result Int -> Property prop_functor_id ri = fmap id ri ==? ri -- | Tests the functor composition law: -- -- > fmap (f . g) == fmap f . fmap g prop_functor_composition :: Result Int -> Fun Int Int -> Fun Int Int -> Property prop_functor_composition ri (Fun _ f) (Fun _ g) = fmap (f . g) ri ==? (fmap f . fmap g) ri -- | Tests the applicative identity law: -- -- > pure id <*> v = v prop_applicative_identity :: Result Int -> Property prop_applicative_identity v = pure id <*> v ==? v -- | Tests the applicative composition law: -- -- > pure (.) <*> u <*> v <*> w = u <*> (v <*> w) prop_applicative_composition :: Result (Fun Int Int) -> Result (Fun Int Int) -> Result Int -> Property prop_applicative_composition u v w = let u' = fmap apply u v' = fmap apply v in pure (.) <*> u' <*> v' <*> w ==? u' <*> (v' <*> w) -- | Tests the applicative homomorphism law: -- -- > pure f <*> pure x = pure (f x) prop_applicative_homomorphism :: Fun Int Int -> Int -> Property prop_applicative_homomorphism (Fun _ f) x = ((pure f <*> pure x)::Result Int) ==? pure (f x) -- | Tests the applicative interchange law: -- -- > u <*> pure y = pure ($ y) <*> u prop_applicative_interchange :: Result (Fun Int Int) -> Int -> Property prop_applicative_interchange f y = let u = fmap apply f -- need to extract the actual function from Fun in u <*> pure y ==? pure ($ y) <*> u -- | Tests the applicative\/functor correspondence: -- -- > fmap f x = pure f <*> x prop_applicative_functor :: Fun Int Int -> Result Int -> Property prop_applicative_functor (Fun _ f) x = fmap f x ==? pure f <*> x -- | Tests the applicative\/monad correspondence: -- -- > pure = return -- -- > (<*>) = ap prop_applicative_monad :: Int -> Result (Fun Int Int) -> Property prop_applicative_monad v f = let v' = pure v :: Result Int f' = fmap apply f -- need to extract the actual function from Fun in v' ==? return v .&&. (f' <*> v') ==? f' `ap` v' -- | Tests the monad laws: -- -- > return a >>= k == k a -- -- > m >>= return == m -- -- > m >>= (\x -> k x >>= h) == (m >>= k) >>= h prop_monad_laws :: Int -> Result Int -> Fun Int (Result Int) -> Fun Int (Result Int) -> Property prop_monad_laws a m (Fun _ k) (Fun _ h) = conjoin [ printTestCase "return a >>= k == k a" ((return a >>= k) ==? k a) , printTestCase "m >>= return == m" ((m >>= return) ==? m) , printTestCase "m >>= (\\x -> k x >>= h) == (m >>= k) >>= h)" ((m >>= (\x -> k x >>= h)) ==? ((m >>= k) >>= h)) ] -- | Tests the monad plus laws: -- -- > mzero >>= f = mzero -- -- > v >> mzero = mzero prop_monadplus_mzero :: Result Int -> Fun Int (Result Int) -> Property prop_monadplus_mzero v (Fun _ f) = printTestCase "mzero >>= f = mzero" ((mzero >>= f) ==? mzero) .&&. 
-- FIXME: since we have "many" mzeros, we can't test for equality, -- just that we got back a 'Bad' value; I'm not sure if this means -- our MonadPlus instance is not sound or not... printTestCase "v >> mzero = mzero" (isBad (v >> mzero)) testSuite "BasicTypes" [ 'prop_functor_id , 'prop_functor_composition , 'prop_applicative_identity , 'prop_applicative_composition , 'prop_applicative_homomorphism , 'prop_applicative_interchange , 'prop_applicative_functor , 'prop_applicative_monad , 'prop_monad_laws , 'prop_monadplus_mzero ] ganeti-2.9.3/test/hs/Test/Ganeti/THH.hs0000644000000000000000000000404212244641676017504 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-| Unittests for our template-haskell generated code. -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.THH ( testTHH ) where import Test.QuickCheck import Text.JSON import Ganeti.THH import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon {-# ANN module "HLint: ignore Use camelCase" #-} -- * Custom types -- | Type used to test optional field implementation. Equivalent to -- @data TestObj = TestObj { tobjA :: Maybe Int, tobjB :: Maybe Int -- }@. $(buildObject "TestObj" "tobj" [ optionalField $ simpleField "a" [t| Int |] , optionalNullSerField $ simpleField "b" [t| Int |] ]) -- | Arbitrary instance for 'TestObj'. $(genArbitrary ''TestObj) -- | Tests that serialising an (arbitrary) 'TestObj' instance is -- correct: fully optional fields are represented in the resulting -- dictionary only when non-null, optional-but-required fields are -- always represented (with either null or an actual value). prop_OptFields :: TestObj -> Property prop_OptFields to = let a_member = case tobjA to of Nothing -> [] Just x -> [("a", showJSON x)] b_member = [("b", case tobjB to of Nothing -> JSNull Just x -> showJSON x)] in showJSON to ==? makeObj (a_member ++ b_member) testSuite "THH" [ 'prop_OptFields ] ganeti-2.9.3/test/hs/Test/Ganeti/Confd/0000755000000000000000000000000012271445545017553 5ustar00rootroot00000000000000ganeti-2.9.3/test/hs/Test/Ganeti/Confd/Types.hs0000644000000000000000000000637212244641676021226 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Confd.Types ( testConfd_Types , ConfdRequestType(..) , ConfdReqField(..) , ConfdReqQ(..) ) where import Control.Applicative import Test.QuickCheck import Test.HUnit import qualified Text.JSON as J import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import Ganeti.Confd.Types as Confd {-# ANN module "HLint: ignore Use camelCase" #-} -- * Arbitrary instances $(genArbitrary ''ConfdRequestType) $(genArbitrary ''ConfdReqField) $(genArbitrary ''ConfdReqQ) instance Arbitrary ConfdQuery where arbitrary = oneof [ pure EmptyQuery , PlainQuery <$> genName , DictQuery <$> arbitrary ] $(genArbitrary ''ConfdRequest) $(genArbitrary ''ConfdReplyStatus) instance Arbitrary ConfdReply where arbitrary = ConfdReply <$> arbitrary <*> arbitrary <*> pure J.JSNull <*> arbitrary $(genArbitrary ''ConfdErrorType) $(genArbitrary ''ConfdNodeRole) -- * Test cases -- | Test 'ConfdQuery' serialisation. prop_ConfdQuery_serialisation :: ConfdQuery -> Property prop_ConfdQuery_serialisation = testSerialisation -- | Test bad types deserialisation for 'ConfdQuery'. case_ConfdQuery_BadTypes :: Assertion case_ConfdQuery_BadTypes = do let helper jsval = case J.readJSON jsval of J.Error _ -> return () J.Ok cq -> assertFailure $ "Parsed " ++ show jsval ++ " as query " ++ show (cq::ConfdQuery) helper $ J.showJSON (1::Int) helper $ J.JSBool True helper $ J.JSBool False helper $ J.JSArray [] -- | Test 'ConfdReplyStatus' serialisation. prop_ConfdReplyStatus_serialisation :: ConfdReplyStatus -> Property prop_ConfdReplyStatus_serialisation = testSerialisation -- | Test 'ConfdReply' serialisation. prop_ConfdReply_serialisation :: ConfdReply -> Property prop_ConfdReply_serialisation = testSerialisation -- | Test 'ConfdErrorType' serialisation. prop_ConfdErrorType_serialisation :: ConfdErrorType -> Property prop_ConfdErrorType_serialisation = testSerialisation -- | Test 'ConfdNodeRole' serialisation. prop_ConfdNodeRole_serialisation :: ConfdNodeRole -> Property prop_ConfdNodeRole_serialisation = testSerialisation testSuite "Confd/Types" [ 'prop_ConfdQuery_serialisation , 'case_ConfdQuery_BadTypes , 'prop_ConfdReplyStatus_serialisation , 'prop_ConfdReply_serialisation , 'prop_ConfdErrorType_serialisation , 'prop_ConfdNodeRole_serialisation ] ganeti-2.9.3/test/hs/Test/Ganeti/Confd/Utils.hs0000644000000000000000000001042712244641676021216 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for ganeti-htools. -} {- Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-}

module Test.Ganeti.Confd.Utils (testConfd_Utils) where

import Test.QuickCheck
import qualified Text.JSON as J

import Test.Ganeti.TestHelper
import Test.Ganeti.TestCommon
import Test.Ganeti.Confd.Types ()
import qualified Ganeti.BasicTypes as BasicTypes
import qualified Ganeti.Confd.Types as Confd
import qualified Ganeti.Confd.Utils as Confd.Utils
import qualified Ganeti.Constants as C
import qualified Ganeti.Hash as Hash

-- | Tests that signing messages and checking signatures is correct. It
-- also tests, indirectly, the serialisation of messages, so we don't
-- need a separate test for that.
prop_req_sign :: Hash.HashKey        -- ^ The hash key
              -> NonNegative Integer -- ^ The base timestamp
              -> Positive Integer    -- ^ Delta for out of window
              -> Bool                -- ^ Whether delta should be + or -
              -> Confd.ConfdRequest
              -> Property
prop_req_sign key (NonNegative timestamp) (Positive bad_delta) pm crq =
  forAll (choose (0, fromIntegral C.confdMaxClockSkew)) $ \good_delta ->
  let encoded = J.encode crq
      salt = show timestamp
      signed = J.encode $ Confd.Utils.signMessage key salt encoded
      good_timestamp = timestamp + if pm then good_delta else (-good_delta)
      bad_delta' = fromIntegral C.confdMaxClockSkew + bad_delta
      bad_timestamp = timestamp + if pm then bad_delta' else (-bad_delta')
      ts_ok = Confd.Utils.parseRequest key signed good_timestamp
      ts_bad = Confd.Utils.parseRequest key signed bad_timestamp
  in printTestCase "Failed to parse good message"
       (ts_ok ==? BasicTypes.Ok (encoded, crq)) .&&.
     printTestCase ("Managed to deserialise message with bad\
                    \ timestamp, got " ++ show ts_bad)
       (ts_bad ==? BasicTypes.Bad "Too old/too new timestamp or clock skew")

-- | Tests that a ConfdReply can be properly encoded, signed and parsed using
-- the proper salt, but fails parsing with the wrong salt.
prop_rep_salt :: Hash.HashKey     -- ^ The hash key
              -> Confd.ConfdReply -- ^ A Confd reply
              -> Property
prop_rep_salt hmac reply =
  forAll arbitrary $ \salt1 ->
  forAll (arbitrary `suchThat` (/= salt1)) $ \salt2 ->
  let innerMsg = J.encode reply
      msg = J.encode $ Confd.Utils.signMessage hmac salt1 innerMsg
  in Confd.Utils.parseReply hmac msg salt1 ==?
       BasicTypes.Ok (innerMsg, reply) .&&.
     Confd.Utils.parseReply hmac msg salt2 ==?
       BasicTypes.Bad "The received salt differs from the expected salt"

-- | Tests that a message signed with one key fails HMAC verification
-- when checked with a different key.
prop_bad_key :: String             -- ^ Salt
             -> Confd.ConfdRequest -- ^ Request
             -> Property
prop_bad_key salt crq =
  -- fixme: we hardcode here the expected length of a sha1 key, as
  -- otherwise we could have two short keys that differ only in the
  -- final zero elements count, and those will be expanded to be the
  -- same
  forAll (vector 20) $ \key_sign ->
  forAll (vector 20 `suchThat` (/= key_sign)) $ \key_verify ->
  let signed = Confd.Utils.signMessage key_sign salt (J.encode crq)
      encoded = J.encode signed
  in printTestCase ("Accepted message signed with different key" ++ encoded) $
     (Confd.Utils.parseSignedMessage key_verify encoded
      :: BasicTypes.Result (String, String, Confd.ConfdRequest)) ==?
       BasicTypes.Bad "HMAC verification failed"

testSuite "Confd/Utils"
  [ 'prop_req_sign
  , 'prop_rep_salt
  , 'prop_bad_key
  ]
ganeti-2.9.3/test/hs/Test/Ganeti/Jobs.hs0000644000000000000000000000173512244641676017764 0ustar00rootroot00000000000000
{-# LANGUAGE TemplateHaskell #-}

{-| Unittests for ganeti-htools.

-}

{-

Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Jobs (testJobs) where import Test.Ganeti.TestHelper {-# ANN module "HLint: ignore Unused LANGUAGE pragma" #-} testSuite "Jobs" [ ] ganeti-2.9.3/test/hs/Test/Ganeti/Errors.hs0000644000000000000000000000244412244641676020341 0ustar00rootroot00000000000000{-# LANGUAGE TemplateHaskell #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {-| Unittests for "Ganeti.Errors". -} {- Copyright (C) 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Test.Ganeti.Errors (testErrors) where import Test.QuickCheck import Test.Ganeti.TestHelper import Test.Ganeti.TestCommon import qualified Ganeti.Errors as Errors $(genArbitrary ''Errors.ErrorCode) $(genArbitrary ''Errors.GanetiException) -- | Tests error serialisation. prop_GenericError_serialisation :: Errors.GanetiException -> Property prop_GenericError_serialisation = testSerialisation testSuite "Errors" [ 'prop_GenericError_serialisation ] ganeti-2.9.3/test/hs/hpc-htools.hs0000644000000000000000000000147012244641676017015 0ustar00rootroot00000000000000{-| Main htools binary. -} {- Copyright (C) 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -} module Main (main) where import Ganeti.HTools.Program.Main (main) ganeti-2.9.3/test/py/0000755000000000000000000000000012271445545014412 5ustar00rootroot00000000000000ganeti-2.9.3/test/py/ganeti.http_unittest.py0000744000000000000000000006437312244641676021171 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2007, 2008 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the http module""" import os import unittest import time import tempfile import pycurl import itertools import threading from cStringIO import StringIO from ganeti import http from ganeti import compat import ganeti.http.server import ganeti.http.client import ganeti.http.auth import testutils class TestStartLines(unittest.TestCase): """Test cases for start line classes""" def testClientToServerStartLine(self): """Test client to server start line (HTTP request)""" start_line = http.HttpClientToServerStartLine("GET", "/", "HTTP/1.1") self.assertEqual(str(start_line), "GET / HTTP/1.1") def testServerToClientStartLine(self): """Test server to client start line (HTTP response)""" start_line = http.HttpServerToClientStartLine("HTTP/1.1", 200, "OK") self.assertEqual(str(start_line), "HTTP/1.1 200 OK") class TestMisc(unittest.TestCase): """Miscellaneous tests""" def _TestDateTimeHeader(self, gmnow, expected): self.assertEqual(http.server._DateTimeHeader(gmnow=gmnow), expected) def testDateTimeHeader(self): """Test ganeti.http._DateTimeHeader""" self._TestDateTimeHeader((2008, 1, 2, 3, 4, 5, 3, 0, 0), "Thu, 02 Jan 2008 03:04:05 GMT") self._TestDateTimeHeader((2008, 1, 1, 0, 0, 0, 0, 0, 0), "Mon, 01 Jan 2008 00:00:00 GMT") self._TestDateTimeHeader((2008, 12, 31, 0, 0, 0, 0, 0, 0), "Mon, 31 Dec 2008 00:00:00 GMT") self._TestDateTimeHeader((2008, 12, 31, 23, 59, 59, 0, 0, 0), "Mon, 31 Dec 2008 23:59:59 GMT") self._TestDateTimeHeader((2008, 12, 31, 0, 0, 0, 6, 0, 0), "Sun, 31 Dec 2008 00:00:00 GMT") def testHttpServerRequest(self): """Test ganeti.http.server._HttpServerRequest""" server_request = http.server._HttpServerRequest("GET", "/", None, None) # These are expected by users of the HTTP server self.assert_(hasattr(server_request, "request_method")) self.assert_(hasattr(server_request, "request_path")) self.assert_(hasattr(server_request, "request_headers")) self.assert_(hasattr(server_request, "request_body")) self.assert_(isinstance(server_request.resp_headers, dict)) self.assert_(hasattr(server_request, "private")) def testServerSizeLimits(self): """Test HTTP server size limits""" message_reader_class = http.server._HttpClientToServerMessageReader self.assert_(message_reader_class.START_LINE_LENGTH_MAX > 0) self.assert_(message_reader_class.HEADER_LENGTH_MAX > 0) def testFormatAuthHeader(self): self.assertEqual(http.auth._FormatAuthHeader("Basic", {}), "Basic") self.assertEqual(http.auth._FormatAuthHeader("Basic", { "foo": "bar", }), "Basic foo=bar") self.assertEqual(http.auth._FormatAuthHeader("Basic", { "foo": "", }), "Basic foo=\"\"") self.assertEqual(http.auth._FormatAuthHeader("Basic", { "foo": "x,y", }), "Basic foo=\"x,y\"") params = { "foo": "x,y", "realm": "secure", } # It's a dict whose order isn't guaranteed, hence checking a list 
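    # For example, with the params defined above ({"foo": "x,y", "realm":
    # "secure"}): "x,y" contains a comma and is therefore emitted quoted,
    # "secure" stays unquoted, and the parameter order depends on dict
    # iteration, so both renderings below must be accepted.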
self.assert_(http.auth._FormatAuthHeader("Digest", params) in ("Digest foo=\"x,y\" realm=secure", "Digest realm=secure foo=\"x,y\"")) class _FakeRequestAuth(http.auth.HttpServerRequestAuthentication): def __init__(self, realm, authreq, authenticate_fn): http.auth.HttpServerRequestAuthentication.__init__(self) self.realm = realm self.authreq = authreq self.authenticate_fn = authenticate_fn def AuthenticationRequired(self, req): return self.authreq def GetAuthRealm(self, req): return self.realm def Authenticate(self, *args): if self.authenticate_fn: return self.authenticate_fn(*args) raise NotImplementedError() class TestAuth(unittest.TestCase): """Authentication tests""" hsra = http.auth.HttpServerRequestAuthentication def testConstants(self): for scheme in [self.hsra._CLEARTEXT_SCHEME, self.hsra._HA1_SCHEME]: self.assertEqual(scheme, scheme.upper()) self.assert_(scheme.startswith("{")) self.assert_(scheme.endswith("}")) def _testVerifyBasicAuthPassword(self, realm, user, password, expected): ra = _FakeRequestAuth(realm, False, None) return ra.VerifyBasicAuthPassword(None, user, password, expected) def testVerifyBasicAuthPassword(self): tvbap = self._testVerifyBasicAuthPassword good_pws = ["pw", "pw{", "pw}", "pw{}", "pw{x}y", "}pw", "0", "123", "foo...:xyz", "TeST"] for pw in good_pws: # Try cleartext passwords self.assert_(tvbap("abc", "user", pw, pw)) self.assert_(tvbap("abc", "user", pw, "{cleartext}" + pw)) self.assert_(tvbap("abc", "user", pw, "{ClearText}" + pw)) self.assert_(tvbap("abc", "user", pw, "{CLEARTEXT}" + pw)) # Try with invalid password self.failIf(tvbap("abc", "user", pw, "something")) # Try with invalid scheme self.failIf(tvbap("abc", "user", pw, "{000}" + pw)) self.failIf(tvbap("abc", "user", pw, "{unk}" + pw)) self.failIf(tvbap("abc", "user", pw, "{Unk}" + pw)) self.failIf(tvbap("abc", "user", pw, "{UNK}" + pw)) # Try with invalid scheme format self.failIf(tvbap("abc", "user", "pw", "{something")) # Hash is MD5("user:This is only a test:pw") self.assert_(tvbap("This is only a test", "user", "pw", "{ha1}92ea58ae804481498c257b2f65561a17")) self.assert_(tvbap("This is only a test", "user", "pw", "{HA1}92ea58ae804481498c257b2f65561a17")) self.failUnlessRaises(AssertionError, tvbap, None, "user", "pw", "{HA1}92ea58ae804481498c257b2f65561a17") self.failIf(tvbap("Admin area", "user", "pw", "{HA1}92ea58ae804481498c257b2f65561a17")) self.failIf(tvbap("This is only a test", "someone", "pw", "{HA1}92ea58ae804481498c257b2f65561a17")) self.failIf(tvbap("This is only a test", "user", "something", "{HA1}92ea58ae804481498c257b2f65561a17")) class _SimpleAuthenticator: def __init__(self, user, password): self.user = user self.password = password self.called = False def __call__(self, req, user, password): self.called = True return self.user == user and self.password == password class TestHttpServerRequestAuthentication(unittest.TestCase): def testNoAuth(self): req = http.server._HttpServerRequest("GET", "/", None, None) _FakeRequestAuth("area1", False, None).PreHandleRequest(req) def testNoRealm(self): headers = { http.HTTP_AUTHORIZATION: "", } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth(None, False, None) self.assertRaises(AssertionError, ra.PreHandleRequest, req) def testNoScheme(self): headers = { http.HTTP_AUTHORIZATION: "", } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth("area1", False, None) self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req) def testUnknownScheme(self): headers = { 
http.HTTP_AUTHORIZATION: "NewStyleAuth abc", } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth("area1", False, None) self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req) def testInvalidBase64(self): headers = { http.HTTP_AUTHORIZATION: "Basic x_=_", } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth("area1", False, None) self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req) def testAuthForPublicResource(self): headers = { http.HTTP_AUTHORIZATION: "Basic %s" % ("foo".encode("base64").strip(), ), } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth("area1", False, None) self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req) def testAuthForPublicResource(self): headers = { http.HTTP_AUTHORIZATION: "Basic %s" % ("foo:bar".encode("base64").strip(), ), } req = http.server._HttpServerRequest("GET", "/", headers, None) ac = _SimpleAuthenticator("foo", "bar") ra = _FakeRequestAuth("area1", False, ac) ra.PreHandleRequest(req) req = http.server._HttpServerRequest("GET", "/", headers, None) ac = _SimpleAuthenticator("something", "else") ra = _FakeRequestAuth("area1", False, ac) self.assertRaises(http.HttpUnauthorized, ra.PreHandleRequest, req) def testInvalidRequestHeader(self): checks = { http.HttpUnauthorized: ["", "\t", "-", ".", "@", "<", ">", "Digest", "basic %s" % "foobar".encode("base64").strip()], http.HttpBadRequest: ["Basic"], } for exc, headers in checks.items(): for i in headers: headers = { http.HTTP_AUTHORIZATION: i, } req = http.server._HttpServerRequest("GET", "/", headers, None) ra = _FakeRequestAuth("area1", False, None) self.assertRaises(exc, ra.PreHandleRequest, req) def testBasicAuth(self): for user in ["", "joe", "user name with spaces"]: for pw in ["", "-", ":", "foobar", "Foo Bar Baz", "@@@", "###", "foo:bar:baz"]: for wrong_pw in [True, False]: basic_auth = "%s:%s" % (user, pw) if wrong_pw: basic_auth += "WRONG" headers = { http.HTTP_AUTHORIZATION: "Basic %s" % (basic_auth.encode("base64").strip(), ), } req = http.server._HttpServerRequest("GET", "/", headers, None) ac = _SimpleAuthenticator(user, pw) self.assertFalse(ac.called) ra = _FakeRequestAuth("area1", True, ac) if wrong_pw: try: ra.PreHandleRequest(req) except http.HttpUnauthorized, err: www_auth = err.headers[http.HTTP_WWW_AUTHENTICATE] self.assert_(www_auth.startswith(http.auth.HTTP_BASIC_AUTH)) else: self.fail("Didn't raise HttpUnauthorized") else: ra.PreHandleRequest(req) self.assert_(ac.called) class TestReadPasswordFile(unittest.TestCase): def testSimple(self): users = http.auth.ParsePasswordFile("user1 password") self.assertEqual(len(users), 1) self.assertEqual(users["user1"].password, "password") self.assertEqual(len(users["user1"].options), 0) def testOptions(self): buf = StringIO() buf.write("# Passwords\n") buf.write("user1 password\n") buf.write("\n") buf.write("# Comment\n") buf.write("user2 pw write,read\n") buf.write(" \t# Another comment\n") buf.write("invalidline\n") users = http.auth.ParsePasswordFile(buf.getvalue()) self.assertEqual(len(users), 2) self.assertEqual(users["user1"].password, "password") self.assertEqual(len(users["user1"].options), 0) self.assertEqual(users["user2"].password, "pw") self.assertEqual(users["user2"].options, ["write", "read"]) class TestClientRequest(unittest.TestCase): def testRepr(self): cr = http.client.HttpClientRequest("localhost", 1234, "GET", "/version", headers=[], post_data="Hello World") 
self.assert_(repr(cr).startswith("<")) def testNoHeaders(self): cr = http.client.HttpClientRequest("localhost", 1234, "GET", "/version", headers=None) self.assert_(isinstance(cr.headers, list)) self.assertEqual(cr.headers, []) self.assertEqual(cr.url, "https://localhost:1234/version") def testPlainAddressIPv4(self): cr = http.client.HttpClientRequest("192.0.2.9", 19956, "GET", "/version") self.assertEqual(cr.url, "https://192.0.2.9:19956/version") def testPlainAddressIPv6(self): cr = http.client.HttpClientRequest("2001:db8::cafe", 15110, "GET", "/info") self.assertEqual(cr.url, "https://[2001:db8::cafe]:15110/info") def testOldStyleHeaders(self): headers = { "Content-type": "text/plain", "Accept": "text/html", } cr = http.client.HttpClientRequest("localhost", 16481, "GET", "/vg_list", headers=headers) self.assert_(isinstance(cr.headers, list)) self.assertEqual(sorted(cr.headers), [ "Accept: text/html", "Content-type: text/plain", ]) self.assertEqual(cr.url, "https://localhost:16481/vg_list") def testNewStyleHeaders(self): headers = [ "Accept: text/html", "Content-type: text/plain; charset=ascii", "Server: httpd 1.0", ] cr = http.client.HttpClientRequest("localhost", 1234, "GET", "/version", headers=headers) self.assert_(isinstance(cr.headers, list)) self.assertEqual(sorted(cr.headers), sorted(headers)) self.assertEqual(cr.url, "https://localhost:1234/version") def testPostData(self): cr = http.client.HttpClientRequest("localhost", 1234, "GET", "/version", post_data="Hello World") self.assertEqual(cr.post_data, "Hello World") def testNoPostData(self): cr = http.client.HttpClientRequest("localhost", 1234, "GET", "/version") self.assertEqual(cr.post_data, "") def testCompletionCallback(self): for argname in ["completion_cb", "curl_config_fn"]: kwargs = { argname: NotImplementedError, } cr = http.client.HttpClientRequest("localhost", 14038, "GET", "/version", **kwargs) self.assertEqual(getattr(cr, argname), NotImplementedError) for fn in [NotImplemented, {}, 1]: kwargs = { argname: fn, } self.assertRaises(AssertionError, http.client.HttpClientRequest, "localhost", 23150, "GET", "/version", **kwargs) class _FakeCurl: def __init__(self): self.opts = {} self.info = NotImplemented def setopt(self, opt, value): assert opt not in self.opts, "Option set more than once" self.opts[opt] = value def getinfo(self, info): return self.info.pop(info) class TestClientStartRequest(unittest.TestCase): @staticmethod def _TestCurlConfig(curl): curl.setopt(pycurl.SSLKEYTYPE, "PEM") def test(self): for method in [http.HTTP_GET, http.HTTP_PUT, "CUSTOM"]: for port in [8761, 29796, 19528]: for curl_config_fn in [None, self._TestCurlConfig]: for read_timeout in [None, 0, 1, 123, 36000]: self._TestInner(method, port, curl_config_fn, read_timeout) def _TestInner(self, method, port, curl_config_fn, read_timeout): for response_code in [http.HTTP_OK, http.HttpNotFound.code, http.HTTP_NOT_MODIFIED]: for response_body in [None, "Hello World", "Very Long\tContent here\n" * 171]: for errmsg in [None, "error"]: req = http.client.HttpClientRequest("localhost", port, method, "/version", curl_config_fn=curl_config_fn, read_timeout=read_timeout) curl = _FakeCurl() pending = http.client._StartRequest(curl, req) self.assertEqual(pending.GetCurlHandle(), curl) self.assertEqual(pending.GetCurrentRequest(), req) # Check options opts = curl.opts self.assertEqual(opts.pop(pycurl.CUSTOMREQUEST), method) self.assertEqual(opts.pop(pycurl.URL), "https://localhost:%s/version" % port) if read_timeout is None: 
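          # libcurl treats CURLOPT_TIMEOUT == 0 as "never time out", so a
          # missing read timeout must map to 0 here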
self.assertEqual(opts.pop(pycurl.TIMEOUT), 0) else: self.assertEqual(opts.pop(pycurl.TIMEOUT), read_timeout) self.assertFalse(opts.pop(pycurl.VERBOSE)) self.assertTrue(opts.pop(pycurl.NOSIGNAL)) self.assertEqual(opts.pop(pycurl.USERAGENT), http.HTTP_GANETI_VERSION) self.assertEqual(opts.pop(pycurl.PROXY), "") self.assertFalse(opts.pop(pycurl.POSTFIELDS)) self.assertFalse(opts.pop(pycurl.HTTPHEADER)) write_fn = opts.pop(pycurl.WRITEFUNCTION) self.assertTrue(callable(write_fn)) if hasattr(pycurl, "SSL_SESSIONID_CACHE"): self.assertFalse(opts.pop(pycurl.SSL_SESSIONID_CACHE)) if curl_config_fn: self.assertEqual(opts.pop(pycurl.SSLKEYTYPE), "PEM") else: self.assertFalse(pycurl.SSLKEYTYPE in opts) self.assertFalse(opts) if response_body is not None: offset = 0 while offset < len(response_body): piece = response_body[offset:offset + 10] write_fn(piece) offset += len(piece) curl.info = { pycurl.RESPONSE_CODE: response_code, } # Finalize request pending.Done(errmsg) self.assertFalse(curl.info) # Can only finalize once self.assertRaises(AssertionError, pending.Done, True) if errmsg: self.assertFalse(req.success) else: self.assertTrue(req.success) self.assertEqual(req.error, errmsg) self.assertEqual(req.resp_status_code, response_code) if response_body is None: self.assertEqual(req.resp_body, "") else: self.assertEqual(req.resp_body, response_body) # Check if resetting worked assert not hasattr(curl, "reset") opts = curl.opts self.assertFalse(opts.pop(pycurl.POSTFIELDS)) self.assertTrue(callable(opts.pop(pycurl.WRITEFUNCTION))) self.assertFalse(opts) self.assertFalse(curl.opts, msg="Previous checks did not consume all options") assert id(opts) == id(curl.opts) def _TestWrongTypes(self, *args, **kwargs): req = http.client.HttpClientRequest(*args, **kwargs) self.assertRaises(AssertionError, http.client._StartRequest, _FakeCurl(), req) def testWrongHostType(self): self._TestWrongTypes(unicode("localhost"), 8080, "GET", "/version") def testWrongUrlType(self): self._TestWrongTypes("localhost", 8080, "GET", unicode("/version")) def testWrongMethodType(self): self._TestWrongTypes("localhost", 8080, unicode("GET"), "/version") def testWrongHeaderType(self): self._TestWrongTypes("localhost", 8080, "GET", "/version", headers={ unicode("foo"): "bar", }) def testWrongPostDataType(self): self._TestWrongTypes("localhost", 8080, "GET", "/version", post_data=unicode("verylongdata" * 100)) class _EmptyCurlMulti: def perform(self): return (pycurl.E_MULTI_OK, 0) def info_read(self): return (0, [], []) class TestClientProcessRequests(unittest.TestCase): def testEmpty(self): requests = [] http.client.ProcessRequests(requests, _curl=NotImplemented, _curl_multi=_EmptyCurlMulti) self.assertEqual(requests, []) class TestProcessCurlRequests(unittest.TestCase): class _FakeCurlMulti: def __init__(self): self.handles = [] self.will_fail = [] self._expect = ["perform"] self._counter = itertools.count() def add_handle(self, curl): assert curl not in self.handles self.handles.append(curl) if self._counter.next() % 3 == 0: self.will_fail.append(curl) def remove_handle(self, curl): self.handles.remove(curl) def perform(self): assert self._expect.pop(0) == "perform" if self._counter.next() % 2 == 0: self._expect.append("perform") return (pycurl.E_CALL_MULTI_PERFORM, None) self._expect.append("info_read") return (pycurl.E_MULTI_OK, len(self.handles)) def info_read(self): assert self._expect.pop(0) == "info_read" successful = [] failed = [] if self.handles: if self._counter.next() % 17 == 0: curl = self.handles[0] if curl in 
self.will_fail: failed.append((curl, -1, "test error")) else: successful.append(curl) remaining_messages = len(self.handles) % 3 if remaining_messages > 0: self._expect.append("info_read") else: self._expect.append("select") else: remaining_messages = 0 self._expect.append("select") return (remaining_messages, successful, failed) def select(self, timeout): # Never compare floats for equality assert timeout >= 0.95 and timeout <= 1.05 assert self._expect.pop(0) == "select" self._expect.append("perform") def test(self): requests = [_FakeCurl() for _ in range(10)] multi = self._FakeCurlMulti() for (curl, errmsg) in http.client._ProcessCurlRequests(multi, requests): self.assertTrue(curl not in multi.handles) if curl in multi.will_fail: self.assertTrue("test error" in errmsg) else: self.assertTrue(errmsg is None) self.assertFalse(multi.handles) self.assertEqual(multi._expect, ["select"]) class TestProcessRequests(unittest.TestCase): class _DummyCurlMulti: pass def testNoMonitor(self): self._Test(False) def testWithMonitor(self): self._Test(True) class _MonitorChecker: def __init__(self): self._monitor = None def GetMonitor(self): return self._monitor def __call__(self, monitor): assert callable(monitor.GetLockInfo) self._monitor = monitor def _Test(self, use_monitor): def cfg_fn(port, curl): curl.opts["__port__"] = port def _LockCheckReset(monitor, req): self.assertTrue(monitor._lock.is_owned(shared=0), msg="Lock must be owned in exclusive mode") assert not hasattr(req, "lockcheck__") setattr(req, "lockcheck__", True) def _BuildNiceName(port, default=None): if port % 5 == 0: return "nicename%s" % port else: # Use standard name return default requests = \ [http.client.HttpClientRequest("localhost", i, "POST", "/version%s" % i, curl_config_fn=compat.partial(cfg_fn, i), completion_cb=NotImplementedError, nicename=_BuildNiceName(i)) for i in range(15176, 15501)] requests_count = len(requests) if use_monitor: lock_monitor_cb = self._MonitorChecker() else: lock_monitor_cb = None def _ProcessRequests(multi, handles): self.assertTrue(isinstance(multi, self._DummyCurlMulti)) self.assertEqual(len(requests), len(handles)) self.assertTrue(compat.all(isinstance(curl, _FakeCurl) for curl in handles)) # Prepare for lock check for req in requests: assert req.completion_cb is NotImplementedError if use_monitor: req.completion_cb = \ compat.partial(_LockCheckReset, lock_monitor_cb.GetMonitor()) for idx, curl in enumerate(handles): try: port = curl.opts["__port__"] except KeyError: self.fail("Per-request config function was not called") if use_monitor: # Check if lock information is correct lock_info = lock_monitor_cb.GetMonitor().GetLockInfo(None) expected = \ [("rpc/%s" % (_BuildNiceName(handle.opts["__port__"], default=("localhost/version%s" % handle.opts["__port__"]))), None, [threading.currentThread().getName()], None) for handle in handles[idx:]] self.assertEqual(sorted(lock_info), sorted(expected)) if port % 3 == 0: response_code = http.HTTP_OK msg = None else: response_code = http.HttpNotFound.code msg = "test error" curl.info = { pycurl.RESPONSE_CODE: response_code, } # Prepare for reset self.assertFalse(curl.opts.pop(pycurl.POSTFIELDS)) self.assertTrue(callable(curl.opts.pop(pycurl.WRITEFUNCTION))) yield (curl, msg) if use_monitor: self.assertTrue(compat.all(req.lockcheck__ for req in requests)) if use_monitor: self.assertEqual(lock_monitor_cb.GetMonitor(), None) http.client.ProcessRequests(requests, lock_monitor_cb=lock_monitor_cb, _curl=_FakeCurl, _curl_multi=self._DummyCurlMulti, 
_curl_process=_ProcessRequests) for req in requests: if req.port % 3 == 0: self.assertTrue(req.success) self.assertEqual(req.error, None) else: self.assertFalse(req.success) self.assertTrue("test error" in req.error) # See if monitor was disabled if use_monitor: monitor = lock_monitor_cb.GetMonitor() self.assertEqual(monitor._pending_fn, None) self.assertEqual(monitor.GetLockInfo(None), []) else: self.assertEqual(lock_monitor_cb, None) self.assertEqual(len(requests), requests_count) def testBadRequest(self): bad_request = http.client.HttpClientRequest("localhost", 27784, "POST", "/version") bad_request.success = False self.assertRaises(AssertionError, http.client.ProcessRequests, [bad_request], _curl=NotImplemented, _curl_multi=NotImplemented, _curl_process=NotImplemented) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/docs_unittest.py0000744000000000000000000002433512271422343017652 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting documentation""" import unittest import re import itertools import operator from ganeti import _autoconf from ganeti import utils from ganeti import cmdlib from ganeti import build from ganeti import compat from ganeti import mcpu from ganeti import opcodes from ganeti import constants from ganeti.rapi import baserlib from ganeti.rapi import rlib2 from ganeti.rapi import connector import testutils VALID_URI_RE = re.compile(r"^[-/a-z0-9]*$") RAPI_OPCODE_EXCLUDE = compat.UniqueFrozenset([ # Not yet implemented opcodes.OpBackupQuery, opcodes.OpBackupRemove, opcodes.OpClusterConfigQuery, opcodes.OpClusterRepairDiskSizes, opcodes.OpClusterVerify, opcodes.OpClusterVerifyDisks, opcodes.OpInstanceChangeGroup, opcodes.OpInstanceMove, opcodes.OpNodeQueryvols, opcodes.OpOobCommand, opcodes.OpTagsSearch, opcodes.OpClusterActivateMasterIp, opcodes.OpClusterDeactivateMasterIp, opcodes.OpExtStorageDiagnose, # Difficult if not impossible opcodes.OpClusterDestroy, opcodes.OpClusterPostInit, opcodes.OpClusterRename, opcodes.OpNodeAdd, opcodes.OpNodeRemove, # Very sensitive in nature opcodes.OpRestrictedCommand, # Helper opcodes (e.g. submitted by LUs) opcodes.OpClusterVerifyConfig, opcodes.OpClusterVerifyGroup, opcodes.OpGroupEvacuate, opcodes.OpGroupVerifyDisks, # Test opcodes opcodes.OpTestAllocator, opcodes.OpTestDelay, opcodes.OpTestDummy, opcodes.OpTestJqueue, ]) def _ReadDocFile(filename): return utils.ReadFile("%s/doc/%s" % (testutils.GetSourceDir(), filename)) class TestHooksDocs(unittest.TestCase): HOOK_PATH_OK = compat.UniqueFrozenset([ "master-ip-turnup", "master-ip-turndown", ]) def test(self): """Check whether all hooks are documented. 
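
    The check works both ways: every hooks-enabled logical unit must have
    its opcode and hook directory documented in doc/hooks.rst, and every
    opcode or hook path documented there must still exist in the code.
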
""" hooksdoc = _ReadDocFile("hooks.rst") # Reverse mapping from LU to opcode lu2opcode = dict((lu, op) for (op, lu) in mcpu.Processor.DISPATCH_TABLE.items()) assert len(lu2opcode) == len(mcpu.Processor.DISPATCH_TABLE), \ "Found duplicate entries" hooks_paths = frozenset(re.findall("^:directory:\s*(.+)\s*$", hooksdoc, re.M)) self.assertTrue(self.HOOK_PATH_OK.issubset(hooks_paths), msg="Whitelisted path not found in documentation") raw_hooks_ops = re.findall("^OP_(?!CODE$).+$", hooksdoc, re.M) hooks_ops = set() duplicate_ops = set() for op in raw_hooks_ops: if op in hooks_ops: duplicate_ops.add(op) else: hooks_ops.add(op) self.assertFalse(duplicate_ops, msg="Found duplicate opcode documentation: %s" % utils.CommaJoin(duplicate_ops)) seen_paths = set() seen_ops = set() self.assertFalse(duplicate_ops, msg="Found duplicated hook documentation: %s" % utils.CommaJoin(duplicate_ops)) for name in dir(cmdlib): lucls = getattr(cmdlib, name) if (isinstance(lucls, type) and issubclass(lucls, cmdlib.LogicalUnit) and hasattr(lucls, "HPATH")): if lucls.HTYPE is None: continue opcls = lu2opcode.get(lucls, None) if opcls: seen_ops.add(opcls.OP_ID) self.assertTrue(opcls.OP_ID in hooks_ops, msg="Missing hook documentation for %s" % opcls.OP_ID) self.assertTrue(lucls.HPATH in hooks_paths, msg="Missing documentation for hook %s/%s" % (lucls.HTYPE, lucls.HPATH)) seen_paths.add(lucls.HPATH) missed_ops = hooks_ops - seen_ops missed_paths = hooks_paths - seen_paths - self.HOOK_PATH_OK self.assertFalse(missed_ops, msg="Op documents hook not existing anymore: %s" % utils.CommaJoin(missed_ops)) self.assertFalse(missed_paths, msg="Hook path does not exist in opcode: %s" % utils.CommaJoin(missed_paths)) class TestRapiDocs(unittest.TestCase): def _CheckRapiResource(self, uri, fixup, handler): docline = "%s resource." % uri self.assertEqual(handler.__doc__.splitlines()[0].strip(), docline, msg=("First line of %r's docstring is not %r" % (handler, docline))) # Apply fixes before testing for (rx, value) in fixup.items(): uri = rx.sub(value, uri) self.assertTrue(VALID_URI_RE.match(uri), msg="Invalid URI %r" % uri) def test(self): """Check whether all RAPI resources are documented. 
""" rapidoc = _ReadDocFile("rapi.rst") node_name = re.escape("[node_name]") instance_name = re.escape("[instance_name]") group_name = re.escape("[group_name]") network_name = re.escape("[network_name]") job_id = re.escape("[job_id]") disk_index = re.escape("[disk_index]") query_res = re.escape("[resource]") resources = connector.GetHandlers(node_name, instance_name, group_name, network_name, job_id, disk_index, query_res) handler_dups = utils.FindDuplicates(resources.values()) self.assertFalse(handler_dups, msg=("Resource handlers used more than once: %r" % handler_dups)) uri_check_fixup = { re.compile(node_name): "node1examplecom", re.compile(instance_name): "inst1examplecom", re.compile(group_name): "group4440", re.compile(network_name): "network5550", re.compile(job_id): "9409", re.compile(disk_index): "123", re.compile(query_res): "lock", } assert compat.all(VALID_URI_RE.match(value) for value in uri_check_fixup.values()), \ "Fixup values must be valid URIs, too" titles = [] prevline = None for line in rapidoc.splitlines(): if re.match(r"^\++$", line): titles.append(prevline) prevline = line prefix_exception = compat.UniqueFrozenset(["/", "/version", "/2"]) undocumented = [] used_uris = [] for key, handler in resources.iteritems(): # Regex objects if hasattr(key, "match"): self.assert_(key.pattern.startswith("^/2/"), msg="Pattern %r does not start with '^/2/'" % key.pattern) self.assertEqual(key.pattern[-1], "$") found = False for title in titles: if title.startswith("``") and title.endswith("``"): uri = title[2:-2] if key.match(uri): self._CheckRapiResource(uri, uri_check_fixup, handler) used_uris.append(uri) found = True break if not found: # TODO: Find better way of identifying resource undocumented.append(key.pattern) else: self.assert_(key.startswith("/2/") or key in prefix_exception, msg="Path %r does not start with '/2/'" % key) if ("``%s``" % key) in titles: self._CheckRapiResource(key, {}, handler) used_uris.append(key) else: undocumented.append(key) self.failIf(undocumented, msg=("Missing RAPI resource documentation for %s" % utils.CommaJoin(undocumented))) uri_dups = utils.FindDuplicates(used_uris) self.failIf(uri_dups, msg=("URIs matched by more than one resource: %s" % utils.CommaJoin(uri_dups))) self._FindRapiMissing(resources.values()) self._CheckTagHandlers(resources.values()) def _FindRapiMissing(self, handlers): used = frozenset(itertools.chain(*map(baserlib.GetResourceOpcodes, handlers))) unexpected = used & RAPI_OPCODE_EXCLUDE self.assertFalse(unexpected, msg=("Found RAPI resources for excluded opcodes: %s" % utils.CommaJoin(_GetOpIds(unexpected)))) missing = (frozenset(opcodes.OP_MAPPING.values()) - used - RAPI_OPCODE_EXCLUDE) self.assertFalse(missing, msg=("Missing RAPI resources for opcodes: %s" % utils.CommaJoin(_GetOpIds(missing)))) def _CheckTagHandlers(self, handlers): tag_handlers = filter(lambda x: issubclass(x, rlib2._R_Tags), handlers) self.assertEqual(frozenset(map(operator.attrgetter("TAG_LEVEL"), tag_handlers)), constants.VALID_TAG_TYPES) def _GetOpIds(ops): """Returns C{OP_ID} for all opcodes in passed sequence. 
""" return sorted(opcls.OP_ID for opcls in ops) class TestManpages(unittest.TestCase): """Manpage tests""" @staticmethod def _ReadManFile(name): return utils.ReadFile("%s/man/%s.rst" % (testutils.GetSourceDir(), name)) @staticmethod def _LoadScript(name): return build.LoadModule("scripts/%s" % name) def test(self): for script in _autoconf.GNT_SCRIPTS: self._CheckManpage(script, self._ReadManFile(script), self._LoadScript(script).commands.keys()) def _CheckManpage(self, script, mantext, commands): missing = [] for cmd in commands: pattern = r"^(\| )?\*\*%s\*\*" % re.escape(cmd) if not re.findall(pattern, mantext, re.DOTALL | re.MULTILINE): missing.append(cmd) self.failIf(missing, msg=("Manpage for '%s' missing documentation for %s" % (script, utils.CommaJoin(missing)))) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.hash_unittest.py0000744000000000000000000001065412244641676022265 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.hash""" import unittest import random import operator import tempfile from ganeti import constants from ganeti import utils import testutils class TestHmacFunctions(unittest.TestCase): # Digests can be checked with "openssl sha1 -hmac $key" def testSha1Hmac(self): self.assertEqual(utils.Sha1Hmac("", ""), "fbdb1d1b18aa6c08324b7d64b71fb76370690e1d") self.assertEqual(utils.Sha1Hmac("3YzMxZWE", "Hello World"), "ef4f3bda82212ecb2f7ce868888a19092481f1fd") self.assertEqual(utils.Sha1Hmac("TguMTA2K", ""), "f904c2476527c6d3e6609ab683c66fa0652cb1dc") longtext = 1500 * "The quick brown fox jumps over the lazy dog\n" self.assertEqual(utils.Sha1Hmac("3YzMxZWE", longtext), "35901b9a3001a7cdcf8e0e9d7c2e79df2223af54") def testSha1HmacSalt(self): self.assertEqual(utils.Sha1Hmac("TguMTA2K", "", salt="abc0"), "4999bf342470eadb11dfcd24ca5680cf9fd7cdce") self.assertEqual(utils.Sha1Hmac("TguMTA2K", "", salt="abc9"), "17a4adc34d69c0d367d4ffbef96fd41d4df7a6e8") self.assertEqual(utils.Sha1Hmac("3YzMxZWE", "Hello World", salt="xyz0"), "7f264f8114c9066afc9bb7636e1786d996d3cc0d") def testVerifySha1Hmac(self): self.assert_(utils.VerifySha1Hmac("", "", ("fbdb1d1b18aa6c08324b" "7d64b71fb76370690e1d"))) self.assert_(utils.VerifySha1Hmac("TguMTA2K", "", ("f904c2476527c6d3e660" "9ab683c66fa0652cb1dc"))) digest = "ef4f3bda82212ecb2f7ce868888a19092481f1fd" self.assert_(utils.VerifySha1Hmac("3YzMxZWE", "Hello World", digest)) self.assert_(utils.VerifySha1Hmac("3YzMxZWE", "Hello World", digest.lower())) self.assert_(utils.VerifySha1Hmac("3YzMxZWE", "Hello World", digest.upper())) self.assert_(utils.VerifySha1Hmac("3YzMxZWE", "Hello World", digest.title())) def testVerifySha1HmacSalt(self): self.assert_(utils.VerifySha1Hmac("TguMTA2K", "", ("17a4adc34d69c0d367d4" 
"ffbef96fd41d4df7a6e8"), salt="abc9")) self.assert_(utils.VerifySha1Hmac("3YzMxZWE", "Hello World", ("7f264f8114c9066afc9b" "b7636e1786d996d3cc0d"), salt="xyz0")) class TestFingerprintFiles(unittest.TestCase): def setUp(self): self.tmpfile = tempfile.NamedTemporaryFile() self.tmpfile2 = tempfile.NamedTemporaryFile() utils.WriteFile(self.tmpfile2.name, data="Hello World\n") self.results = { self.tmpfile.name: "da39a3ee5e6b4b0d3255bfef95601890afd80709", self.tmpfile2.name: "648a6a6ffffdaa0badb23b8baf90b6168dd16b3a", } def testSingleFile(self): self.assertEqual(utils.hash._FingerprintFile(self.tmpfile.name), self.results[self.tmpfile.name]) self.assertEqual(utils.hash._FingerprintFile("/no/such/file"), None) def testBigFile(self): self.tmpfile.write("A" * 8192) self.tmpfile.flush() self.assertEqual(utils.hash._FingerprintFile(self.tmpfile.name), "35b6795ca20d6dc0aff8c7c110c96cd1070b8c38") def testMultiple(self): all_files = self.results.keys() all_files.append("/no/such/file") self.assertEqual(utils.FingerprintFiles(self.results.keys()), self.results) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.jstore_unittest.py0000744000000000000000000000626312244641676021512 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing ganeti.jstore""" import re import unittest import random from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors from ganeti import jstore import testutils class TestFormatJobID(testutils.GanetiTestCase): def test(self): self.assertEqual(jstore.FormatJobID(0), 0) self.assertEqual(jstore.FormatJobID(30498), 30498) self.assertEqual(jstore.FormatJobID(319472592764518609), 319472592764518609) def testErrors(self): for i in [-1, -2288, -9667, -0.205641, 0.0, 0.1, 13041.4472, "", "Hello", [], [1], {}]: self.assertRaises(errors.ProgrammerError, jstore.FormatJobID, i) class TestGetArchiveDirectory(testutils.GanetiTestCase): def test(self): tests = [ ("0", [0, 1, 3343, 9712, 9999]), ("1", [10000, 13188, 19999]), ("29", [290000, 296041, 298796, 299999]), ("30", [300000, 309384]), ] for (exp, job_ids) in tests: for job_id in job_ids: fmt_id = jstore.FormatJobID(job_id) self.assertEqual(jstore.GetArchiveDirectory(fmt_id), exp) self.assertEqual(jstore.ParseJobId(fmt_id), job_id) def testErrors(self): self.assertRaises(errors.ParameterError, jstore.GetArchiveDirectory, None) self.assertRaises(errors.ParameterError, jstore.GetArchiveDirectory, "foo") class TestParseJobId(testutils.GanetiTestCase): def test(self): self.assertEqual(jstore.ParseJobId(29981), 29981) self.assertEqual(jstore.ParseJobId("12918"), 12918) def testErrors(self): self.assertRaises(errors.ParameterError, jstore.ParseJobId, "") self.assertRaises(errors.ParameterError, jstore.ParseJobId, "MXXI") self.assertRaises(errors.ParameterError, jstore.ParseJobId, []) class TestReadNumericFile(testutils.GanetiTestCase): def testNonExistingFile(self): result = jstore._ReadNumericFile("/tmp/this/file/does/not/exist") self.assertTrue(result is None) def testValidFile(self): tmpfile = self._CreateTempFile() for (data, exp) in [("123", 123), ("0\n", 0)]: utils.WriteFile(tmpfile, data=data) result = jstore._ReadNumericFile(tmpfile) self.assertEqual(result, exp) def testInvalidContent(self): tmpfile = self._CreateTempFile() utils.WriteFile(tmpfile, data="{wrong content") self.assertRaises(errors.JobQueueError, jstore._ReadNumericFile, tmpfile) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.daemon_unittest.py0000744000000000000000000006041312244641676021444 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for unittesting the daemon module""" import unittest import signal import os import socket import time import tempfile import shutil from ganeti import daemon from ganeti import errors from ganeti import constants from ganeti import utils import testutils class TestMainloop(testutils.GanetiTestCase): """Test daemon.Mainloop""" def setUp(self): testutils.GanetiTestCase.setUp(self) self.mainloop = daemon.Mainloop() self.sendsig_events = [] self.onsignal_events = [] def _CancelEvent(self, handle): self.mainloop.scheduler.cancel(handle) def _SendSig(self, sig): self.sendsig_events.append(sig) os.kill(os.getpid(), sig) def OnSignal(self, signum): self.onsignal_events.append(signum) def testRunAndTermBySched(self): self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() # terminates by _SendSig being scheduled self.assertEquals(self.sendsig_events, [signal.SIGTERM]) def testTerminatingSignals(self): self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.2, 1, self._SendSig, [signal.SIGINT]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGINT]) self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGINT, signal.SIGTERM]) def testSchedulerCancel(self): handle = self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.scheduler.cancel(handle) self.mainloop.scheduler.enter(0.2, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.3, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGTERM]) def testRegisterSignal(self): self.mainloop.RegisterSignal(self) self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGCHLD]) handle = self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.scheduler.cancel(handle) self.mainloop.scheduler.enter(0.2, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.3, 1, self._SendSig, [signal.SIGTERM]) # ...not delievered because they are scheduled after TERM self.mainloop.scheduler.enter(0.4, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.5, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGCHLD, signal.SIGTERM]) self.assertEquals(self.onsignal_events, self.sendsig_events) def testDeferredCancel(self): self.mainloop.RegisterSignal(self) now = time.time() self.mainloop.scheduler.enterabs(now + 0.1, 1, self._SendSig, [signal.SIGCHLD]) handle1 = self.mainloop.scheduler.enterabs(now + 0.3, 2, self._SendSig, [signal.SIGCHLD]) handle2 = self.mainloop.scheduler.enterabs(now + 0.4, 2, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enterabs(now + 0.2, 1, self._CancelEvent, [handle1]) self.mainloop.scheduler.enterabs(now + 0.2, 1, self._CancelEvent, [handle2]) self.mainloop.scheduler.enter(0.5, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGTERM]) self.assertEquals(self.onsignal_events, self.sendsig_events) def testReRun(self): self.mainloop.RegisterSignal(self) self.mainloop.scheduler.enter(0.1, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.2, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.3, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.scheduler.enter(0.4, 1, 
self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enter(0.5, 1, self._SendSig, [signal.SIGCHLD]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGCHLD, signal.SIGTERM]) self.assertEquals(self.onsignal_events, self.sendsig_events) self.mainloop.scheduler.enter(0.3, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGCHLD, signal.SIGCHLD, signal.SIGTERM, signal.SIGCHLD, signal.SIGCHLD, signal.SIGTERM]) self.assertEquals(self.onsignal_events, self.sendsig_events) def testPriority(self): # for events at the same time, the highest priority one executes first now = time.time() self.mainloop.scheduler.enterabs(now + 0.1, 2, self._SendSig, [signal.SIGCHLD]) self.mainloop.scheduler.enterabs(now + 0.1, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGTERM]) self.mainloop.scheduler.enter(0.2, 1, self._SendSig, [signal.SIGTERM]) self.mainloop.Run() self.assertEquals(self.sendsig_events, [signal.SIGTERM, signal.SIGCHLD, signal.SIGTERM]) class _MyAsyncUDPSocket(daemon.AsyncUDPSocket): def __init__(self, family): daemon.AsyncUDPSocket.__init__(self, family) self.received = [] self.error_count = 0 def handle_datagram(self, payload, ip, port): self.received.append((payload)) if payload == "terminate": os.kill(os.getpid(), signal.SIGTERM) elif payload == "error": raise errors.GenericError("error") def handle_error(self): self.error_count += 1 raise class _BaseAsyncUDPSocketTest: """Base class for AsyncUDPSocket tests""" family = None address = None def setUp(self): self.mainloop = daemon.Mainloop() self.server = _MyAsyncUDPSocket(self.family) self.client = _MyAsyncUDPSocket(self.family) self.server.bind((self.address, 0)) self.port = self.server.getsockname()[1] # Save utils.IgnoreSignals so we can do evil things to it... 
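    # (testSignaledWhileReceiving temporarily replaces it with a no-op
    # lambda so that reads look like they were interrupted by a signal;
    # keeping the original here lets tearDown restore it afterwards)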
self.saved_utils_ignoresignals = utils.IgnoreSignals def tearDown(self): self.server.close() self.client.close() # ...and restore it as well utils.IgnoreSignals = self.saved_utils_ignoresignals testutils.GanetiTestCase.tearDown(self) def testNoDoubleBind(self): self.assertRaises(socket.error, self.client.bind, (self.address, self.port)) def testAsyncClientServer(self): self.client.enqueue_send(self.address, self.port, "p1") self.client.enqueue_send(self.address, self.port, "p2") self.client.enqueue_send(self.address, self.port, "terminate") self.mainloop.Run() self.assertEquals(self.server.received, ["p1", "p2", "terminate"]) def testSyncClientServer(self): self.client.handle_write() self.client.enqueue_send(self.address, self.port, "p1") self.client.enqueue_send(self.address, self.port, "p2") while self.client.writable(): self.client.handle_write() self.server.process_next_packet() self.assertEquals(self.server.received, ["p1"]) self.server.process_next_packet() self.assertEquals(self.server.received, ["p1", "p2"]) self.client.enqueue_send(self.address, self.port, "p3") while self.client.writable(): self.client.handle_write() self.server.process_next_packet() self.assertEquals(self.server.received, ["p1", "p2", "p3"]) def testErrorHandling(self): self.client.enqueue_send(self.address, self.port, "p1") self.client.enqueue_send(self.address, self.port, "p2") self.client.enqueue_send(self.address, self.port, "error") self.client.enqueue_send(self.address, self.port, "p3") self.client.enqueue_send(self.address, self.port, "error") self.client.enqueue_send(self.address, self.port, "terminate") self.assertRaises(errors.GenericError, self.mainloop.Run) self.assertEquals(self.server.received, ["p1", "p2", "error"]) self.assertEquals(self.server.error_count, 1) self.assertRaises(errors.GenericError, self.mainloop.Run) self.assertEquals(self.server.received, ["p1", "p2", "error", "p3", "error"]) self.assertEquals(self.server.error_count, 2) self.mainloop.Run() self.assertEquals(self.server.received, ["p1", "p2", "error", "p3", "error", "terminate"]) self.assertEquals(self.server.error_count, 2) def testSignaledWhileReceiving(self): utils.IgnoreSignals = lambda fn, *args, **kwargs: None self.client.enqueue_send(self.address, self.port, "p1") self.client.enqueue_send(self.address, self.port, "p2") self.server.handle_read() self.assertEquals(self.server.received, []) self.client.enqueue_send(self.address, self.port, "terminate") utils.IgnoreSignals = self.saved_utils_ignoresignals self.mainloop.Run() self.assertEquals(self.server.received, ["p1", "p2", "terminate"]) def testOversizedDatagram(self): oversized_data = (constants.MAX_UDP_DATA_SIZE + 1) * "a" self.assertRaises(errors.UdpDataSizeError, self.client.enqueue_send, self.address, self.port, oversized_data) class TestAsyncIP4UDPSocket(testutils.GanetiTestCase, _BaseAsyncUDPSocketTest): """Test IP4 daemon.AsyncUDPSocket""" family = socket.AF_INET address = "127.0.0.1" def setUp(self): testutils.GanetiTestCase.setUp(self) _BaseAsyncUDPSocketTest.setUp(self) def tearDown(self): testutils.GanetiTestCase.tearDown(self) _BaseAsyncUDPSocketTest.tearDown(self) class TestAsyncIP6UDPSocket(testutils.GanetiTestCase, _BaseAsyncUDPSocketTest): """Test IP6 daemon.AsyncUDPSocket""" family = socket.AF_INET6 address = "::1" def setUp(self): testutils.GanetiTestCase.setUp(self) _BaseAsyncUDPSocketTest.setUp(self) def tearDown(self): testutils.GanetiTestCase.tearDown(self) _BaseAsyncUDPSocketTest.tearDown(self) class _MyAsyncStreamServer(daemon.AsyncStreamServer): 
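  # Test double around AsyncStreamServer: every accepted connection is
  # passed to handle_connection_fn, while error_count and expt_count
  # record how often handle_error/handle_expt fired, so tests can
  # assert on them.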
def __init__(self, family, address, handle_connection_fn): daemon.AsyncStreamServer.__init__(self, family, address) self.handle_connection_fn = handle_connection_fn self.error_count = 0 self.expt_count = 0 def handle_connection(self, connected_socket, client_address): self.handle_connection_fn(connected_socket, client_address) def handle_error(self): self.error_count += 1 self.close() raise def handle_expt(self): self.expt_count += 1 self.close() class _MyMessageStreamHandler(daemon.AsyncTerminatedMessageStream): def __init__(self, connected_socket, client_address, terminator, family, message_fn, client_id, unhandled_limit): daemon.AsyncTerminatedMessageStream.__init__(self, connected_socket, client_address, terminator, family, unhandled_limit) self.message_fn = message_fn self.client_id = client_id self.error_count = 0 def handle_message(self, message, message_id): self.message_fn(self, message, message_id) def handle_error(self): self.error_count += 1 raise class TestAsyncStreamServerTCP(testutils.GanetiTestCase): """Test daemon.AsyncStreamServer with a TCP connection""" family = socket.AF_INET def setUp(self): testutils.GanetiTestCase.setUp(self) self.mainloop = daemon.Mainloop() self.address = self.getAddress() self.server = _MyAsyncStreamServer(self.family, self.address, self.handle_connection) self.client_handler = _MyMessageStreamHandler self.unhandled_limit = None self.terminator = "\3" self.address = self.server.getsockname() self.clients = [] self.connections = [] self.messages = {} self.connect_terminate_count = 0 self.message_terminate_count = 0 self.next_client_id = 0 # Save utils.IgnoreSignals so we can do evil things to it... self.saved_utils_ignoresignals = utils.IgnoreSignals def tearDown(self): for c in self.clients: c.close() for c in self.connections: c.close() self.server.close() # ...and restore it as well utils.IgnoreSignals = self.saved_utils_ignoresignals testutils.GanetiTestCase.tearDown(self) def getAddress(self): return ("127.0.0.1", 0) def countTerminate(self, name): value = getattr(self, name) if value is not None: value -= 1 setattr(self, name, value) if value <= 0: os.kill(os.getpid(), signal.SIGTERM) def handle_connection(self, connected_socket, client_address): client_id = self.next_client_id self.next_client_id += 1 client_handler = self.client_handler(connected_socket, client_address, self.terminator, self.family, self.handle_message, client_id, self.unhandled_limit) self.connections.append(client_handler) self.countTerminate("connect_terminate_count") def handle_message(self, handler, message, message_id): self.messages.setdefault(handler.client_id, []) # We should just check that the message_ids are monotonically increasing. # If in the unit tests we never remove messages from the received queue, # though, we can just require that the queue length is the same as the # message id, before pushing the message to it. This forces a more # restrictive check, but we can live with this for now. 
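# Illustrative sketch (not part of the original test, and only a simplified
# model): daemon.AsyncTerminatedMessageStream frames messages by splitting
# the raw byte stream on a terminator and buffering any trailing partial
# message until more data arrives; message ids then simply count the
# complete messages seen on a connection, which is what makes the
# queue-length check above valid.
def _split_terminated(buf, terminator):
  """Return (complete_messages, remainder) for a terminator-framed buffer."""
  parts = buf.split(terminator)
  # Everything after the last terminator is an incomplete message and must
  # stay buffered for the next read
  return parts[:-1], parts[-1]

assert _split_terminated("one\3two\3partial", "\3") == (["one", "two"], "partial")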
self.assertEquals(len(self.messages[handler.client_id]), message_id) self.messages[handler.client_id].append(message) if message == "error": raise errors.GenericError("error") self.countTerminate("message_terminate_count") def getClient(self): client = socket.socket(self.family, socket.SOCK_STREAM) client.connect(self.address) self.clients.append(client) return client def tearDown(self): testutils.GanetiTestCase.tearDown(self) self.server.close() def testConnect(self): self.getClient() self.mainloop.Run() self.assertEquals(len(self.connections), 1) self.getClient() self.mainloop.Run() self.assertEquals(len(self.connections), 2) self.connect_terminate_count = 4 self.getClient() self.getClient() self.getClient() self.getClient() self.mainloop.Run() self.assertEquals(len(self.connections), 6) def testBasicMessage(self): self.connect_terminate_count = None client = self.getClient() client.send("ciao\3") self.mainloop.Run() self.assertEquals(len(self.connections), 1) self.assertEquals(len(self.messages[0]), 1) self.assertEquals(self.messages[0][0], "ciao") def testDoubleMessage(self): self.connect_terminate_count = None client = self.getClient() client.send("ciao\3") self.mainloop.Run() client.send("foobar\3") self.mainloop.Run() self.assertEquals(len(self.connections), 1) self.assertEquals(len(self.messages[0]), 2) self.assertEquals(self.messages[0][1], "foobar") def testComposedMessage(self): self.connect_terminate_count = None self.message_terminate_count = 3 client = self.getClient() client.send("one\3composed\3message\3") self.mainloop.Run() self.assertEquals(len(self.messages[0]), 3) self.assertEquals(self.messages[0], ["one", "composed", "message"]) def testLongTerminator(self): self.terminator = "\0\1\2" self.connect_terminate_count = None self.message_terminate_count = 3 client = self.getClient() client.send("one\0\1\2composed\0\1\2message\0\1\2") self.mainloop.Run() self.assertEquals(len(self.messages[0]), 3) self.assertEquals(self.messages[0], ["one", "composed", "message"]) def testErrorHandling(self): self.connect_terminate_count = None self.message_terminate_count = None client = self.getClient() client.send("one\3two\3error\3three\3") self.assertRaises(errors.GenericError, self.mainloop.Run) self.assertEquals(self.connections[0].error_count, 1) self.assertEquals(self.messages[0], ["one", "two", "error"]) client.send("error\3") self.assertRaises(errors.GenericError, self.mainloop.Run) self.assertEquals(self.connections[0].error_count, 2) self.assertEquals(self.messages[0], ["one", "two", "error", "three", "error"]) def testDoubleClient(self): self.connect_terminate_count = None self.message_terminate_count = 2 client1 = self.getClient() client2 = self.getClient() client1.send("c1m1\3") client2.send("c2m1\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["c1m1"]) self.assertEquals(self.messages[1], ["c2m1"]) def testUnterminatedMessage(self): self.connect_terminate_count = None self.message_terminate_count = 3 client1 = self.getClient() client2 = self.getClient() client1.send("message\3unterminated") client2.send("c2m1\3c2m2\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["message"]) self.assertEquals(self.messages[1], ["c2m1", "c2m2"]) client1.send("message\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["message", "unterminatedmessage"]) def testSignaledWhileAccepting(self): utils.IgnoreSignals = lambda fn, *args, **kwargs: None client1 = self.getClient() self.server.handle_accept() # When interrupted while accepting we don't have a 
connection, but we # didn't crash either. self.assertEquals(len(self.connections), 0) utils.IgnoreSignals = self.saved_utils_ignoresignals self.mainloop.Run() self.assertEquals(len(self.connections), 1) def testSendMessage(self): self.connect_terminate_count = None self.message_terminate_count = 3 client1 = self.getClient() client2 = self.getClient() client1.send("one\3composed\3message\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["one", "composed", "message"]) self.assertFalse(self.connections[0].writable()) self.assertFalse(self.connections[1].writable()) self.connections[0].send_message("r0") self.assert_(self.connections[0].writable()) self.assertFalse(self.connections[1].writable()) self.connections[0].send_message("r1") self.connections[0].send_message("r2") # We currently have no way to terminate the mainloop on write events, but # let's assume handle_write will be called if writable() is True. while self.connections[0].writable(): self.connections[0].handle_write() client1.setblocking(0) client2.setblocking(0) self.assertEquals(client1.recv(4096), "r0\3r1\3r2\3") self.assertRaises(socket.error, client2.recv, 4096) def testLimitedUnhandledMessages(self): self.connect_terminate_count = None self.message_terminate_count = 3 self.unhandled_limit = 2 client1 = self.getClient() client2 = self.getClient() client1.send("one\3composed\3long\3message\3") client2.send("c2one\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["one", "composed"]) self.assertEquals(self.messages[1], ["c2one"]) self.assertFalse(self.connections[0].readable()) self.assert_(self.connections[1].readable()) self.connections[0].send_message("r0") self.message_terminate_count = None client1.send("another\3") # when we write replies messages queued also get handled, but not the ones # in the socket. 
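# A simplified, hypothetical model of the flow-control rule this test pins
# down (not the real implementation): with unhandled_limit set, the stream
# stops being readable() once the count of received-but-unanswered messages
# reaches the limit, and reading resumes only as replies get sent.
class _FlowControlModel(object):
  def __init__(self, unhandled_limit):
    self.unhandled_limit = unhandled_limit
    self.received = 0  # messages passed to handle_message so far
    self.replied = 0   # replies pushed out via send_message

  def readable(self):
    # Stop reading from the socket while too many messages await replies
    return (self.unhandled_limit is None or
            self.received - self.replied < self.unhandled_limit)

_model = _FlowControlModel(2)
_model.received = 2
assert not _model.readable()  # at the limit: no more reads
_model.replied = 1
assert _model.readable()      # a reply went out, one more message may be read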
while self.connections[0].writable(): self.connections[0].handle_write() self.assertFalse(self.connections[0].readable()) self.assertEquals(self.messages[0], ["one", "composed", "long"]) self.connections[0].send_message("r1") self.connections[0].send_message("r2") while self.connections[0].writable(): self.connections[0].handle_write() self.assertEquals(self.messages[0], ["one", "composed", "long", "message"]) self.assert_(self.connections[0].readable()) def testLimitedUnhandledMessagesOne(self): self.connect_terminate_count = None self.message_terminate_count = 2 self.unhandled_limit = 1 client1 = self.getClient() client2 = self.getClient() client1.send("one\3composed\3message\3") client2.send("c2one\3") self.mainloop.Run() self.assertEquals(self.messages[0], ["one"]) self.assertEquals(self.messages[1], ["c2one"]) self.assertFalse(self.connections[0].readable()) self.assertFalse(self.connections[1].readable()) self.connections[0].send_message("r0") self.message_terminate_count = None while self.connections[0].writable(): self.connections[0].handle_write() self.assertFalse(self.connections[0].readable()) self.assertEquals(self.messages[0], ["one", "composed"]) self.connections[0].send_message("r2") self.connections[0].send_message("r3") while self.connections[0].writable(): self.connections[0].handle_write() self.assertEquals(self.messages[0], ["one", "composed", "message"]) self.assert_(self.connections[0].readable()) class TestAsyncStreamServerUnixPath(TestAsyncStreamServerTCP): """Test daemon.AsyncStreamServer with a Unix path connection""" family = socket.AF_UNIX def getAddress(self): self.tmpdir = tempfile.mkdtemp() return os.path.join(self.tmpdir, "server.sock") def tearDown(self): shutil.rmtree(self.tmpdir) TestAsyncStreamServerTCP.tearDown(self) class TestAsyncStreamServerUnixAbstract(TestAsyncStreamServerTCP): """Test daemon.AsyncStreamServer with a Unix abstract connection""" family = socket.AF_UNIX def getAddress(self): return "\0myabstractsocketaddress" class TestAsyncAwaker(testutils.GanetiTestCase): """Test daemon.AsyncAwaker""" family = socket.AF_INET def setUp(self): testutils.GanetiTestCase.setUp(self) self.mainloop = daemon.Mainloop() self.awaker = daemon.AsyncAwaker(signal_fn=self.handle_signal) self.signal_count = 0 self.signal_terminate_count = 1 def tearDown(self): self.awaker.close() def handle_signal(self): self.signal_count += 1 self.signal_terminate_count -= 1 if self.signal_terminate_count <= 0: os.kill(os.getpid(), signal.SIGTERM) def testBasicSignaling(self): self.awaker.signal() self.mainloop.Run() self.assertEquals(self.signal_count, 1) def testDoubleSignaling(self): self.awaker.signal() self.awaker.signal() self.mainloop.Run() # The second signal is never delivered self.assertEquals(self.signal_count, 1) def testReallyDoubleSignaling(self): self.assert_(self.awaker.readable()) self.awaker.signal() # Let's suppose two threads overlap, and both find need_signal True self.awaker.need_signal = True self.awaker.signal() self.mainloop.Run() # We still get only one signaling self.assertEquals(self.signal_count, 1) def testNoSignalFnArgument(self): myawaker = daemon.AsyncAwaker() self.assertRaises(socket.error, myawaker.handle_read) myawaker.signal() myawaker.handle_read() self.assertRaises(socket.error, myawaker.handle_read) myawaker.signal() myawaker.signal() myawaker.handle_read() self.assertRaises(socket.error, myawaker.handle_read) myawaker.close() def testWrongSignalFnArgument(self): self.assertRaises(AssertionError, daemon.AsyncAwaker, 1) 
self.assertRaises(AssertionError, daemon.AsyncAwaker, "string") self.assertRaises(AssertionError, daemon.AsyncAwaker, signal_fn=1) self.assertRaises(AssertionError, daemon.AsyncAwaker, signal_fn="string") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.tools.node_daemon_setup_unittest.py0000744000000000000000000001524312244641676025031 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.tools.node_daemon_setup""" import unittest import shutil import tempfile import os.path import OpenSSL from ganeti import errors from ganeti import constants from ganeti import serializer from ganeti import pathutils from ganeti import compat from ganeti import utils from ganeti.tools import node_daemon_setup import testutils _SetupError = node_daemon_setup.SetupError class TestLoadData(unittest.TestCase): def testNoJson(self): for data in ["", "{", "}"]: self.assertRaises(errors.ParseError, node_daemon_setup.LoadData, data) def testInvalidDataStructure(self): raw = serializer.DumpJson({ "some other thing": False, }) self.assertRaises(errors.ParseError, node_daemon_setup.LoadData, raw) raw = serializer.DumpJson([]) self.assertRaises(errors.ParseError, node_daemon_setup.LoadData, raw) def testValidData(self): raw = serializer.DumpJson({}) self.assertEqual(node_daemon_setup.LoadData(raw), {}) class TestVerifyCertificate(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): testutils.GanetiTestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testNoCert(self): self.assertRaises(_SetupError, node_daemon_setup.VerifyCertificate, {}, _verify_fn=NotImplemented) def testVerificationSuccessWithCert(self): node_daemon_setup.VerifyCertificate({ constants.NDS_NODE_DAEMON_CERTIFICATE: "something", }, _verify_fn=lambda _: None) def testNoPrivateKey(self): cert_filename = testutils.TestDataFilename("cert1.pem") cert_pem = utils.ReadFile(cert_filename) self.assertRaises(errors.X509CertError, node_daemon_setup._VerifyCertificate, cert_pem, _check_fn=NotImplemented) def testInvalidCertificate(self): self.assertRaises(errors.X509CertError, node_daemon_setup._VerifyCertificate, "Something that's not a certificate", _check_fn=NotImplemented) @staticmethod def _Check(cert): assert cert.get_subject() def testSuccessfulCheck(self): cert_filename = testutils.TestDataFilename("cert2.pem") cert_pem = utils.ReadFile(cert_filename) result = \ node_daemon_setup._VerifyCertificate(cert_pem, _check_fn=self._Check) cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, result) self.assertTrue(cert) key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, result) self.assertTrue(key) def testMismatchingKey(self): cert1_path = 
testutils.TestDataFilename("cert1.pem") cert2_path = testutils.TestDataFilename("cert2.pem") # Extract certificate cert1 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(cert1_path)) cert1_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert1) # Extract mismatching key key2 = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(cert2_path)) key2_pem = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key2) try: node_daemon_setup._VerifyCertificate(cert1_pem + key2_pem, _check_fn=NotImplemented) except errors.X509CertError, err: self.assertEqual(err.args, ("(stdin)", "Certificate is not signed with given key")) else: self.fail("Exception was not raised") class TestVerifyClusterName(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): unittest.TestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testNoName(self): self.assertRaises(_SetupError, node_daemon_setup.VerifyClusterName, {}, _verify_fn=NotImplemented) @staticmethod def _FailingVerify(name): assert name == "somecluster.example.com" raise errors.GenericError() def testFailingVerification(self): data = { constants.NDS_CLUSTER_NAME: "somecluster.example.com", } self.assertRaises(errors.GenericError, node_daemon_setup.VerifyClusterName, data, _verify_fn=self._FailingVerify) def testSuccess(self): data = { constants.NDS_CLUSTER_NAME: "cluster.example.com", } result = \ node_daemon_setup.VerifyClusterName(data, _verify_fn=lambda _: None) self.assertEqual(result, "cluster.example.com") class TestVerifySsconf(unittest.TestCase): def testNoSsconf(self): self.assertRaises(_SetupError, node_daemon_setup.VerifySsconf, {}, NotImplemented, _verify_fn=NotImplemented) for items in [None, {}]: self.assertRaises(_SetupError, node_daemon_setup.VerifySsconf, { constants.NDS_SSCONF: items, }, NotImplemented, _verify_fn=NotImplemented) def _Check(self, names): self.assertEqual(frozenset(names), frozenset([ constants.SS_CLUSTER_NAME, constants.SS_INSTANCE_LIST, ])) def testSuccess(self): ssdata = { constants.SS_CLUSTER_NAME: "cluster.example.com", constants.SS_INSTANCE_LIST: [], } result = node_daemon_setup.VerifySsconf({ constants.NDS_SSCONF: ssdata, }, "cluster.example.com", _verify_fn=self._Check) self.assertEqual(result, ssdata) self.assertRaises(_SetupError, node_daemon_setup.VerifySsconf, { constants.NDS_SSCONF: ssdata, }, "wrong.example.com", _verify_fn=self._Check) def testInvalidKey(self): self.assertRaises(errors.GenericError, node_daemon_setup.VerifySsconf, { constants.NDS_SSCONF: { "no-valid-ssconf-key": "value", }, }, NotImplemented) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.storage_unittest.py0000744000000000000000000001154012271422343022765 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the ganeti.utils.storage module""" import mock import unittest from ganeti import constants from ganeti import objects from ganeti.utils import storage import testutils class TestGetStorageUnitForDiskTemplate(unittest.TestCase): def setUp(self): self._default_vg_name = "some_vg_name" self._cluster = mock.Mock() self._cluster.file_storage_dir = "my/file/storage/dir" self._cluster.shared_file_storage_dir = "my/shared/file/storage/dir" self._cfg = mock.Mock() self._cfg.GetVGName = mock.Mock(return_value=self._default_vg_name) self._cfg.GetClusterInfo = mock.Mock(return_value=self._cluster) def testGetDefaultStorageUnitForDiskTemplateLvm(self): for disk_template in [constants.DT_DRBD8, constants.DT_PLAIN]: (storage_type, storage_key) = \ storage._GetDefaultStorageUnitForDiskTemplate(self._cfg, disk_template) self.assertEqual(storage_type, constants.ST_LVM_VG) self.assertEqual(storage_key, self._default_vg_name) def testGetDefaultStorageUnitForDiskTemplateFile(self): (storage_type, storage_key) = \ storage._GetDefaultStorageUnitForDiskTemplate(self._cfg, constants.DT_FILE) self.assertEqual(storage_type, constants.ST_FILE) self.assertEqual(storage_key, self._cluster.file_storage_dir) def testGetDefaultStorageUnitForDiskTemplateSharedFile(self): (storage_type, storage_key) = \ storage._GetDefaultStorageUnitForDiskTemplate(self._cfg, constants.DT_SHARED_FILE) self.assertEqual(storage_type, constants.ST_FILE) self.assertEqual(storage_key, self._cluster.shared_file_storage_dir) def testGetDefaultStorageUnitForDiskTemplateDiskless(self): (storage_type, storage_key) = \ storage._GetDefaultStorageUnitForDiskTemplate(self._cfg, constants.DT_DISKLESS) self.assertEqual(storage_type, constants.ST_DISKLESS) self.assertEqual(storage_key, None) def testGetDefaultStorageUnitForSpindles(self): (storage_type, storage_key) = \ storage._GetDefaultStorageUnitForSpindles(self._cfg) self.assertEqual(storage_type, constants.ST_LVM_PV) self.assertEqual(storage_key, self._default_vg_name) class TestGetStorageUnitsOfCluster(unittest.TestCase): def setUp(self): storage._GetDefaultStorageUnitForDiskTemplate = \ mock.Mock(return_value=("foo", "bar")) self._cluster_cfg = objects.Cluster() self._enabled_disk_templates = \ [constants.DT_DRBD8, constants.DT_PLAIN, constants.DT_FILE, constants.DT_SHARED_FILE] self._cluster_cfg.enabled_disk_templates = \ self._enabled_disk_templates self._cfg = mock.Mock() self._cfg.GetClusterInfo = mock.Mock(return_value=self._cluster_cfg) self._cfg.GetVGName = mock.Mock(return_value="some_vg_name") def testGetStorageUnitsOfCluster(self): storage_units = storage.GetStorageUnitsOfCluster(self._cfg) self.assertEqual(len(storage_units), len(self._enabled_disk_templates)) def testGetStorageUnitsOfClusterWithSpindles(self): storage_units = storage.GetStorageUnitsOfCluster( self._cfg, include_spindles=True) self.assertEqual(len(storage_units), len(self._enabled_disk_templates) + 1) self.assertTrue(constants.ST_LVM_PV in [st for (st, sk) in storage_units]) class TestLookupSpaceInfoByStorageType(unittest.TestCase): def setUp(self): self._space_info = [ {"type": st, "name": st + "_key", "storage_size": 0, "storage_free": 0} for st in constants.STORAGE_TYPES] def testValidLookup(self): query_type = constants.ST_LVM_PV result = 
storage.LookupSpaceInfoByStorageType(self._space_info, query_type) self.assertEqual(query_type, result["type"]) def testNotInList(self): result = storage.LookupSpaceInfoByStorageType(self._space_info, "non_existing_type") self.assertEqual(None, result) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.uidpool_unittest.py0000744000000000000000000000747312244641676021663 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the uidpool module""" import os import tempfile import unittest from ganeti import constants from ganeti import uidpool from ganeti import errors from ganeti import pathutils import testutils class TestUidPool(testutils.GanetiTestCase): """Uid-pool tests""" def setUp(self): self.old_uid_min = constants.UIDPOOL_UID_MIN self.old_uid_max = constants.UIDPOOL_UID_MAX constants.UIDPOOL_UID_MIN = 1 constants.UIDPOOL_UID_MAX = 10 pathutils.UIDPOOL_LOCKDIR = tempfile.mkdtemp() def tearDown(self): constants.UIDPOOL_UID_MIN = self.old_uid_min constants.UIDPOOL_UID_MAX = self.old_uid_max for name in os.listdir(pathutils.UIDPOOL_LOCKDIR): os.unlink(os.path.join(pathutils.UIDPOOL_LOCKDIR, name)) os.rmdir(pathutils.UIDPOOL_LOCKDIR) def testParseUidPool(self): self.assertEqualValues( uidpool.ParseUidPool("1-100,200,"), [(1, 100), (200, 200)]) self.assertEqualValues( uidpool.ParseUidPool("1000:2000-2500", separator=":"), [(1000, 1000), (2000, 2500)]) self.assertEqualValues( uidpool.ParseUidPool("1000\n2000-2500", separator="\n"), [(1000, 1000), (2000, 2500)]) def testCheckUidPool(self): # UID < UIDPOOL_UID_MIN self.assertRaises(errors.OpPrereqError, uidpool.CheckUidPool, [(0, 0)]) # UID > UIDPOOL_UID_MAX self.assertRaises(errors.OpPrereqError, uidpool.CheckUidPool, [(11, 11)]) # lower boundary > higher boundary self.assertRaises(errors.OpPrereqError, uidpool.CheckUidPool, [(2, 1)]) def testFormatUidPool(self): self.assertEqualValues( uidpool.FormatUidPool([(1, 100), (200, 200)]), "1-100, 200") self.assertEqualValues( uidpool.FormatUidPool([(1, 100), (200, 200)], separator=":"), "1-100:200") self.assertEqualValues( uidpool.FormatUidPool([(1, 100), (200, 200)], separator="\n"), "1-100\n200") def testRequestUnusedUid(self): # Check with known used user-ids # # Test with user-id "0" and with our own user-id, both # of which are guaranteed to be used user-ids for uid in 0, os.getuid(): self.assertRaises(errors.LockError, uidpool.RequestUnusedUid, set([uid])) # Check with a single, known unused user-id # # We use "-1" here, which is not a valid user-id, so it's # guaranteed that it's unused. 
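# For orientation, a stripped-down, hypothetical model of the guarantee
# exercised below (the real uidpool additionally skips uids that own live
# processes): a uid is reserved by holding an exclusive, non-blocking lock
# on a per-uid file under pathutils.UIDPOOL_LOCKDIR, and freed by Unlock().
import fcntl
import os

def _try_reserve(lockdir, candidate_uid):
  path = os.path.join(lockdir, str(candidate_uid))
  fd = os.open(path, os.O_CREAT | os.O_RDWR, 0600)
  try:
    # LOCK_NB makes a second reservation attempt fail immediately
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
  except IOError:
    os.close(fd)
    raise errors.LockError("uid %s is already reserved" % candidate_uid)
  return fd  # the reservation lives as long as this descriptor stays open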
uid = uidpool.RequestUnusedUid(set([-1])) self.assertEqualValues(uid.GetUid(), -1) # Check uid-pool exhaustion # # uid "-1" is locked now, so RequestUnusedUid is expected to fail self.assertRaises(errors.LockError, uidpool.RequestUnusedUid, set([-1])) # Check unlocking uid.Unlock() # After unlocking, "-1" should be available again uid = uidpool.RequestUnusedUid(set([-1])) self.assertEqualValues(uid.GetUid(), -1) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/mocks.py0000644000000000000000000000631712271422343016076 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing a fake ConfigWriter""" import os from ganeti import utils from ganeti import netutils FAKE_CLUSTER_KEY = ("AAAAB3NzaC1yc2EAAAABIwAAAQEAsuGLw70et3eApJ/ZEJkAVZogIrm" "EYPQJvb1ll52Ti0nr80Wztxibaa8bYGzY22rQIAloIlePeTGcJceAYK" "PZgm0I/Mp2EUGg2NVsQZIzasz6cW0vYuiUbF9GkVlROmvOAykT58RfM" "L8RhPrjrQxZc+NXgZtgDugYSZcXHDLUyWM1xKUoYy0MqYG6ZXCC/Zno" "RThhmjOJgEmvwrMcTWQjmzH3NeJAxaBsEHR8tiVZ/Y23C/ULWLyNT6R" "fB+DE7IovsMQaS+83AK1Teg7RWNyQczachatf/JT8VjUqFYjJepPjMb" "vYdB2nQds7/+Bf40C/OpbvnAxna1kVtgFHAo18cQ==") class FakeConfig: """Fake configuration object""" def IsCluster(self): return True def GetNodeList(self): return ["a", "b", "c"] def GetRsaHostKey(self): return FAKE_CLUSTER_KEY def GetDsaHostKey(self): return FAKE_CLUSTER_KEY def GetClusterName(self): return "test.cluster" def GetMasterNode(self): return "a" def GetMasterNodeName(self): return netutils.Hostname.GetSysName() def GetDefaultIAllocator(self): return "testallocator" def GetNodeName(self, node_uuid): if node_uuid in self.GetNodeList(): return "node_%s.example.com" % (node_uuid,) else: return None def GetNodeNames(self, node_uuids): return map(self.GetNodeName, node_uuids) class FakeProc: """Fake processor object""" def Log(self, msg, *args, **kwargs): pass def LogWarning(self, msg, *args, **kwargs): pass def LogInfo(self, msg, *args, **kwargs): pass def LogStep(self, current, total, message): pass class FakeGLM: """Fake global lock manager object""" def list_owned(self, level): return set() class FakeContext: """Fake context object""" def __init__(self): self.cfg = FakeConfig() self.glm = FakeGLM() class FakeGetentResolver: """Fake runtime.GetentResolver""" def __init__(self): # As we normally don't run under root we use our own uid/gid for all # fields. This way we don't run into permission denied problems.
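# (Added clarification, not in the original source: the real
# runtime.GetentResolver resolves the configured daemon user/group names
# through the system's user database -- conceptually something like
# pwd.getpwnam(constants.MASTERD_USER).pw_uid for each field -- while this
# fake simply substitutes the current process' uid/gid everywhere.)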
uid = os.getuid() gid = os.getgid() self.masterd_uid = uid self.masterd_gid = gid self.confd_uid = uid self.confd_gid = gid self.rapi_uid = uid self.rapi_gid = gid self.noded_uid = uid self.noded_gid = gid self.daemons_gid = gid self.admin_gid = gid def LookupUid(self, uid): return "user%s" % uid def LookupGid(self, gid): return "group%s" % gid ganeti-2.9.3/test/py/ganeti.masterd.instance_unittest.py0000744000000000000000000001323412244641676023442 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.masterd.instance""" import os import sys import unittest from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import masterd from ganeti.masterd.instance import \ ImportExportTimeouts, _DiskImportExportBase, \ ComputeRemoteExportHandshake, CheckRemoteExportHandshake, \ ComputeRemoteImportDiskInfo, CheckRemoteExportDiskInfo, \ FormatProgress import testutils class TestMisc(unittest.TestCase): def testTimeouts(self): tmo = ImportExportTimeouts(0) self.assertEqual(tmo.connect, 0) self.assertEqual(tmo.listen, ImportExportTimeouts.DEFAULT_LISTEN_TIMEOUT) self.assertEqual(tmo.ready, ImportExportTimeouts.DEFAULT_READY_TIMEOUT) self.assertEqual(tmo.error, ImportExportTimeouts.DEFAULT_ERROR_TIMEOUT) self.assertEqual(tmo.progress, ImportExportTimeouts.DEFAULT_PROGRESS_INTERVAL) tmo = ImportExportTimeouts(999) self.assertEqual(tmo.connect, 999) tmo = ImportExportTimeouts(1, listen=2, error=3, ready=4, progress=5) self.assertEqual(tmo.connect, 1) self.assertEqual(tmo.listen, 2) self.assertEqual(tmo.error, 3) self.assertEqual(tmo.ready, 4) self.assertEqual(tmo.progress, 5) def testTimeoutExpired(self): self.assert_(utils.TimeoutExpired(100, 300, _time_fn=lambda: 500)) self.assertFalse(utils.TimeoutExpired(100, 300, _time_fn=lambda: 0)) self.assertFalse(utils.TimeoutExpired(100, 300, _time_fn=lambda: 100)) self.assertFalse(utils.TimeoutExpired(100, 300, _time_fn=lambda: 400)) def testDiskImportExportBaseDirect(self): self.assertRaises(AssertionError, _DiskImportExportBase, None, None, None, None, None, None, None) class TestRieHandshake(unittest.TestCase): def test(self): cds = "cd-secret" hs = ComputeRemoteExportHandshake(cds) self.assertEqual(len(hs), 3) self.assertEqual(hs[0], constants.RIE_VERSION) self.assertEqual(CheckRemoteExportHandshake(cds, hs), None) def testCheckErrors(self): self.assert_(CheckRemoteExportHandshake(None, None)) self.assert_(CheckRemoteExportHandshake("", "")) self.assert_(CheckRemoteExportHandshake("", ("xyz", "foo"))) def testCheckWrongHash(self): cds = "cd-secret999" self.assert_(CheckRemoteExportHandshake(cds, (0, "fakehash", "xyz"))) def testCheckWrongVersion(self): version = 14887 self.assertNotEqual(version, constants.RIE_VERSION) cds = "c28ac99" salt = "a19cf8cc06" msg = "%s:%s" % 
(version, constants.RIE_HANDSHAKE) hs = (version, utils.Sha1Hmac(cds, msg, salt=salt), salt) self.assert_(CheckRemoteExportHandshake(cds, hs)) class TestRieDiskInfo(unittest.TestCase): def test(self): cds = "bbf46ea9a" salt = "ee5ad9" di = ComputeRemoteImportDiskInfo(cds, salt, 0, "node1", 1234, "mag111") self.assertEqual(CheckRemoteExportDiskInfo(cds, 0, di), ("node1", 1234, "mag111")) for i in range(1, 100): # Wrong disk index self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, i, di) def testInvalidHostPort(self): cds = "3ZoJY8KtGJ" salt = "drK5oYiHWD" for host in [",", "...", "Hello World", "`", "!", "#", "\\"]: di = ComputeRemoteImportDiskInfo(cds, salt, 0, host, 1234, "magic") self.assertRaises(errors.OpPrereqError, CheckRemoteExportDiskInfo, cds, 0, di) for port in [-1, 792825908, "HelloWorld!", "`#", "\\\"", "_?_"]: di = ComputeRemoteImportDiskInfo(cds, salt, 0, "localhost", port, "magic") self.assertRaises(errors.OpPrereqError, CheckRemoteExportDiskInfo, cds, 0, di) def testCheckErrors(self): cds = "0776450535a" self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, "") self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ()) self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ("", 1, 2, 3, 4, 5)) # No host/port self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ("", 1234, "magic", "", "")) self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ("host", 0, "magic", "", "")) self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ("host", 1234, "", "", "")) # Wrong hash self.assertRaises(errors.GenericError, CheckRemoteExportDiskInfo, cds, 0, ("nodeX", 123, "magic", "fakehash", "xyz")) class TestFormatProgress(unittest.TestCase): def test(self): FormatProgress((0, 0, None, None)) FormatProgress((100, 3.3, 30, None)) FormatProgress((100, 3.3, 30, 900)) self.assertEqual(FormatProgress((1500, 12, 30, None)), "1.5G, 12.0 MiB/s, 30%") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.tools.prepare_node_join_unittest.py0000744000000000000000000002135712244641676025026 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
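# Illustrative sketch (an assumption, not part of the original file): the
# tests below hand the tool dicts shaped like the JSON document it reads on
# stdin when preparing a node to join a cluster. A minimal, hypothetical
# example of such input -- the literal key names here mirror the
# constants.SSHS_* values the tests use:
_EXAMPLE_JOIN_INPUT = {
  "cluster_name": "cluster.example.com",  # verified against the cluster
  "ssh_host_key": [                       # keys for the SSH daemon
    ("rsa", "rsapriv", "rsapub"),
  ],
  "ssh_root_key": [                       # keys for root's ~/.ssh
    ("rsa", "aaa", "bbb"),
  ],
}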
"""Script for testing ganeti.tools.prepare_node_join""" import unittest import shutil import tempfile import os.path import OpenSSL from ganeti import errors from ganeti import constants from ganeti import serializer from ganeti import pathutils from ganeti import compat from ganeti import utils from ganeti.tools import prepare_node_join import testutils _JoinError = prepare_node_join.JoinError class TestLoadData(unittest.TestCase): def testNoJson(self): self.assertRaises(errors.ParseError, prepare_node_join.LoadData, "") self.assertRaises(errors.ParseError, prepare_node_join.LoadData, "}") def testInvalidDataStructure(self): raw = serializer.DumpJson({ "some other thing": False, }) self.assertRaises(errors.ParseError, prepare_node_join.LoadData, raw) raw = serializer.DumpJson([]) self.assertRaises(errors.ParseError, prepare_node_join.LoadData, raw) def testValidData(self): raw = serializer.DumpJson({}) self.assertEqual(prepare_node_join.LoadData(raw), {}) class TestVerifyCertificate(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): testutils.GanetiTestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testNoCert(self): prepare_node_join.VerifyCertificate({}, _verify_fn=NotImplemented) def testGivenPrivateKey(self): cert_filename = testutils.TestDataFilename("cert2.pem") cert_pem = utils.ReadFile(cert_filename) self.assertRaises(_JoinError, prepare_node_join._VerifyCertificate, cert_pem, _check_fn=NotImplemented) def testInvalidCertificate(self): self.assertRaises(errors.X509CertError, prepare_node_join._VerifyCertificate, "Something that's not a certificate", _check_fn=NotImplemented) @staticmethod def _Check(cert): assert cert.get_subject() def testSuccessfulCheck(self): cert_filename = testutils.TestDataFilename("cert1.pem") cert_pem = utils.ReadFile(cert_filename) prepare_node_join._VerifyCertificate(cert_pem, _check_fn=self._Check) class TestVerifyClusterName(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): unittest.TestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testNoName(self): self.assertRaises(_JoinError, prepare_node_join.VerifyClusterName, {}, _verify_fn=NotImplemented) @staticmethod def _FailingVerify(name): assert name == "cluster.example.com" raise errors.GenericError() def testFailingVerification(self): data = { constants.SSHS_CLUSTER_NAME: "cluster.example.com", } self.assertRaises(errors.GenericError, prepare_node_join.VerifyClusterName, data, _verify_fn=self._FailingVerify) class TestUpdateSshDaemon(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() self.keyfiles = { constants.SSHK_RSA: (utils.PathJoin(self.tmpdir, "rsa.private"), utils.PathJoin(self.tmpdir, "rsa.public")), constants.SSHK_DSA: (utils.PathJoin(self.tmpdir, "dsa.private"), utils.PathJoin(self.tmpdir, "dsa.public")), } def tearDown(self): unittest.TestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testNoKeys(self): data_empty_keys = { constants.SSHS_SSH_HOST_KEY: [], } for data in [{}, data_empty_keys]: for dry_run in [False, True]: prepare_node_join.UpdateSshDaemon(data, dry_run, _runcmd_fn=NotImplemented, _keyfiles=NotImplemented) self.assertEqual(os.listdir(self.tmpdir), []) def _TestDryRun(self, data): prepare_node_join.UpdateSshDaemon(data, True, _runcmd_fn=NotImplemented, _keyfiles=self.keyfiles) self.assertEqual(os.listdir(self.tmpdir), []) def testDryRunRsa(self): self._TestDryRun({ 
constants.SSHS_SSH_HOST_KEY: [ (constants.SSHK_RSA, "rsapriv", "rsapub"), ], }) def testDryRunDsa(self): self._TestDryRun({ constants.SSHS_SSH_HOST_KEY: [ (constants.SSHK_DSA, "dsapriv", "dsapub"), ], }) def _RunCmd(self, fail, cmd, interactive=NotImplemented): self.assertTrue(interactive) self.assertEqual(cmd, [pathutils.DAEMON_UTIL, "reload-ssh-keys"]) if fail: exit_code = constants.EXIT_FAILURE else: exit_code = constants.EXIT_SUCCESS return utils.RunResult(exit_code, None, "stdout", "stderr", utils.ShellQuoteArgs(cmd), NotImplemented, NotImplemented) def _TestUpdate(self, failcmd): data = { constants.SSHS_SSH_HOST_KEY: [ (constants.SSHK_DSA, "dsapriv", "dsapub"), (constants.SSHK_RSA, "rsapriv", "rsapub"), ], } runcmd_fn = compat.partial(self._RunCmd, failcmd) if failcmd: self.assertRaises(_JoinError, prepare_node_join.UpdateSshDaemon, data, False, _runcmd_fn=runcmd_fn, _keyfiles=self.keyfiles) else: prepare_node_join.UpdateSshDaemon(data, False, _runcmd_fn=runcmd_fn, _keyfiles=self.keyfiles) self.assertEqual(sorted(os.listdir(self.tmpdir)), sorted([ "rsa.public", "rsa.private", "dsa.public", "dsa.private", ])) self.assertEqual(utils.ReadFile(utils.PathJoin(self.tmpdir, "rsa.public")), "rsapub") self.assertEqual(utils.ReadFile(utils.PathJoin(self.tmpdir, "rsa.private")), "rsapriv") self.assertEqual(utils.ReadFile(utils.PathJoin(self.tmpdir, "dsa.public")), "dsapub") self.assertEqual(utils.ReadFile(utils.PathJoin(self.tmpdir, "dsa.private")), "dsapriv") def testSuccess(self): self._TestUpdate(False) def testFailure(self): self._TestUpdate(True) class TestUpdateSshRoot(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() self.sshdir = utils.PathJoin(self.tmpdir, ".ssh") def tearDown(self): unittest.TestCase.tearDown(self) shutil.rmtree(self.tmpdir) def _GetHomeDir(self, user): self.assertEqual(user, constants.SSH_LOGIN_USER) return self.tmpdir def testNoKeys(self): data_empty_keys = { constants.SSHS_SSH_ROOT_KEY: [], } for data in [{}, data_empty_keys]: for dry_run in [False, True]: prepare_node_join.UpdateSshRoot(data, dry_run, _homedir_fn=NotImplemented) self.assertEqual(os.listdir(self.tmpdir), []) def testDryRun(self): data = { constants.SSHS_SSH_ROOT_KEY: [ (constants.SSHK_RSA, "aaa", "bbb"), ] } prepare_node_join.UpdateSshRoot(data, True, _homedir_fn=self._GetHomeDir) self.assertEqual(os.listdir(self.tmpdir), [".ssh"]) self.assertEqual(os.listdir(self.sshdir), []) def testUpdate(self): data = { constants.SSHS_SSH_ROOT_KEY: [ (constants.SSHK_DSA, "privatedsa", "ssh-dss pubdsa"), ] } prepare_node_join.UpdateSshRoot(data, False, _homedir_fn=self._GetHomeDir) self.assertEqual(os.listdir(self.tmpdir), [".ssh"]) self.assertEqual(sorted(os.listdir(self.sshdir)), sorted(["authorized_keys", "id_dsa", "id_dsa.pub"])) self.assertEqual(utils.ReadFile(utils.PathJoin(self.sshdir, "id_dsa")), "privatedsa") self.assertEqual(utils.ReadFile(utils.PathJoin(self.sshdir, "id_dsa.pub")), "ssh-dss pubdsa") self.assertEqual(utils.ReadFile(utils.PathJoin(self.sshdir, "authorized_keys")), "ssh-dss pubdsa\n") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.masterd.iallocator_unittest.py0000744000000000000000000001540212271422343023752 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.masterd.iallocator""" import unittest from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import objects from ganeti import ht from ganeti.masterd import iallocator import testutils class _StubIAllocator(object): def __init__(self, success): self.success = success class TestIAReqMultiInstanceAlloc(unittest.TestCase): def testResult(self): good_results = [ # First result (all instances "allocate") [ [["foo", ["a", "b"]], ["bar", ["c"]], ["baz", []]], [] ], # Second result (partial "allocate", partial "fail") [ [["bar", ["c", "b"]], ["baz", ["a"]]], ["foo"] ], # Third result (all instances "fail") [ [], ["foo", "bar", "baz"] ], ] bad_results = [ "foobar", 1234, [], [[]], [[], [], []], ] result_fn = iallocator.IAReqMultiInstanceAlloc.REQ_RESULT self.assertTrue(compat.all(map(result_fn, good_results))) self.assertFalse(compat.any(map(result_fn, bad_results))) class TestIARequestBase(unittest.TestCase): def testValidateResult(self): class _StubReqBase(iallocator.IARequestBase): MODE = constants.IALLOCATOR_MODE_ALLOC REQ_RESULT = ht.TBool stub = _StubReqBase() stub.ValidateResult(_StubIAllocator(True), True) self.assertRaises(errors.ResultValidationError, stub.ValidateResult, _StubIAllocator(True), "foo") stub.ValidateResult(_StubIAllocator(False), True) # We don't validate the result if the iallocation request was not successful stub.ValidateResult(_StubIAllocator(False), "foo") class _FakeConfigWithNdParams: def GetNdParams(self, _): return None class TestComputeBasicNodeData(unittest.TestCase): def setUp(self): self.fn = compat.partial(iallocator.IAllocator._ComputeBasicNodeData, _FakeConfigWithNdParams()) def testEmpty(self): self.assertEqual(self.fn({}, None), {}) def testSimple(self): node1 = objects.Node(name="node1", primary_ip="192.0.2.1", secondary_ip="192.0.2.2", offline=False, drained=False, master_candidate=True, master_capable=True, group="11112222", vm_capable=False) node2 = objects.Node(name="node2", primary_ip="192.0.2.3", secondary_ip="192.0.2.4", offline=True, drained=False, master_candidate=False, master_capable=False, group="11112222", vm_capable=True) assert node1 != node2 ninfo = { "#unused-1#": node1, "#unused-2#": node2, } self.assertEqual(self.fn(ninfo, None), { "node1": { "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "192.0.2.2", "offline": False, "drained": False, "master_candidate": True, "group": "11112222", "master_capable": True, "vm_capable": False, "ndparams": None, }, "node2": { "tags": [], "primary_ip": "192.0.2.3", "secondary_ip": "192.0.2.4", "offline": True, "drained": False, "master_candidate": False, "group": "11112222", "master_capable": False, "vm_capable": True, "ndparams": None, }, }) def testOfflineNode(self): for whitelist in [None, [], set(), ["node1"], ["node2"]]: result = 
self.fn({ "node1": objects.Node(name="node1", offline=True) }, whitelist) self.assertEqual(len(result), 1) self.assertTrue(result["node1"]["offline"]) def testWhitelist(self): for whitelist in [None, [], set(), ["node1"], ["node2"]]: result = self.fn({ "node1": objects.Node(name="node1", offline=False) }, whitelist) self.assertEqual(len(result), 1) if whitelist is None or "node1" in whitelist: self.assertFalse(result["node1"]["offline"]) else: self.assertTrue(result["node1"]["offline"]) class TestProcessStorageInfo(unittest.TestCase): def setUp(self): self.free_storage_file = 23 self.total_storage_file = 42 self.free_storage_lvm = 69 self.total_storage_lvm = 666 self.space_info = [{"name": "mynode", "type": constants.ST_FILE, "storage_free": self.free_storage_file, "storage_size": self.total_storage_file}, {"name": "mynode", "type": constants.ST_LVM_VG, "storage_free": self.free_storage_lvm, "storage_size": self.total_storage_lvm}, {"name": "mynode", "type": constants.ST_LVM_PV, "storage_free": 33, "storage_size": 44}] def testComputeStorageDataFromNodeInfoDefault(self): has_lvm = False node_name = "mynode" (total_disk, free_disk, total_spindles, free_spindles) = \ iallocator.IAllocator._ComputeStorageDataFromSpaceInfo( self.space_info, node_name, has_lvm) # FIXME: right now, iallocator ignores anything else than LVM, adjust # this test once that arbitrary storage is supported self.assertEqual(0, free_disk) self.assertEqual(0, total_disk) def testComputeStorageDataFromNodeInfoLvm(self): has_lvm = True node_name = "mynode" (total_disk, free_disk, total_spindles, free_spindles) = \ iallocator.IAllocator._ComputeStorageDataFromSpaceInfo( self.space_info, node_name, has_lvm) self.assertEqual(self.free_storage_lvm, free_disk) self.assertEqual(self.total_storage_lvm, total_disk) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.workerpool_unittest.py0000744000000000000000000004704212244641676022407 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2008, 2009, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for unittesting the workerpool module""" import unittest import threading import time import sys import zlib import random from ganeti import workerpool from ganeti import errors from ganeti import utils from ganeti import compat import testutils class CountingContext(object): def __init__(self): self._lock = threading.Condition(threading.Lock()) self.done = 0 def DoneTask(self): self._lock.acquire() try: self.done += 1 finally: self._lock.release() def GetDoneTasks(self): self._lock.acquire() try: return self.done finally: self._lock.release() @staticmethod def UpdateChecksum(current, value): return zlib.adler32(str(value), current) class CountingBaseWorker(workerpool.BaseWorker): def RunTask(self, ctx, text): ctx.DoneTask() class ChecksumContext: CHECKSUM_START = zlib.adler32("") def __init__(self): self.lock = threading.Condition(threading.Lock()) self.checksum = self.CHECKSUM_START @staticmethod def UpdateChecksum(current, value): return zlib.adler32(str(value), current) class ChecksumBaseWorker(workerpool.BaseWorker): def RunTask(self, ctx, number): name = "number%s" % number self.SetTaskName(name) # This assertion needs to be checked before updating the checksum. A # failing assertion will then cause the result to be wrong. assert self.getName() == ("%s/%s" % (self._worker_id, name)) ctx.lock.acquire() try: ctx.checksum = ctx.UpdateChecksum(ctx.checksum, number) finally: ctx.lock.release() class ListBuilderContext: def __init__(self): self.lock = threading.Lock() self.result = [] self.prioresult = {} class ListBuilderWorker(workerpool.BaseWorker): def RunTask(self, ctx, data): ctx.lock.acquire() try: ctx.result.append((self.GetCurrentPriority(), data)) ctx.prioresult.setdefault(self.GetCurrentPriority(), []).append(data) finally: ctx.lock.release() class DeferringTaskContext: def __init__(self): self.lock = threading.Lock() self.prioresult = {} self.samepriodefer = {} self.num2ordertaskid = {} class DeferringWorker(workerpool.BaseWorker): def RunTask(self, ctx, num, targetprio): ctx.lock.acquire() try: otilst = ctx.num2ordertaskid.setdefault(num, []) otilst.append(self._GetCurrentOrderAndTaskId()) if num in ctx.samepriodefer: del ctx.samepriodefer[num] raise workerpool.DeferTask() if self.GetCurrentPriority() > targetprio: raise workerpool.DeferTask(priority=self.GetCurrentPriority() - 1) ctx.prioresult.setdefault(self.GetCurrentPriority(), set()).add(num) finally: ctx.lock.release() class PriorityContext: def __init__(self): self.lock = threading.Lock() self.result = [] class PriorityWorker(workerpool.BaseWorker): def RunTask(self, ctx, data): ctx.lock.acquire() try: ctx.result.append((self.GetCurrentPriority(), data)) finally: ctx.lock.release() class NotImplementedWorker(workerpool.BaseWorker): def RunTask(self): raise NotImplementedError class TestWorkerpool(unittest.TestCase): """Workerpool tests""" def testCounting(self): ctx = CountingContext() wp = workerpool.WorkerPool("Test", 3, CountingBaseWorker) try: self._CheckWorkerCount(wp, 3) for i in range(10): wp.AddTask((ctx, "Hello world %s" % i)) wp.Quiesce() finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) self.assertEquals(ctx.GetDoneTasks(), 10) def testNoTasks(self): wp = workerpool.WorkerPool("Test", 3, CountingBaseWorker) try: self._CheckWorkerCount(wp, 3) self._CheckNoTasks(wp) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testNoTasksQuiesce(self): wp = workerpool.WorkerPool("Test", 3, CountingBaseWorker) try: self._CheckWorkerCount(wp, 3) self._CheckNoTasks(wp) wp.Quiesce() 
self._CheckNoTasks(wp) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testActive(self): ctx = CountingContext() wp = workerpool.WorkerPool("TestActive", 5, CountingBaseWorker) try: self._CheckWorkerCount(wp, 5) self.assertTrue(wp._active) # Process some tasks for _ in range(10): wp.AddTask((ctx, None)) wp.Quiesce() self._CheckNoTasks(wp) self.assertEquals(ctx.GetDoneTasks(), 10) # Repeat a few times for count in range(10): # Deactivate pool wp.SetActive(False) self._CheckNoTasks(wp) # Queue some more tasks for _ in range(10): wp.AddTask((ctx, None)) for _ in range(5): # Short delays to give other threads a chance to cause breakage time.sleep(.01) wp.AddTask((ctx, "Hello world %s" % 999)) self.assertFalse(wp._active) self.assertEquals(ctx.GetDoneTasks(), 10 + (count * 15)) # Start processing again wp.SetActive(True) self.assertTrue(wp._active) # Wait for tasks to finish wp.Quiesce() self._CheckNoTasks(wp) self.assertEquals(ctx.GetDoneTasks(), 10 + (count * 15) + 15) self._CheckWorkerCount(wp, 5) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testChecksum(self): # Tests whether all tasks are run and, since we're only using a single # thread, whether everything is started in order. wp = workerpool.WorkerPool("Test", 1, ChecksumBaseWorker) try: self._CheckWorkerCount(wp, 1) ctx = ChecksumContext() checksum = ChecksumContext.CHECKSUM_START for i in range(1, 100): checksum = ChecksumContext.UpdateChecksum(checksum, i) wp.AddTask((ctx, i)) wp.Quiesce() self._CheckNoTasks(wp) # Check sum ctx.lock.acquire() try: self.assertEqual(checksum, ctx.checksum) finally: ctx.lock.release() finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testAddManyTasks(self): ctx = CountingContext() wp = workerpool.WorkerPool("Test", 3, CountingBaseWorker) try: self._CheckWorkerCount(wp, 3) wp.AddManyTasks([(ctx, "Hello world %s" % i, ) for i in range(10)]) wp.AddTask((ctx, "A separate hello")) wp.AddTask((ctx, "Once more, hi!")) wp.AddManyTasks([(ctx, "Hello world %s" % i, ) for i in range(10)]) wp.Quiesce() self._CheckNoTasks(wp) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) self.assertEquals(ctx.GetDoneTasks(), 22) def testManyTasksSequence(self): ctx = CountingContext() wp = workerpool.WorkerPool("Test", 3, CountingBaseWorker) try: self._CheckWorkerCount(wp, 3) self.assertRaises(AssertionError, wp.AddManyTasks, ["Hello world %s" % i for i in range(10)]) self.assertRaises(AssertionError, wp.AddManyTasks, [i for i in range(10)]) self.assertRaises(AssertionError, wp.AddManyTasks, [], task_id=0) wp.AddManyTasks([(ctx, "Hello world %s" % i, ) for i in range(10)]) wp.AddTask((ctx, "A separate hello")) wp.Quiesce() self._CheckNoTasks(wp) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) self.assertEquals(ctx.GetDoneTasks(), 11) def _CheckNoTasks(self, wp): wp._lock.acquire() try: # The task queue must be empty now self.assertFalse(wp._tasks) self.assertFalse(wp._taskdata) finally: wp._lock.release() def _CheckWorkerCount(self, wp, num_workers): wp._lock.acquire() try: self.assertEqual(len(wp._workers), num_workers) finally: wp._lock.release() def testPriorityChecksum(self): # Tests whether all tasks are run and, since we're only using a single # thread, whether everything is started in order and respects the priority wp = workerpool.WorkerPool("Test", 1, ChecksumBaseWorker) try: self._CheckWorkerCount(wp, 1) ctx = ChecksumContext() data = {} tasks = [] priorities = [] for i in range(1, 333): prio = i % 7 tasks.append((ctx, i)) 
priorities.append(prio) data.setdefault(prio, []).append(i) wp.AddManyTasks(tasks, priority=priorities) wp.Quiesce() self._CheckNoTasks(wp) # Check sum ctx.lock.acquire() try: checksum = ChecksumContext.CHECKSUM_START for priority in sorted(data.keys()): for i in data[priority]: checksum = ChecksumContext.UpdateChecksum(checksum, i) self.assertEqual(checksum, ctx.checksum) finally: ctx.lock.release() self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testPriorityListManyTasks(self): # Tests whether all tasks are run and, since we're only using a single # thread, whether everything is started in order and respects the priority wp = workerpool.WorkerPool("Test", 1, ListBuilderWorker) try: self._CheckWorkerCount(wp, 1) ctx = ListBuilderContext() # Use static seed for this test rnd = random.Random(0) data = {} tasks = [] priorities = [] for i in range(1, 333): prio = int(rnd.random() * 10) tasks.append((ctx, i)) priorities.append(prio) data.setdefault(prio, []).append((prio, i)) wp.AddManyTasks(tasks, priority=priorities) self.assertRaises(errors.ProgrammerError, wp.AddManyTasks, [("x", ), ("y", )], priority=[1] * 5) self.assertRaises(errors.ProgrammerError, wp.AddManyTasks, [("x", ), ("y", )], task_id=[1] * 5) wp.Quiesce() self._CheckNoTasks(wp) # Check result ctx.lock.acquire() try: expresult = [] for priority in sorted(data.keys()): expresult.extend(data[priority]) self.assertEqual(expresult, ctx.result) finally: ctx.lock.release() self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testPriorityListSingleTasks(self): # Tests whether all tasks are run and, since we're only using a single # thread, whether everything is started in order and respects the priority wp = workerpool.WorkerPool("Test", 1, ListBuilderWorker) try: self._CheckWorkerCount(wp, 1) ctx = ListBuilderContext() # Use static seed for this test rnd = random.Random(26279) data = {} for i in range(1, 333): prio = int(rnd.random() * 30) wp.AddTask((ctx, i), priority=prio) data.setdefault(prio, []).append(i) # Cause some distortion if i % 11 == 0: time.sleep(.001) if i % 41 == 0: wp.Quiesce() wp.Quiesce() self._CheckNoTasks(wp) # Check result ctx.lock.acquire() try: self.assertEqual(data, ctx.prioresult) finally: ctx.lock.release() self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testDeferTask(self): # Tests whether all tasks are run and, since we're only using a single # thread, whether everything is started in order and respects the priority wp = workerpool.WorkerPool("Test", 1, DeferringWorker) try: self._CheckWorkerCount(wp, 1) ctx = DeferringTaskContext() # Use static seed for this test rnd = random.Random(14921) data = {} num2taskid = {} for i in range(1, 333): ctx.lock.acquire() try: if i % 5 == 0: ctx.samepriodefer[i] = True finally: ctx.lock.release() prio = int(rnd.random() * 30) num2taskid[i] = 1000 * i wp.AddTask((ctx, i, prio), priority=50, task_id=num2taskid[i]) data.setdefault(prio, set()).add(i) # Cause some distortion if i % 24 == 0: time.sleep(.001) if i % 31 == 0: wp.Quiesce() wp.Quiesce() self._CheckNoTasks(wp) # Check result ctx.lock.acquire() try: self.assertEqual(data, ctx.prioresult) all_order_ids = [] for (num, numordertaskid) in ctx.num2ordertaskid.items(): order_ids = map(compat.fst, numordertaskid) self.assertFalse(utils.FindDuplicates(order_ids), msg="Order ID has been reused") all_order_ids.extend(order_ids) for task_id in map(compat.snd, numordertaskid): 
self.assertEqual(task_id, num2taskid[num], msg=("Task %s used different task IDs" % num)) self.assertFalse(utils.FindDuplicates(all_order_ids), msg="Order ID has been reused") finally: ctx.lock.release() self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testChangeTaskPriority(self): wp = workerpool.WorkerPool("Test", 1, PriorityWorker) try: self._CheckWorkerCount(wp, 1) ctx = PriorityContext() # Use static seed for this test rnd = random.Random(4727) # Disable processing of tasks wp.SetActive(False) # No task ID self.assertRaises(workerpool.NoSuchTask, wp.ChangeTaskPriority, None, 0) # Pre-generate task IDs and priorities count = 100 task_ids = range(0, count) priorities = range(200, 200 + count) * 2 rnd.shuffle(task_ids) rnd.shuffle(priorities) # Make sure there are some duplicate priorities, but not all priorities[count * 2 - 10:count * 2 - 1] = \ priorities[count - 10: count - 1] assert len(priorities) == 2 * count assert priorities[0:(count - 1)] != priorities[count:(2 * count - 1)] # Add some tasks; this loop consumes the first half of all previously # generated priorities for (idx, task_id) in enumerate(task_ids): wp.AddTask((ctx, idx), priority=priorities.pop(), task_id=task_id) self.assertEqual(len(wp._tasks), len(task_ids)) self.assertEqual(len(wp._taskdata), len(task_ids)) # Tasks have been added, so half of the priorities should have been # consumed assert len(priorities) == len(task_ids) # Change task priority expected = [] for ((idx, task_id), prio) in zip(enumerate(task_ids), priorities): wp.ChangeTaskPriority(task_id, prio) expected.append((prio, idx)) self.assertEqual(len(wp._taskdata), len(task_ids)) # Half the entries are now abandoned tasks self.assertEqual(len(wp._tasks), len(task_ids) * 2) assert len(priorities) == count assert len(task_ids) == count # Start processing wp.SetActive(True) # Wait for tasks to finish wp.Quiesce() self._CheckNoTasks(wp) for task_id in task_ids: # All tasks are done self.assertRaises(workerpool.NoSuchTask, wp.ChangeTaskPriority, task_id, 0) # Check result ctx.lock.acquire() try: self.assertEqual(ctx.result, sorted(expected)) finally: ctx.lock.release() self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) def testChangeTaskPriorityInteralStructures(self): wp = workerpool.WorkerPool("Test", 1, NotImplementedWorker) try: self._CheckWorkerCount(wp, 1) # Use static seed for this test rnd = random.Random(643) (num1, num2) = rnd.sample(range(1000), 2) # Disable processing of tasks wp.SetActive(False) self.assertFalse(wp._tasks) self.assertFalse(wp._taskdata) # No priority or task ID wp.AddTask(()) self.assertEqual(wp._tasks, [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], ]) self.assertFalse(wp._taskdata) # No task ID wp.AddTask((), priority=7413) self.assertEqual(wp._tasks, [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [7413, 1, None, ()], ]) self.assertFalse(wp._taskdata) # Start adding real tasks wp.AddTask((), priority=10267659, task_id=num1) self.assertEqual(wp._tasks, [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [7413, 1, None, ()], [10267659, 2, num1, ()], ]) self.assertEqual(wp._taskdata, { num1: [10267659, 2, num1, ()], }) wp.AddTask((), priority=123, task_id=num2) self.assertEqual(sorted(wp._tasks), [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [123, 3, num2, ()], [7413, 1, None, ()], [10267659, 2, num1, ()], ]) self.assertEqual(wp._taskdata, { num1: [10267659, 2, num1, ()], num2: [123, 3, num2, ()], }) wp.ChangeTaskPriority(num1, 100) 
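# The assertions below depend on an implementation detail made visible via
# wp._tasks: ChangeTaskPriority does not remove the old heap entry but
# abandons it by setting its argument field to None, then pushes a fresh
# entry under the new priority; wp._taskdata always points at the live one.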
self.assertEqual(sorted(wp._tasks), [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [100, 2, num1, ()], [123, 3, num2, ()], [7413, 1, None, ()], [10267659, 2, num1, None], ]) self.assertEqual(wp._taskdata, { num1: [100, 2, num1, ()], num2: [123, 3, num2, ()], }) wp.ChangeTaskPriority(num2, 91337) self.assertEqual(sorted(wp._tasks), [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [100, 2, num1, ()], [123, 3, num2, None], [7413, 1, None, ()], [91337, 3, num2, ()], [10267659, 2, num1, None], ]) self.assertEqual(wp._taskdata, { num1: [100, 2, num1, ()], num2: [91337, 3, num2, ()], }) wp.ChangeTaskPriority(num1, 10139) self.assertEqual(sorted(wp._tasks), [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [100, 2, num1, None], [123, 3, num2, None], [7413, 1, None, ()], [10139, 2, num1, ()], [91337, 3, num2, ()], [10267659, 2, num1, None], ]) self.assertEqual(wp._taskdata, { num1: [10139, 2, num1, ()], num2: [91337, 3, num2, ()], }) # Change to the same priority once again wp.ChangeTaskPriority(num1, 10139) self.assertEqual(sorted(wp._tasks), [ [workerpool._DEFAULT_PRIORITY, 0, None, ()], [100, 2, num1, None], [123, 3, num2, None], [7413, 1, None, ()], [10139, 2, num1, None], [10139, 2, num1, ()], [91337, 3, num2, ()], [10267659, 2, num1, None], ]) self.assertEqual(wp._taskdata, { num1: [10139, 2, num1, ()], num2: [91337, 3, num2, ()], }) self._CheckWorkerCount(wp, 1) finally: wp.TerminateWorkers() self._CheckWorkerCount(wp, 0) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/testutils.py0000644000000000000000000001616712271422343017026 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utilities for unit testing""" import os import sys import stat import tempfile import unittest import logging import types from ganeti import utils def GetSourceDir(): return os.environ.get("TOP_SRCDIR", ".") def TestDataFilename(name): """Returns the filename of a given test data file. @type name: str @param name: the 'base' of the file name, as present in the test/data directory @rtype: str @return: the full path to the filename, such that it can be used in 'make distcheck' rules """ return "%s/test/data/%s" % (GetSourceDir(), name) def ReadTestData(name): """Returns the content of a test data file. This is just a very simple wrapper over utils.ReadFile with the proper test file name. """ return utils.ReadFile(TestDataFilename(name)) def _SetupLogging(verbose): """Set up logging infrastructure.
""" fmt = logging.Formatter("%(asctime)s: %(threadName)s" " %(levelname)s %(message)s") if verbose: handler = logging.StreamHandler() else: handler = logging.FileHandler(os.devnull, "a") handler.setLevel(logging.NOTSET) handler.setFormatter(fmt) root_logger = logging.getLogger("") root_logger.setLevel(logging.NOTSET) root_logger.addHandler(handler) class GanetiTestProgram(unittest.TestProgram): def runTests(self): """Runs all tests. """ _SetupLogging("LOGTOSTDERR" in os.environ) sys.stderr.write("Running %s\n" % self.progName) sys.stderr.flush() # Ensure assertions will be evaluated if not __debug__: raise Exception("Not running in debug mode, assertions would not be" " evaluated") # Check again, this time with a real assertion try: assert False except AssertionError: pass else: raise Exception("Assertion not evaluated") # The following piece of code is a backport from Python 2.6. Python 2.4/2.5 # only accept class instances as test runners. Being able to pass classes # reduces the amount of code necessary for using a custom test runner. # 2.6 and above should use their own code, however. if (self.testRunner and sys.hexversion < 0x2060000 and isinstance(self.testRunner, (type, types.ClassType))): try: self.testRunner = self.testRunner(verbosity=self.verbosity) except TypeError: # didn't accept the verbosity argument self.testRunner = self.testRunner() return unittest.TestProgram.runTests(self) class GanetiTestCase(unittest.TestCase): """Helper class for unittesting. This class defines a few utility functions that help in building unittests. Child classes must call the parent setup and cleanup. """ def setUp(self): self._temp_files = [] def tearDown(self): while self._temp_files: try: utils.RemoveFile(self._temp_files.pop()) except EnvironmentError, err: pass def assertFileContent(self, file_name, expected_content): """Checks that the content of a file is what we expect. @type file_name: str @param file_name: the file whose contents we should check @type expected_content: str @param expected_content: the content we expect """ actual_content = utils.ReadFile(file_name) self.assertEqual(actual_content, expected_content) def assertFileMode(self, file_name, expected_mode): """Checks that the mode of a file is what we expect. @type file_name: str @param file_name: the file whose contents we should check @type expected_mode: int @param expected_mode: the mode we expect """ st = os.stat(file_name) actual_mode = stat.S_IMODE(st.st_mode) self.assertEqual(actual_mode, expected_mode) def assertFileUid(self, file_name, expected_uid): """Checks that the user id of a file is what we expect. @type file_name: str @param file_name: the file whose contents we should check @type expected_uid: int @param expected_uid: the user id we expect """ st = os.stat(file_name) actual_uid = st.st_uid self.assertEqual(actual_uid, expected_uid) def assertFileGid(self, file_name, expected_gid): """Checks that the group id of a file is what we expect. @type file_name: str @param file_name: the file whose contents we should check @type expected_gid: int @param expected_gid: the group id we expect """ st = os.stat(file_name) actual_gid = st.st_gid self.assertEqual(actual_gid, expected_gid) def assertEqualValues(self, first, second, msg=None): """Compares two values whether they're equal. Tuples are automatically converted to lists before comparing. """ return self.assertEqual(UnifyValueType(first), UnifyValueType(second), msg=msg) def _CreateTempFile(self): """Creates a temporary file and adds it to the internal cleanup list. 
This method simplifies the creation and cleanup of temporary files during tests. """ fh, fname = tempfile.mkstemp(prefix="ganeti-test", suffix=".tmp") os.close(fh) self._temp_files.append(fname) return fname def patch_object(*args, **kwargs): """Unified patch_object for various versions of Python Mock. Different Python Mock versions provide incompatible versions of patching an object. More recent versions use _patch_object, older ones used patch_object. This function unifies the different variations. """ import mock try: return mock._patch_object(*args, **kwargs) except AttributeError: return mock.patch_object(*args, **kwargs) def UnifyValueType(data): """Converts all tuples into lists. This is useful for unittests where an external library doesn't keep types. """ if isinstance(data, (tuple, list)): return [UnifyValueType(i) for i in data] elif isinstance(data, dict): return dict([(UnifyValueType(key), UnifyValueType(value)) for (key, value) in data.iteritems()]) return data class CallCounter(object): """Utility class to count number of calls to a function/method. """ def __init__(self, fn): """Initializes this class. @type fn: Callable """ self._fn = fn self._count = 0 def __call__(self, *args, **kwargs): """Calls wrapped function with given parameters. """ self._count += 1 return self._fn(*args, **kwargs) def Count(self): """Returns number of calls. @rtype: number """ return self._count ganeti-2.9.3/test/py/ganeti.impexpd_unittest.py0000744000000000000000000002103512244641676021644 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
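# For reference, the GanetiTestCase helpers from testutils are typically
# used along these lines (a hedged sketch; the test class is hypothetical):
#
#   class ExampleTest(testutils.GanetiTestCase):
#     def test(self):
#       fname = self._CreateTempFile()    # removed again in tearDown()
#       utils.WriteFile(fname, data="hello")
#       self.assertFileContent(fname, "hello")
#
# Subclasses overriding setUp/tearDown must call the parent versions, as
# the GanetiTestCase docstring notes.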
"""Script for testing ganeti.impexpd""" import os import sys import re import unittest import socket from ganeti import constants from ganeti import objects from ganeti import compat from ganeti import utils from ganeti import errors from ganeti import impexpd import testutils class CmdBuilderConfig(objects.ConfigObject): __slots__ = [ "bind", "key", "cert", "ca", "host", "port", "ipv4", "ipv6", "compress", "magic", "connect_timeout", "connect_retries", "cmd_prefix", "cmd_suffix", ] def CheckCmdWord(cmd, word): wre = re.compile(r"\b%s\b" % re.escape(word)) return compat.any(wre.search(i) for i in cmd) class TestCommandBuilder(unittest.TestCase): def test(self): for mode in [constants.IEM_IMPORT, constants.IEM_EXPORT]: if mode == constants.IEM_IMPORT: comprcmd = "gunzip" elif mode == constants.IEM_EXPORT: comprcmd = "gzip" for compress in [constants.IEC_NONE, constants.IEC_GZIP]: for magic in [None, 10 * "-", "HelloWorld", "J9plh4nFo2", "24A02A81-2264-4B51-A882-A2AB9D85B420"]: opts = CmdBuilderConfig(magic=magic, compress=compress) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) magic_cmd = builder._GetMagicCommand() dd_cmd = builder._GetDdCommand() if magic: self.assert_(("M=%s" % magic) in magic_cmd) self.assert_(("M=%s" % magic) in dd_cmd) else: self.assertFalse(magic_cmd) for host in ["localhost", "198.51.100.4", "192.0.2.99"]: for port in [0, 1, 1234, 7856, 45452]: for cmd_prefix in [None, "PrefixCommandGoesHere|", "dd if=/dev/hda bs=1048576 |"]: for cmd_suffix in [None, "< /some/file/name", "| dd of=/dev/null"]: opts = CmdBuilderConfig(host=host, port=port, compress=compress, cmd_prefix=cmd_prefix, cmd_suffix=cmd_suffix) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) # Check complete command cmd = builder.GetCommand() self.assert_(isinstance(cmd, list)) if compress == constants.IEC_GZIP: self.assert_(CheckCmdWord(cmd, comprcmd)) if cmd_prefix is not None: self.assert_(compat.any(cmd_prefix in i for i in cmd)) if cmd_suffix is not None: self.assert_(compat.any(cmd_suffix in i for i in cmd)) # Check socat command socat_cmd = builder._GetSocatCommand() if mode == constants.IEM_IMPORT: ssl_addr = socat_cmd[-2].split(",") self.assert_(("OPENSSL-LISTEN:%s" % port) in ssl_addr) elif mode == constants.IEM_EXPORT: ssl_addr = socat_cmd[-1].split(",") self.assert_(("OPENSSL:%s:%s" % (host, port)) in ssl_addr) self.assert_("verify=1" in ssl_addr) def testIPv6(self): for mode in [constants.IEM_IMPORT, constants.IEM_EXPORT]: opts = CmdBuilderConfig(host="localhost", port=6789, ipv4=False, ipv6=False) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) cmd = builder._GetSocatCommand() self.assert_(compat.all("pf=" not in i for i in cmd)) # IPv4 opts = CmdBuilderConfig(host="localhost", port=6789, ipv4=True, ipv6=False) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) cmd = builder._GetSocatCommand() self.assert_(compat.any(",pf=ipv4" in i for i in cmd)) # IPv6 opts = CmdBuilderConfig(host="localhost", port=6789, ipv4=False, ipv6=True) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) cmd = builder._GetSocatCommand() self.assert_(compat.any(",pf=ipv6" in i for i in cmd)) # IPv4 and IPv6 opts = CmdBuilderConfig(host="localhost", port=6789, ipv4=True, ipv6=True) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) self.assertRaises(AssertionError, builder._GetSocatCommand) def testCommaError(self): opts = CmdBuilderConfig(host="localhost", port=1234, ca="/some/path/with,a/,comma") for mode in [constants.IEM_IMPORT, constants.IEM_EXPORT]: builder = impexpd.CommandBuilder(mode, 
opts, 1, 2, 3) self.assertRaises(errors.GenericError, builder.GetCommand) def testOptionLengthError(self): testopts = [ CmdBuilderConfig(bind="0.0.0.0" + ("A" * impexpd.SOCAT_OPTION_MAXLEN), port=1234, ca="/tmp/ca"), CmdBuilderConfig(host="localhost", port=1234, ca="/tmp/ca" + ("B" * impexpd.SOCAT_OPTION_MAXLEN)), CmdBuilderConfig(host="localhost", port=1234, key="/tmp/key" + ("B" * impexpd.SOCAT_OPTION_MAXLEN)), ] for opts in testopts: for mode in [constants.IEM_IMPORT, constants.IEM_EXPORT]: builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) self.assertRaises(errors.GenericError, builder.GetCommand) opts.host = "localhost" + ("A" * impexpd.SOCAT_OPTION_MAXLEN) builder = impexpd.CommandBuilder(constants.IEM_EXPORT, opts, 1, 2, 3) self.assertRaises(errors.GenericError, builder.GetCommand) def testModeError(self): mode = "foobarbaz" assert mode not in [constants.IEM_IMPORT, constants.IEM_EXPORT] opts = CmdBuilderConfig(host="localhost", port=1234) builder = impexpd.CommandBuilder(mode, opts, 1, 2, 3) self.assertRaises(errors.GenericError, builder.GetCommand) class TestVerifyListening(unittest.TestCase): def test(self): self.assertEqual(impexpd._VerifyListening(socket.AF_INET, "192.0.2.7", 1234), ("192.0.2.7", 1234)) self.assertEqual(impexpd._VerifyListening(socket.AF_INET6, "::1", 9876), ("::1", 9876)) self.assertEqual(impexpd._VerifyListening(socket.AF_INET6, "[::1]", 4563), ("::1", 4563)) self.assertEqual(impexpd._VerifyListening(socket.AF_INET6, "[2001:db8::1:4563]", 4563), ("2001:db8::1:4563", 4563)) def testError(self): for family in [socket.AF_UNIX, socket.AF_INET, socket.AF_INET6]: self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "", 1234) self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "192", 999) for family in [socket.AF_UNIX, socket.AF_INET6]: self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "192.0.2.7", 1234) self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "[2001:db8::1", 1234) self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "2001:db8::1]", 1234) for family in [socket.AF_UNIX, socket.AF_INET]: self.assertRaises(errors.GenericError, impexpd._VerifyListening, family, "::1", 1234) class TestCalcThroughput(unittest.TestCase): def test(self): self.assertEqual(impexpd._CalcThroughput([]), None) self.assertEqual(impexpd._CalcThroughput([(0, 0)]), None) samples = [ (0.0, 0.0), (10.0, 100.0), ] self.assertAlmostEqual(impexpd._CalcThroughput(samples), 10.0, 3) samples = [ (5.0, 7.0), (10.0, 100.0), (16.0, 181.0), ] self.assertAlmostEqual(impexpd._CalcThroughput(samples), 15.818, 3) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.cli_unittest.py0000744000000000000000000016441312244641676020755 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2008, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the cli module""" import copy import testutils import time import unittest import yaml from cStringIO import StringIO from ganeti import constants from ganeti import cli from ganeti import errors from ganeti import utils from ganeti import objects from ganeti import qlang from ganeti.errors import OpPrereqError, ParameterError class TestParseTimespec(unittest.TestCase): """Testing case for ParseTimespec""" def testValidTimes(self): """Test valid timespecs""" test_data = [ ("1s", 1), ("1", 1), ("1m", 60), ("1h", 60 * 60), ("1d", 60 * 60 * 24), ("1w", 60 * 60 * 24 * 7), ("4h", 4 * 60 * 60), ("61m", 61 * 60), ] for value, expected_result in test_data: self.failUnlessEqual(cli.ParseTimespec(value), expected_result) def testInvalidTime(self): """Test invalid timespecs""" test_data = [ "1y", "", "aaa", "s", ] for value in test_data: self.failUnlessRaises(OpPrereqError, cli.ParseTimespec, value) class TestSplitKeyVal(unittest.TestCase): """Testing case for cli._SplitKeyVal""" DATA = "a=b,c,no_d,-e" RESULT = {"a": "b", "c": True, "d": False, "e": None} RESULT_NOPREFIX = {"a": "b", "c": {}, "no_d": {}, "-e": {}} def testSplitKeyVal(self): """Test splitting""" self.failUnlessEqual(cli._SplitKeyVal("option", self.DATA, True), self.RESULT) def testDuplicateParam(self): """Test duplicate parameters""" for data in ("a=1,a=2", "a,no_a"): self.failUnlessRaises(ParameterError, cli._SplitKeyVal, "option", data, True) def testEmptyData(self): """Test how we handle splitting an empty string""" self.failUnlessEqual(cli._SplitKeyVal("option", "", True), {}) class TestIdentKeyVal(unittest.TestCase): """Testing case for cli.check_ident_key_val""" def testIdentKeyVal(self): """Test identkeyval""" def cikv(value): return cli.check_ident_key_val("option", "opt", value) self.assertEqual(cikv("foo:bar"), ("foo", {"bar": True})) self.assertEqual(cikv("foo:bar=baz"), ("foo", {"bar": "baz"})) self.assertEqual(cikv("bar:b=c,c=a"), ("bar", {"b": "c", "c": "a"})) self.assertEqual(cikv("no_bar"), ("bar", False)) self.assertRaises(ParameterError, cikv, "no_bar:foo") self.assertRaises(ParameterError, cikv, "no_bar:foo=baz") self.assertRaises(ParameterError, cikv, "bar:foo=baz,foo=baz") self.assertEqual(cikv("-foo"), ("foo", None)) self.assertRaises(ParameterError, cikv, "-foo:a=c") # Check negative numbers self.assertEqual(cikv("-1:remove"), ("-1", { "remove": True, })) self.assertEqual(cikv("-29447:add,size=4G"), ("-29447", { "add": True, "size": "4G", })) for i in ["-:", "-"]: self.assertEqual(cikv(i), ("", None)) @staticmethod def _csikv(value): return cli._SplitIdentKeyVal("opt", value, False) def testIdentKeyValNoPrefix(self): """Test identkeyval without prefixes""" test_cases = [ ("foo:bar", None), ("foo:no_bar", None), ("foo:bar=baz,bar=baz", None), ("foo", ("foo", {})), ("foo:bar=baz", ("foo", {"bar": "baz"})), ("no_foo:-1=baz,no_op=3", ("no_foo", {"-1": "baz", "no_op": "3"})), ] for (arg, res) in test_cases: if res is None: self.assertRaises(ParameterError, self._csikv, arg) else: self.assertEqual(self._csikv(arg), res) class TestMultilistIdentKeyVal(unittest.TestCase): """Test for cli.check_multilist_ident_key_val()""" @staticmethod def _cmikv(value): return cli.check_multilist_ident_key_val("option", "opt", value) def testListIdentKeyVal(self): test_cases = [ ("", None), ("foo", 
[ {"foo": {}} ]), ("foo:bar=baz", [ {"foo": {"bar": "baz"}} ]), ("foo:bar=baz/foo:bat=bad", None), ("foo:abc=42/bar:def=11", [ {"foo": {"abc": "42"}, "bar": {"def": "11"}} ]), ("foo:abc=42/bar:def=11,ghi=07", [ {"foo": {"abc": "42"}, "bar": {"def": "11", "ghi": "07"}} ]), ("foo:abc=42/bar:def=11//", None), ("foo:abc=42/bar:def=11,ghi=07//foobar", [ {"foo": {"abc": "42"}, "bar": {"def": "11", "ghi": "07"}}, {"foobar": {}} ]), ("foo:abc=42/bar:def=11,ghi=07//foobar:xyz=88", [ {"foo": {"abc": "42"}, "bar": {"def": "11", "ghi": "07"}}, {"foobar": {"xyz": "88"}} ]), ("foo:abc=42/bar:def=11,ghi=07//foobar:xyz=88/foo:uvw=314", [ {"foo": {"abc": "42"}, "bar": {"def": "11", "ghi": "07"}}, {"foobar": {"xyz": "88"}, "foo": {"uvw": "314"}} ]), ] for (arg, res) in test_cases: if res is None: self.assertRaises(ParameterError, self._cmikv, arg) else: self.assertEqual(res, self._cmikv(arg)) class TestToStream(unittest.TestCase): """Test the ToStream functions""" def testBasic(self): for data in ["foo", "foo %s", "foo %(test)s", "foo %s %s", "", ]: buf = StringIO() cli._ToStream(buf, data) self.failUnlessEqual(buf.getvalue(), data + "\n") def testParams(self): buf = StringIO() cli._ToStream(buf, "foo %s", 1) self.failUnlessEqual(buf.getvalue(), "foo 1\n") buf = StringIO() cli._ToStream(buf, "foo %s", (15,16)) self.failUnlessEqual(buf.getvalue(), "foo (15, 16)\n") buf = StringIO() cli._ToStream(buf, "foo %s %s", "a", "b") self.failUnlessEqual(buf.getvalue(), "foo a b\n") class TestGenerateTable(unittest.TestCase): HEADERS = dict([("f%s" % i, "Field%s" % i) for i in range(5)]) FIELDS1 = ["f1", "f2"] DATA1 = [ ["abc", 1234], ["foobar", 56], ["b", -14], ] def _test(self, headers, fields, separator, data, numfields, unitfields, units, expected): table = cli.GenerateTable(headers, fields, separator, data, numfields=numfields, unitfields=unitfields, units=units) self.assertEqual(table, expected) def testPlain(self): exp = [ "Field1 Field2", "abc 1234", "foobar 56", "b -14", ] self._test(self.HEADERS, self.FIELDS1, None, self.DATA1, None, None, "m", exp) def testNoFields(self): self._test(self.HEADERS, [], None, [[], []], None, None, "m", ["", "", ""]) self._test(None, [], None, [[], []], None, None, "m", ["", ""]) def testSeparator(self): for sep in ["#", ":", ",", "^", "!", "%", "|", "###", "%%", "!!!", "||"]: exp = [ "Field1%sField2" % sep, "abc%s1234" % sep, "foobar%s56" % sep, "b%s-14" % sep, ] self._test(self.HEADERS, self.FIELDS1, sep, self.DATA1, None, None, "m", exp) def testNoHeader(self): exp = [ "abc 1234", "foobar 56", "b -14", ] self._test(None, self.FIELDS1, None, self.DATA1, None, None, "m", exp) def testUnknownField(self): headers = { "f1": "Field1", } exp = [ "Field1 UNKNOWN", "abc 1234", "foobar 56", "b -14", ] self._test(headers, ["f1", "UNKNOWN"], None, self.DATA1, None, None, "m", exp) def testNumfields(self): fields = ["f1", "f2", "f3"] data = [ ["abc", 1234, 0], ["foobar", 56, 3], ["b", -14, "-"], ] exp = [ "Field1 Field2 Field3", "abc 1234 0", "foobar 56 3", "b -14 -", ] self._test(self.HEADERS, fields, None, data, ["f2", "f3"], None, "m", exp) def testUnitfields(self): expnosep = [ "Field1 Field2 Field3", "abc 1234 0M", "foobar 56 3M", "b -14 -", ] expsep = [ "Field1:Field2:Field3", "abc:1234:0M", "foobar:56:3M", "b:-14:-", ] for sep, expected in [(None, expnosep), (":", expsep)]: fields = ["f1", "f2", "f3"] data = [ ["abc", 1234, 0], ["foobar", 56, 3], ["b", -14, "-"], ] self._test(self.HEADERS, fields, sep, data, ["f2", "f3"], ["f3"], "h", expected) def testUnusual(self): data = [ ["%", 
"xyz"], ["%%", "abc"], ] exp = [ "Field1 Field2", "% xyz", "%% abc", ] self._test(self.HEADERS, ["f1", "f2"], None, data, None, None, "m", exp) class TestFormatQueryResult(unittest.TestCase): def test(self): fields = [ objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="size", title="Size", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="act", title="Active", kind=constants.QFT_BOOL), objects.QueryFieldDefinition(name="mem", title="Memory", kind=constants.QFT_UNIT), objects.QueryFieldDefinition(name="other", title="SomeList", kind=constants.QFT_OTHER), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, "nodeA"), (constants.RS_NORMAL, 128), (constants.RS_NORMAL, False), (constants.RS_NORMAL, 1468006), (constants.RS_NORMAL, [])], [(constants.RS_NORMAL, "other"), (constants.RS_NORMAL, 512), (constants.RS_NORMAL, True), (constants.RS_NORMAL, 16), (constants.RS_NORMAL, [1, 2, 3])], [(constants.RS_NORMAL, "xyz"), (constants.RS_NORMAL, 1024), (constants.RS_NORMAL, True), (constants.RS_NORMAL, 4096), (constants.RS_NORMAL, [{}, {}])], ]) self.assertEqual(cli.FormatQueryResult(response, unit="h", header=True), (cli.QR_NORMAL, [ "Name Size Active Memory SomeList", "nodeA 128 N 1.4T []", "other 512 Y 16M [1, 2, 3]", "xyz 1024 Y 4.0G [{}, {}]", ])) def testTimestampAndUnit(self): fields = [ objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="size", title="Size", kind=constants.QFT_UNIT), objects.QueryFieldDefinition(name="mtime", title="ModTime", kind=constants.QFT_TIMESTAMP), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, "a"), (constants.RS_NORMAL, 1024), (constants.RS_NORMAL, 0)], [(constants.RS_NORMAL, "b"), (constants.RS_NORMAL, 144996), (constants.RS_NORMAL, 1291746295)], ]) self.assertEqual(cli.FormatQueryResult(response, unit="m", header=True), (cli.QR_NORMAL, [ "Name Size ModTime", "a 1024 %s" % utils.FormatTime(0), "b 144996 %s" % utils.FormatTime(1291746295), ])) def testOverride(self): fields = [ objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="cust", title="Custom", kind=constants.QFT_OTHER), objects.QueryFieldDefinition(name="xt", title="XTime", kind=constants.QFT_TIMESTAMP), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, "x"), (constants.RS_NORMAL, ["a", "b", "c"]), (constants.RS_NORMAL, 1234)], [(constants.RS_NORMAL, "y"), (constants.RS_NORMAL, range(10)), (constants.RS_NORMAL, 1291746295)], ]) override = { "cust": (utils.CommaJoin, False), "xt": (hex, True), } self.assertEqual(cli.FormatQueryResult(response, unit="h", header=True, format_override=override), (cli.QR_NORMAL, [ "Name Custom XTime", "x a, b, c 0x4d2", "y 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 0x4cfe7bf7", ])) def testSeparator(self): fields = [ objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="count", title="Count", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="desc", title="Description", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, "instance1.example.com"), (constants.RS_NORMAL, 21125), (constants.RS_NORMAL, "Hello World!")], [(constants.RS_NORMAL, "mail.other.net"), (constants.RS_NORMAL, -9000), (constants.RS_NORMAL, "a,b,c")], ]) for sep in [":", "|", "#", "|||", "###", "@@@", "@#@"]: 
for header in [None, "Name%sCount%sDescription" % (sep, sep)]: exp = [] if header: exp.append(header) exp.extend([ "instance1.example.com%s21125%sHello World!" % (sep, sep), "mail.other.net%s-9000%sa,b,c" % (sep, sep), ]) self.assertEqual(cli.FormatQueryResult(response, separator=sep, header=bool(header)), (cli.QR_NORMAL, exp)) def testStatusWithUnknown(self): fields = [ objects.QueryFieldDefinition(name="id", title="ID", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="unk", title="unk", kind=constants.QFT_UNKNOWN), objects.QueryFieldDefinition(name="unavail", title="Unavail", kind=constants.QFT_BOOL), objects.QueryFieldDefinition(name="nodata", title="NoData", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="offline", title="OffLine", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, 1), (constants.RS_UNKNOWN, None), (constants.RS_NORMAL, False), (constants.RS_NORMAL, ""), (constants.RS_OFFLINE, None)], [(constants.RS_NORMAL, 2), (constants.RS_UNKNOWN, None), (constants.RS_NODATA, None), (constants.RS_NORMAL, "x"), (constants.RS_OFFLINE, None)], [(constants.RS_NORMAL, 3), (constants.RS_UNKNOWN, None), (constants.RS_NORMAL, False), (constants.RS_UNAVAIL, None), (constants.RS_OFFLINE, None)], ]) self.assertEqual(cli.FormatQueryResult(response, header=True, separator="|", verbose=True), (cli.QR_UNKNOWN, [ "ID|unk|Unavail|NoData|OffLine", "1|(unknown)|N||(offline)", "2|(unknown)|(nodata)|x|(offline)", "3|(unknown)|N|(unavail)|(offline)", ])) self.assertEqual(cli.FormatQueryResult(response, header=True, separator="|", verbose=False), (cli.QR_UNKNOWN, [ "ID|unk|Unavail|NoData|OffLine", "1|??|N||*", "2|??|?|x|*", "3|??|N|-|*", ])) def testNoData(self): fields = [ objects.QueryFieldDefinition(name="id", title="ID", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[]) self.assertEqual(cli.FormatQueryResult(response, header=True), (cli.QR_NORMAL, ["ID Name"])) def testNoDataWithUnknown(self): fields = [ objects.QueryFieldDefinition(name="id", title="ID", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="unk", title="unk", kind=constants.QFT_UNKNOWN), ] response = objects.QueryResponse(fields=fields, data=[]) self.assertEqual(cli.FormatQueryResult(response, header=False), (cli.QR_UNKNOWN, [])) def testStatus(self): fields = [ objects.QueryFieldDefinition(name="id", title="ID", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="unavail", title="Unavail", kind=constants.QFT_BOOL), objects.QueryFieldDefinition(name="nodata", title="NoData", kind=constants.QFT_TEXT), objects.QueryFieldDefinition(name="offline", title="OffLine", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[ [(constants.RS_NORMAL, 1), (constants.RS_NORMAL, False), (constants.RS_NORMAL, ""), (constants.RS_OFFLINE, None)], [(constants.RS_NORMAL, 2), (constants.RS_NODATA, None), (constants.RS_NORMAL, "x"), (constants.RS_NORMAL, "abc")], [(constants.RS_NORMAL, 3), (constants.RS_NORMAL, False), (constants.RS_UNAVAIL, None), (constants.RS_OFFLINE, None)], ]) self.assertEqual(cli.FormatQueryResult(response, header=False, separator="|", verbose=True), (cli.QR_INCOMPLETE, [ "1|N||(offline)", "2|(nodata)|x|abc", "3|N|(unavail)|(offline)", ])) self.assertEqual(cli.FormatQueryResult(response, header=False, separator="|", verbose=False), (cli.QR_INCOMPLETE, [ "1|N||*", "2|?|x|abc", "3|N|-|*", 
])) def testInvalidFieldType(self): fields = [ objects.QueryFieldDefinition(name="x", title="x", kind="#some#other#type"), ] response = objects.QueryResponse(fields=fields, data=[]) self.assertRaises(NotImplementedError, cli.FormatQueryResult, response) def testInvalidFieldStatus(self): fields = [ objects.QueryFieldDefinition(name="x", title="x", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[[(-1, None)]]) self.assertRaises(NotImplementedError, cli.FormatQueryResult, response) response = objects.QueryResponse(fields=fields, data=[[(-1, "x")]]) self.assertRaises(AssertionError, cli.FormatQueryResult, response) def testEmptyFieldTitle(self): fields = [ objects.QueryFieldDefinition(name="x", title="", kind=constants.QFT_TEXT), ] response = objects.QueryResponse(fields=fields, data=[]) self.assertRaises(AssertionError, cli.FormatQueryResult, response) class _MockJobPollCb(cli.JobPollCbBase, cli.JobPollReportCbBase): def __init__(self, tc, job_id): self.tc = tc self.job_id = job_id self._wfjcr = [] self._jobstatus = [] self._expect_notchanged = False self._expect_log = [] def CheckEmpty(self): self.tc.assertFalse(self._wfjcr) self.tc.assertFalse(self._jobstatus) self.tc.assertFalse(self._expect_notchanged) self.tc.assertFalse(self._expect_log) def AddWfjcResult(self, *args): self._wfjcr.append(args) def AddQueryJobsResult(self, *args): self._jobstatus.append(args) def WaitForJobChangeOnce(self, job_id, fields, prev_job_info, prev_log_serial): self.tc.assertEqual(job_id, self.job_id) self.tc.assertEqualValues(fields, ["status"]) self.tc.assertFalse(self._expect_notchanged) self.tc.assertFalse(self._expect_log) (exp_prev_job_info, exp_prev_log_serial, result) = self._wfjcr.pop(0) self.tc.assertEqualValues(prev_job_info, exp_prev_job_info) self.tc.assertEqual(prev_log_serial, exp_prev_log_serial) if result == constants.JOB_NOTCHANGED: self._expect_notchanged = True elif result: (_, logmsgs) = result if logmsgs: self._expect_log.extend(logmsgs) return result def QueryJobs(self, job_ids, fields): self.tc.assertEqual(job_ids, [self.job_id]) self.tc.assertEqualValues(fields, ["status", "opstatus", "opresult"]) self.tc.assertFalse(self._expect_notchanged) self.tc.assertFalse(self._expect_log) result = self._jobstatus.pop(0) self.tc.assertEqual(len(fields), len(result)) return [result] def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg): self.tc.assertEqual(job_id, self.job_id) self.tc.assertEqualValues((serial, timestamp, log_type, log_msg), self._expect_log.pop(0)) def ReportNotChanged(self, job_id, status): self.tc.assertEqual(job_id, self.job_id) self.tc.assert_(self._expect_notchanged) self._expect_notchanged = False class TestGenericPollJob(testutils.GanetiTestCase): def testSuccessWithLog(self): job_id = 29609 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, constants.JOB_NOTCHANGED) cbs.AddWfjcResult(None, None, ((constants.JOB_STATUS_QUEUED, ), None)) cbs.AddWfjcResult((constants.JOB_STATUS_QUEUED, ), None, constants.JOB_NOTCHANGED) cbs.AddWfjcResult((constants.JOB_STATUS_QUEUED, ), None, ((constants.JOB_STATUS_RUNNING, ), [(1, utils.SplitTime(1273491611.0), constants.ELOG_MESSAGE, "Step 1"), (2, utils.SplitTime(1273491615.9), constants.ELOG_MESSAGE, "Step 2"), (3, utils.SplitTime(1273491625.02), constants.ELOG_MESSAGE, "Step 3"), (4, utils.SplitTime(1273491635.05), constants.ELOG_MESSAGE, "Step 4"), (37, utils.SplitTime(1273491645.0), constants.ELOG_MESSAGE, "Step 5"), (203, utils.SplitTime(127349155.0), 
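# For reference: cli.GenericPollJob(job_id, wfjc_cb, report_cb) repeatedly
# calls WaitForJobChangeOnce, forwarding new log entries to
# ReportLogMessage (and JOB_NOTCHANGED to ReportNotChanged), until the job
# reaches a final status; it then calls QueryJobs for (status, opstatus,
# opresult) and returns the opresult list on success, as asserted below.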
constants.ELOG_MESSAGE, "Step 6")])) cbs.AddWfjcResult((constants.JOB_STATUS_RUNNING, ), 203, ((constants.JOB_STATUS_RUNNING, ), [(300, utils.SplitTime(1273491711.01), constants.ELOG_MESSAGE, "Step X"), (302, utils.SplitTime(1273491815.8), constants.ELOG_MESSAGE, "Step Y"), (303, utils.SplitTime(1273491925.32), constants.ELOG_MESSAGE, "Step Z")])) cbs.AddWfjcResult((constants.JOB_STATUS_RUNNING, ), 303, ((constants.JOB_STATUS_SUCCESS, ), None)) cbs.AddQueryJobsResult(constants.JOB_STATUS_SUCCESS, [constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS], ["Hello World", "Foo man bar"]) self.assertEqual(["Hello World", "Foo man bar"], cli.GenericPollJob(job_id, cbs, cbs)) cbs.CheckEmpty() def testJobLost(self): job_id = 13746 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, constants.JOB_NOTCHANGED) cbs.AddWfjcResult(None, None, None) self.assertRaises(errors.JobLost, cli.GenericPollJob, job_id, cbs, cbs) cbs.CheckEmpty() def testError(self): job_id = 31088 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, constants.JOB_NOTCHANGED) cbs.AddWfjcResult(None, None, ((constants.JOB_STATUS_ERROR, ), None)) cbs.AddQueryJobsResult(constants.JOB_STATUS_ERROR, [constants.OP_STATUS_SUCCESS, constants.OP_STATUS_ERROR], ["Hello World", "Error code 123"]) self.assertRaises(errors.OpExecError, cli.GenericPollJob, job_id, cbs, cbs) cbs.CheckEmpty() def testError2(self): job_id = 22235 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, ((constants.JOB_STATUS_ERROR, ), None)) encexc = errors.EncodeException(errors.LockError("problem")) cbs.AddQueryJobsResult(constants.JOB_STATUS_ERROR, [constants.OP_STATUS_ERROR], [encexc]) self.assertRaises(errors.LockError, cli.GenericPollJob, job_id, cbs, cbs) cbs.CheckEmpty() def testWeirdError(self): job_id = 28847 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, ((constants.JOB_STATUS_ERROR, ), None)) cbs.AddQueryJobsResult(constants.JOB_STATUS_ERROR, [constants.OP_STATUS_RUNNING, constants.OP_STATUS_RUNNING], [None, None]) self.assertRaises(errors.OpExecError, cli.GenericPollJob, job_id, cbs, cbs) cbs.CheckEmpty() def testCancel(self): job_id = 4275 cbs = _MockJobPollCb(self, job_id) cbs.AddWfjcResult(None, None, constants.JOB_NOTCHANGED) cbs.AddWfjcResult(None, None, ((constants.JOB_STATUS_CANCELING, ), None)) cbs.AddQueryJobsResult(constants.JOB_STATUS_CANCELING, [constants.OP_STATUS_CANCELING, constants.OP_STATUS_CANCELING], [None, None]) self.assertRaises(errors.OpExecError, cli.GenericPollJob, job_id, cbs, cbs) cbs.CheckEmpty() class TestFormatLogMessage(unittest.TestCase): def test(self): self.assertEqual(cli.FormatLogMessage(constants.ELOG_MESSAGE, "Hello World"), "Hello World") self.assertRaises(TypeError, cli.FormatLogMessage, constants.ELOG_MESSAGE, [1, 2, 3]) self.assert_(cli.FormatLogMessage("some other type", (1, 2, 3))) class TestParseFields(unittest.TestCase): def test(self): self.assertEqual(cli.ParseFields(None, []), []) self.assertEqual(cli.ParseFields("name,foo,hello", []), ["name", "foo", "hello"]) self.assertEqual(cli.ParseFields(None, ["def", "ault", "fields", "here"]), ["def", "ault", "fields", "here"]) self.assertEqual(cli.ParseFields("name,foo", ["def", "ault"]), ["name", "foo"]) self.assertEqual(cli.ParseFields("+name,foo", ["def", "ault"]), ["def", "ault", "name", "foo"]) class TestConstants(unittest.TestCase): def testPriority(self): self.assertEqual(set(cli._PRIONAME_TO_VALUE.values()), set(constants.OP_PRIO_SUBMIT_VALID)) self.assertEqual(list(value for _, value in 
cli._PRIORITY_NAMES), sorted(constants.OP_PRIO_SUBMIT_VALID, reverse=True)) class TestParseNicOption(unittest.TestCase): def test(self): self.assertEqual(cli.ParseNicOption([("0", { "link": "eth0", })]), [{ "link": "eth0", }]) self.assertEqual(cli.ParseNicOption([("5", { "ip": "192.0.2.7", })]), [{}, {}, {}, {}, {}, { "ip": "192.0.2.7", }]) def testErrors(self): for i in [None, "", "abc", "zero", "Hello World", "\0", []]: self.assertRaises(errors.OpPrereqError, cli.ParseNicOption, [(i, { "link": "eth0", })]) self.assertRaises(errors.OpPrereqError, cli.ParseNicOption, [("0", i)]) self.assertRaises(errors.TypeEnforcementError, cli.ParseNicOption, [(0, { True: False, })]) self.assertRaises(errors.TypeEnforcementError, cli.ParseNicOption, [(3, { "mode": [], })]) class TestFormatResultError(unittest.TestCase): def testNormal(self): for verbose in [False, True]: self.assertRaises(AssertionError, cli.FormatResultError, constants.RS_NORMAL, verbose) def testUnknown(self): for verbose in [False, True]: self.assertRaises(NotImplementedError, cli.FormatResultError, "#some!other!status#", verbose) def test(self): for status in constants.RS_ALL: if status == constants.RS_NORMAL: continue self.assertNotEqual(cli.FormatResultError(status, False), cli.FormatResultError(status, True)) result = cli.FormatResultError(status, True) self.assertTrue(result.startswith("(")) self.assertTrue(result.endswith(")")) class TestGetOnlineNodes(unittest.TestCase): class _FakeClient: def __init__(self): self._query = [] def AddQueryResult(self, *args): self._query.append(args) def CountPending(self): return len(self._query) def Query(self, res, fields, qfilter): if res != constants.QR_NODE: raise Exception("Querying wrong resource") (exp_fields, check_filter, result) = self._query.pop(0) if exp_fields != fields: raise Exception("Expected fields %s, got %s" % (exp_fields, fields)) if not (qfilter is None or check_filter(qfilter)): raise Exception("Filter doesn't match expectations") return objects.QueryResponse(fields=None, data=result) def testEmpty(self): cl = self._FakeClient() cl.AddQueryResult(["name", "offline", "sip"], None, []) self.assertEqual(cli.GetOnlineNodes(None, cl=cl), []) self.assertEqual(cl.CountPending(), 0) def testNoSpecialFilter(self): cl = self._FakeClient() cl.AddQueryResult(["name", "offline", "sip"], None, [ [(constants.RS_NORMAL, "master.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.1")], [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.2")], ]) self.assertEqual(cli.GetOnlineNodes(None, cl=cl), ["master.example.com", "node2.example.com"]) self.assertEqual(cl.CountPending(), 0) def testNoMaster(self): cl = self._FakeClient() def _CheckFilter(qfilter): self.assertEqual(qfilter, [qlang.OP_NOT, [qlang.OP_TRUE, "master"]]) return True cl.AddQueryResult(["name", "offline", "sip"], _CheckFilter, [ [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.2")], ]) self.assertEqual(cli.GetOnlineNodes(None, cl=cl, filter_master=True), ["node2.example.com"]) self.assertEqual(cl.CountPending(), 0) def testSecondaryIpAddress(self): cl = self._FakeClient() cl.AddQueryResult(["name", "offline", "sip"], None, [ [(constants.RS_NORMAL, "master.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.1")], [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.2")], ]) self.assertEqual(cli.GetOnlineNodes(None, 
cl=cl, secondary_ips=True), ["192.0.2.1", "192.0.2.2"]) self.assertEqual(cl.CountPending(), 0) def testNoMasterFilterNodeName(self): cl = self._FakeClient() def _CheckFilter(qfilter): self.assertEqual(qfilter, [qlang.OP_AND, [qlang.OP_OR] + [[qlang.OP_EQUAL, "name", name] for name in ["node2", "node3"]], [qlang.OP_NOT, [qlang.OP_TRUE, "master"]]]) return True cl.AddQueryResult(["name", "offline", "sip"], _CheckFilter, [ [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.12")], [(constants.RS_NORMAL, "node3.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.13")], ]) self.assertEqual(cli.GetOnlineNodes(["node2", "node3"], cl=cl, secondary_ips=True, filter_master=True), ["192.0.2.12", "192.0.2.13"]) self.assertEqual(cl.CountPending(), 0) def testOfflineNodes(self): cl = self._FakeClient() cl.AddQueryResult(["name", "offline", "sip"], None, [ [(constants.RS_NORMAL, "master.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.1")], [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_NORMAL, True), (constants.RS_NORMAL, "192.0.2.2")], [(constants.RS_NORMAL, "node3.example.com"), (constants.RS_NORMAL, True), (constants.RS_NORMAL, "192.0.2.3")], ]) self.assertEqual(cli.GetOnlineNodes(None, cl=cl, nowarn=True), ["master.example.com"]) self.assertEqual(cl.CountPending(), 0) def testNodeGroup(self): cl = self._FakeClient() def _CheckFilter(qfilter): self.assertEqual(qfilter, [qlang.OP_OR, [qlang.OP_EQUAL, "group", "foobar"], [qlang.OP_EQUAL, "group.uuid", "foobar"]]) return True cl.AddQueryResult(["name", "offline", "sip"], _CheckFilter, [ [(constants.RS_NORMAL, "master.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.1")], [(constants.RS_NORMAL, "node3.example.com"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, "192.0.2.3")], ]) self.assertEqual(cli.GetOnlineNodes(None, cl=cl, nodegroup="foobar"), ["master.example.com", "node3.example.com"]) self.assertEqual(cl.CountPending(), 0) class TestFormatTimestamp(unittest.TestCase): def testGood(self): self.assertEqual(cli.FormatTimestamp((0, 1)), time.strftime("%F %T", time.localtime(0)) + ".000001") self.assertEqual(cli.FormatTimestamp((1332944009, 17376)), (time.strftime("%F %T", time.localtime(1332944009)) + ".017376")) def testWrong(self): for i in [0, [], {}, "", [1]]: self.assertEqual(cli.FormatTimestamp(i), "?") class TestFormatUsage(unittest.TestCase): def test(self): binary = "gnt-unittest" commands = { "cmdA": (NotImplemented, NotImplemented, NotImplemented, NotImplemented, "description of A"), "bbb": (NotImplemented, NotImplemented, NotImplemented, NotImplemented, "Hello World," * 10), "longname": (NotImplemented, NotImplemented, NotImplemented, NotImplemented, "Another description"), } self.assertEqual(list(cli._FormatUsage(binary, commands)), [ "Usage: gnt-unittest {command} [options...] 
[argument...]", "gnt-unittest --help to see details, or man gnt-unittest", "", "Commands:", (" bbb - Hello World,Hello World,Hello World,Hello World,Hello" " World,Hello"), " World,Hello World,Hello World,Hello World,Hello World,", " cmdA - description of A", " longname - Another description", "", ]) class TestParseArgs(unittest.TestCase): def testNoArguments(self): for argv in [[], ["gnt-unittest"]]: try: cli._ParseArgs("gnt-unittest", argv, {}, {}, set()) except cli._ShowUsage, err: self.assertTrue(err.exit_error) else: self.fail("Did not raise exception") def testVersion(self): for argv in [["test", "--version"], ["test", "--version", "somethingelse"]]: try: cli._ParseArgs("test", argv, {}, {}, set()) except cli._ShowVersion: pass else: self.fail("Did not raise exception") def testHelp(self): for argv in [["test", "--help"], ["test", "--help", "somethingelse"]]: try: cli._ParseArgs("test", argv, {}, {}, set()) except cli._ShowUsage, err: self.assertFalse(err.exit_error) else: self.fail("Did not raise exception") def testUnknownCommandOrAlias(self): for argv in [["test", "list"], ["test", "somethingelse", "--help"]]: try: cli._ParseArgs("test", argv, {}, {}, set()) except cli._ShowUsage, err: self.assertTrue(err.exit_error) else: self.fail("Did not raise exception") def testInvalidAliasList(self): cmd = { "list": NotImplemented, "foo": NotImplemented, } aliases = { "list": NotImplemented, "foo": NotImplemented, } assert sorted(cmd.keys()) == sorted(aliases.keys()) self.assertRaises(AssertionError, cli._ParseArgs, "test", ["test", "list"], cmd, aliases, set()) def testAliasForNonExistantCommand(self): cmd = {} aliases = { "list": NotImplemented, } self.assertRaises(errors.ProgrammerError, cli._ParseArgs, "test", ["test", "list"], cmd, aliases, set()) class TestQftNames(unittest.TestCase): def testComplete(self): self.assertEqual(frozenset(cli._QFT_NAMES), constants.QFT_ALL) def testUnique(self): lcnames = map(lambda s: s.lower(), cli._QFT_NAMES.values()) self.assertFalse(utils.FindDuplicates(lcnames)) def testUppercase(self): for name in cli._QFT_NAMES.values(): self.assertEqual(name[0], name[0].upper()) class TestFieldDescValues(unittest.TestCase): def testKnownKind(self): fdef = objects.QueryFieldDefinition(name="aname", title="Atitle", kind=constants.QFT_TEXT, doc="aaa doc aaa") self.assertEqual(cli._FieldDescValues(fdef), ["aname", "Text", "Atitle", "aaa doc aaa"]) def testUnknownKind(self): kind = "#foo#" self.assertFalse(kind in constants.QFT_ALL) self.assertFalse(kind in cli._QFT_NAMES) fdef = objects.QueryFieldDefinition(name="zname", title="Ztitle", kind=kind, doc="zzz doc zzz") self.assertEqual(cli._FieldDescValues(fdef), ["zname", kind, "Ztitle", "zzz doc zzz"]) class TestSerializeGenericInfo(unittest.TestCase): """Test case for cli._SerializeGenericInfo""" def _RunTest(self, data, expected): buf = StringIO() cli._SerializeGenericInfo(buf, data, 0) self.assertEqual(buf.getvalue(), expected) def testSimple(self): test_samples = [ ("abc", "abc\n"), ([], "\n"), ({}, "\n"), (["1", "2", "3"], "- 1\n- 2\n- 3\n"), ([("z", "26")], "z: 26\n"), ({"z": "26"}, "z: 26\n"), ([("z", "26"), ("a", "1")], "z: 26\na: 1\n"), ({"z": "26", "a": "1"}, "a: 1\nz: 26\n"), ] for (data, expected) in test_samples: self._RunTest(data, expected) def testLists(self): adict = { "aa": "11", "bb": "22", "cc": "33", } adict_exp = ("- aa: 11\n" " bb: 22\n" " cc: 33\n") anobj = [ ("zz", "11"), ("ww", "33"), ("xx", "22"), ] anobj_exp = ("- zz: 11\n" " ww: 33\n" " xx: 22\n") alist = ["aa", "cc", "bb"] alist_exp = ("- 
- aa\n" " - cc\n" " - bb\n") test_samples = [ (adict, adict_exp), (anobj, anobj_exp), (alist, alist_exp), ] for (base_data, base_expected) in test_samples: for k in range(1, 4): data = k * [base_data] expected = k * base_expected self._RunTest(data, expected) def testDictionaries(self): data = [ ("aaa", ["x", "y"]), ("bbb", { "w": "1", "z": "2", }), ("ccc", [ ("xyz", "123"), ("efg", "456"), ]), ] expected = ("aaa: \n" " - x\n" " - y\n" "bbb: \n" " w: 1\n" " z: 2\n" "ccc: \n" " xyz: 123\n" " efg: 456\n") self._RunTest(data, expected) self._RunTest(dict(data), expected) class TestFormatPolicyInfo(unittest.TestCase): """Test case for cli.FormatPolicyInfo. These tests rely on cli._SerializeGenericInfo (tested elsewhere). """ def setUp(self): # Policies are big, and we want to see the difference in case of an error self.maxDiff = None def _RenameDictItem(self, parsed, old, new): self.assertTrue(old in parsed) self.assertTrue(new not in parsed) parsed[new] = parsed[old] del parsed[old] def _TranslateParsedNames(self, parsed): for (pretty, raw) in [ ("bounds specs", constants.ISPECS_MINMAX), ("allowed disk templates", constants.IPOLICY_DTS) ]: self._RenameDictItem(parsed, pretty, raw) for minmax in parsed[constants.ISPECS_MINMAX]: for key in minmax: keyparts = key.split("/", 1) if len(keyparts) > 1: self._RenameDictItem(minmax, key, keyparts[0]) self.assertTrue(constants.IPOLICY_DTS in parsed) parsed[constants.IPOLICY_DTS] = yaml.load("[%s]" % parsed[constants.IPOLICY_DTS]) @staticmethod def _PrintAndParsePolicy(custom, effective, iscluster): formatted = cli.FormatPolicyInfo(custom, effective, iscluster) buf = StringIO() cli._SerializeGenericInfo(buf, formatted, 0) return yaml.load(buf.getvalue()) def _PrintAndCheckParsed(self, policy): parsed = self._PrintAndParsePolicy(policy, NotImplemented, True) self._TranslateParsedNames(parsed) self.assertEqual(parsed, policy) def _CompareClusterGroupItems(self, cluster, group, skip=None): if isinstance(group, dict): self.assertTrue(isinstance(cluster, dict)) if skip is None: skip = frozenset() self.assertEqual(frozenset(cluster.keys()).difference(skip), frozenset(group.keys())) for key in group: self._CompareClusterGroupItems(cluster[key], group[key]) elif isinstance(group, list): self.assertTrue(isinstance(cluster, list)) self.assertEqual(len(cluster), len(group)) for (cval, gval) in zip(cluster, group): self._CompareClusterGroupItems(cval, gval) else: self.assertTrue(isinstance(group, basestring)) self.assertEqual("default (%s)" % cluster, group) def _TestClusterVsGroup(self, policy): cluster = self._PrintAndParsePolicy(policy, NotImplemented, True) group = self._PrintAndParsePolicy({}, policy, False) self._CompareClusterGroupItems(cluster, group, ["std"]) def testWithDefaults(self): self._PrintAndCheckParsed(constants.IPOLICY_DEFAULTS) self._TestClusterVsGroup(constants.IPOLICY_DEFAULTS) class TestCreateIPolicyFromOpts(unittest.TestCase): """Test case for cli.CreateIPolicyFromOpts.""" def setUp(self): # Policies are big, and we want to see the difference in case of an error self.maxDiff = None def _RecursiveCheckMergedDicts(self, default_pol, diff_pol, merged_pol, merge_minmax=False): self.assertTrue(type(default_pol) is dict) self.assertTrue(type(diff_pol) is dict) self.assertTrue(type(merged_pol) is dict) self.assertEqual(frozenset(default_pol.keys()), frozenset(merged_pol.keys())) for (key, val) in merged_pol.items(): if key in diff_pol: if type(val) is dict: self._RecursiveCheckMergedDicts(default_pol[key], diff_pol[key], val) elif (merge_minmax 
and key == "minmax" and type(val) is list and len(val) == 1): self.assertEqual(len(default_pol[key]), 1) self.assertEqual(len(diff_pol[key]), 1) self._RecursiveCheckMergedDicts(default_pol[key][0], diff_pol[key][0], val[0]) else: self.assertEqual(val, diff_pol[key]) else: self.assertEqual(val, default_pol[key]) def testClusterPolicy(self): pol0 = cli.CreateIPolicyFromOpts( ispecs_mem_size={}, ispecs_cpu_count={}, ispecs_disk_count={}, ispecs_disk_size={}, ispecs_nic_count={}, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, fill_all=True ) self.assertEqual(pol0, constants.IPOLICY_DEFAULTS) exp_pol1 = { constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: { constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 12*1024, constants.ISPEC_DISK_COUNT: 2, }, }, ], constants.ISPECS_STD: { constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 2, }, constants.IPOLICY_VCPU_RATIO: 3.1, } pol1 = cli.CreateIPolicyFromOpts( ispecs_mem_size={"max": "12g"}, ispecs_cpu_count={"min": 2, "std": 2}, ispecs_disk_count={"min": 1, "max": 2, "std": 2}, ispecs_disk_size={}, ispecs_nic_count={}, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=3.1, ipolicy_spindle_ratio=None, fill_all=True ) self._RecursiveCheckMergedDicts(constants.IPOLICY_DEFAULTS, exp_pol1, pol1, merge_minmax=True) exp_pol2 = { constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: { constants.ISPEC_DISK_SIZE: 512, constants.ISPEC_NIC_COUNT: 2, }, constants.ISPECS_MAX: { constants.ISPEC_NIC_COUNT: 3, }, }, ], constants.ISPECS_STD: { constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_NIC_COUNT: 3, }, constants.IPOLICY_SPINDLE_RATIO: 1.3, constants.IPOLICY_DTS: ["templates"], } pol2 = cli.CreateIPolicyFromOpts( ispecs_mem_size={}, ispecs_cpu_count={"std": 2}, ispecs_disk_count={}, ispecs_disk_size={"min": "0.5g"}, ispecs_nic_count={"min": 2, "max": 3, "std": 3}, ipolicy_disk_templates=["templates"], ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=1.3, fill_all=True ) self._RecursiveCheckMergedDicts(constants.IPOLICY_DEFAULTS, exp_pol2, pol2, merge_minmax=True) for fill_all in [False, True]: exp_pol3 = { constants.ISPECS_STD: { constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_NIC_COUNT: 3, }, } pol3 = cli.CreateIPolicyFromOpts( std_ispecs={ constants.ISPEC_CPU_COUNT: "2", constants.ISPEC_NIC_COUNT: "3", }, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, fill_all=fill_all ) if fill_all: self._RecursiveCheckMergedDicts(constants.IPOLICY_DEFAULTS, exp_pol3, pol3, merge_minmax=True) else: self.assertEqual(pol3, exp_pol3) def testPartialPolicy(self): exp_pol0 = objects.MakeEmptyIPolicy() pol0 = cli.CreateIPolicyFromOpts( minmax_ispecs=None, std_ispecs=None, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, fill_all=False ) self.assertEqual(pol0, exp_pol0) exp_pol1 = { constants.IPOLICY_VCPU_RATIO: 3.1, } pol1 = cli.CreateIPolicyFromOpts( minmax_ispecs=None, std_ispecs=None, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=3.1, ipolicy_spindle_ratio=None, fill_all=False ) self.assertEqual(pol1, exp_pol1) exp_pol2 = { constants.IPOLICY_SPINDLE_RATIO: 1.3, constants.IPOLICY_DTS: ["templates"], } pol2 = cli.CreateIPolicyFromOpts( minmax_ispecs=None, std_ispecs=None, ipolicy_disk_templates=["templates"], ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=1.3, fill_all=False ) self.assertEqual(pol2, exp_pol2) def _TestInvalidISpecs(self, minmax_ispecs, std_ispecs, fail=True): for fill_all in [False, True]: if fail: 
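# For reference, the failure modes exercised below: unknown spec keys,
# non-numeric values and removed min/max parameters each make
# CreateIPolicyFromOpts raise OpPrereqError, UnitParseError or
# TypeEnforcementError, independent of the fill_all setting.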
self.assertRaises((errors.OpPrereqError, errors.UnitParseError, errors.TypeEnforcementError), cli.CreateIPolicyFromOpts, minmax_ispecs=minmax_ispecs, std_ispecs=std_ispecs, fill_all=fill_all) else: cli.CreateIPolicyFromOpts(minmax_ispecs=minmax_ispecs, std_ispecs=std_ispecs, fill_all=fill_all) def testInvalidPolicies(self): self.assertRaises(AssertionError, cli.CreateIPolicyFromOpts, std_ispecs={constants.ISPEC_MEM_SIZE: 1024}, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, group_ipolicy=True) self.assertRaises(errors.OpPrereqError, cli.CreateIPolicyFromOpts, ispecs_mem_size={"wrong": "x"}, ispecs_cpu_count={}, ispecs_disk_count={}, ispecs_disk_size={}, ispecs_nic_count={}, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, fill_all=True) self.assertRaises(errors.TypeEnforcementError, cli.CreateIPolicyFromOpts, ispecs_mem_size={}, ispecs_cpu_count={"min": "default"}, ispecs_disk_count={}, ispecs_disk_size={}, ispecs_nic_count={}, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, fill_all=True) good_mmspecs = [ constants.ISPECS_MINMAX_DEFAULTS, constants.ISPECS_MINMAX_DEFAULTS, ] self._TestInvalidISpecs(good_mmspecs, None, fail=False) broken_mmspecs = copy.deepcopy(good_mmspecs) for minmaxpair in broken_mmspecs: for key in constants.ISPECS_MINMAX_KEYS: for par in constants.ISPECS_PARAMETERS: old = minmaxpair[key][par] del minmaxpair[key][par] self._TestInvalidISpecs(broken_mmspecs, None) minmaxpair[key][par] = "invalid" self._TestInvalidISpecs(broken_mmspecs, None) minmaxpair[key][par] = old minmaxpair[key]["invalid_key"] = None self._TestInvalidISpecs(broken_mmspecs, None) del minmaxpair[key]["invalid_key"] minmaxpair["invalid_key"] = None self._TestInvalidISpecs(broken_mmspecs, None) del minmaxpair["invalid_key"] assert broken_mmspecs == good_mmspecs good_stdspecs = constants.IPOLICY_DEFAULTS[constants.ISPECS_STD] self._TestInvalidISpecs(None, good_stdspecs, fail=False) broken_stdspecs = copy.deepcopy(good_stdspecs) for par in constants.ISPECS_PARAMETERS: old = broken_stdspecs[par] broken_stdspecs[par] = "invalid" self._TestInvalidISpecs(None, broken_stdspecs) broken_stdspecs[par] = old broken_stdspecs["invalid_key"] = None self._TestInvalidISpecs(None, broken_stdspecs) del broken_stdspecs["invalid_key"] assert broken_stdspecs == good_stdspecs def testAllowedValues(self): allowedv = "blah" exp_pol1 = { constants.ISPECS_MINMAX: allowedv, constants.IPOLICY_DTS: allowedv, constants.IPOLICY_VCPU_RATIO: allowedv, constants.IPOLICY_SPINDLE_RATIO: allowedv, } pol1 = cli.CreateIPolicyFromOpts(minmax_ispecs=[{allowedv: {}}], std_ispecs=None, ipolicy_disk_templates=allowedv, ipolicy_vcpu_ratio=allowedv, ipolicy_spindle_ratio=allowedv, allowed_values=[allowedv]) self.assertEqual(pol1, exp_pol1) @staticmethod def _ConvertSpecToStrings(spec): ret = {} for (par, val) in spec.items(): ret[par] = str(val) return ret def _CheckNewStyleSpecsCall(self, exp_ipolicy, minmax_ispecs, std_ispecs, group_ipolicy, fill_all): ipolicy = cli.CreateIPolicyFromOpts(minmax_ispecs=minmax_ispecs, std_ispecs=std_ispecs, group_ipolicy=group_ipolicy, fill_all=fill_all) self.assertEqual(ipolicy, exp_ipolicy) def _TestFullISpecsInner(self, skel_exp_ipol, exp_minmax, exp_std, group_ipolicy, fill_all): exp_ipol = skel_exp_ipol.copy() if exp_minmax is not None: minmax_ispecs = [] for exp_mm_pair in exp_minmax: mmpair = {} for (key, spec) in exp_mm_pair.items(): mmpair[key] = self._ConvertSpecToStrings(spec) 
minmax_ispecs.append(mmpair) exp_ipol[constants.ISPECS_MINMAX] = exp_minmax else: minmax_ispecs = None if exp_std is not None: std_ispecs = self._ConvertSpecToStrings(exp_std) exp_ipol[constants.ISPECS_STD] = exp_std else: std_ispecs = None self._CheckNewStyleSpecsCall(exp_ipol, minmax_ispecs, std_ispecs, group_ipolicy, fill_all) if minmax_ispecs: for mmpair in minmax_ispecs: for (key, spec) in mmpair.items(): for par in [constants.ISPEC_MEM_SIZE, constants.ISPEC_DISK_SIZE]: if par in spec: spec[par] += "m" self._CheckNewStyleSpecsCall(exp_ipol, minmax_ispecs, std_ispecs, group_ipolicy, fill_all) if std_ispecs: for par in [constants.ISPEC_MEM_SIZE, constants.ISPEC_DISK_SIZE]: if par in std_ispecs: std_ispecs[par] += "m" self._CheckNewStyleSpecsCall(exp_ipol, minmax_ispecs, std_ispecs, group_ipolicy, fill_all) def testFullISpecs(self): exp_minmax1 = [ { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 2, constants.ISPEC_DISK_SIZE: 512, constants.ISPEC_NIC_COUNT: 2, constants.ISPEC_SPINDLE_USE: 2, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 768*1024, constants.ISPEC_CPU_COUNT: 7, constants.ISPEC_DISK_COUNT: 6, constants.ISPEC_DISK_SIZE: 2048*1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 3, }, }, ] exp_minmax2 = [ { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 2, constants.ISPEC_DISK_SIZE: 512, constants.ISPEC_NIC_COUNT: 2, constants.ISPEC_SPINDLE_USE: 2, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 768*1024, constants.ISPEC_CPU_COUNT: 7, constants.ISPEC_DISK_COUNT: 6, constants.ISPEC_DISK_SIZE: 2048*1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 3, }, }, { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 1024*1024, constants.ISPEC_CPU_COUNT: 3, constants.ISPEC_DISK_COUNT: 3, constants.ISPEC_DISK_SIZE: 256, constants.ISPEC_NIC_COUNT: 4, constants.ISPEC_SPINDLE_USE: 5, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 2048*1024, constants.ISPEC_CPU_COUNT: 5, constants.ISPEC_DISK_COUNT: 5, constants.ISPEC_DISK_SIZE: 1024*1024, constants.ISPEC_NIC_COUNT: 5, constants.ISPEC_SPINDLE_USE: 7, }, }, ] exp_std1 = { constants.ISPEC_MEM_SIZE: 768*1024, constants.ISPEC_CPU_COUNT: 7, constants.ISPEC_DISK_COUNT: 6, constants.ISPEC_DISK_SIZE: 2048*1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 1, } for fill_all in [False, True]: if fill_all: skel_ipolicy = constants.IPOLICY_DEFAULTS else: skel_ipolicy = {} self._TestFullISpecsInner(skel_ipolicy, None, exp_std1, False, fill_all) for exp_minmax in [exp_minmax1, exp_minmax2]: self._TestFullISpecsInner(skel_ipolicy, exp_minmax, exp_std1, False, fill_all) self._TestFullISpecsInner(skel_ipolicy, exp_minmax, None, False, fill_all) class TestPrintIPolicyCommand(unittest.TestCase): """Test case for cli.PrintIPolicyCommand""" _SPECS1 = { "par1": 42, "par2": "xyz", } _SPECS1_STR = "par1=42,par2=xyz" _SPECS2 = { "param": 10, "another_param": 101, } _SPECS2_STR = "another_param=101,param=10" _SPECS3 = { "par1": 1024, "param": "abc", } _SPECS3_STR = "par1=1024,param=abc" def _CheckPrintIPolicyCommand(self, ipolicy, isgroup, expected): buf = StringIO() cli.PrintIPolicyCommand(buf, ipolicy, isgroup) self.assertEqual(buf.getvalue(), expected) def testIgnoreStdForGroup(self): self._CheckPrintIPolicyCommand({"std": self._SPECS1}, True, "") def testIgnoreEmpty(self): policies = [ {}, {"std": {}}, {"minmax": []}, {"minmax": [{}]}, {"minmax": [{ "min": {}, "max": {}, }]}, 
{"minmax": [{ "min": self._SPECS1, "max": {}, }]}, ] for pol in policies: self._CheckPrintIPolicyCommand(pol, False, "") def testFullPolicies(self): cases = [ ({"std": self._SPECS1}, " %s %s" % (cli.IPOLICY_STD_SPECS_STR, self._SPECS1_STR)), ({"minmax": [{ "min": self._SPECS1, "max": self._SPECS2, }]}, " %s min:%s/max:%s" % (cli.IPOLICY_BOUNDS_SPECS_STR, self._SPECS1_STR, self._SPECS2_STR)), ({"minmax": [ { "min": self._SPECS1, "max": self._SPECS2, }, { "min": self._SPECS2, "max": self._SPECS3, }, ]}, " %s min:%s/max:%s//min:%s/max:%s" % (cli.IPOLICY_BOUNDS_SPECS_STR, self._SPECS1_STR, self._SPECS2_STR, self._SPECS2_STR, self._SPECS3_STR)), ] for (pol, exp) in cases: self._CheckPrintIPolicyCommand(pol, False, exp) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.errors_unittest.py0000744000000000000000000000567112244641676021522 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.backend""" import os import sys import unittest from ganeti import errors import testutils class TestErrors(testutils.GanetiTestCase): def testGetErrorClass(self): tdata = { "": None, ".": None, "-": None, "ECODE_INVAL": None, "NoErrorClassName": None, "GenericError": errors.GenericError, "ProgrammerError": errors.ProgrammerError, } for name, cls in tdata.items(): self.assert_(errors.GetErrorClass(name) is cls) def testEncodeException(self): self.assertEqualValues(errors.EncodeException(Exception("Foobar")), ("Exception", ("Foobar", ))) err = errors.GenericError(True, 100, "foo", ["x", "y"]) self.assertEqualValues(errors.EncodeException(err), ("GenericError", (True, 100, "foo", ["x", "y"]))) def testMaybeRaise(self): testvals = [None, 1, 2, 3, "Hello World", (1, ), (1, 2, 3), ("NoErrorClassName", []), ("NoErrorClassName", None), ("GenericError", [1, 2, 3], None), ("GenericError", 1)] # These shouldn't raise for i in testvals: errors.MaybeRaise(i) self.assertRaises(errors.GenericError, errors.MaybeRaise, ("GenericError", ["Hello"])) # Check error encoding for i in testvals: src = errors.GenericError(i) try: errors.MaybeRaise(errors.EncodeException(src)) except errors.GenericError, dst: self.assertEqual(src.args, dst.args) self.assertEqual(src.__class__, dst.__class__) else: self.fail("Exception %s not raised" % repr(src)) def testGetEncodedError(self): self.assertEqualValues(errors.GetEncodedError(["GenericError", ("Hello", 123, "World")]), (errors.GenericError, ("Hello", 123, "World"))) self.assertEqualValues(errors.GetEncodedError(["GenericError", []]), (errors.GenericError, ())) self.assertFalse(errors.GetEncodedError(["NoErrorClass", ("Hello", 123, "World")])) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.ovf_unittest.py0000744000000000000000000007411212244641676020774 
0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.ovf. """ import optparse import os import os.path import re import shutil import sys import tempfile import unittest try: import xml.etree.ElementTree as ET except ImportError: import elementtree.ElementTree as ET from ganeti import constants from ganeti import errors from ganeti import ovf from ganeti import utils from ganeti import pathutils import testutils OUTPUT_DIR = "newdir" GANETI_DISKS = { "disk_count": "1", "disk0_dump": "new_disk.raw", "disk0_size": "0", "disk0_ivname": "disk/0", } GANETI_NETWORKS = { "nic_count": "1", "nic0_mode": "bridged", "nic0_ip": "none", "nic0_mac": "aa:00:00:d8:2c:1e", "nic0_link": "xen-br0", "nic0_network": "auto", } GANETI_HYPERVISOR = { "hypervisor_name": "xen-pvm", "root-path": "/dev/sda", "kernel_args": "ro", } GANETI_OS = {"os_name": "lenny-image"} GANETI_BACKEND = { "vcpus": "1", "memory" : "2048", "auto_balance": "False", } GANETI_NAME = "ganeti-test-xen" GANETI_TEMPLATE = "plain" GANETI_TAGS = None GANETI_VERSION = "0" VIRTUALBOX_DISKS = { "disk_count": "2", "disk0_ivname": "disk/0", "disk0_dump": "new_disk.raw", "disk0_size": "0", "disk1_ivname": "disk/1", "disk1_dump": "second_disk.raw", "disk1_size": "0", } VIRTUALBOX_NETWORKS = { "nic_count": "1", "nic0_mode": "bridged", "nic0_ip": "none", "nic0_link": "auto", "nic0_mac": "auto", "nic0_network": "auto", } VIRTUALBOX_HYPERVISOR = {"hypervisor_name": "auto"} VIRTUALBOX_OS = {"os_name": None} VIRTUALBOX_BACKEND = { "vcpus": "1", "memory" : "2048", "auto_balance": "auto", } VIRTUALBOX_NAME = None VIRTUALBOX_TEMPLATE = None VIRTUALBOX_TAGS = None VIRTUALBOX_VERSION = None EMPTY_DISKS = {} EMPTY_NETWORKS = {} EMPTY_HYPERVISOR = {"hypervisor_name": "auto"} EMPTY_OS = {} EMPTY_BACKEND = { "vcpus": "auto", "memory" : "auto", "auto_balance": "auto", } EMPTY_NAME = None EMPTY_TEMPLATE = None EMPTY_TAGS = None EMPTY_VERSION = None CMDARGS_DISKS = { "disk_count": "1", "disk0_ivname": "disk/0", "disk0_dump": "disk0.raw", "disk0_size": "8", } CMDARGS_NETWORKS = { "nic0_link": "auto", "nic0_mode": "bridged", "nic0_ip": "none", "nic0_mac": "auto", "nic_count": "1", "nic0_network": "auto", } CMDARGS_HYPERVISOR = { "hypervisor_name": "xen-pvm" } CMDARGS_OS = {"os_name": "lenny-image"} CMDARGS_BACKEND = { "auto_balance": False, "vcpus": "1", "memory": "256", } CMDARGS_NAME = "test-instance" CMDARGS_TEMPLATE = "plain" CMDARGS_TAGS = "test-tag-1,test-tag-2" ARGS_EMPTY = { "output_dir": None, "nics": [], "disks": [], "name": "test-instance", "ova_package": False, "ext_usage": False, "disk_format": "cow", "compression": False, } ARGS_EXPORT_DIR = dict(ARGS_EMPTY, **{ "output_dir": OUTPUT_DIR, "name": None, "hypervisor": None, "os": None, "beparams": {}, "no_nics": False, "disk_template": None, "tags": 
None, }) ARGS_VBOX = dict(ARGS_EXPORT_DIR, **{ "output_dir": OUTPUT_DIR, "name": "test-instance", "os": "lenny-image", "hypervisor": ("xen-pvm", {}), "osparams":{}, "disks": [], }) ARGS_COMPLETE = dict(ARGS_VBOX, **{ "beparams": {"vcpus":"1", "memory":"256", "auto_balance": False}, "disks": [(0,{"size":"5mb"})], "nics": [("0",{"mode":"bridged"})], "disk_template": "plain", "tags": "test-tag-1,test-tag-2", }) ARGS_BROKEN = dict(ARGS_EXPORT_DIR , **{ "no_nics": True, "disk_template": "diskless", "name": "test-instance", "os": "lenny-image", "osparams": {}, }) EXP_ARGS_COMPRESSED = dict(ARGS_EXPORT_DIR, **{ "compression": True, }) EXP_DISKS_LIST = [ { "format": "vmdk", "compression": "gzip", "virt-size": 90000, "real-size": 203, "path": "new_disk.cow.gz", }, { "format": "cow", "virt-size": 15, "real-size": 15, "path": "new_disk.cow", }, ] EXP_NETWORKS_LIST = [ {"mac": "aa:00:00:d8:2c:1e", "ip":"None", "link":"br0", "mode":"routed", "network": "test"}, ] EXP_PARTIAL_GANETI_DICT = { "hypervisor": {"name": "xen-kvm"}, "os": {"name": "lenny-image"}, "auto_balance": "True", "version": "0", } EXP_GANETI_DICT = { "tags": None, "auto_balance": "False", "hypervisor": { "root-path": "/dev/sda", "name": "xen-pvm", "kernel_args": "ro" }, "version": "0", "disk_template": None, "os": {"name": "lenny-image"} } EXP_NAME ="xen-dev-i1" EXP_VCPUS = 1 EXP_MEMORY = 512 EXPORT_EMPTY = ("") EXPORT_DISKS_EMPTY = ("Virtual disk" " information") EXPORT_DISKS = ("" "Virtual disk information" "") EXPORT_NETWORKS_EMPTY = ("List of logical networks" "") EXPORT_NETWORKS = ("List of logical networks" "") EXPORT_GANETI_INCOMPLETE = ("0" "Truelenny-image" "xen-kvmroutedaa:00:00:d8:2c:1eNone" "br0test" "") EXPORT_GANETI = ("0False" "lenny-imagexen-pvm" "/dev/sdaroroutedaa:00:00:d8:2c:1eNonebr0" "test" "") EXPORT_SYSTEM = ("" "Virtual disk information" "" "" "List of logical networks" "A virtual machinexen-dev-i1" "Installed guest" " operating systemVirtual hardware requirements" "Virtual Hardware Family" "0xen-dev-i1ganeti-ovf1 virtual CPU(s)" "131" "" "byte * 2^20512MB of" " memory24512" "0scsi" "_controller03" "lsilogic6" "disk0ovf:/disk/disk043" "17disk1ovf:/" "disk/disk15317" "aa:00" ":00:d8:2c:1erouted0routed0610" "") def _GetArgs(args, with_name=False): options = optparse.Values() needed = args if with_name: needed["name"] = "test-instance" options._update_loose(needed) return options OPTS_EMPTY = _GetArgs(ARGS_EMPTY) OPTS_EXPORT_NO_NAME = _GetArgs(ARGS_EXPORT_DIR) OPTS_EXPORT = _GetArgs(ARGS_EXPORT_DIR, with_name=True) EXP_OPTS = OPTS_EXPORT_NO_NAME EXP_OPTS_COMPRESSED = _GetArgs(EXP_ARGS_COMPRESSED) OPTS_VBOX = _GetArgs(ARGS_VBOX) OPTS_COMPLETE = _GetArgs(ARGS_COMPLETE) OPTS_NONIC_NODISK = _GetArgs(ARGS_BROKEN) def _GetFullFilename(file_name): file_path = "%s/test/data/ovfdata/%s" % (testutils.GetSourceDir(), file_name) file_path = os.path.abspath(file_path) return file_path class BetterUnitTest(unittest.TestCase): def assertRaisesRegexp(self, exception, regexp_val, function, *args): try: function(*args) self.fail("Expected raising %s" % exception) except exception, err: regexp = re.compile(regexp_val) if re.search(regexp, str(err)) == None: self.fail("Expected matching '%s', got '%s'" % (regexp_val, str(err))) class TestOVFImporter(BetterUnitTest): def setUp(self): self.non_existing_file = _GetFullFilename("not_the_file.ovf") self.ganeti_ovf = _GetFullFilename("ganeti.ovf") self.virtualbox_ovf = _GetFullFilename("virtualbox.ovf") self.ova_package = _GetFullFilename("ova.ova") self.empty_ovf = 
_GetFullFilename("empty.ovf") self.wrong_extension = _GetFullFilename("wrong_extension.ovd") self.wrong_ova_archive = _GetFullFilename("wrong_ova.ova") self.no_ovf_in_ova = _GetFullFilename("no_ovf.ova") self.importer = None def tearDown(self): if self.importer: self.importer.Cleanup() del_dir = os.path.abspath(OUTPUT_DIR) try: shutil.rmtree(del_dir) except OSError: pass def testFileDoesNotExistError(self): self.assertRaisesRegexp(errors.OpPrereqError, "does not exist", ovf.OVFImporter, self.non_existing_file, None) def testWrongInputFileExtensionError(self): self.assertRaisesRegexp(errors.OpPrereqError, "Unknown file extension", ovf.OVFImporter, self.wrong_extension, None) def testOVAUnpackingDirectories(self): self.importer = ovf.OVFImporter(self.ova_package, OPTS_EMPTY) self.assertTrue(self.importer.input_dir != None) self.assertEquals(self.importer.output_dir , pathutils.EXPORT_DIR) self.assertTrue(self.importer.temp_dir != None) def testOVFUnpackingDirectories(self): self.importer = ovf.OVFImporter(self.virtualbox_ovf, OPTS_EMPTY) self.assertEquals(self.importer.input_dir , _GetFullFilename("")) self.assertEquals(self.importer.output_dir , pathutils.EXPORT_DIR) self.assertEquals(self.importer.temp_dir , None) def testOVFSetOutputDirDirectories(self): self.importer = ovf.OVFImporter(self.ganeti_ovf, OPTS_EXPORT) self.assertEquals(self.importer.input_dir , _GetFullFilename("")) self.assertTrue(OUTPUT_DIR in self.importer.output_dir) self.assertEquals(self.importer.temp_dir , None) def testWrongOVAArchiveError(self): self.assertRaisesRegexp(errors.OpPrereqError, "not a proper tar", ovf.OVFImporter, self.wrong_ova_archive, None) def testNoOVFFileInOVAPackageError(self): self.assertRaisesRegexp(errors.OpPrereqError, "No .ovf file", ovf.OVFImporter, self.no_ovf_in_ova, None) def testParseGanetiOvf(self): self.importer = ovf.OVFImporter(self.ganeti_ovf, OPTS_EXPORT_NO_NAME) self.importer.Parse() self.assertTrue("%s/ganeti-test-xen" % OUTPUT_DIR in self.importer.output_dir) self.assertEqual(self.importer.results_disk, GANETI_DISKS) self.assertEqual(self.importer.results_network, GANETI_NETWORKS) self.assertEqual(self.importer.results_hypervisor, GANETI_HYPERVISOR) self.assertEqual(self.importer.results_os, GANETI_OS) self.assertEqual(self.importer.results_backend, GANETI_BACKEND) self.assertEqual(self.importer.results_name, GANETI_NAME) self.assertEqual(self.importer.results_template, GANETI_TEMPLATE) self.assertEqual(self.importer.results_tags, GANETI_TAGS) self.assertEqual(self.importer.results_version, GANETI_VERSION) def testParseVirtualboxOvf(self): self.importer = ovf.OVFImporter(self.virtualbox_ovf, OPTS_VBOX) self.importer.Parse() self.assertTrue("%s/test-instance" % OUTPUT_DIR in self.importer.output_dir) self.assertEquals(self.importer.results_disk, VIRTUALBOX_DISKS) self.assertEquals(self.importer.results_network, VIRTUALBOX_NETWORKS) self.assertEquals(self.importer.results_hypervisor, CMDARGS_HYPERVISOR) self.assertEquals(self.importer.results_os, CMDARGS_OS) self.assertEquals(self.importer.results_backend, VIRTUALBOX_BACKEND) self.assertEquals(self.importer.results_name, CMDARGS_NAME) self.assertEquals(self.importer.results_template, VIRTUALBOX_TEMPLATE) self.assertEqual(self.importer.results_tags, VIRTUALBOX_TAGS) self.assertEqual(self.importer.results_version, constants.EXPORT_VERSION) def testParseEmptyOvf(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) self.importer.Parse() self.assertTrue("%s/test-instance" % OUTPUT_DIR in self.importer.output_dir) 
self.assertEquals(self.importer.results_disk, CMDARGS_DISKS) self.assertEquals(self.importer.results_network, CMDARGS_NETWORKS) self.assertEquals(self.importer.results_hypervisor, CMDARGS_HYPERVISOR) self.assertEquals(self.importer.results_os, CMDARGS_OS) self.assertEquals(self.importer.results_backend, CMDARGS_BACKEND) self.assertEquals(self.importer.results_name, CMDARGS_NAME) self.assertEquals(self.importer.results_template, CMDARGS_TEMPLATE) self.assertEqual(self.importer.results_tags, CMDARGS_TAGS) self.assertEqual(self.importer.results_version, constants.EXPORT_VERSION) def testParseNameOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseNameOptions() self.assertEquals(results, CMDARGS_NAME) def testParseHypervisorOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseHypervisorOptions() self.assertEquals(results, CMDARGS_HYPERVISOR) def testParseOSOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseOSOptions() self.assertEquals(results, CMDARGS_OS) def testParseBackendOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseBackendOptions() self.assertEquals(results, CMDARGS_BACKEND) def testParseTags(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseTags() self.assertEquals(results, CMDARGS_TAGS) def testParseNicOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseNicOptions() self.assertEquals(results, CMDARGS_NETWORKS) def testParseDiskOptionsFromGanetiOVF(self): self.importer = ovf.OVFImporter(self.ganeti_ovf, OPTS_EXPORT) os.mkdir(OUTPUT_DIR) results = self.importer._GetDiskInfo() self.assertEquals(results, GANETI_DISKS) def testParseTemplateOptions(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) results = self.importer._ParseTemplateOptions() self.assertEquals(results, GANETI_TEMPLATE) def testParseDiskOptionsFromCmdLine(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_COMPLETE) os.mkdir(OUTPUT_DIR) results = self.importer._ParseDiskOptions() self.assertEquals(results, CMDARGS_DISKS) def testGetDiskFormat(self): self.importer = ovf.OVFImporter(self.ganeti_ovf, OPTS_EXPORT) disks_list = self.importer.ovf_reader.GetDisksNames() results = [self.importer._GetDiskQemuInfo("%s/%s" % (self.importer.input_dir, path), "file format: (\S+)") for (path, _) in disks_list] self.assertEqual(results, ["vmdk"]) def testNoInstanceNameOVF(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_EXPORT_NO_NAME) self.assertRaisesRegexp(errors.OpPrereqError, "Name of instance", self.importer.Parse) def testErrorNoOSNameOVF(self): self.importer = ovf.OVFImporter(self.virtualbox_ovf, OPTS_EXPORT) self.assertRaisesRegexp(errors.OpPrereqError, "OS name", self.importer.Parse) def testErrorNoDiskAndNoNetwork(self): self.importer = ovf.OVFImporter(self.empty_ovf, OPTS_NONIC_NODISK) self.assertRaisesRegexp(errors.OpPrereqError, "Either disk specification or network" " description", self.importer.Parse) class TestOVFExporter(BetterUnitTest): def setUp(self): self.exporter = None self.wrong_config_file = _GetFullFilename("wrong_config.ini") self.unsafe_path_to_disk = _GetFullFilename("unsafe_path.ini") self.disk_image_not_exist = _GetFullFilename("no_disk.ini") self.empty_config = _GetFullFilename("empty.ini") self.standard_export = 
_GetFullFilename("config.ini") self.wrong_network_mode = self.disk_image_not_exist self.no_memory = self.disk_image_not_exist self.no_vcpus = self.disk_image_not_exist self.no_os = _GetFullFilename("no_os.ini") self.no_hypervisor = self.disk_image_not_exist def tearDown(self): if self.exporter: self.exporter.Cleanup() del_dir = os.path.abspath(OUTPUT_DIR) try: shutil.rmtree(del_dir) except OSError: pass def testErrorWrongConfigFile(self): self.assertRaisesRegexp(errors.OpPrereqError, "Error when trying to read", ovf.OVFExporter, self.wrong_config_file, EXP_OPTS) def testErrorPathToTheDiskIncorrect(self): self.exporter = ovf.OVFExporter(self.unsafe_path_to_disk, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "contains a directory name", self.exporter._ParseDisks) def testErrorDiskImageNotExist(self): self.exporter = ovf.OVFExporter(self.disk_image_not_exist, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "Disk image does not exist", self.exporter._ParseDisks) def testParseNetworks(self): self.exporter = ovf.OVFExporter(self.standard_export, EXP_OPTS) results = self.exporter._ParseNetworks() self.assertEqual(results, EXP_NETWORKS_LIST) def testErrorWrongNetworkMode(self): self.exporter = ovf.OVFExporter(self.wrong_network_mode, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "Network mode nic not recognized", self.exporter._ParseNetworks) def testParseVCPusMem(self): self.exporter = ovf.OVFExporter(self.standard_export, EXP_OPTS) vcpus = self.exporter._ParseVCPUs() memory = self.exporter._ParseMemory() self.assertEqual(vcpus, EXP_VCPUS) self.assertEqual(memory, EXP_MEMORY) def testErrorNoVCPUs(self): self.exporter = ovf.OVFExporter(self.no_vcpus, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "No CPU information found", self.exporter._ParseVCPUs) def testErrorNoMemory(self): self.exporter = ovf.OVFExporter(self.no_memory, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "No memory information found", self.exporter._ParseMemory) def testParseGaneti(self): self.exporter = ovf.OVFExporter(self.standard_export, EXP_OPTS) results = self.exporter._ParseGaneti() self.assertEqual(results, EXP_GANETI_DICT) def testErrorNoHypervisor(self): self.exporter = ovf.OVFExporter(self.no_hypervisor, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "No hypervisor information found", self.exporter._ParseGaneti) def testErrorNoOS(self): self.exporter = ovf.OVFExporter(self.no_os, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "No operating system information found", self.exporter._ParseGaneti) def testErrorParseNoInstanceName(self): self.exporter = ovf.OVFExporter(self.empty_config, EXP_OPTS) self.assertRaisesRegexp(errors.OpPrereqError, "No instance name found", self.exporter.Parse) class TestOVFReader(BetterUnitTest): def setUp(self): self.wrong_xml_file = _GetFullFilename("wrong_xml.ovf") self.ganeti_ovf = _GetFullFilename("ganeti.ovf") self.virtualbox_ovf = _GetFullFilename("virtualbox.ovf") self.corrupted_ovf = _GetFullFilename("corrupted_resources.ovf") self.wrong_manifest_ovf = _GetFullFilename("wrong_manifest.ovf") self.no_disk_in_ref_ovf = _GetFullFilename("no_disk_in_ref.ovf") self.empty_ovf = _GetFullFilename("empty.ovf") self.compressed_disk = _GetFullFilename("gzip_disk.ovf") def tearDown(self): pass def testXMLParsingError(self): self.assertRaisesRegexp(errors.OpPrereqError, "Error while reading .ovf", ovf.OVFReader, self.wrong_xml_file) def testFileInResourcesDoesNotExistError(self): self.assertRaisesRegexp(errors.OpPrereqError, "does not 
exist", ovf.OVFReader, self.corrupted_ovf) def testWrongManifestChecksumError(self): reader = ovf.OVFReader(self.wrong_manifest_ovf) self.assertRaisesRegexp(errors.OpPrereqError, "does not match the value in manifest file", reader.VerifyManifest) def testGoodManifestChecksum(self): reader = ovf.OVFReader(self.ganeti_ovf) self.assertEqual(reader.VerifyManifest(), None) def testGetDisksNamesOVFCorruptedError(self): reader = ovf.OVFReader(self.no_disk_in_ref_ovf) self.assertRaisesRegexp(errors.OpPrereqError, "not found in references", reader.GetDisksNames) def testGetDisksNamesVirtualbox(self): reader = ovf.OVFReader(self.virtualbox_ovf) disk_names = reader.GetDisksNames() expected_names = [ ("new_disk.vmdk", None) , ("second_disk.vmdk", None), ] self.assertEqual(sorted(disk_names), sorted(expected_names)) def testGetDisksNamesEmpty(self): reader = ovf.OVFReader(self.empty_ovf) disk_names = reader.GetDisksNames() self.assertEqual(disk_names, []) def testGetDisksNamesCompressed(self): reader = ovf.OVFReader(self.compressed_disk) disk_names = reader.GetDisksNames() self.assertEqual(disk_names, [("compr_disk.vmdk.gz", "gzip")]) def testGetNetworkDataGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) networks = reader.GetNetworkData() self.assertEqual(networks, GANETI_NETWORKS) def testGetNetworkDataVirtualbox(self): reader = ovf.OVFReader(self.virtualbox_ovf) networks = reader.GetNetworkData() self.assertEqual(networks, VIRTUALBOX_NETWORKS) def testGetNetworkDataEmpty(self): reader = ovf.OVFReader(self.empty_ovf) networks = reader.GetNetworkData() self.assertEqual(networks, EMPTY_NETWORKS) def testGetHypervisorDataGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) hypervisor = reader.GetHypervisorData() self.assertEqual(hypervisor, GANETI_HYPERVISOR) def testGetHypervisorDataEmptyOvf(self): reader = ovf.OVFReader(self.empty_ovf) hypervisor = reader.GetHypervisorData() self.assertEqual(hypervisor, EMPTY_HYPERVISOR) def testGetOSDataGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) osys = reader.GetOSData() self.assertEqual(osys, GANETI_OS) def testGetOSDataEmptyOvf(self): reader = ovf.OVFReader(self.empty_ovf) osys = reader.GetOSData() self.assertEqual(osys, EMPTY_OS) def testGetBackendDataGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) backend = reader.GetBackendData() self.assertEqual(backend, GANETI_BACKEND) def testGetBackendDataVirtualbox(self): reader = ovf.OVFReader(self.virtualbox_ovf) backend = reader.GetBackendData() self.assertEqual(backend, VIRTUALBOX_BACKEND) def testGetBackendDataEmptyOvf(self): reader = ovf.OVFReader(self.empty_ovf) backend = reader.GetBackendData() self.assertEqual(backend, EMPTY_BACKEND) def testGetInstanceNameGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) name = reader.GetInstanceName() self.assertEqual(name, GANETI_NAME) def testGetInstanceNameDataEmptyOvf(self): reader = ovf.OVFReader(self.empty_ovf) name = reader.GetInstanceName() self.assertEqual(name, EMPTY_NAME) def testGetDiskTemplateGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) name = reader.GetDiskTemplate() self.assertEqual(name, GANETI_TEMPLATE) def testGetDiskTemplateEmpty(self): reader = ovf.OVFReader(self.empty_ovf) name = reader.GetDiskTemplate() self.assertEqual(name, EMPTY_TEMPLATE) def testGetTagsGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) tags = reader.GetTagsData() self.assertEqual(tags, GANETI_TAGS) def testGetTagsEmpty(self): reader = ovf.OVFReader(self.empty_ovf) tags = reader.GetTagsData() self.assertEqual(tags, EMPTY_TAGS) def 
testGetVersionGaneti(self): reader = ovf.OVFReader(self.ganeti_ovf) version = reader.GetVersionData() self.assertEqual(version, GANETI_VERSION) def testGetVersionEmpty(self): reader = ovf.OVFReader(self.empty_ovf) version = reader.GetVersionData() self.assertEqual(version, EMPTY_VERSION) class TestOVFWriter(BetterUnitTest): def setUp(self): self.writer = ovf.OVFWriter(True) def tearDown(self): pass def testOVFWriterInit(self): result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_EMPTY in result) def testSaveDisksDataEmpty(self): self.writer.SaveDisksData([]) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_DISKS_EMPTY in result) def testSaveDisksData(self): self.writer.SaveDisksData(EXP_DISKS_LIST) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_DISKS in result) def testSaveNetworkDataEmpty(self): self.writer.SaveNetworksData([]) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_NETWORKS_EMPTY in result) def testSaveNetworksData(self): self.writer.SaveNetworksData(EXP_NETWORKS_LIST) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_NETWORKS in result) def testSaveGanetiDataIncomplete(self): self.writer.SaveGanetiData(EXP_PARTIAL_GANETI_DICT, EXP_NETWORKS_LIST) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_GANETI_INCOMPLETE in result) def testSaveGanetiDataComplete(self): self.writer.SaveGanetiData(EXP_GANETI_DICT, EXP_NETWORKS_LIST) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_GANETI in result) def testSaveVirtualSystem(self): self.writer.SaveDisksData(EXP_DISKS_LIST) self.writer.SaveNetworksData(EXP_NETWORKS_LIST) self.writer.SaveVirtualSystemData(EXP_NAME, EXP_VCPUS, EXP_MEMORY) result = ET.tostring(self.writer.tree) self.assertTrue(EXPORT_SYSTEM in result) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.confd.client_unittest.py0000744000000000000000000001617512244641676022555 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
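# The tests in this module inject fakes by monkey-patching module-level names
# (confd.client.time, confd.client.ConfdAsyncUDPClient) rather than passing
# dependencies in. A minimal sketch of that pattern, using a hypothetical
# FakeClock name for illustration only:
#
#   import ganeti.confd.client
#
#   class FakeClock(object):
#     def __init__(self):
#       self.now = 1000.0
#     def time(self):
#       return self.now
#
#   ganeti.confd.client.time = FakeClock()
#   # time.time() calls inside the client module now read the fake clock.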
"""Script for unittesting the confd client module""" import socket import unittest from ganeti import confd from ganeti import constants from ganeti import errors import ganeti.confd.client import testutils class ResettableMock(object): def __init__(self, *args, **kwargs): self.Reset() def Reset(self): pass class MockLogger(ResettableMock): def Reset(self): self.debug_count = 0 self.warn_count = 0 self.error_count = 0 def debug(string): self.debug_count += 1 def warning(string): self.warn_count += 1 def error(string): self.error_count += 1 class MockConfdAsyncUDPClient(ResettableMock): def Reset(self): self.send_count = 0 self.last_address = '' self.last_port = -1 self.last_sent = '' def enqueue_send(self, address, port, payload): self.send_count += 1 self.last_payload = payload self.last_port = port self.last_address = address class MockCallback(ResettableMock): def Reset(self): self.call_count = 0 self.last_up = None def __call__(self, up): """Callback @type up: L{ConfdUpcallPayload} @param up: upper callback """ self.call_count += 1 self.last_up = up class MockTime(ResettableMock): def Reset(self): self.mytime = 1254213006.5175071 def time(self): return self.mytime def increase(self, delta): self.mytime += delta class _BaseClientTest: """Base class for client tests""" mc_list = None new_peers = None family = None def setUp(self): self.mock_time = MockTime() confd.client.time = self.mock_time confd.client.ConfdAsyncUDPClient = MockConfdAsyncUDPClient self.logger = MockLogger() hmac_key = "mykeydata" self.callback = MockCallback() self.client = confd.client.ConfdClient(hmac_key, self.mc_list, self.callback, logger=self.logger) def testRequest(self): req1 = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) req2 = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.assertNotEqual(req1.rsalt, req2.rsalt) self.assertEqual(req1.protocol, constants.CONFD_PROTOCOL_VERSION) self.assertEqual(req2.protocol, constants.CONFD_PROTOCOL_VERSION) self.assertRaises(errors.ConfdClientError, confd.client.ConfdClientRequest, type=-33) def testClientSend(self): req = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req) # Cannot send the same request twice self.assertRaises(errors.ConfdClientError, self.client.SendRequest, req) req2 = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) # Coverage is too big self.assertRaises(errors.ConfdClientError, self.client.SendRequest, req2, coverage=15) self.assertEquals(self.client._socket.send_count, constants.CONFD_DEFAULT_REQ_COVERAGE) # Send with max coverage self.client.SendRequest(req2, coverage=-1) self.assertEquals(self.client._socket.send_count, constants.CONFD_DEFAULT_REQ_COVERAGE + len(self.mc_list)) self.assert_(self.client._socket.last_address in self.mc_list) def testClientExpire(self): req = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req) # Make a couple of seconds pass ;) self.mock_time.increase(2) # Now sending the second request req2 = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req2) self.mock_time.increase(constants.CONFD_CLIENT_EXPIRE_TIMEOUT - 1) # First request should be expired, second one should not self.client.ExpireRequests() self.assertEquals(self.callback.call_count, 1) self.assertEquals(self.callback.last_up.type, confd.client.UPCALL_EXPIRE) self.assertEquals(self.callback.last_up.salt, req.rsalt) self.assertEquals(self.callback.last_up.orig_request, req) 
self.mock_time.increase(3) self.assertEquals(self.callback.call_count, 1) self.client.ExpireRequests() self.assertEquals(self.callback.call_count, 2) self.assertEquals(self.callback.last_up.type, confd.client.UPCALL_EXPIRE) self.assertEquals(self.callback.last_up.salt, req2.rsalt) self.assertEquals(self.callback.last_up.orig_request, req2) def testClientCascadeExpire(self): req = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req) self.mock_time.increase(constants.CONFD_CLIENT_EXPIRE_TIMEOUT +1) req2 = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req2) self.assertEquals(self.callback.call_count, 1) def testUpdatePeerList(self): self.client.UpdatePeerList(self.new_peers) self.assertEquals(self.client._peers, self.new_peers) req = confd.client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.client.SendRequest(req) self.assertEquals(self.client._socket.send_count, len(self.new_peers)) self.assert_(self.client._socket.last_address in self.new_peers) def testSetPeersFamily(self): self.client._SetPeersAddressFamily() self.assertEquals(self.client._family, self.family) mixed_peers = ["192.0.2.99", "2001:db8:beef::13"] self.client.UpdatePeerList(mixed_peers) self.assertRaises(errors.ConfdClientError, self.client._SetPeersAddressFamily) class TestIP4Client(unittest.TestCase, _BaseClientTest): """Client tests""" mc_list = ["192.0.2.1", "192.0.2.2", "192.0.2.3", "192.0.2.4", "192.0.2.5", "192.0.2.6", "192.0.2.7", "192.0.2.8", "192.0.2.9", ] new_peers = ["198.51.100.1", "198.51.100.2"] family = socket.AF_INET def setUp(self): unittest.TestCase.setUp(self) _BaseClientTest.setUp(self) class TestIP6Client(unittest.TestCase, _BaseClientTest): """Client tests""" mc_list = ["2001:db8::1", "2001:db8::2", "2001:db8::3", "2001:db8::4", "2001:db8::5", "2001:db8::6", "2001:db8::7", "2001:db8::8", "2001:db8::9", ] new_peers = ["2001:db8:beef::11", "2001:db8:beef::12"] family = socket.AF_INET6 def setUp(self): unittest.TestCase.setUp(self) _BaseClientTest.setUp(self) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.client.gnt_job_unittest.py0000744000000000000000000001202212244641676023071 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing ganeti.client.gnt_job""" import unittest import optparse from ganeti.client import gnt_job from ganeti import utils from ganeti import errors from ganeti import query from ganeti import qlang from ganeti import objects from ganeti import compat from ganeti import constants import testutils class _ClientForCancelJob: def __init__(self, cancel_cb, query_cb): self.cancelled = [] self._cancel_cb = cancel_cb self._query_cb = query_cb def CancelJob(self, job_id): self.cancelled.append(job_id) return self._cancel_cb(job_id) def Query(self, kind, selected, qfilter): assert kind == constants.QR_JOB assert selected == ["id", "status", "summary"] fields = query.GetAllFields(query._GetQueryFields(query.JOB_FIELDS, selected)) return objects.QueryResponse(data=self._query_cb(qfilter), fields=fields) class TestCancelJob(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.stdout = [] def _ToStdout(self, line): self.stdout.append(line) def _Ask(self, answer, question): self.assertTrue(question.endswith("?")) return answer def testStatusFilterAndArguments(self): opts = optparse.Values(dict(status_filter=frozenset())) try: gnt_job.CancelJobs(opts, ["a"], cl=NotImplemented, _stdout_fn=NotImplemented, _ask_fn=NotImplemented) except errors.OpPrereqError, err: self.assertEqual(err.args[1], errors.ECODE_INVAL) else: self.fail("Did not raise exception") def _TestArguments(self, force): opts = optparse.Values(dict(status_filter=None, force=force)) def _CancelCb(job_id): self.assertTrue(job_id in ("24185", "3252")) return (True, "%s will be cancelled" % job_id) cl = _ClientForCancelJob(_CancelCb, NotImplemented) self.assertEqual(gnt_job.CancelJobs(opts, ["24185", "3252"], cl=cl, _stdout_fn=self._ToStdout, _ask_fn=NotImplemented), constants.EXIT_SUCCESS) self.assertEqual(cl.cancelled, ["24185", "3252"]) self.assertEqual(self.stdout, [ "24185 will be cancelled", "3252 will be cancelled", ]) def testArgumentsWithForce(self): self._TestArguments(True) def testArgumentsNoForce(self): self._TestArguments(False) def testArgumentsWithError(self): opts = optparse.Values(dict(status_filter=None, force=True)) def _CancelCb(job_id): if job_id == "10788": return (False, "error %s" % job_id) else: return (True, "%s will be cancelled" % job_id) cl = _ClientForCancelJob(_CancelCb, NotImplemented) self.assertEqual(gnt_job.CancelJobs(opts, ["203", "10788", "30801"], cl=cl, _stdout_fn=self._ToStdout, _ask_fn=NotImplemented), constants.EXIT_FAILURE) self.assertEqual(cl.cancelled, ["203", "10788", "30801"]) self.assertEqual(self.stdout, [ "203 will be cancelled", "error 10788", "30801 will be cancelled", ]) def testFilterPending(self): opts = optparse.Values(dict(status_filter=constants.JOBS_PENDING, force=False)) def _Query(qfilter): # Need to sort as constants.JOBS_PENDING has no stable order assert isinstance(constants.JOBS_PENDING, frozenset) self.assertEqual(sorted(qfilter), sorted(qlang.MakeSimpleFilter("status", constants.JOBS_PENDING))) return [ [(constants.RS_UNAVAIL, None), (constants.RS_UNAVAIL, None), (constants.RS_UNAVAIL, None)], [(constants.RS_NORMAL, 32532), (constants.RS_NORMAL, constants.JOB_STATUS_QUEUED), (constants.RS_NORMAL, ["op1", "op2", "op3"])], ] cl = _ClientForCancelJob(NotImplemented, _Query) result = gnt_job.CancelJobs(opts, [], cl=cl, _stdout_fn=self._ToStdout, _ask_fn=compat.partial(self._Ask, False)) self.assertEqual(result, constants.EXIT_CONFIRMATION) if __name__ == "__main__": testutils.GanetiTestProgram() 
ganeti-2.9.3/test/py/ganeti.luxi_unittest.py0000744000000000000000000002170412271422343021146 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the luxi module""" import unittest from ganeti import constants from ganeti import errors from ganeti import luxi from ganeti import serializer import testutils class TestLuxiParsing(testutils.GanetiTestCase): def testParseRequest(self): msg = serializer.DumpJson({ luxi.KEY_METHOD: "foo", luxi.KEY_ARGS: ("bar", "baz", 123), }) self.assertEqualValues(luxi.ParseRequest(msg), ("foo", ["bar", "baz", 123], None)) self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, "this\"is {invalid, ]json data") # No dict self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, serializer.DumpJson(123)) # Empty dict self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, serializer.DumpJson({ })) # No arguments self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, serializer.DumpJson({ luxi.KEY_METHOD: "foo", })) # No method self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, serializer.DumpJson({ luxi.KEY_ARGS: [], })) # No method or arguments self.assertRaises(luxi.ProtocolError, luxi.ParseRequest, serializer.DumpJson({ luxi.KEY_VERSION: 1, })) def testParseRequestWithVersion(self): msg = serializer.DumpJson({ luxi.KEY_METHOD: "version", luxi.KEY_ARGS: (["some"], "args", 0, "here"), luxi.KEY_VERSION: 20100101, }) self.assertEqualValues(luxi.ParseRequest(msg), ("version", [["some"], "args", 0, "here"], 20100101)) def testParseResponse(self): msg = serializer.DumpJson({ luxi.KEY_SUCCESS: True, luxi.KEY_RESULT: None, }) self.assertEqual(luxi.ParseResponse(msg), (True, None, None)) self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, "this\"is {invalid, ]json data") # No dict self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, serializer.DumpJson(123)) # Empty dict self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, serializer.DumpJson({ })) # No success self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, serializer.DumpJson({ luxi.KEY_RESULT: True, })) # No result self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, serializer.DumpJson({ luxi.KEY_SUCCESS: True, })) # No result or success self.assertRaises(luxi.ProtocolError, luxi.ParseResponse, serializer.DumpJson({ luxi.KEY_VERSION: 123, })) def testParseResponseWithVersion(self): msg = serializer.DumpJson({ luxi.KEY_SUCCESS: True, luxi.KEY_RESULT: "Hello World", luxi.KEY_VERSION: 19991234, }) self.assertEqual(luxi.ParseResponse(msg), (True, "Hello World", 19991234)) def testFormatResponse(self): for success, result in [(False, "error"), (True, "abc"), (True, { "a": 123, "b": None, })]: msg = luxi.FormatResponse(success, result) msgdata = serializer.LoadJson(msg) self.assert_(luxi.KEY_SUCCESS in msgdata) 
self.assert_(luxi.KEY_RESULT in msgdata) self.assert_(luxi.KEY_VERSION not in msgdata) self.assertEqualValues(msgdata, { luxi.KEY_SUCCESS: success, luxi.KEY_RESULT: result, }) def testFormatResponseWithVersion(self): for success, result, version in [(False, "error", 123), (True, "abc", 999), (True, { "a": 123, "b": None, }, 2010)]: msg = luxi.FormatResponse(success, result, version=version) msgdata = serializer.LoadJson(msg) self.assert_(luxi.KEY_SUCCESS in msgdata) self.assert_(luxi.KEY_RESULT in msgdata) self.assert_(luxi.KEY_VERSION in msgdata) self.assertEqualValues(msgdata, { luxi.KEY_SUCCESS: success, luxi.KEY_RESULT: result, luxi.KEY_VERSION: version, }) def testFormatRequest(self): for method, args in [("a", []), ("b", [1, 2, 3])]: msg = luxi.FormatRequest(method, args) msgdata = serializer.LoadJson(msg) self.assert_(luxi.KEY_METHOD in msgdata) self.assert_(luxi.KEY_ARGS in msgdata) self.assert_(luxi.KEY_VERSION not in msgdata) self.assertEqualValues(msgdata, { luxi.KEY_METHOD: method, luxi.KEY_ARGS: args, }) def testFormatRequestWithVersion(self): for method, args, version in [("fn1", [], 123), ("fn2", [1, 2, 3], 999)]: msg = luxi.FormatRequest(method, args, version=version) msgdata = serializer.LoadJson(msg) self.assert_(luxi.KEY_METHOD in msgdata) self.assert_(luxi.KEY_ARGS in msgdata) self.assert_(luxi.KEY_VERSION in msgdata) self.assertEqualValues(msgdata, { luxi.KEY_METHOD: method, luxi.KEY_ARGS: args, luxi.KEY_VERSION: version, }) class TestCallLuxiMethod(unittest.TestCase): MY_LUXI_VERSION = 1234 assert constants.LUXI_VERSION != MY_LUXI_VERSION def testSuccessNoVersion(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fn1") self.assertEqual(args, "Hello World") return luxi.FormatResponse(True, "x") result = luxi.CallLuxiMethod(_Cb, "fn1", "Hello World") def testServerVersionOnly(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fn1") self.assertEqual(args, "Hello World") return luxi.FormatResponse(True, "x", version=self.MY_LUXI_VERSION) self.assertRaises(errors.LuxiError, luxi.CallLuxiMethod, _Cb, "fn1", "Hello World") def testWithVersion(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fn99") self.assertEqual(args, "xyz") return luxi.FormatResponse(True, "y", version=self.MY_LUXI_VERSION) self.assertEqual("y", luxi.CallLuxiMethod(_Cb, "fn99", "xyz", version=self.MY_LUXI_VERSION)) def testVersionMismatch(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fn5") self.assertEqual(args, "xyz") return luxi.FormatResponse(True, "F", version=self.MY_LUXI_VERSION * 2) self.assertRaises(errors.LuxiError, luxi.CallLuxiMethod, _Cb, "fn5", "xyz", version=self.MY_LUXI_VERSION) def testError(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fnErr") self.assertEqual(args, []) err = errors.OpPrereqError("Test") return luxi.FormatResponse(False, errors.EncodeException(err)) self.assertRaises(errors.OpPrereqError, luxi.CallLuxiMethod, _Cb, "fnErr", []) def testErrorWithVersionMismatch(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fnErr") self.assertEqual(args, []) err = errors.OpPrereqError("TestVer") return luxi.FormatResponse(False, errors.EncodeException(err), version=self.MY_LUXI_VERSION * 2) self.assertRaises(errors.LuxiError, luxi.CallLuxiMethod, _Cb, "fnErr", [], version=self.MY_LUXI_VERSION) def 
testErrorWithVersion(self): def _Cb(msg): (method, args, version) = luxi.ParseRequest(msg) self.assertEqual(method, "fn9") self.assertEqual(args, []) err = errors.OpPrereqError("TestVer") return luxi.FormatResponse(False, errors.EncodeException(err), version=self.MY_LUXI_VERSION) self.assertRaises(errors.OpPrereqError, luxi.CallLuxiMethod, _Cb, "fn9", [], version=self.MY_LUXI_VERSION) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.text_unittest.py0000744000000000000000000005402612271422343022313 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.text""" import re import string import time import unittest import os from cStringIO import StringIO from ganeti import constants from ganeti import utils from ganeti import errors import testutils class TestMatchNameComponent(unittest.TestCase): """Test case for the MatchNameComponent function""" def testEmptyList(self): """Test that there is no match against an empty list""" self.failUnlessEqual(utils.MatchNameComponent("", []), None) self.failUnlessEqual(utils.MatchNameComponent("test", []), None) def testSingleMatch(self): """Test that a single match is performed correctly""" mlist = ["test1.example.com", "test2.example.com", "test3.example.com"] for key in "test2", "test2.example", "test2.example.com": self.failUnlessEqual(utils.MatchNameComponent(key, mlist), mlist[1]) def testMultipleMatches(self): """Test that a multiple match is returned as None""" mlist = ["test1.example.com", "test1.example.org", "test1.example.net"] for key in "test1", "test1.example": self.failUnlessEqual(utils.MatchNameComponent(key, mlist), None) def testFullMatch(self): """Test that a full match is returned correctly""" key1 = "test1" key2 = "test1.example" mlist = [key2, key2 + ".com"] self.failUnlessEqual(utils.MatchNameComponent(key1, mlist), None) self.failUnlessEqual(utils.MatchNameComponent(key2, mlist), key2) def testCaseInsensitivePartialMatch(self): """Test for the case_insensitive keyword""" mlist = ["test1.example.com", "test2.example.net"] self.assertEqual(utils.MatchNameComponent("test2", mlist, case_sensitive=False), "test2.example.net") self.assertEqual(utils.MatchNameComponent("Test2", mlist, case_sensitive=False), "test2.example.net") self.assertEqual(utils.MatchNameComponent("teSt2", mlist, case_sensitive=False), "test2.example.net") self.assertEqual(utils.MatchNameComponent("TeSt2", mlist, case_sensitive=False), "test2.example.net") def testCaseInsensitiveFullMatch(self): mlist = ["ts1.ex", "ts1.ex.org", "ts2.ex", "Ts2.ex"] # Between the two ts1 a full string match non-case insensitive should work self.assertEqual(utils.MatchNameComponent("Ts1", mlist, case_sensitive=False), None) self.assertEqual(utils.MatchNameComponent("Ts1.ex", mlist, 
case_sensitive=False), "ts1.ex") self.assertEqual(utils.MatchNameComponent("ts1.ex", mlist, case_sensitive=False), "ts1.ex") # Between the two ts2 only case differs, so only case-match works self.assertEqual(utils.MatchNameComponent("ts2.ex", mlist, case_sensitive=False), "ts2.ex") self.assertEqual(utils.MatchNameComponent("Ts2.ex", mlist, case_sensitive=False), "Ts2.ex") self.assertEqual(utils.MatchNameComponent("TS2.ex", mlist, case_sensitive=False), None) class TestDnsNameGlobPattern(unittest.TestCase): def setUp(self): self.names = [ "node1.example.com", "node2-0.example.com", "node2-1.example.com", "node1.example.net", "web1.example.com", "web2.example.com", "sub.site.example.com", ] def _Test(self, pattern): re_pat = utils.DnsNameGlobPattern(pattern) return filter(re.compile(re_pat).match, self.names) def test(self): for pattern in ["xyz", "node", " ", "example.net", "x*.example.*", "x*.example.com"]: self.assertEqual(self._Test(pattern), []) for pattern in ["*", "???*"]: self.assertEqual(self._Test(pattern), self.names) self.assertEqual(self._Test("node1.*.net"), ["node1.example.net"]) self.assertEqual(self._Test("*.example.net"), ["node1.example.net"]) self.assertEqual(self._Test("web1.example.com"), ["web1.example.com"]) for pattern in ["*.*.*.*", "???", "*.site"]: self.assertEqual(self._Test(pattern), ["sub.site.example.com"]) self.assertEqual(self._Test("node1"), [ "node1.example.com", "node1.example.net", ]) self.assertEqual(self._Test("node?*.example.*"), [ "node1.example.com", "node2-0.example.com", "node2-1.example.com", "node1.example.net", ]) self.assertEqual(self._Test("*-?"), [ "node2-0.example.com", "node2-1.example.com", ]) self.assertEqual(self._Test("node2-?.example.com"), [ "node2-0.example.com", "node2-1.example.com", ]) class TestFormatUnit(unittest.TestCase): """Test case for the FormatUnit function""" def testMiB(self): self.assertEqual(utils.FormatUnit(1, "h"), "1M") self.assertEqual(utils.FormatUnit(100, "h"), "100M") self.assertEqual(utils.FormatUnit(1023, "h"), "1023M") self.assertEqual(utils.FormatUnit(1, "m"), "1") self.assertEqual(utils.FormatUnit(100, "m"), "100") self.assertEqual(utils.FormatUnit(1023, "m"), "1023") self.assertEqual(utils.FormatUnit(1024, "m"), "1024") self.assertEqual(utils.FormatUnit(1536, "m"), "1536") self.assertEqual(utils.FormatUnit(17133, "m"), "17133") self.assertEqual(utils.FormatUnit(1024 * 1024 - 1, "m"), "1048575") def testGiB(self): self.assertEqual(utils.FormatUnit(1024, "h"), "1.0G") self.assertEqual(utils.FormatUnit(1536, "h"), "1.5G") self.assertEqual(utils.FormatUnit(17133, "h"), "16.7G") self.assertEqual(utils.FormatUnit(1024 * 1024 - 1, "h"), "1024.0G") self.assertEqual(utils.FormatUnit(1024, "g"), "1.0") self.assertEqual(utils.FormatUnit(1536, "g"), "1.5") self.assertEqual(utils.FormatUnit(17133, "g"), "16.7") self.assertEqual(utils.FormatUnit(1024 * 1024 - 1, "g"), "1024.0") self.assertEqual(utils.FormatUnit(1024 * 1024, "g"), "1024.0") self.assertEqual(utils.FormatUnit(5120 * 1024, "g"), "5120.0") self.assertEqual(utils.FormatUnit(29829 * 1024, "g"), "29829.0") def testTiB(self): self.assertEqual(utils.FormatUnit(1024 * 1024, "h"), "1.0T") self.assertEqual(utils.FormatUnit(5120 * 1024, "h"), "5.0T") self.assertEqual(utils.FormatUnit(29829 * 1024, "h"), "29.1T") self.assertEqual(utils.FormatUnit(1024 * 1024, "t"), "1.0") self.assertEqual(utils.FormatUnit(5120 * 1024, "t"), "5.0") self.assertEqual(utils.FormatUnit(29829 * 1024, "t"), "29.1") def testErrors(self): self.assertRaises(errors.ProgrammerError, 
utils.FormatUnit, 1, "a") class TestParseUnit(unittest.TestCase): """Test case for the ParseUnit function""" SCALES = (("", 1), ("M", 1), ("G", 1024), ("T", 1024 * 1024), ("MB", 1), ("GB", 1024), ("TB", 1024 * 1024), ("MiB", 1), ("GiB", 1024), ("TiB", 1024 * 1024)) def testRounding(self): self.assertEqual(utils.ParseUnit("0"), 0) self.assertEqual(utils.ParseUnit("1"), 4) self.assertEqual(utils.ParseUnit("2"), 4) self.assertEqual(utils.ParseUnit("3"), 4) self.assertEqual(utils.ParseUnit("124"), 124) self.assertEqual(utils.ParseUnit("125"), 128) self.assertEqual(utils.ParseUnit("126"), 128) self.assertEqual(utils.ParseUnit("127"), 128) self.assertEqual(utils.ParseUnit("128"), 128) self.assertEqual(utils.ParseUnit("129"), 132) self.assertEqual(utils.ParseUnit("130"), 132) def testFloating(self): self.assertEqual(utils.ParseUnit("0"), 0) self.assertEqual(utils.ParseUnit("0.5"), 4) self.assertEqual(utils.ParseUnit("1.75"), 4) self.assertEqual(utils.ParseUnit("1.99"), 4) self.assertEqual(utils.ParseUnit("2.00"), 4) self.assertEqual(utils.ParseUnit("2.01"), 4) self.assertEqual(utils.ParseUnit("3.99"), 4) self.assertEqual(utils.ParseUnit("4.00"), 4) self.assertEqual(utils.ParseUnit("4.01"), 8) self.assertEqual(utils.ParseUnit("1.5G"), 1536) self.assertEqual(utils.ParseUnit("1.8G"), 1844) self.assertEqual(utils.ParseUnit("8.28T"), 8682212) def testSuffixes(self): for sep in ("", " ", " ", "\t", "\t "): for suffix, scale in self.SCALES: for func in (lambda x: x, str.lower, str.upper): self.assertEqual(utils.ParseUnit("1024" + sep + func(suffix)), 1024 * scale) def testInvalidInput(self): for sep in ("-", "_", ",", "a"): for suffix, _ in self.SCALES: self.assertRaises(errors.UnitParseError, utils.ParseUnit, "1" + sep + suffix) for suffix, _ in self.SCALES: self.assertRaises(errors.UnitParseError, utils.ParseUnit, "1,3" + suffix) class TestShellQuoting(unittest.TestCase): """Test case for shell quoting functions""" def testShellQuote(self): self.assertEqual(utils.ShellQuote("abc"), "abc") self.assertEqual(utils.ShellQuote('ab"c'), "'ab\"c'") self.assertEqual(utils.ShellQuote("a'bc"), "'a'\\''bc'") self.assertEqual(utils.ShellQuote("a b c"), "'a b c'") self.assertEqual(utils.ShellQuote("a b\\ c"), "'a b\\ c'") def testShellQuoteArgs(self): self.assertEqual(utils.ShellQuoteArgs(["a", "b", "c"]), "a b c") self.assertEqual(utils.ShellQuoteArgs(['a', 'b"', 'c']), "a 'b\"' c") self.assertEqual(utils.ShellQuoteArgs(['a', 'b\'', 'c']), "a 'b'\\\''' c") class TestShellWriter(unittest.TestCase): def test(self): buf = StringIO() sw = utils.ShellWriter(buf) sw.Write("#!/bin/bash") sw.Write("if true; then") sw.IncIndent() try: sw.Write("echo true") sw.Write("for i in 1 2 3") sw.Write("do") sw.IncIndent() try: self.assertEqual(sw._indent, 2) sw.Write("date") finally: sw.DecIndent() sw.Write("done") finally: sw.DecIndent() sw.Write("echo %s", utils.ShellQuote("Hello World")) sw.Write("exit 0") self.assertEqual(sw._indent, 0) output = buf.getvalue() self.assert_(output.endswith("\n")) lines = output.splitlines() self.assertEqual(len(lines), 9) self.assertEqual(lines[0], "#!/bin/bash") self.assert_(re.match(r"^\s+date$", lines[5])) self.assertEqual(lines[7], "echo 'Hello World'") def testEmpty(self): buf = StringIO() sw = utils.ShellWriter(buf) sw = None self.assertEqual(buf.getvalue(), "") def testEmptyNoIndent(self): buf = StringIO() sw = utils.ShellWriter(buf, indent=False) sw = None self.assertEqual(buf.getvalue(), "") @classmethod def _AddLevel(cls, sw, level): if level == 6: return sw.IncIndent() try: # Add 
empty line, it should not be indented sw.Write("") sw.Write(str(level)) cls._AddLevel(sw, level + 1) finally: sw.DecIndent() def testEmptyLines(self): buf = StringIO() sw = utils.ShellWriter(buf) self._AddLevel(sw, 1) self.assertEqual(buf.getvalue(), "".join("\n%s%s\n" % (i * " ", i) for i in range(1, 6))) def testEmptyLinesNoIndent(self): buf = StringIO() sw = utils.ShellWriter(buf, indent=False) self._AddLevel(sw, 1) self.assertEqual(buf.getvalue(), "".join("\n%s\n" % i for i in range(1, 6))) class TestNormalizeAndValidateMac(unittest.TestCase): def testInvalid(self): for i in ["xxx", "00:11:22:33:44:55:66", "zz:zz:zz:zz:zz:zz"]: self.assertRaises(errors.OpPrereqError, utils.NormalizeAndValidateMac, i) def testNormalization(self): for mac in ["aa:bb:cc:dd:ee:ff", "00:AA:11:bB:22:cc"]: self.assertEqual(utils.NormalizeAndValidateMac(mac), mac.lower()) class TestNormalizeAndValidateThreeOctetMacPrefix(unittest.TestCase): def testInvalid(self): for i in ["xxx", "00:11:22:33:44:55:66", "zz:zz:zz:zz:zz:zz", "aa:bb:cc:dd:ee:ff", "00:AA:11:bB:22:cc", "00:11:"]: self.assertRaises(errors.OpPrereqError, utils.NormalizeAndValidateThreeOctetMacPrefix, i) def testNormalization(self): for mac in ["aa:bb:cc", "00:AA:11"]: self.assertEqual(utils.NormalizeAndValidateThreeOctetMacPrefix(mac), mac.lower()) class TestSafeEncode(unittest.TestCase): """Test case for SafeEncode""" def testAscii(self): for txt in [string.digits, string.letters, string.punctuation]: self.failUnlessEqual(txt, utils.SafeEncode(txt)) def testDoubleEncode(self): for i in range(255): txt = utils.SafeEncode(chr(i)) self.failUnlessEqual(txt, utils.SafeEncode(txt)) def testUnicode(self): # 1024 is high enough to catch non-direct ASCII mappings for i in range(1024): txt = utils.SafeEncode(unichr(i)) self.failUnlessEqual(txt, utils.SafeEncode(txt)) class TestUnescapeAndSplit(unittest.TestCase): """Testing case for UnescapeAndSplit""" def setUp(self): # testing more that one separator for regexp safety self._seps = [",", "+", ".", ":"] def testSimple(self): a = ["a", "b", "c", "d"] for sep in self._seps: self.failUnlessEqual(utils.UnescapeAndSplit(sep.join(a), sep=sep), a) def testEscape(self): for sep in self._seps: a = ["a", "b\\" + sep + "c", "d"] b = ["a", "b" + sep + "c", "d"] self.failUnlessEqual(utils.UnescapeAndSplit(sep.join(a), sep=sep), b) def testDoubleEscape(self): for sep in self._seps: a = ["a", "b\\\\", "c", "d"] b = ["a", "b\\", "c", "d"] self.failUnlessEqual(utils.UnescapeAndSplit(sep.join(a), sep=sep), b) def testThreeEscape(self): for sep in self._seps: a = ["a", "b\\\\\\" + sep + "c", "d"] b = ["a", "b\\" + sep + "c", "d"] self.failUnlessEqual(utils.UnescapeAndSplit(sep.join(a), sep=sep), b) def testEscapeAtEnd(self): for sep in self._seps: self.assertEqual(utils.UnescapeAndSplit("\\", sep=sep), ["\\"]) a = ["a", "b\\", "c"] b = ["a", "b" + sep + "c\\"] self.assertEqual(utils.UnescapeAndSplit("%s\\" % sep.join(a), sep=sep), b) a = ["\\" + sep, "\\" + sep, "c", "d\\.moo"] b = [sep, sep, "c", "d.moo\\"] self.assertEqual(utils.UnescapeAndSplit("%s\\" % sep.join(a), sep=sep), b) def testMultipleEscapes(self): for sep in self._seps: a = ["a", "b\\" + sep + "c", "d\\" + sep + "e\\" + sep + "f", "g"] b = ["a", "b" + sep + "c", "d" + sep + "e" + sep + "f", "g"] self.failUnlessEqual(utils.UnescapeAndSplit(sep.join(a), sep=sep), b) class TestCommaJoin(unittest.TestCase): def test(self): self.assertEqual(utils.CommaJoin([]), "") self.assertEqual(utils.CommaJoin([1, 2, 3]), "1, 2, 3") self.assertEqual(utils.CommaJoin(["Hello"]), 
"Hello") self.assertEqual(utils.CommaJoin(["Hello", "World"]), "Hello, World") self.assertEqual(utils.CommaJoin(["Hello", "World", 99]), "Hello, World, 99") class TestFormatTime(unittest.TestCase): """Testing case for FormatTime""" @staticmethod def _TestInProcess(tz, timestamp, usecs, expected): os.environ["TZ"] = tz time.tzset() return utils.FormatTime(timestamp, usecs=usecs) == expected def _Test(self, *args): # Need to use separate process as we want to change TZ self.assert_(utils.RunInSeparateProcess(self._TestInProcess, *args)) def test(self): self._Test("UTC", 0, None, "1970-01-01 00:00:00") self._Test("America/Sao_Paulo", 1292606926, None, "2010-12-17 15:28:46") self._Test("Europe/London", 1292606926, None, "2010-12-17 17:28:46") self._Test("Europe/Zurich", 1292606926, None, "2010-12-17 18:28:46") self._Test("Europe/Zurich", 1332944288, 8787, "2012-03-28 16:18:08.008787") self._Test("Australia/Sydney", 1292606926, None, "2010-12-18 04:28:46") self._Test("Australia/Sydney", 1292606926, None, "2010-12-18 04:28:46") self._Test("Australia/Sydney", 1292606926, 999999, "2010-12-18 04:28:46.999999") def testNone(self): self.failUnlessEqual(utils.FormatTime(None), "N/A") def testInvalid(self): self.failUnlessEqual(utils.FormatTime(()), "N/A") def testNow(self): # tests that we accept time.time input utils.FormatTime(time.time()) # tests that we accept int input utils.FormatTime(int(time.time())) class TestFormatSeconds(unittest.TestCase): def test(self): self.assertEqual(utils.FormatSeconds(1), "1s") self.assertEqual(utils.FormatSeconds(3600), "1h 0m 0s") self.assertEqual(utils.FormatSeconds(3599), "59m 59s") self.assertEqual(utils.FormatSeconds(7200), "2h 0m 0s") self.assertEqual(utils.FormatSeconds(7201), "2h 0m 1s") self.assertEqual(utils.FormatSeconds(7281), "2h 1m 21s") self.assertEqual(utils.FormatSeconds(29119), "8h 5m 19s") self.assertEqual(utils.FormatSeconds(19431228), "224d 21h 33m 48s") self.assertEqual(utils.FormatSeconds(-1), "-1s") self.assertEqual(utils.FormatSeconds(-282), "-282s") self.assertEqual(utils.FormatSeconds(-29119), "-29119s") def testFloat(self): self.assertEqual(utils.FormatSeconds(1.3), "1s") self.assertEqual(utils.FormatSeconds(1.9), "2s") self.assertEqual(utils.FormatSeconds(3912.12311), "1h 5m 12s") self.assertEqual(utils.FormatSeconds(3912.8), "1h 5m 13s") class TestLineSplitter(unittest.TestCase): def test(self): lines = [] ls = utils.LineSplitter(lines.append) ls.write("Hello World\n") self.assertEqual(lines, []) ls.write("Foo\n Bar\r\n ") ls.write("Baz") ls.write("Moo") self.assertEqual(lines, []) ls.flush() self.assertEqual(lines, ["Hello World", "Foo", " Bar"]) ls.close() self.assertEqual(lines, ["Hello World", "Foo", " Bar", " BazMoo"]) def _testExtra(self, line, all_lines, p1, p2): self.assertEqual(p1, 999) self.assertEqual(p2, "extra") all_lines.append(line) def testExtraArgsNoFlush(self): lines = [] ls = utils.LineSplitter(self._testExtra, lines, 999, "extra") ls.write("\n\nHello World\n") ls.write("Foo\n Bar\r\n ") ls.write("") ls.write("Baz") ls.write("Moo\n\nx\n") self.assertEqual(lines, []) ls.close() self.assertEqual(lines, ["", "", "Hello World", "Foo", " Bar", " BazMoo", "", "x"]) class TestIsValidShellParam(unittest.TestCase): def test(self): for val, result in [ ("abc", True), ("ab;cd", False), ]: self.assertEqual(utils.IsValidShellParam(val), result) class TestBuildShellCmd(unittest.TestCase): def test(self): self.assertRaises(errors.ProgrammerError, utils.BuildShellCmd, "ls %s", "ab;cd") self.assertEqual(utils.BuildShellCmd("ls %s", 
"ab"), "ls ab") class TestOrdinal(unittest.TestCase): def test(self): checks = { 0: "0th", 1: "1st", 2: "2nd", 3: "3rd", 4: "4th", 5: "5th", 6: "6th", 7: "7th", 8: "8th", 9: "9th", 10: "10th", 11: "11th", 12: "12th", 13: "13th", 14: "14th", 15: "15th", 16: "16th", 17: "17th", 18: "18th", 19: "19th", 20: "20th", 21: "21st", 25: "25th", 30: "30th", 32: "32nd", 40: "40th", 50: "50th", 55: "55th", 60: "60th", 62: "62nd", 70: "70th", 80: "80th", 83: "83rd", 90: "90th", 91: "91st", 582: "582nd", 999: "999th", } for value, ordinal in checks.items(): self.assertEqual(utils.FormatOrdinal(value), ordinal) class TestTruncate(unittest.TestCase): def _Test(self, text, length): result = utils.Truncate(text, length) self.assertTrue(len(result) <= length) return result def test(self): self.assertEqual(self._Test("", 80), "") self.assertEqual(self._Test("abc", 4), "abc") self.assertEqual(self._Test("Hello World", 80), "Hello World") self.assertEqual(self._Test("Hello World", 4), "H...") self.assertEqual(self._Test("Hello World", 5), "He...") for i in [4, 10, 100]: data = i * "FooBarBaz" self.assertEqual(self._Test(data, len(data)), data) for (length, exp) in [(8, u"T\u00e4st\u2026xyz"), (7, u"T\u00e4st...")]: self.assertEqual(self._Test(u"T\u00e4st\u2026xyz", length), exp) self.assertEqual(self._Test(range(100), 20), "[0, 1, 2, 3, 4, 5...") def testError(self): for i in range(4): self.assertRaises(AssertionError, utils.Truncate, "", i) class TestFilterEmptyLinesAndComments(unittest.TestCase): def testEmpty(self): self.assertEqual(utils.FilterEmptyLinesAndComments(""), []) self.assertEqual(utils.FilterEmptyLinesAndComments("\n"), []) self.assertEqual(utils.FilterEmptyLinesAndComments("\n" * 100), []) self.assertEqual(utils.FilterEmptyLinesAndComments("\n \n\t \n"), []) def test(self): text = """ This is # with comments a test # in # saying ...#... # multiple places Hello World! """ self.assertEqual(utils.FilterEmptyLinesAndComments(text), [ "This", "is", "a", "test", "saying", "...#...", "Hello World!", ]) class TestFormatKeyValue(unittest.TestCase): def test(self): self.assertEqual(utils.FormatKeyValue({}), []) self.assertEqual(utils.FormatKeyValue({1: 2}), ["1=2"]) self.assertEqual(utils.FormatKeyValue({ "zzz": "0", "aaa": "1", }), ["aaa=1", "zzz=0"]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.bootstrap_unittest.py0000744000000000000000000001144012271422343022176 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing ganeti.bootstrap""" import shutil import tempfile import unittest from ganeti import bootstrap from ganeti import constants from ganeti import errors from ganeti import pathutils import testutils import mock class TestPrepareFileStorage(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def enableFileStorage(self, enable): self.enabled_disk_templates = [] if enable: self.enabled_disk_templates.append(constants.DT_FILE) else: # anything != DT_FILE would do here self.enabled_disk_templates.append(constants.DT_DISKLESS) def testFallBackToDefaultPathAcceptedFileStorageEnabled(self): expected_file_storage_dir = pathutils.DEFAULT_FILE_STORAGE_DIR acceptance_fn = mock.Mock() init_fn = mock.Mock(return_value=expected_file_storage_dir) self.enableFileStorage(True) file_storage_dir = bootstrap._PrepareFileStorage( self.enabled_disk_templates, None, acceptance_fn=acceptance_fn, init_fn=init_fn) self.assertEqual(expected_file_storage_dir, file_storage_dir) acceptance_fn.assert_called_with(expected_file_storage_dir) init_fn.assert_called_with(expected_file_storage_dir) def testPathAcceptedFileStorageEnabled(self): acceptance_fn = mock.Mock() init_fn = mock.Mock(return_value=self.tmpdir) self.enableFileStorage(True) file_storage_dir = bootstrap._PrepareFileStorage( self.enabled_disk_templates, self.tmpdir, acceptance_fn=acceptance_fn, init_fn=init_fn) self.assertEqual(self.tmpdir, file_storage_dir) acceptance_fn.assert_called_with(self.tmpdir) init_fn.assert_called_with(self.tmpdir) def testPathAcceptedFileStorageDisabled(self): acceptance_fn = mock.Mock() init_fn = mock.Mock() self.enableFileStorage(False) file_storage_dir = bootstrap._PrepareFileStorage( self.enabled_disk_templates, self.tmpdir, acceptance_fn=acceptance_fn, init_fn=init_fn) self.assertEqual(self.tmpdir, file_storage_dir) self.assertFalse(init_fn.called) self.assertFalse(acceptance_fn.called) def testPathNotAccepted(self): acceptance_fn = mock.Mock() acceptance_fn.side_effect = errors.FileStoragePathError init_fn = mock.Mock() self.enableFileStorage(True) self.assertRaises(errors.OpPrereqError, bootstrap._PrepareFileStorage, self.enabled_disk_templates, self.tmpdir, acceptance_fn=acceptance_fn, init_fn=init_fn) acceptance_fn.assert_called_with(self.tmpdir) class TestInitCheckEnabledDiskTemplates(unittest.TestCase): def testValidTemplates(self): enabled_disk_templates = list(constants.DISK_TEMPLATES) bootstrap._InitCheckEnabledDiskTemplates(enabled_disk_templates) def testInvalidTemplates(self): enabled_disk_templates = ["pinkbunny"] self.assertRaises(errors.OpPrereqError, bootstrap._InitCheckEnabledDiskTemplates, enabled_disk_templates) def testEmptyTemplates(self): enabled_disk_templates = [] self.assertRaises(errors.OpPrereqError, bootstrap._InitCheckEnabledDiskTemplates, enabled_disk_templates) class TestRestrictIpolicyToEnabledDiskTemplates(unittest.TestCase): def testNoRestriction(self): allowed_disk_templates = list(constants.DISK_TEMPLATES) ipolicy = {constants.IPOLICY_DTS: allowed_disk_templates} enabled_disk_templates = list(constants.DISK_TEMPLATES) bootstrap._RestrictIpolicyToEnabledDiskTemplates( ipolicy, enabled_disk_templates) self.assertEqual(ipolicy[constants.IPOLICY_DTS], allowed_disk_templates) def testRestriction(self): allowed_disk_templates = [constants.DT_DRBD8, constants.DT_PLAIN] ipolicy = {constants.IPOLICY_DTS: allowed_disk_templates} enabled_disk_templates = [constants.DT_PLAIN, 
constants.DT_FILE] bootstrap._RestrictIpolicyToEnabledDiskTemplates( ipolicy, enabled_disk_templates) self.assertEqual(ipolicy[constants.IPOLICY_DTS], [constants.DT_PLAIN]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.io_unittest-runasroot.py0000644000000000000000000001114312244641676023774 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.io (tests that require root access)""" import os import tempfile import shutil import errno import grp import pwd import stat from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors import testutils class TestWriteFile(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = None self.tfile = tempfile.NamedTemporaryFile() self.did_pre = False self.did_post = False self.did_write = False def tearDown(self): testutils.GanetiTestCase.tearDown(self) if self.tmpdir: shutil.rmtree(self.tmpdir) def testFileUid(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") tuid = os.geteuid() + 1 utils.WriteFile(target, data="data", uid=tuid + 1) self.assertFileUid(target, tuid + 1) utils.WriteFile(target, data="data", uid=tuid) self.assertFileUid(target, tuid) utils.WriteFile(target, data="data", uid=tuid + 1, keep_perms=utils.KP_IF_EXISTS) self.assertFileUid(target, tuid) utils.WriteFile(target, data="data", keep_perms=utils.KP_ALWAYS) self.assertFileUid(target, tuid) def testNewFileUid(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") tuid = os.geteuid() + 1 utils.WriteFile(target, data="data", uid=tuid, keep_perms=utils.KP_IF_EXISTS) self.assertFileUid(target, tuid) def testFileGid(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") tgid = os.getegid() + 1 utils.WriteFile(target, data="data", gid=tgid + 1) self.assertFileGid(target, tgid + 1) utils.WriteFile(target, data="data", gid=tgid) self.assertFileGid(target, tgid) utils.WriteFile(target, data="data", gid=tgid + 1, keep_perms=utils.KP_IF_EXISTS) self.assertFileGid(target, tgid) utils.WriteFile(target, data="data", keep_perms=utils.KP_ALWAYS) self.assertFileGid(target, tgid) def testNewFileGid(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") tgid = os.getegid() + 1 utils.WriteFile(target, data="data", gid=tgid, keep_perms=utils.KP_IF_EXISTS) self.assertFileGid(target, tgid) class TestCanRead(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() self.confdUid = pwd.getpwnam(constants.CONFD_USER).pw_uid self.masterdUid = pwd.getpwnam(constants.MASTERD_USER).pw_uid self.masterdGid = 
grp.getgrnam(constants.MASTERD_GROUP).gr_gid def tearDown(self): testutils.GanetiTestCase.tearDown(self) if self.tmpdir: shutil.rmtree(self.tmpdir) def testUserCanRead(self): target = utils.PathJoin(self.tmpdir, "target1") f=open(target, "w") f.close() utils.EnforcePermission(target, 0400, uid=self.confdUid, gid=self.masterdGid) self.assertTrue(utils.CanRead(constants.CONFD_USER, target)) if constants.CONFD_USER != constants.MASTERD_USER: self.assertFalse(utils.CanRead(constants.MASTERD_USER, target)) def testGroupCanRead(self): target = utils.PathJoin(self.tmpdir, "target2") f=open(target, "w") f.close() utils.EnforcePermission(target, 0040, uid=self.confdUid, gid=self.masterdGid) self.assertFalse(utils.CanRead(constants.CONFD_USER, target)) if constants.CONFD_USER != constants.MASTERD_USER: self.assertTrue(utils.CanRead(constants.MASTERD_USER, target)) utils.EnforcePermission(target, 0040, uid=self.masterdUid+1, gid=self.masterdGid) self.assertTrue(utils.CanRead(constants.MASTERD_USER, target)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.client.gnt_cluster_unittest.py0000744000000000000000000002247212271422343023776 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
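# Illustrative aside: the run-as-root io tests above reduce to plain POSIX
# mode checks -- a file enforced to 0400 is readable by its owner and by
# nobody else.  A stdlib-only sketch (the temp file is created on the fly;
# nothing here is Ganeti API):
import os
import stat
import tempfile

(fd, _path) = tempfile.mkstemp()
os.close(fd)
os.chmod(_path, 0400)
assert stat.S_IMODE(os.stat(_path).st_mode) == 0400  # owner read only
os.unlink(_path)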
"""Script for testing ganeti.client.gnt_cluster""" import unittest import optparse from ganeti.client import gnt_cluster from ganeti import utils from ganeti import compat from ganeti import constants import testutils class TestEpoUtilities(unittest.TestCase): def setUp(self): self.nodes2ip = dict(("node%s" % i, "192.0.2.%s" % i) for i in range(1, 10)) self.nodes = set(self.nodes2ip.keys()) self.ips2node = dict((v, k) for (k, v) in self.nodes2ip.items()) def _FakeAction(*args): return True def _FakePing(ip, port, live_port_needed=False): self.assert_(live_port_needed) self.assertEqual(port, 0) return True def _FakeSleep(secs): self.assert_(secs >= 0 and secs <= 5) return def _NoopFeedback(self, text): return def testPingFnRemoveHostsUp(self): seen = set() def _FakeSeenPing(ip, *args, **kwargs): node = self.ips2node[ip] self.assertFalse(node in seen) seen.add(node) return True helper = gnt_cluster._RunWhenNodesReachableHelper(self.nodes, self._FakeAction, self.nodes2ip, 0, self._NoopFeedback, _ping_fn=_FakeSeenPing, _sleep_fn=self._FakeSleep) nodes_len = len(self.nodes) for (num, _) in enumerate(self.nodes): helper.Wait(5) if num < nodes_len - 1: self.assertRaises(utils.RetryAgain, helper) else: helper() self.assertEqual(seen, self.nodes) self.assertFalse(helper.down) self.assertEqual(helper.up, self.nodes) def testActionReturnFalseSetsHelperFalse(self): called = False def _FalseAction(*args): return called helper = gnt_cluster._RunWhenNodesReachableHelper(self.nodes, _FalseAction, self.nodes2ip, 0, self._NoopFeedback, _ping_fn=self._FakePing, _sleep_fn=self._FakeSleep) for _ in self.nodes: try: helper() except utils.RetryAgain: called = True self.assertFalse(helper.success) def testMaybeInstanceStartup(self): instances_arg = [] def _FakeInstanceStart(opts, instances, start): instances_arg.append(set(instances)) return None inst_map = { "inst1": set(["node1", "node2"]), "inst2": set(["node1", "node3"]), "inst3": set(["node2", "node1"]), "inst4": set(["node2", "node1", "node3"]), "inst5": set(["node4"]), } fn = _FakeInstanceStart self.assert_(gnt_cluster._MaybeInstanceStartup(None, inst_map, set(), _instance_start_fn=fn)) self.assertFalse(instances_arg) result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1"]), _instance_start_fn=fn) self.assert_(result) self.assertFalse(instances_arg) result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1", "node3"]), _instance_start_fn=fn) self.assert_(result is None) self.assertEqual(instances_arg.pop(0), set(["inst2"])) self.assertFalse("inst2" in inst_map) result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1", "node3"]), _instance_start_fn=fn) self.assert_(result) self.assertFalse(instances_arg) result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1", "node3", "node2"]), _instance_start_fn=fn) self.assertEqual(instances_arg.pop(0), set(["inst1", "inst3", "inst4"])) self.assert_(result is None) result = gnt_cluster._MaybeInstanceStartup(None, inst_map, set(["node1", "node3", "node2", "node4"]), _instance_start_fn=fn) self.assert_(result is None) self.assertEqual(instances_arg.pop(0), set(["inst5"])) self.assertFalse(inst_map) class _ClientForEpo: def __init__(self, groups, nodes): self._groups = groups self._nodes = nodes def QueryGroups(self, names, fields, use_locking): assert not use_locking assert fields == ["node_list"] return self._groups def QueryNodes(self, names, fields, use_locking): assert not use_locking assert fields == ["name", "master", "pinst_list", "sinst_list", "powered", 
"offline"] return self._nodes class TestEpo(unittest.TestCase): _ON_EXITCODE = 253 _OFF_EXITCODE = 254 def _ConfirmForce(self, *args): self.fail("Shouldn't need confirmation") def _Confirm(self, exp_names, result, names, ltype, text): self.assertEqual(names, exp_names) self.assertFalse(result is NotImplemented) return result def _Off(self, exp_node_list, opts, node_list, inst_map): self.assertEqual(node_list, exp_node_list) self.assertFalse(inst_map) return self._OFF_EXITCODE def _Test(self, *args, **kwargs): defaults = dict(cl=NotImplemented, _on_fn=NotImplemented, _off_fn=NotImplemented, _stdout_fn=lambda *args: None, _stderr_fn=lambda *args: None) defaults.update(kwargs) return gnt_cluster.Epo(*args, **defaults) def testShowAllWithGroups(self): opts = optparse.Values(dict(groups=True, show_all=True)) result = self._Test(opts, NotImplemented) self.assertEqual(result, constants.EXIT_FAILURE) def testShowAllWithArgs(self): opts = optparse.Values(dict(groups=False, show_all=True)) result = self._Test(opts, ["a", "b", "c"]) self.assertEqual(result, constants.EXIT_FAILURE) def testNoArgumentsNoParameters(self): for (force, confirm_result) in [(True, NotImplemented), (False, False), (False, True)]: opts = optparse.Values(dict(groups=False, show_all=False, force=force, on=False)) client = _ClientForEpo(NotImplemented, [ ("node1.example.com", False, [], [], True, False), ]) if force: confirm_fn = self._ConfirmForce else: confirm_fn = compat.partial(self._Confirm, ["node1.example.com"], confirm_result) off_fn = compat.partial(self._Off, ["node1.example.com"]) result = self._Test(opts, [], cl=client, _off_fn=off_fn, _confirm_fn=confirm_fn) if force or confirm_result: self.assertEqual(result, self._OFF_EXITCODE) else: self.assertEqual(result, constants.EXIT_FAILURE) def testPowerOn(self): for master in [False, True]: opts = optparse.Values(dict(groups=False, show_all=True, force=True, on=True)) client = _ClientForEpo(NotImplemented, [ ("node1.example.com", False, [], [], True, False), ("node2.example.com", False, [], [], False, False), ("node3.example.com", False, [], [], True, True), ("node4.example.com", False, [], [], None, True), ("node5.example.com", master, [], [], False, False), ]) def _On(_, all_nodes, node_list, inst_map): self.assertEqual(all_nodes, ["node%s.example.com" % i for i in range(1, 6)]) if master: self.assertEqual(node_list, ["node2.example.com"]) else: self.assertEqual(node_list, ["node2.example.com", "node5.example.com"]) self.assertFalse(inst_map) return self._ON_EXITCODE result = self._Test(opts, [], cl=client, _on_fn=_On, _confirm_fn=self._ConfirmForce) self.assertEqual(result, self._ON_EXITCODE) def testMasterWithoutShowAll(self): opts = optparse.Values(dict(groups=False, show_all=False, force=True, on=False)) client = _ClientForEpo(NotImplemented, [ ("node1.example.com", True, [], [], True, False), ]) result = self._Test(opts, [], cl=client, _confirm_fn=self._ConfirmForce) self.assertEqual(result, constants.EXIT_FAILURE) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.cmdlib.instance_storage_unittest.py0000744000000000000000000000725112271422343024747 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the cmdlib module 'instance_storage'""" import unittest from ganeti import constants from ganeti.cmdlib import instance_storage from ganeti import errors import testutils import mock class TestCheckNodesFreeDiskOnVG(unittest.TestCase): def setUp(self): self.node_uuid = "12345" self.node_uuids = [self.node_uuid] self.node_info = mock.Mock() self.es = True self.ndparams = {constants.ND_EXCLUSIVE_STORAGE: self.es} mock_rpc = mock.Mock() mock_rpc.call_node_info = mock.Mock() mock_cfg = mock.Mock() mock_cfg.GetNodeInfo = mock.Mock(return_value=self.node_info) mock_cfg.GetNdParams = mock.Mock(return_value=self.ndparams) self.hvname = "myhv" self.hvparams = mock.Mock() self.clusterinfo = mock.Mock() self.clusterinfo.hvparams = {self.hvname: self.hvparams} mock_cfg.GetHypervisorType = mock.Mock(return_value=self.hvname) mock_cfg.GetClusterInfo = mock.Mock(return_value=self.clusterinfo) self.lu = mock.Mock() self.lu.rpc = mock_rpc self.lu.cfg = mock_cfg self.vg = "myvg" self.node_name = "mynode" self.space_info = [{"type": constants.ST_LVM_VG, "name": self.vg, "storage_free": 125, "storage_size": 666}] def testPerformNodeInfoCall(self): expected_hv_arg = [(self.hvname, self.hvparams)] expected_storage_arg = {self.node_uuid: [(constants.ST_LVM_VG, self.vg, [self.es])]} instance_storage._PerformNodeInfoCall(self.lu, self.node_uuids, self.vg) self.lu.rpc.call_node_info.assert_called_with( self.node_uuids, expected_storage_arg, expected_hv_arg) def testCheckVgCapacityForNode(self): requested = 123 node_info = (None, self.space_info, None) instance_storage._CheckVgCapacityForNode(self.node_name, node_info, self.vg, requested) def testCheckVgCapacityForNodeNotEnough(self): requested = 250 node_info = (None, self.space_info, None) self.assertRaises( errors.OpPrereqError, instance_storage._CheckVgCapacityForNode, self.node_name, node_info, self.vg, requested) def testCheckVgCapacityForNodeNoStorageData(self): node_info = (None, [], None) self.assertRaises( errors.OpPrereqError, instance_storage._CheckVgCapacityForNode, self.node_name, node_info, self.vg, NotImplemented) def testCheckVgCapacityForNodeBogusSize(self): broken_space_info = [{"type": constants.ST_LVM_VG, "name": self.vg, "storage_free": "greenbunny", "storage_size": "redbunny"}] node_info = (None, broken_space_info, None) self.assertRaises( errors.OpPrereqError, instance_storage._CheckVgCapacityForNode, self.node_name, node_info, self.vg, NotImplemented) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/bash_completion.bash0000744000000000000000000001235412244641676020430 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e -u -o pipefail # Disable any locales export LC_ALL=C readonly bash_completion=${TOP_BUILDDIR:-.}/doc/examples/bash_completion-debug readonly default_wordbreaks=$' \t\n"'\''@><=;|&(:' err() { echo "$@" echo 'Aborting' exit 1 } contains() { local -r needle="$1"; shift for value; do if [[ "$value" = "$needle" ]]; then return 0 fi done return 1 } # Prepares a subshell for testing bash completion functions setup() { local -r unused=UNUSED set +e +u -o pipefail . $bash_completion COMP_KEY=$unused COMP_LINE=$unused COMP_POINT=$unused COMP_TYPE=$unused COMP_WORDBREAKS="$default_wordbreaks" GANETI_COMPL_LOG= unset COMP_CWORD unset COMP_WORDS } # Check if default wordbreaks are still valid (this detects cases where Bash # were to change its built-in default value) # TODO: May need an update for older Bash versions; some didn't include the # colon character (':') in COMP_WORDBREAKS ( bashdef=$(env - bash --noprofile --norc -c 'echo -n "$COMP_WORDBREAKS"') case "$bashdef" in $default_wordbreaks) ;; *) err 'Bash uses unknown value for COMP_WORDBREAKS' ;; esac ) # Check for --help for cmd in gnt-{instance,node,group,job}; do ( setup COMP_CWORD=2 COMP_WORDS=( $cmd list - ) _${cmd/-/_} contains --help "${COMPREPLY[@]}" || \ err "'$cmd list' did not list --help as an option" ) done # Completing a yes/no option ( setup COMP_CWORD=3 COMP_WORDS=( gnt-node modify --drained ) _gnt_node if [[ "${COMPREPLY[*]}" != 'no yes' ]]; then err "Completing '${COMP_WORDS[@]}' did not give correct result" fi ) # Completing a multiple-choice option ( setup COMP_CWORD=2 COMP_WORDS=( gnt-debug allocator --disk-template=sh foo ) _gnt_debug if [[ "${COMPREPLY[*]}" != sharedfile ]]; then err "Completing '${COMP_WORDS[*]}' did not give correct result" fi ) # Completing a node name ( setup # Override built-in function _ganeti_nodes() { echo aanode1 bbnode2 aanode3 } COMP_CWORD=4 COMP_WORDS=( gnt-node modify --drained yes aa ) _gnt_node if [[ "${COMPREPLY[*]}" != 'aanode1 aanode3' ]]; then err 'Completing node names failed' fi ) # Completing an option when it's not at the end ( setup # Override built-in function _ganeti_instances() { echo inst{1..5} } # Completing word in the middle COMP_CWORD=2 COMP_WORDS=( gnt-instance list --o inst3 inst inst5 ) _gnt_node contains --output "${COMPREPLY[@]}" || err 'Did not complete parameter' ) # Completing an instance name ( setup # Override built-in function _ganeti_instances() { echo inst{1..5} } # Completing word in the middle COMP_CWORD=5 COMP_WORDS=( gnt-instance list -o foobar inst1 inst inst5 ) _gnt_instance if [[ "${COMPREPLY[*]}" != "$(echo inst{1..5})" ]]; then err "Completing '${COMP_WORDS[*]}' did not give correct result" fi ) # Need to test node expansion with different wordbreak settings [[ "$default_wordbreaks" == *:* ]] || \ err 'No colon in default wordbreak characters' for wb in "$default_wordbreaks" "${default_wordbreaks/:/}"; do ( setup # Override built-in function _ganeti_nodes() { echo node{A..C} } COMP_WORDBREAKS="$wb" # Completing nodes COMP_CWORD=3 COMP_WORDS=( gnt-instance add -n ) _gnt_instance if [[ 
"${COMPREPLY[*]}" != 'nodeA nodeA: nodeB nodeB: nodeC nodeC:' ]]; then err 'Got wrong node list' fi COMP_CWORD=3 COMP_WORDS=( gnt-instance add -n nodeB ) _gnt_instance if [[ "${COMPREPLY[*]}" != 'nodeB nodeB:' ]]; then err 'Got wrong node list' fi COMP_CWORD=3 COMP_WORDS=( gnt-instance add -n nodeC: ) _gnt_instance if [[ "$COMP_WORDBREAKS" == *:* ]]; then expected='nodeA nodeB' else expected='nodeC:nodeA nodeC:nodeB' fi if [[ "${COMPREPLY[*]}" != "$expected" ]]; then err 'Got wrong node list' fi ) done # Need to test different settings for the extglob shell option for opt in -u -s; do verify_extglob() { if [[ "$(shopt -p extglob)" != "shopt $opt extglob" ]]; then err 'The "extglob" shell option has an unexpected value' fi } ( shopt $opt extglob verify_extglob setup verify_extglob # Completing nodes COMP_CWORD=4 COMP_WORDS=( gnt-instance add --os-type busybox --no-n ) _gnt_instance if [[ "${COMPREPLY[*]}" != '--no-name-check --no-nics' ]]; then err "Completing '${COMP_WORDS[*]}' did not give correct result" fi verify_extglob ) done exit 0 ganeti-2.9.3/test/py/cfgupgrade_unittest.py0000744000000000000000000004222312271422343021025 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing tools/cfgupgrade""" import os import sys import unittest import shutil import tempfile import operator from ganeti import constants from ganeti import utils from ganeti import serializer from ganeti import netutils import testutils def GetMinimalConfig(): return { "version": constants.CONFIG_VERSION, "cluster": { "master_node": "node1-uuid" }, "instances": {}, "nodegroups": {}, "nodes": { "node1-uuid": { "name": "node1", "uuid": "node1-uuid" } }, } def _RunUpgrade(path, dry_run, no_verify, ignore_hostname=True, downgrade=False): cmd = [sys.executable, "%s/tools/cfgupgrade" % testutils.GetSourceDir(), "--debug", "--force", "--path=%s" % path, "--confdir=%s" % path] if ignore_hostname: cmd.append("--ignore-hostname") if dry_run: cmd.append("--dry-run") if no_verify: cmd.append("--no-verify") if downgrade: cmd.append("--downgrade") result = utils.RunCmd(cmd, cwd=os.getcwd()) if result.failed: raise Exception("cfgupgrade failed: %s, output %r" % (result.fail_reason, result.output)) class TestCfgupgrade(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.config_path = utils.PathJoin(self.tmpdir, "config.data") self.noded_cert_path = utils.PathJoin(self.tmpdir, "server.pem") self.rapi_cert_path = utils.PathJoin(self.tmpdir, "rapi.pem") self.rapi_users_path = utils.PathJoin(self.tmpdir, "rapi", "users") self.rapi_users_path_pre24 = utils.PathJoin(self.tmpdir, "rapi_users") self.known_hosts_path = utils.PathJoin(self.tmpdir, "known_hosts") self.confd_hmac_path = utils.PathJoin(self.tmpdir, "hmac.key") self.cds_path = utils.PathJoin(self.tmpdir, "cluster-domain-secret") self.ss_master_node_path = utils.PathJoin(self.tmpdir, "ssconf_master_node") self.file_storage_paths = utils.PathJoin(self.tmpdir, "file-storage-paths") def tearDown(self): shutil.rmtree(self.tmpdir) def _LoadConfig(self): return serializer.LoadJson(utils.ReadFile(self.config_path)) def _LoadTestDataConfig(self, filename): return serializer.LoadJson(testutils.ReadTestData(filename)) def _CreateValidConfigDir(self): utils.WriteFile(self.noded_cert_path, data="") utils.WriteFile(self.known_hosts_path, data="") utils.WriteFile(self.ss_master_node_path, data="node.has.another.name.example.net") def testNoConfigDir(self): self.assertFalse(utils.ListVisibleFiles(self.tmpdir)) self.assertRaises(Exception, _RunUpgrade, self.tmpdir, False, True) self.assertRaises(Exception, _RunUpgrade, self.tmpdir, True, True) def testWrongHostname(self): self._CreateValidConfigDir() utils.WriteFile(self.config_path, data=serializer.DumpJson(GetMinimalConfig())) hostname = netutils.GetHostname().name assert hostname != utils.ReadOneLineFile(self.ss_master_node_path) self.assertRaises(Exception, _RunUpgrade, self.tmpdir, False, True, ignore_hostname=False) def testCorrectHostname(self): self._CreateValidConfigDir() utils.WriteFile(self.config_path, data=serializer.DumpJson(GetMinimalConfig())) utils.WriteFile(self.ss_master_node_path, data="%s\n" % netutils.GetHostname().name) _RunUpgrade(self.tmpdir, False, True, ignore_hostname=False) def testInconsistentConfig(self): self._CreateValidConfigDir() # There should be no "config_version" cfg = GetMinimalConfig() cfg["version"] = 0 cfg["cluster"]["config_version"] = 0 utils.WriteFile(self.config_path, data=serializer.DumpJson(cfg)) self.assertRaises(Exception, _RunUpgrade, self.tmpdir, False, True) def testInvalidConfig(self): self._CreateValidConfigDir() # Missing version from config utils.WriteFile(self.config_path, data=serializer.DumpJson({})) 
self.assertRaises(Exception, _RunUpgrade, self.tmpdir, False, True) def _TestUpgradeFromFile(self, filename, dry_run): cfg = self._LoadTestDataConfig(filename) self._TestUpgradeFromData(cfg, dry_run) def _TestSimpleUpgrade(self, from_version, dry_run, file_storage_dir=None, shared_file_storage_dir=None): cfg = GetMinimalConfig() cfg["version"] = from_version cluster = cfg["cluster"] if file_storage_dir: cluster["file_storage_dir"] = file_storage_dir if shared_file_storage_dir: cluster["shared_file_storage_dir"] = shared_file_storage_dir self._TestUpgradeFromData(cfg, dry_run) def _TestUpgradeFromData(self, cfg, dry_run): assert "version" in cfg from_version = cfg["version"] self._CreateValidConfigDir() utils.WriteFile(self.config_path, data=serializer.DumpJson(cfg)) self.assertFalse(os.path.isfile(self.rapi_cert_path)) self.assertFalse(os.path.isfile(self.confd_hmac_path)) self.assertFalse(os.path.isfile(self.cds_path)) _RunUpgrade(self.tmpdir, dry_run, True) if dry_run: expversion = from_version checkfn = operator.not_ else: expversion = constants.CONFIG_VERSION checkfn = operator.truth self.assert_(checkfn(os.path.isfile(self.rapi_cert_path))) self.assert_(checkfn(os.path.isfile(self.confd_hmac_path))) self.assert_(checkfn(os.path.isfile(self.cds_path))) newcfg = self._LoadConfig() self.assertEqual(newcfg["version"], expversion) def testRapiUsers(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) self.assertFalse(os.path.exists(os.path.dirname(self.rapi_users_path))) utils.WriteFile(self.rapi_users_path_pre24, data="some user\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), False) self.assertTrue(os.path.isdir(os.path.dirname(self.rapi_users_path))) self.assert_(os.path.islink(self.rapi_users_path_pre24)) self.assert_(os.path.isfile(self.rapi_users_path)) self.assertEqual(os.readlink(self.rapi_users_path_pre24), self.rapi_users_path) for path in [self.rapi_users_path, self.rapi_users_path_pre24]: self.assertEqual(utils.ReadFile(path), "some user\n") def testRapiUsers24AndAbove(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) os.mkdir(os.path.dirname(self.rapi_users_path)) utils.WriteFile(self.rapi_users_path, data="other user\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), False) self.assert_(os.path.islink(self.rapi_users_path_pre24)) self.assert_(os.path.isfile(self.rapi_users_path)) self.assertEqual(os.readlink(self.rapi_users_path_pre24), self.rapi_users_path) for path in [self.rapi_users_path, self.rapi_users_path_pre24]: self.assertEqual(utils.ReadFile(path), "other user\n") def testRapiUsersExistingSymlink(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) os.mkdir(os.path.dirname(self.rapi_users_path)) os.symlink(self.rapi_users_path, self.rapi_users_path_pre24) utils.WriteFile(self.rapi_users_path, data="hello world\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), False) self.assert_(os.path.isfile(self.rapi_users_path) and not os.path.islink(self.rapi_users_path)) self.assert_(os.path.islink(self.rapi_users_path_pre24)) self.assertEqual(os.readlink(self.rapi_users_path_pre24), self.rapi_users_path) for path in [self.rapi_users_path, self.rapi_users_path_pre24]: self.assertEqual(utils.ReadFile(path), "hello world\n") def testRapiUsersExistingTarget(self): self.assertFalse(os.path.exists(self.rapi_users_path)) 
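# The test then creates distinct regular files at both the new and the
# pre-2.4 location; the upgrade cannot replace the old path with a symlink
# without losing data, so it must fail and leave both files untouched: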
self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) os.mkdir(os.path.dirname(self.rapi_users_path)) utils.WriteFile(self.rapi_users_path, data="other user\n") utils.WriteFile(self.rapi_users_path_pre24, data="hello world\n") self.assertRaises(Exception, self._TestSimpleUpgrade, constants.BuildVersion(2, 2, 0), False) for path in [self.rapi_users_path, self.rapi_users_path_pre24]: self.assert_(os.path.isfile(path) and not os.path.islink(path)) self.assertEqual(utils.ReadFile(self.rapi_users_path), "other user\n") self.assertEqual(utils.ReadFile(self.rapi_users_path_pre24), "hello world\n") def testRapiUsersDryRun(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) utils.WriteFile(self.rapi_users_path_pre24, data="some user\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), True) self.assertFalse(os.path.isdir(os.path.dirname(self.rapi_users_path))) self.assertTrue(os.path.isfile(self.rapi_users_path_pre24) and not os.path.islink(self.rapi_users_path_pre24)) self.assertFalse(os.path.exists(self.rapi_users_path)) def testRapiUsers24AndAboveDryRun(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) os.mkdir(os.path.dirname(self.rapi_users_path)) utils.WriteFile(self.rapi_users_path, data="other user\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), True) self.assertTrue(os.path.isfile(self.rapi_users_path) and not os.path.islink(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) self.assertEqual(utils.ReadFile(self.rapi_users_path), "other user\n") def testRapiUsersExistingSymlinkDryRun(self): self.assertFalse(os.path.exists(self.rapi_users_path)) self.assertFalse(os.path.exists(self.rapi_users_path_pre24)) os.mkdir(os.path.dirname(self.rapi_users_path)) os.symlink(self.rapi_users_path, self.rapi_users_path_pre24) utils.WriteFile(self.rapi_users_path, data="hello world\n") self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), True) self.assertTrue(os.path.islink(self.rapi_users_path_pre24)) self.assertTrue(os.path.isfile(self.rapi_users_path) and not os.path.islink(self.rapi_users_path)) self.assertEqual(os.readlink(self.rapi_users_path_pre24), self.rapi_users_path) for path in [self.rapi_users_path, self.rapi_users_path_pre24]: self.assertEqual(utils.ReadFile(path), "hello world\n") def testFileStoragePathsDryRun(self): self.assertFalse(os.path.exists(self.file_storage_paths)) self._TestSimpleUpgrade(constants.BuildVersion(2, 6, 0), True, file_storage_dir=self.tmpdir, shared_file_storage_dir="/tmp") self.assertFalse(os.path.exists(self.file_storage_paths)) def testFileStoragePathsBoth(self): self.assertFalse(os.path.exists(self.file_storage_paths)) self._TestSimpleUpgrade(constants.BuildVersion(2, 6, 0), False, file_storage_dir=self.tmpdir, shared_file_storage_dir="/tmp") lines = utils.ReadFile(self.file_storage_paths).splitlines() self.assertTrue(lines.pop(0).startswith("# ")) self.assertTrue(lines.pop(0).startswith("# cfgupgrade")) self.assertEqual(lines.pop(0), self.tmpdir) self.assertEqual(lines.pop(0), "/tmp") self.assertFalse(lines) self.assertEqual(os.stat(self.file_storage_paths).st_mode & 0777, 0600, msg="Wrong permissions") def testFileStoragePathsSharedOnly(self): self.assertFalse(os.path.exists(self.file_storage_paths)) self._TestSimpleUpgrade(constants.BuildVersion(2, 5, 0), False, file_storage_dir=None, shared_file_storage_dir=self.tmpdir) lines = 
utils.ReadFile(self.file_storage_paths).splitlines() self.assertTrue(lines.pop(0).startswith("# ")) self.assertTrue(lines.pop(0).startswith("# cfgupgrade")) self.assertEqual(lines.pop(0), self.tmpdir) self.assertFalse(lines) def testUpgradeFrom_2_0(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 0, 0), False) def testUpgradeFrom_2_1(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 1, 0), False) def testUpgradeFrom_2_2(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), False) def testUpgradeFrom_2_3(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), False) def testUpgradeFrom_2_4(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 4, 0), False) def testUpgradeFrom_2_5(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 5, 0), False) def testUpgradeFrom_2_6(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 6, 0), False) def testUpgradeFrom_2_7(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 7, 0), False) def testUpgradeFullConfigFrom_2_7(self): self._TestUpgradeFromFile("cluster_config_2.7.json", False) def testUpgradeFullConfigFrom_2_8(self): self._TestUpgradeFromFile("cluster_config_2.8.json", False) def testUpgradeCurrent(self): self._TestSimpleUpgrade(constants.CONFIG_VERSION, False) def _RunDowngradeUpgrade(self): oldconf = self._LoadConfig() _RunUpgrade(self.tmpdir, False, True, downgrade=True) _RunUpgrade(self.tmpdir, False, True) newconf = self._LoadConfig() self.assertEqual(oldconf, newconf) def testDowngrade(self): self._TestSimpleUpgrade(constants.CONFIG_VERSION, False) self._RunDowngradeUpgrade() def testDowngradeFullConfig(self): """Test for upgrade + downgrade combination.""" # This test can work only with the previous version of a configuration! oldconfname = "cluster_config_2.8.json" self._TestUpgradeFromFile(oldconfname, False) _RunUpgrade(self.tmpdir, False, True, downgrade=True) oldconf = self._LoadTestDataConfig(oldconfname) newconf = self._LoadConfig() self.assertEqual(oldconf, newconf) def testDowngradeFrom_2_9(self): cfg29_name = "cluster_config_2.9.json" cfg29 = self._LoadTestDataConfig(cfg29_name) self._CreateValidConfigDir() utils.WriteFile(self.config_path, data=serializer.DumpJson(cfg29)) _RunUpgrade(self.tmpdir, False, True, downgrade=True) cfg28 = self._LoadConfig() hvparams = cfg28["cluster"]["hvparams"] for xen_variant in [constants.HT_XEN_PVM, constants.HT_XEN_HVM]: xen_params = hvparams[xen_variant] self.assertTrue(constants.HV_XEN_CMD not in xen_params) self.assertTrue(constants.HV_VIF_SCRIPT not in xen_params) def testDowngradeFullConfigBackwardFrom_2_7(self): """Test for upgrade + downgrade + upgrade combination.""" self._TestUpgradeFromFile("cluster_config_2.7.json", False) self._RunDowngradeUpgrade() def _RunDowngradeTwice(self): """Make sure that downgrade is idempotent.""" _RunUpgrade(self.tmpdir, False, True, downgrade=True) oldconf = self._LoadConfig() _RunUpgrade(self.tmpdir, False, True, downgrade=True) newconf = self._LoadConfig() self.assertEqual(oldconf, newconf) def testDowngradeTwice(self): self._TestSimpleUpgrade(constants.CONFIG_VERSION, False) self._RunDowngradeTwice() def testDowngradeTwiceFullConfigFrom_2_7(self): self._TestUpgradeFromFile("cluster_config_2.7.json", False) self._RunDowngradeTwice() def testUpgradeDryRunFrom_2_0(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 0, 0), True) def testUpgradeDryRunFrom_2_1(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 1, 0), True) def testUpgradeDryRunFrom_2_2(self): 
self._TestSimpleUpgrade(constants.BuildVersion(2, 2, 0), True) def testUpgradeDryRunFrom_2_3(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 3, 0), True) def testUpgradeDryRunFrom_2_4(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 4, 0), True) def testUpgradeDryRunFrom_2_5(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 5, 0), True) def testUpgradeDryRunFrom_2_6(self): self._TestSimpleUpgrade(constants.BuildVersion(2, 6, 0), True) def testUpgradeCurrentDryRun(self): self._TestSimpleUpgrade(constants.CONFIG_VERSION, True) def testDowngradeDryRun(self): self._TestSimpleUpgrade(constants.CONFIG_VERSION, False) oldconf = self._LoadConfig() _RunUpgrade(self.tmpdir, True, True, downgrade=True) newconf = self._LoadConfig() self.assertEqual(oldconf["version"], newconf["version"]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.lvm_unittest.py0000744000000000000000000001002612244641676022131 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.lvm""" import unittest from ganeti import constants from ganeti import utils from ganeti.objects import LvmPvInfo import testutils class TestLvmExclusiveCheckNodePvs(unittest.TestCase): """Test cases for LvmExclusiveCheckNodePvs()""" _VG = "vg" _SMALL_PV = LvmPvInfo(name="small", vg_name=_VG, size=100e3, free=40e3, attributes="a-") _MED_PV = LvmPvInfo(name="medium", vg_name=_VG, size=400e3, free=40e3, attributes="a-") _BIG_PV = LvmPvInfo(name="big", vg_name=_VG, size=1e6, free=400e3, attributes="a-") # Allowance for rounding _EPS = 1e-4 def testOnePv(self): (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs([self._MED_PV]) self.assertFalse(errmsgs) self.assertEqual(small, self._MED_PV.size) self.assertEqual(big, self._MED_PV.size) def testEqualPvs(self): (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [self._MED_PV] * 2) self.assertFalse(errmsgs) self.assertEqual(small, self._MED_PV.size) self.assertEqual(big, self._MED_PV.size) (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [self._SMALL_PV] * 3) self.assertFalse(errmsgs) self.assertEqual(small, self._SMALL_PV.size) self.assertEqual(big, self._SMALL_PV.size) def testTooDifferentPvs(self): (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [self._MED_PV, self._BIG_PV]) self.assertEqual(len(errmsgs), 1) self.assertEqual(small, self._MED_PV.size) self.assertEqual(big, self._BIG_PV.size) (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [self._MED_PV, self._SMALL_PV]) self.assertEqual(len(errmsgs), 1) self.assertEqual(small, self._SMALL_PV.size) self.assertEqual(big, self._MED_PV.size) def testBoundarySizeCases(self): medpv1 = self._MED_PV.Copy() medpv2 = self._MED_PV.Copy() (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [medpv1, 
medpv2, self._MED_PV]) self.assertFalse(errmsgs) self.assertEqual(small, self._MED_PV.size) self.assertEqual(big, self._MED_PV.size) # Just within the margins medpv1.size = self._MED_PV.size * (1 - constants.PART_MARGIN + self._EPS) medpv2.size = self._MED_PV.size * (1 + constants.PART_MARGIN - self._EPS) (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [medpv1, medpv2, self._MED_PV]) self.assertFalse(errmsgs) self.assertEqual(small, medpv1.size) self.assertEqual(big, medpv2.size) # Just outside the margins medpv1.size = self._MED_PV.size * (1 - constants.PART_MARGIN - self._EPS) medpv2.size = self._MED_PV.size * (1 + constants.PART_MARGIN) (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [medpv1, medpv2, self._MED_PV]) self.assertTrue(errmsgs) self.assertEqual(small, medpv1.size) self.assertEqual(big, medpv2.size) medpv1.size = self._MED_PV.size * (1 - constants.PART_MARGIN) medpv2.size = self._MED_PV.size * (1 + constants.PART_MARGIN + self._EPS) (errmsgs, (small, big)) = utils.LvmExclusiveCheckNodePvs( [medpv1, medpv2, self._MED_PV]) self.assertTrue(errmsgs) self.assertEqual(small, medpv1.size) self.assertEqual(big, medpv2.size) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.constants_unittest.py0000744000000000000000000001466112271422343022205 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
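# Illustrative aside on the boundary arithmetic in the LVM tests above: the
# "just inside"/"just outside" PV sizes are the base size scaled by
# (1 +/- PART_MARGIN -/+ _EPS).  Stand-in numbers below; the real value
# comes from constants.PART_MARGIN at runtime:
_margin, _eps = 0.01, 1e-4  # assumed placeholders
_base = 400e3               # the _MED_PV size used by the suite
_inside = (_base * (1 - _margin + _eps), _base * (1 + _margin - _eps))
_outside = (_base * (1 - _margin - _eps), _base * (1 + _margin))
assert _outside[0] < _inside[0] and _inside[1] < _outside[1]
# sizes within _inside should yield no error messages; _outside should.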
"""Script for unittesting the constants module""" import unittest import re import itertools from ganeti import constants from ganeti import locking from ganeti import utils import testutils class TestConstants(unittest.TestCase): """Constants tests""" def testConfigVersion(self): self.failUnless(constants.CONFIG_MAJOR >= 0 and constants.CONFIG_MAJOR <= 99) self.failUnless(constants.CONFIG_MINOR >= 0 and constants.CONFIG_MINOR <= 99) self.failUnless(constants.CONFIG_REVISION >= 0 and constants.CONFIG_REVISION <= 9999) self.failUnless(constants.CONFIG_VERSION >= 0 and constants.CONFIG_VERSION <= 99999999) self.failUnless(constants.BuildVersion(0, 0, 0) == 0) self.failUnless(constants.BuildVersion(10, 10, 1010) == 10101010) self.failUnless(constants.BuildVersion(12, 34, 5678) == 12345678) self.failUnless(constants.BuildVersion(99, 99, 9999) == 99999999) self.failUnless(constants.SplitVersion(00000000) == (0, 0, 0)) self.failUnless(constants.SplitVersion(10101010) == (10, 10, 1010)) self.failUnless(constants.SplitVersion(12345678) == (12, 34, 5678)) self.failUnless(constants.SplitVersion(99999999) == (99, 99, 9999)) self.failUnless(constants.SplitVersion(constants.CONFIG_VERSION) == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR, constants.CONFIG_REVISION)) def testDiskStatus(self): self.failUnless(constants.LDS_OKAY < constants.LDS_UNKNOWN) self.failUnless(constants.LDS_UNKNOWN < constants.LDS_FAULTY) def testClockSkew(self): self.failUnless(constants.NODE_MAX_CLOCK_SKEW < (0.8 * constants.CONFD_MAX_CLOCK_SKEW)) def testSslCertExpiration(self): self.failUnless(constants.SSL_CERT_EXPIRATION_ERROR < constants.SSL_CERT_EXPIRATION_WARN) def testOpCodePriority(self): self.failUnless(constants.OP_PRIO_LOWEST > constants.OP_PRIO_LOW) self.failUnless(constants.OP_PRIO_LOW > constants.OP_PRIO_NORMAL) self.failUnlessEqual(constants.OP_PRIO_NORMAL, locking._DEFAULT_PRIORITY) self.failUnlessEqual(constants.OP_PRIO_DEFAULT, locking._DEFAULT_PRIORITY) self.failUnless(constants.OP_PRIO_NORMAL > constants.OP_PRIO_HIGH) self.failUnless(constants.OP_PRIO_HIGH > constants.OP_PRIO_HIGHEST) def testDiskDefaults(self): self.failUnless( set(constants.DISK_LD_DEFAULTS.keys()) == set(constants.DISK_TEMPLATES) - set([constants.DT_DISKLESS])) self.failUnless(set(constants.DISK_DT_DEFAULTS.keys()) == constants.DISK_TEMPLATES) def testJobStatus(self): self.assertFalse(constants.JOBS_PENDING & constants.JOBS_FINALIZED) self.assertFalse(constants.JOBS_PENDING - constants.JOB_STATUS_ALL) self.assertFalse(constants.JOBS_FINALIZED - constants.JOB_STATUS_ALL) def testDefaultsForAllHypervisors(self): self.assertEqual(frozenset(constants.HVC_DEFAULTS.keys()), constants.HYPER_TYPES) def testDefaultHypervisor(self): self.assertTrue(constants.DEFAULT_ENABLED_HYPERVISOR in constants.HYPER_TYPES) def testExtraLogfiles(self): for daemon in constants.DAEMONS_EXTRA_LOGBASE: self.assertTrue(daemon in constants.DAEMONS) for log_reason in constants.DAEMONS_EXTRA_LOGBASE[daemon]: self.assertTrue(log_reason in constants.VALID_EXTRA_LOGREASONS) class TestExportedNames(unittest.TestCase): _VALID_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]+$") _BUILTIN_NAME_RE = re.compile(r"^__\w+__$") _EXCEPTIONS = frozenset([ "SplitVersion", "BuildVersion", ]) def test(self): wrong = \ set(itertools.ifilterfalse(self._BUILTIN_NAME_RE.match, itertools.ifilterfalse(self._VALID_NAME_RE.match, dir(constants)))) wrong -= self._EXCEPTIONS self.assertFalse(wrong, msg=("Invalid names exported from constants module: %s" % utils.CommaJoin(sorted(wrong)))) class 
TestParameterNames(unittest.TestCase): """HV/BE parameter tests""" VALID_NAME = re.compile("^[a-zA-Z_][a-zA-Z0-9_]*$") def testNoDashes(self): for kind, source in [("hypervisor", constants.HVS_PARAMETER_TYPES), ("backend", constants.BES_PARAMETER_TYPES), ("nic", constants.NICS_PARAMETER_TYPES), ("instdisk", constants.IDISK_PARAMS_TYPES), ("instnic", constants.INIC_PARAMS_TYPES), ]: for key in source: self.failUnless(self.VALID_NAME.match(key), "The %s parameter '%s' contains invalid characters" % (kind, key)) class TestConfdConstants(unittest.TestCase): """Test the confd constants""" def testFourCc(self): self.assertEqual(len(constants.CONFD_MAGIC_FOURCC), 4, msg="Invalid fourcc len, should be 4") def testReqs(self): self.assertFalse(utils.FindDuplicates(constants.CONFD_REQS), msg="Duplicated confd request code") def testReplStatuses(self): self.assertFalse(utils.FindDuplicates(constants.CONFD_REPL_STATUSES), msg="Duplicated confd reply status code") class TestDiskTemplateConstants(unittest.TestCase): def testPreference(self): self.assertEqual(set(constants.DISK_TEMPLATE_PREFERENCE), set(constants.DISK_TEMPLATES)) def testMapToStorageTypes(self): for disk_template in constants.DISK_TEMPLATES: self.assertTrue( constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[disk_template] is not None) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.log_unittest.py0000744000000000000000000002045612244641676022124 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
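# ---------------------------------------------------------------------------
# Editor's illustrative sketch, not part of the Ganeti sources: the
# TestLogHandler.testReopen case below relies on a key property of
# utils.log._ReopenableLogHandler: RequestReopen() merely sets a flag, and
# the log file is reopened lazily on the next emitted record, which is what
# makes rotate-by-rename safe. A minimal standalone model of that idea
# (class and attribute names here are hypothetical):
import logging

class _ExampleReopenableHandler(logging.StreamHandler):
  def __init__(self, filename):
    self._filename = filename
    self._reopen = False
    logging.StreamHandler.__init__(self, open(filename, "a"))

  def RequestReopen(self):
    # Cheap to call from e.g. a signal handler; no I/O happens here
    self._reopen = True

  def emit(self, record):
    # Reopen lazily, only when a record is actually written
    if self._reopen:
      self.stream.close()
      self.stream = open(self._filename, "a")
      self._reopen = False
    logging.StreamHandler.emit(self, record)
# ---------------------------------------------------------------------------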
"""Script for testing ganeti.utils.log""" import os import unittest import logging import tempfile import shutil import threading from cStringIO import StringIO from ganeti import constants from ganeti import errors from ganeti import compat from ganeti import utils import testutils class TestLogHandler(unittest.TestCase): def testNormal(self): tmpfile = tempfile.NamedTemporaryFile() handler = utils.log._ReopenableLogHandler(tmpfile.name) handler.setFormatter(logging.Formatter("%(asctime)s: %(message)s")) logger = logging.Logger("TestLogger") logger.addHandler(handler) self.assertEqual(len(logger.handlers), 1) logger.error("Test message ERROR") logger.info("Test message INFO") logger.removeHandler(handler) self.assertFalse(logger.handlers) handler.close() self.assertEqual(len(utils.ReadFile(tmpfile.name).splitlines()), 2) def testReopen(self): tmpfile = tempfile.NamedTemporaryFile() tmpfile2 = tempfile.NamedTemporaryFile() handler = utils.log._ReopenableLogHandler(tmpfile.name) self.assertFalse(utils.ReadFile(tmpfile.name)) self.assertFalse(utils.ReadFile(tmpfile2.name)) logger = logging.Logger("TestLoggerReopen") logger.addHandler(handler) for _ in range(3): logger.error("Test message ERROR") handler.flush() self.assertEqual(len(utils.ReadFile(tmpfile.name).splitlines()), 3) before_id = utils.GetFileID(tmpfile.name) handler.RequestReopen() self.assertTrue(handler._reopen) self.assertTrue(utils.VerifyFileID(utils.GetFileID(tmpfile.name), before_id)) # Rename only after requesting reopen os.rename(tmpfile.name, tmpfile2.name) assert not os.path.exists(tmpfile.name) # Write another message, should reopen for _ in range(4): logger.info("Test message INFO") # Flag must be reset self.assertFalse(handler._reopen) self.assertFalse(utils.VerifyFileID(utils.GetFileID(tmpfile.name), before_id)) logger.removeHandler(handler) self.assertFalse(logger.handlers) handler.close() self.assertEqual(len(utils.ReadFile(tmpfile.name).splitlines()), 4) self.assertEqual(len(utils.ReadFile(tmpfile2.name).splitlines()), 3) def testConsole(self): for (console, check) in [(None, False), (tempfile.NamedTemporaryFile(), True), (self._FailingFile(os.devnull), False)]: # Create a handler which will fail when handling errors cls = utils.log._LogErrorsToConsole(self._FailingHandler) # Instantiate handler with file which will fail when writing, # provoking a write to the console handler = cls(console, self._FailingFile(os.devnull)) logger = logging.Logger("TestLogger") logger.addHandler(handler) self.assertEqual(len(logger.handlers), 1) # Provoke write logger.error("Test message ERROR") # Take everything apart logger.removeHandler(handler) self.assertFalse(logger.handlers) handler.close() if console and check: console.flush() # Check console output consout = utils.ReadFile(console.name) self.assertTrue("Cannot log message" in consout) self.assertTrue("Test message ERROR" in consout) class _FailingFile(file): def write(self, _): raise Exception class _FailingHandler(logging.StreamHandler): def handleError(self, _): raise Exception class TestSetupLogging(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testSimple(self): logfile = utils.PathJoin(self.tmpdir, "basic.log") logger = logging.Logger("TestLogger") self.assertTrue(callable(utils.SetupLogging(logfile, "test", console_logging=False, syslog=constants.SYSLOG_NO, stderr_logging=False, multithreaded=False, root_logger=logger))) self.assertEqual(utils.ReadFile(logfile), "") logger.error("This is a 
test") # Ensure SetupLogging used custom logger logging.error("This message should not show up in the test log file") self.assertTrue(utils.ReadFile(logfile).endswith("This is a test\n")) def testReopen(self): logfile = utils.PathJoin(self.tmpdir, "reopen.log") logfile2 = utils.PathJoin(self.tmpdir, "reopen.log.OLD") logger = logging.Logger("TestLogger") reopen_fn = utils.SetupLogging(logfile, "test", console_logging=False, syslog=constants.SYSLOG_NO, stderr_logging=False, multithreaded=False, root_logger=logger) self.assertTrue(callable(reopen_fn)) self.assertEqual(utils.ReadFile(logfile), "") logger.error("This is a test") self.assertTrue(utils.ReadFile(logfile).endswith("This is a test\n")) os.rename(logfile, logfile2) assert not os.path.exists(logfile) # Notify logger to reopen on the next message reopen_fn() assert not os.path.exists(logfile) # Provoke actual reopen logger.error("First message") self.assertTrue(utils.ReadFile(logfile).endswith("First message\n")) self.assertTrue(utils.ReadFile(logfile2).endswith("This is a test\n")) class TestSetupToolLogging(unittest.TestCase): def test(self): error_name = logging.getLevelName(logging.ERROR) warn_name = logging.getLevelName(logging.WARNING) info_name = logging.getLevelName(logging.INFO) debug_name = logging.getLevelName(logging.DEBUG) for debug in [False, True]: for verbose in [False, True]: logger = logging.Logger("TestLogger") buf = StringIO() utils.SetupToolLogging(debug, verbose, _root_logger=logger, _stream=buf) logger.error("level=error") logger.warning("level=warning") logger.info("level=info") logger.debug("level=debug") lines = buf.getvalue().splitlines() self.assertTrue(compat.all(line.count(":") == 3 for line in lines)) messages = [line.split(":", 3)[-1].strip() for line in lines] if debug: self.assertEqual(messages, [ "%s level=error" % error_name, "%s level=warning" % warn_name, "%s level=info" % info_name, "%s level=debug" % debug_name, ]) elif verbose: self.assertEqual(messages, [ "%s level=error" % error_name, "%s level=warning" % warn_name, "%s level=info" % info_name, ]) else: self.assertEqual(messages, [ "level=error", "level=warning", ]) def testThreadName(self): thread_name = threading.currentThread().getName() for enable_threadname in [False, True]: logger = logging.Logger("TestLogger") buf = StringIO() utils.SetupToolLogging(True, True, threadname=enable_threadname, _root_logger=logger, _stream=buf) logger.debug("test134042376") lines = buf.getvalue().splitlines() self.assertEqual(len(lines), 1) if enable_threadname: self.assertTrue((" %s " % thread_name) in lines[0]) else: self.assertTrue(thread_name not in lines[0]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.rapi.baserlib_unittest.py0000744000000000000000000001266112271422343022704 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.rapi.baserlib""" import unittest import itertools from ganeti import errors from ganeti import opcodes from ganeti import ht from ganeti import http from ganeti import compat from ganeti.rapi import baserlib import testutils class TestFillOpcode(unittest.TestCase): class OpTest(opcodes.OpCode): OP_PARAMS = [ ("test", None, ht.TMaybeString, None), ] def test(self): for static in [None, {}]: op = baserlib.FillOpcode(self.OpTest, {}, static) self.assertTrue(isinstance(op, self.OpTest)) self.assertFalse(hasattr(op, "test")) def testStatic(self): op = baserlib.FillOpcode(self.OpTest, {}, {"test": "abc"}) self.assertTrue(isinstance(op, self.OpTest)) self.assertEqual(op.test, "abc") # Overwrite static parameter self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, {"test": 123}, {"test": "abc"}) def testType(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, {"test": [1, 2, 3]}, {}) def testStaticType(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, {}, {"test": [1, 2, 3]}) def testUnicode(self): op = baserlib.FillOpcode(self.OpTest, {u"test": "abc"}, {}) self.assertTrue(isinstance(op, self.OpTest)) self.assertEqual(op.test, "abc") op = baserlib.FillOpcode(self.OpTest, {}, {u"test": "abc"}) self.assertTrue(isinstance(op, self.OpTest)) self.assertEqual(op.test, "abc") def testUnknownParameter(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, {"othervalue": 123}, None) def testInvalidBody(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, "", None) self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, range(10), None) def testRenameBothSpecified(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, { "old": 123, "new": 999, }, None, rename={ "old": "new", }) def testRename(self): value = "Hello World" op = baserlib.FillOpcode(self.OpTest, { "data": value, }, None, rename={ "data": "test", }) self.assertEqual(op.test, value) def testRenameStatic(self): self.assertRaises(http.HttpBadRequest, baserlib.FillOpcode, self.OpTest, { "data": 0, }, { "test": None, }, rename={ "data": "test", }) class TestOpcodeResource(unittest.TestCase): @staticmethod def _MakeClass(method, attrs): return type("Test%s" % method, (baserlib.OpcodeResource, ), attrs) @staticmethod def _GetMethodAttributes(method): attrs = ["%s_OPCODE" % method, "%s_RENAME" % method, "Get%sOpInput" % method.capitalize()] assert attrs == dict((opattrs[0], list(opattrs[1:])) for opattrs in baserlib.OPCODE_ATTRS)[method] return attrs def test(self): for method in baserlib._SUPPORTED_METHODS: # Empty handler obj = self._MakeClass(method, {})(None, {}, None) for attr in itertools.chain(*baserlib.OPCODE_ATTRS): self.assertFalse(hasattr(obj, attr)) # Direct handler function obj = self._MakeClass(method, { method: lambda _: None, })(None, {}, None) self.assertFalse(compat.all(hasattr(obj, attr) for i in baserlib._SUPPORTED_METHODS for attr in self._GetMethodAttributes(i))) # Let metaclass define handler function for opcls in [None, object()]: obj = self._MakeClass(method, { "%s_OPCODE" % method: opcls, })(None, {}, None) self.assertTrue(callable(getattr(obj, method))) self.assertEqual(getattr(obj, "%s_OPCODE" % method), opcls) 
self.assertFalse(hasattr(obj, "%s_RENAME" % method)) self.assertFalse(compat.any(hasattr(obj, attr) for i in baserlib._SUPPORTED_METHODS if i != method for attr in self._GetMethodAttributes(i))) def testIllegalRename(self): class _TClass(baserlib.OpcodeResource): PUT_RENAME = None def PUT(self): pass self.assertRaises(AssertionError, _TClass, None, None, None) def testEmpty(self): class _Empty(baserlib.OpcodeResource): pass obj = _Empty(None, {}, None) for attr in itertools.chain(*baserlib.OPCODE_ATTRS): self.assertFalse(hasattr(obj, attr)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.hypervisor.hv_lxc_unittest.py0000744000000000000000000000277512271422343023670 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.hypervisor.hv_lxc""" import unittest from ganeti import constants from ganeti import objects from ganeti import hypervisor from ganeti.hypervisor import hv_lxc import testutils class TestConsole(unittest.TestCase): def test(self): instance = objects.Instance(name="lxc.example.com", primary_node="node199-uuid") node = objects.Node(name="node199", uuid="node199-uuid") cons = hv_lxc.LXCHypervisor.GetInstanceConsole(instance, node, {}, {}) self.assertTrue(cons.Validate()) self.assertEqual(cons.kind, constants.CONS_SSH) self.assertEqual(cons.host, node.name) self.assertEqual(cons.command[-1], instance.name) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.config_unittest.py0000744000000000000000000005504112271422343021433 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
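# ---------------------------------------------------------------------------
# Editor's illustrative sketch, not part of the Ganeti sources: the TestTRM
# case near the end of this file exercises config.TemporaryReservationManager,
# whose contract is that resources are reserved under an execution-context id,
# Reserved() is keyed by the resource itself (not the ec_id), duplicate
# reservations fail, and dropping an ec_id releases everything it holds.
# A minimal standalone model of that contract (hypothetical names):
class _ExampleReservationManager(object):
  def __init__(self):
    self._owner_by_resource = {}

  def Reserve(self, ec_id, resource):
    if resource in self._owner_by_resource:
      raise ValueError("Duplicate reservation for %r" % (resource,))
    self._owner_by_resource[resource] = ec_id

  def Reserved(self, resource):
    return resource in self._owner_by_resource

  def DropECReservations(self, ec_id):
    for res in [r for (r, owner) in self._owner_by_resource.items()
                if owner == ec_id]:
      del self._owner_by_resource[res]
# ---------------------------------------------------------------------------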
"""Script for unittesting the config module""" import unittest import os import tempfile import operator from ganeti import bootstrap from ganeti import config from ganeti import constants from ganeti import errors from ganeti import objects from ganeti import utils from ganeti import netutils from ganeti import compat from ganeti.cmdlib import instance from ganeti.config import TemporaryReservationManager import testutils import mocks import mock def _StubGetEntResolver(): return mocks.FakeGetentResolver() class TestConfigRunner(unittest.TestCase): """Testing case for HooksRunner""" def setUp(self): fd, self.cfg_file = tempfile.mkstemp() os.close(fd) self._init_cluster(self.cfg_file) def tearDown(self): try: os.unlink(self.cfg_file) except OSError: pass def _get_object(self): """Returns an instance of ConfigWriter""" cfg = config.ConfigWriter(cfg_file=self.cfg_file, offline=True, _getents=_StubGetEntResolver) return cfg def _init_cluster(self, cfg): """Initializes the cfg object""" me = netutils.Hostname() ip = constants.IP4_ADDRESS_LOCALHOST # master_ip must not conflict with the node ip address master_ip = "127.0.0.2" cluster_config = objects.Cluster( serial_no=1, rsahostkeypub="", dsahostkeypub="", highest_used_port=(constants.FIRST_DRBD_PORT - 1), mac_prefix="aa:00:00", volume_group_name="xenvg", drbd_usermode_helper="/bin/true", nicparams={constants.PP_DEFAULT: constants.NICC_DEFAULTS}, ndparams=constants.NDC_DEFAULTS, tcpudp_port_pool=set(), enabled_hypervisors=[constants.HT_FAKE], master_node=me.name, master_ip=master_ip, master_netdev=constants.DEFAULT_BRIDGE, cluster_name="cluster.local", file_storage_dir="/tmp", uid_pool=[], ) master_node_config = objects.Node(name=me.name, primary_ip=me.ip, secondary_ip=ip, serial_no=1, master_candidate=True) bootstrap.InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config, self.cfg_file) def _create_instance(self): """Create and return an instance object""" inst = objects.Instance(name="test.example.com", uuid="test-uuid", disks=[], nics=[], disk_template=constants.DT_DISKLESS, primary_node=self._get_object().GetMasterNode()) return inst def testEmpty(self): """Test instantiate config object""" self._get_object() def testInit(self): """Test initialize the config file""" cfg = self._get_object() self.failUnlessEqual(1, len(cfg.GetNodeList())) self.failUnlessEqual(0, len(cfg.GetInstanceList())) def testUpdateCluster(self): """Test updates on the cluster object""" cfg = self._get_object() # construct a fake cluster object fake_cl = objects.Cluster() # fail if we didn't read the config self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_cl, None) cl = cfg.GetClusterInfo() # first pass, must not fail cfg.Update(cl, None) # second pass, also must not fail (after the config has been written) cfg.Update(cl, None) # but the fake_cl update should still fail self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_cl, None) def testUpdateNode(self): """Test updates on one node object""" cfg = self._get_object() # construct a fake node fake_node = objects.Node() # fail if we didn't read the config self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_node, None) node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) # first pass, must not fail cfg.Update(node, None) # second pass, also must not fail (after the config has been written) cfg.Update(node, None) # but the fake_node update should still fail self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_node, None) def testUpdateInstance(self): 
"""Test updates on one instance object""" cfg = self._get_object() # construct a fake instance inst = self._create_instance() fake_instance = objects.Instance() # fail if we didn't read the config self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_instance, None) cfg.AddInstance(inst, "my-job") instance = cfg.GetInstanceInfo(cfg.GetInstanceList()[0]) # first pass, must not fail cfg.Update(instance, None) # second pass, also must not fail (after the config has been written) cfg.Update(instance, None) # but the fake_instance update should still fail self.failUnlessRaises(errors.ConfigurationError, cfg.Update, fake_instance, None) def testUpgradeSave(self): """Test that any modification done during upgrading is saved back""" cfg = self._get_object() # Remove an element, run upgrade, and check if the element is # back and the file upgraded node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) # For a ConfigObject, None is the same as a missing field node.ndparams = None oldsaved = utils.ReadFile(self.cfg_file) cfg._UpgradeConfig() self.assertTrue(node.ndparams is not None) newsaved = utils.ReadFile(self.cfg_file) # We rely on the fact that at least the serial number changes self.assertNotEqual(oldsaved, newsaved) # Add something that should not be there this time key = list(constants.NDC_GLOBALS)[0] node.ndparams[key] = constants.NDC_DEFAULTS[key] cfg._WriteConfig(None) oldsaved = utils.ReadFile(self.cfg_file) cfg._UpgradeConfig() self.assertTrue(node.ndparams.get(key) is None) newsaved = utils.ReadFile(self.cfg_file) self.assertNotEqual(oldsaved, newsaved) # Do the upgrade again, this time there should be no update oldsaved = newsaved cfg._UpgradeConfig() newsaved = utils.ReadFile(self.cfg_file) self.assertEqual(oldsaved, newsaved) # Reload the configuration again: it shouldn't change the file oldsaved = newsaved self._get_object() newsaved = utils.ReadFile(self.cfg_file) self.assertEqual(oldsaved, newsaved) def testNICParameterSyntaxCheck(self): """Test the NIC's CheckParameterSyntax function""" mode = constants.NIC_MODE link = constants.NIC_LINK m_bridged = constants.NIC_MODE_BRIDGED m_routed = constants.NIC_MODE_ROUTED CheckSyntax = objects.NIC.CheckParameterSyntax CheckSyntax(constants.NICC_DEFAULTS) CheckSyntax({mode: m_bridged, link: "br1"}) CheckSyntax({mode: m_routed, link: "default"}) self.assertRaises(errors.ConfigurationError, CheckSyntax, {mode: "000invalid", link: "any"}) self.assertRaises(errors.ConfigurationError, CheckSyntax, {mode: m_bridged, link: None}) self.assertRaises(errors.ConfigurationError, CheckSyntax, {mode: m_bridged, link: ""}) def testGetNdParamsDefault(self): cfg = self._get_object() node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) self.assertEqual(cfg.GetNdParams(node), constants.NDC_DEFAULTS) def testGetNdParamsModifiedNode(self): my_ndparams = { constants.ND_OOB_PROGRAM: "/bin/node-oob", constants.ND_SPINDLE_COUNT: 1, constants.ND_EXCLUSIVE_STORAGE: False, } cfg = self._get_object() node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) node.ndparams = my_ndparams cfg.Update(node, None) self.assertEqual(cfg.GetNdParams(node), my_ndparams) def testGetNdParamsInheritance(self): node_ndparams = { constants.ND_OOB_PROGRAM: "/bin/node-oob", } group_ndparams = { constants.ND_SPINDLE_COUNT: 10, } expected_ndparams = { constants.ND_OOB_PROGRAM: "/bin/node-oob", constants.ND_SPINDLE_COUNT: 10, constants.ND_EXCLUSIVE_STORAGE: constants.NDC_DEFAULTS[constants.ND_EXCLUSIVE_STORAGE], } cfg = self._get_object() node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) node.ndparams = 
node_ndparams cfg.Update(node, None) group = cfg.GetNodeGroup(node.group) group.ndparams = group_ndparams cfg.Update(group, None) self.assertEqual(cfg.GetNdParams(node), expected_ndparams) def testAddGroupFillsFieldsIfMissing(self): cfg = self._get_object() group = objects.NodeGroup(name="test", members=[]) cfg.AddNodeGroup(group, "my-job") self.assert_(utils.UUID_RE.match(group.uuid)) self.assertEqual(constants.ALLOC_POLICY_PREFERRED, group.alloc_policy) def testAddGroupPreservesFields(self): cfg = self._get_object() group = objects.NodeGroup(name="test", members=[], alloc_policy=constants.ALLOC_POLICY_LAST_RESORT) cfg.AddNodeGroup(group, "my-job") self.assertEqual(constants.ALLOC_POLICY_LAST_RESORT, group.alloc_policy) def testAddGroupDoesNotPreserveFields(self): cfg = self._get_object() group = objects.NodeGroup(name="test", members=[], serial_no=17, ctime=123, mtime=456) cfg.AddNodeGroup(group, "my-job") self.assertEqual(1, group.serial_no) self.assert_(group.ctime > 1200000000) self.assert_(group.mtime > 1200000000) def testAddGroupCanSkipUUIDCheck(self): cfg = self._get_object() uuid = cfg.GenerateUniqueID("my-job") group = objects.NodeGroup(name="test", members=[], uuid=uuid, serial_no=17, ctime=123, mtime=456) self.assertRaises(errors.ConfigurationError, cfg.AddNodeGroup, group, "my-job") cfg.AddNodeGroup(group, "my-job", check_uuid=False) # Does not raise. self.assertEqual(uuid, group.uuid) def testAssignGroupNodes(self): me = netutils.Hostname() cfg = self._get_object() # Create two groups grp1 = objects.NodeGroup(name="grp1", members=[], uuid="2f2fadf7-2a70-4a23-9ab5-2568c252032c") grp1_serial = 1 cfg.AddNodeGroup(grp1, "job") grp2 = objects.NodeGroup(name="grp2", members=[], uuid="798d0de3-680f-4a0e-b29a-0f54f693b3f1") grp2_serial = 1 cfg.AddNodeGroup(grp2, "job") self.assertEqual(set(map(operator.attrgetter("name"), cfg.GetAllNodeGroupsInfo().values())), set(["grp1", "grp2", constants.INITIAL_NODE_GROUP_NAME])) # No-op cluster_serial = cfg.GetClusterInfo().serial_no cfg.AssignGroupNodes([]) cluster_serial += 1 # Create two nodes node1 = objects.Node(name="node1", group=grp1.uuid, ndparams={}, uuid="node1-uuid") node1_serial = 1 node2 = objects.Node(name="node2", group=grp2.uuid, ndparams={}, uuid="node2-uuid") node2_serial = 1 cfg.AddNode(node1, "job") cfg.AddNode(node2, "job") cluster_serial += 2 self.assertEqual(set(cfg.GetNodeList()), set(["node1-uuid", "node2-uuid", cfg.GetNodeInfoByName(me.name).uuid])) def _VerifySerials(): self.assertEqual(cfg.GetClusterInfo().serial_no, cluster_serial) self.assertEqual(node1.serial_no, node1_serial) self.assertEqual(node2.serial_no, node2_serial) self.assertEqual(grp1.serial_no, grp1_serial) self.assertEqual(grp2.serial_no, grp2_serial) _VerifySerials() self.assertEqual(set(grp1.members), set(["node1-uuid"])) self.assertEqual(set(grp2.members), set(["node2-uuid"])) # Check invalid nodes and groups self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [ ("unknown.node.example.com", grp2.uuid), ]) self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [ (node1.name, "unknown-uuid"), ]) self.assertEqual(node1.group, grp1.uuid) self.assertEqual(node2.group, grp2.uuid) self.assertEqual(set(grp1.members), set(["node1-uuid"])) self.assertEqual(set(grp2.members), set(["node2-uuid"])) # Another no-op cfg.AssignGroupNodes([]) cluster_serial += 1 _VerifySerials() # Assign to the same group (should be a no-op) self.assertEqual(node2.group, grp2.uuid) cfg.AssignGroupNodes([ (node2.uuid, grp2.uuid), ]) cluster_serial += 1 
self.assertEqual(node2.group, grp2.uuid) _VerifySerials() self.assertEqual(set(grp1.members), set(["node1-uuid"])) self.assertEqual(set(grp2.members), set(["node2-uuid"])) # Assign node 2 to group 1 self.assertEqual(node2.group, grp2.uuid) cfg.AssignGroupNodes([ (node2.uuid, grp1.uuid), ]) cluster_serial += 1 node2_serial += 1 grp1_serial += 1 grp2_serial += 1 self.assertEqual(node2.group, grp1.uuid) _VerifySerials() self.assertEqual(set(grp1.members), set(["node1-uuid", "node2-uuid"])) self.assertFalse(grp2.members) # And assign both nodes to group 2 self.assertEqual(node1.group, grp1.uuid) self.assertEqual(node2.group, grp1.uuid) self.assertNotEqual(grp1.uuid, grp2.uuid) cfg.AssignGroupNodes([ (node1.uuid, grp2.uuid), (node2.uuid, grp2.uuid), ]) cluster_serial += 1 node1_serial += 1 node2_serial += 1 grp1_serial += 1 grp2_serial += 1 self.assertEqual(node1.group, grp2.uuid) self.assertEqual(node2.group, grp2.uuid) _VerifySerials() self.assertFalse(grp1.members) self.assertEqual(set(grp2.members), set(["node1-uuid", "node2-uuid"])) # Destructive tests orig_group = node2.group try: other_uuid = "68b3d087-6ea5-491c-b81f-0a47d90228c5" assert compat.all(node.group != other_uuid for node in cfg.GetAllNodesInfo().values()) node2.group = "68b3d087-6ea5-491c-b81f-0a47d90228c5" self.assertRaises(errors.ConfigurationError, cfg.AssignGroupNodes, [ (node2.uuid, grp2.uuid), ]) _VerifySerials() finally: node2.group = orig_group def _TestVerifyConfigIPolicy(self, ipolicy, ipowner, cfg, isgroup): INVALID_KEY = "this_key_cannot_exist" ipolicy[INVALID_KEY] = None # A call to cluster.SimpleFillIPolicy causes different kinds of error # depending on the owner (cluster or group) if isgroup: errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) errstr = "%s has invalid instance policy" % ipowner self.assertTrue(_IsErrorInList(errstr, errs)) else: self.assertRaises(AssertionError, cfg.VerifyConfig) del ipolicy[INVALID_KEY] errs = cfg.VerifyConfig() self.assertFalse(errs) key = list(constants.IPOLICY_PARAMETERS)[0] hasoldv = (key in ipolicy) if hasoldv: oldv = ipolicy[key] ipolicy[key] = "blah" errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList("%s has invalid instance policy" % ipowner, errs)) if hasoldv: ipolicy[key] = oldv else: del ipolicy[key] ispeclist = [] if constants.ISPECS_MINMAX in ipolicy: for k in range(len(ipolicy[constants.ISPECS_MINMAX])): ispeclist.extend([ (ipolicy[constants.ISPECS_MINMAX][k][constants.ISPECS_MIN], "%s[%s]/%s" % (constants.ISPECS_MINMAX, k, constants.ISPECS_MIN)), (ipolicy[constants.ISPECS_MINMAX][k][constants.ISPECS_MAX], "%s[%s]/%s" % (constants.ISPECS_MINMAX, k, constants.ISPECS_MAX)), ]) if constants.ISPECS_STD in ipolicy: ispeclist.append((ipolicy[constants.ISPECS_STD], constants.ISPECS_STD)) for (ispec, ispecpath) in ispeclist: ispec[INVALID_KEY] = None errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList(("%s has invalid ipolicy/%s" % (ipowner, ispecpath)), errs)) del ispec[INVALID_KEY] errs = cfg.VerifyConfig() self.assertFalse(errs) for par in constants.ISPECS_PARAMETERS: hasoldv = par in ispec if hasoldv: oldv = ispec[par] ispec[par] = "blah" errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList(("%s has invalid ipolicy/%s" % (ipowner, ispecpath)), errs)) if hasoldv: ispec[par] = oldv else: del ispec[par] errs = cfg.VerifyConfig() self.assertFalse(errs) if constants.ISPECS_MINMAX in ipolicy: # Test partial minmax specs for minmax in 
ipolicy[constants.ISPECS_MINMAX]: for key in constants.ISPECS_MINMAX_KEYS: self.assertTrue(key in minmax) ispec = minmax[key] del minmax[key] errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList("Missing instance specification", errs)) minmax[key] = ispec for par in constants.ISPECS_PARAMETERS: oldv = ispec[par] del ispec[par] errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList("Missing instance specs parameters", errs)) ispec[par] = oldv errs = cfg.VerifyConfig() self.assertFalse(errs) def _TestVerifyConfigGroupIPolicy(self, groupinfo, cfg): old_ipolicy = groupinfo.ipolicy ipolicy = cfg.GetClusterInfo().SimpleFillIPolicy({}) groupinfo.ipolicy = ipolicy # Test partial policies for key in constants.IPOLICY_ALL_KEYS: self.assertTrue(key in ipolicy) oldv = ipolicy[key] del ipolicy[key] errs = cfg.VerifyConfig() self.assertFalse(errs) ipolicy[key] = oldv groupinfo.ipolicy = old_ipolicy def _TestVerifyConfigClusterIPolicy(self, ipolicy, cfg): # Test partial policies for key in constants.IPOLICY_ALL_KEYS: self.assertTrue(key in ipolicy) oldv = ipolicy[key] del ipolicy[key] self.assertRaises(AssertionError, cfg.VerifyConfig) ipolicy[key] = oldv errs = cfg.VerifyConfig() self.assertFalse(errs) # Partial standard specs ispec = ipolicy[constants.ISPECS_STD] for par in constants.ISPECS_PARAMETERS: oldv = ispec[par] del ispec[par] errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList("Missing instance specs parameters", errs)) ispec[par] = oldv errs = cfg.VerifyConfig() self.assertFalse(errs) def testVerifyConfig(self): cfg = self._get_object() errs = cfg.VerifyConfig() self.assertFalse(errs) node = cfg.GetNodeInfo(cfg.GetNodeList()[0]) key = list(constants.NDC_GLOBALS)[0] node.ndparams[key] = constants.NDC_DEFAULTS[key] errs = cfg.VerifyConfig() self.assertTrue(len(errs) >= 1) self.assertTrue(_IsErrorInList("has some global parameters set", errs)) del node.ndparams[key] errs = cfg.VerifyConfig() self.assertFalse(errs) cluster = cfg.GetClusterInfo() nodegroup = cfg.GetNodeGroup(cfg.GetNodeGroupList()[0]) self._TestVerifyConfigIPolicy(cluster.ipolicy, "cluster", cfg, False) self._TestVerifyConfigClusterIPolicy(cluster.ipolicy, cfg) self._TestVerifyConfigIPolicy(nodegroup.ipolicy, nodegroup.name, cfg, True) self._TestVerifyConfigGroupIPolicy(nodegroup, cfg) nodegroup.ipolicy = cluster.SimpleFillIPolicy(nodegroup.ipolicy) self._TestVerifyConfigIPolicy(nodegroup.ipolicy, nodegroup.name, cfg, True) # Tests for Ssconf helper functions def testUnlockedGetHvparamsString(self): hvparams = {"a": "A", "b": "B", "c": "C"} hvname = "myhv" cfg_writer = self._get_object() cfg_writer._config_data = mock.Mock() cfg_writer._config_data.cluster = mock.Mock() cfg_writer._config_data.cluster.hvparams = {hvname: hvparams} result = cfg_writer._UnlockedGetHvparamsString(hvname) self.assertTrue("a=A" in result) lines = [line for line in result.split('\n') if line != ''] self.assertEqual(len(hvparams.keys()), len(lines)) def testExtendByAllHvparamsStrings(self): all_hvparams = {constants.HT_XEN_PVM: "foo"} ssconf_values = {} cfg_writer = self._get_object() cfg_writer._ExtendByAllHvparamsStrings(ssconf_values, all_hvparams) expected_key = constants.SS_HVPARAMS_PREF + constants.HT_XEN_PVM self.assertTrue(expected_key in ssconf_values) def _IsErrorInList(err_str, err_list): return any(map(lambda e: err_str in e, err_list)) class TestTRM(unittest.TestCase): EC_ID = 1 def testEmpty(self): t = TemporaryReservationManager() 
t.Reserve(self.EC_ID, "a") self.assertFalse(t.Reserved(self.EC_ID)) self.assertTrue(t.Reserved("a")) self.assertEqual(len(t.GetReserved()), 1) def testDuplicate(self): t = TemporaryReservationManager() t.Reserve(self.EC_ID, "a") self.assertRaises(errors.ReservationError, t.Reserve, 2, "a") t.DropECReservations(self.EC_ID) self.assertFalse(t.Reserved("a")) class TestCheckInstanceDiskIvNames(unittest.TestCase): @staticmethod def _MakeDisks(names): return [objects.Disk(iv_name=name) for name in names] def testNoError(self): disks = self._MakeDisks(["disk/0", "disk/1"]) self.assertEqual(config._CheckInstanceDiskIvNames(disks), []) instance._UpdateIvNames(0, disks) self.assertEqual(config._CheckInstanceDiskIvNames(disks), []) def testWrongNames(self): disks = self._MakeDisks(["disk/1", "disk/3", "disk/2"]) self.assertEqual(config._CheckInstanceDiskIvNames(disks), [ (0, "disk/0", "disk/1"), (1, "disk/1", "disk/3"), ]) # Fix names instance._UpdateIvNames(0, disks) self.assertEqual(config._CheckInstanceDiskIvNames(disks), []) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.qlang_unittest.py0000744000000000000000000002433712244641676021310 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
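# ---------------------------------------------------------------------------
# Editor's illustrative sketch, not part of the Ganeti sources: the filters
# produced by qlang.ParseFilter/MakeFilter below are plain nested lists such
# as ["|", ["=", "name", "web1"], ["=", "name", "web2"]]. Only the "|" and
# "=" operator spellings are confirmed by expected values in this file; the
# "&", "!", "?" and "=~" spellings below are assumptions. A toy evaluator
# over a dict of field values shows how such a tree is interpreted:
import re as _example_re

def _example_eval(qfilter, fields):
  op, args = qfilter[0], qfilter[1:]
  if op == "|":
    return any(_example_eval(f, fields) for f in args)
  if op == "&":
    return all(_example_eval(f, fields) for f in args)
  if op == "!":
    return not _example_eval(args[0], fields)
  if op == "?":  # boolean truth test on a field
    return bool(fields.get(args[0]))
  if op == "=":
    return fields.get(args[0]) == args[1]
  if op == "=~":  # regular expression match
    return _example_re.search(args[1], fields.get(args[0], "")) is not None
  raise ValueError("Unhandled operator %r" % (op,))

assert _example_eval(["|", ["=", "name", "web1"], ["=", "name", "web2"]],
                     {"name": "web2"})
# ---------------------------------------------------------------------------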
"""Script for testing ganeti.qlang""" import unittest import string from ganeti import utils from ganeti import errors from ganeti import qlang from ganeti import query import testutils class TestMakeSimpleFilter(unittest.TestCase): def _Test(self, field, names, expected, parse_exp=None): if parse_exp is None: parse_exp = names qfilter = qlang.MakeSimpleFilter(field, names) self.assertEqual(qfilter, expected) def test(self): self._Test("name", None, None, parse_exp=[]) self._Test("name", [], None) self._Test("name", ["node1.example.com"], ["|", ["=", "name", "node1.example.com"]]) self._Test("xyz", ["a", "b", "c"], ["|", ["=", "xyz", "a"], ["=", "xyz", "b"], ["=", "xyz", "c"]]) class TestParseFilter(unittest.TestCase): def setUp(self): self.parser = qlang.BuildFilterParser() def _Test(self, qfilter, expected, expect_filter=True): self.assertEqual(qlang.MakeFilter([qfilter], not expect_filter), expected) self.assertEqual(qlang.ParseFilter(qfilter, parser=self.parser), expected) def test(self): self._Test("name==\"foobar\"", [qlang.OP_EQUAL, "name", "foobar"]) self._Test("name=='foobar'", [qlang.OP_EQUAL, "name", "foobar"]) self._Test("valA==1 and valB==2 or valC==3", [qlang.OP_OR, [qlang.OP_AND, [qlang.OP_EQUAL, "valA", 1], [qlang.OP_EQUAL, "valB", 2]], [qlang.OP_EQUAL, "valC", 3]]) self._Test(("(name\n==\"foobar\") and (xyz==\"va)ue\" and k == 256 or" " x ==\t\"y\"\n) and mc"), [qlang.OP_AND, [qlang.OP_EQUAL, "name", "foobar"], [qlang.OP_OR, [qlang.OP_AND, [qlang.OP_EQUAL, "xyz", "va)ue"], [qlang.OP_EQUAL, "k", 256]], [qlang.OP_EQUAL, "x", "y"]], [qlang.OP_TRUE, "mc"]]) self._Test("(xyz==\"v\" or k == 256 and x == \"y\")", [qlang.OP_OR, [qlang.OP_EQUAL, "xyz", "v"], [qlang.OP_AND, [qlang.OP_EQUAL, "k", 256], [qlang.OP_EQUAL, "x", "y"]]]) self._Test("valA==1 and valB==2 and valC==3", [qlang.OP_AND, [qlang.OP_EQUAL, "valA", 1], [qlang.OP_EQUAL, "valB", 2], [qlang.OP_EQUAL, "valC", 3]]) self._Test("master or field", [qlang.OP_OR, [qlang.OP_TRUE, "master"], [qlang.OP_TRUE, "field"]]) self._Test("mem == 128", [qlang.OP_EQUAL, "mem", 128]) self._Test("negfield != -1", [qlang.OP_NOT_EQUAL, "negfield", -1]) self._Test("master", [qlang.OP_TRUE, "master"], expect_filter=False) self._Test("not master", [qlang.OP_NOT, [qlang.OP_TRUE, "master"]]) for op in ["not", "and", "or"]: self._Test("%sxyz" % op, [qlang.OP_TRUE, "%sxyz" % op], expect_filter=False) self._Test("not %sxyz" % op, [qlang.OP_NOT, [qlang.OP_TRUE, "%sxyz" % op]]) self._Test(" not \t%sfoo" % op, [qlang.OP_NOT, [qlang.OP_TRUE, "%sfoo" % op]]) self._Test("%sname =~ m/abc/" % op, [qlang.OP_REGEXP, "%sname" % op, "abc"]) self._Test("master and not other", [qlang.OP_AND, [qlang.OP_TRUE, "master"], [qlang.OP_NOT, [qlang.OP_TRUE, "other"]]]) self._Test("not (master or other == 4)", [qlang.OP_NOT, [qlang.OP_OR, [qlang.OP_TRUE, "master"], [qlang.OP_EQUAL, "other", 4]]]) self._Test("some==\"val\\\"ue\"", [qlang.OP_EQUAL, "some", "val\\\"ue"]) self._Test("123 in ips", [qlang.OP_CONTAINS, "ips", 123]) self._Test("99 not in ips", [qlang.OP_NOT, [qlang.OP_CONTAINS, "ips", 99]]) self._Test("\"a\" in valA and \"b\" not in valB", [qlang.OP_AND, [qlang.OP_CONTAINS, "valA", "a"], [qlang.OP_NOT, [qlang.OP_CONTAINS, "valB", "b"]]]) self._Test("name =~ m/test/", [qlang.OP_REGEXP, "name", "test"]) self._Test("name =~ m/^node.*example.com$/i", [qlang.OP_REGEXP, "name", "(?i)^node.*example.com$"]) self._Test("(name =~ m/^node.*example.com$/s and master) or pip =~ |^3.*|", [qlang.OP_OR, [qlang.OP_AND, [qlang.OP_REGEXP, "name", "(?s)^node.*example.com$"], 
[qlang.OP_TRUE, "master"]], [qlang.OP_REGEXP, "pip", "^3.*"]]) for flags in ["si", "is", "ssss", "iiiisiii"]: self._Test("name =~ m/gi/%s" % flags, [qlang.OP_REGEXP, "name", "(?%s)gi" % "".join(sorted(flags))]) for i in qlang._KNOWN_REGEXP_DELIM: self._Test("name =~ m%stest%s" % (i, i), [qlang.OP_REGEXP, "name", "test"]) self._Test("name !~ m%stest%s" % (i, i), [qlang.OP_NOT, [qlang.OP_REGEXP, "name", "test"]]) self._Test("not\tname =~ m%stest%s" % (i, i), [qlang.OP_NOT, [qlang.OP_REGEXP, "name", "test"]]) self._Test("notname =~ m%stest%s" % (i, i), [qlang.OP_REGEXP, "notname", "test"]) self._Test("name =* '*.site'", [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("*.site")]) self._Test("field !* '*.example.*'", [qlang.OP_NOT, [qlang.OP_REGEXP, "field", utils.DnsNameGlobPattern("*.example.*")]]) self._Test("ctime < 1234", [qlang.OP_LT, "ctime", 1234]) self._Test("ctime > 1234", [qlang.OP_GT, "ctime", 1234]) self._Test("mtime <= 9999", [qlang.OP_LE, "mtime", 9999]) self._Test("mtime >= 9999", [qlang.OP_GE, "mtime", 9999]) def testAllFields(self): for name in frozenset(i for d in query.ALL_FIELD_LISTS for i in d.keys()): self._Test("%s == \"value\"" % name, [qlang.OP_EQUAL, name, "value"]) def testError(self): # Invalid field names, meaning no boolean check is done tests = ["#invalid!filter#", "m/x/,"] # Unknown regexp flag tests.append("name=~m#a#g") # Incomplete regexp group tests.append("name=~^[^") # Valid flag, but in uppercase tests.append("asdf =~ m|abc|I") # Non-matching regexp delimiters tests.append("name =~ /foobarbaz#") # Invalid operators tests.append("name <> value") tests.append("name => value") tests.append("name =< value") for qfilter in tests: try: qlang.ParseFilter(qfilter, parser=self.parser) except errors.QueryFilterParseError, err: self.assertEqual(len(err.GetDetails()), 3) else: self.fail("Invalid filter '%s' did not raise exception" % qfilter) class TestMakeFilter(unittest.TestCase): def testNoNames(self): self.assertEqual(qlang.MakeFilter([], False), None) self.assertEqual(qlang.MakeFilter(None, False), None) def testPlainNames(self): self.assertEqual(qlang.MakeFilter(["web1", "web2"], False), [qlang.OP_OR, [qlang.OP_EQUAL, "name", "web1"], [qlang.OP_EQUAL, "name", "web2"]]) def testPlainNamesOtherNamefield(self): self.assertEqual(qlang.MakeFilter(["mailA", "mailB"], False, namefield="id"), [qlang.OP_OR, [qlang.OP_EQUAL, "id", "mailA"], [qlang.OP_EQUAL, "id", "mailB"]]) def testForcedFilter(self): for i in [None, [], ["1", "2"], ["", "", ""], ["a", "b", "c", "d"]]: self.assertRaises(errors.OpPrereqError, qlang.MakeFilter, i, True) # Glob pattern shouldn't parse as filter self.assertRaises(errors.QueryFilterParseError, qlang.MakeFilter, ["*.site"], True) # Plain name parses as boolean filter self.assertEqual(qlang.MakeFilter(["web1"], True), [qlang.OP_TRUE, "web1"]) def testFilter(self): self.assertEqual(qlang.MakeFilter(["foo/bar"], False), [qlang.OP_TRUE, "foo/bar"]) self.assertEqual(qlang.MakeFilter(["foo=='bar'"], False), [qlang.OP_EQUAL, "foo", "bar"]) self.assertEqual(qlang.MakeFilter(["field=*'*.site'"], False), [qlang.OP_REGEXP, "field", utils.DnsNameGlobPattern("*.site")]) # Plain name parses as name filter, not boolean for name in ["node1", "n-o-d-e", "n_o_d_e", "node1.example.com", "node1.example.com."]: self.assertEqual(qlang.MakeFilter([name], False), [qlang.OP_OR, [qlang.OP_EQUAL, "name", name]]) # Invalid filters for i in ["foo==bar", "foo+=1"]: self.assertRaises(errors.QueryFilterParseError, qlang.MakeFilter, [i], False) def testGlob(self): 
self.assertEqual(qlang.MakeFilter(["*.site"], False), [qlang.OP_OR, [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("*.site")]]) self.assertEqual(qlang.MakeFilter(["web?.example"], False), [qlang.OP_OR, [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("web?.example")]]) self.assertEqual(qlang.MakeFilter(["*.a", "*.b", "?.c"], False), [qlang.OP_OR, [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("*.a")], [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("*.b")], [qlang.OP_REGEXP, "name", utils.DnsNameGlobPattern("?.c")]]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.rapi.resources_unittest.py0000744000000000000000000000402712244641676023144 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the RAPI resources module""" import unittest import tempfile from ganeti import errors from ganeti import http from ganeti.rapi import connector from ganeti.rapi import rlib2 import testutils class MapperTests(unittest.TestCase): """Tests for remote API URI mapper.""" def setUp(self): self.map = connector.Mapper() def _TestUri(self, uri, result): self.assertEquals(self.map.getController(uri), result) def _TestFailingUri(self, uri): self.failUnlessRaises(http.HttpNotFound, self.map.getController, uri) def testMapper(self): """Testing Mapper""" self._TestFailingUri("/tags") self._TestFailingUri("/instances") self._TestUri("/version", (rlib2.R_version, [], {})) self._TestUri("/2/instances/www.test.com", (rlib2.R_2_instances_name, ["www.test.com"], {})) self._TestUri("/2/instances/www.test.com/tags?f=5&f=6&alt=html", (rlib2.R_2_instances_name_tags, ["www.test.com"], {"alt": ["html"], "f": ["5", "6"], })) self._TestFailingUri("/tag") self._TestFailingUri("/instances/does/not/exist") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.locking_unittest.py0000744000000000000000000026772212244641676021643 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
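# ---------------------------------------------------------------------------
# Editor's illustrative sketch, not part of the Ganeti sources: the
# SharedLock behaviour exercised at length below boils down to a
# reader/writer lock: any number of sharers may coexist, while an exclusive
# owner excludes everyone else, and acquire() reports success with a true
# value. A minimal condition-variable model of just that core (no timeouts,
# priorities, downgrade() or delete()):
import threading as _example_threading

class _ExampleSharedLock(object):
  def __init__(self):
    self._cond = _example_threading.Condition(_example_threading.Lock())
    self._sharers = 0
    self._exclusive = False

  def acquire(self, shared=0):
    self._cond.acquire()
    try:
      if shared:
        # Sharers only wait for an exclusive owner, not for each other
        while self._exclusive:
          self._cond.wait()
        self._sharers += 1
      else:
        # An exclusive acquirer waits for everybody
        while self._exclusive or self._sharers > 0:
          self._cond.wait()
        self._exclusive = True
      return True
    finally:
      self._cond.release()

  def release(self):
    self._cond.acquire()
    try:
      if self._exclusive:
        self._exclusive = False
      else:
        self._sharers -= 1
      self._cond.notifyAll()
    finally:
      self._cond.release()
# ---------------------------------------------------------------------------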
"""Script for unittesting the locking module""" import os import unittest import time import Queue import threading import random import gc import itertools from ganeti import constants from ganeti import locking from ganeti import errors from ganeti import utils from ganeti import compat from ganeti import objects from ganeti import query import testutils # This is used to test the ssynchronize decorator. # Since it's passed as input to a decorator it must be declared as a global. _decoratorlock = locking.SharedLock("decorator lock") #: List for looping tests ITERATIONS = range(8) def _Repeat(fn): """Decorator for executing a function many times""" def wrapper(*args, **kwargs): for i in ITERATIONS: fn(*args, **kwargs) return wrapper def SafeSleep(duration): start = time.time() while True: delay = start + duration - time.time() if delay <= 0.0: break time.sleep(delay) class _ThreadedTestCase(unittest.TestCase): """Test class that supports adding/waiting on threads""" def setUp(self): unittest.TestCase.setUp(self) self.done = Queue.Queue(0) self.threads = [] def _addThread(self, *args, **kwargs): """Create and remember a new thread""" t = threading.Thread(*args, **kwargs) self.threads.append(t) t.start() return t def _waitThreads(self): """Wait for all our threads to finish""" for t in self.threads: t.join(60) self.failIf(t.isAlive()) self.threads = [] class _ConditionTestCase(_ThreadedTestCase): """Common test case for conditions""" def setUp(self, cls): _ThreadedTestCase.setUp(self) self.lock = threading.Lock() self.cond = cls(self.lock) def _testAcquireRelease(self): self.assertFalse(self.cond._is_owned()) self.assertRaises(RuntimeError, self.cond.wait, None) self.assertRaises(RuntimeError, self.cond.notifyAll) self.cond.acquire() self.assert_(self.cond._is_owned()) self.cond.notifyAll() self.assert_(self.cond._is_owned()) self.cond.release() self.assertFalse(self.cond._is_owned()) self.assertRaises(RuntimeError, self.cond.wait, None) self.assertRaises(RuntimeError, self.cond.notifyAll) def _testNotification(self): def _NotifyAll(): self.done.put("NE") self.cond.acquire() self.done.put("NA") self.cond.notifyAll() self.done.put("NN") self.cond.release() self.cond.acquire() self._addThread(target=_NotifyAll) self.assertEqual(self.done.get(True, 1), "NE") self.assertRaises(Queue.Empty, self.done.get_nowait) self.cond.wait(None) self.assertEqual(self.done.get(True, 1), "NA") self.assertEqual(self.done.get(True, 1), "NN") self.assert_(self.cond._is_owned()) self.cond.release() self.assertFalse(self.cond._is_owned()) class TestSingleNotifyPipeCondition(_ConditionTestCase): """SingleNotifyPipeCondition tests""" def setUp(self): _ConditionTestCase.setUp(self, locking.SingleNotifyPipeCondition) def testAcquireRelease(self): self._testAcquireRelease() def testNotification(self): self._testNotification() def testWaitReuse(self): self.cond.acquire() self.cond.wait(0) self.cond.wait(0.1) self.cond.release() def testNoNotifyReuse(self): self.cond.acquire() self.cond.notifyAll() self.assertRaises(RuntimeError, self.cond.wait, None) self.assertRaises(RuntimeError, self.cond.notifyAll) self.cond.release() class TestPipeCondition(_ConditionTestCase): """PipeCondition tests""" def setUp(self): _ConditionTestCase.setUp(self, locking.PipeCondition) def testAcquireRelease(self): self._testAcquireRelease() def testNotification(self): self._testNotification() def _TestWait(self, fn): threads = [ self._addThread(target=fn), self._addThread(target=fn), self._addThread(target=fn), ] # Wait for threads to be 
waiting for _ in threads: self.assertEqual(self.done.get(True, 1), "A") self.assertRaises(Queue.Empty, self.done.get_nowait) self.cond.acquire() self.assertEqual(len(self.cond._waiters), 3) self.assertEqual(self.cond._waiters, set(threads)) self.assertTrue(repr(self.cond).startswith("<")) self.assertTrue("waiters=" in repr(self.cond)) # This new thread can't acquire the lock, and thus call wait, before we # release it self._addThread(target=fn) self.cond.notifyAll() self.assertRaises(Queue.Empty, self.done.get_nowait) self.cond.release() # We should now get 3 W and 1 A (for the new thread) in whatever order w = 0 a = 0 for i in range(4): got = self.done.get(True, 1) if got == "W": w += 1 elif got == "A": a += 1 else: self.fail("Got %s on the done queue" % got) self.assertEqual(w, 3) self.assertEqual(a, 1) self.cond.acquire() self.cond.notifyAll() self.cond.release() self._waitThreads() self.assertEqual(self.done.get_nowait(), "W") self.assertRaises(Queue.Empty, self.done.get_nowait) def testBlockingWait(self): def _BlockingWait(): self.cond.acquire() self.done.put("A") self.cond.wait(None) self.cond.release() self.done.put("W") self._TestWait(_BlockingWait) def testLongTimeoutWait(self): def _Helper(): self.cond.acquire() self.done.put("A") self.cond.wait(15.0) self.cond.release() self.done.put("W") self._TestWait(_Helper) def _TimeoutWait(self, timeout, check): self.cond.acquire() self.cond.wait(timeout) self.cond.release() self.done.put(check) def testShortTimeoutWait(self): self._addThread(target=self._TimeoutWait, args=(0.1, "T1")) self._addThread(target=self._TimeoutWait, args=(0.1, "T1")) self._waitThreads() self.assertEqual(self.done.get_nowait(), "T1") self.assertEqual(self.done.get_nowait(), "T1") self.assertRaises(Queue.Empty, self.done.get_nowait) def testZeroTimeoutWait(self): self._addThread(target=self._TimeoutWait, args=(0, "T0")) self._addThread(target=self._TimeoutWait, args=(0, "T0")) self._addThread(target=self._TimeoutWait, args=(0, "T0")) self._waitThreads() self.assertEqual(self.done.get_nowait(), "T0") self.assertEqual(self.done.get_nowait(), "T0") self.assertEqual(self.done.get_nowait(), "T0") self.assertRaises(Queue.Empty, self.done.get_nowait) class TestSharedLock(_ThreadedTestCase): """SharedLock tests""" def setUp(self): _ThreadedTestCase.setUp(self) self.sl = locking.SharedLock("TestSharedLock") self.assertTrue(repr(self.sl).startswith("<")) self.assertTrue("name=TestSharedLock" in repr(self.sl)) def testSequenceAndOwnership(self): self.assertFalse(self.sl.is_owned()) self.sl.acquire(shared=1) self.assert_(self.sl.is_owned()) self.assert_(self.sl.is_owned(shared=1)) self.assertFalse(self.sl.is_owned(shared=0)) self.sl.release() self.assertFalse(self.sl.is_owned()) self.sl.acquire() self.assert_(self.sl.is_owned()) self.assertFalse(self.sl.is_owned(shared=1)) self.assert_(self.sl.is_owned(shared=0)) self.sl.release() self.assertFalse(self.sl.is_owned()) self.sl.acquire(shared=1) self.assert_(self.sl.is_owned()) self.assert_(self.sl.is_owned(shared=1)) self.assertFalse(self.sl.is_owned(shared=0)) self.sl.release() self.assertFalse(self.sl.is_owned()) def testBooleanValue(self): # semaphores are supposed to return a true value on a successful acquire self.assert_(self.sl.acquire(shared=1)) self.sl.release() self.assert_(self.sl.acquire()) self.sl.release() def testDoubleLockingStoE(self): self.sl.acquire(shared=1) self.assertRaises(AssertionError, self.sl.acquire) def testDoubleLockingEtoS(self): self.sl.acquire() self.assertRaises(AssertionError, self.sl.acquire, 
shared=1) def testDoubleLockingStoS(self): self.sl.acquire(shared=1) self.assertRaises(AssertionError, self.sl.acquire, shared=1) def testDoubleLockingEtoE(self): self.sl.acquire() self.assertRaises(AssertionError, self.sl.acquire) # helper functions: called in a separate thread they acquire the lock, send # their identifier on the done queue, then release it. def _doItSharer(self): try: self.sl.acquire(shared=1) self.done.put("SHR") self.sl.release() except errors.LockError: self.done.put("ERR") def _doItExclusive(self): try: self.sl.acquire() self.done.put("EXC") self.sl.release() except errors.LockError: self.done.put("ERR") def _doItDelete(self): try: self.sl.delete() self.done.put("DEL") except errors.LockError: self.done.put("ERR") def testSharersCanCoexist(self): self.sl.acquire(shared=1) threading.Thread(target=self._doItSharer).start() self.assert_(self.done.get(True, 1)) self.sl.release() @_Repeat def testExclusiveBlocksExclusive(self): self.sl.acquire() self._addThread(target=self._doItExclusive) self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() self.failUnlessEqual(self.done.get_nowait(), "EXC") @_Repeat def testExclusiveBlocksDelete(self): self.sl.acquire() self._addThread(target=self._doItDelete) self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() self.failUnlessEqual(self.done.get_nowait(), "DEL") self.sl = locking.SharedLock(self.sl.name) @_Repeat def testExclusiveBlocksSharer(self): self.sl.acquire() self._addThread(target=self._doItSharer) self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() self.failUnlessEqual(self.done.get_nowait(), "SHR") @_Repeat def testSharerBlocksExclusive(self): self.sl.acquire(shared=1) self._addThread(target=self._doItExclusive) self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() self.failUnlessEqual(self.done.get_nowait(), "EXC") @_Repeat def testSharerBlocksDelete(self): self.sl.acquire(shared=1) self._addThread(target=self._doItDelete) self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() self.failUnlessEqual(self.done.get_nowait(), "DEL") self.sl = locking.SharedLock(self.sl.name) @_Repeat def testWaitingExclusiveBlocksSharer(self): """SKIPPED testWaitingExclusiveBlockSharer""" return self.sl.acquire(shared=1) # the lock is acquired in shared mode... self._addThread(target=self._doItExclusive) # ...but now an exclusive is waiting... self._addThread(target=self._doItSharer) # ...so the sharer should be blocked as well self.assertRaises(Queue.Empty, self.done.get_nowait) self.sl.release() self._waitThreads() # The exclusive passed before self.failUnlessEqual(self.done.get_nowait(), "EXC") self.failUnlessEqual(self.done.get_nowait(), "SHR") @_Repeat def testWaitingSharerBlocksExclusive(self): """SKIPPED testWaitingSharerBlocksExclusive""" return self.sl.acquire() # the lock is acquired in exclusive mode... self._addThread(target=self._doItSharer) # ...but now a sharer is waiting... self._addThread(target=self._doItExclusive) # ...the exclusive is waiting too... 
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.sl.release()
    self._waitThreads()
    # The sharer passed before
    self.assertEqual(self.done.get_nowait(), "SHR")
    self.assertEqual(self.done.get_nowait(), "EXC")

  def testDelete(self):
    self.sl.delete()
    self.assertRaises(errors.LockError, self.sl.acquire)
    self.assertRaises(errors.LockError, self.sl.acquire, shared=1)
    self.assertRaises(errors.LockError, self.sl.delete)

  def testDeleteTimeout(self):
    self.assertTrue(self.sl.delete(timeout=60))

  def testDeleteTimeoutFail(self):
    ready = threading.Event()
    finish = threading.Event()

    def fn():
      self.sl.acquire(shared=0)
      ready.set()

      finish.wait()
      self.sl.release()

    self._addThread(target=fn)
    ready.wait()

    # Test if deleting a lock owned in exclusive mode by another thread fails
    # to delete when a timeout is used
    self.assertFalse(self.sl.delete(timeout=0.02))

    finish.set()
    self._waitThreads()

    self.assertTrue(self.sl.delete())
    self.assertRaises(errors.LockError, self.sl.acquire)

  def testNoDeleteIfSharer(self):
    self.sl.acquire(shared=1)
    self.assertRaises(AssertionError, self.sl.delete)

  @_Repeat
  def testDeletePendingSharersExclusiveDelete(self):
    self.sl.acquire()
    self._addThread(target=self._doItSharer)
    self._addThread(target=self._doItSharer)
    self._addThread(target=self._doItExclusive)
    self._addThread(target=self._doItDelete)
    self.sl.delete()
    self._waitThreads()
    # The threads who were pending return ERR
    for _ in range(4):
      self.assertEqual(self.done.get_nowait(), "ERR")
    self.sl = locking.SharedLock(self.sl.name)

  @_Repeat
  def testDeletePendingDeleteExclusiveSharers(self):
    self.sl.acquire()
    self._addThread(target=self._doItDelete)
    self._addThread(target=self._doItExclusive)
    self._addThread(target=self._doItSharer)
    self._addThread(target=self._doItSharer)
    self.sl.delete()
    self._waitThreads()
    # The four threads who were pending all return ERR
    self.assertEqual(self.done.get_nowait(), "ERR")
    self.assertEqual(self.done.get_nowait(), "ERR")
    self.assertEqual(self.done.get_nowait(), "ERR")
    self.assertEqual(self.done.get_nowait(), "ERR")
    self.sl = locking.SharedLock(self.sl.name)

  @_Repeat
  def testExclusiveAcquireTimeout(self):
    for shared in [0, 1]:
      on_queue = threading.Event()
      release_exclusive = threading.Event()

      def _LockExclusive():
        self.sl.acquire(shared=0, test_notify=on_queue.set)
        self.done.put("A: start wait")
        release_exclusive.wait()
        self.done.put("A: end wait")
        self.sl.release()

      # Start thread to hold lock in exclusive mode
      self._addThread(target=_LockExclusive)

      # Wait for wait to begin
      self.assertEqual(self.done.get(timeout=60), "A: start wait")

      # Wait up to 60s to get lock, but release exclusive lock as soon as
      # we're on the queue
      self.failUnless(self.sl.acquire(shared=shared, timeout=60,
                                      test_notify=release_exclusive.set))

      self.done.put("got 2nd")
      self.sl.release()

      self._waitThreads()

      self.assertEqual(self.done.get_nowait(), "A: end wait")
      self.assertEqual(self.done.get_nowait(), "got 2nd")
      self.assertRaises(Queue.Empty, self.done.get_nowait)

  @_Repeat
  def testAcquireExpiringTimeout(self):
    def _AcquireWithTimeout(shared, timeout):
      if not self.sl.acquire(shared=shared, timeout=timeout):
        self.done.put("timeout")

    for shared in [0, 1]:
      # Lock exclusively
      self.sl.acquire()

      # Start shared acquires with timeout between 0 and 20 ms
      for i in range(11):
        self._addThread(target=_AcquireWithTimeout,
                        args=(shared, i * 2.0 / 1000.0))

      # Wait for threads to finish (makes sure the acquire timeout expires
      # before releasing the lock)
      self._waitThreads()

      # Release lock
      self.sl.release()

      for _ in range(11):
        self.assertEqual(self.done.get_nowait(), "timeout")

      self.assertRaises(Queue.Empty, self.done.get_nowait)

  @_Repeat
  def testSharedSkipExclusiveAcquires(self):
    # Tests whether shared acquires jump in front of exclusive acquires in the
    # queue.

    def _Acquire(shared, name, notify_ev, wait_ev):
      if notify_ev:
        notify_fn = notify_ev.set
      else:
        notify_fn = None

      if wait_ev:
        wait_ev.wait()

      if not self.sl.acquire(shared=shared, test_notify=notify_fn):
        return

      self.done.put(name)
      self.sl.release()

    # Get exclusive lock while we fill the queue
    self.sl.acquire()

    shrcnt1 = 5
    shrcnt2 = 7
    shrcnt3 = 9
    shrcnt4 = 2

    # Add acquires using threading.Event for synchronization. They'll be
    # acquired exactly in the order defined in this list.
    acquires = (shrcnt1 * [(1, "shared 1")] +
                3 * [(0, "exclusive 1")] +
                shrcnt2 * [(1, "shared 2")] +
                shrcnt3 * [(1, "shared 3")] +
                shrcnt4 * [(1, "shared 4")] +
                3 * [(0, "exclusive 2")])

    ev_cur = None
    ev_prev = None

    for args in acquires:
      ev_cur = threading.Event()
      self._addThread(target=_Acquire, args=args + (ev_cur, ev_prev))
      ev_prev = ev_cur

    # Wait for last acquire to start
    ev_prev.wait()

    # Expect 6 pending exclusive acquires and 1 for all shared acquires
    # together
    self.assertEqual(self.sl._count_pending(), 7)

    # Release exclusive lock and wait
    self.sl.release()

    self._waitThreads()

    # Check sequence
    for _ in range(shrcnt1 + shrcnt2 + shrcnt3 + shrcnt4):
      # Shared locks aren't guaranteed to be notified in order, but they'll be
      # first
      tmp = self.done.get_nowait()
      if tmp == "shared 1":
        shrcnt1 -= 1
      elif tmp == "shared 2":
        shrcnt2 -= 1
      elif tmp == "shared 3":
        shrcnt3 -= 1
      elif tmp == "shared 4":
        shrcnt4 -= 1

    self.assertEqual(shrcnt1, 0)
    self.assertEqual(shrcnt2, 0)
    self.assertEqual(shrcnt3, 0)
    self.assertEqual(shrcnt4, 0)

    for _ in range(3):
      self.assertEqual(self.done.get_nowait(), "exclusive 1")

    for _ in range(3):
      self.assertEqual(self.done.get_nowait(), "exclusive 2")

    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def testIllegalDowngrade(self):
    # Not yet acquired
    self.assertRaises(AssertionError, self.sl.downgrade)

    # Acquire in shared mode, downgrade should be no-op
    self.assertTrue(self.sl.acquire(shared=1))
    self.assertTrue(self.sl.is_owned(shared=1))
    self.assertTrue(self.sl.downgrade())
    self.assertTrue(self.sl.is_owned(shared=1))
    self.sl.release()

  def testDowngrade(self):
    self.assertTrue(self.sl.acquire())
    self.assertTrue(self.sl.is_owned(shared=0))
    self.assertTrue(self.sl.downgrade())
    self.assertTrue(self.sl.is_owned(shared=1))
    self.sl.release()

  @_Repeat
  def testDowngradeJumpsAheadOfExclusive(self):
    def _KeepExclusive(ev_got, ev_downgrade, ev_release):
      self.assertTrue(self.sl.acquire())
      self.assertTrue(self.sl.is_owned(shared=0))
      ev_got.set()
      ev_downgrade.wait()
      self.assertTrue(self.sl.is_owned(shared=0))
      self.assertTrue(self.sl.downgrade())
      self.assertTrue(self.sl.is_owned(shared=1))
      ev_release.wait()
      self.assertTrue(self.sl.is_owned(shared=1))
      self.sl.release()

    def _KeepExclusive2(ev_started, ev_release):
      self.assertTrue(self.sl.acquire(test_notify=ev_started.set))
      self.assertTrue(self.sl.is_owned(shared=0))
      ev_release.wait()
      self.assertTrue(self.sl.is_owned(shared=0))
      self.sl.release()

    def _KeepShared(ev_started, ev_got, ev_release):
      self.assertTrue(self.sl.acquire(shared=1, test_notify=ev_started.set))
      self.assertTrue(self.sl.is_owned(shared=1))
      ev_got.set()
      ev_release.wait()
      self.assertTrue(self.sl.is_owned(shared=1))
      self.sl.release()

    # Acquire lock in exclusive mode
    ev_got_excl1 = threading.Event()
    ev_downgrade_excl1 = threading.Event()
    ev_release_excl1 = threading.Event()
    th_excl1 = self._addThread(target=_KeepExclusive,
                               args=(ev_got_excl1, ev_downgrade_excl1,
                                     ev_release_excl1))
    ev_got_excl1.wait()

    # Start a second exclusive acquire
    ev_started_excl2 = threading.Event()
    ev_release_excl2 = threading.Event()
    th_excl2 = self._addThread(target=_KeepExclusive2,
                               args=(ev_started_excl2, ev_release_excl2))
    ev_started_excl2.wait()

    # Start shared acquires, will jump ahead of second exclusive acquire when
    # first exclusive acquire downgrades
    ev_shared = [(threading.Event(), threading.Event()) for _ in range(5)]
    ev_release_shared = threading.Event()

    th_shared = [self._addThread(target=_KeepShared,
                                 args=(ev_started, ev_got, ev_release_shared))
                 for (ev_started, ev_got) in ev_shared]

    # Wait for all shared acquires to start
    for (ev, _) in ev_shared:
      ev.wait()

    # Check lock information
    self.assertEqual(self.sl.GetLockInfo(set([query.LQ_MODE,
                                              query.LQ_OWNER])),
                     [(self.sl.name, "exclusive",
                       [th_excl1.getName()], None)])
    [(_, _, _, pending), ] = self.sl.GetLockInfo(set([query.LQ_PENDING]))
    self.assertEqual([(pendmode, sorted(waiting))
                      for (pendmode, waiting) in pending],
                     [("exclusive", [th_excl2.getName()]),
                      ("shared", sorted(th.getName() for th in th_shared))])

    # Shared acquires won't start until the exclusive lock is downgraded
    ev_downgrade_excl1.set()

    # Wait for all shared acquires to be successful
    for (_, ev) in ev_shared:
      ev.wait()

    # Check lock information again
    self.assertEqual(self.sl.GetLockInfo(set([query.LQ_MODE,
                                              query.LQ_PENDING])),
                     [(self.sl.name, "shared", None,
                       [("exclusive", [th_excl2.getName()])])])
    [(_, _, owner, _), ] = self.sl.GetLockInfo(set([query.LQ_OWNER]))
    self.assertEqual(set(owner), set([th_excl1.getName()] +
                                     [th.getName() for th in th_shared]))

    ev_release_excl1.set()
    ev_release_excl2.set()
    ev_release_shared.set()
    self._waitThreads()

    self.assertEqual(self.sl.GetLockInfo(set([query.LQ_MODE, query.LQ_OWNER,
                                              query.LQ_PENDING])),
                     [(self.sl.name, None, None, [])])

  @_Repeat
  def testMixedAcquireTimeout(self):
    sync = threading.Event()

    def _AcquireShared(ev):
      if not self.sl.acquire(shared=1, timeout=None):
        return

      self.done.put("shared")

      # Notify main thread
      ev.set()

      # Wait for notification from main thread
      sync.wait()

      # Release lock
      self.sl.release()

    acquires = []
    for _ in range(3):
      ev = threading.Event()
      self._addThread(target=_AcquireShared, args=(ev, ))
      acquires.append(ev)

    # Wait for all acquires to finish
    for i in acquires:
      i.wait()

    self.assertEqual(self.sl._count_pending(), 0)

    # Try to get exclusive lock
    self.failIf(self.sl.acquire(shared=0, timeout=0.02))

    # Acquire exclusive without timeout
    exclsync = threading.Event()
    exclev = threading.Event()

    def _AcquireExclusive():
      if not self.sl.acquire(shared=0):
        return

      self.done.put("exclusive")

      # Notify main thread
      exclev.set()

      # Wait for notification from main thread
      exclsync.wait()

      self.sl.release()

    self._addThread(target=_AcquireExclusive)

    # Try to get exclusive lock
    self.failIf(self.sl.acquire(shared=0, timeout=0.02))

    # Make all shared holders release their locks
    sync.set()

    # Wait for exclusive acquire to succeed
    exclev.wait()

    self.assertEqual(self.sl._count_pending(), 0)

    # Try to get exclusive lock
    self.failIf(self.sl.acquire(shared=0, timeout=0.02))

    def _AcquireSharedSimple():
      if self.sl.acquire(shared=1, timeout=None):
        self.done.put("shared2")
        self.sl.release()

    for _ in range(10):
      self._addThread(target=_AcquireSharedSimple)

    # Tell exclusive lock to release
    exclsync.set()

    # Wait for everything to finish
    self._waitThreads()

    self.assertEqual(self.sl._count_pending(), 0)

    # Check sequence
    for _ in range(3):
      self.assertEqual(self.done.get_nowait(), "shared")

    self.assertEqual(self.done.get_nowait(), "exclusive")

    for _ in range(10):
      self.assertEqual(self.done.get_nowait(), "shared2")

    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def testPriority(self):
    # Acquire in exclusive mode
    self.assert_(self.sl.acquire(shared=0))

    # Queue acquires
    def _Acquire(prev, next, shared, priority, result):
      prev.wait()
      self.sl.acquire(shared=shared, priority=priority, test_notify=next.set)
      try:
        self.done.put(result)
      finally:
        self.sl.release()

    counter = itertools.count(0)
    priorities = range(-20, 30)
    first = threading.Event()
    prev = first

    # Data structure:
    # {
    #   priority:
    #     [(shared/exclusive, set(acquire names), set(pending threads)),
    #      (shared/exclusive, ...),
    #      ...,
    #     ],
    # }
    perprio = {}

    # References shared acquire per priority in L{perprio}. Data structure:
    # {
    #   priority: (shared=1, set(acquire names), set(pending threads)),
    # }
    prioshared = {}

    for seed in [4979, 9523, 14902, 32440]:
      # Use a deterministic random generator
      rnd = random.Random(seed)
      for priority in [rnd.choice(priorities) for _ in range(30)]:
        modes = [0, 1]
        rnd.shuffle(modes)
        for shared in modes:
          # Unique name
          acqname = "%s/shr=%s/prio=%s" % (counter.next(), shared, priority)

          ev = threading.Event()
          thread = self._addThread(target=_Acquire,
                                   args=(prev, ev, shared, priority, acqname))
          prev = ev

          # Record expected acquire, see above for structure
          data = (shared, set([acqname]), set([thread]))
          priolist = perprio.setdefault(priority, [])
          if shared:
            priosh = prioshared.get(priority, None)
            if priosh:
              # Shared acquires are merged
              for i, j in zip(priosh[1:], data[1:]):
                i.update(j)
              assert data[0] == priosh[0]
            else:
              prioshared[priority] = data
              priolist.append(data)
          else:
            priolist.append(data)

    # Start all acquires and wait for them
    first.set()
    prev.wait()

    # Check lock information
    self.assertEqual(self.sl.GetLockInfo(set()),
                     [(self.sl.name, None, None, None)])
    self.assertEqual(self.sl.GetLockInfo(set([query.LQ_MODE,
                                              query.LQ_OWNER])),
                     [(self.sl.name, "exclusive",
                       [threading.currentThread().getName()], None)])

    self._VerifyPrioPending(self.sl.GetLockInfo(set([query.LQ_PENDING])),
                            perprio)

    # Let threads acquire the lock
    self.sl.release()

    # Wait for everything to finish
    self._waitThreads()

    self.assert_(self.sl._check_empty())

    # Check acquires by priority
    for acquires in [perprio[i] for i in sorted(perprio.keys())]:
      for (_, names, _) in acquires:
        # For shared acquires, the set will contain 1..n entries. For
        # exclusive acquires only one.
        while names:
          names.remove(self.done.get_nowait())

    self.assertFalse(compat.any(names for (_, names, _) in acquires))

    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def _VerifyPrioPending(self, ((name, mode, owner, pending), ), perprio):
    self.assertEqual(name, self.sl.name)
    self.assert_(mode is None)
    self.assert_(owner is None)

    self.assertEqual([(pendmode, sorted(waiting))
                      for (pendmode, waiting) in pending],
                     [(["exclusive", "shared"][int(bool(shared))],
                       sorted(t.getName() for t in threads))
                      for acquires in [perprio[i]
                                       for i in sorted(perprio.keys())]
                      for (shared, _, threads) in acquires])

  class _FakeTimeForSpuriousNotifications:
    def __init__(self, now, check_end):
      self.now = now
      self.check_end = check_end

      # Deterministic random number generator
      self.rnd = random.Random(15086)

    def time(self):
      # Advance time if the random number generator thinks so (this is to
      # test multiple notifications without advancing the time)
      if self.rnd.random() < 0.3:
        self.now += self.rnd.random()

      self.check_end(self.now)

      return self.now

  @_Repeat
  def testAcquireTimeoutWithSpuriousNotifications(self):
    ready = threading.Event()
    locked = threading.Event()
    req = Queue.Queue(0)

    epoch = 4000.0
    timeout = 60.0

    def check_end(now):
      self.assertFalse(locked.isSet())

      # If we waited long enough (in virtual time), tell main thread to
      # release lock, otherwise tell it to notify once more
      req.put(now < (epoch + (timeout * 0.8)))

    time_fn = self._FakeTimeForSpuriousNotifications(epoch, check_end).time

    sl = locking.SharedLock("test", _time_fn=time_fn)

    # Acquire in exclusive mode
    sl.acquire(shared=0)

    def fn():
      self.assertTrue(sl.acquire(shared=0, timeout=timeout,
                                 test_notify=ready.set))
      locked.set()
      sl.release()
      self.done.put("success")

    # Start acquire with timeout and wait for it to be ready
    self._addThread(target=fn)
    ready.wait()

    # The separate thread is now waiting to acquire the lock, so start sending
    # spurious notifications.
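
    # Note on the magic numbers here: check_end() keeps requesting
    # notifications while the virtual clock is below epoch + 0.8 * timeout
    # (48 virtual seconds), and the fake time() above advances the clock on
    # only ~30% of calls, by ~0.5s on average. Draining those 48 seconds
    # therefore takes on the order of several hundred notifications, which is
    # comfortably above the minimum of 100 asserted below.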

    # Wait for separate thread to ask for another notification
    count = 0
    while req.get():
      # After sending the notification, the lock will take a short amount of
      # time to notice and to retrieve the current time
      sl._notify_topmost()
      count += 1

    self.assertTrue(count > 100, "Not enough notifications were sent")

    self.assertFalse(locked.isSet())

    # Some notifications have been sent, now actually release the lock
    sl.release()

    # Wait for lock to be acquired
    locked.wait()

    self._waitThreads()

    self.assertEqual(self.done.get_nowait(), "success")
    self.assertRaises(Queue.Empty, self.done.get_nowait)


class TestSharedLockInCondition(_ThreadedTestCase):
  """SharedLock as a condition lock tests"""

  def setUp(self):
    _ThreadedTestCase.setUp(self)
    self.sl = locking.SharedLock("TestSharedLockInCondition")
    self.setCondition()

  def setCondition(self):
    self.cond = threading.Condition(self.sl)

  def testKeepMode(self):
    self.cond.acquire(shared=1)
    self.assert_(self.sl.is_owned(shared=1))
    self.cond.wait(0)
    self.assert_(self.sl.is_owned(shared=1))
    self.cond.release()
    self.cond.acquire(shared=0)
    self.assert_(self.sl.is_owned(shared=0))
    self.cond.wait(0)
    self.assert_(self.sl.is_owned(shared=0))
    self.cond.release()


class TestSharedLockInPipeCondition(TestSharedLockInCondition):
  """SharedLock as a pipe condition lock tests"""

  def setCondition(self):
    self.cond = locking.PipeCondition(self.sl)


class TestSSynchronizedDecorator(_ThreadedTestCase):
  """Shared Lock Synchronized decorator test"""

  def setUp(self):
    _ThreadedTestCase.setUp(self)

  @locking.ssynchronized(_decoratorlock)
  def _doItExclusive(self):
    self.assert_(_decoratorlock.is_owned())
    self.done.put("EXC")

  @locking.ssynchronized(_decoratorlock, shared=1)
  def _doItSharer(self):
    self.assert_(_decoratorlock.is_owned(shared=1))
    self.done.put("SHR")

  def testDecoratedFunctions(self):
    self._doItExclusive()
    self.assertFalse(_decoratorlock.is_owned())
    self._doItSharer()
    self.assertFalse(_decoratorlock.is_owned())

  def testSharersCanCoexist(self):
    _decoratorlock.acquire(shared=1)
    threading.Thread(target=self._doItSharer).start()
    self.assert_(self.done.get(True, 1))
    _decoratorlock.release()

  @_Repeat
  def testExclusiveBlocksExclusive(self):
    _decoratorlock.acquire()
    self._addThread(target=self._doItExclusive)
    # give it a bit of time to check that it's not actually doing anything
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    _decoratorlock.release()
    self._waitThreads()
    self.failUnlessEqual(self.done.get_nowait(), "EXC")

  @_Repeat
  def testExclusiveBlocksSharer(self):
    _decoratorlock.acquire()
    self._addThread(target=self._doItSharer)
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    _decoratorlock.release()
    self._waitThreads()
    self.failUnlessEqual(self.done.get_nowait(), "SHR")

  @_Repeat
  def testSharerBlocksExclusive(self):
    _decoratorlock.acquire(shared=1)
    self._addThread(target=self._doItExclusive)
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    _decoratorlock.release()
    self._waitThreads()
    self.failUnlessEqual(self.done.get_nowait(), "EXC")


class TestLockSet(_ThreadedTestCase):
  """LockSet tests"""

  def setUp(self):
    _ThreadedTestCase.setUp(self)
    self._setUpLS()

  def _setUpLS(self):
    """Helper to (re)initialize the lock set"""
    self.resources = ["one", "two", "three"]
    self.ls = locking.LockSet(self.resources, "TestLockSet")

  def testResources(self):
    self.assertEquals(self.ls._names(), set(self.resources))
    newls = locking.LockSet([], "TestLockSet.testResources")
    self.assertEquals(newls._names(), set())
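
  # check_owned() semantics exercised by the next two tests: unknown names
  # simply yield False while nothing at all is owned, but raise
  # errors.LockError once some lock in the set is held.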

  def testCheckOwnedUnknown(self):
    self.assertFalse(self.ls.check_owned("certainly-not-owning-this-one"))
    for shared in [-1, 0, 1, 6378, 24255]:
      self.assertFalse(self.ls.check_owned("certainly-not-owning-this-one",
                                           shared=shared))

  def testCheckOwnedUnknownWhileHolding(self):
    self.assertFalse(self.ls.check_owned([]))
    self.ls.acquire("one", shared=1)
    self.assertRaises(errors.LockError, self.ls.check_owned, "nonexist")
    self.assertTrue(self.ls.check_owned("one", shared=1))
    self.assertFalse(self.ls.check_owned("one", shared=0))
    self.assertFalse(self.ls.check_owned(["one", "two"]))
    self.assertRaises(errors.LockError, self.ls.check_owned,
                      ["one", "nonexist"])
    self.assertRaises(errors.LockError, self.ls.check_owned, "")
    self.ls.release()
    self.assertFalse(self.ls.check_owned([]))
    self.assertFalse(self.ls.check_owned("one"))

  def testAcquireRelease(self):
    self.assertFalse(self.ls.check_owned(self.ls._names()))
    self.assert_(self.ls.acquire("one"))
    self.assertEquals(self.ls.list_owned(), set(["one"]))
    self.assertTrue(self.ls.check_owned("one"))
    self.assertTrue(self.ls.check_owned("one", shared=0))
    self.assertFalse(self.ls.check_owned("one", shared=1))
    self.ls.release()
    self.assertEquals(self.ls.list_owned(), set())
    self.assertFalse(self.ls.check_owned(self.ls._names()))
    self.assertEquals(self.ls.acquire(["one"]), set(["one"]))
    self.assertEquals(self.ls.list_owned(), set(["one"]))
    self.ls.release()
    self.assertEquals(self.ls.list_owned(), set())
    self.ls.acquire(["one", "two", "three"])
    self.assertEquals(self.ls.list_owned(), set(["one", "two", "three"]))
    self.assertTrue(self.ls.check_owned(self.ls._names()))
    self.assertTrue(self.ls.check_owned(self.ls._names(), shared=0))
    self.assertFalse(self.ls.check_owned(self.ls._names(), shared=1))
    self.ls.release("one")
    self.assertFalse(self.ls.check_owned(["one"]))
    self.assertTrue(self.ls.check_owned(["two", "three"]))
    self.assertTrue(self.ls.check_owned(["two", "three"], shared=0))
    self.assertFalse(self.ls.check_owned(["two", "three"], shared=1))
    self.assertEquals(self.ls.list_owned(), set(["two", "three"]))
    self.ls.release(["three"])
    self.assertEquals(self.ls.list_owned(), set(["two"]))
    self.ls.release()
    self.assertEquals(self.ls.list_owned(), set())
    self.assertEquals(self.ls.acquire(["one", "three"]),
                      set(["one", "three"]))
    self.assertEquals(self.ls.list_owned(), set(["one", "three"]))
    self.ls.release()
    self.assertEquals(self.ls.list_owned(), set())
    for name in self.ls._names():
      self.assertFalse(self.ls.check_owned(name))

  def testNoDoubleAcquire(self):
    self.ls.acquire("one")
    self.assertRaises(AssertionError, self.ls.acquire, "one")
    self.assertRaises(AssertionError, self.ls.acquire, ["two"])
    self.assertRaises(AssertionError, self.ls.acquire, ["two", "three"])
    self.ls.release()
    self.ls.acquire(["one", "three"])
    self.ls.release("one")
    self.assertRaises(AssertionError, self.ls.acquire, ["two"])
    self.ls.release("three")

  def testNoWrongRelease(self):
    self.assertRaises(AssertionError, self.ls.release)
    self.ls.acquire("one")
    self.assertRaises(AssertionError, self.ls.release, "two")

  def testAddRemove(self):
    self.ls.add("four")
    self.assertEquals(self.ls.list_owned(), set())
    self.assert_("four" in self.ls._names())
    self.ls.add(["five", "six", "seven"], acquired=1)
    self.assert_("five" in self.ls._names())
    self.assert_("six" in self.ls._names())
    self.assert_("seven" in self.ls._names())
    self.assertEquals(self.ls.list_owned(), set(["five", "six", "seven"]))
    self.assertEquals(self.ls.remove(["five", "six"]), ["five", "six"])
    self.assert_("five" not in self.ls._names())
    self.assert_("six" not in self.ls._names())
    self.assertEquals(self.ls.list_owned(), set(["seven"]))
    self.assertRaises(AssertionError, self.ls.add, "eight", acquired=1)
    self.ls.remove("seven")
    self.assert_("seven" not in self.ls._names())
    self.assertEquals(self.ls.list_owned(), set([]))
    self.ls.acquire(None, shared=1)
    self.assertRaises(AssertionError, self.ls.add, "eight")
    self.ls.release()
    self.ls.acquire(None)
    self.ls.add("eight", acquired=1)
    self.assert_("eight" in self.ls._names())
    self.assert_("eight" in self.ls.list_owned())
    self.ls.add("nine")
    self.assert_("nine" in self.ls._names())
    self.assert_("nine" not in self.ls.list_owned())
    self.ls.release()
    self.ls.remove(["two"])
    self.assert_("two" not in self.ls._names())
    self.ls.acquire("three")
    self.assertEquals(self.ls.remove(["three"]), ["three"])
    self.assert_("three" not in self.ls._names())
    self.assertEquals(self.ls.remove("three"), [])
    self.assertEquals(self.ls.remove(["one", "three", "six"]), ["one"])
    self.assert_("one" not in self.ls._names())

  def testRemoveNonBlocking(self):
    self.ls.acquire("one")
    self.assertEquals(self.ls.remove("one"), ["one"])
    self.ls.acquire(["two", "three"])
    self.assertEquals(self.ls.remove(["two", "three"]),
                      ["two", "three"])

  def testNoDoubleAdd(self):
    self.assertRaises(errors.LockError, self.ls.add, "two")
    self.ls.add("four")
    self.assertRaises(errors.LockError, self.ls.add, "four")

  def testNoWrongRemoves(self):
    self.ls.acquire(["one", "three"], shared=1)
    # Cannot remove "two" while holding something which is not a superset
    self.assertRaises(AssertionError, self.ls.remove, "two")
    # Cannot remove "three" as we are sharing it
    self.assertRaises(AssertionError, self.ls.remove, "three")

  def testAcquireSetLock(self):
    # acquire the set-lock exclusively
    self.assertEquals(self.ls.acquire(None), set(["one", "two", "three"]))
    self.assertEquals(self.ls.list_owned(), set(["one", "two", "three"]))
    self.assertEquals(self.ls.is_owned(), True)
    self.assertEquals(self.ls._names(), set(["one", "two", "three"]))
    # I can still add/remove elements...
    self.assertEquals(self.ls.remove(["two", "three"]), ["two", "three"])
    self.assert_(self.ls.add("six"))
    self.ls.release()
    # share the set-lock
    self.assertEquals(self.ls.acquire(None, shared=1), set(["one", "six"]))
    # adding new elements is not possible
    self.assertRaises(AssertionError, self.ls.add, "five")
    self.ls.release()

  def testAcquireWithRepetitions(self):
    self.assertEquals(self.ls.acquire(["two", "two", "three"], shared=1),
                      set(["two", "two", "three"]))
    self.ls.release(["two", "two"])
    self.assertEquals(self.ls.list_owned(), set(["three"]))

  def testEmptyAcquire(self):
    # Acquire an empty list of locks...
    self.assertEquals(self.ls.acquire([]), set())
    self.assertEquals(self.ls.list_owned(), set())
    # New locks can still be added
    self.assert_(self.ls.add("six"))
    # "re-acquiring" is not an issue, since we had really acquired nothing
    self.assertEquals(self.ls.acquire([], shared=1), set())
    self.assertEquals(self.ls.list_owned(), set())
    # We haven't really acquired anything, so we cannot release
    self.assertRaises(AssertionError, self.ls.release)

  def _doLockSet(self, names, shared):
    try:
      self.ls.acquire(names, shared=shared)
      self.done.put("DONE")
      self.ls.release()
    except errors.LockError:
      self.done.put("ERR")

  def _doAddSet(self, names):
    try:
      self.ls.add(names, acquired=1)
      self.done.put("DONE")
      self.ls.release()
    except errors.LockError:
      self.done.put("ERR")

  def _doRemoveSet(self, names):
    self.done.put(self.ls.remove(names))

  @_Repeat
  def testConcurrentSharedAcquire(self):
    self.ls.acquire(["one", "two"], shared=1)
    self._addThread(target=self._doLockSet, args=(["one", "two"], 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLockSet,
                    args=(["one", "two", "three"], 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLockSet, args=("three", 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLockSet, args=(["one", "two"], 0))
    self._addThread(target=self._doLockSet, args=(["two", "three"], 0))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.release()
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.assertEqual(self.done.get_nowait(), "DONE")

  @_Repeat
  def testConcurrentExclusiveAcquire(self):
    self.ls.acquire(["one", "two"])
    self._addThread(target=self._doLockSet, args=("three", 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLockSet, args=("three", 0))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self._addThread(target=self._doLockSet, args=(["one", "two"], 0))
    self._addThread(target=self._doLockSet, args=(["one", "two"], 1))
    self._addThread(target=self._doLockSet, args=("one", 0))
    self._addThread(target=self._doLockSet, args=("one", 1))
    self._addThread(target=self._doLockSet, args=(["two", "three"], 0))
    self._addThread(target=self._doLockSet, args=(["two", "three"], 1))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.release()
    self._waitThreads()
    for _ in range(6):
      self.failUnlessEqual(self.done.get_nowait(), "DONE")

  @_Repeat
  def testSimpleAcquireTimeoutExpiring(self):
    names = sorted(self.ls._names())
    self.assert_(len(names) >= 3)

    # Get name of first lock
    first = names[0]

    # Get name of last lock
    last = names.pop()

    checks = [
      # Block first and try to lock it again
      (first, first),

      # Block first and try to lock all locks
      (None, first),

      # Block last and try to lock it again
      (last, last),
      ]

    for (wanted, block) in checks:
      # Lock in exclusive mode
      self.assert_(self.ls.acquire(block, shared=0))

      def _AcquireOne():
        # Try to get the same lock again with a timeout (should never succeed)
        acquired = self.ls.acquire(wanted, timeout=0.1, shared=0)
        if acquired:
          self.done.put("acquired")
          self.ls.release()
        else:
          self.assert_(acquired is None)
          self.assertFalse(self.ls.list_owned())
          self.assertFalse(self.ls.is_owned())
          self.done.put("not acquired")

      self._addThread(target=_AcquireOne)

      # Wait for timeout in thread to expire
      self._waitThreads()

      # Release exclusive lock again
      self.ls.release()
      self.assertEqual(self.done.get_nowait(), "not acquired")
      self.assertRaises(Queue.Empty, self.done.get_nowait)

  @_Repeat
  def testDelayedAndExpiringLockAcquire(self):
    self._setUpLS()
    self.ls.add(["five", "six", "seven", "eight", "nine"])

    for expire in (False, True):
      names = sorted(self.ls._names())
      self.assertEqual(len(names), 8)

      lock_ev = dict([(i, threading.Event()) for i in names])

      # Lock all in exclusive mode
      self.assert_(self.ls.acquire(names, shared=0))

      if expire:
        # We'll wait at least 300ms per lock
        lockwait = len(names) * [0.3]

        # Fail if we can't acquire all locks in 400ms. There are 8 locks, so
        # this gives us up to 2.4s to fail.
        lockall_timeout = 0.4
      else:
        # This should finish rather quickly
        lockwait = None
        lockall_timeout = len(names) * 5.0

      def _LockAll():
        def acquire_notification(name):
          if not expire:
            self.done.put("getting %s" % name)

          # Kick next lock
          lock_ev[name].set()

        if self.ls.acquire(names, shared=0, timeout=lockall_timeout,
                           test_notify=acquire_notification):
          self.done.put("got all")
          self.ls.release()
        else:
          self.done.put("timeout on all")

        # Notify all locks
        for ev in lock_ev.values():
          ev.set()

      t = self._addThread(target=_LockAll)

      for idx, name in enumerate(names):
        # Wait for actual acquire on this lock to start
        lock_ev[name].wait(10.0)

        if expire and t.isAlive():
          # Wait some time after getting the notification to make sure the
          # lock acquire will expire
          SafeSleep(lockwait[idx])

        self.ls.release(names=name)

      self.assertFalse(self.ls.list_owned())

      self._waitThreads()

      if expire:
        # Not checking which locks were actually acquired. Doing so would be
        # too timing-dependent.
        self.assertEqual(self.done.get_nowait(), "timeout on all")
      else:
        for i in names:
          self.assertEqual(self.done.get_nowait(), "getting %s" % i)
        self.assertEqual(self.done.get_nowait(), "got all")
      self.assertRaises(Queue.Empty, self.done.get_nowait)

  @_Repeat
  def testConcurrentRemove(self):
    self.ls.add("four")
    self.ls.acquire(["one", "two", "four"])
    self._addThread(target=self._doLockSet, args=(["one", "four"], 0))
    self._addThread(target=self._doLockSet, args=(["one", "four"], 1))
    self._addThread(target=self._doLockSet, args=(["one", "two"], 0))
    self._addThread(target=self._doLockSet, args=(["one", "two"], 1))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.remove("one")
    self.ls.release()
    self._waitThreads()
    for i in range(4):
      self.failUnlessEqual(self.done.get_nowait(), "ERR")
    self.ls.add(["five", "six"], acquired=1)
    self._addThread(target=self._doLockSet, args=(["three", "six"], 1))
    self._addThread(target=self._doLockSet, args=(["three", "six"], 0))
    self._addThread(target=self._doLockSet, args=(["four", "six"], 1))
    self._addThread(target=self._doLockSet, args=(["four", "six"], 0))
    self.ls.remove("five")
    self.ls.release()
    self._waitThreads()
    for i in range(4):
      self.failUnlessEqual(self.done.get_nowait(), "DONE")
    self.ls.acquire(["three", "four"])
    self._addThread(target=self._doRemoveSet, args=(["four", "six"], ))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.remove("four")
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), ["six"])
    self._addThread(target=self._doRemoveSet, args=(["two"], ))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), ["two"])
    self.ls.release()
    # reset lockset
    self._setUpLS()
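
  # The following tests exercise the lockset-internal "set-lock", acquired by
  # passing None as the name list: while it is held shared, individual names
  # can still be acquired by others but not added or removed; holding it
  # exclusively blocks all other lockset operations.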

  @_Repeat
  def testConcurrentSharedSetLock(self):
    # share the set-lock...
    self.ls.acquire(None, shared=1)
    # ...another thread can share it too
    self._addThread(target=self._doLockSet, args=(None, 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    # ...or just share some elements
    self._addThread(target=self._doLockSet, args=(["one", "three"], 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    # ...but not add new ones or remove any
    t = self._addThread(target=self._doAddSet, args=(["nine"], ))
    self._addThread(target=self._doRemoveSet, args=(["two"], ))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    # this just releases the set-lock
    self.ls.release([])
    t.join(60)
    self.assertEqual(self.done.get_nowait(), "DONE")
    # release the lock on the actual elements so remove() can proceed too
    self.ls.release()
    self._waitThreads()
    self.failUnlessEqual(self.done.get_nowait(), ["two"])
    # reset lockset
    self._setUpLS()

  @_Repeat
  def testConcurrentExclusiveSetLock(self):
    # acquire the set-lock...
    self.ls.acquire(None, shared=0)
    # ...no one can do anything else
    self._addThread(target=self._doLockSet, args=(None, 1))
    self._addThread(target=self._doLockSet, args=(None, 0))
    self._addThread(target=self._doLockSet, args=(["three"], 0))
    self._addThread(target=self._doLockSet, args=(["two"], 1))
    self._addThread(target=self._doAddSet, args=(["nine"], ))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.release()
    self._waitThreads()
    for _ in range(5):
      self.assertEqual(self.done.get(True, 1), "DONE")
    # cleanup
    self._setUpLS()

  @_Repeat
  def testConcurrentSetLockAdd(self):
    self.ls.acquire("one")
    # Another thread wants the whole SetLock
    self._addThread(target=self._doLockSet, args=(None, 0))
    self._addThread(target=self._doLockSet, args=(None, 1))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.assertRaises(AssertionError, self.ls.add, "four")
    self.ls.release()
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.ls.acquire(None)
    self._addThread(target=self._doLockSet, args=(None, 0))
    self._addThread(target=self._doLockSet, args=(None, 1))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.add("four")
    self.ls.add("five", acquired=1)
    self.ls.add("six", acquired=1, shared=1)
    self.assertEquals(self.ls.list_owned(),
                      set(["one", "two", "three", "five", "six"]))
    self.assertEquals(self.ls.is_owned(), True)
    self.assertEquals(self.ls._names(),
                      set(["one", "two", "three", "four", "five", "six"]))
    self.ls.release()
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._setUpLS()

  @_Repeat
  def testEmptyLockSet(self):
    # get the set-lock
    self.assertEqual(self.ls.acquire(None), set(["one", "two", "three"]))
    # now empty it...
    self.ls.remove(["one", "two", "three"])
    self.assertFalse(self.ls._names())
    # and adds/locks by another thread still wait
    self._addThread(target=self._doAddSet, args=(["nine"], ))
    self._addThread(target=self._doLockSet, args=(None, 1))
    self._addThread(target=self._doLockSet, args=(None, 0))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.release()
    self._waitThreads()
    for _ in range(3):
      self.assertEqual(self.done.get_nowait(), "DONE")
    # empty it again...
    self.assertEqual(self.ls.remove(["nine"]), ["nine"])
    # now share it...
    self.assertEqual(self.ls.acquire(None, shared=1), set())
    # other sharers can go, adds still wait
    self._addThread(target=self._doLockSet, args=(None, 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doAddSet, args=(["nine"], ))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.ls.release()
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._setUpLS()

  def testAcquireWithNamesDowngrade(self):
    self.assertEquals(self.ls.acquire("two", shared=0), set(["two"]))
    self.assertTrue(self.ls.is_owned())
    self.assertFalse(self.ls._get_lock().is_owned())
    self.ls.release()
    self.assertFalse(self.ls.is_owned())
    self.assertFalse(self.ls._get_lock().is_owned())
    # Can't downgrade after releasing
    self.assertRaises(AssertionError, self.ls.downgrade, "two")

  def testDowngrade(self):
    # Not owning anything, must raise an exception
    self.assertFalse(self.ls.is_owned())
    self.assertRaises(AssertionError, self.ls.downgrade)

    self.assertFalse(compat.any(i.is_owned()
                                for i in self.ls._get_lockdict().values()))
    self.assertFalse(self.ls.check_owned(self.ls._names()))
    for name in self.ls._names():
      self.assertFalse(self.ls.check_owned(name))

    self.assertEquals(self.ls.acquire(None, shared=0),
                      set(["one", "two", "three"]))
    self.assertRaises(AssertionError, self.ls.downgrade, "unknown lock")

    self.assertTrue(self.ls.check_owned(self.ls._names(), shared=0))
    for name in self.ls._names():
      self.assertTrue(self.ls.check_owned(name))
      self.assertTrue(self.ls.check_owned(name, shared=0))
      self.assertFalse(self.ls.check_owned(name, shared=1))

    self.assertTrue(self.ls._get_lock().is_owned(shared=0))
    self.assertTrue(compat.all(i.is_owned(shared=0)
                               for i in self.ls._get_lockdict().values()))

    # Start downgrading locks
    self.assertTrue(self.ls.downgrade(names=["one"]))
    self.assertTrue(self.ls._get_lock().is_owned(shared=0))
    self.assertTrue(compat.all(lock.is_owned(shared=[0, 1][int(name == "one")])
                               for name, lock in
                                 self.ls._get_lockdict().items()))

    self.assertFalse(self.ls.check_owned("one", shared=0))
    self.assertTrue(self.ls.check_owned("one", shared=1))
    self.assertTrue(self.ls.check_owned("two", shared=0))
    self.assertTrue(self.ls.check_owned("three", shared=0))

    # Downgrade second lock
    self.assertTrue(self.ls.downgrade(names="two"))
    self.assertTrue(self.ls._get_lock().is_owned(shared=0))
    should_share = lambda name: [0, 1][int(name in ("one", "two"))]
    self.assertTrue(compat.all(lock.is_owned(shared=should_share(name))
                               for name, lock in
                                 self.ls._get_lockdict().items()))

    self.assertFalse(self.ls.check_owned("one", shared=0))
    self.assertTrue(self.ls.check_owned("one", shared=1))
    self.assertFalse(self.ls.check_owned("two", shared=0))
    self.assertTrue(self.ls.check_owned("two", shared=1))
    self.assertTrue(self.ls.check_owned("three", shared=0))

    # Downgrading the last exclusive lock to shared must downgrade the
    # lockset-internal lock too
    self.assertTrue(self.ls.downgrade(names="three"))
    self.assertTrue(self.ls._get_lock().is_owned(shared=1))
    self.assertTrue(compat.all(i.is_owned(shared=1)
                               for i in self.ls._get_lockdict().values()))

    # Verify owned locks
    for name in self.ls._names():
      self.assertTrue(self.ls.check_owned(name, shared=1))

    # Downgrading a shared lock must be a no-op
    self.assertTrue(self.ls.downgrade(names=["one", "three"]))
    self.assertTrue(self.ls._get_lock().is_owned(shared=1))
    self.assertTrue(compat.all(i.is_owned(shared=1)
                               for i in self.ls._get_lockdict().values()))

    self.ls.release()
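
  # downgrade() without a names argument applies to everything owned: after
  # the next test it must leave all member locks, and consequently the
  # lockset-internal lock as well, in shared mode.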

  def testDowngradeEverything(self):
    self.assertEqual(self.ls.acquire(locking.ALL_SET, shared=0),
                     set(["one", "two", "three"]))
    self.assertTrue(self.ls.owning_all())

    # Ensure all locks are now owned in exclusive mode
    for name in self.ls._names():
      self.assertTrue(self.ls.check_owned(name, shared=0))

    # Downgrade everything
    self.assertTrue(self.ls.downgrade())

    # Ensure all locks are now owned in shared mode
    for name in self.ls._names():
      self.assertTrue(self.ls.check_owned(name, shared=1))

    self.assertTrue(self.ls.owning_all())

  def testPriority(self):
    def _Acquire(prev, next, name, priority, success_fn):
      prev.wait()
      self.assert_(self.ls.acquire(name, shared=0,
                                   priority=priority,
                                   test_notify=lambda _: next.set()))
      try:
        success_fn()
      finally:
        self.ls.release()

    # Get all in exclusive mode
    self.assert_(self.ls.acquire(locking.ALL_SET, shared=0))

    done_two = Queue.Queue(0)

    first = threading.Event()
    prev = first

    acquires = [("one", prio, self.done) for prio in range(1, 33)]
    acquires.extend([("two", prio, done_two) for prio in range(1, 33)])

    # Use a deterministic random generator
    random.Random(741).shuffle(acquires)

    for (name, prio, done) in acquires:
      ev = threading.Event()
      self._addThread(target=_Acquire,
                      args=(prev, ev, name, prio,
                            compat.partial(done.put, "Prio%s" % prio)))
      prev = ev

    # Start acquires
    first.set()

    # Wait for last acquire to start
    prev.wait()

    # Let threads acquire locks
    self.ls.release()

    # Wait for threads to finish
    self._waitThreads()

    for i in range(1, 33):
      self.assertEqual(self.done.get_nowait(), "Prio%s" % i)
      self.assertEqual(done_two.get_nowait(), "Prio%s" % i)

    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.assertRaises(Queue.Empty, done_two.get_nowait)

  def testNamesWithOpportunisticAndTimeout(self):
    self.assertRaises(AssertionError, self.ls.acquire,
                      ["one", "two"], timeout=1.0, opportunistic=True)

  def testOpportunisticWithUnknownName(self):
    name = "unknown"
    self.assertFalse(name in self.ls._names())
    result = self.ls.acquire(name, opportunistic=True)
    self.assertFalse(result)
    self.assertFalse(self.ls.list_owned())

    result = self.ls.acquire(["two", name], opportunistic=True)
    self.assertEqual(result, set(["two"]))
    self.assertEqual(self.ls.list_owned(), set(["two"]))

    self.ls.release()

  def testSimpleOpportunisticAcquisition(self):
    self.assertEquals(self.ls._names(), set(["one", "two", "three"]))

    # Hold a lock in main thread
    self.assertEqual(self.ls.acquire("two", shared=0), set(["two"]))

    def fn():
      # The lock "two" is held by the main thread
      result = self.ls.acquire(["one", "two"], shared=0, opportunistic=True)
      self.assertEqual(result, set(["one"]))
      self.assertEqual(self.ls.list_owned(), set(["one"]))
      self.assertFalse(self.ls._get_lock().is_owned())
      self.ls.release()
      self.assertFalse(self.ls.list_owned())

      # Try to acquire the lock held by the main thread
      result = self.ls.acquire(["two"], shared=0, opportunistic=True)
      self.assertFalse(self.ls._get_lock().is_owned())
      self.assertFalse(result)
      self.assertFalse(self.ls.list_owned())

      # Try to acquire all locks
      result = self.ls.acquire(locking.ALL_SET, shared=0, opportunistic=True)
      self.assertTrue(self.ls._get_lock().is_owned(),
                      msg="Internal lock is not owned")
      self.assertEqual(result, set(["one", "three"]))
      self.assertEqual(self.ls.list_owned(), set(["one", "three"]))
      self.ls.release()
      self.assertFalse(self.ls.list_owned())

      self.done.put(True)

    self._addThread(target=fn)

    # Wait for threads to finish
    self._waitThreads()

    self.assertEqual(self.ls.list_owned(), set(["two"]))

    self.ls.release()
    self.assertFalse(self.ls.list_owned())
    self.assertFalse(self.ls._get_lock().is_owned())
    self.assertTrue(self.done.get_nowait())
    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def testOpportunisticAcquisitionWithoutNamesExpires(self):
    self.assertEquals(self.ls._names(), set(["one", "two", "three"]))

    # Hold all locks in main thread
    self.ls.acquire(locking.ALL_SET, shared=0)
    self.assertTrue(self.ls._get_lock().is_owned())

    def fn():
      # Try to acquire all locks in separate thread
      result = self.ls.acquire(locking.ALL_SET, shared=0, opportunistic=True,
                               timeout=0.1)
      self.assertFalse(result)
      self.assertFalse(self.ls._get_lock().is_owned())
      self.assertFalse(self.ls.list_owned())

      # Try once more without a timeout
      self.assertFalse(self.ls.acquire("one", shared=0, opportunistic=True))

      self.done.put(True)

    self._addThread(target=fn)

    # Wait for threads to finish
    self._waitThreads()

    self.assertEqual(self.ls.list_owned(), set(["one", "two", "three"]))

    self.ls.release()
    self.assertFalse(self.ls.list_owned())
    self.assertFalse(self.ls._get_lock().is_owned(shared=0))

    self.assertTrue(self.done.get_nowait())
    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def testSharedOpportunisticAcquisitionWithoutNames(self):
    self.assertEquals(self.ls._names(), set(["one", "two", "three"]))

    # Hold all locks in main thread
    self.ls.acquire(locking.ALL_SET, shared=1)
    self.assertTrue(self.ls._get_lock().is_owned(shared=1))

    def fn():
      # Try to acquire all locks in separate thread in shared mode
      result = self.ls.acquire(locking.ALL_SET, shared=1, opportunistic=True,
                               timeout=0.1)
      self.assertEqual(result, set(["one", "two", "three"]))
      self.assertTrue(self.ls._get_lock().is_owned(shared=1))
      self.ls.release()
      self.assertFalse(self.ls._get_lock().is_owned())

      # Try one in exclusive mode
      self.assertFalse(self.ls.acquire("one", shared=0, opportunistic=True))

      self.done.put(True)

    self._addThread(target=fn)

    # Wait for threads to finish
    self._waitThreads()

    self.assertEqual(self.ls.list_owned(), set(["one", "two", "three"]))

    self.ls.release()
    self.assertFalse(self.ls.list_owned())
    self.assertFalse(self.ls._get_lock().is_owned())

    self.assertTrue(self.done.get_nowait())
    self.assertRaises(Queue.Empty, self.done.get_nowait)

  def testLockDeleteWithOpportunisticAcquisition(self):
    # This test exercises some code handling LockError on acquisition, that is
    # after all lock names have been gathered. This shouldn't happen in
    # reality as removing locks from the set requires the lockset-internal
    # lock, but the code should handle the situation anyway.
    ready = threading.Event()
    finished = threading.Event()

    self.assertEquals(self.ls._names(), set(["one", "two", "three"]))

    # Thread function to delete lock
    def fn():
      # Wait for notification
      ready.wait()

      # Delete lock named "two" by accessing lockset-internal data
      ld = self.ls._get_lockdict()
      self.assertTrue(ld["two"].delete())

      self.done.put("deleted.two")

      # Notify helper
      finished.set()

    self._addThread(target=fn)

    # Notification helper, called while the lockset already holds its internal
    # lock. Therefore only one of the locks not yet acquired can be deleted.
    def notify(name):
      self.done.put("notify.%s" % name)

      if name == "one":
        # Tell helper thread to delete lock "two"
        ready.set()
        finished.wait()

    # Hold all locks in main thread
    self.ls.acquire(locking.ALL_SET, shared=0, test_notify=notify)
    self.assertEqual(self.ls.list_owned(), set(["one", "three"]))

    # Wait for threads to finish
    self._waitThreads()

    # Release all locks
    self.ls.release()
    self.assertFalse(self.ls.list_owned())
    self.assertFalse(self.ls._get_lock().is_owned())

    self.assertEqual(self.done.get_nowait(), "notify.one")
    self.assertEqual(self.done.get_nowait(), "deleted.two")
    self.assertEqual(self.done.get_nowait(), "notify.three")
    self.assertEqual(self.done.get_nowait(), "notify.two")
    self.assertRaises(Queue.Empty, self.done.get_nowait)


class TestGetLsAcquireModeAndTimeouts(unittest.TestCase):
  def setUp(self):
    self.fn = locking._GetLsAcquireModeAndTimeouts

  def testOpportunisticWithoutNames(self):
    (mode, ls_timeout_fn, timeout_fn) = self.fn(False, None, True)
    self.assertEqual(mode, locking._LS_ACQUIRE_OPPORTUNISTIC)
    self.assertTrue(ls_timeout_fn is None)
    self.assertEqual(timeout_fn(), 0)

  def testAllInputCombinations(self):
    for want_all in [False, True]:
      for timeout in [None, 0, 100]:
        for opportunistic in [False, True]:
          if (opportunistic and
              not want_all and
              timeout is not None):
            # Can't accept a timeout when acquiring opportunistically
            self.assertRaises(AssertionError, self.fn,
                              want_all, timeout, opportunistic)
          else:
            (mode, ls_timeout_fn, timeout_fn) = \
              self.fn(want_all, timeout, opportunistic)

            if opportunistic:
              self.assertEqual(mode, locking._LS_ACQUIRE_OPPORTUNISTIC)
              self.assertEqual(timeout_fn(), 0)
            else:
              self.assertTrue(callable(timeout_fn))
              if want_all:
                self.assertEqual(mode, locking._LS_ACQUIRE_ALL)
              else:
                self.assertEqual(mode, locking._LS_ACQUIRE_EXACT)

            if want_all:
              self.assertTrue(callable(ls_timeout_fn))
            else:
              self.assertTrue(ls_timeout_fn is None)


class TestGanetiLockManager(_ThreadedTestCase):
  def setUp(self):
    _ThreadedTestCase.setUp(self)
    self.nodes = ["n1", "n2"]
    self.nodegroups = ["g1", "g2"]
    self.instances = ["i1", "i2", "i3"]
    self.networks = ["net1", "net2", "net3"]
    self.GL = locking.GanetiLockManager(self.nodes, self.nodegroups,
                                        self.instances, self.networks)

  def tearDown(self):
    # Don't try this at home...
    locking.GanetiLockManager._instance = None

  def testLockingConstants(self):
    # The locking library internally cheats by assuming its constants have
    # some relationships with each other. Check those hold true.
    # This relationship is also used in the Processor to recursively acquire
    # the right locks. Again, please don't break it.
    for i in range(len(locking.LEVELS)):
      self.assertEqual(i, locking.LEVELS[i])

  def testDoubleGLFails(self):
    self.assertRaises(AssertionError, locking.GanetiLockManager,
                      [], [], [], [])

  def testLockNames(self):
    self.assertEqual(self.GL._names(locking.LEVEL_CLUSTER), set(["BGL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE_ALLOC), set(["NAL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set(self.nodes))
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP),
                     set(self.nodegroups))
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE),
                     set(self.instances))
    self.assertEqual(self.GL._names(locking.LEVEL_NETWORK),
                     set(self.networks))

  def testInitAndResources(self):
    locking.GanetiLockManager._instance = None
    self.GL = locking.GanetiLockManager([], [], [], [])
    self.assertEqual(self.GL._names(locking.LEVEL_CLUSTER), set(["BGL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE_ALLOC), set(["NAL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP), set())
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NETWORK), set())

    locking.GanetiLockManager._instance = None
    self.GL = locking.GanetiLockManager(self.nodes, self.nodegroups, [], [])
    self.assertEqual(self.GL._names(locking.LEVEL_CLUSTER), set(["BGL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE_ALLOC), set(["NAL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set(self.nodes))
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP),
                     set(self.nodegroups))
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NETWORK), set())

    locking.GanetiLockManager._instance = None
    self.GL = locking.GanetiLockManager([], [], self.instances, [])
    self.assertEqual(self.GL._names(locking.LEVEL_CLUSTER), set(["BGL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE_ALLOC), set(["NAL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP), set())
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE),
                     set(self.instances))

    locking.GanetiLockManager._instance = None
    self.GL = locking.GanetiLockManager([], [], [], self.networks)
    self.assertEqual(self.GL._names(locking.LEVEL_CLUSTER), set(["BGL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE_ALLOC), set(["NAL"]))
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP), set())
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE), set())
    self.assertEqual(self.GL._names(locking.LEVEL_NETWORK),
                     set(self.networks))

  def testAcquireRelease(self):
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self.assertEquals(self.GL.list_owned(locking.LEVEL_CLUSTER),
                      set(["BGL"]))
    self.GL.acquire(locking.LEVEL_INSTANCE, ["i1"])
    self.GL.acquire(locking.LEVEL_NODEGROUP, ["g2"])
    self.GL.acquire(locking.LEVEL_NODE, ["n1", "n2"], shared=1)
    self.assertTrue(self.GL.check_owned(locking.LEVEL_NODE, ["n1", "n2"],
                                        shared=1))
    self.assertFalse(self.GL.check_owned(locking.LEVEL_INSTANCE,
                                         ["i1", "i3"]))
    self.GL.release(locking.LEVEL_NODE, ["n2"])
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODE), set(["n1"]))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODEGROUP),
                      set(["g2"]))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_INSTANCE),
                      set(["i1"]))
    self.GL.release(locking.LEVEL_NODE)
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODE), set())
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODEGROUP),
                      set(["g2"]))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_INSTANCE),
                      set(["i1"]))
    self.GL.release(locking.LEVEL_NODEGROUP)
    self.GL.release(locking.LEVEL_INSTANCE)
    self.assertRaises(errors.LockError, self.GL.acquire,
                      locking.LEVEL_INSTANCE, ["i5"])
    self.GL.acquire(locking.LEVEL_INSTANCE, ["i3"], shared=1)
    self.assertEquals(self.GL.list_owned(locking.LEVEL_INSTANCE),
                      set(["i3"]))

  def testAcquireWholeSets(self):
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self.assertEquals(self.GL.acquire(locking.LEVEL_INSTANCE, None),
                      set(self.instances))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_INSTANCE),
                      set(self.instances))
    self.assertEquals(self.GL.acquire(locking.LEVEL_NODEGROUP, None),
                      set(self.nodegroups))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODEGROUP),
                      set(self.nodegroups))
    self.assertEquals(self.GL.acquire(locking.LEVEL_NODE, None, shared=1),
                      set(self.nodes))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODE),
                      set(self.nodes))
    self.assertTrue(self.GL.owning_all(locking.LEVEL_INSTANCE))
    self.assertTrue(self.GL.owning_all(locking.LEVEL_NODEGROUP))
    self.assertTrue(self.GL.owning_all(locking.LEVEL_NODE))
    self.GL.release(locking.LEVEL_NODE)
    self.GL.release(locking.LEVEL_NODEGROUP)
    self.GL.release(locking.LEVEL_INSTANCE)
    self.GL.release(locking.LEVEL_CLUSTER)

  def testAcquireWholeAndPartial(self):
    self.assertFalse(self.GL.owning_all(locking.LEVEL_INSTANCE))
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self.assertEquals(self.GL.acquire(locking.LEVEL_INSTANCE, None),
                      set(self.instances))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_INSTANCE),
                      set(self.instances))
    self.assertEquals(self.GL.acquire(locking.LEVEL_NODE, ["n2"], shared=1),
                      set(["n2"]))
    self.assertEquals(self.GL.list_owned(locking.LEVEL_NODE), set(["n2"]))
    self.assertTrue(self.GL.owning_all(locking.LEVEL_INSTANCE))
    self.assertFalse(self.GL.owning_all(locking.LEVEL_NODE))
    self.GL.release(locking.LEVEL_NODE)
    self.GL.release(locking.LEVEL_INSTANCE)
    self.GL.release(locking.LEVEL_CLUSTER)

  def testBGLDependency(self):
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_NODE, ["n1", "n2"])
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_INSTANCE, ["i3"])
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_NODEGROUP, ["g1"])
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self.GL.acquire(locking.LEVEL_NODE, ["n1"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER, ["BGL"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER)
    self.GL.release(locking.LEVEL_NODE)
    self.GL.acquire(locking.LEVEL_INSTANCE, ["i1", "i2"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER, ["BGL"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER)
    self.GL.release(locking.LEVEL_INSTANCE)
    self.GL.acquire(locking.LEVEL_NODEGROUP, None)
    self.GL.release(locking.LEVEL_NODEGROUP, ["g1"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER, ["BGL"])
    self.assertRaises(AssertionError, self.GL.release,
                      locking.LEVEL_CLUSTER)
    self.GL.release(locking.LEVEL_NODEGROUP)
    self.GL.release(locking.LEVEL_CLUSTER)

  def testWrongOrder(self):
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self.GL.acquire(locking.LEVEL_NODE, ["n2"])
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_NODE, ["n1"])
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_NODEGROUP, ["g1"])
    self.assertRaises(AssertionError, self.GL.acquire,
                      locking.LEVEL_INSTANCE, ["i2"])

  def testModifiableLevels(self):
    self.assertRaises(AssertionError, self.GL.add, locking.LEVEL_CLUSTER,
                      ["BGL2"])
    self.assertRaises(AssertionError, self.GL.add, locking.LEVEL_NODE_ALLOC,
                      ["NAL2"])
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"])
    self.GL.add(locking.LEVEL_INSTANCE, ["i4"])
    self.GL.remove(locking.LEVEL_INSTANCE, ["i3"])
    self.GL.remove(locking.LEVEL_INSTANCE, ["i1"])
    self.assertEqual(self.GL._names(locking.LEVEL_INSTANCE),
                     set(["i2", "i4"]))
    self.GL.add(locking.LEVEL_NODE, ["n3"])
    self.GL.remove(locking.LEVEL_NODE, ["n1"])
    self.assertEqual(self.GL._names(locking.LEVEL_NODE), set(["n2", "n3"]))
    self.GL.add(locking.LEVEL_NODEGROUP, ["g3"])
    self.GL.remove(locking.LEVEL_NODEGROUP, ["g2"])
    self.GL.remove(locking.LEVEL_NODEGROUP, ["g1"])
    self.assertEqual(self.GL._names(locking.LEVEL_NODEGROUP), set(["g3"]))
    self.assertRaises(AssertionError, self.GL.remove, locking.LEVEL_CLUSTER,
                      ["BGL2"])

  # Helper function to run as a thread that shares the BGL and then acquires
  # some locks at another level.
  def _doLock(self, level, names, shared):
    try:
      self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
      self.GL.acquire(level, names, shared=shared)
      self.done.put("DONE")
      self.GL.release(level)
      self.GL.release(locking.LEVEL_CLUSTER)
    except errors.LockError:
      self.done.put("ERR")

  @_Repeat
  def testConcurrency(self):
    self.GL.acquire(locking.LEVEL_CLUSTER, ["BGL"], shared=1)
    self._addThread(target=self._doLock,
                    args=(locking.LEVEL_INSTANCE, "i1", 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.GL.acquire(locking.LEVEL_INSTANCE, ["i3"])
    self._addThread(target=self._doLock,
                    args=(locking.LEVEL_INSTANCE, "i1", 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLock,
                    args=(locking.LEVEL_INSTANCE, "i3", 1))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.GL.release(locking.LEVEL_INSTANCE)
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self.GL.acquire(locking.LEVEL_INSTANCE, ["i2"], shared=1)
    self._addThread(target=self._doLock,
                    args=(locking.LEVEL_INSTANCE, "i2", 1))
    self._waitThreads()
    self.assertEqual(self.done.get_nowait(), "DONE")
    self._addThread(target=self._doLock,
                    args=(locking.LEVEL_INSTANCE, "i2", 0))
    self.assertRaises(Queue.Empty, self.done.get_nowait)
    self.GL.release(locking.LEVEL_INSTANCE)
    self._waitThreads()
    self.assertEqual(self.done.get(True, 1), "DONE")
    self.GL.release(locking.LEVEL_CLUSTER, ["BGL"])


class TestLockMonitor(_ThreadedTestCase):
  def setUp(self):
    _ThreadedTestCase.setUp(self)
    self.lm = locking.LockMonitor()

  def testSingleThread(self):
    locks = []

    for i in range(100):
      name = "TestLock%s" % i
      locks.append(locking.SharedLock(name, monitor=self.lm))

    self.assertEqual(len(self.lm._locks), len(locks))
    result = objects.QueryResponse.FromDict(self.lm.QueryLocks(["name"]))
    self.assertEqual(len(result.fields), 1)
    self.assertEqual(len(result.data), 100)

    # Delete all locks
    del locks[:]

    # The garbage collector might need some time
    def _CheckLocks():
      if self.lm._locks:
        raise utils.RetryAgain()

    utils.Retry(_CheckLocks, 0.1, 30.0)

    self.assertFalse(self.lm._locks)

  def testMultiThread(self):
    locks = []

    def _CreateLock(prev, next, name):
      prev.wait()
      locks.append(locking.SharedLock(name, monitor=self.lm))
      if next:
        next.set()

    expnames = []

    first = threading.Event()
    prev = first

    # Use a deterministic random generator
    for i in random.Random(4263).sample(range(100), 33):
      name = "MtTestLock%s" % i
expnames.append(name) ev = threading.Event() self._addThread(target=_CreateLock, args=(prev, ev, name)) prev = ev # Add locks first.set() self._waitThreads() # Check order in which locks were added self.assertEqual([i.name for i in locks], expnames) # Check query result result = self.lm.QueryLocks(["name", "mode", "owner", "pending"]) self.assert_(isinstance(result, dict)) response = objects.QueryResponse.FromDict(result) self.assertEqual(response.data, [[(constants.RS_NORMAL, name), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None), (constants.RS_NORMAL, [])] for name in utils.NiceSort(expnames)]) self.assertEqual(len(response.fields), 4) self.assertEqual(["name", "mode", "owner", "pending"], [fdef.name for fdef in response.fields]) # Test exclusive acquire for tlock in locks[::4]: tlock.acquire(shared=0) try: def _GetExpResult(name): if tlock.name == name: return [(constants.RS_NORMAL, name), (constants.RS_NORMAL, "exclusive"), (constants.RS_NORMAL, [threading.currentThread().getName()]), (constants.RS_NORMAL, [])] return [(constants.RS_NORMAL, name), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None), (constants.RS_NORMAL, [])] result = self.lm.QueryLocks(["name", "mode", "owner", "pending"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [_GetExpResult(name) for name in utils.NiceSort(expnames)]) finally: tlock.release() # Test shared acquire def _Acquire(lock, shared, ev, notify): lock.acquire(shared=shared) try: notify.set() ev.wait() finally: lock.release() for tlock1 in locks[::11]: for tlock2 in locks[::-15]: if tlock2 == tlock1: # Avoid deadlocks continue for tlock3 in locks[::10]: if tlock3 in (tlock2, tlock1): # Avoid deadlocks continue releaseev = threading.Event() # Acquire locks acquireev = [] tthreads1 = [] for i in range(3): ev = threading.Event() tthreads1.append(self._addThread(target=_Acquire, args=(tlock1, 1, releaseev, ev))) acquireev.append(ev) ev = threading.Event() tthread2 = self._addThread(target=_Acquire, args=(tlock2, 1, releaseev, ev)) acquireev.append(ev) ev = threading.Event() tthread3 = self._addThread(target=_Acquire, args=(tlock3, 0, releaseev, ev)) acquireev.append(ev) # Wait for all locks to be acquired for i in acquireev: i.wait() # Check query result result = self.lm.QueryLocks(["name", "mode", "owner"]) response = objects.QueryResponse.FromDict(result) for (name, mode, owner) in response.data: (name_status, name_value) = name (owner_status, owner_value) = owner self.assertEqual(name_status, constants.RS_NORMAL) self.assertEqual(owner_status, constants.RS_NORMAL) if name_value == tlock1.name: self.assertEqual(mode, (constants.RS_NORMAL, "shared")) self.assertEqual(set(owner_value), set(i.getName() for i in tthreads1)) continue if name_value == tlock2.name: self.assertEqual(mode, (constants.RS_NORMAL, "shared")) self.assertEqual(owner_value, [tthread2.getName()]) continue if name_value == tlock3.name: self.assertEqual(mode, (constants.RS_NORMAL, "exclusive")) self.assertEqual(owner_value, [tthread3.getName()]) continue self.assert_(name_value in expnames) self.assertEqual(mode, (constants.RS_NORMAL, None)) self.assert_(owner_value is None) # Release locks again releaseev.set() self._waitThreads() result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, name), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)] for name in utils.NiceSort(expnames)]) def testDelete(self): lock = locking.SharedLock("TestLock", monitor=self.lm) 
self.assertEqual(len(self.lm._locks), 1) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lock.name), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)]]) lock.delete() result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lock.name), (constants.RS_NORMAL, "deleted"), (constants.RS_NORMAL, None)]]) self.assertEqual(len(self.lm._locks), 1) def testPending(self): def _Acquire(lock, shared, prev, next): prev.wait() lock.acquire(shared=shared, test_notify=next.set) try: pass finally: lock.release() lock = locking.SharedLock("ExcLock", monitor=self.lm) for shared in [0, 1]: lock.acquire() try: self.assertEqual(len(self.lm._locks), 1) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lock.name), (constants.RS_NORMAL, "exclusive"), (constants.RS_NORMAL, [threading.currentThread().getName()])]]) threads = [] first = threading.Event() prev = first for i in range(5): ev = threading.Event() threads.append(self._addThread(target=_Acquire, args=(lock, shared, prev, ev))) prev = ev # Start acquires first.set() # Wait for last acquire to start waiting prev.wait() # NOTE: This works only because QueryLocks will acquire the # lock-internal lock again and won't be able to get the information # until it has the lock. By then the acquire should be registered in # SharedLock.__pending (otherwise it's a bug). # All acquires are waiting now if shared: pending = [("shared", utils.NiceSort(t.getName() for t in threads))] else: pending = [("exclusive", [t.getName()]) for t in threads] result = self.lm.QueryLocks(["name", "mode", "owner", "pending"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lock.name), (constants.RS_NORMAL, "exclusive"), (constants.RS_NORMAL, [threading.currentThread().getName()]), (constants.RS_NORMAL, pending)]]) self.assertEqual(len(self.lm._locks), 1) finally: lock.release() self._waitThreads() # No pending acquires result = self.lm.QueryLocks(["name", "mode", "owner", "pending"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lock.name), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None), (constants.RS_NORMAL, [])]]) self.assertEqual(len(self.lm._locks), 1) def testDeleteAndRecreate(self): lname = "TestLock101923193" # Create some locks with the same name and keep all references locks = [locking.SharedLock(lname, monitor=self.lm) for _ in range(5)] self.assertEqual(len(self.lm._locks), len(locks)) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)]] * 5) locks[2].delete() # Check information order result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [[(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)]] * 2 + [[(constants.RS_NORMAL, lname), (constants.RS_NORMAL, "deleted"), (constants.RS_NORMAL, None)]] + [[(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)]] * 2) locks[1].acquire(shared=0) last_status = [ [(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)], [(constants.RS_NORMAL, lname), (constants.RS_NORMAL, 
"exclusive"), (constants.RS_NORMAL, [threading.currentThread().getName()])], [(constants.RS_NORMAL, lname), (constants.RS_NORMAL, "deleted"), (constants.RS_NORMAL, None)], [(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)], [(constants.RS_NORMAL, lname), (constants.RS_NORMAL, None), (constants.RS_NORMAL, None)], ] # Check information order result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, last_status) self.assertEqual(len(set(self.lm._locks.values())), len(locks)) self.assertEqual(len(self.lm._locks), len(locks)) # Check lock deletion for idx in range(len(locks)): del locks[0] assert gc.isenabled() gc.collect() self.assertEqual(len(self.lm._locks), len(locks)) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, last_status[idx + 1:]) # All locks should have been deleted assert not locks self.assertFalse(self.lm._locks) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, []) class _FakeLock: def __init__(self): self._info = [] def AddResult(self, *args): self._info.append(args) def CountPending(self): return len(self._info) def GetLockInfo(self, requested): (exp_requested, result) = self._info.pop(0) if exp_requested != requested: raise Exception("Requested information (%s) does not match" " expectations (%s)" % (requested, exp_requested)) return result def testMultipleResults(self): fl1 = self._FakeLock() fl2 = self._FakeLock() self.lm.RegisterLock(fl1) self.lm.RegisterLock(fl2) # Empty information for i in [fl1, fl2]: i.AddResult(set([query.LQ_MODE, query.LQ_OWNER]), []) result = self.lm.QueryLocks(["name", "mode", "owner"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, []) for i in [fl1, fl2]: self.assertEqual(i.CountPending(), 0) # Check ordering for fn in [lambda x: x, reversed, sorted]: fl1.AddResult(set(), list(fn([ ("aaa", None, None, None), ("bbb", None, None, None), ]))) fl2.AddResult(set(), []) result = self.lm.QueryLocks(["name"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [ [(constants.RS_NORMAL, "aaa")], [(constants.RS_NORMAL, "bbb")], ]) for i in [fl1, fl2]: self.assertEqual(i.CountPending(), 0) for fn2 in [lambda x: x, reversed, sorted]: fl1.AddResult(set([query.LQ_MODE]), list(fn([ # Same name, but different information ("aaa", "mode0", None, None), ("aaa", "mode1", None, None), ("aaa", "mode2", None, None), ("aaa", "mode3", None, None), ]))) fl2.AddResult(set([query.LQ_MODE]), [ ("zzz", "end", None, None), ("000", "start", None, None), ] + list(fn2([ ("aaa", "b200", None, None), ("aaa", "b300", None, None), ]))) result = self.lm.QueryLocks(["name", "mode"]) self.assertEqual(objects.QueryResponse.FromDict(result).data, [ [(constants.RS_NORMAL, "000"), (constants.RS_NORMAL, "start")], ] + list(fn([ # Name is the same, so order must be equal to incoming order [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "mode0")], [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "mode1")], [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "mode2")], [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "mode3")], ])) + list(fn2([ [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "b200")], [(constants.RS_NORMAL, "aaa"), (constants.RS_NORMAL, "b300")], ])) + [ [(constants.RS_NORMAL, "zzz"), (constants.RS_NORMAL, "end")], ]) for i in [fl1, fl2]: self.assertEqual(i.CountPending(), 0) if __name__ == "__main__": 
testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.x509_unittest.py0000744000000000000000000003271012271422343022030 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.x509""" import os import tempfile import unittest import shutil import time import OpenSSL import distutils.version import string import warnings from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors import testutils class TestParseAsn1Generalizedtime(unittest.TestCase): def setUp(self): self._Parse = utils.x509._ParseAsn1Generalizedtime def test(self): # UTC self.assertEqual(self._Parse("19700101000000Z"), 0) self.assertEqual(self._Parse("20100222174152Z"), 1266860512) self.assertEqual(self._Parse("20380119031407Z"), (2**31) - 1) # With offset self.assertEqual(self._Parse("20100222174152+0000"), 1266860512) self.assertEqual(self._Parse("20100223131652+0000"), 1266931012) self.assertEqual(self._Parse("20100223051808-0800"), 1266931088) self.assertEqual(self._Parse("20100224002135+1100"), 1266931295) self.assertEqual(self._Parse("19700101000000-0100"), 3600) # Leap seconds are not supported by datetime.datetime self.assertRaises(ValueError, self._Parse, "19841231235960+0000") self.assertRaises(ValueError, self._Parse, "19920630235960+0000") # Errors self.assertRaises(ValueError, self._Parse, "") self.assertRaises(ValueError, self._Parse, "invalid") self.assertRaises(ValueError, self._Parse, "20100222174152") self.assertRaises(ValueError, self._Parse, "Mon Feb 22 17:47:02 UTC 2010") self.assertRaises(ValueError, self._Parse, "2010-02-22 17:42:02") class TestGetX509CertValidity(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) pyopenssl_version = distutils.version.LooseVersion(OpenSSL.__version__) # Test whether we have pyOpenSSL 0.7 or above self.pyopenssl0_7 = (pyopenssl_version >= "0.7") if not self.pyopenssl0_7: warnings.warn("This test requires pyOpenSSL 0.7 or above to" " function correctly") def _LoadCert(self, name): return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, testutils.ReadTestData(name)) def test(self): validity = utils.GetX509CertValidity(self._LoadCert("cert1.pem")) if self.pyopenssl0_7: self.assertEqual(validity, (1266919967, 1267524767)) else: self.assertEqual(validity, (None, None)) class TestSignX509Certificate(unittest.TestCase): KEY = "My private key!"
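# (Hedged sketch, not part of the original file.) The tests below exercise a detached signature of the form "X-Ganeti-Signature: $<salt>$<hmac>" prepended to the PEM text. Assuming the HMAC is SHA-1 keyed with KEY over salt plus PEM (an assumption; the shipped code is utils.SignX509Certificate / utils.LoadSignedX509Certificate), an equivalent standalone check would be roughly:
#
#   import hashlib
#   import hmac
#
#   def _VerifySignedPem(key, salt, pem, signature):
#     return hmac.new(key, salt + pem, hashlib.sha1).hexdigest() == signature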
KEY_OTHER = "Another key" def test(self): # Generate certificate valid for 5 minutes (_, cert_pem) = utils.GenerateSelfSignedX509Cert(None, 300) cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) # No signature at all self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, cert_pem, self.KEY) # Invalid input self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, "", self.KEY) self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, "X-Ganeti-Signature: \n", self.KEY) self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, "X-Ganeti-Sign: $1234$abcdef\n", self.KEY) self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, "X-Ganeti-Signature: $1234567890$abcdef\n", self.KEY) self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, "X-Ganeti-Signature: $1234$abc\n\n" + cert_pem, self.KEY) # Invalid salt for salt in list("-_@$,:;/\\ \t\n"): self.assertRaises(errors.GenericError, utils.SignX509Certificate, cert_pem, self.KEY, "foo%sbar" % salt) for salt in ["HelloWorld", "salt", string.letters, string.digits, utils.GenerateSecret(numbytes=4), utils.GenerateSecret(numbytes=16), "{123:456}".encode("hex")]: signed_pem = utils.SignX509Certificate(cert, self.KEY, salt) self._Check(cert, salt, signed_pem) self._Check(cert, salt, "X-Another-Header: with a value\n" + signed_pem) self._Check(cert, salt, (10 * "Hello World!\n") + signed_pem) self._Check(cert, salt, (signed_pem + "\n\na few more\n" "lines----\n------ at\nthe end!")) def _Check(self, cert, salt, pem): (cert2, salt2) = utils.LoadSignedX509Certificate(pem, self.KEY) self.assertEqual(salt, salt2) self.assertEqual(cert.digest("sha1"), cert2.digest("sha1")) # Other key self.assertRaises(errors.GenericError, utils.LoadSignedX509Certificate, pem, self.KEY_OTHER) class TestCertVerification(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testVerifyCertificate(self): cert_pem = testutils.ReadTestData("cert1.pem") cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) # Not checking return value as this certificate is expired utils.VerifyX509Certificate(cert, 30, 7) @staticmethod def _GenCert(key, before, validity): # Urgh... 
mostly copied from x509.py :( # Create self-signed certificate cert = OpenSSL.crypto.X509() cert.set_serial_number(1) if before != 0: cert.gmtime_adj_notBefore(int(before)) cert.gmtime_adj_notAfter(validity) cert.set_issuer(cert.get_subject()) cert.set_pubkey(key) cert.sign(key, constants.X509_CERT_SIGN_DIGEST) return cert def testClockSkew(self): SKEW = constants.NODE_MAX_CLOCK_SKEW # Create private and public key key = OpenSSL.crypto.PKey() key.generate_key(OpenSSL.crypto.TYPE_RSA, constants.RSA_KEY_BITS) validity = 7 * 86400 # skew small enough, accepting cert; note that this is a timed # test, and could fail if the machine is so loaded that the next # few lines take more than NODE_MAX_CLOCK_SKEW / 2 for before in [-1, 0, SKEW / 4, SKEW / 2]: cert = self._GenCert(key, before, validity) result = utils.VerifyX509Certificate(cert, 1, 2) self.assertEqual(result, (None, None)) # skew too great, not accepting certs for before in [SKEW * 2, SKEW * 10]: cert = self._GenCert(key, before, validity) (status, msg) = utils.VerifyX509Certificate(cert, 1, 2) self.assertEqual(status, utils.CERT_WARNING) self.assertTrue(msg.startswith("Certificate not yet valid")) class TestVerifyCertificateInner(unittest.TestCase): def test(self): vci = utils.x509._VerifyCertificateInner # Valid self.assertEqual(vci(False, 1263916313, 1298476313, 1266940313, 30, 7), (None, None)) # Not yet valid (errcode, msg) = vci(False, 1266507600, 1267544400, 1266075600, 30, 7) self.assertEqual(errcode, utils.CERT_WARNING) # Expiring soon (errcode, msg) = vci(False, 1266507600, 1267544400, 1266939600, 30, 7) self.assertEqual(errcode, utils.CERT_ERROR) (errcode, msg) = vci(False, 1266507600, 1267544400, 1266939600, 30, 1) self.assertEqual(errcode, utils.CERT_WARNING) (errcode, msg) = vci(False, 1266507600, None, 1266939600, 30, 7) self.assertEqual(errcode, None) # Expired (errcode, msg) = vci(True, 1266507600, 1267544400, 1266939600, 30, 7) self.assertEqual(errcode, utils.CERT_ERROR) (errcode, msg) = vci(True, None, 1267544400, 1266939600, 30, 7) self.assertEqual(errcode, utils.CERT_ERROR) (errcode, msg) = vci(True, 1266507600, None, 1266939600, 30, 7) self.assertEqual(errcode, utils.CERT_ERROR) (errcode, msg) = vci(True, None, None, 1266939600, 30, 7) self.assertEqual(errcode, utils.CERT_ERROR) class TestGenerateSelfSignedX509Cert(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def _checkRsaPrivateKey(self, key): lines = key.splitlines() return (("-----BEGIN RSA PRIVATE KEY-----" in lines and "-----END RSA PRIVATE KEY-----" in lines) or ("-----BEGIN PRIVATE KEY-----" in lines and "-----END PRIVATE KEY-----" in lines)) def _checkCertificate(self, cert): lines = cert.splitlines() return ("-----BEGIN CERTIFICATE-----" in lines and "-----END CERTIFICATE-----" in lines) def test(self): for common_name in [None, ".", "Ganeti", "node1.example.com"]: (key_pem, cert_pem) = utils.GenerateSelfSignedX509Cert(common_name, 300) self._checkRsaPrivateKey(key_pem) self._checkCertificate(cert_pem) key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, key_pem) self.assert_(key.bits() >= 1024) self.assertEqual(key.bits(), constants.RSA_KEY_BITS) self.assertEqual(key.type(), OpenSSL.crypto.TYPE_RSA) x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) self.failIf(x509.has_expired()) self.assertEqual(x509.get_issuer().CN, common_name) self.assertEqual(x509.get_subject().CN, common_name) self.assertEqual(x509.get_pubkey().bits(), 
constants.RSA_KEY_BITS) def testLegacy(self): cert1_filename = os.path.join(self.tmpdir, "cert1.pem") utils.GenerateSelfSignedSslCert(cert1_filename, validity=1) cert1 = utils.ReadFile(cert1_filename) self.assert_(self._checkRsaPrivateKey(cert1)) self.assert_(self._checkCertificate(cert1)) class TestCheckNodeCertificate(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): testutils.GanetiTestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testMismatchingKey(self): other_cert = testutils.TestDataFilename("cert1.pem") node_cert = testutils.TestDataFilename("cert2.pem") cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(other_cert)) try: utils.CheckNodeCertificate(cert, _noded_cert_file=node_cert) except errors.GenericError, err: self.assertEqual(str(err), "Given cluster certificate does not match local key") else: self.fail("Exception was not raised") def testMatchingKey(self): cert_filename = testutils.TestDataFilename("cert2.pem") # Extract certificate cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(cert_filename)) cert_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert) utils.CheckNodeCertificate(cert, _noded_cert_file=cert_filename) def testMissingFile(self): cert_path = testutils.TestDataFilename("cert1.pem") nodecert = utils.PathJoin(self.tmpdir, "does-not-exist") utils.CheckNodeCertificate(NotImplemented, _noded_cert_file=nodecert) self.assertFalse(os.path.exists(nodecert)) def testInvalidCertificate(self): tmpfile = utils.PathJoin(self.tmpdir, "cert") utils.WriteFile(tmpfile, data="not a certificate") self.assertRaises(errors.X509CertError, utils.CheckNodeCertificate, NotImplemented, _noded_cert_file=tmpfile) def testNoPrivateKey(self): cert = testutils.TestDataFilename("cert1.pem") self.assertRaises(errors.X509CertError, utils.CheckNodeCertificate, NotImplemented, _noded_cert_file=cert) def testMismatchInNodeCert(self): cert1_path = testutils.TestDataFilename("cert1.pem") cert2_path = testutils.TestDataFilename("cert2.pem") tmpfile = utils.PathJoin(self.tmpdir, "cert") # Extract certificate cert1 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(cert1_path)) cert1_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert1) # Extract mismatching key key2 = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(cert2_path)) key2_pem = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key2) # Write to file utils.WriteFile(tmpfile, data=cert1_pem + key2_pem) try: utils.CheckNodeCertificate(cert1, _noded_cert_file=tmpfile) except errors.X509CertError, err: self.assertEqual(err.args, (tmpfile, "Certificate does not match with private key")) else: self.fail("Exception was not raised") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti-cli.test0000644000000000000000000000077012244641676017336 0ustar00rootroot00000000000000# test the various gnt-commands for common options $DAEMONS/ganeti-masterd --help >>>/Usage:/ >>>2 >>>= 0 $DAEMONS/ganeti-masterd --version >>>/^ganeti-/ >>>2 >>>= 0 $DAEMONS/ganeti-noded --help >>>/Usage:/ >>>2 >>>= 0 $DAEMONS/ganeti-noded --version >>>/^ganeti-/ >>>2 >>>= 0 $DAEMONS/ganeti-rapi --help >>>/Usage:/ >>>2 >>>= 0 $DAEMONS/ganeti-rapi --version >>>/^ganeti-/ >>>2 >>>= 0 $DAEMONS/ganeti-watcher --help >>>/Usage:/ >>>2 >>>= 0 $DAEMONS/ganeti-watcher --version >>>/^ganeti-/ >>>2 >>>= 0 
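# Note on the directives above: these checks use the shelltest format, where
# ">>>/regex/" matches the command's stdout against a regular expression,
# ">>>2" introduces the expected stderr (empty here), and ">>>= N" asserts
# the exit code. A hypothetical extra check (not shipped with Ganeti) for an
# unknown option would read roughly as follows, with exit code 2 assumed to
# be optparse's usage-error status:
# $DAEMONS/ganeti-watcher --no-such-option
# >>>2 /no such option/
# >>>= 2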
ganeti-2.9.3/test/py/ganeti.hypervisor_unittest.py0000744000000000000000000000506512244641676022415 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing hypervisor functionality""" import unittest from ganeti import constants from ganeti import compat from ganeti import objects from ganeti import errors from ganeti import hypervisor from ganeti.hypervisor import hv_base import testutils class TestParameters(unittest.TestCase): def test(self): for hv, const_params in constants.HVC_DEFAULTS.items(): hyp = hypervisor.GetHypervisorClass(hv) for pname in const_params: self.assertTrue(pname in hyp.PARAMETERS, "Hypervisor %s: parameter %s defined in constants" " but not in the permitted hypervisor parameters" % (hv, pname)) for pname in hyp.PARAMETERS: self.assertTrue(pname in const_params, "Hypervisor %s: parameter %s defined in the hypervisor" " but missing a default value" % (hv, pname)) class TestBase(unittest.TestCase): def testVerifyResults(self): fn = hv_base.BaseHypervisor._FormatVerifyResults # FIXME: use assertIsNone when py 2.7 is minimum supported version self.assertEqual(fn([]), None) self.assertEqual(fn(["a"]), "a") self.assertEqual(fn(["a", "b"]), "a; b") def testGetLinuxNodeInfo(self): meminfo = testutils.TestDataFilename("proc_meminfo.txt") cpuinfo = testutils.TestDataFilename("proc_cpuinfo.txt") result = hv_base.BaseHypervisor.GetLinuxNodeInfo(meminfo, cpuinfo) self.assertEqual(result["memory_total"], 7686) self.assertEqual(result["memory_free"], 6272) self.assertEqual(result["memory_dom0"], 2722) self.assertEqual(result["cpu_total"], 4) self.assertEqual(result["cpu_nodes"], 1) self.assertEqual(result["cpu_sockets"], 1) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.client.gnt_instance_unittest.py0000744000000000000000000002023712244641676024132 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
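# (Hedged context note, not part of the original file.) TestConsole below
# drives gnt_instance._DoConsole() with fake console objects; judging purely
# from its assertions, an SSH console command is expected to be built
# roughly as
#
#   ssh -oEscapeChar=none user_abc@node93.example.com 'xm console x.y.z'
#
# where -oEscapeChar=none keeps whoever is attached to the instance console
# from issuing ssh escape sequences (inferred from the tests, not from the
# implementation itself).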
"""Script for testing ganeti.client.gnt_instance""" import unittest from ganeti import constants from ganeti import utils from ganeti import errors from ganeti import objects from ganeti.client import gnt_instance import testutils class TestConsole(unittest.TestCase): def setUp(self): self._output = [] self._cmds = [] self._next_cmd_exitcode = 0 def _Test(self, console, show_command, cluster_name): return gnt_instance._DoConsole(console, show_command, cluster_name, feedback_fn=self._Feedback, _runcmd_fn=self._FakeRunCmd) def _Feedback(self, msg, *args): if args: msg = msg % args self._output.append(msg) def _FakeRunCmd(self, cmd, interactive=None): self.assertTrue(interactive) self.assertTrue(isinstance(cmd, list)) self._cmds.append(cmd) return utils.RunResult(self._next_cmd_exitcode, None, "", "", "cmd", utils.process._TIMEOUT_NONE, 5) def testMessage(self): cons = objects.InstanceConsole(instance="inst98.example.com", kind=constants.CONS_MESSAGE, message="Hello World") self.assertEqual(self._Test(cons, False, "cluster.example.com"), constants.EXIT_SUCCESS) self.assertEqual(len(self._cmds), 0) self.assertEqual(self._output, ["Hello World"]) def testVnc(self): cons = objects.InstanceConsole(instance="inst1.example.com", kind=constants.CONS_VNC, host="node1.example.com", port=5901, display=1) self.assertEqual(self._Test(cons, False, "cluster.example.com"), constants.EXIT_SUCCESS) self.assertEqual(len(self._cmds), 0) self.assertEqual(len(self._output), 1) self.assertTrue(" inst1.example.com " in self._output[0]) self.assertTrue(" node1.example.com:5901 " in self._output[0]) self.assertTrue("vnc://node1.example.com:5901/" in self._output[0]) def testSshShow(self): cons = objects.InstanceConsole(instance="inst31.example.com", kind=constants.CONS_SSH, host="node93.example.com", user="user_abc", command="xm console x.y.z") self.assertEqual(self._Test(cons, True, "cluster.example.com"), constants.EXIT_SUCCESS) self.assertEqual(len(self._cmds), 0) self.assertEqual(len(self._output), 1) self.assertTrue(" user_abc@node93.example.com " in self._output[0]) self.assertTrue("'xm console x.y.z'" in self._output[0]) def testSshRun(self): cons = objects.InstanceConsole(instance="inst31.example.com", kind=constants.CONS_SSH, host="node93.example.com", user="user_abc", command=["xm", "console", "x.y.z"]) self.assertEqual(self._Test(cons, False, "cluster.example.com"), constants.EXIT_SUCCESS) self.assertEqual(len(self._cmds), 1) self.assertEqual(len(self._output), 0) # This is very important to prevent escapes from the console self.assertTrue("-oEscapeChar=none" in self._cmds[0]) def testSshRunFail(self): cons = objects.InstanceConsole(instance="inst31.example.com", kind=constants.CONS_SSH, host="node93.example.com", user="user_abc", command=["xm", "console", "x.y.z"]) self._next_cmd_exitcode = 100 self.assertRaises(errors.OpExecError, self._Test, cons, False, "cluster.example.com") self.assertEqual(len(self._cmds), 1) self.assertEqual(len(self._output), 0) class TestConvertNicDiskModifications(unittest.TestCase): def test(self): fn = gnt_instance._ConvertNicDiskModifications self.assertEqual(fn([]), []) # Error cases self.assertRaises(errors.OpPrereqError, fn, [ (constants.DDM_REMOVE, { "param": "value", }), ]) self.assertRaises(errors.OpPrereqError, fn, [ (0, { constants.DDM_REMOVE: True, "param": "value", }), ]) self.assertRaises(errors.OpPrereqError, fn, [ (0, { constants.DDM_REMOVE: True, constants.DDM_ADD: True, }), ]) # Legacy calls for action in constants.DDMS_VALUES: self.assertEqual(fn([ (action, 
{}), ]), [ (action, -1, {}), ]) self.assertRaises(errors.OpPrereqError, fn, [ (0, { action: True, constants.DDM_MODIFY: True, }), ]) self.assertEqual(fn([ (constants.DDM_ADD, { constants.IDISK_SIZE: 1024, }), ]), [ (constants.DDM_ADD, -1, { constants.IDISK_SIZE: 1024, }), ]) # New-style calls self.assertEqual(fn([ (2, { constants.IDISK_MODE: constants.DISK_RDWR, }), ]), [ (constants.DDM_MODIFY, 2, { constants.IDISK_MODE: constants.DISK_RDWR, }), ]) self.assertEqual(fn([ (0, { constants.DDM_ADD: True, constants.IDISK_SIZE: 4096, }), ]), [ (constants.DDM_ADD, 0, { constants.IDISK_SIZE: 4096, }), ]) self.assertEqual(fn([ (-1, { constants.DDM_REMOVE: True, }), ]), [ (constants.DDM_REMOVE, -1, {}), ]) self.assertEqual(fn([ (-1, { constants.DDM_MODIFY: True, constants.IDISK_SIZE: 1024, }), ]), [ (constants.DDM_MODIFY, -1, { constants.IDISK_SIZE: 1024, }), ]) # Names and UUIDs self.assertEqual(fn([ ('name', { constants.IDISK_MODE: constants.DISK_RDWR, constants.IDISK_NAME: "rename", }), ]), [ (constants.DDM_MODIFY, 'name', { constants.IDISK_MODE: constants.DISK_RDWR, constants.IDISK_NAME: "rename", }), ]) self.assertEqual(fn([ ('024ef14d-4879-400e-8767-d61c051950bf', { constants.DDM_MODIFY: True, constants.IDISK_SIZE: 1024, constants.IDISK_NAME: "name", }), ]), [ (constants.DDM_MODIFY, '024ef14d-4879-400e-8767-d61c051950bf', { constants.IDISK_SIZE: 1024, constants.IDISK_NAME: "name", }), ]) self.assertEqual(fn([ ('name', { constants.DDM_REMOVE: True, }), ]), [ (constants.DDM_REMOVE, 'name', {}), ]) class TestParseDiskSizes(unittest.TestCase): def test(self): fn = gnt_instance._ParseDiskSizes self.assertEqual(fn([]), []) # Missing size parameter self.assertRaises(errors.OpPrereqError, fn, [ (constants.DDM_ADD, 0, {}), ]) # Converting disk size self.assertEqual(fn([ (constants.DDM_ADD, 11, { constants.IDISK_SIZE: "9G", }), ]), [ (constants.DDM_ADD, 11, { constants.IDISK_SIZE: 9216, }), ]) # No size parameter self.assertEqual(fn([ (constants.DDM_REMOVE, 11, { "other": "24M", }), ]), [ (constants.DDM_REMOVE, 11, { "other": "24M", }), ]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.storage.drbd_unittest.py0000744000000000000000000004171412271422343022546 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
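# (Illustrative sketch, not part of the original file.)
# TestDRBD8.testGetVersion below parses version strings of the shape
# "version: MAJOR.MINOR.POINT[.FIX] (api:NN/proto:NN[-MM])"; a standalone
# regular expression capturing the same fields could look like this (an
# assumption -- the real parser lives in ganeti.storage.drbd):
#
#   import re
#   _VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)(?:\.(\d+))?"
#                            r" \(api:(\d+)/proto:(\d+)(?:-(\d+))?\)")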
"""Script for unittesting the drbd module""" import os from ganeti import constants from ganeti import errors from ganeti.storage import drbd from ganeti.storage import drbd_info from ganeti.storage import drbd_cmdgen import testutils class TestDRBD8(testutils.GanetiTestCase): def testGetVersion(self): data = [ "version: 8.0.0 (api:76/proto:80)", "version: 8.0.12 (api:76/proto:86-91)", "version: 8.2.7 (api:88/proto:0-100)", "version: 8.3.7.49 (api:188/proto:13-191)", ] result = [ { "k_major": 8, "k_minor": 0, "k_point": 0, "api": 76, "proto": 80, }, { "k_major": 8, "k_minor": 0, "k_point": 12, "api": 76, "proto": 86, "proto2": "91", }, { "k_major": 8, "k_minor": 2, "k_point": 7, "api": 88, "proto": 0, "proto2": "100", }, { "k_major": 8, "k_minor": 3, "k_point": 7, "k_fix": "49", "api": 188, "proto": 13, "proto2": "191", } ] for d, r in zip(data, result): info = drbd.DRBD8Info.CreateFromLines([d]) self.assertEqual(info.GetVersion(), r) self.assertEqual(info.GetVersionString(), d.replace("version: ", "")) class TestDRBD8Runner(testutils.GanetiTestCase): """Testing case for drbd.DRBD8Dev""" @staticmethod def _has_disk(data, dname, mname, meta_index=0): """Check local disk corectness""" retval = ( "local_dev" in data and data["local_dev"] == dname and "meta_dev" in data and data["meta_dev"] == mname and ((meta_index is None and "meta_index" not in data) or ("meta_index" in data and data["meta_index"] == meta_index) ) ) return retval @staticmethod def _has_net(data, local, remote): """Check network connection parameters""" retval = ( "local_addr" in data and data["local_addr"] == local and "remote_addr" in data and data["remote_addr"] == remote ) return retval def testParser83Creation(self): """Test drbdsetup show parser creation""" drbd_info.DRBD83ShowInfo._GetShowParser() def testParser84Creation(self): """Test drbdsetup show parser creation""" drbd_info.DRBD84ShowInfo._GetShowParser() def testParser80(self): """Test drbdsetup show parser for disk and network version 8.0""" data = testutils.ReadTestData("bdev-drbd-8.0.txt") result = drbd_info.DRBD83ShowInfo.GetDevInfo(data) self.failUnless(self._has_disk(result, "/dev/xenvg/test.data", "/dev/xenvg/test.meta"), "Wrong local disk info") self.failUnless(self._has_net(result, ("192.0.2.1", 11000), ("192.0.2.2", 11000)), "Wrong network info (8.0.x)") def testParser83(self): """Test drbdsetup show parser for disk and network version 8.3""" data = testutils.ReadTestData("bdev-drbd-8.3.txt") result = drbd_info.DRBD83ShowInfo.GetDevInfo(data) self.failUnless(self._has_disk(result, "/dev/xenvg/test.data", "/dev/xenvg/test.meta"), "Wrong local disk info") self.failUnless(self._has_net(result, ("192.0.2.1", 11000), ("192.0.2.2", 11000)), "Wrong network info (8.3.x)") def testParser84(self): """Test drbdsetup show parser for disk and network version 8.4""" data = testutils.ReadTestData("bdev-drbd-8.4.txt") result = drbd_info.DRBD84ShowInfo.GetDevInfo(data) self.failUnless(self._has_disk(result, "/dev/xenvg/test.data", "/dev/xenvg/test.meta"), "Wrong local disk info") self.failUnless(self._has_net(result, ("192.0.2.1", 11000), ("192.0.2.2", 11000)), "Wrong network info (8.4.x)") def testParser84NoDiskParams(self): """Test drbdsetup show parser for 8.4 without disk params The missing disk parameters occur after re-attaching a local disk but before setting the disk params. 
""" data = testutils.ReadTestData("bdev-drbd-8.4-no-disk-params.txt") result = drbd_info.DRBD84ShowInfo.GetDevInfo(data) self.failUnless(self._has_disk(result, "/dev/xenvg/test.data", "/dev/xenvg/test.meta", meta_index=None), "Wrong local disk info") self.failUnless(self._has_net(result, ("192.0.2.1", 11000), ("192.0.2.2", 11000)), "Wrong network info (8.4.x)") def testParserNetIP4(self): """Test drbdsetup show parser for IPv4 network""" data = testutils.ReadTestData("bdev-drbd-net-ip4.txt") result = drbd_info.DRBD83ShowInfo.GetDevInfo(data) self.failUnless(("local_dev" not in result and "meta_dev" not in result and "meta_index" not in result), "Should not find local disk info") self.failUnless(self._has_net(result, ("192.0.2.1", 11002), ("192.0.2.2", 11002)), "Wrong network info (IPv4)") def testParserNetIP6(self): """Test drbdsetup show parser for IPv6 network""" data = testutils.ReadTestData("bdev-drbd-net-ip6.txt") result = drbd_info.DRBD83ShowInfo.GetDevInfo(data) self.failUnless(("local_dev" not in result and "meta_dev" not in result and "meta_index" not in result), "Should not find local disk info") self.failUnless(self._has_net(result, ("2001:db8:65::1", 11048), ("2001:db8:66::1", 11048)), "Wrong network info (IPv6)") def testParserDisk(self): """Test drbdsetup show parser for disk""" data = testutils.ReadTestData("bdev-drbd-disk.txt") result = drbd_info.DRBD83ShowInfo.GetDevInfo(data) self.failUnless(self._has_disk(result, "/dev/xenvg/test.data", "/dev/xenvg/test.meta"), "Wrong local disk info") self.failUnless(("local_addr" not in result and "remote_addr" not in result), "Should not find network info") def testBarriersOptions(self): """Test class method that generates drbdsetup options for disk barriers""" # Tests that should fail because of wrong version/options combinations should_fail = [ (8, 0, 12, "bfd", True), (8, 0, 12, "fd", False), (8, 0, 12, "b", True), (8, 2, 7, "bfd", True), (8, 2, 7, "b", True) ] for vmaj, vmin, vrel, opts, meta in should_fail: self.assertRaises(errors.BlockDeviceError, drbd_cmdgen.DRBD83CmdGenerator._ComputeDiskBarrierArgs, vmaj, vmin, vrel, opts, meta) # get the valid options from the frozenset(frozenset()) in constants. 
valid_options = [list(x)[0] for x in constants.DRBD_VALID_BARRIER_OPT] # Versions that do not support anything for vmaj, vmin, vrel in ((8, 0, 0), (8, 0, 11), (8, 2, 6)): for opts in valid_options: self.assertRaises( errors.BlockDeviceError, drbd_cmdgen.DRBD83CmdGenerator._ComputeDiskBarrierArgs, vmaj, vmin, vrel, opts, True) # Versions with partial support (testing only options that are supported) tests = [ (8, 0, 12, "n", False, []), (8, 0, 12, "n", True, ["--no-md-flushes"]), (8, 2, 7, "n", False, []), (8, 2, 7, "fd", False, ["--no-disk-flushes", "--no-disk-drain"]), (8, 0, 12, "n", True, ["--no-md-flushes"]), ] # Versions that support everything for vmaj, vmin, vrel in ((8, 3, 0), (8, 3, 12)): tests.append((vmaj, vmin, vrel, "bfd", True, ["--no-disk-barrier", "--no-disk-drain", "--no-disk-flushes", "--no-md-flushes"])) tests.append((vmaj, vmin, vrel, "n", False, [])) tests.append((vmaj, vmin, vrel, "b", True, ["--no-disk-barrier", "--no-md-flushes"])) tests.append((vmaj, vmin, vrel, "fd", False, ["--no-disk-flushes", "--no-disk-drain"])) tests.append((vmaj, vmin, vrel, "n", True, ["--no-md-flushes"])) # Test execution for test in tests: vmaj, vmin, vrel, disabled_barriers, disable_meta_flush, expected = test args = \ drbd_cmdgen.DRBD83CmdGenerator._ComputeDiskBarrierArgs( vmaj, vmin, vrel, disabled_barriers, disable_meta_flush) self.failUnless(set(args) == set(expected), "For test %s, got wrong results %s" % (test, args)) # Unsupported or invalid versions for vmaj, vmin, vrel in ((0, 7, 25), (9, 0, 0), (7, 0, 0), (8, 4, 0)): self.assertRaises(errors.BlockDeviceError, drbd_cmdgen.DRBD83CmdGenerator._ComputeDiskBarrierArgs, vmaj, vmin, vrel, "n", True) # Invalid options for option in ("", "c", "whatever", "nbdfc", "nf"): self.assertRaises(errors.BlockDeviceError, drbd_cmdgen.DRBD83CmdGenerator._ComputeDiskBarrierArgs, 8, 3, 11, option, True) class TestDRBD8Status(testutils.GanetiTestCase): """Testing case for DRBD8Dev /proc status""" def setUp(self): """Read in txt data""" testutils.GanetiTestCase.setUp(self) proc_data = testutils.TestDataFilename("proc_drbd8.txt") proc80e_data = testutils.TestDataFilename("proc_drbd80-emptyline.txt") proc83_data = testutils.TestDataFilename("proc_drbd83.txt") proc83_sync_data = testutils.TestDataFilename("proc_drbd83_sync.txt") proc83_sync_krnl_data = \ testutils.TestDataFilename("proc_drbd83_sync_krnl2.6.39.txt") proc84_data = testutils.TestDataFilename("proc_drbd84.txt") proc84_sync_data = testutils.TestDataFilename("proc_drbd84_sync.txt") self.proc80ev_data = \ testutils.TestDataFilename("proc_drbd80-emptyversion.txt") self.drbd_info = drbd.DRBD8Info.CreateFromFile(filename=proc_data) self.drbd_info80e = drbd.DRBD8Info.CreateFromFile(filename=proc80e_data) self.drbd_info83 = drbd.DRBD8Info.CreateFromFile(filename=proc83_data) self.drbd_info83_sync = \ drbd.DRBD8Info.CreateFromFile(filename=proc83_sync_data) self.drbd_info83_sync_krnl = \ drbd.DRBD8Info.CreateFromFile(filename=proc83_sync_krnl_data) self.drbd_info84 = drbd.DRBD8Info.CreateFromFile(filename=proc84_data) self.drbd_info84_sync = \ drbd.DRBD8Info.CreateFromFile(filename=proc84_sync_data) def testIOErrors(self): """Test handling of errors while reading the proc file.""" temp_file = self._CreateTempFile() os.unlink(temp_file) self.failUnlessRaises(errors.BlockDeviceError, drbd.DRBD8Info.CreateFromFile, filename=temp_file) def testHelper(self): """Test reading usermode_helper in /sys.""" sys_drbd_helper = testutils.TestDataFilename("sys_drbd_usermode_helper.txt") drbd_helper = 
drbd.DRBD8.GetUsermodeHelper(filename=sys_drbd_helper) self.failUnlessEqual(drbd_helper, "/bin/true") def testHelperIOErrors(self): """Test handling of errors while reading usermode_helper in /sys.""" temp_file = self._CreateTempFile() os.unlink(temp_file) self.failUnlessRaises(errors.BlockDeviceError, drbd.DRBD8.GetUsermodeHelper, filename=temp_file) def testMinorNotFound(self): """Test not-found-minor in /proc""" self.failUnless(not self.drbd_info.HasMinorStatus(9)) self.failUnless(not self.drbd_info83.HasMinorStatus(9)) self.failUnless(not self.drbd_info80e.HasMinorStatus(3)) def testLineNotMatch(self): """Test wrong line passed to drbd_info.DRBD8Status""" self.assertRaises(errors.BlockDeviceError, drbd_info.DRBD8Status, "foo") def testMinor0(self): """Test connected, primary device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info84]: stats = info.GetMinorStatus(0) self.failUnless(stats.is_in_use) self.failUnless(stats.is_connected and stats.is_primary and stats.peer_secondary and stats.is_disk_uptodate) def testMinor1(self): """Test connected, secondary device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info84]: stats = info.GetMinorStatus(1) self.failUnless(stats.is_in_use) self.failUnless(stats.is_connected and stats.is_secondary and stats.peer_primary and stats.is_disk_uptodate) def testMinor2(self): """Test unconfigured device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info80e, self.drbd_info84]: stats = info.GetMinorStatus(2) self.failIf(stats.is_in_use) def testMinor4(self): """Test WFconn device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info84]: stats = info.GetMinorStatus(4) self.failUnless(stats.is_in_use) self.failUnless(stats.is_wfconn and stats.is_primary and stats.rrole == "Unknown" and stats.is_disk_uptodate) def testMinor6(self): """Test diskless device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info84]: stats = info.GetMinorStatus(6) self.failUnless(stats.is_in_use) self.failUnless(stats.is_connected and stats.is_secondary and stats.peer_primary and stats.is_diskless) def testMinor8(self): """Test standalone device""" for info in [self.drbd_info, self.drbd_info83, self.drbd_info84]: stats = info.GetMinorStatus(8) self.failUnless(stats.is_in_use) self.failUnless(stats.is_standalone and stats.rrole == "Unknown" and stats.is_disk_uptodate) def testDRBD83SyncFine(self): stats = self.drbd_info83_sync.GetMinorStatus(3) self.failUnless(stats.is_in_resync) self.assertAlmostEqual(stats.sync_percent, 34.9) def testDRBD83SyncBroken(self): stats = self.drbd_info83_sync_krnl.GetMinorStatus(3) self.failUnless(stats.is_in_resync) self.assertAlmostEqual(stats.sync_percent, 2.4) def testDRBD84Sync(self): stats = self.drbd_info84_sync.GetMinorStatus(5) self.failUnless(stats.is_in_resync) self.assertAlmostEqual(stats.sync_percent, 68.5) def testDRBDEmptyVersion(self): self.assertRaises(errors.BlockDeviceError, drbd.DRBD8Info.CreateFromFile, filename=self.proc80ev_data) class TestDRBD8Construction(testutils.GanetiTestCase): def setUp(self): """Read in txt data""" testutils.GanetiTestCase.setUp(self) self.proc80_info = \ drbd_info.DRBD8Info.CreateFromFile( filename=testutils.TestDataFilename("proc_drbd8.txt")) self.proc83_info = \ drbd_info.DRBD8Info.CreateFromFile( filename=testutils.TestDataFilename("proc_drbd83.txt")) self.proc84_info = \ drbd_info.DRBD8Info.CreateFromFile( filename=testutils.TestDataFilename("proc_drbd84.txt")) self.test_unique_id = ("hosta.com", 123, "host2.com", 123, 0, "secret") 
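# (Hedged note, not part of the original file.) testutils.patch_object is
# assumed here to be a thin wrapper around mock.patch.object, so each test
# below swaps out drbd.DRBD8.GetProcInfo only for its own duration; the
# inline equivalent would be roughly:
#
#   with mock.patch.object(drbd.DRBD8, "GetProcInfo",
#                          return_value=self.proc80_info):
#     inst = drbd.DRBD8Dev(self.test_unique_id, [], 123, {})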
@testutils.patch_object(drbd.DRBD8, "GetProcInfo") def testConstructionWith80Data(self, mock_create_from_file): mock_create_from_file.return_value = self.proc80_info inst = drbd.DRBD8Dev(self.test_unique_id, [], 123, {}) self.assertEqual(inst._show_info_cls, drbd_info.DRBD83ShowInfo) self.assertTrue(isinstance(inst._cmd_gen, drbd_cmdgen.DRBD83CmdGenerator)) @testutils.patch_object(drbd.DRBD8, "GetProcInfo") def testConstructionWith83Data(self, mock_create_from_file): mock_create_from_file.return_value = self.proc83_info inst = drbd.DRBD8Dev(self.test_unique_id, [], 123, {}) self.assertEqual(inst._show_info_cls, drbd_info.DRBD83ShowInfo) self.assertTrue(isinstance(inst._cmd_gen, drbd_cmdgen.DRBD83CmdGenerator)) @testutils.patch_object(drbd.DRBD8, "GetProcInfo") def testConstructionWith84Data(self, mock_create_from_file): mock_create_from_file.return_value = self.proc84_info inst = drbd.DRBD8Dev(self.test_unique_id, [], 123, {}) self.assertEqual(inst._show_info_cls, drbd_info.DRBD84ShowInfo) self.assertTrue(isinstance(inst._cmd_gen, drbd_cmdgen.DRBD84CmdGenerator)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.ssconf_unittest.py0000744000000000000000000002120612267470014021460 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
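# (Illustrative sketch, not part of the original file.) The tests below
# treat ssconf values as one-per-file entries named "ssconf_<key>"; a
# minimal reader honouring the same size cap that TestReadSsconfFile
# verifies could look like this (an assumption -- the shipped reader is
# ssconf.ReadSsconfFile):
#
#   def _ReadOneSsconfValue(path, max_size):
#     # Read one byte more than allowed so oversized files are detectable.
#     data = open(path).read(max_size + 1)
#     if len(data) > max_size:
#       raise RuntimeError("ssconf file %s is too large" % path)
#     return data.rstrip("\n")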
"""Script for testing ganeti.ssconf""" import os import unittest import tempfile import shutil import errno from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import ssconf import testutils import mock class TestReadSsconfFile(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testReadDirectory(self): self.assertRaises(EnvironmentError, ssconf.ReadSsconfFile, self.tmpdir) def testNonExistantFile(self): testfile = utils.PathJoin(self.tmpdir, "does.not.exist") self.assertFalse(os.path.exists(testfile)) try: ssconf.ReadSsconfFile(testfile) except EnvironmentError, err: self.assertEqual(err.errno, errno.ENOENT) else: self.fail("Exception was not raised") def testEmptyFile(self): testfile = utils.PathJoin(self.tmpdir, "empty") utils.WriteFile(testfile, data="") self.assertEqual(ssconf.ReadSsconfFile(testfile), "") def testSingleLine(self): testfile = utils.PathJoin(self.tmpdir, "data") for nl in range(0, 10): utils.WriteFile(testfile, data="Hello World" + ("\n" * nl)) self.assertEqual(ssconf.ReadSsconfFile(testfile), "Hello World") def testExactlyMaxSize(self): testfile = utils.PathJoin(self.tmpdir, "data") data = "A" * ssconf._MAX_SIZE utils.WriteFile(testfile, data=data) self.assertEqual(os.path.getsize(testfile), ssconf._MAX_SIZE) self.assertEqual(ssconf.ReadSsconfFile(testfile), data) def testLargeFile(self): testfile = utils.PathJoin(self.tmpdir, "data") for size in [ssconf._MAX_SIZE + 1, ssconf._MAX_SIZE * 2]: utils.WriteFile(testfile, data="A" * size) self.assertTrue(os.path.getsize(testfile) > ssconf._MAX_SIZE) self.assertRaises(RuntimeError, ssconf.ReadSsconfFile, testfile) class TestSimpleStore(unittest.TestCase): def setUp(self): self._tmpdir = tempfile.mkdtemp() self.ssdir = utils.PathJoin(self._tmpdir, "files") lockfile = utils.PathJoin(self._tmpdir, "lock") os.mkdir(self.ssdir) self.sstore = ssconf.SimpleStore(cfg_location=self.ssdir, _lockfile=lockfile) def tearDown(self): shutil.rmtree(self._tmpdir) def _ReadSsFile(self, filename): return utils.ReadFile(utils.PathJoin(self.ssdir, "ssconf_%s" % filename)) def testInvalidKey(self): self.assertRaises(errors.ProgrammerError, self.sstore.KeyToFilename, "not a valid key") self.assertRaises(errors.ProgrammerError, self.sstore._ReadFile, "not a valid key") def testKeyToFilename(self): for key in ssconf._VALID_KEYS: result = self.sstore.KeyToFilename(key) self.assertTrue(utils.IsBelowDir(self.ssdir, result)) self.assertTrue(os.path.basename(result).startswith("ssconf_")) def testReadFileNonExistingFile(self): filename = self.sstore.KeyToFilename(constants.SS_CLUSTER_NAME) self.assertFalse(os.path.exists(filename)) try: self.sstore._ReadFile(constants.SS_CLUSTER_NAME) except errors.ConfigurationError, err: self.assertTrue(str(err).startswith("Can't read ssconf file")) else: self.fail("Exception was not raised") for default in ["", "Hello World", 0, 100]: self.assertFalse(os.path.exists(filename)) result = self.sstore._ReadFile(constants.SS_CLUSTER_NAME, default=default) self.assertEqual(result, default) def testReadFile(self): utils.WriteFile(self.sstore.KeyToFilename(constants.SS_CLUSTER_NAME), data="cluster.example.com") self.assertEqual(self.sstore._ReadFile(constants.SS_CLUSTER_NAME), "cluster.example.com") self.assertEqual(self.sstore._ReadFile(constants.SS_CLUSTER_NAME, default="something.example.com"), "cluster.example.com") def testReadAllNoFiles(self): self.assertEqual(self.sstore.ReadAll(), {}) def 
testReadAllSingleFile(self): utils.WriteFile(self.sstore.KeyToFilename(constants.SS_CLUSTER_NAME), data="cluster.example.com") self.assertEqual(self.sstore.ReadAll(), { constants.SS_CLUSTER_NAME: "cluster.example.com", }) def testWriteFiles(self): values = { constants.SS_CLUSTER_NAME: "cluster.example.com", constants.SS_CLUSTER_TAGS: "value\nwith\nnewlines\n", constants.SS_INSTANCE_LIST: "", } self.sstore.WriteFiles(values) self.assertEqual(sorted(os.listdir(self.ssdir)), sorted([ "ssconf_cluster_name", "ssconf_cluster_tags", "ssconf_instance_list", ])) self.assertEqual(self._ReadSsFile(constants.SS_CLUSTER_NAME), "cluster.example.com\n") self.assertEqual(self._ReadSsFile(constants.SS_CLUSTER_TAGS), "value\nwith\nnewlines\n") self.assertEqual(self._ReadSsFile(constants.SS_INSTANCE_LIST), "") def testWriteFilesUnknownKey(self): values = { "unknown key": "value", } self.assertRaises(errors.ProgrammerError, self.sstore.WriteFiles, values, dry_run=True) self.assertEqual(os.listdir(self.ssdir), []) def testWriteFilesDryRun(self): values = { constants.SS_CLUSTER_NAME: "cluster.example.com", } self.sstore.WriteFiles(values, dry_run=True) self.assertEqual(os.listdir(self.ssdir), []) def testWriteFilesNoValues(self): for dry_run in [False, True]: self.sstore.WriteFiles({}, dry_run=dry_run) self.assertEqual(os.listdir(self.ssdir), []) def testWriteFilesTooLong(self): values = { constants.SS_INSTANCE_LIST: "A" * ssconf._MAX_SIZE, } for dry_run in [False, True]: try: self.sstore.WriteFiles(values, dry_run=dry_run) except errors.ConfigurationError, err: self.assertTrue(str(err).startswith("Value 'instance_list' has")) else: self.fail("Exception was not raised") self.assertEqual(os.listdir(self.ssdir), []) def testGetHvparamsForHypervisor(self): hvparams = [("a", "A"), ("b", "B"), ("c", "C")] ssconf_file_content = '\n'.join("%s=%s" % (key, value) for (key, value) in hvparams) self.sstore._ReadFile = mock.Mock(return_value=ssconf_file_content) result = self.sstore.GetHvparamsForHypervisor("foo") for (key, value) in hvparams: self.assertTrue(key in result) self.assertEqual(value, result[key]) class TestVerifyClusterName(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testMissingFile(self): tmploc = utils.PathJoin(self.tmpdir, "does-not-exist") ssconf.VerifyClusterName(NotImplemented, _cfg_location=tmploc) def testMatchingName(self): tmpfile = utils.PathJoin(self.tmpdir, "ssconf_cluster_name") for content in ["cluster.example.com", "cluster.example.com\n\n"]: utils.WriteFile(tmpfile, data=content) ssconf.VerifyClusterName("cluster.example.com", _cfg_location=self.tmpdir) def testNameMismatch(self): tmpfile = utils.PathJoin(self.tmpdir, "ssconf_cluster_name") for content in ["something.example.com", "foobar\n\ncluster.example.com"]: utils.WriteFile(tmpfile, data=content) self.assertRaises(errors.GenericError, ssconf.VerifyClusterName, "cluster.example.com", _cfg_location=self.tmpdir) class TestVerifyKeys(unittest.TestCase): def testNoKeys(self): ssconf.VerifyKeys({}) def testValidKeys(self): ssconf.VerifyKeys(ssconf._VALID_KEYS) for key in ssconf._VALID_KEYS: ssconf.VerifyKeys([key]) def testInvalidKeys(self): for key in ["", ".", " ", "foo", "bar", "HelloWorld"]: self.assertRaises(errors.GenericError, ssconf.VerifyKeys, [key]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/import-export_unittest.bash0000744000000000000000000002653212244641676022055 0ustar00rootroot00000000000000#!/bin/bash # # Copyright 
(C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e set -o pipefail export PYTHON=${PYTHON:=python} impexpd="$PYTHON daemons/import-export -d" err() { echo "$@" echo 'Aborting' show_output exit 1 } show_output() { if [[ -s "$gencert_output" ]]; then echo echo 'Generating certificates:' cat $gencert_output fi if [[ -s "$dst_output" ]]; then echo echo 'Import output:' cat $dst_output fi if [[ -s "$src_output" ]]; then echo echo 'Export output:' cat $src_output fi } checkpids() { local result=0 # Unlike combining the "wait" commands using || or &&, this ensures we # actually wait for all PIDs. for pid in "$@"; do if ! wait $pid; then result=1 fi done return $result } get_testpath() { echo "${TOP_SRCDIR:-.}/test" } get_testfile() { echo "$(get_testpath)/data/$1" } upto() { echo "$(date '+%F %T'):" "$@" '...' } statusdir=$(mktemp -d) trap "rm -rf $statusdir" EXIT gencert_output=$statusdir/gencert.output src_statusfile=$statusdir/src.status src_output=$statusdir/src.output src_x509=$statusdir/src.pem dst_statusfile=$statusdir/dst.status dst_output=$statusdir/dst.output dst_x509=$statusdir/dst.pem other_x509=$statusdir/other.pem testdata=$statusdir/data1 largetestdata=$statusdir/data2 upto 'Command line parameter tests' $impexpd >/dev/null 2>&1 && err "import-export daemon succeeded without parameters" $impexpd foo bar baz moo boo >/dev/null 2>&1 && err "import-export daemon succeeded with wrong parameters" $impexpd $src_statusfile >/dev/null 2>&1 && err "import-export daemon succeeded with insufficient parameters" $impexpd $src_statusfile invalidmode >/dev/null 2>&1 && err "import-export daemon succeeded with invalid mode" for mode in import export; do $impexpd $src_statusfile $mode --compression=rot13 >/dev/null 2>&1 && err "import-export daemon succeeded with invalid compression" for host in '' ' ' ' s p a c e' ... , foo.example.net...
\ 'some"evil"name' 'x\ny\tmoo'; do $impexpd $src_statusfile $mode --host="$host" >/dev/null 2>&1 && err "daemon-util succeeded with invalid host '$host'" done for port in '' ' ' -1234 'some ` port " here'; do $impexpd $src_statusfile $mode --port="$port" >/dev/null 2>&1 && err "daemon-util succeeded with invalid port '$port'" done for magic in '' ' ' 'this`is' 'invalid!magic' 'he"re'; do $impexpd $src_statusfile $mode --magic="$magic" >/dev/null 2>&1 && err "daemon-util succeeded with invalid magic '$magic'" done done upto 'Generate test data' cat $(get_testfile proc_drbd8.txt) $(get_testfile cert1.pem) > $testdata # Generate about 7.5 MB of test data { tmp="$(<$testdata)" for (( i=0; i < 100; ++i )); do echo "$tmp $tmp $tmp $tmp $tmp $tmp" done dd if=/dev/zero bs=1024 count=4096 2>/dev/null for (( i=0; i < 100; ++i )); do echo "$tmp $tmp $tmp $tmp $tmp $tmp" done } > $largetestdata impexpd_helper() { $PYTHON $(get_testpath)/py/import-export_unittest-helper "$@" } start_test() { upto "$@" rm -f $src_statusfile $dst_output $dst_statusfile $dst_output rm -f $gencert_output imppid= exppid= cmd_prefix= cmd_suffix= connect_timeout=30 connect_retries=1 compress=gzip magic= } wait_import_ready() { # Wait for listening port impexpd_helper $dst_statusfile listen-port } do_export() { local port=$1 $impexpd $src_statusfile export --bind=127.0.0.1 \ --host=127.0.0.1 --port=$port \ --key=$src_x509 --cert=$src_x509 --ca=$dst_x509 \ --cmd-prefix="$cmd_prefix" --cmd-suffix="$cmd_suffix" \ --connect-timeout=$connect_timeout \ --connect-retries=$connect_retries \ --compress=$compress ${magic:+--magic="$magic"} } do_import() { $impexpd $dst_statusfile import --bind=127.0.0.1 \ --host=127.0.0.1 \ --key=$dst_x509 --cert=$dst_x509 --ca=$src_x509 \ --cmd-prefix="$cmd_prefix" --cmd-suffix="$cmd_suffix" \ --connect-timeout=$connect_timeout \ --connect-retries=$connect_retries \ --compress=$compress ${magic:+--magic="$magic"} } upto 'Generate X509 certificates and keys' impexpd_helper $src_x509 gencert 2>$gencert_output & srccertpid=$! impexpd_helper $dst_x509 gencert 2>$gencert_output & dstcertpid=$! impexpd_helper $other_x509 gencert 2>$gencert_output & othercertpid=$! checkpids $srccertpid $dstcertpid $othercertpid || \ err 'Failed to generate certificates' start_test 'Normal case' do_import > $statusdir/recv1 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp $testdata $statusdir/recv1 || err 'Received data does not match input' start_test 'Export using wrong CA' # Setting lower timeout to not wait for too long connect_timeout=1 do_import &>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then : | dst_x509=$other_x509 do_export $port >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid && err 'Export did not fail when using wrong CA' start_test 'Import using wrong CA' # Setting lower timeout to not wait for too long src_x509=$other_x509 connect_timeout=1 do_import &>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then : | do_export $port >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid && err 'Import did not fail when using wrong CA' start_test 'Suffix command on import' cmd_suffix="| cksum > $statusdir/recv2" do_import &>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then do_export $port < $testdata >>$src_output 2>&1 & exppid=$! 
fi checkpids $exppid $imppid || err 'Testing additional commands failed' cmp $statusdir/recv2 <(cksum < $testdata) || \ err 'Checksum of received data does not match' start_test 'Prefix command on export' do_import > $statusdir/recv3 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then cmd_prefix='cksum |' do_export $port <$testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'Testing additional commands failed' cmp $statusdir/recv3 <(cksum < $testdata) || \ err 'Received checksum does not match' start_test 'Failing prefix command on export' : | cmd_prefix='exit 1;' do_export 0 &>$src_output & exppid=$! checkpids $exppid && err 'Prefix command on export did not fail when it should' start_test 'Failing suffix command on export' do_import >&$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then : | cmd_suffix='| exit 1' do_export $port >>$src_output 2>&1 & exppid=$! fi checkpids $imppid $exppid && \ err 'Suffix command on export did not fail when it should' start_test 'Failing prefix command on import' cmd_prefix='exit 1;' do_import &>$dst_output & imppid=$! checkpids $imppid && err 'Prefix command on import did not fail when it should' start_test 'Failing suffix command on import' cmd_suffix='| exit 1' do_import &>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then : | do_export $port >>$src_output 2>&1 & exppid=$! fi checkpids $imppid $exppid && \ err 'Suffix command on import did not fail when it should' start_test 'Listen timeout A' # Setting lower timeout to not wait too long (there won't be anything trying to # connect) connect_timeout=1 do_import &>$dst_output & imppid=$! checkpids $imppid && \ err 'Listening with timeout did not fail when it should' start_test 'Listen timeout B' do_import &>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then { sleep 1; : | do_export $port; } >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || \ err 'Listening with timeout failed when it should not' start_test 'Connect timeout' # Setting lower timeout as nothing will be listening on port 0 : | connect_timeout=1 do_export 0 &>$src_output & exppid=$! checkpids $exppid && err 'Connection did not time out when it should' start_test 'No compression' compress=none do_import > $statusdir/recv-nocompr 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then compress=none do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp $testdata $statusdir/recv-nocompr || \ err 'Received data does not match input' start_test 'Compression mismatch A' compress=none do_import > $statusdir/recv-miscompr 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then compress=gzip do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp -s $testdata $statusdir/recv-miscompr && \ err 'Received data matches input when it should not' start_test 'Compression mismatch B' compress=gzip do_import > $statusdir/recv-miscompr2 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then compress=none do_export $port < $testdata >>$src_output 2>&1 & exppid=$! 
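# The two compression-mismatch cases are intentionally asymmetric: in case A
# (plain import, gzip export) the transfer itself succeeds and only the
# payload differs, so checkpids must pass while "cmp -s" must report a
# difference; in case B (gzip import, plain export) the receiving side's
# decompression step (roughly "... | gzip -d" in the daemon's pipeline)
# fails on uncompressed input, so checkpids itself is expected to fail.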
fi checkpids $exppid $imppid && err 'Did not fail when it should' cmp -s $testdata $statusdir/recv-miscompr2 && \ err 'Received data matches input when it should not' start_test 'Magic without compression' compress=none magic=MagicValue13582 \ do_import > $statusdir/recv-magic1 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then compress=none magic=MagicValue13582 \ do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp $testdata $statusdir/recv-magic1 || err 'Received data does not match input' start_test 'Magic with compression' compress=gzip magic=yzD1FBH7Iw \ do_import > $statusdir/recv-magic2 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then compress=gzip magic=yzD1FBH7Iw \ do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp $testdata $statusdir/recv-magic2 || err 'Received data does not match input' start_test 'Magic mismatch A (same length)' magic=h0tmIKXK do_import > $statusdir/recv-magic3 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then magic=bo6m9uAw do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid && err 'Did not fail when it should' start_test 'Magic mismatch B' magic=AUxVEWXVr5GK do_import > $statusdir/recv-magic4 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then magic=74RiP9KP do_export $port < $testdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid && err 'Did not fail when it should' start_test 'Large transfer' do_import > $statusdir/recv-large 2>$dst_output & imppid=$! if port=$(wait_import_ready 2>$src_output); then do_export $port < $largetestdata >>$src_output 2>&1 & exppid=$! fi checkpids $exppid $imppid || err 'An error occurred' cmp $largetestdata $statusdir/recv-large || \ err 'Received data does not match input' exit 0 ganeti-2.9.3/test/py/lockperf.py0000744000000000000000000000702712244641676016603 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing lock performance""" import os import sys import time import optparse import threading import resource from ganeti import locking def ParseOptions(): """Parses the command line options. In case of command line errors, it will show the usage and exit the program. 
@return: the options in a tuple """ parser = optparse.OptionParser() parser.add_option("-t", dest="thread_count", default=1, type="int", help="Number of threads", metavar="NUM") parser.add_option("-d", dest="duration", default=5, type="float", help="Duration", metavar="SECS") (opts, args) = parser.parse_args() if opts.thread_count < 1: parser.error("Number of threads must be at least 1") return (opts, args) class State: def __init__(self, thread_count): """Initializes this class. """ self.verify = [0 for _ in range(thread_count)] self.counts = [0 for _ in range(thread_count)] self.total_count = 0 def _Counter(lock, state, me): """Thread function for acquiring locks. """ counts = state.counts verify = state.verify while True: lock.acquire() try: verify[me] = 1 counts[me] += 1 state.total_count += 1 if state.total_count % 1000 == 0: sys.stdout.write(" %8d\r" % state.total_count) sys.stdout.flush() if sum(verify) != 1: print "Inconsistent state!" os._exit(1) # pylint: disable=W0212 verify[me] = 0 finally: lock.release() def main(): (opts, _) = ParseOptions() lock = locking.SharedLock("TestLock") state = State(opts.thread_count) lock.acquire(shared=0) try: for i in range(opts.thread_count): t = threading.Thread(target=_Counter, args=(lock, state, i)) t.setDaemon(True) t.start() start = time.clock() finally: lock.release() while True: if (time.clock() - start) > opts.duration: break time.sleep(0.1) # Make sure we get a consistent view lock.acquire(shared=0) lock_cputime = time.clock() - start res = resource.getrusage(resource.RUSAGE_SELF) print "Total number of acquisitions: %s" % state.total_count print "Per-thread acquisitions:" for (i, count) in enumerate(state.counts): print (" Thread %s: %d (%0.1f%%)" % (i, count, (100.0 * count / state.total_count))) print "Benchmark CPU time: %0.3fs" % lock_cputime print ("Average time per lock acquisition: %0.5fms" % (1000.0 * lock_cputime / state.total_count)) print "Process:" print " User time: %0.3fs" % res.ru_utime print " System time: %0.3fs" % res.ru_stime print " Total time: %0.3fs" % (res.ru_utime + res.ru_stime) # Exit directly without attempting to clean up threads os._exit(0) # pylint: disable=W0212 if __name__ == "__main__": main() ganeti-2.9.3/test/py/ganeti.rapi.client_unittest.py0000744000000000000000000016333112271422343022400 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
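# The tests below exercise the real GanetiRapiClient against an in-process
# mock rather than a live cluster: RapiMock (defined further down) resolves
# each request path to the rlib2 handler class a real server would route it
# to and returns canned responses, while rapi.testutils.FakeCurl stands in
# for pycurl. A minimal sketch of the resulting pattern, using the names
# from this file:
#
#   rapi_mock = RapiMock()
#   cl = client.GanetiRapiClient("master.example.com",
#                                curl_factory=lambda: FakeCurl(rapi_mock))
#   rapi_mock.AddResponse("2")     # canned body for the next request
#   assert cl.GetVersion() == 2    # client parses the canned JSON response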
"""Script for unittesting the RAPI client module""" import unittest import warnings import pycurl from ganeti import opcodes from ganeti import constants from ganeti import http from ganeti import serializer from ganeti import utils from ganeti import query from ganeti import objects from ganeti import rapi from ganeti import errors import ganeti.rapi.testutils from ganeti.rapi import connector from ganeti.rapi import rlib2 from ganeti.rapi import client import testutils # List of resource handlers which aren't used by the RAPI client _KNOWN_UNUSED = set([ rlib2.R_root, rlib2.R_2, ]) # Global variable for collecting used handlers _used_handlers = None class RapiMock(object): def __init__(self): self._mapper = connector.Mapper() self._responses = [] self._last_handler = None self._last_req_data = None def ResetResponses(self): del self._responses[:] def AddResponse(self, response, code=200): self._responses.insert(0, (code, response)) def CountPending(self): return len(self._responses) def GetLastHandler(self): return self._last_handler def GetLastRequestData(self): return self._last_req_data def FetchResponse(self, path, method, headers, request_body): self._last_req_data = request_body try: (handler_cls, items, args) = self._mapper.getController(path) # Record handler as used _used_handlers.add(handler_cls) self._last_handler = handler_cls(items, args, None) if not hasattr(self._last_handler, method.upper()): raise http.HttpNotImplemented(message="Method not implemented") except http.HttpException, ex: code = ex.code response = ex.message else: if not self._responses: raise Exception("No responses") (code, response) = self._responses.pop() return (code, NotImplemented, response) class TestConstants(unittest.TestCase): def test(self): self.assertEqual(client.GANETI_RAPI_PORT, constants.DEFAULT_RAPI_PORT) self.assertEqual(client.GANETI_RAPI_VERSION, constants.RAPI_VERSION) self.assertEqual(client.HTTP_APP_JSON, http.HTTP_APP_JSON) self.assertEqual(client._REQ_DATA_VERSION_FIELD, rlib2._REQ_DATA_VERSION) self.assertEqual(client.JOB_STATUS_QUEUED, constants.JOB_STATUS_QUEUED) self.assertEqual(client.JOB_STATUS_WAITING, constants.JOB_STATUS_WAITING) self.assertEqual(client.JOB_STATUS_CANCELING, constants.JOB_STATUS_CANCELING) self.assertEqual(client.JOB_STATUS_RUNNING, constants.JOB_STATUS_RUNNING) self.assertEqual(client.JOB_STATUS_CANCELED, constants.JOB_STATUS_CANCELED) self.assertEqual(client.JOB_STATUS_SUCCESS, constants.JOB_STATUS_SUCCESS) self.assertEqual(client.JOB_STATUS_ERROR, constants.JOB_STATUS_ERROR) self.assertEqual(client.JOB_STATUS_PENDING, constants.JOBS_PENDING) self.assertEqual(client.JOB_STATUS_FINALIZED, constants.JOBS_FINALIZED) self.assertEqual(client.JOB_STATUS_ALL, constants.JOB_STATUS_ALL) # Node evacuation self.assertEqual(client.NODE_EVAC_PRI, constants.NODE_EVAC_PRI) self.assertEqual(client.NODE_EVAC_SEC, constants.NODE_EVAC_SEC) self.assertEqual(client.NODE_EVAC_ALL, constants.NODE_EVAC_ALL) # Legacy name self.assertEqual(client.JOB_STATUS_WAITLOCK, constants.JOB_STATUS_WAITING) # RAPI feature strings self.assertEqual(client._INST_CREATE_REQV1, rlib2._INST_CREATE_REQV1) self.assertEqual(client.INST_CREATE_REQV1, rlib2._INST_CREATE_REQV1) self.assertEqual(client._INST_REINSTALL_REQV1, rlib2._INST_REINSTALL_REQV1) self.assertEqual(client.INST_REINSTALL_REQV1, rlib2._INST_REINSTALL_REQV1) self.assertEqual(client._NODE_MIGRATE_REQV1, rlib2._NODE_MIGRATE_REQV1) self.assertEqual(client.NODE_MIGRATE_REQV1, rlib2._NODE_MIGRATE_REQV1) 
self.assertEqual(client._NODE_EVAC_RES1, rlib2._NODE_EVAC_RES1) self.assertEqual(client.NODE_EVAC_RES1, rlib2._NODE_EVAC_RES1) def testErrors(self): self.assertEqual(client.ECODE_ALL, errors.ECODE_ALL) # Make sure all error codes are in both RAPI client and errors module for name in filter(lambda s: (s.startswith("ECODE_") and s != "ECODE_ALL"), dir(client)): value = getattr(client, name) self.assertEqual(value, getattr(errors, name)) self.assertTrue(value in client.ECODE_ALL) self.assertTrue(value in errors.ECODE_ALL) class RapiMockTest(unittest.TestCase): def test404(self): (code, _, body) = RapiMock().FetchResponse("/foo", "GET", None, None) self.assertEqual(code, 404) self.assertTrue(body is None) def test501(self): (code, _, body) = RapiMock().FetchResponse("/version", "POST", None, None) self.assertEqual(code, 501) self.assertEqual(body, "Method not implemented") def test200(self): rapi = RapiMock() rapi.AddResponse("2") (code, _, response) = rapi.FetchResponse("/version", "GET", None, None) self.assertEqual(200, code) self.assertEqual("2", response) self.failUnless(isinstance(rapi.GetLastHandler(), rlib2.R_version)) def _FakeNoSslPycurlVersion(): # Note: incomplete version tuple return (3, "7.16.0", 462848, "mysystem", 1581, None, 0) def _FakeFancySslPycurlVersion(): # Note: incomplete version tuple return (3, "7.16.0", 462848, "mysystem", 1581, "FancySSL/1.2.3", 0) def _FakeOpenSslPycurlVersion(): # Note: incomplete version tuple return (2, "7.15.5", 462597, "othersystem", 668, "OpenSSL/0.9.8c", 0) def _FakeGnuTlsPycurlVersion(): # Note: incomplete version tuple return (3, "7.18.0", 463360, "somesystem", 1581, "GnuTLS/2.0.4", 0) class TestExtendedConfig(unittest.TestCase): def testAuth(self): cl = client.GanetiRapiClient("master.example.com", username="user", password="pw", curl_factory=lambda: rapi.testutils.FakeCurl(RapiMock())) curl = cl._CreateCurl() self.assertEqual(curl.getopt(pycurl.HTTPAUTH), pycurl.HTTPAUTH_BASIC) self.assertEqual(curl.getopt(pycurl.USERPWD), "user:pw") def testInvalidAuth(self): # No username self.assertRaises(client.Error, client.GanetiRapiClient, "master-a.example.com", password="pw") # No password self.assertRaises(client.Error, client.GanetiRapiClient, "master-b.example.com", username="user") def testCertVerifyInvalidCombinations(self): self.assertRaises(client.Error, client.GenericCurlConfig, use_curl_cabundle=True, cafile="cert1.pem") self.assertRaises(client.Error, client.GenericCurlConfig, use_curl_cabundle=True, capath="certs/") self.assertRaises(client.Error, client.GenericCurlConfig, use_curl_cabundle=True, cafile="cert1.pem", capath="certs/") def testProxySignalVerifyHostname(self): for use_gnutls in [False, True]: if use_gnutls: pcverfn = _FakeGnuTlsPycurlVersion else: pcverfn = _FakeOpenSslPycurlVersion for proxy in ["", "http://127.0.0.1:1234"]: for use_signal in [False, True]: for verify_hostname in [False, True]: cfgfn = client.GenericCurlConfig(proxy=proxy, use_signal=use_signal, verify_hostname=verify_hostname, _pycurl_version_fn=pcverfn) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assertEqual(curl.getopt(pycurl.PROXY), proxy) self.assertEqual(curl.getopt(pycurl.NOSIGNAL), not use_signal) if verify_hostname: self.assertEqual(curl.getopt(pycurl.SSL_VERIFYHOST), 2) else: self.assertEqual(curl.getopt(pycurl.SSL_VERIFYHOST), 0) def testNoCertVerify(self): cfgfn = client.GenericCurlConfig() 
curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assertFalse(curl.getopt(pycurl.SSL_VERIFYPEER)) self.assertFalse(curl.getopt(pycurl.CAINFO)) self.assertFalse(curl.getopt(pycurl.CAPATH)) def testCertVerifyCurlBundle(self): cfgfn = client.GenericCurlConfig(use_curl_cabundle=True) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assert_(curl.getopt(pycurl.SSL_VERIFYPEER)) self.assertFalse(curl.getopt(pycurl.CAINFO)) self.assertFalse(curl.getopt(pycurl.CAPATH)) def testCertVerifyCafile(self): mycert = "/tmp/some/UNUSED/cert/file.pem" cfgfn = client.GenericCurlConfig(cafile=mycert) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assert_(curl.getopt(pycurl.SSL_VERIFYPEER)) self.assertEqual(curl.getopt(pycurl.CAINFO), mycert) self.assertFalse(curl.getopt(pycurl.CAPATH)) def testCertVerifyCapath(self): certdir = "/tmp/some/UNUSED/cert/directory" pcverfn = _FakeOpenSslPycurlVersion cfgfn = client.GenericCurlConfig(capath=certdir, _pycurl_version_fn=pcverfn) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assert_(curl.getopt(pycurl.SSL_VERIFYPEER)) self.assertEqual(curl.getopt(pycurl.CAPATH), certdir) self.assertFalse(curl.getopt(pycurl.CAINFO)) def testCertVerifyCapathGnuTls(self): certdir = "/tmp/some/UNUSED/cert/directory" pcverfn = _FakeGnuTlsPycurlVersion cfgfn = client.GenericCurlConfig(capath=certdir, _pycurl_version_fn=pcverfn) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) self.assertRaises(client.Error, cl._CreateCurl) def testCertVerifyNoSsl(self): certdir = "/tmp/some/UNUSED/cert/directory" pcverfn = _FakeNoSslPycurlVersion cfgfn = client.GenericCurlConfig(capath=certdir, _pycurl_version_fn=pcverfn) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) self.assertRaises(client.Error, cl._CreateCurl) def testCertVerifyFancySsl(self): certdir = "/tmp/some/UNUSED/cert/directory" pcverfn = _FakeFancySslPycurlVersion cfgfn = client.GenericCurlConfig(capath=certdir, _pycurl_version_fn=pcverfn) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) self.assertRaises(NotImplementedError, cl._CreateCurl) def testTimeouts(self): for connect_timeout in [None, 1, 5, 10, 30, 60, 300]: for timeout in [None, 1, 30, 60, 3600, 24 * 3600]: cfgfn = client.GenericCurlConfig(connect_timeout=connect_timeout, timeout=timeout) curl_factory = lambda: rapi.testutils.FakeCurl(RapiMock()) cl = client.GanetiRapiClient("master.example.com", curl_config_fn=cfgfn, curl_factory=curl_factory) curl = cl._CreateCurl() self.assertEqual(curl.getopt(pycurl.CONNECTTIMEOUT), connect_timeout) self.assertEqual(curl.getopt(pycurl.TIMEOUT), timeout) class GanetiRapiClientTests(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self)
self.rapi = RapiMock() self.curl = rapi.testutils.FakeCurl(self.rapi) self.client = client.GanetiRapiClient("master.example.com", curl_factory=lambda: self.curl) def assertHandler(self, handler_cls): self.failUnless(isinstance(self.rapi.GetLastHandler(), handler_cls)) def assertQuery(self, key, value): self.assertEqual(value, self.rapi.GetLastHandler().queryargs.get(key, None)) def assertItems(self, items): self.assertEqual(items, self.rapi.GetLastHandler().items) def assertBulk(self): self.assertTrue(self.rapi.GetLastHandler().useBulk()) def assertDryRun(self): self.assertTrue(self.rapi.GetLastHandler().dryRun()) def assertUseForce(self): self.assertTrue(self.rapi.GetLastHandler().useForce()) def testEncodeQuery(self): query = [ ("a", None), ("b", 1), ("c", 2), ("d", "Foo"), ("e", True), ] expected = [ ("a", ""), ("b", 1), ("c", 2), ("d", "Foo"), ("e", 1), ] self.assertEqualValues(self.client._EncodeQuery(query), expected) # invalid types for i in [[1, 2, 3], {"moo": "boo"}, (1, 2, 3)]: self.assertRaises(ValueError, self.client._EncodeQuery, [("x", i)]) def testCurlSettings(self): self.rapi.AddResponse("2") self.assertEqual(2, self.client.GetVersion()) self.assertHandler(rlib2.R_version) # Signals should be disabled by default self.assert_(self.curl.getopt(pycurl.NOSIGNAL)) # No auth and no proxy self.assertFalse(self.curl.getopt(pycurl.USERPWD)) self.assert_(self.curl.getopt(pycurl.PROXY) is None) # Content-type is required for requests headers = self.curl.getopt(pycurl.HTTPHEADER) self.assert_("Content-type: application/json" in headers) def testHttpError(self): self.rapi.AddResponse(None, code=404) try: self.client.GetJobStatus(15140) except client.GanetiApiError, err: self.assertEqual(err.code, 404) else: self.fail("Didn't raise exception") def testGetVersion(self): self.rapi.AddResponse("2") self.assertEqual(2, self.client.GetVersion()) self.assertHandler(rlib2.R_version) def testGetFeatures(self): for features in [[], ["foo", "bar", "baz"]]: self.rapi.AddResponse(serializer.DumpJson(features)) self.assertEqual(features, self.client.GetFeatures()) self.assertHandler(rlib2.R_2_features) def testGetFeaturesNotFound(self): self.rapi.AddResponse(None, code=404) self.assertEqual([], self.client.GetFeatures()) def testGetOperatingSystems(self): self.rapi.AddResponse("[\"beos\"]") self.assertEqual(["beos"], self.client.GetOperatingSystems()) self.assertHandler(rlib2.R_2_os) def testGetClusterTags(self): self.rapi.AddResponse("[\"tag\"]") self.assertEqual(["tag"], self.client.GetClusterTags()) self.assertHandler(rlib2.R_2_tags) def testAddClusterTags(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.AddClusterTags(["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_tags) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testDeleteClusterTags(self): self.rapi.AddResponse("5107") self.assertEqual(5107, self.client.DeleteClusterTags(["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_tags) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testGetInfo(self): self.rapi.AddResponse("{}") self.assertEqual({}, self.client.GetInfo()) self.assertHandler(rlib2.R_2_info) def testGetInstances(self): self.rapi.AddResponse("[]") self.assertEqual([], self.client.GetInstances(bulk=True)) self.assertHandler(rlib2.R_2_instances) self.assertBulk() def testGetInstance(self): self.rapi.AddResponse("[]") self.assertEqual([], self.client.GetInstance("instance")) self.assertHandler(rlib2.R_2_instances_name) self.assertItems(["instance"]) def 
testGetInstanceInfo(self): self.rapi.AddResponse("21291") self.assertEqual(21291, self.client.GetInstanceInfo("inst3")) self.assertHandler(rlib2.R_2_instances_name_info) self.assertItems(["inst3"]) self.assertQuery("static", None) self.rapi.AddResponse("3428") self.assertEqual(3428, self.client.GetInstanceInfo("inst31", static=False)) self.assertHandler(rlib2.R_2_instances_name_info) self.assertItems(["inst31"]) self.assertQuery("static", ["0"]) self.rapi.AddResponse("15665") self.assertEqual(15665, self.client.GetInstanceInfo("inst32", static=True)) self.assertHandler(rlib2.R_2_instances_name_info) self.assertItems(["inst32"]) self.assertQuery("static", ["1"]) def testInstancesMultiAlloc(self): response = { constants.JOB_IDS_KEY: ["23423"], opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["foobar"], opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["foobar2"], } self.rapi.AddResponse(serializer.DumpJson(response)) insts = [self.client.InstanceAllocation("create", "foobar", "plain", [], []), self.client.InstanceAllocation("create", "foobar2", "drbd8", [{"size": 100}], [])] resp = self.client.InstancesMultiAlloc(insts) self.assertEqual(resp, response) self.assertHandler(rlib2.R_2_instances_multi_alloc) def testCreateInstanceOldVersion(self): # The old request format, version 0, is no longer supported self.rapi.AddResponse(None, code=404) self.assertRaises(client.GanetiApiError, self.client.CreateInstance, "create", "inst1.example.com", "plain", [], []) self.assertEqual(self.rapi.CountPending(), 0) def testCreateInstance(self): self.rapi.AddResponse(serializer.DumpJson([rlib2._INST_CREATE_REQV1])) self.rapi.AddResponse("23030") job_id = self.client.CreateInstance("create", "inst1.example.com", "plain", [], [], dry_run=True) self.assertEqual(job_id, 23030) self.assertHandler(rlib2.R_2_instances) self.assertDryRun() data = serializer.LoadJson(self.rapi.GetLastRequestData()) for field in ["dry_run", "beparams", "hvparams", "start"]: self.assertFalse(field in data) self.assertEqual(data["name"], "inst1.example.com") self.assertEqual(data["disk_template"], "plain") def testCreateInstance2(self): self.rapi.AddResponse(serializer.DumpJson([rlib2._INST_CREATE_REQV1])) self.rapi.AddResponse("24740") job_id = self.client.CreateInstance("import", "inst2.example.com", "drbd8", [{"size": 100,}], [{}, {"bridge": "br1", }], dry_run=False, start=True, pnode="node1", snode="node9", ip_check=False) self.assertEqual(job_id, 24740) self.assertHandler(rlib2.R_2_instances) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(data[rlib2._REQ_DATA_VERSION], 1) self.assertEqual(data["name"], "inst2.example.com") self.assertEqual(data["disk_template"], "drbd8") self.assertEqual(data["start"], True) self.assertEqual(data["ip_check"], False) self.assertEqualValues(data["disks"], [{"size": 100,}]) self.assertEqualValues(data["nics"], [{}, {"bridge": "br1", }]) def testDeleteInstance(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.DeleteInstance("instance", dry_run=True)) self.assertHandler(rlib2.R_2_instances_name) self.assertItems(["instance"]) self.assertDryRun() def testGetInstanceTags(self): self.rapi.AddResponse("[]") self.assertEqual([], self.client.GetInstanceTags("fooinstance")) self.assertHandler(rlib2.R_2_instances_name_tags) self.assertItems(["fooinstance"]) def testAddInstanceTags(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.AddInstanceTags("fooinstance", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_instances_name_tags) 
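# The three helper assertions used throughout this class check independent
# aspects of a call: assertHandler() the rlib2 resource the request was
# routed to, assertItems() the path components (such as the instance name),
# and assertQuery() individual query-string parameters. Roughly, the call
# under test here is expected to issue:
#
#   PUT /2/instances/fooinstance/tags?tag=awesome&dry-run=1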
self.assertItems(["fooinstance"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testDeleteInstanceTags(self): self.rapi.AddResponse("25826") self.assertEqual(25826, self.client.DeleteInstanceTags("foo", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_instances_name_tags) self.assertItems(["foo"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testRebootInstance(self): self.rapi.AddResponse("6146") job_id = self.client.RebootInstance("i-bar", reboot_type="hard", ignore_secondaries=True, dry_run=True, reason="Updates") self.assertEqual(6146, job_id) self.assertHandler(rlib2.R_2_instances_name_reboot) self.assertItems(["i-bar"]) self.assertDryRun() self.assertQuery("type", ["hard"]) self.assertQuery("ignore_secondaries", ["1"]) self.assertQuery("reason", ["Updates"]) def testRebootInstanceDefaultReason(self): self.rapi.AddResponse("6146") job_id = self.client.RebootInstance("i-bar", reboot_type="hard", ignore_secondaries=True, dry_run=True) self.assertEqual(6146, job_id) self.assertHandler(rlib2.R_2_instances_name_reboot) self.assertItems(["i-bar"]) self.assertDryRun() self.assertQuery("type", ["hard"]) self.assertQuery("ignore_secondaries", ["1"]) self.assertQuery("reason", None) def testShutdownInstance(self): self.rapi.AddResponse("1487") self.assertEqual(1487, self.client.ShutdownInstance("foo-instance", dry_run=True, reason="NoMore")) self.assertHandler(rlib2.R_2_instances_name_shutdown) self.assertItems(["foo-instance"]) self.assertDryRun() self.assertQuery("reason", ["NoMore"]) def testShutdownInstanceDefaultReason(self): self.rapi.AddResponse("1487") self.assertEqual(1487, self.client.ShutdownInstance("foo-instance", dry_run=True)) self.assertHandler(rlib2.R_2_instances_name_shutdown) self.assertItems(["foo-instance"]) self.assertDryRun() self.assertQuery("reason", None) def testStartupInstance(self): self.rapi.AddResponse("27149") self.assertEqual(27149, self.client.StartupInstance("bar-instance", dry_run=True, reason="New")) self.assertHandler(rlib2.R_2_instances_name_startup) self.assertItems(["bar-instance"]) self.assertDryRun() self.assertQuery("reason", ["New"]) def testStartupInstanceDefaultReason(self): self.rapi.AddResponse("27149") self.assertEqual(27149, self.client.StartupInstance("bar-instance", dry_run=True)) self.assertHandler(rlib2.R_2_instances_name_startup) self.assertItems(["bar-instance"]) self.assertDryRun() self.assertQuery("reason", None) def testReinstallInstance(self): self.rapi.AddResponse(serializer.DumpJson([])) self.rapi.AddResponse("19119") self.assertEqual(19119, self.client.ReinstallInstance("baz-instance", os="DOS", no_startup=True)) self.assertHandler(rlib2.R_2_instances_name_reinstall) self.assertItems(["baz-instance"]) self.assertQuery("os", ["DOS"]) self.assertQuery("nostartup", ["1"]) self.assertEqual(self.rapi.CountPending(), 0) def testReinstallInstanceNew(self): self.rapi.AddResponse(serializer.DumpJson([rlib2._INST_REINSTALL_REQV1])) self.rapi.AddResponse("25689") self.assertEqual(25689, self.client.ReinstallInstance("moo-instance", os="Debian", no_startup=True)) self.assertHandler(rlib2.R_2_instances_name_reinstall) self.assertItems(["moo-instance"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 2) self.assertEqual(data["os"], "Debian") self.assertEqual(data["start"], False) self.assertEqual(self.rapi.CountPending(), 0) def testReinstallInstanceWithOsparams1(self): self.rapi.AddResponse(serializer.DumpJson([])) self.assertRaises(client.GanetiApiError, 
self.client.ReinstallInstance, "doo-instance", osparams={"x": "y"}) self.assertEqual(self.rapi.CountPending(), 0) def testReinstallInstanceWithOsparams2(self): osparams = { "Hello": "World", "foo": "bar", } self.rapi.AddResponse(serializer.DumpJson([rlib2._INST_REINSTALL_REQV1])) self.rapi.AddResponse("1717") self.assertEqual(1717, self.client.ReinstallInstance("zoo-instance", osparams=osparams)) self.assertHandler(rlib2.R_2_instances_name_reinstall) self.assertItems(["zoo-instance"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 2) self.assertEqual(data["osparams"], osparams) self.assertEqual(data["start"], True) self.assertEqual(self.rapi.CountPending(), 0) def testReplaceInstanceDisks(self): self.rapi.AddResponse("999") job_id = self.client.ReplaceInstanceDisks("instance-name", disks=[0, 1], iallocator="hail") self.assertEqual(999, job_id) self.assertHandler(rlib2.R_2_instances_name_replace_disks) self.assertItems(["instance-name"]) self.assertQuery("disks", ["0,1"]) self.assertQuery("mode", ["replace_auto"]) self.assertQuery("iallocator", ["hail"]) self.rapi.AddResponse("1000") job_id = self.client.ReplaceInstanceDisks("instance-bar", disks=[1], mode="replace_on_secondary", remote_node="foo-node") self.assertEqual(1000, job_id) self.assertItems(["instance-bar"]) self.assertQuery("disks", ["1"]) self.assertQuery("remote_node", ["foo-node"]) self.rapi.AddResponse("5175") self.assertEqual(5175, self.client.ReplaceInstanceDisks("instance-moo")) self.assertItems(["instance-moo"]) self.assertQuery("disks", None) def testPrepareExport(self): self.rapi.AddResponse("8326") self.assertEqual(8326, self.client.PrepareExport("inst1", "local")) self.assertHandler(rlib2.R_2_instances_name_prepare_export) self.assertItems(["inst1"]) self.assertQuery("mode", ["local"]) def testExportInstance(self): self.rapi.AddResponse("19695") job_id = self.client.ExportInstance("inst2", "local", "nodeX", shutdown=True) self.assertEqual(job_id, 19695) self.assertHandler(rlib2.R_2_instances_name_export) self.assertItems(["inst2"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(data["mode"], "local") self.assertEqual(data["destination"], "nodeX") self.assertEqual(data["shutdown"], True) def testMigrateInstanceDefaults(self): self.rapi.AddResponse("24873") job_id = self.client.MigrateInstance("inst91") self.assertEqual(job_id, 24873) self.assertHandler(rlib2.R_2_instances_name_migrate) self.assertItems(["inst91"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertFalse(data) def testMigrateInstance(self): for mode in constants.HT_MIGRATION_MODES: for cleanup in [False, True]: self.rapi.AddResponse("31910") job_id = self.client.MigrateInstance("inst289", mode=mode, cleanup=cleanup) self.assertEqual(job_id, 31910) self.assertHandler(rlib2.R_2_instances_name_migrate) self.assertItems(["inst289"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 2) self.assertEqual(data["mode"], mode) self.assertEqual(data["cleanup"], cleanup) def testFailoverInstanceDefaults(self): self.rapi.AddResponse("7639") job_id = self.client.FailoverInstance("inst13579") self.assertEqual(job_id, 7639) self.assertHandler(rlib2.R_2_instances_name_failover) self.assertItems(["inst13579"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertFalse(data) def testFailoverInstance(self): for iallocator in ["dumb", "hail"]: for ignore_consistency in [False, True]: for target_node in ["node-a", "node2"]: 
self.rapi.AddResponse("19161") job_id = \ self.client.FailoverInstance("inst251", iallocator=iallocator, ignore_consistency=ignore_consistency, target_node=target_node) self.assertEqual(job_id, 19161) self.assertHandler(rlib2.R_2_instances_name_failover) self.assertItems(["inst251"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 3) self.assertEqual(data["iallocator"], iallocator) self.assertEqual(data["ignore_consistency"], ignore_consistency) self.assertEqual(data["target_node"], target_node) self.assertEqual(self.rapi.CountPending(), 0) def testRenameInstanceDefaults(self): new_name = "newnametha7euqu" self.rapi.AddResponse("8791") job_id = self.client.RenameInstance("inst18821", new_name) self.assertEqual(job_id, 8791) self.assertHandler(rlib2.R_2_instances_name_rename) self.assertItems(["inst18821"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqualValues(data, {"new_name": new_name, }) def testRenameInstance(self): new_name = "new-name-yiux1iin" for ip_check in [False, True]: for name_check in [False, True]: self.rapi.AddResponse("24776") job_id = self.client.RenameInstance("inst20967", new_name, ip_check=ip_check, name_check=name_check) self.assertEqual(job_id, 24776) self.assertHandler(rlib2.R_2_instances_name_rename) self.assertItems(["inst20967"]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 3) self.assertEqual(data["new_name"], new_name) self.assertEqual(data["ip_check"], ip_check) self.assertEqual(data["name_check"], name_check) def testGetJobs(self): self.rapi.AddResponse('[ { "id": "123", "uri": "\\/2\\/jobs\\/123" },' ' { "id": "124", "uri": "\\/2\\/jobs\\/124" } ]') self.assertEqual([123, 124], self.client.GetJobs()) self.assertHandler(rlib2.R_2_jobs) self.rapi.AddResponse('[ { "id": "123", "uri": "\\/2\\/jobs\\/123" },' ' { "id": "124", "uri": "\\/2\\/jobs\\/124" } ]') self.assertEqual([{"id": "123", "uri": "/2/jobs/123"}, {"id": "124", "uri": "/2/jobs/124"}], self.client.GetJobs(bulk=True)) self.assertHandler(rlib2.R_2_jobs) self.assertBulk() def testGetJobStatus(self): self.rapi.AddResponse("{\"foo\": \"bar\"}") self.assertEqual({"foo": "bar"}, self.client.GetJobStatus(1234)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["1234"]) def testWaitForJobChange(self): fields = ["id", "summary"] expected = { "job_info": [123, "something"], "log_entries": [], } self.rapi.AddResponse(serializer.DumpJson(expected)) result = self.client.WaitForJobChange(123, fields, [], -1) self.assertEqualValues(expected, result) self.assertHandler(rlib2.R_2_jobs_id_wait) self.assertItems(["123"]) def testCancelJob(self): self.rapi.AddResponse("[true, \"Job 123 will be canceled\"]") self.assertEqual([True, "Job 123 will be canceled"], self.client.CancelJob(999, dry_run=True)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["999"]) self.assertDryRun() def testGetNodes(self): self.rapi.AddResponse("[ { \"id\": \"node1\", \"uri\": \"uri1\" }," " { \"id\": \"node2\", \"uri\": \"uri2\" } ]") self.assertEqual(["node1", "node2"], self.client.GetNodes()) self.assertHandler(rlib2.R_2_nodes) self.rapi.AddResponse("[ { \"id\": \"node1\", \"uri\": \"uri1\" }," " { \"id\": \"node2\", \"uri\": \"uri2\" } ]") self.assertEqual([{"id": "node1", "uri": "uri1"}, {"id": "node2", "uri": "uri2"}], self.client.GetNodes(bulk=True)) self.assertHandler(rlib2.R_2_nodes) self.assertBulk() def testGetNode(self): self.rapi.AddResponse("{}") self.assertEqual({}, self.client.GetNode("node-foo")) 
self.assertHandler(rlib2.R_2_nodes_name) self.assertItems(["node-foo"]) def testEvacuateNode(self): self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_EVAC_RES1])) self.rapi.AddResponse("9876") job_id = self.client.EvacuateNode("node-1", remote_node="node-2") self.assertEqual(9876, job_id) self.assertHandler(rlib2.R_2_nodes_name_evacuate) self.assertItems(["node-1"]) self.assertEqual(serializer.LoadJson(self.rapi.GetLastRequestData()), { "remote_node": "node-2", }) self.assertEqual(self.rapi.CountPending(), 0) self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_EVAC_RES1])) self.rapi.AddResponse("8888") job_id = self.client.EvacuateNode("node-3", iallocator="hail", dry_run=True, mode=constants.NODE_EVAC_ALL, early_release=True) self.assertEqual(8888, job_id) self.assertItems(["node-3"]) self.assertEqual(serializer.LoadJson(self.rapi.GetLastRequestData()), { "iallocator": "hail", "mode": "all", "early_release": True, }) self.assertDryRun() self.assertRaises(client.GanetiApiError, self.client.EvacuateNode, "node-4", iallocator="hail", remote_node="node-5") self.assertEqual(self.rapi.CountPending(), 0) def testEvacuateNodeOldResponse(self): self.rapi.AddResponse(serializer.DumpJson([])) self.assertRaises(client.GanetiApiError, self.client.EvacuateNode, "node-4", accept_old=False) self.assertEqual(self.rapi.CountPending(), 0) for mode in [client.NODE_EVAC_PRI, client.NODE_EVAC_ALL]: self.rapi.AddResponse(serializer.DumpJson([])) self.assertRaises(client.GanetiApiError, self.client.EvacuateNode, "node-4", accept_old=True, mode=mode) self.assertEqual(self.rapi.CountPending(), 0) self.rapi.AddResponse(serializer.DumpJson([])) self.rapi.AddResponse(serializer.DumpJson("21533")) result = self.client.EvacuateNode("node-3", iallocator="hail", dry_run=True, accept_old=True, mode=client.NODE_EVAC_SEC, early_release=True) self.assertEqual(result, "21533") self.assertItems(["node-3"]) self.assertQuery("iallocator", ["hail"]) self.assertQuery("early_release", ["1"]) self.assertFalse(self.rapi.GetLastRequestData()) self.assertDryRun() self.assertEqual(self.rapi.CountPending(), 0) def testMigrateNode(self): self.rapi.AddResponse(serializer.DumpJson([])) self.rapi.AddResponse("1111") self.assertEqual(1111, self.client.MigrateNode("node-a", dry_run=True)) self.assertHandler(rlib2.R_2_nodes_name_migrate) self.assertItems(["node-a"]) self.assert_("mode" not in self.rapi.GetLastHandler().queryargs) self.assertDryRun() self.assertFalse(self.rapi.GetLastRequestData()) self.rapi.AddResponse(serializer.DumpJson([])) self.rapi.AddResponse("1112") self.assertEqual(1112, self.client.MigrateNode("node-a", dry_run=True, mode="live")) self.assertHandler(rlib2.R_2_nodes_name_migrate) self.assertItems(["node-a"]) self.assertQuery("mode", ["live"]) self.assertDryRun() self.assertFalse(self.rapi.GetLastRequestData()) self.rapi.AddResponse(serializer.DumpJson([])) self.assertRaises(client.GanetiApiError, self.client.MigrateNode, "node-c", target_node="foonode") self.assertEqual(self.rapi.CountPending(), 0) def testMigrateNodeBodyData(self): self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_MIGRATE_REQV1])) self.rapi.AddResponse("27539") self.assertEqual(27539, self.client.MigrateNode("node-a", dry_run=False, mode="live")) self.assertHandler(rlib2.R_2_nodes_name_migrate) self.assertItems(["node-a"]) self.assertFalse(self.rapi.GetLastHandler().queryargs) self.assertEqual(serializer.LoadJson(self.rapi.GetLastRequestData()), { "mode": "live", }) self.rapi.AddResponse(serializer.DumpJson([rlib2._NODE_MIGRATE_REQV1])) 
self.rapi.AddResponse("14219") self.assertEqual(14219, self.client.MigrateNode("node-x", dry_run=True, target_node="node9", iallocator="ial")) self.assertHandler(rlib2.R_2_nodes_name_migrate) self.assertItems(["node-x"]) self.assertDryRun() self.assertEqual(serializer.LoadJson(self.rapi.GetLastRequestData()), { "target_node": "node9", "iallocator": "ial", }) self.assertEqual(self.rapi.CountPending(), 0) def testGetNodeRole(self): self.rapi.AddResponse("\"master\"") self.assertEqual("master", self.client.GetNodeRole("node-a")) self.assertHandler(rlib2.R_2_nodes_name_role) self.assertItems(["node-a"]) def testSetNodeRole(self): self.rapi.AddResponse("789") self.assertEqual(789, self.client.SetNodeRole("node-foo", "master-candidate", force=True)) self.assertHandler(rlib2.R_2_nodes_name_role) self.assertItems(["node-foo"]) self.assertQuery("force", ["1"]) self.assertEqual("\"master-candidate\"", self.rapi.GetLastRequestData()) def testPowercycleNode(self): self.rapi.AddResponse("23051") self.assertEqual(23051, self.client.PowercycleNode("node5468", force=True)) self.assertHandler(rlib2.R_2_nodes_name_powercycle) self.assertItems(["node5468"]) self.assertQuery("force", ["1"]) self.assertFalse(self.rapi.GetLastRequestData()) self.assertEqual(self.rapi.CountPending(), 0) def testModifyNode(self): self.rapi.AddResponse("3783") job_id = self.client.ModifyNode("node16979.example.com", drained=True) self.assertEqual(job_id, 3783) self.assertHandler(rlib2.R_2_nodes_name_modify) self.assertItems(["node16979.example.com"]) self.assertEqual(self.rapi.CountPending(), 0) def testGetNodeStorageUnits(self): self.rapi.AddResponse("42") self.assertEqual(42, self.client.GetNodeStorageUnits("node-x", "lvm-pv", "fields")) self.assertHandler(rlib2.R_2_nodes_name_storage) self.assertItems(["node-x"]) self.assertQuery("storage_type", ["lvm-pv"]) self.assertQuery("output_fields", ["fields"]) def testModifyNodeStorageUnits(self): self.rapi.AddResponse("14") self.assertEqual(14, self.client.ModifyNodeStorageUnits("node-z", "lvm-pv", "hda")) self.assertHandler(rlib2.R_2_nodes_name_storage_modify) self.assertItems(["node-z"]) self.assertQuery("storage_type", ["lvm-pv"]) self.assertQuery("name", ["hda"]) self.assertQuery("allocatable", None) for allocatable, query_allocatable in [(True, "1"), (False, "0")]: self.rapi.AddResponse("7205") job_id = self.client.ModifyNodeStorageUnits("node-z", "lvm-pv", "hda", allocatable=allocatable) self.assertEqual(7205, job_id) self.assertHandler(rlib2.R_2_nodes_name_storage_modify) self.assertItems(["node-z"]) self.assertQuery("storage_type", ["lvm-pv"]) self.assertQuery("name", ["hda"]) self.assertQuery("allocatable", [query_allocatable]) def testRepairNodeStorageUnits(self): self.rapi.AddResponse("99") self.assertEqual(99, self.client.RepairNodeStorageUnits("node-z", "lvm-pv", "hda")) self.assertHandler(rlib2.R_2_nodes_name_storage_repair) self.assertItems(["node-z"]) self.assertQuery("storage_type", ["lvm-pv"]) self.assertQuery("name", ["hda"]) def testGetNodeTags(self): self.rapi.AddResponse("[\"fry\", \"bender\"]") self.assertEqual(["fry", "bender"], self.client.GetNodeTags("node-k")) self.assertHandler(rlib2.R_2_nodes_name_tags) self.assertItems(["node-k"]) def testAddNodeTags(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.AddNodeTags("node-v", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_nodes_name_tags) self.assertItems(["node-v"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testDeleteNodeTags(self): 
self.rapi.AddResponse("16861") self.assertEqual(16861, self.client.DeleteNodeTags("node-w", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_nodes_name_tags) self.assertItems(["node-w"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testGetGroups(self): groups = [{"name": "group1", "uri": "/2/groups/group1", }, {"name": "group2", "uri": "/2/groups/group2", }, ] self.rapi.AddResponse(serializer.DumpJson(groups)) self.assertEqual(["group1", "group2"], self.client.GetGroups()) self.assertHandler(rlib2.R_2_groups) def testGetGroupsBulk(self): groups = [{"name": "group1", "uri": "/2/groups/group1", "node_cnt": 2, "node_list": ["gnt1.test", "gnt2.test", ], }, {"name": "group2", "uri": "/2/groups/group2", "node_cnt": 1, "node_list": ["gnt3.test", ], }, ] self.rapi.AddResponse(serializer.DumpJson(groups)) self.assertEqual(groups, self.client.GetGroups(bulk=True)) self.assertHandler(rlib2.R_2_groups) self.assertBulk() def testGetGroup(self): group = {"ctime": None, "name": "default", } self.rapi.AddResponse(serializer.DumpJson(group)) self.assertEqual({"ctime": None, "name": "default"}, self.client.GetGroup("default")) self.assertHandler(rlib2.R_2_groups_name) self.assertItems(["default"]) def testCreateGroup(self): self.rapi.AddResponse("12345") job_id = self.client.CreateGroup("newgroup", dry_run=True) self.assertEqual(job_id, 12345) self.assertHandler(rlib2.R_2_groups) self.assertDryRun() def testDeleteGroup(self): self.rapi.AddResponse("12346") job_id = self.client.DeleteGroup("newgroup", dry_run=True) self.assertEqual(job_id, 12346) self.assertHandler(rlib2.R_2_groups_name) self.assertDryRun() def testRenameGroup(self): self.rapi.AddResponse("12347") job_id = self.client.RenameGroup("oldname", "newname") self.assertEqual(job_id, 12347) self.assertHandler(rlib2.R_2_groups_name_rename) def testModifyGroup(self): self.rapi.AddResponse("12348") job_id = self.client.ModifyGroup("mygroup", alloc_policy="foo") self.assertEqual(job_id, 12348) self.assertHandler(rlib2.R_2_groups_name_modify) def testAssignGroupNodes(self): self.rapi.AddResponse("12349") job_id = self.client.AssignGroupNodes("mygroup", ["node1", "node2"], force=True, dry_run=True) self.assertEqual(job_id, 12349) self.assertHandler(rlib2.R_2_groups_name_assign_nodes) self.assertDryRun() self.assertUseForce() def testGetNetworksBulk(self): networks = [{"name": "network1", "uri": "/2/networks/network1", "network": "192.168.0.0/24", }, {"name": "network2", "uri": "/2/networks/network2", "network": "192.168.0.0/24", }, ] self.rapi.AddResponse(serializer.DumpJson(networks)) self.assertEqual(networks, self.client.GetNetworks(bulk=True)) self.assertHandler(rlib2.R_2_networks) self.assertBulk() def testGetNetwork(self): network = {"ctime": None, "name": "network1", } self.rapi.AddResponse(serializer.DumpJson(network)) self.assertEqual({"ctime": None, "name": "network1"}, self.client.GetNetwork("network1")) self.assertHandler(rlib2.R_2_networks_name) self.assertItems(["network1"]) def testCreateNetwork(self): self.rapi.AddResponse("12345") job_id = self.client.CreateNetwork("newnetwork", network="192.168.0.0/24", dry_run=True) self.assertEqual(job_id, 12345) self.assertHandler(rlib2.R_2_networks) self.assertDryRun() def testModifyNetwork(self): self.rapi.AddResponse("12346") job_id = self.client.ModifyNetwork("mynetwork", gateway="192.168.0.10", dry_run=True) self.assertEqual(job_id, 12346) self.assertHandler(rlib2.R_2_networks_name_modify) def testDeleteNetwork(self): self.rapi.AddResponse("12347") job_id = 
self.client.DeleteNetwork("newnetwork", dry_run=True) self.assertEqual(job_id, 12347) self.assertHandler(rlib2.R_2_networks_name) self.assertDryRun() def testConnectNetwork(self): self.rapi.AddResponse("12348") job_id = self.client.ConnectNetwork("mynetwork", "default", "bridged", "br0", dry_run=True) self.assertEqual(job_id, 12348) self.assertHandler(rlib2.R_2_networks_name_connect) self.assertDryRun() def testDisconnectNetwork(self): self.rapi.AddResponse("12349") job_id = self.client.DisconnectNetwork("mynetwork", "default", dry_run=True) self.assertEqual(job_id, 12349) self.assertHandler(rlib2.R_2_networks_name_disconnect) self.assertDryRun() def testGetNetworkTags(self): self.rapi.AddResponse("[]") self.assertEqual([], self.client.GetNetworkTags("fooNetwork")) self.assertHandler(rlib2.R_2_networks_name_tags) self.assertItems(["fooNetwork"]) def testAddNetworkTags(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.AddNetworkTags("fooNetwork", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_networks_name_tags) self.assertItems(["fooNetwork"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testDeleteNetworkTags(self): self.rapi.AddResponse("25826") self.assertEqual(25826, self.client.DeleteNetworkTags("foo", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_networks_name_tags) self.assertItems(["foo"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testModifyInstance(self): self.rapi.AddResponse("23681") job_id = self.client.ModifyInstance("inst7210", os_name="linux") self.assertEqual(job_id, 23681) self.assertItems(["inst7210"]) self.assertHandler(rlib2.R_2_instances_name_modify) self.assertEqual(serializer.LoadJson(self.rapi.GetLastRequestData()), { "os_name": "linux", }) def testModifyCluster(self): for mnh in [None, False, True]: self.rapi.AddResponse("14470") self.assertEqual(14470, self.client.ModifyCluster(maintain_node_health=mnh)) self.assertHandler(rlib2.R_2_cluster_modify) self.assertItems([]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(len(data), 1) self.assertEqual(data["maintain_node_health"], mnh) self.assertEqual(self.rapi.CountPending(), 0) def testRedistributeConfig(self): self.rapi.AddResponse("3364") job_id = self.client.RedistributeConfig() self.assertEqual(job_id, 3364) self.assertItems([]) self.assertHandler(rlib2.R_2_redist_config) def testActivateInstanceDisks(self): self.rapi.AddResponse("23547") job_id = self.client.ActivateInstanceDisks("inst28204") self.assertEqual(job_id, 23547) self.assertItems(["inst28204"]) self.assertHandler(rlib2.R_2_instances_name_activate_disks) self.assertFalse(self.rapi.GetLastHandler().queryargs) def testActivateInstanceDisksIgnoreSize(self): self.rapi.AddResponse("11044") job_id = self.client.ActivateInstanceDisks("inst28204", ignore_size=True) self.assertEqual(job_id, 11044) self.assertItems(["inst28204"]) self.assertHandler(rlib2.R_2_instances_name_activate_disks) self.assertQuery("ignore_size", ["1"]) def testDeactivateInstanceDisks(self): self.rapi.AddResponse("14591") job_id = self.client.DeactivateInstanceDisks("inst28234") self.assertEqual(job_id, 14591) self.assertItems(["inst28234"]) self.assertHandler(rlib2.R_2_instances_name_deactivate_disks) self.assertFalse(self.rapi.GetLastHandler().queryargs) def testRecreateInstanceDisks(self): self.rapi.AddResponse("13553") job_id = self.client.RecreateInstanceDisks("inst23153") self.assertEqual(job_id, 13553) self.assertItems(["inst23153"]) 
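# The disk state resources exercised around here (activate, deactivate,
# recreate, console) take neither query parameters nor a request body, hence
# the paired assertFalse() checks on queryargs and GetLastRequestData(). By
# contrast, GrowInstanceDisk (tested below) sends a JSON body, roughly:
#
#   cl.GrowInstanceDisk("eze8ch", 0, 128)
#   # expected to hit /2/instances/eze8ch/disk/0/grow with {"amount": 128}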
self.assertHandler(rlib2.R_2_instances_name_recreate_disks) self.assertFalse(self.rapi.GetLastHandler().queryargs) def testGetInstanceConsole(self): self.rapi.AddResponse("26876") job_id = self.client.GetInstanceConsole("inst21491") self.assertEqual(job_id, 26876) self.assertItems(["inst21491"]) self.assertHandler(rlib2.R_2_instances_name_console) self.assertFalse(self.rapi.GetLastHandler().queryargs) self.assertFalse(self.rapi.GetLastRequestData()) def testGrowInstanceDisk(self): for idx, wait_for_sync in enumerate([None, False, True]): amount = 128 + (512 * idx) self.assertEqual(self.rapi.CountPending(), 0) self.rapi.AddResponse("30783") self.assertEqual(30783, self.client.GrowInstanceDisk("eze8ch", idx, amount, wait_for_sync=wait_for_sync)) self.assertHandler(rlib2.R_2_instances_name_disk_grow) self.assertItems(["eze8ch", str(idx)]) data = serializer.LoadJson(self.rapi.GetLastRequestData()) if wait_for_sync is None: self.assertEqual(len(data), 1) self.assert_("wait_for_sync" not in data) else: self.assertEqual(len(data), 2) self.assertEqual(data["wait_for_sync"], wait_for_sync) self.assertEqual(data["amount"], amount) self.assertEqual(self.rapi.CountPending(), 0) def testGetGroupTags(self): self.rapi.AddResponse("[]") self.assertEqual([], self.client.GetGroupTags("fooGroup")) self.assertHandler(rlib2.R_2_groups_name_tags) self.assertItems(["fooGroup"]) def testAddGroupTags(self): self.rapi.AddResponse("1234") self.assertEqual(1234, self.client.AddGroupTags("fooGroup", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_groups_name_tags) self.assertItems(["fooGroup"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testDeleteGroupTags(self): self.rapi.AddResponse("25826") self.assertEqual(25826, self.client.DeleteGroupTags("foo", ["awesome"], dry_run=True)) self.assertHandler(rlib2.R_2_groups_name_tags) self.assertItems(["foo"]) self.assertDryRun() self.assertQuery("tag", ["awesome"]) def testQuery(self): for idx, what in enumerate(constants.QR_VIA_RAPI): for idx2, qfilter in enumerate([None, ["?", "name"]]): job_id = 11010 + (idx << 4) + (idx2 << 16) fields = sorted(query.ALL_FIELDS[what].keys())[:10] self.rapi.AddResponse(str(job_id)) self.assertEqual(self.client.Query(what, fields, qfilter=qfilter), job_id) self.assertItems([what]) self.assertHandler(rlib2.R_2_query) self.assertFalse(self.rapi.GetLastHandler().queryargs) data = serializer.LoadJson(self.rapi.GetLastRequestData()) self.assertEqual(data["fields"], fields) if qfilter is None: self.assertTrue("qfilter" not in data) else: self.assertEqual(data["qfilter"], qfilter) self.assertEqual(self.rapi.CountPending(), 0) def testQueryFields(self): exp_result = objects.QueryFieldsResponse(fields=[ objects.QueryFieldDefinition(name="pnode", title="PNode", kind=constants.QFT_NUMBER), objects.QueryFieldDefinition(name="other", title="Other", kind=constants.QFT_BOOL), ]) for what in constants.QR_VIA_RAPI: for fields in [None, ["name", "_unknown_"], ["&", "?|"]]: self.rapi.AddResponse(serializer.DumpJson(exp_result.ToDict())) result = self.client.QueryFields(what, fields=fields) self.assertItems([what]) self.assertHandler(rlib2.R_2_query_fields) self.assertFalse(self.rapi.GetLastRequestData()) queryargs = self.rapi.GetLastHandler().queryargs if fields is None: self.assertFalse(queryargs) else: self.assertEqual(queryargs, { "fields": [",".join(fields)], }) self.assertEqual(objects.QueryFieldsResponse.FromDict(result).ToDict(), exp_result.ToDict()) self.assertEqual(self.rapi.CountPending(), 0) def 
testWaitForJobCompletionNoChange(self): resp = serializer.DumpJson({ "status": constants.JOB_STATUS_WAITING, }) for retries in [1, 5, 25]: for _ in range(retries): self.rapi.AddResponse(resp) self.assertFalse(self.client.WaitForJobCompletion(22789, period=None, retries=retries)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["22789"]) self.assertEqual(self.rapi.CountPending(), 0) def testWaitForJobCompletionAlreadyFinished(self): self.rapi.AddResponse(serializer.DumpJson({ "status": constants.JOB_STATUS_SUCCESS, })) self.assertTrue(self.client.WaitForJobCompletion(22793, period=None, retries=1)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["22793"]) self.assertEqual(self.rapi.CountPending(), 0) def testWaitForJobCompletionEmptyResponse(self): self.rapi.AddResponse("{}") self.assertFalse(self.client.WaitForJobCompletion(22793, period=None, retries=10)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["22793"]) self.assertEqual(self.rapi.CountPending(), 0) def testWaitForJobCompletionOutOfRetries(self): for retries in [3, 10, 21]: for _ in range(retries): self.rapi.AddResponse(serializer.DumpJson({ "status": constants.JOB_STATUS_RUNNING, })) self.assertFalse(self.client.WaitForJobCompletion(30948, period=None, retries=retries - 1)) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["30948"]) self.assertEqual(self.rapi.CountPending(), 1) self.rapi.ResetResponses() def testWaitForJobCompletionSuccessAndFailure(self): for retries in [1, 4, 13]: for (success, end_status) in [(False, constants.JOB_STATUS_ERROR), (True, constants.JOB_STATUS_SUCCESS)]: for _ in range(retries): self.rapi.AddResponse(serializer.DumpJson({ "status": constants.JOB_STATUS_RUNNING, })) self.rapi.AddResponse(serializer.DumpJson({ "status": end_status, })) result = self.client.WaitForJobCompletion(3187, period=None, retries=retries + 1) self.assertEqual(result, success) self.assertHandler(rlib2.R_2_jobs_id) self.assertItems(["3187"]) self.assertEqual(self.rapi.CountPending(), 0) class RapiTestRunner(unittest.TextTestRunner): def run(self, *args): global _used_handlers assert _used_handlers is None _used_handlers = set() try: # Run actual tests result = unittest.TextTestRunner.run(self, *args) diff = (set(connector.CONNECTOR.values()) - _used_handlers - _KNOWN_UNUSED) if diff: raise AssertionError("The following RAPI resources were not used by the" " RAPI client: %r" % utils.CommaJoin(diff)) finally: # Reset global variable _used_handlers = None return result if __name__ == "__main__": client.UsesRapiClient(testutils.GanetiTestProgram)(testRunner=RapiTestRunner) ganeti-2.9.3/test/py/tempfile_fork_unittest.py0000744000000000000000000000667512244641676021573 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
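# In short, the strategy of the tests below: the tempfile module draws
# candidate file names from a per-process random sequence. The parent claims
# all TMP_MAX candidate names first; a forked child can then only create a
# temporary file if utils.ResetTempfileModule() gave it a freshly seeded
# name sequence.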
"""Script for testing utils.ResetTempfileModule""" import os import sys import errno import shutil import tempfile import unittest import logging from ganeti import utils import testutils # This constant is usually at a much higher value. Setting it lower for test # purposes. tempfile.TMP_MAX = 3 class TestResetTempfileModule(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testNoReset(self): if ((sys.hexversion >= 0x020703F0 and sys.hexversion < 0x03000000) or sys.hexversion >= 0x030203F0): # We can't test the no_reset case on Python 2.7+ return # evil Debian sid... if (hasattr(tempfile._RandomNameSequence, "rng") and type(tempfile._RandomNameSequence.rng) == property): return self._Test(False) def testReset(self): self._Test(True) def _Test(self, reset): self.failIf(tempfile.TMP_MAX > 10) # Initialize tempfile module (fd, _) = tempfile.mkstemp(dir=self.tmpdir, prefix="init.", suffix="") os.close(fd) (notify_read, notify_write) = os.pipe() pid = os.fork() if pid == 0: # Child try: try: if reset: utils.ResetTempfileModule() os.close(notify_write) # Wait for parent to close pipe os.read(notify_read, 1) try: # This is a short-lived process, not caring about closing file # descriptors (_, path) = tempfile.mkstemp(dir=self.tmpdir, prefix="test.", suffix="") except EnvironmentError, err: if err.errno == errno.EEXIST: # Couldnt' create temporary file (e.g. because we run out of # retries) os._exit(2) raise logging.debug("Child created %s", path) os._exit(0) except Exception: logging.exception("Unhandled error") finally: os._exit(1) # Parent os.close(notify_read) # Create parent's temporary files for _ in range(tempfile.TMP_MAX): (fd, path) = tempfile.mkstemp(dir=self.tmpdir, prefix="test.", suffix="") os.close(fd) logging.debug("Parent created %s", path) # Notify child by closing pipe os.close(notify_write) (_, status) = os.waitpid(pid, 0) self.failIf(os.WIFSIGNALED(status)) if reset: # If the tempfile module was reset, it should not fail to create # temporary files expected = 0 else: expected = 2 self.assertEqual(os.WEXITSTATUS(status), expected) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.hypervisor.hv_xen_unittest.py0000744000000000000000000007673012271422343023676 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing ganeti.hypervisor.hv_xen""" import string # pylint: disable=W0402 import unittest import tempfile import shutil import random import os import mock from ganeti import constants from ganeti import objects from ganeti import pathutils from ganeti import hypervisor from ganeti import utils from ganeti import errors from ganeti import compat from ganeti.hypervisor import hv_xen import testutils # Map from hypervisor class to hypervisor name HVCLASS_TO_HVNAME = utils.InvertDict(hypervisor._HYPERVISOR_MAP) class TestConsole(unittest.TestCase): def test(self): hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} for cls in [hv_xen.XenPvmHypervisor(), hv_xen.XenHvmHypervisor()]: instance = objects.Instance(name="xen.example.com", primary_node="node24828-uuid") node = objects.Node(name="node24828", uuid="node24828-uuid") cons = cls.GetInstanceConsole(instance, node, hvparams, {}) self.assertTrue(cons.Validate()) self.assertEqual(cons.kind, constants.CONS_SSH) self.assertEqual(cons.host, node.name) self.assertEqual(cons.command[-1], instance.name) class TestCreateConfigCpus(unittest.TestCase): def testEmpty(self): for cpu_mask in [None, ""]: self.assertEqual(hv_xen._CreateConfigCpus(cpu_mask), "cpus = [ ]") def testAll(self): self.assertEqual(hv_xen._CreateConfigCpus(constants.CPU_PINNING_ALL), None) def testOne(self): self.assertEqual(hv_xen._CreateConfigCpus("9"), "cpu = \"9\"") def testMultiple(self): self.assertEqual(hv_xen._CreateConfigCpus("0-2,4,5-5:3:all"), ("cpus = [ \"0,1,2,4,5\", \"3\", \"%s\" ]" % constants.CPU_PINNING_ALL_XEN)) class TestGetCommand(testutils.GanetiTestCase): def testCommandExplicit(self): """Test the case when the command is given as class parameter explicitly. """ expected_cmd = "xl" hv = hv_xen.XenHypervisor(_cmd=constants.XEN_CMD_XL) self.assertEqual(hv._GetCommand(None), expected_cmd) def testCommandInvalid(self): """Test the case an invalid command is given as class parameter explicitly. 
""" hv = hv_xen.XenHypervisor(_cmd="invalidcommand") self.assertRaises(errors.ProgrammerError, hv._GetCommand, None) def testCommandHvparams(self): expected_cmd = "xl" test_hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} hv = hv_xen.XenHypervisor() self.assertEqual(hv._GetCommand(test_hvparams), expected_cmd) def testCommandHvparamsInvalid(self): test_hvparams = {} hv = hv_xen.XenHypervisor() self.assertRaises(errors.HypervisorError, hv._GetCommand, test_hvparams) def testCommandHvparamsCmdInvalid(self): test_hvparams = {constants.HV_XEN_CMD: "invalidcommand"} hv = hv_xen.XenHypervisor() self.assertRaises(errors.ProgrammerError, hv._GetCommand, test_hvparams) class TestParseInstanceList(testutils.GanetiTestCase): def test(self): data = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt") # Exclude node self.assertEqual(hv_xen._ParseInstanceList(data.splitlines(), False), []) # Include node result = hv_xen._ParseInstanceList(data.splitlines(), True) self.assertEqual(len(result), 1) self.assertEqual(len(result[0]), 6) # Name self.assertEqual(result[0][0], hv_xen._DOM0_NAME) # ID self.assertEqual(result[0][1], 0) # Memory self.assertEqual(result[0][2], 1023) # VCPUs self.assertEqual(result[0][3], 1) # State self.assertEqual(result[0][4], "r-----") # Time self.assertAlmostEqual(result[0][5], 121152.6) def testWrongLineFormat(self): tests = [ ["three fields only"], ["name InvalidID 128 1 r----- 12345"], ] for lines in tests: try: hv_xen._ParseInstanceList(["Header would be here"] + lines, False) except errors.HypervisorError, err: self.assertTrue("Can't parse instance list" in str(err)) else: self.fail("Exception was not raised") class TestGetInstanceList(testutils.GanetiTestCase): def _Fail(self): return utils.RunResult(constants.EXIT_FAILURE, None, "stdout", "stderr", None, NotImplemented, NotImplemented) def testTimeout(self): fn = testutils.CallCounter(self._Fail) try: hv_xen._GetInstanceList(fn, False, _timeout=0.1) except errors.HypervisorError, err: self.assertTrue("timeout exceeded" in str(err)) else: self.fail("Exception was not raised") self.assertTrue(fn.Count() < 10, msg="'xm list' was called too many times") def _Success(self, stdout): return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None, NotImplemented, NotImplemented) def testSuccess(self): data = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt") fn = testutils.CallCounter(compat.partial(self._Success, data)) result = hv_xen._GetInstanceList(fn, True, _timeout=0.1) self.assertEqual(len(result), 4) self.assertEqual(map(compat.fst, result), [ "Domain-0", "server01.example.com", "web3106215069.example.com", "testinstance.example.com", ]) self.assertEqual(fn.Count(), 1) class TestParseNodeInfo(testutils.GanetiTestCase): def testEmpty(self): self.assertEqual(hv_xen._ParseNodeInfo(""), {}) def testUnknownInput(self): data = "\n".join([ "foo bar", "something else goes", "here", ]) self.assertEqual(hv_xen._ParseNodeInfo(data), {}) def testBasicInfo(self): data = testutils.ReadTestData("xen-xm-info-4.0.1.txt") result = hv_xen._ParseNodeInfo(data) self.assertEqual(result, { "cpu_nodes": 1, "cpu_sockets": 2, "cpu_total": 4, "hv_version": (4, 0), "memory_free": 8004, "memory_total": 16378, }) class TestMergeInstanceInfo(testutils.GanetiTestCase): def testEmpty(self): self.assertEqual(hv_xen._MergeInstanceInfo({}, []), {}) def _FakeXmList(self, include_node): return [ (hv_xen._DOM0_NAME, NotImplemented, 4096, 7, NotImplemented, NotImplemented), ("inst1.example.com", NotImplemented, 2048, 4, 
NotImplemented, NotImplemented), ] def testMissingNodeInfo(self): instance_list = self._FakeXmList(True) result = hv_xen._MergeInstanceInfo({}, instance_list) self.assertEqual(result, { "memory_dom0": 4096, "cpu_dom0": 7, }) def testWithNodeInfo(self): info = testutils.ReadTestData("xen-xm-info-4.0.1.txt") instance_list = self._FakeXmList(True) result = hv_xen._GetNodeInfo(info, instance_list) self.assertEqual(result, { "cpu_nodes": 1, "cpu_sockets": 2, "cpu_total": 4, "cpu_dom0": 7, "hv_version": (4, 0), "memory_dom0": 4096, "memory_free": 8004, "memory_hv": 2230, "memory_total": 16378, }) class TestGetConfigFileDiskData(unittest.TestCase): def testLetterCount(self): self.assertEqual(len(hv_xen._DISK_LETTERS), 26) def testNoDisks(self): self.assertEqual(hv_xen._GetConfigFileDiskData([], "hd"), []) def testManyDisks(self): for offset in [0, 1, 10]: disks = [(objects.Disk(dev_type=constants.DT_PLAIN), "/tmp/disk/%s" % idx) for idx in range(len(hv_xen._DISK_LETTERS) + offset)] if offset == 0: result = hv_xen._GetConfigFileDiskData(disks, "hd") self.assertEqual(result, [ "'phy:/tmp/disk/%s,hd%s,r'" % (idx, string.ascii_lowercase[idx]) for idx in range(len(hv_xen._DISK_LETTERS) + offset) ]) else: try: hv_xen._GetConfigFileDiskData(disks, "hd") except errors.HypervisorError, err: self.assertEqual(str(err), "Too many disks") else: self.fail("Exception was not raised") def testTwoLvDisksWithMode(self): disks = [ (objects.Disk(dev_type=constants.DT_PLAIN, mode=constants.DISK_RDWR), "/tmp/diskFirst"), (objects.Disk(dev_type=constants.DT_PLAIN, mode=constants.DISK_RDONLY), "/tmp/diskLast"), ] result = hv_xen._GetConfigFileDiskData(disks, "hd") self.assertEqual(result, [ "'phy:/tmp/diskFirst,hda,w'", "'phy:/tmp/diskLast,hdb,r'", ]) def testFileDisks(self): disks = [ (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDWR, physical_id=[constants.FD_LOOP]), "/tmp/diskFirst"), (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDONLY, physical_id=[constants.FD_BLKTAP]), "/tmp/diskTwo"), (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDWR, physical_id=[constants.FD_LOOP]), "/tmp/diskThree"), (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDONLY, physical_id=[constants.FD_BLKTAP2]), "/tmp/diskFour"), (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDWR, physical_id=[constants.FD_BLKTAP]), "/tmp/diskLast"), ] result = hv_xen._GetConfigFileDiskData(disks, "sd") self.assertEqual(result, [ "'file:/tmp/diskFirst,sda,w'", "'tap:aio:/tmp/diskTwo,sdb,r'", "'file:/tmp/diskThree,sdc,w'", "'tap2:tapdisk:aio:/tmp/diskFour,sdd,r'", "'tap:aio:/tmp/diskLast,sde,w'", ]) def testInvalidFileDisk(self): disks = [ (objects.Disk(dev_type=constants.DT_FILE, mode=constants.DISK_RDWR, physical_id=["#unknown#"]), "/tmp/diskinvalid"), ] self.assertRaises(KeyError, hv_xen._GetConfigFileDiskData, disks, "sd") class TestXenHypervisorRunXen(unittest.TestCase): XEN_SUB_CMD = "help" def testCommandUnknown(self): cmd = "#unknown command#" self.assertFalse(cmd in constants.KNOWN_XEN_COMMANDS) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=NotImplemented, _cmd=cmd) self.assertRaises(errors.ProgrammerError, hv._RunXen, [], None) def testCommandNoHvparams(self): hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=NotImplemented) hvparams = None self.assertRaises(errors.HypervisorError, hv._RunXen, [self.XEN_SUB_CMD], hvparams) def testCommandFromHvparams(self): expected_xen_cmd = "xl" hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} mock_run_cmd 
= mock.Mock() hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv._RunXen([self.XEN_SUB_CMD], hvparams=hvparams) mock_run_cmd.assert_called_with([expected_xen_cmd, self.XEN_SUB_CMD]) class TestXenHypervisorGetInstanceList(unittest.TestCase): RESULT_OK = utils.RunResult(0, None, "", "", "", None, None) XEN_LIST = "list" def testNoHvparams(self): expected_xen_cmd = "xm" mock_run_cmd = mock.Mock( return_value=self.RESULT_OK ) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) self.assertRaises(errors.HypervisorError, hv._GetInstanceList, True, None) def testFromHvparams(self): expected_xen_cmd = "xl" hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} mock_run_cmd = mock.Mock( return_value=self.RESULT_OK ) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv._GetInstanceList(True, hvparams) mock_run_cmd.assert_called_with([expected_xen_cmd, self.XEN_LIST]) class TestXenHypervisorListInstances(unittest.TestCase): RESULT_OK = utils.RunResult(0, None, "", "", "", None, None) XEN_LIST = "list" def testNoHvparams(self): expected_xen_cmd = "xm" mock_run_cmd = mock.Mock( return_value=self.RESULT_OK ) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) self.assertRaises(errors.HypervisorError, hv.ListInstances) def testHvparamsXl(self): expected_xen_cmd = "xl" hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} mock_run_cmd = mock.Mock( return_value=self.RESULT_OK ) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv.ListInstances(hvparams=hvparams) mock_run_cmd.assert_called_with([expected_xen_cmd, self.XEN_LIST]) class TestXenHypervisorCheckToolstack(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.cfg_name = "xen_config" self.cfg_path = utils.PathJoin(self.tmpdir, self.cfg_name) self.hv = hv_xen.XenHypervisor() def tearDown(self): shutil.rmtree(self.tmpdir) def testBinaryNotFound(self): RESULT_FAILED = utils.RunResult(1, None, "", "", "", None, None) mock_run_cmd = mock.Mock(return_value=RESULT_FAILED) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) result = hv._CheckToolstackBinary("xl") self.assertFalse(result) def testCheckToolstackXlConfigured(self): RESULT_OK = utils.RunResult(0, None, "", "", "", None, None) mock_run_cmd = mock.Mock(return_value=RESULT_OK) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) result = hv._CheckToolstackXlConfigured() self.assertTrue(result) def testCheckToolstackXlNotConfigured(self): RESULT_FAILED = utils.RunResult( 1, None, "", "ERROR: A different toolstack (xm) have been selected!", "", None, None) mock_run_cmd = mock.Mock(return_value=RESULT_FAILED) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) result = hv._CheckToolstackXlConfigured() self.assertFalse(result) def testCheckToolstackXlFails(self): RESULT_FAILED = utils.RunResult( 1, None, "", "ERROR: The pink bunny hid the binary.", "", None, None) mock_run_cmd = mock.Mock(return_value=RESULT_FAILED) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) self.assertRaises(errors.HypervisorError, hv._CheckToolstackXlConfigured) class TestXenHypervisorWriteConfigFile(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testWriteError(self): cfgdir = utils.PathJoin(self.tmpdir, "foobar") hv = hv_xen.XenHypervisor(_cfgdir=cfgdir, _run_cmd_fn=NotImplemented, _cmd=NotImplemented) 
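    # The configuration directory is deliberately left uncreated, which is
    # what makes the _WriteConfigFile call below fail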
self.assertFalse(os.path.exists(cfgdir)) try: hv._WriteConfigFile("name", "data") except errors.HypervisorError, err: self.assertTrue(str(err).startswith("Cannot write Xen instance")) else: self.fail("Exception was not raised") class TestXenHypervisorVerify(unittest.TestCase): def setUp(self): output = testutils.ReadTestData("xen-xm-info-4.0.1.txt") self._result_ok = utils.RunResult(0, None, output, "", "", None, None) def testVerify(self): hvparams = {constants.HV_XEN_CMD : constants.XEN_CMD_XL} mock_run_cmd = mock.Mock(return_value=self._result_ok) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv._CheckToolstack = mock.Mock(return_value=True) result = hv.Verify(hvparams) self.assertTrue(result is None) def testVerifyToolstackNotOk(self): hvparams = {constants.HV_XEN_CMD : constants.XEN_CMD_XL} mock_run_cmd = mock.Mock(return_value=self._result_ok) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv._CheckToolstack = mock.Mock() hv._CheckToolstack.side_effect = errors.HypervisorError("foo") result = hv.Verify(hvparams) self.assertTrue(result is not None) def testVerifyFailing(self): result_failed = utils.RunResult(1, None, "", "", "", None, None) mock_run_cmd = mock.Mock(return_value=result_failed) hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented, _run_cmd_fn=mock_run_cmd) hv._CheckToolstack = mock.Mock(return_value=True) result = hv.Verify() self.assertTrue(result is not None) class _TestXenHypervisor(object): TARGET = NotImplemented CMD = NotImplemented HVNAME = NotImplemented VALID_HVPARAMS = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} def setUp(self): super(_TestXenHypervisor, self).setUp() self.tmpdir = tempfile.mkdtemp() self.vncpw = "".join(random.sample(string.ascii_letters, 10)) self.vncpw_path = utils.PathJoin(self.tmpdir, "vncpw") utils.WriteFile(self.vncpw_path, data=self.vncpw) def tearDown(self): super(_TestXenHypervisor, self).tearDown() shutil.rmtree(self.tmpdir) def _GetHv(self, run_cmd=NotImplemented): return self.TARGET(_cfgdir=self.tmpdir, _run_cmd_fn=run_cmd, _cmd=self.CMD) def _SuccessCommand(self, stdout, cmd): self.assertEqual(cmd[0], self.CMD) return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None, NotImplemented, NotImplemented) def _FailingCommand(self, cmd): self.assertEqual(cmd[0], self.CMD) return utils.RunResult(constants.EXIT_FAILURE, None, "", "This command failed", None, NotImplemented, NotImplemented) def _FakeTcpPing(self, expected, result, target, port, **kwargs): self.assertEqual((target, port), expected) return result def testReadingNonExistentConfigFile(self): hv = self._GetHv() try: hv._ReadConfigFile("inst15780.example.com") except errors.HypervisorError, err: self.assertTrue(str(err).startswith("Failed to load Xen config file:")) else: self.fail("Exception was not raised") def testRemovingAutoConfigFile(self): name = "inst8206.example.com" cfgfile = utils.PathJoin(self.tmpdir, name) autodir = utils.PathJoin(self.tmpdir, "auto") autocfgfile = utils.PathJoin(autodir, name) os.mkdir(autodir) utils.WriteFile(autocfgfile, data="") hv = self._GetHv() self.assertTrue(os.path.isfile(autocfgfile)) hv._WriteConfigFile(name, "content") self.assertFalse(os.path.exists(autocfgfile)) self.assertEqual(utils.ReadFile(cfgfile), "content") def _XenList(self, cmd): self.assertEqual(cmd, [self.CMD, "list"]) # TODO: Use actual data from "xl" command output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt") return self._SuccessCommand(output, cmd) def testGetInstanceInfo(self): hv = 
self._GetHv(run_cmd=self._XenList) (name, instid, memory, vcpus, state, runtime) = \ hv.GetInstanceInfo("server01.example.com") self.assertEqual(name, "server01.example.com") self.assertEqual(instid, 1) self.assertEqual(memory, 1024) self.assertEqual(vcpus, 1) self.assertEqual(state, "-b----") self.assertAlmostEqual(runtime, 167643.2) def testGetInstanceInfoDom0(self): hv = self._GetHv(run_cmd=self._XenList) # TODO: Not sure if this is actually used anywhere (can't find it), but the # code supports querying for Dom0 (name, instid, memory, vcpus, state, runtime) = \ hv.GetInstanceInfo(hv_xen._DOM0_NAME) self.assertEqual(name, "Domain-0") self.assertEqual(instid, 0) self.assertEqual(memory, 1023) self.assertEqual(vcpus, 1) self.assertEqual(state, "r-----") self.assertAlmostEqual(runtime, 154706.1) def testGetInstanceInfoUnknown(self): hv = self._GetHv(run_cmd=self._XenList) result = hv.GetInstanceInfo("unknown.example.com") self.assertTrue(result is None) def testGetAllInstancesInfo(self): hv = self._GetHv(run_cmd=self._XenList) result = hv.GetAllInstancesInfo() self.assertEqual(map(compat.fst, result), [ "server01.example.com", "web3106215069.example.com", "testinstance.example.com", ]) def testListInstances(self): hv = self._GetHv(run_cmd=self._XenList) self.assertEqual(hv.ListInstances(), [ "server01.example.com", "web3106215069.example.com", "testinstance.example.com", ]) def _StartInstanceCommand(self, inst, paused, failcreate, cmd): if cmd == [self.CMD, "info"]: output = testutils.ReadTestData("xen-xm-info-4.0.1.txt") elif cmd == [self.CMD, "list"]: output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt") elif cmd[:2] == [self.CMD, "create"]: args = cmd[2:] cfgfile = utils.PathJoin(self.tmpdir, inst.name) if paused: self.assertEqual(args, ["-p", cfgfile]) else: self.assertEqual(args, [cfgfile]) if failcreate: return self._FailingCommand(cmd) output = "" else: self.fail("Unhandled command: %s" % (cmd, )) return self._SuccessCommand(output, cmd) def _MakeInstance(self): # Copy default parameters bep = objects.FillDict(constants.BEC_DEFAULTS, {}) hvp = objects.FillDict(constants.HVC_DEFAULTS[self.HVNAME], {}) # Override default VNC password file path if constants.HV_VNC_PASSWORD_FILE in hvp: hvp[constants.HV_VNC_PASSWORD_FILE] = self.vncpw_path disks = [ (objects.Disk(dev_type=constants.DT_PLAIN, mode=constants.DISK_RDWR), utils.PathJoin(self.tmpdir, "disk0")), (objects.Disk(dev_type=constants.DT_PLAIN, mode=constants.DISK_RDONLY), utils.PathJoin(self.tmpdir, "disk1")), ] inst = objects.Instance(name="server01.example.com", hvparams=hvp, beparams=bep, osparams={}, nics=[], os="deb1", disks=map(compat.fst, disks)) inst.UpgradeConfig() return (inst, disks) def testStartInstance(self): (inst, disks) = self._MakeInstance() pathutils.LOG_XEN_DIR = self.tmpdir for failcreate in [False, True]: for paused in [False, True]: run_cmd = compat.partial(self._StartInstanceCommand, inst, paused, failcreate) hv = self._GetHv(run_cmd=run_cmd) # Ensure instance is not listed self.assertTrue(inst.name not in hv.ListInstances()) # Remove configuration cfgfile = utils.PathJoin(self.tmpdir, inst.name) utils.RemoveFile(cfgfile) if failcreate: self.assertRaises(errors.HypervisorError, hv.StartInstance, inst, disks, paused) # Check whether a stale config file is left behind self.assertFalse(os.path.exists(cfgfile)) else: hv.StartInstance(inst, disks, paused) # Check if configuration was updated lines = utils.ReadFile(cfgfile).splitlines() if constants.HV_VNC_PASSWORD_FILE in inst.hvparams: 
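          # HVM-style parameters: the VNC password read from the temporary
          # file must show up verbatim in the generated config; PVM
          # instances are checked via their kernel arguments in the else
          # branch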
self.assertTrue(("vncpasswd = '%s'" % self.vncpw) in lines) else: extra = inst.hvparams[constants.HV_KERNEL_ARGS] self.assertTrue(("extra = '%s'" % extra) in lines) def _StopInstanceCommand(self, instance_name, force, fail, cmd): if (cmd == [self.CMD, "list"]): output = "Name ID Mem VCPUs State Time(s)\n" \ "Domain-0 0 1023 1 r----- 142691.0\n" \ "%s 417 128 1 r----- 3.2\n" % instance_name elif cmd[:2] == [self.CMD, "destroy"]: self.assertEqual(cmd[2:], [instance_name]) output = "" elif not force and cmd[:3] == [self.CMD, "shutdown", "-w"]: self.assertEqual(cmd[3:], [instance_name]) output = "" else: self.fail("Unhandled command: %s" % (cmd, )) if fail: # Simulate a failing command return self._FailingCommand(cmd) else: return self._SuccessCommand(output, cmd) def testStopInstance(self): name = "inst4284.example.com" cfgfile = utils.PathJoin(self.tmpdir, name) cfgdata = "config file content\n" for force in [False, True]: for fail in [False, True]: utils.WriteFile(cfgfile, data=cfgdata) run_cmd = compat.partial(self._StopInstanceCommand, name, force, fail) hv = self._GetHv(run_cmd=run_cmd) self.assertTrue(os.path.isfile(cfgfile)) if fail: try: hv._StopInstance(name, force, None) except errors.HypervisorError, err: self.assertTrue(str(err).startswith("listing instances failed"), msg=str(err)) else: self.fail("Exception was not raised") self.assertEqual(utils.ReadFile(cfgfile), cfgdata, msg=("Configuration was removed when stopping" " instance failed")) else: hv._StopInstance(name, force, None) self.assertFalse(os.path.exists(cfgfile)) def _MigrateNonRunningInstCmd(self, cmd): if cmd == [self.CMD, "list"]: output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt") else: self.fail("Unhandled command: %s" % (cmd, )) return self._SuccessCommand(output, cmd) def testMigrateInstanceNotRunning(self): name = "nonexistinginstance.example.com" target = constants.IP4_ADDRESS_LOCALHOST port = 14618 hv = self._GetHv(run_cmd=self._MigrateNonRunningInstCmd) for live in [False, True]: try: hv._MigrateInstance(NotImplemented, name, target, port, live, self.VALID_HVPARAMS, _ping_fn=NotImplemented) except errors.HypervisorError, err: self.assertEqual(str(err), "Instance not running, cannot migrate") else: self.fail("Exception was not raised") def _MigrateInstTargetUnreachCmd(self, cmd): if cmd == [self.CMD, "list"]: output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt") else: self.fail("Unhandled command: %s" % (cmd, )) return self._SuccessCommand(output, cmd) def testMigrateTargetUnreachable(self): name = "server01.example.com" target = constants.IP4_ADDRESS_LOCALHOST port = 28349 hv = self._GetHv(run_cmd=self._MigrateInstTargetUnreachCmd) hvparams = {constants.HV_XEN_CMD: self.CMD} for live in [False, True]: if self.CMD == constants.XEN_CMD_XL: # TODO: Detect unreachable targets pass else: try: hv._MigrateInstance(NotImplemented, name, target, port, live, hvparams, _ping_fn=compat.partial(self._FakeTcpPing, (target, port), False)) except errors.HypervisorError, err: wanted = "Remote host %s not" % target self.assertTrue(str(err).startswith(wanted)) else: self.fail("Exception was not raised") def _MigrateInstanceCmd(self, cluster_name, instance_name, target, port, live, fail, cmd): if cmd == [self.CMD, "list"]: output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt") elif cmd[:2] == [self.CMD, "migrate"]: if self.CMD == constants.XEN_CMD_XM: args = ["-p", str(port)] if live: args.append("-l") elif self.CMD == constants.XEN_CMD_XL: args = [ "-s", constants.XL_SSH_CMD % 
cluster_name, "-C", utils.PathJoin(self.tmpdir, instance_name), ] else: self.fail("Unknown Xen command '%s'" % self.CMD) args.extend([instance_name, target]) self.assertEqual(cmd[2:], args) if fail: return self._FailingCommand(cmd) output = "" else: self.fail("Unhandled command: %s" % (cmd, )) return self._SuccessCommand(output, cmd) def testMigrateInstance(self): clustername = "cluster.example.com" instname = "server01.example.com" target = constants.IP4_ADDRESS_LOCALHOST port = 22364 hvparams = {constants.HV_XEN_CMD: self.CMD} for live in [False, True]: for fail in [False, True]: ping_fn = \ testutils.CallCounter(compat.partial(self._FakeTcpPing, (target, port), True)) run_cmd = \ compat.partial(self._MigrateInstanceCmd, clustername, instname, target, port, live, fail) hv = self._GetHv(run_cmd=run_cmd) if fail: try: hv._MigrateInstance(clustername, instname, target, port, live, hvparams, _ping_fn=ping_fn) except errors.HypervisorError, err: self.assertTrue(str(err).startswith("Failed to migrate instance")) else: self.fail("Exception was not raised") else: hv._MigrateInstance(clustername, instname, target, port, live, hvparams, _ping_fn=ping_fn) if self.CMD == constants.XEN_CMD_XM: expected_pings = 1 else: expected_pings = 0 self.assertEqual(ping_fn.Count(), expected_pings) def _GetNodeInfoCmd(self, fail, cmd): if cmd == [self.CMD, "info"]: if fail: return self._FailingCommand(cmd) else: output = testutils.ReadTestData("xen-xm-info-4.0.1.txt") elif cmd == [self.CMD, "list"]: if fail: self.fail("'xm list' shouldn't be called when 'xm info' failed") else: output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt") else: self.fail("Unhandled command: %s" % (cmd, )) return self._SuccessCommand(output, cmd) def testGetNodeInfo(self): run_cmd = compat.partial(self._GetNodeInfoCmd, False) hv = self._GetHv(run_cmd=run_cmd) result = hv.GetNodeInfo() self.assertEqual(result["hv_version"], (4, 0)) self.assertEqual(result["memory_free"], 8004) def testGetNodeInfoFailing(self): run_cmd = compat.partial(self._GetNodeInfoCmd, True) hv = self._GetHv(run_cmd=run_cmd) self.assertTrue(hv.GetNodeInfo() is None) def _MakeTestClass(cls, cmd): """Makes a class for testing. The returned class has structure as shown in the following pseudo code: class Test{cls.__name__}{cmd}(_TestXenHypervisor, unittest.TestCase): TARGET = {cls} CMD = {cmd} HVNAME = {Hypervisor name retrieved using class} @type cls: class @param cls: Hypervisor class to be tested @type cmd: string @param cmd: Hypervisor command @rtype: tuple @return: Class name and class object (not instance) """ name = "Test%sCmd%s" % (cls.__name__, cmd.title()) bases = (_TestXenHypervisor, unittest.TestCase) hvname = HVCLASS_TO_HVNAME[cls] return (name, type(name, bases, dict(TARGET=cls, CMD=cmd, HVNAME=hvname))) # Create test classes programmatically instead of manually to reduce the risk # of forgetting some combinations for cls in [hv_xen.XenPvmHypervisor, hv_xen.XenHvmHypervisor]: for cmd in constants.KNOWN_XEN_COMMANDS: (name, testcls) = _MakeTestClass(cls, cmd) assert name not in locals() locals()[name] = testcls if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.serializer_unittest.py0000744000000000000000000001046212244641676022351 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2008 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the serializer module""" import unittest from ganeti import serializer from ganeti import errors from ganeti import ht import testutils class TestSerializer(testutils.GanetiTestCase): """Serializer tests""" _TESTDATA = [ "test", 255, [1, 2, 3], (1, 2, 3), { "1": 2, "foo": "bar", }, ["abc", 1, 2, 3, 999, { "a1": ("Hello", "World"), "a2": "This is only a test", "a3": None, }, { "foo": "bar", }, ] ] def _TestSerializer(self, dump_fn, load_fn): for data in self._TESTDATA: self.failUnless(dump_fn(data).endswith("\n")) self.assertEqualValues(load_fn(dump_fn(data)), data) def testGeneric(self): self._TestSerializer(serializer.Dump, serializer.Load) def testSignedGeneric(self): self._TestSigned(serializer.DumpSigned, serializer.LoadSigned) def testJson(self): self._TestSerializer(serializer.DumpJson, serializer.LoadJson) def testSignedJson(self): self._TestSigned(serializer.DumpSignedJson, serializer.LoadSignedJson) def _TestSigned(self, dump_fn, load_fn): for data in self._TESTDATA: self.assertEqualValues(load_fn(dump_fn(data, "mykey"), "mykey"), (data, "")) self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey", salt="mysalt"), "myprivatekey"), (data, "mysalt")) keydict = { "mykey_id": "myprivatekey", } self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey", salt="mysalt", key_selector="mykey_id"), keydict.get), (data, "mysalt")) self.assertRaises(errors.SignatureError, load_fn, dump_fn(data, "myprivatekey", salt="mysalt", key_selector="mykey_id"), {}.get) self.assertRaises(errors.SignatureError, load_fn, dump_fn("test", "myprivatekey"), "myotherkey") self.assertRaises(errors.SignatureError, load_fn, serializer.DumpJson("This is a test"), "mykey") self.assertRaises(errors.SignatureError, load_fn, serializer.DumpJson({}), "mykey") # Message missing salt and HMAC tdata = { "msg": "Foo", } self.assertRaises(errors.SignatureError, load_fn, serializer.DumpJson(tdata), "mykey") class TestLoadAndVerifyJson(unittest.TestCase): def testNoJson(self): self.assertRaises(errors.ParseError, serializer.LoadAndVerifyJson, "", NotImplemented) self.assertRaises(errors.ParseError, serializer.LoadAndVerifyJson, "}", NotImplemented) def testVerificationFails(self): self.assertRaises(errors.ParseError, serializer.LoadAndVerifyJson, "{}", lambda _: False) verify_fn = ht.TListOf(ht.TNonEmptyString) try: serializer.LoadAndVerifyJson("{}", verify_fn) except errors.ParseError, err: self.assertTrue(str(err).endswith(str(verify_fn))) else: self.fail("Exception not raised") def testSuccess(self): self.assertEqual(serializer.LoadAndVerifyJson("{}", ht.TAny), {}) self.assertEqual(serializer.LoadAndVerifyJson("\"Foo\"", ht.TAny), "Foo") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.outils_unittest.py0000744000000000000000000000575612244641676021531 
0ustar00rootroot00000000000000#!/usr/bin/python
#
# Copyright (C) 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Script for unittesting the outils module"""

import unittest

from ganeti import outils

import testutils


class SlotsAutoSlot(outils.AutoSlots):
  @classmethod
  def _GetSlots(mcs, attr):
    return attr["SLOTS"]


class AutoSlotted(object):
  __metaclass__ = SlotsAutoSlot

  SLOTS = ["foo", "bar", "baz"]


class TestAutoSlot(unittest.TestCase):
  def test(self):
    slotted = AutoSlotted()
    self.assertEqual(slotted.__slots__, AutoSlotted.SLOTS)


class TestContainerToDicts(unittest.TestCase):
  def testUnknownType(self):
    for value in [None, 19410, "xyz"]:
      try:
        outils.ContainerToDicts(value)
      except TypeError, err:
        self.assertTrue(str(err).startswith("Unknown container type"))
      else:
        self.fail("Exception was not raised")

  def testEmptyDict(self):
    value = {}
    self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
    self.assertEqual(outils.ContainerToDicts(value), {})

  def testEmptySequences(self):
    for cls in [list, tuple, set, frozenset]:
      self.assertEqual(outils.ContainerToDicts(cls()), [])


class _FakeWithFromDict:
  def FromDict(self, _):
    # Raise the exception type, not the "NotImplemented" singleton (raising
    # the singleton would only produce a confusing TypeError)
    raise NotImplementedError


class TestContainerFromDicts(unittest.TestCase):
  def testUnknownType(self):
    for cls in [str, int, bool]:
      try:
        outils.ContainerFromDicts(None, cls, NotImplemented)
      except TypeError, err:
        self.assertTrue(str(err).startswith("Unknown container type"))
      else:
        self.fail("Exception was not raised")

      try:
        outils.ContainerFromDicts(None, cls(), NotImplemented)
      except TypeError, err:
        self.assertTrue(str(err).endswith("is not a type"))
      else:
        self.fail("Exception was not raised")

  def testEmptyDict(self):
    value = {}
    self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
    self.assertEqual(outils.ContainerFromDicts(value, dict, NotImplemented),
                     {})

  def testEmptySequences(self):
    for cls in [list, tuple, set, frozenset]:
      self.assertEqual(outils.ContainerFromDicts([], cls, _FakeWithFromDict),
                       cls())


if __name__ == "__main__":
  testutils.GanetiTestProgram()
ganeti-2.9.3/test/py/ganeti.utils_unittest.py0000744000000000000000000003115112244641676021336 0ustar00rootroot00000000000000#!/usr/bin/python
#
# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the utils module""" import errno import fcntl import glob import os import os.path import re import shutil import signal import socket import stat import tempfile import time import unittest import warnings import random import operator import testutils from ganeti import constants from ganeti import compat from ganeti import utils from ganeti import errors from ganeti.utils import RunCmd, \ FirstFree, \ RunParts class TestParseCpuMask(unittest.TestCase): """Test case for the ParseCpuMask function.""" def testWellFormed(self): self.assertEqual(utils.ParseCpuMask(""), []) self.assertEqual(utils.ParseCpuMask("1"), [1]) self.assertEqual(utils.ParseCpuMask("0-2,4,5-5"), [0,1,2,4,5]) def testInvalidInput(self): for data in ["garbage", "0,", "0-1-2", "2-1", "1-a"]: self.assertRaises(errors.ParseError, utils.ParseCpuMask, data) class TestParseMultiCpuMask(unittest.TestCase): """Test case for the ParseMultiCpuMask function.""" def testWellFormed(self): self.assertEqual(utils.ParseMultiCpuMask(""), []) self.assertEqual(utils.ParseMultiCpuMask("1"), [[1]]) self.assertEqual(utils.ParseMultiCpuMask("0-2,4,5-5"), [[0, 1, 2, 4, 5]]) self.assertEqual(utils.ParseMultiCpuMask("all"), [[-1]]) self.assertEqual(utils.ParseMultiCpuMask("0-2:all:4,6-8"), [[0, 1, 2], [-1], [4, 6, 7, 8]]) def testInvalidInput(self): for data in ["garbage", "0,", "0-1-2", "2-1", "1-a", "all-all"]: self.assertRaises(errors.ParseError, utils.ParseCpuMask, data) class TestGetMounts(unittest.TestCase): """Test case for GetMounts().""" TESTDATA = ( "rootfs / rootfs rw 0 0\n" "none /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0\n" "none /proc proc rw,nosuid,nodev,noexec,relatime 0 0\n") def setUp(self): self.tmpfile = tempfile.NamedTemporaryFile() utils.WriteFile(self.tmpfile.name, data=self.TESTDATA) def testGetMounts(self): self.assertEqual(utils.GetMounts(filename=self.tmpfile.name), [ ("rootfs", "/", "rootfs", "rw"), ("none", "/sys", "sysfs", "rw,nosuid,nodev,noexec,relatime"), ("none", "/proc", "proc", "rw,nosuid,nodev,noexec,relatime"), ]) class TestFirstFree(unittest.TestCase): """Test case for the FirstFree function""" def test(self): """Test FirstFree""" self.failUnlessEqual(FirstFree([0, 1, 3]), 2) self.failUnlessEqual(FirstFree([]), None) self.failUnlessEqual(FirstFree([3, 4, 6]), 0) self.failUnlessEqual(FirstFree([3, 4, 6], base=3), 5) self.failUnlessRaises(AssertionError, FirstFree, [0, 3, 4, 6], base=3) class TestTimeFunctions(unittest.TestCase): """Test case for time functions""" def runTest(self): self.assertEqual(utils.SplitTime(1), (1, 0)) self.assertEqual(utils.SplitTime(1.5), (1, 500000)) self.assertEqual(utils.SplitTime(1218448917.4809151), (1218448917, 480915)) self.assertEqual(utils.SplitTime(123.48012), (123, 480120)) self.assertEqual(utils.SplitTime(123.9996), (123, 999600)) self.assertEqual(utils.SplitTime(123.9995), (123, 999500)) self.assertEqual(utils.SplitTime(123.9994), (123, 999400)) self.assertEqual(utils.SplitTime(123.999999999), (123, 999999)) self.assertRaises(AssertionError, utils.SplitTime, -1) self.assertEqual(utils.MergeTime((1, 0)), 1.0) self.assertEqual(utils.MergeTime((1, 500000)), 1.5) self.assertEqual(utils.MergeTime((1218448917, 500000)), 1218448917.5) self.assertEqual(round(utils.MergeTime((1218448917, 481000)), 3), 1218448917.481) 
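    # MergeTime is the inverse of SplitTime: a (seconds, microseconds) pair
    # is folded back into a float, e.g. (1, 801000) -> 1.801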
self.assertEqual(round(utils.MergeTime((1, 801000)), 3), 1.801) self.assertRaises(AssertionError, utils.MergeTime, (0, -1)) self.assertRaises(AssertionError, utils.MergeTime, (0, 1000000)) self.assertRaises(AssertionError, utils.MergeTime, (0, 9999999)) self.assertRaises(AssertionError, utils.MergeTime, (-1, 0)) self.assertRaises(AssertionError, utils.MergeTime, (-9999, 0)) class FieldSetTestCase(unittest.TestCase): """Test case for FieldSets""" def testSimpleMatch(self): f = utils.FieldSet("a", "b", "c", "def") self.failUnless(f.Matches("a")) self.failIf(f.Matches("d"), "Substring matched") self.failIf(f.Matches("defghi"), "Prefix string matched") self.failIf(f.NonMatching(["b", "c"])) self.failIf(f.NonMatching(["a", "b", "c", "def"])) self.failUnless(f.NonMatching(["a", "d"])) def testRegexMatch(self): f = utils.FieldSet("a", "b([0-9]+)", "c") self.failUnless(f.Matches("b1")) self.failUnless(f.Matches("b99")) self.failIf(f.Matches("b/1")) self.failIf(f.NonMatching(["b12", "c"])) self.failUnless(f.NonMatching(["a", "1"])) class TestForceDictType(unittest.TestCase): """Test case for ForceDictType""" KEY_TYPES = { "a": constants.VTYPE_INT, "b": constants.VTYPE_BOOL, "c": constants.VTYPE_STRING, "d": constants.VTYPE_SIZE, "e": constants.VTYPE_MAYBE_STRING, } def _fdt(self, dict, allowed_values=None): if allowed_values is None: utils.ForceDictType(dict, self.KEY_TYPES) else: utils.ForceDictType(dict, self.KEY_TYPES, allowed_values=allowed_values) return dict def testSimpleDict(self): self.assertEqual(self._fdt({}), {}) self.assertEqual(self._fdt({"a": 1}), {"a": 1}) self.assertEqual(self._fdt({"a": "1"}), {"a": 1}) self.assertEqual(self._fdt({"a": 1, "b": 1}), {"a":1, "b": True}) self.assertEqual(self._fdt({"b": 1, "c": "foo"}), {"b": True, "c": "foo"}) self.assertEqual(self._fdt({"b": 1, "c": False}), {"b": True, "c": ""}) self.assertEqual(self._fdt({"b": "false"}), {"b": False}) self.assertEqual(self._fdt({"b": "False"}), {"b": False}) self.assertEqual(self._fdt({"b": False}), {"b": False}) self.assertEqual(self._fdt({"b": "true"}), {"b": True}) self.assertEqual(self._fdt({"b": "True"}), {"b": True}) self.assertEqual(self._fdt({"d": "4"}), {"d": 4}) self.assertEqual(self._fdt({"d": "4M"}), {"d": 4}) self.assertEqual(self._fdt({"e": None, }), {"e": None, }) self.assertEqual(self._fdt({"e": "Hello World", }), {"e": "Hello World", }) self.assertEqual(self._fdt({"e": False, }), {"e": "", }) self.assertEqual(self._fdt({"b": "hello", }, ["hello"]), {"b": "hello"}) def testErrors(self): self.assertRaises(errors.TypeEnforcementError, self._fdt, {"a": "astring"}) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"b": "hello"}) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"c": True}) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"d": "astring"}) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"d": "4 L"}) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"e": object(), }) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"e": [], }) self.assertRaises(errors.TypeEnforcementError, self._fdt, {"x": None, }) self.assertRaises(errors.TypeEnforcementError, self._fdt, []) self.assertRaises(errors.ProgrammerError, utils.ForceDictType, {"b": "hello"}, {"b": "no-such-type"}) class TestValidateServiceName(unittest.TestCase): def testValid(self): testnames = [ 0, 1, 2, 3, 1024, 65000, 65534, 65535, "ganeti", "gnt-masterd", "HELLO_WORLD_SVC", "hello.world.1", "0", "80", "1111", "65535", ] for name in testnames: 
self.assertEqual(utils.ValidateServiceName(name), name) def testInvalid(self): testnames = [ -15756, -1, 65536, 133428083, "", "Hello World!", "!", "'", "\"", "\t", "\n", "`", "-8546", "-1", "65536", (129 * "A"), ] for name in testnames: self.assertRaises(errors.OpPrereqError, utils.ValidateServiceName, name) class TestReadLockedPidFile(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testNonExistent(self): path = utils.PathJoin(self.tmpdir, "nonexist") self.assert_(utils.ReadLockedPidFile(path) is None) def testUnlocked(self): path = utils.PathJoin(self.tmpdir, "pid") utils.WriteFile(path, data="123") self.assert_(utils.ReadLockedPidFile(path) is None) def testLocked(self): path = utils.PathJoin(self.tmpdir, "pid") utils.WriteFile(path, data="123") fl = utils.FileLock.Open(path) try: fl.Exclusive(blocking=True) self.assertEqual(utils.ReadLockedPidFile(path), 123) finally: fl.Close() self.assert_(utils.ReadLockedPidFile(path) is None) def testError(self): path = utils.PathJoin(self.tmpdir, "foobar", "pid") utils.WriteFile(utils.PathJoin(self.tmpdir, "foobar"), data="") # open(2) should return ENOTDIR self.assertRaises(EnvironmentError, utils.ReadLockedPidFile, path) class TestFindMatch(unittest.TestCase): def test(self): data = { "aaaa": "Four A", "bb": {"Two B": True}, re.compile(r"^x(foo|bar|bazX)([0-9]+)$"): (1, 2, 3), } self.assertEqual(utils.FindMatch(data, "aaaa"), ("Four A", [])) self.assertEqual(utils.FindMatch(data, "bb"), ({"Two B": True}, [])) for i in ["foo", "bar", "bazX"]: for j in range(1, 100, 7): self.assertEqual(utils.FindMatch(data, "x%s%s" % (i, j)), ((1, 2, 3), [i, str(j)])) def testNoMatch(self): self.assert_(utils.FindMatch({}, "") is None) self.assert_(utils.FindMatch({}, "foo") is None) self.assert_(utils.FindMatch({}, 1234) is None) data = { "X": "Hello World", re.compile("^(something)$"): "Hello World", } self.assert_(utils.FindMatch(data, "") is None) self.assert_(utils.FindMatch(data, "Hello World") is None) class TestTryConvert(unittest.TestCase): def test(self): for src, fn, result in [ ("1", int, 1), ("a", int, "a"), ("", bool, False), ("a", bool, True), ]: self.assertEqual(utils.TryConvert(fn, src), result) class TestVerifyDictOptions(unittest.TestCase): def setUp(self): self.defaults = { "first_key": "foobar", "foobar": { "key1": "value2", "key2": "value1", }, "another_key": "another_value", } def test(self): some_keys = { "first_key": "blubb", "foobar": { "key2": "foo", }, } utils.VerifyDictOptions(some_keys, self.defaults) def testInvalid(self): some_keys = { "invalid_key": "blubb", "foobar": { "key2": "foo", }, } self.assertRaises(errors.OpPrereqError, utils.VerifyDictOptions, some_keys, self.defaults) def testNestedInvalid(self): some_keys = { "foobar": { "key2": "foo", "key3": "blibb" }, } self.assertRaises(errors.OpPrereqError, utils.VerifyDictOptions, some_keys, self.defaults) def testMultiInvalid(self): some_keys = { "foobar": { "key1": "value3", "key6": "Right here", }, "invalid_with_sub": { "sub1": "value3", }, } self.assertRaises(errors.OpPrereqError, utils.VerifyDictOptions, some_keys, self.defaults) class TestValidateDeviceNames(unittest.TestCase): def testEmpty(self): utils.ValidateDeviceNames("NIC", []) utils.ValidateDeviceNames("disk", []) def testNoName(self): nics = [{}, {}] utils.ValidateDeviceNames("NIC", nics) def testInvalidName(self): self.assertRaises(errors.OpPrereqError, utils.ValidateDeviceNames, "disk", [{constants.IDISK_NAME: "42"}]) 
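    # Purely numeric names are rejected, presumably so they cannot be
    # confused with device indices; the same rule is checked for NICs below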
self.assertRaises(errors.OpPrereqError, utils.ValidateDeviceNames, "NIC", [{constants.INIC_NAME: "42"}]) def testUsedName(self): disks = [{constants.IDISK_NAME: "name1"}, {constants.IDISK_NAME: "name1"}] self.assertRaises(errors.OpPrereqError, utils.ValidateDeviceNames, "disk", disks) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.query_unittest.py0000744000000000000000000022506612271422343021341 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.query""" import re import unittest import random from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors from ganeti import query from ganeti import objects from ganeti import cmdlib import ganeti.masterd.instance as gmi import testutils class TestConstants(unittest.TestCase): def test(self): self.assertEqual(set(query._VERIFY_FN.keys()), constants.QFT_ALL) class _QueryData: def __init__(self, data, **kwargs): self.data = data for name, value in kwargs.items(): setattr(self, name, value) def __iter__(self): return iter(self.data) def _GetDiskSize(nr, ctx, item): disks = item["disks"] try: return disks[nr] except IndexError: return query._FS_UNAVAIL class TestQuery(unittest.TestCase): def test(self): (STATIC, DISK) = range(10, 12) fielddef = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), STATIC, 0, lambda ctx, item: item["name"]), (query._MakeField("master", "Master", constants.QFT_BOOL, "Master"), STATIC, 0, lambda ctx, item: ctx.mastername == item["name"]), ] + [(query._MakeField("disk%s.size" % i, "DiskSize%s" % i, constants.QFT_UNIT, "Disk size %s" % i), DISK, 0, compat.partial(_GetDiskSize, i)) for i in range(4)], []) q = query.Query(fielddef, ["name"]) self.assertEqual(q.RequestedData(), set([STATIC])) self.assertEqual(len(q._fields), 1) self.assertEqual(len(q.GetFields()), 1) self.assertEqual(q.GetFields()[0].ToDict(), objects.QueryFieldDefinition(name="name", title="Name", kind=constants.QFT_TEXT, doc="Name").ToDict()) # Create data only once query has been prepared data = [ { "name": "node1", "disks": [0, 1, 2], }, { "name": "node2", "disks": [3, 4], }, { "name": "node3", "disks": [5, 6, 7], }, ] self.assertEqual(q.Query(_QueryData(data, mastername="node3")), [[(constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node2")], [(constants.RS_NORMAL, "node3")]]) self.assertEqual(q.OldStyleQuery(_QueryData(data, mastername="node3")), [["node1"], ["node2"], ["node3"]]) q = query.Query(fielddef, ["name", "master"]) self.assertEqual(q.RequestedData(), set([STATIC])) self.assertEqual(len(q._fields), 2) self.assertEqual(q.Query(_QueryData(data, mastername="node3")), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], 
[(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, False)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, True)], ]) q = query.Query(fielddef, ["name", "master", "disk0.size"]) self.assertEqual(q.RequestedData(), set([STATIC, DISK])) self.assertEqual(len(q._fields), 3) self.assertEqual(q.Query(_QueryData(data, mastername="node2")), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, 0)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True), (constants.RS_NORMAL, 3)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, False), (constants.RS_NORMAL, 5)], ]) # With unknown column q = query.Query(fielddef, ["disk2.size", "disk1.size", "disk99.size", "disk0.size"]) self.assertEqual(q.RequestedData(), set([DISK])) self.assertEqual(len(q._fields), 4) self.assertEqual(q.Query(_QueryData(data, mastername="node2")), [[(constants.RS_NORMAL, 2), (constants.RS_NORMAL, 1), (constants.RS_UNKNOWN, None), (constants.RS_NORMAL, 0)], [(constants.RS_UNAVAIL, None), (constants.RS_NORMAL, 4), (constants.RS_UNKNOWN, None), (constants.RS_NORMAL, 3)], [(constants.RS_NORMAL, 7), (constants.RS_NORMAL, 6), (constants.RS_UNKNOWN, None), (constants.RS_NORMAL, 5)], ]) self.assertRaises(errors.OpPrereqError, q.OldStyleQuery, _QueryData(data, mastername="node2")) self.assertEqual([fdef.ToDict() for fdef in q.GetFields()], [ { "name": "disk2.size", "title": "DiskSize2", "kind": constants.QFT_UNIT, "doc": "Disk size 2", }, { "name": "disk1.size", "title": "DiskSize1", "kind": constants.QFT_UNIT, "doc": "Disk size 1", }, { "name": "disk99.size", "title": "disk99.size", "kind": constants.QFT_UNKNOWN, "doc": "Unknown field 'disk99.size'", }, { "name": "disk0.size", "title": "DiskSize0", "kind": constants.QFT_UNIT, "doc": "Disk size 0", }, ]) # Empty query q = query.Query(fielddef, []) self.assertEqual(q.RequestedData(), set([])) self.assertEqual(len(q._fields), 0) self.assertEqual(q.Query(_QueryData(data, mastername="node2")), [[], [], []]) self.assertEqual(q.OldStyleQuery(_QueryData(data, mastername="node2")), [[], [], []]) self.assertEqual(q.GetFields(), []) def testPrepareFieldList(self): # Duplicate titles for (a, b) in [("name", "name"), ("NAME", "name")]: self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("name", b, constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), (query._MakeField("other", a, constants.QFT_TEXT, "Other"), None, 0, lambda *args: None), ], []) # Non-lowercase names self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("NAME", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), ], []) self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("Name", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), ], []) # Empty name self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), ], []) # Empty title self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("name", "", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), ], []) # Whitespace in title self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("name", "Co lu mn", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), ], []) # No callable function self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, 0, None), ], []) # Invalid documentation for doc in ["", ".", 
"Hello world\n", "Hello\nWo\nrld", "Hello World!", "HelloWorld.", "only lowercase", ",", " x y z .\t", " "]: self.assertRaises(AssertionError, query._PrepareFieldList, [ (query._MakeField("name", "Name", constants.QFT_TEXT, doc), None, 0, lambda *args: None), ], []) # Duplicate field name self.assertRaises(ValueError, query._PrepareFieldList, [ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda *args: None), (query._MakeField("name", "Other", constants.QFT_OTHER, "Other"), None, 0, lambda *args: None), ], []) def testUnknown(self): fielddef = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda _, item: "name%s" % item), (query._MakeField("other0", "Other0", constants.QFT_TIMESTAMP, "Other"), None, 0, lambda *args: 1234), (query._MakeField("nodata", "NoData", constants.QFT_NUMBER, "No data"), None, 0, lambda *args: query._FS_NODATA ), (query._MakeField("unavail", "Unavail", constants.QFT_BOOL, "Unavail"), None, 0, lambda *args: query._FS_UNAVAIL), ], []) for selected in [["foo"], ["Hello", "World"], ["name1", "other", "foo"]]: q = query.Query(fielddef, selected) self.assertEqual(len(q._fields), len(selected)) self.assert_(compat.all(len(row) == len(selected) for row in q.Query(_QueryData(range(1, 10))))) self.assertEqual(q.Query(_QueryData(range(1, 10))), [[(constants.RS_UNKNOWN, None)] * len(selected) for i in range(1, 10)]) self.assertEqual([fdef.ToDict() for fdef in q.GetFields()], [{ "name": name, "title": name, "kind": constants.QFT_UNKNOWN, "doc": "Unknown field '%s'" % name} for name in selected]) q = query.Query(fielddef, ["name", "other0", "nodata", "unavail"]) self.assertEqual(len(q._fields), 4) self.assertEqual(q.OldStyleQuery(_QueryData(range(1, 10))), [ ["name%s" % i, 1234, None, None] for i in range(1, 10) ]) q = query.Query(fielddef, ["name", "other0", "nodata", "unavail", "unk"]) self.assertEqual(len(q._fields), 5) self.assertEqual(q.Query(_QueryData(range(1, 10))), [[(constants.RS_NORMAL, "name%s" % i), (constants.RS_NORMAL, 1234), (constants.RS_NODATA, None), (constants.RS_UNAVAIL, None), (constants.RS_UNKNOWN, None)] for i in range(1, 10)]) def testAliases(self): fields = [ (query._MakeField("a", "a-title", constants.QFT_TEXT, "Field A"), None, 0, lambda *args: None), (query._MakeField("b", "b-title", constants.QFT_TEXT, "Field B"), None, 0, lambda *args: None), ] # duplicate field self.assertRaises(AssertionError, query._PrepareFieldList, fields, [("b", "a")]) self.assertRaises(AssertionError, query._PrepareFieldList, fields, [("c", "b"), ("c", "a")]) # missing target self.assertRaises(AssertionError, query._PrepareFieldList, fields, [("c", "d")]) fdefs = query._PrepareFieldList(fields, [("c", "b")]) self.assertEqual(len(fdefs), 3) self.assertEqual(fdefs["b"][1:], fdefs["c"][1:]) class TestGetNodeRole(unittest.TestCase): def test(self): tested_role = set() master_uuid = "969502b9-f632-4d3d-83a5-a78b0ca8cdf6" node_uuid = "d75499b5-83e3-4b80-b6fe-3e1aee7e5a35" checks = [ (constants.NR_MASTER, objects.Node(name="node1", uuid=master_uuid)), (constants.NR_MCANDIDATE, objects.Node(name="node1", uuid=node_uuid, master_candidate=True)), (constants.NR_REGULAR, objects.Node(name="node1", uuid=node_uuid)), (constants.NR_DRAINED, objects.Node(name="node1", uuid=node_uuid, drained=True)), (constants.NR_OFFLINE, objects.Node(name="node1", uuid=node_uuid, offline=True)), ] for (role, node) in checks: result = query._GetNodeRole(node, master_uuid) self.assertEqual(result, role) tested_role.add(result) 
self.assertEqual(tested_role, constants.NR_ALL) class TestNodeQuery(unittest.TestCase): def _Create(self, selected): return query.Query(query.NODE_FIELDS, selected) def testSimple(self): cluster = objects.Cluster(cluster_name="testcluster", ndparams=constants.NDC_DEFAULTS.copy()) grp1 = objects.NodeGroup(name="default", uuid="c0e89160-18e7-11e0-a46e-001d0904baeb", alloc_policy=constants.ALLOC_POLICY_PREFERRED, ipolicy=objects.MakeEmptyIPolicy(), ndparams={}, ) grp2 = objects.NodeGroup(name="group2", uuid="c0e89160-18e7-11e0-a46e-001d0904babe", alloc_policy=constants.ALLOC_POLICY_PREFERRED, ipolicy=objects.MakeEmptyIPolicy(), ndparams={constants.ND_SPINDLE_COUNT: 2}, ) groups = {grp1.uuid: grp1, grp2.uuid: grp2} nodes = [ objects.Node(name="node1", drained=False, group=grp1.uuid, ndparams={}), objects.Node(name="node2", drained=True, group=grp2.uuid, ndparams={}), objects.Node(name="node3", drained=False, group=grp1.uuid, ndparams={constants.ND_SPINDLE_COUNT: 4}), ] for live_data in [None, dict.fromkeys([node.name for node in nodes], {})]: nqd = query.NodeQueryData(nodes, live_data, None, None, None, None, groups, None, cluster) q = self._Create(["name", "drained"]) self.assertEqual(q.RequestedData(), set([query.NQ_CONFIG])) self.assertEqual(q.Query(nqd), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, False)], ]) self.assertEqual(q.OldStyleQuery(nqd), [["node1", False], ["node2", True], ["node3", False]]) q = self._Create(["ndp/spindle_count"]) self.assertEqual(q.RequestedData(), set([query.NQ_GROUP])) self.assertEqual(q.Query(nqd), [[(constants.RS_NORMAL, constants.NDC_DEFAULTS[constants.ND_SPINDLE_COUNT])], [(constants.RS_NORMAL, grp2.ndparams[constants.ND_SPINDLE_COUNT])], [(constants.RS_NORMAL, nodes[2].ndparams[constants.ND_SPINDLE_COUNT])], ]) def test(self): selected = query.NODE_FIELDS.keys() field_index = dict((field, idx) for idx, field in enumerate(selected)) q = self._Create(selected) self.assertEqual(q.RequestedData(), set([query.NQ_CONFIG, query.NQ_LIVE, query.NQ_INST, query.NQ_GROUP, query.NQ_OOB])) cluster = objects.Cluster(cluster_name="testcluster", hvparams=constants.HVC_DEFAULTS, beparams={ constants.PP_DEFAULT: constants.BEC_DEFAULTS, }, nicparams={ constants.PP_DEFAULT: constants.NICC_DEFAULTS, }, ndparams=constants.NDC_DEFAULTS, ) node_names = ["node%s" % i for i in range(20)] master_name = node_names[3] nodes = [ objects.Node(name=name, primary_ip="192.0.2.%s" % idx, secondary_ip="192.0.100.%s" % idx, serial_no=7789 * idx, master_candidate=(name != master_name and idx % 3 == 0), offline=False, drained=False, powered=True, vm_capable=True, master_capable=False, ndparams={}, group="default", ctime=1290006900, mtime=1290006913, uuid="fd9ccebe-6339-43c9-a82e-94bbe575%04d" % idx) for idx, name in enumerate(node_names) ] master_node = nodes[3] master_node.AddTag("masternode") master_node.AddTag("another") master_node.AddTag("tag") master_node.ctime = None master_node.mtime = None assert master_node.name == master_name live_data_node = nodes[4] assert live_data_node.name != master_name fake_live_data = { "bootid": "a2504766-498e-4b25-b21e-d23098dc3af4", "cnodes": 4, "cnos": 3, "csockets": 4, "ctotal": 8, "mnode": 128, "mfree": 100, "mtotal": 4096, "dfree": 5 * 1024 * 1024, "dtotal": 100 * 1024 * 1024, "spfree": 0, "sptotal": 0, } assert (sorted(query._NODE_LIVE_FIELDS.keys()) == sorted(fake_live_data.keys())) live_data = 
dict.fromkeys([node.uuid for node in nodes], {})
    live_data[live_data_node.uuid] = \
      dict((query._NODE_LIVE_FIELDS[name][2], value)
           for name, value in fake_live_data.items())

    node_to_primary_uuid = dict((node.uuid, set()) for node in nodes)
    node_to_primary_uuid[master_node.uuid].update(["inst1", "inst2"])

    node_to_secondary_uuid = dict((node.uuid, set()) for node in nodes)
    node_to_secondary_uuid[live_data_node.uuid].update(["instX", "instY",
                                                        "instZ"])

    inst_uuid_to_inst_name = {
      "inst1": "inst1-name",
      "inst2": "inst2-name",
      "instX": "instX-name",
      "instY": "instY-name",
      "instZ": "instZ-name",
      }

    ng_uuid = "492b4b74-8670-478a-b98d-4c53a76238e6"
    groups = {
      ng_uuid: objects.NodeGroup(name="ng1", uuid=ng_uuid, ndparams={}),
      }

    oob_not_powered_node = nodes[0]
    oob_not_powered_node.powered = False
    oob_support = dict((node.uuid, False) for node in nodes)
    oob_support[master_node.uuid] = True
    oob_support[oob_not_powered_node.uuid] = True

    master_node.group = ng_uuid

    nqd = query.NodeQueryData(nodes, live_data, master_node.uuid,
                              node_to_primary_uuid, node_to_secondary_uuid,
                              inst_uuid_to_inst_name, groups, oob_support,
                              cluster)

    result = q.Query(nqd)
    self.assert_(compat.all(len(row) == len(selected) for row in result))
    self.assertEqual([row[field_index["name"]] for row in result],
                     [(constants.RS_NORMAL, name) for name in node_names])

    node_to_row = dict((row[field_index["name"]][1], idx)
                       for idx, row in enumerate(result))

    master_row = result[node_to_row[master_name]]
    self.assert_(master_row[field_index["master"]])
    self.assertEqual(master_row[field_index["role"]],
                     (constants.RS_NORMAL, constants.NR_MASTER))
    self.assertEqual(master_row[field_index["group"]],
                     (constants.RS_NORMAL, "ng1"))
    self.assertEqual(master_row[field_index["group.uuid"]],
                     (constants.RS_NORMAL, ng_uuid))
    self.assertEqual(master_row[field_index["ctime"]],
                     (constants.RS_UNAVAIL, None))
    self.assertEqual(master_row[field_index["mtime"]],
                     (constants.RS_UNAVAIL, None))

    self.assert_(row[field_index["pip"]] == node.primary_ip and
                 row[field_index["sip"]] == node.secondary_ip and
                 set(row[field_index["tags"]]) == node.GetTags() and
                 row[field_index["serial_no"]] == node.serial_no and
                 row[field_index["role"]] ==
                   query._GetNodeRole(node, master_name) and
                 (node.name == master_name or
                  (row[field_index["group"]] == "" and
                   row[field_index["group.uuid"]] is None and
                   row[field_index["ctime"]] == (constants.RS_NORMAL,
                                                 node.ctime) and
                   row[field_index["mtime"]] == (constants.RS_NORMAL,
                                                 node.mtime) and
                   row[field_index["powered"]] == (constants.RS_NORMAL,
                                                   True))) or
                 (node.name == oob_not_powered_node and
                  row[field_index["powered"]] == (constants.RS_NORMAL,
                                                  False)) or
                 row[field_index["powered"]] == (constants.RS_UNAVAIL, None)
                 for row, node in zip(result, nodes))

    live_data_row = result[node_to_row[live_data_node.name]]

    for (field, value) in fake_live_data.items():
      self.assertEqual(live_data_row[field_index[field]],
                       (constants.RS_NORMAL, value))

    self.assertEqual(master_row[field_index["pinst_cnt"]],
                     (constants.RS_NORMAL, 2))
    self.assertEqual(live_data_row[field_index["sinst_cnt"]],
                     (constants.RS_NORMAL, 3))
    self.assertEqual(master_row[field_index["pinst_list"]],
                     (constants.RS_NORMAL,
                      [inst_uuid_to_inst_name[uuid]
                       for uuid in node_to_primary_uuid[master_node.uuid]]))
    self.assertEqual(live_data_row[field_index["sinst_list"]],
                     (constants.RS_NORMAL,
                      utils.NiceSort(
                        [inst_uuid_to_inst_name[uuid]
                         for uuid
                         in node_to_secondary_uuid[live_data_node.uuid]])))

  def testGetLiveNodeField(self):
    nodes = [
      objects.Node(name="node1", drained=False, offline=False,
                   vm_capable=True),
      objects.Node(name="node2", drained=True, offline=False,
                   vm_capable=True),
      objects.Node(name="node3", drained=False, offline=False,
                   vm_capable=True),
      objects.Node(name="node4", drained=False, offline=True,
                   vm_capable=True),
      objects.Node(name="node5", drained=False, offline=False,
                   vm_capable=False),
      ]
    live_data = dict.fromkeys([node.name for node in nodes], {})

    # No data
    nqd = query.NodeQueryData(None, None, None, None, None, None, None,
                              None, None)
    self.assertEqual(query._GetLiveNodeField("hello", constants.QFT_NUMBER,
                                             nqd, nodes[0]),
                     query._FS_NODATA)

    # Missing field
    ctx = _QueryData(None, curlive_data={
      "some": 1,
      "other": 2,
      })
    self.assertEqual(query._GetLiveNodeField("hello", constants.QFT_NUMBER,
                                             ctx, nodes[0]),
                     query._FS_UNAVAIL)

    # Wrong format/datatype
    ctx = _QueryData(None, curlive_data={
      "hello": ["Hello World"],
      "other": 2,
      })
    self.assertEqual(query._GetLiveNodeField("hello", constants.QFT_NUMBER,
                                             ctx, nodes[0]),
                     query._FS_UNAVAIL)

    # Offline node
    assert nodes[3].offline
    ctx = _QueryData(None, curlive_data={})
    self.assertEqual(query._GetLiveNodeField("hello", constants.QFT_NUMBER,
                                             ctx, nodes[3]),
                     query._FS_OFFLINE)

    # Wrong field type
    ctx = _QueryData(None, curlive_data={"hello": 123})
    self.assertRaises(AssertionError, query._GetLiveNodeField,
                      "hello", constants.QFT_BOOL, ctx, nodes[0])

    # Non-vm_capable node
    assert not nodes[4].vm_capable
    ctx = _QueryData(None, curlive_data={})
    self.assertEqual(query._GetLiveNodeField("hello", constants.QFT_NUMBER,
                                             ctx, nodes[4]),
                     query._FS_UNAVAIL)


class TestInstanceQuery(unittest.TestCase):
  def _Create(self, selected):
    return query.Query(query.INSTANCE_FIELDS, selected)

  def testSimple(self):
    q = self._Create(["name", "be/maxmem", "ip"])
    self.assertEqual(q.RequestedData(), set([query.IQ_CONFIG]))

    cluster = objects.Cluster(cluster_name="testcluster",
      hvparams=constants.HVC_DEFAULTS,
      beparams={
        constants.PP_DEFAULT: constants.BEC_DEFAULTS,
        },
      nicparams={
        constants.PP_DEFAULT: constants.NICC_DEFAULTS,
        },
      os_hvp={},
      osparams={})

    instances = [
      objects.Instance(name="inst1", hvparams={}, beparams={}, osparams={},
                       nics=[], os="deb1"),
      objects.Instance(name="inst2", hvparams={}, nics=[], osparams={},
                       os="foomoo",
                       beparams={
                         constants.BE_MAXMEM: 512,
                         }),
      objects.Instance(name="inst3", hvparams={}, beparams={}, osparams={},
                       os="dos",
                       nics=[objects.NIC(ip="192.0.2.99", nicparams={})]),
      ]

    iqd = query.InstanceQueryData(instances, cluster, None, [], [], {},
                                  set(), {}, None, None, None)
    self.assertEqual(q.Query(iqd),
                     [[(constants.RS_NORMAL, "inst1"),
                       (constants.RS_NORMAL, 128),
                       (constants.RS_UNAVAIL, None),
                       ],
                      [(constants.RS_NORMAL, "inst2"),
                       (constants.RS_NORMAL, 512),
                       (constants.RS_UNAVAIL, None),
                       ],
                      [(constants.RS_NORMAL, "inst3"),
                       (constants.RS_NORMAL, 128),
                       (constants.RS_NORMAL, "192.0.2.99"),
                       ]])
    self.assertEqual(q.OldStyleQuery(iqd),
                     [["inst1", 128, None],
                      ["inst2", 512, None],
                      ["inst3", 128, "192.0.2.99"]])

  def test(self):
    selected = query.INSTANCE_FIELDS.keys()
    fieldidx = dict((field, idx) for idx, field in enumerate(selected))

    macs = ["00:11:22:%02x:%02x:%02x" % (i % 255, i % 3, (i * 123) % 255)
            for i in range(20)]

    q = self._Create(selected)
    self.assertEqual(q.RequestedData(),
                     set([query.IQ_CONFIG, query.IQ_LIVE, query.IQ_DISKUSAGE,
                          query.IQ_CONSOLE, query.IQ_NODES,
                          query.IQ_NETWORKS]))

    cluster = objects.Cluster(cluster_name="testcluster",
      hvparams=constants.HVC_DEFAULTS,
      beparams={
        constants.PP_DEFAULT: constants.BEC_DEFAULTS,
        },
      nicparams={
        constants.PP_DEFAULT: constants.NICC_DEFAULTS,
        },
      os_hvp={},
      tcpudp_port_pool=set(),
      osparams={
        "deb99": {
          "clean_install": "yes",
          },
        })
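    # Fixture: the "bad" nodes are nodes from which no live data could be
    # fetched; the offline nodes deliberately form a subset of them, as
    # asserted further down.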
offline_nodes = ["nodeoff1-uuid", "nodeoff2-uuid"] bad_nodes = ["nodebad1-uuid", "nodebad2-uuid", "nodebad3-uuid"] +\ offline_nodes node_uuids = ["node%s-uuid" % i for i in range(10)] + bad_nodes instances = [ objects.Instance(name="inst1", hvparams={}, beparams={}, nics=[], uuid="inst1-uuid", ctime=1291244000, mtime=1291244400, serial_no=30, admin_state=constants.ADMINST_UP, hypervisor=constants.HT_XEN_PVM, os="linux1", primary_node="node1-uuid", disk_template=constants.DT_PLAIN, disks=[], disks_active=True, osparams={}), objects.Instance(name="inst2", hvparams={}, nics=[], uuid="inst2-uuid", ctime=1291211000, mtime=1291211077, serial_no=1, admin_state=constants.ADMINST_UP, hypervisor=constants.HT_XEN_HVM, os="deb99", primary_node="node5-uuid", disk_template=constants.DT_DISKLESS, disks=[], disks_active=True, beparams={ constants.BE_MAXMEM: 512, constants.BE_MINMEM: 256, }, osparams={}), objects.Instance(name="inst3", hvparams={}, beparams={}, uuid="inst3-uuid", ctime=1291011000, mtime=1291013000, serial_no=1923, admin_state=constants.ADMINST_DOWN, hypervisor=constants.HT_KVM, os="busybox", primary_node="node6-uuid", disk_template=constants.DT_DRBD8, disks=[], disks_active=False, nics=[ objects.NIC(ip="192.0.2.99", mac=macs.pop(), nicparams={ constants.NIC_LINK: constants.DEFAULT_BRIDGE, }), objects.NIC(ip=None, mac=macs.pop(), nicparams={}), ], osparams={}), objects.Instance(name="inst4", hvparams={}, beparams={}, uuid="inst4-uuid", ctime=1291244390, mtime=1291244395, serial_no=25, admin_state=constants.ADMINST_DOWN, hypervisor=constants.HT_XEN_PVM, os="linux1", primary_node="nodeoff2-uuid", disk_template=constants.DT_DRBD8, disks=[], disks_active=True, nics=[ objects.NIC(ip="192.0.2.1", mac=macs.pop(), nicparams={ constants.NIC_LINK: constants.DEFAULT_BRIDGE, }), objects.NIC(ip="192.0.2.2", mac=macs.pop(), nicparams={}), objects.NIC(ip="192.0.2.3", mac=macs.pop(), nicparams={ constants.NIC_MODE: constants.NIC_MODE_ROUTED, }), objects.NIC(ip="192.0.2.4", mac=macs.pop(), nicparams={ constants.NIC_MODE: constants.NIC_MODE_BRIDGED, constants.NIC_LINK: "eth123", }), ], osparams={}), objects.Instance(name="inst5", hvparams={}, nics=[], uuid="inst5-uuid", ctime=1231211000, mtime=1261200000, serial_no=3, admin_state=constants.ADMINST_UP, hypervisor=constants.HT_XEN_HVM, os="deb99", primary_node="nodebad2-uuid", disk_template=constants.DT_DISKLESS, disks=[], disks_active=True, beparams={ constants.BE_MAXMEM: 512, constants.BE_MINMEM: 512, }, osparams={}), objects.Instance(name="inst6", hvparams={}, nics=[], uuid="inst6-uuid", ctime=7513, mtime=11501, serial_no=13390, admin_state=constants.ADMINST_DOWN, hypervisor=constants.HT_XEN_HVM, os="deb99", primary_node="node7-uuid", disk_template=constants.DT_DISKLESS, disks=[], disks_active=False, beparams={ constants.BE_MAXMEM: 768, constants.BE_MINMEM: 256, }, osparams={ "clean_install": "no", }), objects.Instance(name="inst7", hvparams={}, nics=[], uuid="inst7-uuid", ctime=None, mtime=None, serial_no=1947, admin_state=constants.ADMINST_DOWN, hypervisor=constants.HT_XEN_HVM, os="deb99", primary_node="node6-uuid", disk_template=constants.DT_DISKLESS, disks=[], disks_active=False, beparams={}, osparams={}), objects.Instance(name="inst8", hvparams={}, nics=[], uuid="inst8-uuid", ctime=None, mtime=None, serial_no=19478, admin_state=constants.ADMINST_OFFLINE, hypervisor=constants.HT_XEN_HVM, os="deb99", primary_node="node6-uuid", disk_template=constants.DT_DISKLESS, disks=[], disks_active=False, beparams={}, osparams={}), ] assert not 
utils.FindDuplicates(inst.uuid for inst in instances) assert not utils.FindDuplicates(inst.name for inst in instances) instbyname = dict((inst.name, inst) for inst in instances) disk_usage = dict((inst.uuid, gmi.ComputeDiskSize(inst.disk_template, [{"size": disk.size} for disk in inst.disks])) for inst in instances) inst_bridges = { "inst3-uuid": [constants.DEFAULT_BRIDGE, constants.DEFAULT_BRIDGE], "inst4-uuid": [constants.DEFAULT_BRIDGE, constants.DEFAULT_BRIDGE, None, "eth123"], } live_data = { "inst2-uuid": { "vcpus": 3, }, "inst4-uuid": { "memory": 123, }, "inst6-uuid": { "memory": 768, }, "inst7-uuid": { "vcpus": 3, }, } wrongnode_inst = set(["inst7-uuid"]) consinfo = dict((inst.uuid, None) for inst in instances) consinfo["inst7-uuid"] = \ objects.InstanceConsole(instance="inst7", kind=constants.CONS_SSH, host=instbyname["inst7"].primary_node, user="root", command=["hostname"]).ToDict() nodes = dict([(uuid, objects.Node( name="%s.example.com" % uuid, uuid=uuid, group="default-uuid")) for uuid in node_uuids]) iqd = query.InstanceQueryData(instances, cluster, disk_usage, offline_nodes, bad_nodes, live_data, wrongnode_inst, consinfo, nodes, {}, {}) result = q.Query(iqd) self.assertEqual(len(result), len(instances)) self.assert_(compat.all(len(row) == len(selected) for row in result)) assert len(set(bad_nodes) & set(offline_nodes)) == len(offline_nodes), \ "Offline nodes not included in bad nodes" tested_status = set() for (inst, row) in zip(instances, result): assert inst.primary_node in node_uuids self.assertEqual(row[fieldidx["name"]], (constants.RS_NORMAL, inst.name)) if inst.primary_node in offline_nodes: exp_status = constants.INSTST_NODEOFFLINE elif inst.primary_node in bad_nodes: exp_status = constants.INSTST_NODEDOWN elif inst.uuid in live_data: if inst.uuid in wrongnode_inst: exp_status = constants.INSTST_WRONGNODE elif inst.admin_state == constants.ADMINST_UP: exp_status = constants.INSTST_RUNNING else: exp_status = constants.INSTST_ERRORUP elif inst.admin_state == constants.ADMINST_UP: exp_status = constants.INSTST_ERRORDOWN elif inst.admin_state == constants.ADMINST_DOWN: exp_status = constants.INSTST_ADMINDOWN else: exp_status = constants.INSTST_ADMINOFFLINE self.assertEqual(row[fieldidx["status"]], (constants.RS_NORMAL, exp_status)) (_, status) = row[fieldidx["status"]] tested_status.add(status) #FIXME(dynmem): check oper_ram vs min/max mem for (field, livefield) in [("oper_vcpus", "vcpus")]: if inst.primary_node in bad_nodes: exp = (constants.RS_NODATA, None) elif inst.uuid in live_data: value = live_data[inst.uuid].get(livefield, None) if value is None: exp = (constants.RS_UNAVAIL, None) else: exp = (constants.RS_NORMAL, value) else: exp = (constants.RS_UNAVAIL, None) self.assertEqual(row[fieldidx[field]], exp) bridges = inst_bridges.get(inst.uuid, []) self.assertEqual(row[fieldidx["nic.bridges"]], (constants.RS_NORMAL, bridges)) if bridges: self.assertEqual(row[fieldidx["bridge"]], (constants.RS_NORMAL, bridges[0])) else: self.assertEqual(row[fieldidx["bridge"]], (constants.RS_UNAVAIL, None)) for i in range(constants.MAX_NICS): if i < len(bridges) and bridges[i] is not None: exp = (constants.RS_NORMAL, bridges[i]) else: exp = (constants.RS_UNAVAIL, None) self.assertEqual(row[fieldidx["nic.bridge/%s" % i]], exp) if inst.primary_node in bad_nodes: exp = (constants.RS_NODATA, None) else: exp = (constants.RS_NORMAL, inst.uuid in live_data) self.assertEqual(row[fieldidx["oper_state"]], exp) cust_exp = (constants.RS_NORMAL, {}) if inst.os == "deb99": if inst.uuid == 
"inst6-uuid": exp = (constants.RS_NORMAL, {"clean_install": "no"}) cust_exp = exp else: exp = (constants.RS_NORMAL, {"clean_install": "yes"}) else: exp = (constants.RS_NORMAL, {}) self.assertEqual(row[fieldidx["osparams"]], exp) self.assertEqual(row[fieldidx["custom_osparams"]], cust_exp) usage = disk_usage[inst.uuid] if usage is None: usage = 0 self.assertEqual(row[fieldidx["disk_usage"]], (constants.RS_NORMAL, usage)) for alias, target in [("sda_size", "disk.size/0"), ("sdb_size", "disk.size/1"), ("vcpus", "be/vcpus"), ("ip", "nic.ip/0"), ("mac", "nic.mac/0"), ("bridge", "nic.bridge/0"), ("nic_mode", "nic.mode/0"), ("nic_link", "nic.link/0"), ]: self.assertEqual(row[fieldidx[alias]], row[fieldidx[target]]) for field in ["ctime", "mtime"]: if getattr(inst, field) is None: # No ctime/mtime exp = (constants.RS_UNAVAIL, None) else: exp = (constants.RS_NORMAL, getattr(inst, field)) self.assertEqual(row[fieldidx[field]], exp) self._CheckInstanceConsole(inst, row[fieldidx["console"]]) # Ensure all possible status' have been tested self.assertEqual(tested_status, constants.INSTST_ALL) def _CheckInstanceConsole(self, instance, (status, consdata)): if instance.name == "inst7": self.assertEqual(status, constants.RS_NORMAL) console = objects.InstanceConsole.FromDict(consdata) self.assertTrue(console.Validate()) self.assertEqual(console.host, instance.primary_node) else: self.assertEqual(status, constants.RS_UNAVAIL) class TestGroupQuery(unittest.TestCase): def setUp(self): self.custom_diskparams = { constants.DT_DRBD8: { constants.DRBD_DEFAULT_METAVG: "foobar", }, } self.groups = [ objects.NodeGroup(name="default", uuid="c0e89160-18e7-11e0-a46e-001d0904baeb", alloc_policy=constants.ALLOC_POLICY_PREFERRED, ipolicy=objects.MakeEmptyIPolicy(), ndparams={}, diskparams={}, ), objects.NodeGroup(name="restricted", uuid="d2a40a74-18e7-11e0-9143-001d0904baeb", alloc_policy=constants.ALLOC_POLICY_LAST_RESORT, ipolicy=objects.MakeEmptyIPolicy(), ndparams={}, diskparams=self.custom_diskparams, ), ] self.cluster = objects.Cluster(cluster_name="testcluster", hvparams=constants.HVC_DEFAULTS, beparams={ constants.PP_DEFAULT: constants.BEC_DEFAULTS, }, nicparams={ constants.PP_DEFAULT: constants.NICC_DEFAULTS, }, ndparams=constants.NDC_DEFAULTS, ipolicy=constants.IPOLICY_DEFAULTS, diskparams=constants.DISK_DT_DEFAULTS, ) def _Create(self, selected): return query.Query(query.GROUP_FIELDS, selected) def testSimple(self): q = self._Create(["name", "uuid", "alloc_policy"]) gqd = query.GroupQueryData(self.cluster, self.groups, None, None, False) self.assertEqual(q.RequestedData(), set([query.GQ_CONFIG])) self.assertEqual(q.Query(gqd), [[(constants.RS_NORMAL, "default"), (constants.RS_NORMAL, "c0e89160-18e7-11e0-a46e-001d0904baeb"), (constants.RS_NORMAL, constants.ALLOC_POLICY_PREFERRED) ], [(constants.RS_NORMAL, "restricted"), (constants.RS_NORMAL, "d2a40a74-18e7-11e0-9143-001d0904baeb"), (constants.RS_NORMAL, constants.ALLOC_POLICY_LAST_RESORT) ], ]) def testNodes(self): groups_to_nodes = { "c0e89160-18e7-11e0-a46e-001d0904baeb": ["node1", "node2"], "d2a40a74-18e7-11e0-9143-001d0904baeb": ["node1", "node10", "node9"], } q = self._Create(["name", "node_cnt", "node_list"]) gqd = query.GroupQueryData(self.cluster, self.groups, groups_to_nodes, None, False) self.assertEqual(q.RequestedData(), set([query.GQ_CONFIG, query.GQ_NODE])) self.assertEqual(q.Query(gqd), [[(constants.RS_NORMAL, "default"), (constants.RS_NORMAL, 2), (constants.RS_NORMAL, ["node1", "node2"]), ], [(constants.RS_NORMAL, "restricted"), 
(constants.RS_NORMAL, 3),
                       (constants.RS_NORMAL, ["node1", "node9", "node10"]),
                       ],
                      ])

  def testInstances(self):
    groups_to_instances = {
      "c0e89160-18e7-11e0-a46e-001d0904baeb": ["inst1", "inst2"],
      "d2a40a74-18e7-11e0-9143-001d0904baeb": ["inst1", "inst10", "inst9"],
      }

    q = self._Create(["pinst_cnt", "pinst_list"])
    gqd = query.GroupQueryData(self.cluster, self.groups, None,
                               groups_to_instances, False)
    self.assertEqual(q.RequestedData(), set([query.GQ_INST]))
    self.assertEqual(q.Query(gqd),
                     [[(constants.RS_NORMAL, 2),
                       (constants.RS_NORMAL, ["inst1", "inst2"]),
                       ],
                      [(constants.RS_NORMAL, 3),
                       (constants.RS_NORMAL, ["inst1", "inst9", "inst10"]),
                       ],
                      ])

  def testDiskparams(self):
    q = self._Create(["name", "uuid", "diskparams", "custom_diskparams"])
    gqd = query.GroupQueryData(self.cluster, self.groups, None, None, True)
    self.assertEqual(q.RequestedData(),
                     set([query.GQ_CONFIG, query.GQ_DISKPARAMS]))
    self.assertEqual(q.Query(gqd),
                     [[(constants.RS_NORMAL, "default"),
                       (constants.RS_NORMAL,
                        "c0e89160-18e7-11e0-a46e-001d0904baeb"),
                       (constants.RS_NORMAL, constants.DISK_DT_DEFAULTS),
                       (constants.RS_NORMAL, {}),
                       ],
                      [(constants.RS_NORMAL, "restricted"),
                       (constants.RS_NORMAL,
                        "d2a40a74-18e7-11e0-9143-001d0904baeb"),
                       (constants.RS_NORMAL,
                        objects.FillDiskParams(constants.DISK_DT_DEFAULTS,
                                               self.custom_diskparams)),
                       (constants.RS_NORMAL, self.custom_diskparams),
                       ],
                      ])


class TestOsQuery(unittest.TestCase):
  def _Create(self, selected):
    return query.Query(query.OS_FIELDS, selected)

  def test(self):
    variants = ["v00", "plain", "v3", "var0", "v33", "v20"]
    api_versions = [10, 0, 15, 5]
    parameters = ["zpar3", "apar9"]

    # The input lists must not already be in the order the query is expected
    # to produce; NiceSort operates on strings only, hence the plain sorted()
    # check for the integer API versions.
    assert variants != sorted(variants) and \
           variants != utils.NiceSort(variants)
    assert api_versions != sorted(api_versions)
    assert (parameters != sorted(parameters) and
            parameters != utils.NiceSort(parameters))

    data = [
      query.OsInfo(name="debian", valid=False, hidden=False,
                   blacklisted=False, variants=set(), api_versions=set(),
                   parameters=set(),
                   node_status={ "some": "status", }),
      query.OsInfo(name="dos", valid=True, hidden=False, blacklisted=True,
                   variants=set(variants),
                   api_versions=set(api_versions),
                   parameters=set(parameters),
                   node_status={ "some": "other", "status": None, }),
      ]

    q = self._Create(["name", "valid", "hidden", "blacklisted", "variants",
                      "api_versions", "parameters", "node_status"])
    self.assertEqual(q.RequestedData(), set([]))
    self.assertEqual(q.Query(data),
                     [[(constants.RS_NORMAL, "debian"),
                       (constants.RS_NORMAL, False),
                       (constants.RS_NORMAL, False),
                       (constants.RS_NORMAL, False),
                       (constants.RS_NORMAL, []),
                       (constants.RS_NORMAL, []),
                       (constants.RS_NORMAL, []),
                       (constants.RS_NORMAL, {"some": "status"})],
                      [(constants.RS_NORMAL, "dos"),
                       (constants.RS_NORMAL, True),
                       (constants.RS_NORMAL, False),
                       (constants.RS_NORMAL, True),
                       (constants.RS_NORMAL,
                        ["plain", "v00", "v3", "v20", "v33", "var0"]),
                       (constants.RS_NORMAL, [0, 5, 10, 15]),
                       (constants.RS_NORMAL, ["apar9", "zpar3"]),
                       (constants.RS_NORMAL, {
                         "some": "other",
                         "status": None,
                         })
                       ]])


class TestQueryFields(unittest.TestCase):
  def testAllFields(self):
    for fielddefs in query.ALL_FIELD_LISTS:
      result = query.QueryFields(fielddefs, None)
      self.assert_(isinstance(result, dict))
      response = objects.QueryFieldsResponse.FromDict(result)
      self.assertEqual([(fdef.name, fdef.title)
                        for fdef in response.fields],
                       [(fdef2.name, fdef2.title)
                        for (fdef2, _, _, _)
                        in utils.NiceSort(fielddefs.values(),
                                          key=lambda x: x[0].name)])

  def testSomeFields(self):
    rnd = random.Random(5357)

    for _ in range(10):
      for fielddefs in 
query.ALL_FIELD_LISTS: if len(fielddefs) > 20: sample_size = rnd.randint(5, 20) else: sample_size = rnd.randint(1, max(1, len(fielddefs) - 1)) fields = [fdef for (fdef, _, _, _) in rnd.sample(fielddefs.values(), sample_size)] result = query.QueryFields(fielddefs, [fdef.name for fdef in fields]) self.assert_(isinstance(result, dict)) response = objects.QueryFieldsResponse.FromDict(result) self.assertEqual([(fdef.name, fdef.title) for fdef in response.fields], [(fdef2.name, fdef2.title) for fdef2 in fields]) class TestQueryFilter(unittest.TestCase): def testRequestedNames(self): for (what, fielddefs) in query.ALL_FIELDS.items(): if what == constants.QR_JOB: namefield = "id" nameval = 123 namevalempty = 0 genval = lambda i: i * 10 randvals = [17361, 22015, 13193, 15215] else: nameval = "abc" namevalempty = "" genval = lambda i: "x%s" % i randvals = ["x17361", "x22015", "x13193", "x15215"] if what == constants.QR_EXPORT: namefield = "export" else: namefield = "name" assert namefield in fielddefs reqnames = [genval(i) for i in range(4)] innerfilter = [["=", namefield, v] for v in reqnames] # No name field q = query.Query(fielddefs, [namefield], qfilter=["=", namefield, nameval], namefield=None) self.assertEqual(q.RequestedNames(), None) # No filter q = query.Query(fielddefs, [namefield], qfilter=None, namefield=namefield) self.assertEqual(q.RequestedNames(), None) # Check empty query q = query.Query(fielddefs, [namefield], qfilter=["|"], namefield=namefield) self.assertEqual(q.RequestedNames(), None) # Check order q = query.Query(fielddefs, [namefield], qfilter=["|"] + innerfilter, namefield=namefield) self.assertEqual(q.RequestedNames(), reqnames) # Check reverse order q = query.Query(fielddefs, [namefield], qfilter=["|"] + list(reversed(innerfilter)), namefield=namefield) self.assertEqual(q.RequestedNames(), list(reversed(reqnames))) # Duplicates q = query.Query(fielddefs, [namefield], qfilter=["|"] + innerfilter + list(reversed(innerfilter)), namefield=namefield) self.assertEqual(q.RequestedNames(), reqnames) # Unknown name field self.assertRaises(AssertionError, query.Query, fielddefs, [namefield], namefield="_unknown_field_") # Filter with AND q = query.Query(fielddefs, [namefield], qfilter=["|", ["=", namefield, nameval], ["&", ["=", namefield, namevalempty]]], namefield=namefield) self.assertTrue(q.RequestedNames() is None) # Filter with NOT q = query.Query(fielddefs, [namefield], qfilter=["|", ["=", namefield, nameval], ["!", ["=", namefield, namevalempty]]], namefield=namefield) self.assertTrue(q.RequestedNames() is None) # Filter with only OR (names must be in correct order) q = query.Query(fielddefs, [namefield], qfilter=["|", ["=", namefield, randvals[0]], ["|", ["=", namefield, randvals[1]]], ["|", ["|", ["=", namefield, randvals[2]]]], ["=", namefield, randvals[3]]], namefield=namefield) self.assertEqual(q.RequestedNames(), randvals) @staticmethod def _GenNestedFilter(namefield, op, depth, nameval): nested = ["=", namefield, nameval] for i in range(depth): nested = [op, nested] return nested def testCompileFilter(self): levels_max = query._FilterCompilerHelper._LEVELS_MAX for (what, fielddefs) in query.ALL_FIELDS.items(): if what == constants.QR_JOB: namefield = "id" nameval = 123 elif what == constants.QR_EXPORT: namefield = "export" nameval = "value" else: namefield = "name" nameval = "value" checks = [ [], ["="], ["=", "foo"], ["unknownop"], ["!"], ["=", "_unknown_field", "value"], self._GenNestedFilter(namefield, "|", levels_max, nameval), self._GenNestedFilter(namefield, "|", 
levels_max * 3, nameval), self._GenNestedFilter(namefield, "!", levels_max, nameval), ] for qfilter in checks: self.assertRaises(errors.ParameterError, query._CompileFilter, fielddefs, None, qfilter) for op in ["|", "!"]: qfilter = self._GenNestedFilter(namefield, op, levels_max - 1, nameval) self.assertTrue(callable(query._CompileFilter(fielddefs, None, qfilter))) def testQueryInputOrder(self): fielddefs = query._PrepareFieldList([ (query._MakeField("pnode", "PNode", constants.QFT_TEXT, "Primary"), None, 0, lambda ctx, item: item["pnode"]), (query._MakeField("snode", "SNode", constants.QFT_TEXT, "Secondary"), None, 0, lambda ctx, item: item["snode"]), ], []) data = [ { "pnode": "node1", "snode": "node44", }, { "pnode": "node30", "snode": "node90", }, { "pnode": "node25", "snode": "node1", }, { "pnode": "node20", "snode": "node1", }, ] qfilter = ["|", ["=", "pnode", "node1"], ["=", "snode", "node1"]] q = query.Query(fielddefs, ["pnode", "snode"], namefield="pnode", qfilter=qfilter) self.assertTrue(q.RequestedNames() is None) self.assertFalse(q.RequestedData()) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")], [(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")]]) # Try again with reversed input data self.assertEqual(q.Query(reversed(data)), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")], [(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")]]) # No name field, result must be in incoming order q = query.Query(fielddefs, ["pnode", "snode"], namefield=None, qfilter=qfilter) self.assertFalse(q.RequestedData()) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")]]) self.assertEqual(q.OldStyleQuery(data), [ ["node1", "node44"], ["node25", "node1"], ["node20", "node1"], ]) self.assertEqual(q.Query(reversed(data)), [[(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")]]) self.assertEqual(q.OldStyleQuery(reversed(data)), [ ["node20", "node1"], ["node25", "node1"], ["node1", "node44"], ]) # Name field, but no sorting, result must be in incoming order q = query.Query(fielddefs, ["pnode", "snode"], namefield="pnode") self.assertFalse(q.RequestedData()) self.assertEqual(q.Query(data, sort_by_name=False), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")], [(constants.RS_NORMAL, "node30"), (constants.RS_NORMAL, "node90")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")]]) self.assertEqual(q.OldStyleQuery(data, sort_by_name=False), [ ["node1", "node44"], ["node30", "node90"], ["node25", "node1"], ["node20", "node1"], ]) self.assertEqual(q.Query(reversed(data), sort_by_name=False), [[(constants.RS_NORMAL, "node20"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node25"), (constants.RS_NORMAL, "node1")], [(constants.RS_NORMAL, "node30"), (constants.RS_NORMAL, "node90")], [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "node44")]]) self.assertEqual(q.OldStyleQuery(reversed(data), sort_by_name=False), [ ["node20", "node1"], ["node25", "node1"], 
["node30", "node90"], ["node1", "node44"], ]) def testEqualNamesOrder(self): fielddefs = query._PrepareFieldList([ (query._MakeField("pnode", "PNode", constants.QFT_TEXT, "Primary"), None, 0, lambda ctx, item: item["pnode"]), (query._MakeField("num", "Num", constants.QFT_NUMBER, "Num"), None, 0, lambda ctx, item: item["num"]), ], []) data = [ { "pnode": "node1", "num": 100, }, { "pnode": "node1", "num": 25, }, { "pnode": "node2", "num": 90, }, { "pnode": "node2", "num": 30, }, ] q = query.Query(fielddefs, ["pnode", "num"], namefield="pnode", qfilter=["|", ["=", "pnode", "node1"], ["=", "pnode", "node2"], ["=", "pnode", "node1"]]) self.assertEqual(q.RequestedNames(), ["node1", "node2"], msg="Did not return unique names") self.assertFalse(q.RequestedData()) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, 100)], [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, 25)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, 90)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, 30)]]) self.assertEqual(q.Query(data, sort_by_name=False), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, 100)], [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, 25)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, 90)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, 30)]]) data = [ { "pnode": "nodeX", "num": 50, }, { "pnode": "nodeY", "num": 40, }, { "pnode": "nodeX", "num": 30, }, { "pnode": "nodeX", "num": 20, }, { "pnode": "nodeM", "num": 10, }, ] q = query.Query(fielddefs, ["pnode", "num"], namefield="pnode", qfilter=["|", ["=", "pnode", "nodeX"], ["=", "pnode", "nodeY"], ["=", "pnode", "nodeY"], ["=", "pnode", "nodeY"], ["=", "pnode", "nodeM"]]) self.assertEqual(q.RequestedNames(), ["nodeX", "nodeY", "nodeM"], msg="Did not return unique names") self.assertFalse(q.RequestedData()) # First sorted by name, then input order self.assertEqual(q.Query(data, sort_by_name=True), [[(constants.RS_NORMAL, "nodeM"), (constants.RS_NORMAL, 10)], [(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 50)], [(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 30)], [(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 20)], [(constants.RS_NORMAL, "nodeY"), (constants.RS_NORMAL, 40)]]) # Input order self.assertEqual(q.Query(data, sort_by_name=False), [[(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 50)], [(constants.RS_NORMAL, "nodeY"), (constants.RS_NORMAL, 40)], [(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 30)], [(constants.RS_NORMAL, "nodeX"), (constants.RS_NORMAL, 20)], [(constants.RS_NORMAL, "nodeM"), (constants.RS_NORMAL, 10)]]) def testFilter(self): (DK_A, DK_B) = range(1000, 1002) fielddefs = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), DK_A, 0, lambda ctx, item: item["name"]), (query._MakeField("other", "Other", constants.QFT_TEXT, "Other"), DK_B, 0, lambda ctx, item: item["other"]), ], []) data = [ { "name": "node1", "other": "foo", }, { "name": "node2", "other": "bar", }, { "name": "node3", "other": "Hello", }, ] # Empty filter q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["|"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_A, DK_B])) self.assertEqual(q.Query(data), []) # Normal filter q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["=", "name", "node1"]) self.assertEqual(q.RequestedNames(), ["node1"]) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), 
(constants.RS_NORMAL, "foo")]]) q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=(["|", ["=", "name", "node1"], ["=", "name", "node3"]])) self.assertEqual(q.RequestedNames(), ["node1", "node3"]) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "foo")], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, "Hello")]]) # Complex filter q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=(["|", ["=", "name", "node1"], ["|", ["=", "name", "node3"], ["=", "name", "node2"]], ["=", "name", "node3"]])) self.assertEqual(q.RequestedNames(), ["node1", "node3", "node2"]) self.assertEqual(q.RequestedData(), set([DK_A, DK_B])) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "foo")], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, "bar")], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, "Hello")]]) # Filter data type mismatch for i in [-1, 0, 1, 123, [], None, True, False]: self.assertRaises(errors.ParameterError, query.Query, fielddefs, ["name", "other"], namefield="name", qfilter=["=", "name", i]) # Negative filter q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["!", ["|", ["=", "name", "node1"], ["=", "name", "node3"]]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, "bar")]]) # Not equal q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["!=", "name", "node3"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, "foo")], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, "bar")]]) # Data type q = query.Query(fielddefs, [], namefield="name", qfilter=["|", ["=", "other", "bar"], ["=", "name", "foo"]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_A, DK_B])) self.assertEqual(q.Query(data), [[]]) # Only one data type q = query.Query(fielddefs, ["other"], namefield="name", qfilter=["=", "other", "bar"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_B])) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, "bar")]]) q = query.Query(fielddefs, [], namefield="name", qfilter=["=", "other", "bar"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_B])) self.assertEqual(q.Query(data), [[]]) # Data type in boolean operator q = query.Query(fielddefs, [], namefield="name", qfilter=["?", "name"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_A])) self.assertEqual(q.Query(data), [[], [], []]) q = query.Query(fielddefs, [], namefield="name", qfilter=["!", ["?", "name"]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.RequestedData(), set([DK_A])) self.assertEqual(q.Query(data), []) def testFilterContains(self): fielddefs = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda ctx, item: item["name"]), (query._MakeField("other", "Other", constants.QFT_OTHER, "Other"), None, 0, lambda ctx, item: item["other"]), ], []) data = [ { "name": "node2", "other": ["x", "y", "bar"], }, { "name": "node3", "other": "Hello", }, { "name": "node1", "other": ["a", "b", "foo"], }, { "name": "empty", "other": []}, ] q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["=[]", "other", "bar"]) self.assertTrue(q.RequestedNames() is None) 
self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, ["x", "y", "bar"])], ]) q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["|", ["=[]", "other", "bar"], ["=[]", "other", "a"], ["=[]", "other", "b"]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, ["a", "b", "foo"])], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, ["x", "y", "bar"])], ]) self.assertEqual(q.OldStyleQuery(data), [ ["node1", ["a", "b", "foo"]], ["node2", ["x", "y", "bar"]], ]) # Boolean test q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["?", "other"]) self.assertEqual(q.OldStyleQuery(data), [ ["node1", ["a", "b", "foo"]], ["node2", ["x", "y", "bar"]], ["node3", "Hello"], ]) q = query.Query(fielddefs, ["name", "other"], namefield="name", qfilter=["!", ["?", "other"]]) self.assertEqual(q.OldStyleQuery(data), [ ["empty", []], ]) def testFilterHostname(self): fielddefs = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, query.QFF_HOSTNAME, lambda ctx, item: item["name"]), ], []) data = [ { "name": "node1.example.com", }, { "name": "node2.example.com", }, { "name": "node2.example.net", }, ] q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=", "name", "node2"]) self.assertEqual(q.RequestedNames(), ["node2"]) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2.example.com")], [(constants.RS_NORMAL, "node2.example.net")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=", "name", "node1"]) self.assertEqual(q.RequestedNames(), ["node1"]) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1.example.com")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=", "name", "othername"]) self.assertEqual(q.RequestedNames(), ["othername"]) self.assertEqual(q.Query(data), []) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["|", ["=", "name", "node1.example.com"], ["=", "name", "node2"]]) self.assertEqual(q.RequestedNames(), ["node1.example.com", "node2"]) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1.example.com")], [(constants.RS_NORMAL, "node2.example.com")], [(constants.RS_NORMAL, "node2.example.net")], ]) self.assertEqual(q.OldStyleQuery(data), [ ["node1.example.com"], ["node2.example.com"], ["node2.example.net"], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["!=", "name", "node1"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2.example.com")], [(constants.RS_NORMAL, "node2.example.net")], ]) self.assertEqual(q.OldStyleQuery(data), [ ["node2.example.com"], ["node2.example.net"], ]) def testFilterBoolean(self): fielddefs = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, query.QFF_HOSTNAME, lambda ctx, item: item["name"]), (query._MakeField("value", "Value", constants.QFT_BOOL, "Value"), None, 0, lambda ctx, item: item["value"]), ], []) data = [ { "name": "node1", "value": False, }, { "name": "node2", "value": True, }, { "name": "node3", "value": True, }, ] q = query.Query(fielddefs, ["name", "value"], qfilter=["|", ["=", "value", False], ["=", "value", True]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True)], 
[(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, True)], ]) q = query.Query(fielddefs, ["name", "value"], qfilter=["|", ["=", "value", False], ["!", ["=", "value", False]]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, True)], ]) # Comparing bool with string for i in ["False", "True", "0", "1", "no", "yes", "N", "Y"]: self.assertRaises(errors.ParameterError, query.Query, fielddefs, ["name", "value"], qfilter=["=", "value", i]) # Truth filter q = query.Query(fielddefs, ["name", "value"], qfilter=["?", "value"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, True)], ]) # Negative bool filter q = query.Query(fielddefs, ["name", "value"], qfilter=["!", ["?", "value"]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], ]) # Complex truth filter q = query.Query(fielddefs, ["name", "value"], qfilter=["|", ["&", ["=", "name", "node1"], ["!", ["?", "value"]]], ["?", "value"]]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, False)], [(constants.RS_NORMAL, "node2"), (constants.RS_NORMAL, True)], [(constants.RS_NORMAL, "node3"), (constants.RS_NORMAL, True)], ]) def testFilterRegex(self): fielddefs = query._PrepareFieldList([ (query._MakeField("name", "Name", constants.QFT_TEXT, "Name"), None, 0, lambda ctx, item: item["name"]), ], []) data = [ { "name": "node1.example.com", }, { "name": "node2.site.example.com", }, { "name": "node2.example.net", }, # Empty name { "name": "", }, ] q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=~", "name", "site"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2.site.example.com")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=~", "name", "^node2"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node2.example.net")], [(constants.RS_NORMAL, "node2.site.example.com")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=~", "name", r"(?i)\.COM$"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1.example.com")], [(constants.RS_NORMAL, "node2.site.example.com")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=~", "name", r"."]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "node1.example.com")], [(constants.RS_NORMAL, "node2.example.net")], [(constants.RS_NORMAL, "node2.site.example.com")], ]) q = query.Query(fielddefs, ["name"], namefield="name", qfilter=["=~", "name", r"^$"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "")], ]) # Invalid regular expression self.assertRaises(errors.ParameterError, query.Query, fielddefs, ["name"], qfilter=["=~", "name", r"["]) def testFilterLessGreater(self): fielddefs = query._PrepareFieldList([ (query._MakeField("value", "Value", constants.QFT_NUMBER, "Value"), None, 0, lambda ctx, item: item), ], []) data = range(100) q = 
query.Query(fielddefs, ["value"], qfilter=["<", "value", 20]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, i)] for i in range(20)]) q = query.Query(fielddefs, ["value"], qfilter=["<=", "value", 30]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, i)] for i in range(31)]) q = query.Query(fielddefs, ["value"], qfilter=[">", "value", 40]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, i)] for i in range(41, 100)]) q = query.Query(fielddefs, ["value"], qfilter=[">=", "value", 50]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [[(constants.RS_NORMAL, i)] for i in range(50, 100)]) def testFilterLessGreaterJobId(self): fielddefs = query._PrepareFieldList([ (query._MakeField("id", "ID", constants.QFT_TEXT, "Job ID"), None, query.QFF_JOB_ID, lambda ctx, item: item), ], []) data = ["1", "2", "3", "10", "102", "120", "125", "15", "100", "7"] assert data != utils.NiceSort(data), "Test data should not be sorted" q = query.Query(fielddefs, ["id"], qfilter=["<", "id", "20"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "1")], [(constants.RS_NORMAL, "2")], [(constants.RS_NORMAL, "3")], [(constants.RS_NORMAL, "10")], [(constants.RS_NORMAL, "15")], [(constants.RS_NORMAL, "7")], ]) q = query.Query(fielddefs, ["id"], qfilter=[">=", "id", "100"]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, "102")], [(constants.RS_NORMAL, "120")], [(constants.RS_NORMAL, "125")], [(constants.RS_NORMAL, "100")], ]) # Integers are no valid job IDs self.assertRaises(errors.ParameterError, query.Query, fielddefs, ["id"], qfilter=[">=", "id", 10]) def testFilterLessGreaterSplitTimestamp(self): fielddefs = query._PrepareFieldList([ (query._MakeField("ts", "Timestamp", constants.QFT_OTHER, "Timestamp"), None, query.QFF_SPLIT_TIMESTAMP, lambda ctx, item: item), ], []) data = [ utils.SplitTime(0), utils.SplitTime(0.1), utils.SplitTime(18224.7872), utils.SplitTime(919896.12623), utils.SplitTime(999), utils.SplitTime(989.9999), ] for i in [0, [0, 0]]: q = query.Query(fielddefs, ["ts"], qfilter=["<", "ts", i]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), []) q = query.Query(fielddefs, ["ts"], qfilter=["<", "ts", 1000]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, (0, 0))], [(constants.RS_NORMAL, (0, 100000))], [(constants.RS_NORMAL, (999, 0))], [(constants.RS_NORMAL, (989, 999900))], ]) q = query.Query(fielddefs, ["ts"], qfilter=[">=", "ts", 5000.3]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, (18224, 787200))], [(constants.RS_NORMAL, (919896, 126230))], ]) for i in [18224.7772, utils.SplitTime(18224.7772)]: q = query.Query(fielddefs, ["ts"], qfilter=[">=", "ts", i]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, (18224, 787200))], [(constants.RS_NORMAL, (919896, 126230))], ]) q = query.Query(fielddefs, ["ts"], qfilter=[">", "ts", 18224.7880]) self.assertTrue(q.RequestedNames() is None) self.assertEqual(q.Query(data), [ [(constants.RS_NORMAL, (919896, 126230))], ]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.ssh_unittest.py0000744000000000000000000001207212244641676020774 
0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the ssh module""" import os import tempfile import unittest import shutil import testutils import mocks from ganeti import constants from ganeti import utils from ganeti import ssh from ganeti import errors class TestKnownHosts(testutils.GanetiTestCase): """Test case for function writing the known_hosts file""" def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpfile = self._CreateTempFile() def test(self): cfg = mocks.FakeConfig() ssh.WriteKnownHostsFile(cfg, self.tmpfile) self.assertFileContent(self.tmpfile, "%s ssh-rsa %s\n%s ssh-dss %s\n" % (cfg.GetClusterName(), mocks.FAKE_CLUSTER_KEY, cfg.GetClusterName(), mocks.FAKE_CLUSTER_KEY)) class TestGetUserFiles(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) @staticmethod def _GetNoHomedir(_): return None def _GetTempHomedir(self, _): return self.tmpdir def testNonExistantUser(self): for kind in constants.SSHK_ALL: self.assertRaises(errors.OpExecError, ssh.GetUserFiles, "example", kind=kind, _homedir_fn=self._GetNoHomedir) def testUnknownKind(self): kind = "something-else" assert kind not in constants.SSHK_ALL self.assertRaises(errors.ProgrammerError, ssh.GetUserFiles, "example4645", kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), []) def testNoSshDirectory(self): for kind in constants.SSHK_ALL: self.assertRaises(errors.OpExecError, ssh.GetUserFiles, "example29694", kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), []) def testSshIsFile(self): utils.WriteFile(os.path.join(self.tmpdir, ".ssh"), data="") for kind in constants.SSHK_ALL: self.assertRaises(errors.OpExecError, ssh.GetUserFiles, "example26237", kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), [".ssh"]) def testMakeSshDirectory(self): sshdir = os.path.join(self.tmpdir, ".ssh") self.assertEqual(os.listdir(self.tmpdir), []) for kind in constants.SSHK_ALL: ssh.GetUserFiles("example20745", mkdir=True, kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), [".ssh"]) self.assertEqual(os.stat(sshdir).st_mode & 0777, 0700) def testFilenames(self): sshdir = os.path.join(self.tmpdir, ".ssh") os.mkdir(sshdir) for kind in constants.SSHK_ALL: result = ssh.GetUserFiles("example15103", mkdir=False, kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(result, [ os.path.join(self.tmpdir, ".ssh", "id_%s" % kind), os.path.join(self.tmpdir, ".ssh", "id_%s.pub" % kind), os.path.join(self.tmpdir, ".ssh", "authorized_keys"), ]) self.assertEqual(os.listdir(self.tmpdir), [".ssh"]) self.assertEqual(os.listdir(sshdir), []) def testNoDirCheck(self): 
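    # With dircheck=False the missing ~/.ssh directory must not raise, and
    # (unlike the mkdir=True case) nothing may be created in the home
    # directory either.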
self.assertEqual(os.listdir(self.tmpdir), []) for kind in constants.SSHK_ALL: ssh.GetUserFiles("example14528", mkdir=False, dircheck=False, kind=kind, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), []) def testGetAllUserFiles(self): result = ssh.GetAllUserFiles("example7475", mkdir=False, dircheck=False, _homedir_fn=self._GetTempHomedir) self.assertEqual(result, (os.path.join(self.tmpdir, ".ssh", "authorized_keys"), { constants.SSHK_RSA: (os.path.join(self.tmpdir, ".ssh", "id_rsa"), os.path.join(self.tmpdir, ".ssh", "id_rsa.pub")), constants.SSHK_DSA: (os.path.join(self.tmpdir, ".ssh", "id_dsa"), os.path.join(self.tmpdir, ".ssh", "id_dsa.pub")), })) self.assertEqual(os.listdir(self.tmpdir), []) def testGetAllUserFilesNoDirectoryNoMkdir(self): self.assertRaises(errors.OpExecError, ssh.GetAllUserFiles, "example17270", mkdir=False, dircheck=True, _homedir_fn=self._GetTempHomedir) self.assertEqual(os.listdir(self.tmpdir), []) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.backend_unittest-runasroot.py0000744000000000000000000000377512244641676023632 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.backend (tests requiring root access)""" import os import tempfile import shutil import errno from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import backend import testutils class TestCommonRestrictedCmdCheck(testutils.GanetiTestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def _PrepareTest(self): tmpname = utils.PathJoin(self.tmpdir, "foobar") os.mkdir(tmpname) os.chmod(tmpname, 0700) return tmpname def testCorrectOwner(self): tmpname = self._PrepareTest() os.chown(tmpname, 0, 0) (status, value) = backend._CommonRestrictedCmdCheck(tmpname, None) self.assertTrue(status) self.assertTrue(value) def testWrongOwner(self): tmpname = self._PrepareTest() tests = [ (1, 0), (0, 1), (100, 50), ] for (uid, gid) in tests: self.assertFalse(uid == os.getuid() and gid == os.getgid()) os.chown(tmpname, uid, gid) (status, errmsg) = backend._CommonRestrictedCmdCheck(tmpname, None) self.assertFalse(status) self.assertTrue("foobar' is not owned by " in errmsg) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.opcodes_unittest.py0000744000000000000000000003724012271422343021623 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.opcodes""" import os import sys import unittest from ganeti import utils from ganeti import opcodes from ganeti import ht from ganeti import constants from ganeti import errors from ganeti import compat import testutils #: Unless an opcode is included in the following list it must have a result #: check of some sort MISSING_RESULT_CHECK = compat.UniqueFrozenset([ opcodes.OpTestAllocator, opcodes.OpTestDelay, opcodes.OpTestDummy, opcodes.OpTestJqueue, ]) class TestOpcodes(unittest.TestCase): def test(self): self.assertRaises(ValueError, opcodes.OpCode.LoadOpCode, None) self.assertRaises(ValueError, opcodes.OpCode.LoadOpCode, "") self.assertRaises(ValueError, opcodes.OpCode.LoadOpCode, {}) self.assertRaises(ValueError, opcodes.OpCode.LoadOpCode, {"OP_ID": ""}) for cls in opcodes.OP_MAPPING.values(): self.assert_(cls.OP_ID.startswith("OP_")) self.assert_(len(cls.OP_ID) > 3) self.assertEqual(cls.OP_ID, cls.OP_ID.upper()) self.assertEqual(cls.OP_ID, opcodes._NameToId(cls.__name__)) self.assertFalse(compat.any(cls.OP_ID.startswith(prefix) for prefix in opcodes._SUMMARY_PREFIX.keys())) if cls in MISSING_RESULT_CHECK: self.assertTrue(cls.OP_RESULT is None, msg=("%s is listed to not have a result check" % cls.OP_ID)) else: self.assertTrue(callable(cls.OP_RESULT), msg=("%s should have a result check" % cls.OP_ID)) self.assertRaises(TypeError, cls, unsupported_parameter="some value") args = [ # No variables {}, # Variables supported by all opcodes {"dry_run": False, "debug_level": 0, }, # All variables dict([(name, []) for name in cls.GetAllSlots()]) ] for i in args: op = cls(**i) self.assertEqual(op.OP_ID, cls.OP_ID) self._checkSummary(op) # Try a restore state = op.__getstate__() self.assert_(isinstance(state, dict)) restored = opcodes.OpCode.LoadOpCode(state) self.assert_(isinstance(restored, cls)) self._checkSummary(restored) for name in ["x_y_z", "hello_world"]: assert name not in cls.GetAllSlots() for value in [None, True, False, [], "Hello World"]: self.assertRaises(AttributeError, setattr, op, name, value) def _checkSummary(self, op): summary = op.Summary() if hasattr(op, "OP_DSC_FIELD"): self.assert_(("OP_%s" % summary).startswith("%s(" % op.OP_ID)) self.assert_(summary.endswith(")")) else: self.assertEqual("OP_%s" % summary, op.OP_ID) def testSummary(self): class OpTest(opcodes.OpCode): OP_DSC_FIELD = "data" OP_PARAMS = [ ("data", ht.NoDefault, ht.TString, None), ] self.assertEqual(OpTest(data="").Summary(), "TEST()") self.assertEqual(OpTest(data="Hello World").Summary(), "TEST(Hello World)") self.assertEqual(OpTest(data="node1.example.com").Summary(), "TEST(node1.example.com)") def testSummaryFormatter(self): class OpTest(opcodes.OpCode): OP_DSC_FIELD = "data" OP_DSC_FORMATTER = lambda _, v: "a" OP_PARAMS = [ ("data", ht.NoDefault, ht.TString, None), ] self.assertEqual(OpTest(data="").Summary(), "TEST(a)") self.assertEqual(OpTest(data="b").Summary(), "TEST(a)") def testTinySummary(self): self.assertFalse(utils.FindDuplicates(opcodes._SUMMARY_PREFIX.values())) self.assertTrue(compat.all(prefix.endswith("_") and 
supplement.endswith("_") for (prefix, supplement) in opcodes._SUMMARY_PREFIX.items())) self.assertEqual(opcodes.OpClusterPostInit().TinySummary(), "C_POST_INIT") self.assertEqual(opcodes.OpNodeRemove().TinySummary(), "N_REMOVE") self.assertEqual(opcodes.OpInstanceMigrate().TinySummary(), "I_MIGRATE") self.assertEqual(opcodes.OpGroupQuery().TinySummary(), "G_QUERY") self.assertEqual(opcodes.OpTestJqueue().TinySummary(), "TEST_JQUEUE") def testListSummary(self): class OpTest(opcodes.OpCode): OP_DSC_FIELD = "data" OP_PARAMS = [ ("data", ht.NoDefault, ht.TList, None), ] self.assertEqual(OpTest(data=["a", "b", "c"]).Summary(), "TEST(a,b,c)") self.assertEqual(OpTest(data=["a", None, "c"]).Summary(), "TEST(a,None,c)") self.assertEqual(OpTest(data=[1, 2, 3, 4]).Summary(), "TEST(1,2,3,4)") def testOpId(self): self.assertFalse(utils.FindDuplicates(cls.OP_ID for cls in opcodes._GetOpList())) self.assertEqual(len(opcodes._GetOpList()), len(opcodes.OP_MAPPING)) def testParams(self): supported_by_all = set(["debug_level", "dry_run", "priority"]) self.assertTrue(opcodes.BaseOpCode not in opcodes.OP_MAPPING.values()) self.assertTrue(opcodes.OpCode not in opcodes.OP_MAPPING.values()) for cls in opcodes.OP_MAPPING.values() + [opcodes.OpCode]: all_slots = cls.GetAllSlots() self.assertEqual(len(set(all_slots) & supported_by_all), 3, msg=("Opcode %s doesn't support all base" " parameters (%r)" % (cls.OP_ID, supported_by_all))) # All opcodes must have OP_PARAMS self.assert_(hasattr(cls, "OP_PARAMS"), msg="%s doesn't have OP_PARAMS" % cls.OP_ID) param_names = [name for (name, _, _, _) in cls.GetAllParams()] self.assertEqual(all_slots, param_names) # Without inheritance self.assertEqual(cls.__slots__, [name for (name, _, _, _) in cls.OP_PARAMS]) # This won't work if parameters are converted to a dictionary duplicates = utils.FindDuplicates(param_names) self.assertFalse(duplicates, msg=("Found duplicate parameters %r in %s" % (duplicates, cls.OP_ID))) # Check parameter definitions for attr_name, aval, test, doc in cls.GetAllParams(): self.assert_(attr_name) self.assert_(test is None or test is ht.NoType or callable(test), msg=("Invalid type check for %s.%s" % (cls.OP_ID, attr_name))) self.assertTrue(doc is None or isinstance(doc, basestring)) if callable(aval): default_value = aval() self.assertFalse(callable(default_value), msg=("Default value of %s.%s returned by function" " is callable" % (cls.OP_ID, attr_name))) else: self.assertFalse(isinstance(aval, (list, dict, set)), msg=("Default value of %s.%s is mutable (%s)" % (cls.OP_ID, attr_name, repr(aval)))) default_value = aval if aval is not ht.NoDefault and test is not ht.NoType: self.assertTrue(test(default_value), msg=("Default value of %s.%s does not verify" % (cls.OP_ID, attr_name))) # If any parameter has documentation, all others need to have it as well has_doc = [doc is not None for (_, _, _, doc) in cls.OP_PARAMS] self.assertTrue(not compat.any(has_doc) or compat.all(has_doc), msg="%s does not document all parameters" % cls) def testValidateNoModification(self): class OpTest(opcodes.OpCode): OP_PARAMS = [ ("nodef", ht.NoDefault, ht.TMaybeString, None), ("wdef", "default", ht.TMaybeString, None), ("number", 0, ht.TInt, None), ("notype", None, ht.NoType, None), ] # Missing required parameter "nodef" op = OpTest() before = op.__getstate__() self.assertRaises(errors.OpPrereqError, op.Validate, False) self.assertFalse(hasattr(op, "nodef")) self.assertFalse(hasattr(op, "wdef")) self.assertFalse(hasattr(op, "number")) self.assertFalse(hasattr(op, "notype")) 
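# Validation is expected to be transactional: even though the type check on
# "nodef" raised OpPrereqError above, the assertion below verifies that the
# opcode still matches the __getstate__() snapshot taken before Validate().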
self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") # Required parameter "nodef" is provided op = OpTest(nodef="foo") before = op.__getstate__() op.Validate(False) self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") self.assertEqual(op.nodef, "foo") self.assertFalse(hasattr(op, "wdef")) self.assertFalse(hasattr(op, "number")) self.assertFalse(hasattr(op, "notype")) # Missing required parameter "nodef" op = OpTest(wdef="hello", number=999) before = op.__getstate__() self.assertRaises(errors.OpPrereqError, op.Validate, False) self.assertFalse(hasattr(op, "nodef")) self.assertFalse(hasattr(op, "notype")) self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") # Wrong type for "nodef" op = OpTest(nodef=987) before = op.__getstate__() self.assertRaises(errors.OpPrereqError, op.Validate, False) self.assertEqual(op.nodef, 987) self.assertFalse(hasattr(op, "notype")) self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") # Testing different types for "notype" op = OpTest(nodef="foo", notype=[1, 2, 3]) before = op.__getstate__() op.Validate(False) self.assertEqual(op.nodef, "foo") self.assertEqual(op.notype, [1, 2, 3]) self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") op = OpTest(nodef="foo", notype="Hello World") before = op.__getstate__() op.Validate(False) self.assertEqual(op.nodef, "foo") self.assertEqual(op.notype, "Hello World") self.assertEqual(op.__getstate__(), before, msg="Opcode was modified") def testValidateSetDefaults(self): class OpTest(opcodes.OpCode): OP_PARAMS = [ # Static default value ("value1", "default", ht.TMaybeString, None), # Default value callback ("value2", lambda: "result", ht.TMaybeString, None), ] op = OpTest() before = op.__getstate__() op.Validate(True) self.assertNotEqual(op.__getstate__(), before, msg="Opcode was not modified") self.assertEqual(op.value1, "default") self.assertEqual(op.value2, "result") self.assert_(op.dry_run is None) self.assert_(op.debug_level is None) self.assertEqual(op.priority, constants.OP_PRIO_DEFAULT) op = OpTest(value1="hello", value2="world", debug_level=123) before = op.__getstate__() op.Validate(True) self.assertNotEqual(op.__getstate__(), before, msg="Opcode was not modified") self.assertEqual(op.value1, "hello") self.assertEqual(op.value2, "world") self.assertEqual(op.debug_level, 123) def testOpInstanceMultiAlloc(self): inst = dict([(name, []) for name in opcodes.OpInstanceCreate.GetAllSlots()]) inst_op = opcodes.OpInstanceCreate(**inst) inst_state = inst_op.__getstate__() multialloc = opcodes.OpInstanceMultiAlloc(instances=[inst_op]) state = multialloc.__getstate__() self.assertEquals(state["instances"], [inst_state]) loaded_multialloc = opcodes.OpCode.LoadOpCode(state) (loaded_inst,) = loaded_multialloc.instances self.assertNotEquals(loaded_inst, inst_op) self.assertEquals(loaded_inst.__getstate__(), inst_state) class TestOpcodeDepends(unittest.TestCase): def test(self): check_relative = opcodes._BuildJobDepCheck(True) check_norelative = opcodes.TNoRelativeJobDependencies for fn in [check_relative, check_norelative]: self.assertTrue(fn(None)) self.assertTrue(fn([])) self.assertTrue(fn([(1, [])])) self.assertTrue(fn([(719833, [])])) self.assertTrue(fn([("24879", [])])) self.assertTrue(fn([(2028, [constants.JOB_STATUS_ERROR])])) self.assertTrue(fn([ (2028, [constants.JOB_STATUS_ERROR]), (18750, []), (5063, [constants.JOB_STATUS_SUCCESS, constants.JOB_STATUS_ERROR]), ])) self.assertFalse(fn(1)) self.assertFalse(fn([(9, )])) 
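# Job dependencies must be (job_id, [status, ...]) pairs: a bare integer,
# a 1-tuple without a status list, and (next check) a tuple whose second
# element is a single status instead of a list must all be rejected.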
self.assertFalse(fn([(15194, constants.JOB_STATUS_ERROR)])) for i in [ [(-1, [])], [(-27740, [constants.JOB_STATUS_CANCELED, constants.JOB_STATUS_ERROR]), (-1, [constants.JOB_STATUS_ERROR]), (9921, [])], ]: self.assertTrue(check_relative(i)) self.assertFalse(check_norelative(i)) class TestResultChecks(unittest.TestCase): def testJobIdList(self): for i in [[], [(False, "error")], [(False, "")], [(True, 123), (True, "999")]]: self.assertTrue(opcodes.TJobIdList(i)) for i in ["", [("x", 1)], [[], []], [[False, "", None], [True, 123]]]: self.assertFalse(opcodes.TJobIdList(i)) def testJobIdListOnly(self): self.assertTrue(opcodes.TJobIdListOnly({ constants.JOB_IDS_KEY: [], })) self.assertTrue(opcodes.TJobIdListOnly({ constants.JOB_IDS_KEY: [(True, "9282")], })) self.assertFalse(opcodes.TJobIdListOnly({ "x": None, })) self.assertFalse(opcodes.TJobIdListOnly({ constants.JOB_IDS_KEY: [], "x": None, })) self.assertFalse(opcodes.TJobIdListOnly({ constants.JOB_IDS_KEY: [("foo", "bar")], })) self.assertFalse(opcodes.TJobIdListOnly({ constants.JOB_IDS_KEY: [("one", "two", "three")], })) class TestClusterOsList(unittest.TestCase): def test(self): good = [ None, [], [(constants.DDM_ADD, "dos"), (constants.DDM_REMOVE, "linux")], ] for i in good: self.assertTrue(opcodes._TestClusterOsList(i)) wrong = ["", 0, "xy", ["Hello World"], object(), [("foo", "bar")], [("", "")], [[constants.DDM_ADD]], [(constants.DDM_ADD, "")], [(constants.DDM_REMOVE, "")], [(constants.DDM_ADD, None)], [(constants.DDM_REMOVE, None)], ] for i in wrong: self.assertFalse(opcodes._TestClusterOsList(i)) class TestOpInstanceSetParams(unittest.TestCase): def _GenericTests(self, fn): self.assertTrue(fn([])) self.assertTrue(fn([(constants.DDM_ADD, {})])) self.assertTrue(fn([(constants.DDM_REMOVE, {})])) for i in [0, 1, 2, 3, 9, 10, 1024]: self.assertTrue(fn([(i, {})])) self.assertFalse(fn(None)) self.assertFalse(fn({})) self.assertFalse(fn("")) self.assertFalse(fn(0)) self.assertFalse(fn([(-100, {})])) self.assertFalse(fn([(constants.DDM_ADD, 2, 3)])) self.assertFalse(fn([[constants.DDM_ADD]])) def testNicModifications(self): fn = opcodes.OpInstanceSetParams.TestNicModifications self._GenericTests(fn) for param in constants.INIC_PARAMS: self.assertTrue(fn([[constants.DDM_ADD, {param: None}]])) self.assertTrue(fn([[constants.DDM_ADD, {param: param}]])) def testDiskModifications(self): fn = opcodes.OpInstanceSetParams.TestDiskModifications self._GenericTests(fn) for param in constants.IDISK_PARAMS: self.assertTrue(fn([[constants.DDM_ADD, {param: 0}]])) self.assertTrue(fn([[constants.DDM_ADD, {param: param}]])) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.algo_unittest.py0000744000000000000000000003006212244641676022257 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.algo""" import unittest import random import operator from ganeti import constants from ganeti import compat from ganeti.utils import algo import testutils class TestUniqueSequence(unittest.TestCase): """Test case for UniqueSequence""" def _test(self, input, expected): self.assertEqual(algo.UniqueSequence(input), expected) def runTest(self): # Ordered input self._test([1, 2, 3], [1, 2, 3]) self._test([1, 1, 2, 2, 3, 3], [1, 2, 3]) self._test([1, 2, 2, 3], [1, 2, 3]) self._test([1, 2, 3, 3], [1, 2, 3]) # Unordered input self._test([1, 2, 3, 1, 2, 3], [1, 2, 3]) self._test([1, 1, 2, 3, 3, 1, 2], [1, 2, 3]) # Strings self._test(["a", "a"], ["a"]) self._test(["a", "b"], ["a", "b"]) self._test(["a", "b", "a"], ["a", "b"]) class TestFindDuplicates(unittest.TestCase): """Test case for FindDuplicates""" def _Test(self, seq, expected): result = algo.FindDuplicates(seq) self.assertEqual(result, algo.UniqueSequence(result)) self.assertEqual(set(result), set(expected)) def test(self): self._Test([], []) self._Test([1, 2, 3], []) self._Test([9, 8, 8, 0, 5, 1, 7, 0, 6, 7], [8, 0, 7]) for exp in [[1, 2, 3], [3, 2, 1]]: self._Test([1, 1, 2, 2, 3, 3], exp) self._Test(["A", "a", "B"], []) self._Test(["a", "A", "a", "B"], ["a"]) self._Test("Hello World out there!", ["e", " ", "o", "r", "t", "l"]) self._Test(self._Gen(False), []) self._Test(self._Gen(True), range(1, 10)) @staticmethod def _Gen(dup): for i in range(10): yield i if dup: for _ in range(i): yield i class TestNiceSort(unittest.TestCase): def test(self): self.assertEqual(algo.NiceSort([]), []) self.assertEqual(algo.NiceSort(["foo"]), ["foo"]) self.assertEqual(algo.NiceSort(["bar", ""]), ["", "bar"]) self.assertEqual(algo.NiceSort([",", "."]), [",", "."]) self.assertEqual(algo.NiceSort(["0.1", "0.2"]), ["0.1", "0.2"]) self.assertEqual(algo.NiceSort(["0;099", "0,099", "0.1", "0.2"]), ["0,099", "0.1", "0.2", "0;099"]) data = ["a0", "a1", "a99", "a20", "a2", "b10", "b70", "b00", "0000"] self.assertEqual(algo.NiceSort(data), ["0000", "a0", "a1", "a2", "a20", "a99", "b00", "b10", "b70"]) data = ["a0-0", "a1-0", "a99-10", "a20-3", "a0-4", "a99-3", "a09-2", "Z", "a9-1", "A", "b"] self.assertEqual(algo.NiceSort(data), ["A", "Z", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", "a20-3", "a99-3", "a99-10", "b"]) self.assertEqual(algo.NiceSort(data, key=str.lower), ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", "a20-3", "a99-3", "a99-10", "b", "Z"]) self.assertEqual(algo.NiceSort(data, key=str.upper), ["A", "a0-0", "a0-4", "a1-0", "a9-1", "a09-2", "a20-3", "a99-3", "a99-10", "b", "Z"]) def testLargeA(self): data = [ "Eegah9ei", "xij88brTulHYAv8IEOyU", "3jTwJPtrXOY22bwL2YoW", "Z8Ljf1Pf5eBfNg171wJR", "WvNJd91OoXvLzdEiEXa6", "uHXAyYYftCSG1o7qcCqe", "xpIUJeVT1Rp", "KOt7vn1dWXi", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", "cPRi0lM7HLnSuWA2G9", "KVQqLPDjcPjf8T3oyzjcOsfkb", "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx", ] self.assertEqual(algo.NiceSort(data), [ "3jTwJPtrXOY22bwL2YoW", "Eegah9ei", "KOt7vn1dWXi", "KVQqLPDjcPjf8T3oyzjcOsfkb", "WvNJd91OoXvLzdEiEXa6", "Z8Ljf1Pf5eBfNg171wJR", "a07h8feON165N67PIE", "bH4Q7aCu3PUPjK3JtH", "cPRi0lM7HLnSuWA2G9", "guKJkXnkULealVC8CyF1xefym", "pqF8dkU5B1cMnyZuREaSOADYx", "uHXAyYYftCSG1o7qcCqe", "xij88brTulHYAv8IEOyU", "xpIUJeVT1Rp" ]) def testLargeB(self): data = [ 
"inst-0.0.0.0-0.0.0.0", "inst-0.1.0.0-0.0.0.0", "inst-0.2.0.0-0.0.0.0", "inst-0.2.1.0-0.0.0.0", "inst-0.2.2.0-0.0.0.0", "inst-0.2.2.0-0.0.0.9", "inst-0.2.2.0-0.0.3.9", "inst-0.2.2.0-0.2.0.9", "inst-0.2.2.0-0.9.0.9", "inst-0.20.2.0-0.0.0.0", "inst-0.20.2.0-0.9.0.9", "inst-10.020.2.0-0.9.0.10", "inst-15.020.2.0-0.9.1.00", "inst-100.020.2.0-0.9.0.9", # Only the last group, not converted to a number anymore, differs "inst-100.020.2.0a999", "inst-100.020.2.0b000", "inst-100.020.2.0c10", "inst-100.020.2.0c101", "inst-100.020.2.0c2", "inst-100.020.2.0c20", "inst-100.020.2.0c3", "inst-100.020.2.0c39123", ] rnd = random.Random(16205) for _ in range(10): testdata = data[:] rnd.shuffle(testdata) assert testdata != data self.assertEqual(algo.NiceSort(testdata), data) class _CallCount: def __init__(self, fn): self.count = 0 self.fn = fn def __call__(self, *args): self.count += 1 return self.fn(*args) def testKeyfuncA(self): # Generate some random numbers rnd = random.Random(21131) numbers = [rnd.randint(0, 10000) for _ in range(999)] assert numbers != sorted(numbers) # Convert to hex data = [hex(i) for i in numbers] datacopy = data[:] keyfn = self._CallCount(lambda value: str(int(value, 16))) # Sort with key function converting hex to decimal result = algo.NiceSort(data, key=keyfn) self.assertEqual([hex(i) for i in sorted(numbers)], result) self.assertEqual(data, datacopy, msg="Input data was modified in NiceSort") self.assertEqual(keyfn.count, len(numbers), msg="Key function was not called once per value") class _TestData: def __init__(self, name, value): self.name = name self.value = value def testKeyfuncB(self): rnd = random.Random(27396) data = [] for i in range(123): v1 = rnd.randint(0, 5) v2 = rnd.randint(0, 5) data.append(self._TestData("inst-%s-%s-%s" % (v1, v2, i), (v1, v2, i))) rnd.shuffle(data) assert data != sorted(data, key=operator.attrgetter("name")) keyfn = self._CallCount(operator.attrgetter("name")) # Sort by name result = algo.NiceSort(data, key=keyfn) self.assertEqual(result, sorted(data, key=operator.attrgetter("value"))) self.assertEqual(keyfn.count, len(data), msg="Key function was not called once per value") def testNiceSortKey(self): self.assertEqual(algo.NiceSortKey(""), ([None] * algo._SORTER_GROUPS) + [""]) self.assertEqual(algo.NiceSortKey("Hello World"), ["Hello World"] + ([None] * int(algo._SORTER_GROUPS - 1)) + [""]) self.assertEqual(algo.NiceSortKey("node1.net75.bld3.example.com"), ["node", 1, ".net", 75, ".bld", 3, ".example.com", None, ""]) class TestInvertDict(unittest.TestCase): def testInvertDict(self): test_dict = { "foo": 1, "bar": 2, "baz": 5 } self.assertEqual(algo.InvertDict(test_dict), { 1: "foo", 2: "bar", 5: "baz"}) class TestInsertAtPos(unittest.TestCase): def test(self): a = [1, 5, 6] b = [2, 3, 4] self.assertEqual(algo.InsertAtPos(a, 1, b), [1, 2, 3, 4, 5, 6]) self.assertEqual(algo.InsertAtPos(a, 0, b), b + a) self.assertEqual(algo.InsertAtPos(a, len(a), b), a + b) self.assertEqual(algo.InsertAtPos(a, 2, b), [1, 5, 2, 3, 4, 6]) class TimeMock: def __init__(self, values): self.values = values def __call__(self): return self.values.pop(0) class TestRunningTimeout(unittest.TestCase): def setUp(self): self.time_fn = TimeMock([0.0, 0.3, 4.6, 6.5]) def testRemainingFloat(self): timeout = algo.RunningTimeout(5.0, True, _time_fn=self.time_fn) self.assertAlmostEqual(timeout.Remaining(), 4.7) self.assertAlmostEqual(timeout.Remaining(), 0.4) self.assertAlmostEqual(timeout.Remaining(), -1.5) def testRemaining(self): self.time_fn = TimeMock([0, 2, 4, 5, 6]) timeout = 
algo.RunningTimeout(5, True, _time_fn=self.time_fn) self.assertEqual(timeout.Remaining(), 3) self.assertEqual(timeout.Remaining(), 1) self.assertEqual(timeout.Remaining(), 0) self.assertEqual(timeout.Remaining(), -1) def testRemainingNonNegative(self): timeout = algo.RunningTimeout(5.0, False, _time_fn=self.time_fn) self.assertAlmostEqual(timeout.Remaining(), 4.7) self.assertAlmostEqual(timeout.Remaining(), 0.4) self.assertEqual(timeout.Remaining(), 0.0) def testNegativeTimeout(self): self.assertRaises(ValueError, algo.RunningTimeout, -1.0, True) class TestJoinDisjointDicts(unittest.TestCase): def setUp(self): self.non_empty_dict = {"a": 1, "b": 2} self.empty_dict = {} def testWithEmptyDicts(self): self.assertEqual(self.empty_dict, algo.JoinDisjointDicts(self.empty_dict, self.empty_dict)) self.assertEqual(self.non_empty_dict, algo.JoinDisjointDicts( self.empty_dict, self.non_empty_dict)) self.assertEqual(self.non_empty_dict, algo.JoinDisjointDicts( self.non_empty_dict, self.empty_dict)) def testNonDisjoint(self): self.assertRaises(AssertionError, algo.JoinDisjointDicts, self.non_empty_dict, self.non_empty_dict) def testCommonCase(self): dict_a = {"TEST1": 1, "TEST2": 2} dict_b = {"TEST3": 3, "TEST4": 4} result = dict_a.copy() result.update(dict_b) self.assertEqual(result, algo.JoinDisjointDicts(dict_a, dict_b)) self.assertEqual(result, algo.JoinDisjointDicts(dict_b, dict_a)) class TestSequenceToDict(unittest.TestCase): def testEmpty(self): self.assertEqual(algo.SequenceToDict([]), {}) self.assertEqual(algo.SequenceToDict({}), {}) def testSimple(self): data = [(i, str(i), "test%s" % i) for i in range(391)] self.assertEqual(algo.SequenceToDict(data), dict((i, (i, str(i), "test%s" % i)) for i in range(391))) def testCustomKey(self): data = [(i, hex(i), "test%s" % i) for i in range(100)] self.assertEqual(algo.SequenceToDict(data, key=compat.snd), dict((hex(i), (i, hex(i), "test%s" % i)) for i in range(100))) self.assertEqual(algo.SequenceToDict(data, key=lambda (a, b, val): hash(val)), dict((hash("test%s" % i), (i, hex(i), "test%s" % i)) for i in range(100))) def testDuplicate(self): self.assertRaises(ValueError, algo.SequenceToDict, [(0, 0), (0, 0)]) self.assertRaises(ValueError, algo.SequenceToDict, [(i, ) for i in range(200)] + [(10, )]) class TestFlatToDict(unittest.TestCase): def testNormal(self): data = [ ("lv/xenvg", {"foo": "bar", "bar": "baz"}), ("lv/xenfoo", {"foo": "bar", "baz": "blubb"}), ("san/foo", {"ip": "127.0.0.1", "port": 1337}), ("san/blubb/blibb", 54), ] reference = { "lv": { "xenvg": {"foo": "bar", "bar": "baz"}, "xenfoo": {"foo": "bar", "baz": "blubb"}, }, "san": { "foo": {"ip": "127.0.0.1", "port": 1337}, "blubb": {"blibb": 54}, }, } self.assertEqual(algo.FlatToDict(data), reference) def testUnlikeDepth(self): data = [ ("san/foo", {"ip": "127.0.0.1", "port": 1337}), ("san/foo/blubb", 23), # Another foo entry under san ("san/blubb/blibb", 54), ] self.assertRaises(AssertionError, algo.FlatToDict, data) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/check-cert-expired_unittest.bash0000744000000000000000000000360612244641676022667 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e set -o pipefail export PYTHON=${PYTHON:=python} CCE=tools/check-cert-expired err() { echo "$@" echo 'Aborting' exit 1 } impexpd_helper() { $PYTHON "${TOP_SRCDIR:-.}/test/py/import-export_unittest-helper" "$@" } $CCE 2>/dev/null && err 'Accepted empty argument list' $CCE foo bar 2>/dev/null && err 'Accepted more than one argument' $CCE foo bar baz 2>/dev/null && err 'Accepted more than one argument' tmpdir=$(mktemp -d) trap "rm -rf $tmpdir" EXIT [[ -f "$tmpdir/cert-not" ]] && err 'File existed when it should not' $CCE $tmpdir/cert-not 2>/dev/null && err 'Accepted non-existent file' VALIDITY=1 impexpd_helper $tmpdir/cert-valid gencert $CCE $tmpdir/cert-valid 2>/dev/null && \ err 'Reported valid certificate as expired' VALIDITY=-50 impexpd_helper $tmpdir/cert-expired gencert $CCE $tmpdir/cert-expired 2>/dev/null || \ err 'Reported expired certificate as valid' echo > $tmpdir/cert-invalid $CCE $tmpdir/cert-invalid 2>/dev/null && \ err 'Reported invalid certificate as expired' echo 'Hello World' > $tmpdir/cert-invalid2 $CCE $tmpdir/cert-invalid2 2>/dev/null && \ err 'Reported invalid certificate as expired' exit 0 ganeti-2.9.3/test/py/ganeti.tools.burnin_unittest.py0000744000000000000000000000235512244641676022636 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.tools.burnin""" import unittest from ganeti import constants from ganeti.tools import burnin import testutils class TestConstants(unittest.TestCase): def testSupportedDiskTemplates(self): # Ignore disk templates not supported by burnin supported = (constants.DISK_TEMPLATES - frozenset([ constants.DT_BLOCK, ])) self.assertEqual(burnin._SUPPORTED_DISK_TEMPLATES, supported) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.process_unittest.py0000744000000000000000000006126612244641676023025 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.process""" import unittest import tempfile import shutil import os import stat import time import select import signal from ganeti import constants from ganeti import utils from ganeti import errors import testutils class TestIsProcessAlive(unittest.TestCase): """Testing case for IsProcessAlive""" def testExists(self): mypid = os.getpid() self.assert_(utils.IsProcessAlive(mypid), "can't find myself running") def testNotExisting(self): pid_non_existing = os.fork() if pid_non_existing == 0: os._exit(0) elif pid_non_existing < 0: raise SystemError("can't fork") os.waitpid(pid_non_existing, 0) self.assertFalse(utils.IsProcessAlive(pid_non_existing), "nonexisting process detected") class TestGetProcStatusPath(unittest.TestCase): def test(self): self.assert_("/1234/" in utils.process._GetProcStatusPath(1234)) self.assertNotEqual(utils.process._GetProcStatusPath(1), utils.process._GetProcStatusPath(2)) class TestIsProcessHandlingSignal(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testParseSigsetT(self): parse_sigset_t_fn = utils.process._ParseSigsetT self.assertEqual(len(parse_sigset_t_fn("0")), 0) self.assertEqual(parse_sigset_t_fn("1"), set([1])) self.assertEqual(parse_sigset_t_fn("1000a"), set([2, 4, 17])) self.assertEqual(parse_sigset_t_fn("810002"), set([2, 17, 24, ])) self.assertEqual(parse_sigset_t_fn("0000000180000202"), set([2, 10, 32, 33])) self.assertEqual(parse_sigset_t_fn("0000000180000002"), set([2, 32, 33])) self.assertEqual(parse_sigset_t_fn("0000000188000002"), set([2, 28, 32, 33])) self.assertEqual(parse_sigset_t_fn("000000004b813efb"), set([1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 17, 24, 25, 26, 28, 31])) self.assertEqual(parse_sigset_t_fn("ffffff"), set(range(1, 25))) def testGetProcStatusField(self): for field in ["SigCgt", "Name", "FDSize"]: for value in ["", "0", "cat", " 1234 KB"]: pstatus = "\n".join([ "VmPeak: 999 kB", "%s: %s" % (field, value), "TracerPid: 0", ]) result = utils.process._GetProcStatusField(pstatus, field) self.assertEqual(result, value.strip()) def test(self): sp = utils.PathJoin(self.tmpdir, "status") utils.WriteFile(sp, data="\n".join([ "Name: bash", "State: S (sleeping)", "SleepAVG: 98%", "Pid: 22250", "PPid: 10858", "TracerPid: 0", "SigBlk: 0000000000010000", "SigIgn: 0000000000384004", "SigCgt: 000000004b813efb", "CapEff: 0000000000000000", ])) self.assert_(utils.IsProcessHandlingSignal(1234, 10, status_path=sp)) def testNoSigCgt(self): sp = utils.PathJoin(self.tmpdir, "status") utils.WriteFile(sp, data="\n".join([ "Name: bash", ])) self.assertRaises(RuntimeError, utils.IsProcessHandlingSignal, 1234, 10, status_path=sp) def testNoSuchFile(self): sp = utils.PathJoin(self.tmpdir, "notexist") self.assertFalse(utils.IsProcessHandlingSignal(1234, 10, status_path=sp)) @staticmethod def _TestRealProcess(): signal.signal(signal.SIGUSR1, signal.SIG_DFL) if utils.IsProcessHandlingSignal(os.getpid(), signal.SIGUSR1): raise Exception("SIGUSR1 is handled when it should not be") 
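# IsProcessHandlingSignal() decides by decoding the SigCgt hex bitmask from
# /proc/<pid>/status (see testParseSigsetT and testGetProcStatusField above),
# so only an explicitly installed handler, never SIG_DFL or SIG_IGN, should
# be reported as handled. The checks below cycle through all three cases.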
signal.signal(signal.SIGUSR1, lambda signum, frame: None) if not utils.IsProcessHandlingSignal(os.getpid(), signal.SIGUSR1): raise Exception("SIGUSR1 is not handled when it should be") signal.signal(signal.SIGUSR1, signal.SIG_IGN) if utils.IsProcessHandlingSignal(os.getpid(), signal.SIGUSR1): raise Exception("SIGUSR1 is handled when it should not be") signal.signal(signal.SIGUSR1, signal.SIG_DFL) if utils.IsProcessHandlingSignal(os.getpid(), signal.SIGUSR1): raise Exception("SIGUSR1 is handled when it should not be") return True def testRealProcess(self): self.assert_(utils.RunInSeparateProcess(self._TestRealProcess)) class _PostforkProcessReadyHelper: """A helper to use with C{postfork_fn} in RunCmd. It makes sure a process has reached a certain state by reading from a fifo. @ivar write_fd: The fd number to write to """ def __init__(self, timeout): """Initialize the helper. @param timeout: The time in seconds to wait before giving up """ self.timeout = timeout (self.read_fd, self.write_fd) = os.pipe() def Ready(self, pid): """Waits until the process is ready. @param pid: The pid of the process """ (read_ready, _, _) = select.select([self.read_fd], [], [], self.timeout) if not read_ready: # We hit the timeout raise AssertionError("Timeout %d reached while waiting for process %d" " to become ready" % (self.timeout, pid)) def Cleanup(self): """Cleans up the helper. """ os.close(self.read_fd) os.close(self.write_fd) class TestRunCmd(testutils.GanetiTestCase): """Testing case for the RunCmd function""" def setUp(self): testutils.GanetiTestCase.setUp(self) self.magic = time.ctime() + " ganeti test" self.fname = self._CreateTempFile() self.fifo_tmpdir = tempfile.mkdtemp() self.fifo_file = os.path.join(self.fifo_tmpdir, "ganeti_test_fifo") os.mkfifo(self.fifo_file) # If the process is not ready after 20 seconds we have bigger issues self.proc_ready_helper = _PostforkProcessReadyHelper(20) def tearDown(self): self.proc_ready_helper.Cleanup() shutil.rmtree(self.fifo_tmpdir) testutils.GanetiTestCase.tearDown(self) def testOk(self): """Test successful exit code""" result = utils.RunCmd("/bin/sh -c 'exit 0'") self.assertEqual(result.exit_code, 0) self.assertEqual(result.output, "") def testFail(self): """Test fail exit code""" result = utils.RunCmd("/bin/sh -c 'exit 1'") self.assertEqual(result.exit_code, 1) self.assertEqual(result.output, "") def testStdout(self): """Test standard output""" cmd = 'echo -n "%s"' % self.magic result = utils.RunCmd("/bin/sh -c '%s'" % cmd) self.assertEqual(result.stdout, self.magic) result = utils.RunCmd("/bin/sh -c '%s'" % cmd, output=self.fname) self.assertEqual(result.output, "") self.assertFileContent(self.fname, self.magic) def testStderr(self): """Test standard error""" cmd = 'echo -n "%s"' % self.magic result = utils.RunCmd("/bin/sh -c '%s' 1>&2" % cmd) self.assertEqual(result.stderr, self.magic) result = utils.RunCmd("/bin/sh -c '%s' 1>&2" % cmd, output=self.fname) self.assertEqual(result.output, "") self.assertFileContent(self.fname, self.magic) def testCombined(self): """Test combined output""" cmd = 'echo -n "A%s"; echo -n "B%s" 1>&2' % (self.magic, self.magic) expected = "A" + self.magic + "B" + self.magic result = utils.RunCmd("/bin/sh -c '%s'" % cmd) self.assertEqual(result.output, expected) result = utils.RunCmd("/bin/sh -c '%s'" % cmd, output=self.fname) self.assertEqual(result.output, "") self.assertFileContent(self.fname, expected) def testSignal(self): """Test signal""" result = 
utils.RunCmd(["python", "-c", "import os; os.kill(os.getpid(), 15)"]) self.assertEqual(result.signal, 15) self.assertEqual(result.output, "") def testTimeoutClean(self): cmd = ("trap 'exit 0' TERM; echo >&%d; read < %s" % (self.proc_ready_helper.write_fd, self.fifo_file)) result = utils.RunCmd(["/bin/sh", "-c", cmd], timeout=0.2, noclose_fds=[self.proc_ready_helper.write_fd], postfork_fn=self.proc_ready_helper.Ready) self.assertEqual(result.exit_code, 0) def testTimeoutKill(self): cmd = ["/bin/sh", "-c", "trap '' TERM; echo >&%d; read < %s" % (self.proc_ready_helper.write_fd, self.fifo_file)] timeout = 0.2 (out, err, status, ta) = \ utils.process._RunCmdPipe(cmd, {}, False, "/", False, timeout, [self.proc_ready_helper.write_fd], None, _linger_timeout=0.2, postfork_fn=self.proc_ready_helper.Ready) self.assert_(status < 0) self.assertEqual(-status, signal.SIGKILL) def testTimeoutOutputAfterTerm(self): cmd = ("trap 'echo sigtermed; exit 1' TERM; echo >&%d; read < %s" % (self.proc_ready_helper.write_fd, self.fifo_file)) result = utils.RunCmd(["/bin/sh", "-c", cmd], timeout=0.2, noclose_fds=[self.proc_ready_helper.write_fd], postfork_fn=self.proc_ready_helper.Ready) self.assert_(result.failed) self.assertEqual(result.stdout, "sigtermed\n") def testListRun(self): """Test list runs""" result = utils.RunCmd(["true"]) self.assertEqual(result.signal, None) self.assertEqual(result.exit_code, 0) result = utils.RunCmd(["/bin/sh", "-c", "exit 1"]) self.assertEqual(result.signal, None) self.assertEqual(result.exit_code, 1) result = utils.RunCmd(["echo", "-n", self.magic]) self.assertEqual(result.signal, None) self.assertEqual(result.exit_code, 0) self.assertEqual(result.stdout, self.magic) def testFileEmptyOutput(self): """Test file output""" result = utils.RunCmd(["true"], output=self.fname) self.assertEqual(result.signal, None) self.assertEqual(result.exit_code, 0) self.assertFileContent(self.fname, "") def testLang(self): """Test locale environment""" old_env = os.environ.copy() try: os.environ["LANG"] = "en_US.UTF-8" os.environ["LC_ALL"] = "en_US.UTF-8" result = utils.RunCmd(["locale"]) for line in result.output.splitlines(): key, value = line.split("=", 1) # Ignore these variables, they're overridden by LC_ALL if key == "LANG" or key == "LANGUAGE": continue self.failIf(value and value != "C" and value != '"C"', "Variable %s is set to the invalid value '%s'" % (key, value)) finally: os.environ = old_env def testDefaultCwd(self): """Test default working directory""" self.failUnlessEqual(utils.RunCmd(["pwd"]).stdout.strip(), "/") def testCwd(self): """Test default working directory""" self.failUnlessEqual(utils.RunCmd(["pwd"], cwd="/").stdout.strip(), "/") self.failUnlessEqual(utils.RunCmd(["pwd"], cwd="/tmp").stdout.strip(), "/tmp") cwd = os.getcwd() self.failUnlessEqual(utils.RunCmd(["pwd"], cwd=cwd).stdout.strip(), cwd) def testResetEnv(self): """Test environment reset functionality""" self.failUnlessEqual(utils.RunCmd(["env"], reset_env=True).stdout.strip(), "") self.failUnlessEqual(utils.RunCmd(["env"], reset_env=True, env={"FOO": "bar",}).stdout.strip(), "FOO=bar") def testNoFork(self): """Test that nofork raise an error""" self.assertFalse(utils.process._no_fork) utils.DisableFork() try: self.assertTrue(utils.process._no_fork) self.assertRaises(errors.ProgrammerError, utils.RunCmd, ["true"]) finally: utils.process._no_fork = False self.assertFalse(utils.process._no_fork) def testWrongParams(self): """Test wrong parameters""" self.assertRaises(errors.ProgrammerError, utils.RunCmd, ["true"], 
output="/dev/null", interactive=True) def testNocloseFds(self): """Test selective fd retention (noclose_fds)""" temp = open(self.fname, "r+") try: temp.write("test") temp.seek(0) cmd = "read -u %d; echo $REPLY" % temp.fileno() result = utils.RunCmd(["/bin/bash", "-c", cmd]) self.assertEqual(result.stdout.strip(), "") temp.seek(0) result = utils.RunCmd(["/bin/bash", "-c", cmd], noclose_fds=[temp.fileno()]) self.assertEqual(result.stdout.strip(), "test") finally: temp.close() def testNoInputRead(self): testfile = testutils.TestDataFilename("cert1.pem") result = utils.RunCmd(["cat"], timeout=10.0) self.assertFalse(result.failed) self.assertEqual(result.stderr, "") self.assertEqual(result.stdout, "") def testInputFileHandle(self): testfile = testutils.TestDataFilename("cert1.pem") result = utils.RunCmd(["cat"], input_fd=open(testfile, "r")) self.assertFalse(result.failed) self.assertEqual(result.stdout, utils.ReadFile(testfile)) self.assertEqual(result.stderr, "") def testInputNumericFileDescriptor(self): testfile = testutils.TestDataFilename("cert2.pem") fh = open(testfile, "r") try: result = utils.RunCmd(["cat"], input_fd=fh.fileno()) finally: fh.close() self.assertFalse(result.failed) self.assertEqual(result.stdout, utils.ReadFile(testfile)) self.assertEqual(result.stderr, "") def testInputWithCloseFds(self): testfile = testutils.TestDataFilename("cert1.pem") temp = open(self.fname, "r+") try: temp.write("test283523367") temp.seek(0) result = utils.RunCmd(["/bin/bash", "-c", ("cat && read -u %s; echo $REPLY" % temp.fileno())], input_fd=open(testfile, "r"), noclose_fds=[temp.fileno()]) self.assertFalse(result.failed) self.assertEqual(result.stdout.strip(), utils.ReadFile(testfile) + "test283523367") self.assertEqual(result.stderr, "") finally: temp.close() def testOutputAndInteractive(self): self.assertRaises(errors.ProgrammerError, utils.RunCmd, [], output=self.fname, interactive=True) def testOutputAndInput(self): self.assertRaises(errors.ProgrammerError, utils.RunCmd, [], output=self.fname, input_fd=open(self.fname)) class TestRunParts(testutils.GanetiTestCase): """Testing case for the RunParts function""" def setUp(self): self.rundir = tempfile.mkdtemp(prefix="ganeti-test", suffix=".tmp") def tearDown(self): shutil.rmtree(self.rundir) def testEmpty(self): """Test on an empty dir""" self.failUnlessEqual(utils.RunParts(self.rundir, reset_env=True), []) def testSkipWrongName(self): """Test that wrong files are skipped""" fname = os.path.join(self.rundir, "00test.dot") utils.WriteFile(fname, data="") os.chmod(fname, stat.S_IREAD | stat.S_IEXEC) relname = os.path.basename(fname) self.failUnlessEqual(utils.RunParts(self.rundir, reset_env=True), [(relname, constants.RUNPARTS_SKIP, None)]) def testSkipNonExec(self): """Test that non executable files are skipped""" fname = os.path.join(self.rundir, "00test") utils.WriteFile(fname, data="") relname = os.path.basename(fname) self.failUnlessEqual(utils.RunParts(self.rundir, reset_env=True), [(relname, constants.RUNPARTS_SKIP, None)]) def testError(self): """Test error on a broken executable""" fname = os.path.join(self.rundir, "00test") utils.WriteFile(fname, data="") os.chmod(fname, stat.S_IREAD | stat.S_IEXEC) (relname, status, error) = utils.RunParts(self.rundir, reset_env=True)[0] self.failUnlessEqual(relname, os.path.basename(fname)) self.failUnlessEqual(status, constants.RUNPARTS_ERR) self.failUnless(error) def testSorted(self): """Test executions are sorted""" files = [] files.append(os.path.join(self.rundir, "64test")) 
files.append(os.path.join(self.rundir, "00test")) files.append(os.path.join(self.rundir, "42test")) for fname in files: utils.WriteFile(fname, data="") results = utils.RunParts(self.rundir, reset_env=True) for fname in sorted(files): self.failUnlessEqual(os.path.basename(fname), results.pop(0)[0]) def testOk(self): """Test correct execution""" fname = os.path.join(self.rundir, "00test") utils.WriteFile(fname, data="#!/bin/sh\n\necho -n ciao") os.chmod(fname, stat.S_IREAD | stat.S_IEXEC) (relname, status, runresult) = \ utils.RunParts(self.rundir, reset_env=True)[0] self.failUnlessEqual(relname, os.path.basename(fname)) self.failUnlessEqual(status, constants.RUNPARTS_RUN) self.failUnlessEqual(runresult.stdout, "ciao") def testRunFail(self): """Test correct execution, with run failure""" fname = os.path.join(self.rundir, "00test") utils.WriteFile(fname, data="#!/bin/sh\n\nexit 1") os.chmod(fname, stat.S_IREAD | stat.S_IEXEC) (relname, status, runresult) = \ utils.RunParts(self.rundir, reset_env=True)[0] self.failUnlessEqual(relname, os.path.basename(fname)) self.failUnlessEqual(status, constants.RUNPARTS_RUN) self.failUnlessEqual(runresult.exit_code, 1) self.failUnless(runresult.failed) def testRunMix(self): files = [] files.append(os.path.join(self.rundir, "00test")) files.append(os.path.join(self.rundir, "42test")) files.append(os.path.join(self.rundir, "64test")) files.append(os.path.join(self.rundir, "99test")) files.sort() # 1st has errors in execution utils.WriteFile(files[0], data="#!/bin/sh\n\nexit 1") os.chmod(files[0], stat.S_IREAD | stat.S_IEXEC) # 2nd is skipped utils.WriteFile(files[1], data="") # 3rd cannot execute properly utils.WriteFile(files[2], data="") os.chmod(files[2], stat.S_IREAD | stat.S_IEXEC) # 4th execs utils.WriteFile(files[3], data="#!/bin/sh\n\necho -n ciao") os.chmod(files[3], stat.S_IREAD | stat.S_IEXEC) results = utils.RunParts(self.rundir, reset_env=True) (relname, status, runresult) = results[0] self.failUnlessEqual(relname, os.path.basename(files[0])) self.failUnlessEqual(status, constants.RUNPARTS_RUN) self.failUnlessEqual(runresult.exit_code, 1) self.failUnless(runresult.failed) (relname, status, runresult) = results[1] self.failUnlessEqual(relname, os.path.basename(files[1])) self.failUnlessEqual(status, constants.RUNPARTS_SKIP) self.failUnlessEqual(runresult, None) (relname, status, runresult) = results[2] self.failUnlessEqual(relname, os.path.basename(files[2])) self.failUnlessEqual(status, constants.RUNPARTS_ERR) self.failUnless(runresult) (relname, status, runresult) = results[3] self.failUnlessEqual(relname, os.path.basename(files[3])) self.failUnlessEqual(status, constants.RUNPARTS_RUN) self.failUnlessEqual(runresult.output, "ciao") self.failUnlessEqual(runresult.exit_code, 0) self.failUnless(not runresult.failed) def testMissingDirectory(self): nosuchdir = utils.PathJoin(self.rundir, "no/such/directory") self.assertEqual(utils.RunParts(nosuchdir), []) class TestStartDaemon(testutils.GanetiTestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp(prefix="ganeti-test") self.tmpfile = os.path.join(self.tmpdir, "test") def tearDown(self): shutil.rmtree(self.tmpdir) def testShell(self): utils.StartDaemon("echo Hello World > %s" % self.tmpfile) self._wait(self.tmpfile, 60.0, "Hello World") def testShellOutput(self): utils.StartDaemon("echo Hello World", output=self.tmpfile) self._wait(self.tmpfile, 60.0, "Hello World") def testNoShellNoOutput(self): utils.StartDaemon(["pwd"]) def testNoShellNoOutputTouch(self): testfile = os.path.join(self.tmpdir, 
"check") self.failIf(os.path.exists(testfile)) utils.StartDaemon(["touch", testfile]) self._wait(testfile, 60.0, "") def testNoShellOutput(self): utils.StartDaemon(["pwd"], output=self.tmpfile) self._wait(self.tmpfile, 60.0, "/") def testNoShellOutputCwd(self): utils.StartDaemon(["pwd"], output=self.tmpfile, cwd=os.getcwd()) self._wait(self.tmpfile, 60.0, os.getcwd()) def testShellEnv(self): utils.StartDaemon("echo \"$GNT_TEST_VAR\"", output=self.tmpfile, env={ "GNT_TEST_VAR": "Hello World", }) self._wait(self.tmpfile, 60.0, "Hello World") def testNoShellEnv(self): utils.StartDaemon(["printenv", "GNT_TEST_VAR"], output=self.tmpfile, env={ "GNT_TEST_VAR": "Hello World", }) self._wait(self.tmpfile, 60.0, "Hello World") def testOutputFd(self): fd = os.open(self.tmpfile, os.O_WRONLY | os.O_CREAT) try: utils.StartDaemon(["pwd"], output_fd=fd, cwd=os.getcwd()) finally: os.close(fd) self._wait(self.tmpfile, 60.0, os.getcwd()) def testPid(self): pid = utils.StartDaemon("echo $$ > %s" % self.tmpfile) self._wait(self.tmpfile, 60.0, str(pid)) def testPidFile(self): pidfile = os.path.join(self.tmpdir, "pid") checkfile = os.path.join(self.tmpdir, "abort") pid = utils.StartDaemon("while sleep 5; do :; done", pidfile=pidfile, output=self.tmpfile) try: fd = os.open(pidfile, os.O_RDONLY) try: # Check file is locked self.assertRaises(errors.LockError, utils.LockFile, fd) pidtext = os.read(fd, 100) finally: os.close(fd) self.assertEqual(int(pidtext.strip()), pid) self.assert_(utils.IsProcessAlive(pid)) finally: # No matter what happens, kill daemon utils.KillProcess(pid, timeout=5.0, waitpid=False) self.failIf(utils.IsProcessAlive(pid)) self.assertEqual(utils.ReadFile(self.tmpfile), "") def _wait(self, path, timeout, expected): # Due to the asynchronous nature of daemon processes, polling is necessary. # A timeout makes sure the test doesn't hang forever. 
def _CheckFile(): if not (os.path.isfile(path) and utils.ReadFile(path).strip() == expected): raise utils.RetryAgain() try: utils.Retry(_CheckFile, (0.01, 1.5, 1.0), timeout) except utils.RetryTimeout: self.fail("Apparently the daemon didn't run in %s seconds and/or" " didn't write the correct output" % timeout) def testError(self): self.assertRaises(errors.OpExecError, utils.StartDaemon, ["./does-NOT-EXIST/here/0123456789"]) self.assertRaises(errors.OpExecError, utils.StartDaemon, ["./does-NOT-EXIST/here/0123456789"], output=os.path.join(self.tmpdir, "DIR/NOT/EXIST")) self.assertRaises(errors.OpExecError, utils.StartDaemon, ["./does-NOT-EXIST/here/0123456789"], cwd=os.path.join(self.tmpdir, "DIR/NOT/EXIST")) self.assertRaises(errors.OpExecError, utils.StartDaemon, ["./does-NOT-EXIST/here/0123456789"], output=os.path.join(self.tmpdir, "DIR/NOT/EXIST")) fd = os.open(self.tmpfile, os.O_WRONLY | os.O_CREAT) try: self.assertRaises(errors.ProgrammerError, utils.StartDaemon, ["./does-NOT-EXIST/here/0123456789"], output=self.tmpfile, output_fd=fd) finally: os.close(fd) class RunInSeparateProcess(unittest.TestCase): def test(self): for exp in [True, False]: def _child(): return exp self.assertEqual(exp, utils.RunInSeparateProcess(_child)) def testArgs(self): for arg in [0, 1, 999, "Hello World", (1, 2, 3)]: def _child(carg1, carg2): return carg1 == "Foo" and carg2 == arg self.assert_(utils.RunInSeparateProcess(_child, "Foo", arg)) def testPid(self): parent_pid = os.getpid() def _check(): return os.getpid() == parent_pid self.failIf(utils.RunInSeparateProcess(_check)) def testSignal(self): def _kill(): os.kill(os.getpid(), signal.SIGTERM) self.assertRaises(errors.GenericError, utils.RunInSeparateProcess, _kill) def testException(self): def _exc(): raise errors.GenericError("This is a test") self.assertRaises(errors.GenericError, utils.RunInSeparateProcess, _exc) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.server.rapi_unittest.py0000744000000000000000000002207512244641676022443 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
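# Note: these tests exercise the RAPI HTTP layer entirely in-process through
# rapi.testutils._RapiMock (see TestRemoteApiHandler._Test below), so no TCP
# socket, live daemon or cluster is required.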
"""Script for testing ganeti.server.rapi""" import re import unittest import random import mimetools import base64 from cStringIO import StringIO from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors from ganeti import serializer from ganeti import rapi from ganeti import http from ganeti import objects import ganeti.rapi.baserlib import ganeti.rapi.testutils import ganeti.rapi.rlib2 import ganeti.http.auth import testutils class TestRemoteApiHandler(unittest.TestCase): @staticmethod def _LookupWrongUser(_): return None def _Test(self, method, path, headers, reqbody, user_fn=NotImplemented, luxi_client=NotImplemented, reqauth=False): rm = rapi.testutils._RapiMock(user_fn, luxi_client, reqauth=reqauth) (resp_code, resp_headers, resp_body) = \ rm.FetchResponse(path, method, http.ParseHeaders(StringIO(headers)), reqbody) self.assertTrue(resp_headers[http.HTTP_DATE]) self.assertEqual(resp_headers[http.HTTP_CONNECTION], "close") self.assertEqual(resp_headers[http.HTTP_CONTENT_TYPE], http.HTTP_APP_JSON) self.assertEqual(resp_headers[http.HTTP_SERVER], http.HTTP_GANETI_VERSION) return (resp_code, resp_headers, serializer.LoadJson(resp_body)) def testRoot(self): (code, _, data) = self._Test(http.HTTP_GET, "/", "", None) self.assertEqual(code, http.HTTP_OK) self.assertTrue(data is None) def testRootReqAuth(self): (code, _, _) = self._Test(http.HTTP_GET, "/", "", None, reqauth=True) self.assertEqual(code, http.HttpUnauthorized.code) def testVersion(self): (code, _, data) = self._Test(http.HTTP_GET, "/version", "", None) self.assertEqual(code, http.HTTP_OK) self.assertEqual(data, constants.RAPI_VERSION) def testSlashTwo(self): (code, _, data) = self._Test(http.HTTP_GET, "/2", "", None) self.assertEqual(code, http.HTTP_OK) self.assertTrue(data is None) def testFeatures(self): (code, _, data) = self._Test(http.HTTP_GET, "/2/features", "", None) self.assertEqual(code, http.HTTP_OK) self.assertEqual(set(data), set(rapi.rlib2.ALL_FEATURES)) def testPutInstances(self): (code, _, data) = self._Test(http.HTTP_PUT, "/2/instances", "", None) self.assertEqual(code, http.HttpNotImplemented.code) self.assertTrue(data["message"].startswith("Method PUT is unsupported")) def testPostInstancesNoAuth(self): (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", "", None) self.assertEqual(code, http.HttpUnauthorized.code) def testRequestWithUnsupportedMediaType(self): for fn in [lambda s: s, lambda s: s.upper(), lambda s: s.title()]: headers = rapi.testutils._FormatHeaders([ "%s: %s" % (http.HTTP_CONTENT_TYPE, fn("un/supported/media/type")), ]) (code, _, data) = self._Test(http.HTTP_GET, "/", headers, "body") self.assertEqual(code, http.HttpUnsupportedMediaType.code) self.assertEqual(data["message"], "Unsupported Media Type") def testRequestWithInvalidJsonData(self): body = "_this/is/no'valid.json" self.assertRaises(Exception, serializer.LoadJson, body) headers = rapi.testutils._FormatHeaders([ "%s: %s" % (http.HTTP_CONTENT_TYPE, http.HTTP_APP_JSON), ]) (code, _, data) = self._Test(http.HTTP_GET, "/", headers, body) self.assertEqual(code, http.HttpBadRequest.code) self.assertEqual(data["message"], "Unable to parse JSON data") def testUnsupportedAuthScheme(self): headers = rapi.testutils._FormatHeaders([ "%s: %s" % (http.HTTP_AUTHORIZATION, "Unsupported scheme"), ]) (code, _, _) = self._Test(http.HTTP_POST, "/2/instances", headers, "") self.assertEqual(code, http.HttpUnauthorized.code) def testIncompleteBasicAuth(self): headers = rapi.testutils._FormatHeaders([ "%s: 
Basic" % http.HTTP_AUTHORIZATION, ]) (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "") self.assertEqual(code, http.HttpBadRequest.code) self.assertEqual(data["message"], "Basic authentication requires credentials") def testInvalidBasicAuth(self): for auth in ["!invalid=base!64.", base64.b64encode(" "), base64.b64encode("missingcolonchar")]: headers = rapi.testutils._FormatHeaders([ "%s: Basic %s" % (http.HTTP_AUTHORIZATION, auth), ]) (code, _, data) = self._Test(http.HTTP_POST, "/2/instances", headers, "") self.assertEqual(code, http.HttpUnauthorized.code) @staticmethod def _MakeAuthHeaders(username, password, correct_password): if correct_password: pw = password else: pw = "wrongpass" return rapi.testutils._FormatHeaders([ "%s: Basic %s" % (http.HTTP_AUTHORIZATION, base64.b64encode("%s:%s" % (username, pw))), "%s: %s" % (http.HTTP_CONTENT_TYPE, http.HTTP_APP_JSON), ]) def testQueryAuth(self): username = "admin" password = "2046920054" header_fn = compat.partial(self._MakeAuthHeaders, username, password) def _LookupUserNoWrite(name): if name == username: return http.auth.PasswordFileUser(name, password, []) else: return None for access in [rapi.RAPI_ACCESS_WRITE, rapi.RAPI_ACCESS_READ]: def _LookupUserWithWrite(name): if name == username: return http.auth.PasswordFileUser(name, password, [ access, ]) else: return None for qr in constants.QR_VIA_RAPI: # The /2/query resource has somewhat special rules for authentication as # it can be used to retrieve critical information path = "/2/query/%s" % qr for method in rapi.baserlib._SUPPORTED_METHODS: # No authorization (code, _, _) = self._Test(method, path, "", "") if method in (http.HTTP_DELETE, http.HTTP_POST): self.assertEqual(code, http.HttpNotImplemented.code) continue self.assertEqual(code, http.HttpUnauthorized.code) # Incorrect user (code, _, _) = self._Test(method, path, header_fn(True), "", user_fn=self._LookupWrongUser) self.assertEqual(code, http.HttpUnauthorized.code) # User has no write access, but the password is correct (code, _, _) = self._Test(method, path, header_fn(True), "", user_fn=_LookupUserNoWrite) self.assertEqual(code, http.HttpForbidden.code) # Wrong password and no write access (code, _, _) = self._Test(method, path, header_fn(False), "", user_fn=_LookupUserNoWrite) self.assertEqual(code, http.HttpUnauthorized.code) # Wrong password with write access (code, _, _) = self._Test(method, path, header_fn(False), "", user_fn=_LookupUserWithWrite) self.assertEqual(code, http.HttpUnauthorized.code) # Prepare request information if method == http.HTTP_PUT: reqpath = path body = serializer.DumpJson({ "fields": ["name"], }) elif method == http.HTTP_GET: reqpath = "%s?fields=name" % path body = "" else: self.fail("Unknown method '%s'" % method) # User has write access, password is correct (code, _, data) = self._Test(method, reqpath, header_fn(True), body, user_fn=_LookupUserWithWrite, luxi_client=_FakeLuxiClientForQuery) self.assertEqual(code, http.HTTP_OK) self.assertTrue(objects.QueryResponse.FromDict(data)) def testConsole(self): path = "/2/instances/inst1.example.com/console" for method in rapi.baserlib._SUPPORTED_METHODS: for reqauth in [False, True]: # No authorization (code, _, _) = self._Test(method, path, "", "", reqauth=reqauth) if method == http.HTTP_GET or reqauth: self.assertEqual(code, http.HttpUnauthorized.code) else: self.assertEqual(code, http.HttpNotImplemented.code) class _FakeLuxiClientForQuery: def __init__(self, *args, **kwargs): pass def Query(self, *args): return 
objects.QueryResponse(fields=[]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.hypervisor.hv_kvm_unittest.py0000744000000000000000000003070212271422343023666 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing the hypervisor.hv_kvm module""" import threading import tempfile import unittest import socket import os import struct from ganeti import serializer from ganeti import constants from ganeti import compat from ganeti import objects from ganeti import errors from ganeti import utils from ganeti import pathutils from ganeti.hypervisor import hv_kvm import testutils class QmpStub(threading.Thread): """Stub for a QMP endpoint for a KVM instance """ _QMP_BANNER_DATA = { "QMP": { "version": { "package": "", "qemu": { "micro": 50, "minor": 13, "major": 0, }, "capabilities": [], }, } } _EMPTY_RESPONSE = { "return": [], } def __init__(self, socket_filename, server_responses): """Creates a QMP stub @type socket_filename: string @param socket_filename: filename of the UNIX socket that will be created by this class and used for the communication @type server_responses: list @param server_responses: list of responses that the server sends in response to whatever it receives """ threading.Thread.__init__(self) self.socket_filename = socket_filename self.script = server_responses self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.socket.bind(self.socket_filename) self.socket.listen(1) def run(self): # Hypothesis: the messages we receive contain only a complete QMP message # encoded in JSON.
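# A note on framing, since run() below depends on it: each QMP message is a
# single JSON object terminated by QmpConnection._MESSAGE_END_TOKEN (CRLF in
# the test data). A session against this stub therefore looks roughly like:
#   server: {"QMP": {"version": {...}, "capabilities": []}}\r\n
#   client: {"execute": "qmp_capabilities"}\r\n
#   server: {"return": []}\r\n
#   client: {"execute": "query-status", "arguments": []}\r\n
#   server: {"return": {"running": true, "singlestep": false}}\r\n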
conn, addr = self.socket.accept() # Send the banner as the first thing conn.send(self.encode_string(self._QMP_BANNER_DATA)) # Expect qmp_capabilities and return an empty response conn.recv(4096) conn.send(self.encode_string(self._EMPTY_RESPONSE)) while True: # We ignore the expected message, as the purpose of this object is not # to verify the correctness of the communication but to act as a # partner for the SUT (System Under Test, that is QmpConnection) msg = conn.recv(4096) if not msg: break if not self.script: break response = self.script.pop(0) if isinstance(response, str): conn.send(response) elif isinstance(response, list): for chunk in response: conn.send(chunk) else: raise errors.ProgrammerError("Unknown response type for %s" % response) conn.close() def encode_string(self, message): return (serializer.DumpJson(message) + hv_kvm.QmpConnection._MESSAGE_END_TOKEN) class TestQmpMessage(testutils.GanetiTestCase): def testSerialization(self): test_data = { "execute": "command", "arguments": ["a", "b", "c"], } message = hv_kvm.QmpMessage(test_data) for k, v in test_data.items(): self.assertEqual(message[k], v) serialized = str(message) self.assertEqual(len(serialized.splitlines()), 1, msg="Got multi-line message") rebuilt_message = hv_kvm.QmpMessage.BuildFromJsonString(serialized) self.assertEqual(rebuilt_message, message) self.assertEqual(len(rebuilt_message), len(test_data)) def testDelete(self): toDelete = "execute" test_data = { toDelete: "command", "arguments": ["a", "b", "c"], } message = hv_kvm.QmpMessage(test_data) oldLen = len(message) del(message[toDelete]) newLen = len(message) self.assertEqual(oldLen - 1, newLen) class TestQmp(testutils.GanetiTestCase): def testQmp(self): requests = [ {"execute": "query-kvm", "arguments": []}, {"execute": "eject", "arguments": {"device": "ide1-cd0"}}, {"execute": "query-status", "arguments": []}, {"execute": "query-name", "arguments": []}, ] server_responses = [ # One message, one send() '{"return": {"enabled": true, "present": true}}\r\n', # Message sent using multiple send() ['{"retur', 'n": {}}\r\n'], # Multiple messages sent using one send() '{"return": [{"name": "quit"}, {"name": "eject"}]}\r\n' '{"return": {"running": true, "singlestep": false}}\r\n', ] expected_responses = [ {"return": {"enabled": True, "present": True}}, {"return": {}}, {"return": [{"name": "quit"}, {"name": "eject"}]}, {"return": {"running": True, "singlestep": False}}, ] # Set up the stub socket_file = tempfile.NamedTemporaryFile() os.remove(socket_file.name) qmp_stub = QmpStub(socket_file.name, server_responses) qmp_stub.start() # Set up the QMP connection qmp_connection = hv_kvm.QmpConnection(socket_file.name) qmp_connection.connect() # Format the script for request, expected_response in zip(requests, expected_responses): response = qmp_connection.Execute(request) msg = hv_kvm.QmpMessage(expected_response) self.assertEqual(len(str(msg).splitlines()), 1, msg="Got multi-line message") self.assertEqual(response, msg) class TestConsole(unittest.TestCase): def _Test(self, instance, node, hvparams): cons = hv_kvm.KVMHypervisor.GetInstanceConsole(instance, node, hvparams, {}) self.assertTrue(cons.Validate()) return cons def testSerial(self): instance = objects.Instance(name="kvm.example.com", primary_node="node6017-uuid") node = objects.Node(name="node6017", uuid="node6017-uuid") hvparams = { constants.HV_SERIAL_CONSOLE: True, constants.HV_VNC_BIND_ADDRESS: None, constants.HV_KVM_SPICE_BIND: None, } cons = self._Test(instance, node, hvparams) self.assertEqual(cons.kind, 
constants.CONS_SSH) self.assertEqual(cons.host, node.name) self.assertEqual(cons.command[0], pathutils.KVM_CONSOLE_WRAPPER) self.assertEqual(cons.command[1], constants.SOCAT_PATH) def testVnc(self): instance = objects.Instance(name="kvm.example.com", primary_node="node7235-uuid", network_port=constants.VNC_BASE_PORT + 10) node = objects.Node(name="node7235", uuid="node7235-uuid") hvparams = { constants.HV_SERIAL_CONSOLE: False, constants.HV_VNC_BIND_ADDRESS: "192.0.2.1", constants.HV_KVM_SPICE_BIND: None, } cons = self._Test(instance, node, hvparams) self.assertEqual(cons.kind, constants.CONS_VNC) self.assertEqual(cons.host, "192.0.2.1") self.assertEqual(cons.port, constants.VNC_BASE_PORT + 10) self.assertEqual(cons.display, 10) def testSpice(self): instance = objects.Instance(name="kvm.example.com", primary_node="node7235", network_port=11000) node = objects.Node(name="node7235", uuid="node7235-uuid") hvparams = { constants.HV_SERIAL_CONSOLE: False, constants.HV_VNC_BIND_ADDRESS: None, constants.HV_KVM_SPICE_BIND: "192.0.2.1", } cons = self._Test(instance, node, hvparams) self.assertEqual(cons.kind, constants.CONS_SPICE) self.assertEqual(cons.host, "192.0.2.1") self.assertEqual(cons.port, 11000) def testNoConsole(self): instance = objects.Instance(name="kvm.example.com", primary_node="node24325", network_port=0) node = objects.Node(name="node24325", uuid="node24325-uuid") hvparams = { constants.HV_SERIAL_CONSOLE: False, constants.HV_VNC_BIND_ADDRESS: None, constants.HV_KVM_SPICE_BIND: None, } cons = self._Test(instance, node, hvparams) self.assertEqual(cons.kind, constants.CONS_MESSAGE) class TestVersionChecking(testutils.GanetiTestCase): def testParseVersion(self): parse = hv_kvm.KVMHypervisor._ParseKVMVersion help_112 = testutils.ReadTestData("kvm_1.1.2_help.txt") help_10 = testutils.ReadTestData("kvm_1.0_help.txt") help_01590 = testutils.ReadTestData("kvm_0.15.90_help.txt") help_0125 = testutils.ReadTestData("kvm_0.12.5_help.txt") help_091 = testutils.ReadTestData("kvm_0.9.1_help.txt") self.assertEqual(parse(help_112), ("1.1.2", 1, 1, 2)) self.assertEqual(parse(help_10), ("1.0", 1, 0, 0)) self.assertEqual(parse(help_01590), ("0.15.90", 0, 15, 90)) self.assertEqual(parse(help_0125), ("0.12.5", 0, 12, 5)) self.assertEqual(parse(help_091), ("0.9.1", 0, 9, 1)) class TestSpiceParameterList(unittest.TestCase): def test(self): defaults = constants.HVC_DEFAULTS[constants.HT_KVM] params = \ compat.UniqueFrozenset(getattr(constants, name) for name in dir(constants) if name.startswith("HV_KVM_SPICE_")) # Parameters whose default value evaluates to True and don't need to be set defaults_true = frozenset(filter(defaults.__getitem__, params)) self.assertEqual(defaults_true, frozenset([ constants.HV_KVM_SPICE_AUDIO_COMPR, constants.HV_KVM_SPICE_USE_VDAGENT, constants.HV_KVM_SPICE_TLS_CIPHERS, ])) # HV_KVM_SPICE_BIND decides whether the other parameters must be set if # their default evaluates to False assert constants.HV_KVM_SPICE_BIND in params assert constants.HV_KVM_SPICE_BIND not in defaults_true # Exclude some parameters params -= defaults_true | frozenset([ constants.HV_KVM_SPICE_BIND, ]) self.assertEqual(hv_kvm._SPICE_ADDITIONAL_PARAMS, params) class TestHelpRegexps(testutils.GanetiTestCase): def testBootRe(self): """Check _BOOT_RE It has to match -drive.*boot=on|off, except if there is another dash-option at the beginning of the line.
""" boot_re = hv_kvm.KVMHypervisor._BOOT_RE help_112 = testutils.ReadTestData("kvm_1.1.2_help.txt") help_10 = testutils.ReadTestData("kvm_1.0_help.txt") help_01590 = testutils.ReadTestData("kvm_0.15.90_help.txt") help_0125 = testutils.ReadTestData("kvm_0.12.5_help.txt") help_091 = testutils.ReadTestData("kvm_0.9.1_help.txt") help_091_fake = testutils.ReadTestData("kvm_0.9.1_help_boot_test.txt") self.assertTrue(boot_re.search(help_091)) self.assertTrue(boot_re.search(help_0125)) self.assertFalse(boot_re.search(help_091_fake)) self.assertFalse(boot_re.search(help_112)) self.assertFalse(boot_re.search(help_10)) self.assertFalse(boot_re.search(help_01590)) class TestGetTunFeatures(unittest.TestCase): def testWrongIoctl(self): tmpfile = tempfile.NamedTemporaryFile() # A file does not have the right ioctls, so this must always fail result = hv_kvm._GetTunFeatures(tmpfile.fileno()) self.assertTrue(result is None) def _FakeIoctl(self, features, fd, request, buf): self.assertEqual(request, hv_kvm.TUNGETFEATURES) (reqno, ) = struct.unpack("I", buf) self.assertEqual(reqno, 0) return struct.pack("I", features) def test(self): tmpfile = tempfile.NamedTemporaryFile() fd = tmpfile.fileno() for features in [0, hv_kvm.IFF_VNET_HDR]: fn = compat.partial(self._FakeIoctl, features) result = hv_kvm._GetTunFeatures(fd, _ioctl=fn) self.assertEqual(result, features) class TestProbeTapVnetHdr(unittest.TestCase): def _FakeTunFeatures(self, expected_fd, flags, fd): self.assertEqual(fd, expected_fd) return flags def test(self): tmpfile = tempfile.NamedTemporaryFile() fd = tmpfile.fileno() for flags in [0, hv_kvm.IFF_VNET_HDR]: fn = compat.partial(self._FakeTunFeatures, fd, flags) result = hv_kvm._ProbeTapVnetHdr(fd, _features_fn=fn) if flags == 0: self.assertFalse(result) else: self.assertTrue(result) def testUnsupported(self): tmpfile = tempfile.NamedTemporaryFile() fd = tmpfile.fileno() self.assertFalse(hv_kvm._ProbeTapVnetHdr(fd, _features_fn=lambda _: None)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/pycurl_reset_unittest.py0000744000000000000000000000445512244641676021457 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing for an issue in PycURL""" import sys import warnings import unittest import textwrap import pycurl import testutils DETAILS = [ ("PycURL 7.19.0 added a new function named \"reset\" on \"pycurl.Curl\"" " objects to release all references to other resources. Unfortunately that" " version contains a bug with reference counting on the \"None\" singleton," " leading to a crash of the Python interpreter after a certain amount of" " performed requests. Your system uses a version of PycURL affected by this" " issue. A patch is available at [1]. 
A detailed description can be found" " at [2].\n"), "\n", ("[1] http://sf.net/tracker/?" "func=detail&aid=2893665&group_id=28236&atid=392777\n"), "[2] https://bugzilla.redhat.com/show_bug.cgi?id=624559", ] class TestPyCurlReset(unittest.TestCase): def test(self): start_refcount = sys.getrefcount(None) abort_refcount = int(start_refcount * 0.8) assert start_refcount > 100 curl = pycurl.Curl() try: reset_fn = curl.reset except AttributeError: pass else: for i in range(start_refcount * 2): reset_fn() # The bug can be detected if calling "reset" several times continuously # reduces the number of references if sys.getrefcount(None) < abort_refcount: print >>sys.stderr, "#" * 78 for line in DETAILS: print >>sys.stderr, textwrap.fill(line, width=78) print >>sys.stderr, "#" * 78 break if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.io_unittest.py0000744000000000000000000010674112244641676021744 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.io""" import os import tempfile import unittest import shutil import glob import time import signal import stat import errno from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors import testutils class TestReadFile(testutils.GanetiTestCase): def testReadAll(self): data = utils.ReadFile(testutils.TestDataFilename("cert1.pem")) self.assertEqual(len(data), 814) h = compat.md5_hash() h.update(data) self.assertEqual(h.hexdigest(), "a491efb3efe56a0535f924d5f8680fd4") def testReadSize(self): data = utils.ReadFile(testutils.TestDataFilename("cert1.pem"), size=100) self.assertEqual(len(data), 100) h = compat.md5_hash() h.update(data) self.assertEqual(h.hexdigest(), "893772354e4e690b9efd073eed433ce7") def testCallback(self): def _Cb(fh): self.assertEqual(fh.tell(), 0) data = utils.ReadFile(testutils.TestDataFilename("cert1.pem"), preread=_Cb) self.assertEqual(len(data), 814) def testError(self): self.assertRaises(EnvironmentError, utils.ReadFile, "/dev/null/does-not-exist") class TestReadOneLineFile(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) def testDefault(self): data = utils.ReadOneLineFile(testutils.TestDataFilename("cert1.pem")) self.assertEqual(len(data), 27) self.assertEqual(data, "-----BEGIN CERTIFICATE-----") def testNotStrict(self): data = utils.ReadOneLineFile(testutils.TestDataFilename("cert1.pem"), strict=False) self.assertEqual(len(data), 27) self.assertEqual(data, "-----BEGIN CERTIFICATE-----") def testStrictFailure(self): self.assertRaises(errors.GenericError, utils.ReadOneLineFile, testutils.TestDataFilename("cert1.pem"), strict=True) def testLongLine(self): dummydata = (1024 * "Hello World! 
") myfile = self._CreateTempFile() utils.WriteFile(myfile, data=dummydata) datastrict = utils.ReadOneLineFile(myfile, strict=True) datalax = utils.ReadOneLineFile(myfile, strict=False) self.assertEqual(dummydata, datastrict) self.assertEqual(dummydata, datalax) def testNewline(self): myfile = self._CreateTempFile() myline = "myline" for nl in ["", "\n", "\r\n"]: dummydata = "%s%s" % (myline, nl) utils.WriteFile(myfile, data=dummydata) datalax = utils.ReadOneLineFile(myfile, strict=False) self.assertEqual(myline, datalax) datastrict = utils.ReadOneLineFile(myfile, strict=True) self.assertEqual(myline, datastrict) def testWhitespaceAndMultipleLines(self): myfile = self._CreateTempFile() for nl in ["", "\n", "\r\n"]: for ws in [" ", "\t", "\t\t \t", "\t "]: dummydata = (1024 * ("Foo bar baz %s%s" % (ws, nl))) utils.WriteFile(myfile, data=dummydata) datalax = utils.ReadOneLineFile(myfile, strict=False) if nl: self.assert_(set("\r\n") & set(dummydata)) self.assertRaises(errors.GenericError, utils.ReadOneLineFile, myfile, strict=True) explen = len("Foo bar baz ") + len(ws) self.assertEqual(len(datalax), explen) self.assertEqual(datalax, dummydata[:explen]) self.assertFalse(set("\r\n") & set(datalax)) else: datastrict = utils.ReadOneLineFile(myfile, strict=True) self.assertEqual(dummydata, datastrict) self.assertEqual(dummydata, datalax) def testEmptylines(self): myfile = self._CreateTempFile() myline = "myline" for nl in ["\n", "\r\n"]: for ol in ["", "otherline"]: dummydata = "%s%s%s%s%s%s" % (nl, nl, myline, nl, ol, nl) utils.WriteFile(myfile, data=dummydata) self.assert_(set("\r\n") & set(dummydata)) datalax = utils.ReadOneLineFile(myfile, strict=False) self.assertEqual(myline, datalax) if ol: self.assertRaises(errors.GenericError, utils.ReadOneLineFile, myfile, strict=True) else: datastrict = utils.ReadOneLineFile(myfile, strict=True) self.assertEqual(myline, datastrict) def testEmptyfile(self): myfile = self._CreateTempFile() self.assertRaises(errors.GenericError, utils.ReadOneLineFile, myfile) class TestTimestampForFilename(unittest.TestCase): def test(self): self.assert_("." 
not in utils.TimestampForFilename()) self.assert_(":" not in utils.TimestampForFilename()) class TestCreateBackup(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): testutils.GanetiTestCase.tearDown(self) shutil.rmtree(self.tmpdir) def testEmpty(self): filename = utils.PathJoin(self.tmpdir, "config.data") utils.WriteFile(filename, data="") bname = utils.CreateBackup(filename) self.assertFileContent(bname, "") self.assertEqual(len(glob.glob("%s*" % filename)), 2) utils.CreateBackup(filename) self.assertEqual(len(glob.glob("%s*" % filename)), 3) utils.CreateBackup(filename) self.assertEqual(len(glob.glob("%s*" % filename)), 4) fifoname = utils.PathJoin(self.tmpdir, "fifo") os.mkfifo(fifoname) self.assertRaises(errors.ProgrammerError, utils.CreateBackup, fifoname) def testContent(self): bkpcount = 0 for data in ["", "X", "Hello World!\n" * 100, "Binary data\0\x01\x02\n"]: for rep in [1, 2, 10, 127]: testdata = data * rep filename = utils.PathJoin(self.tmpdir, "test.data_") utils.WriteFile(filename, data=testdata) self.assertFileContent(filename, testdata) for _ in range(3): bname = utils.CreateBackup(filename) bkpcount += 1 self.assertFileContent(bname, testdata) self.assertEqual(len(glob.glob("%s*" % filename)), 1 + bkpcount) class TestListVisibleFiles(unittest.TestCase): """Test case for ListVisibleFiles""" def setUp(self): self.path = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.path) def _CreateFiles(self, files): for name in files: utils.WriteFile(os.path.join(self.path, name), data="test") def _test(self, files, expected): self._CreateFiles(files) found = utils.ListVisibleFiles(self.path) self.assertEqual(set(found), set(expected)) def testAllVisible(self): files = ["a", "b", "c"] expected = files self._test(files, expected) def testNoneVisible(self): files = [".a", ".b", ".c"] expected = [] self._test(files, expected) def testSomeVisible(self): files = ["a", "b", ".c"] expected = ["a", "b"] self._test(files, expected) def testNonAbsolutePath(self): self.failUnlessRaises(errors.ProgrammerError, utils.ListVisibleFiles, "abc") def testNonNormalizedPath(self): self.failUnlessRaises(errors.ProgrammerError, utils.ListVisibleFiles, "/bin/../tmp") def testMountpoint(self): lvfmp_fn = compat.partial(utils.ListVisibleFiles, _is_mountpoint=lambda _: True) self.assertEqual(lvfmp_fn(self.path), []) # Create "lost+found" as a regular file self._CreateFiles(["foo", "bar", ".baz", "lost+found"]) self.assertEqual(set(lvfmp_fn(self.path)), set(["foo", "bar", "lost+found"])) # Replace "lost+found" with a directory laf_path = utils.PathJoin(self.path, "lost+found") utils.RemoveFile(laf_path) os.mkdir(laf_path) self.assertEqual(set(lvfmp_fn(self.path)), set(["foo", "bar"])) def testLostAndFoundNoMountpoint(self): files = ["foo", "bar", ".Hello World", "lost+found"] expected = ["foo", "bar", "lost+found"] self._test(files, expected) class TestWriteFile(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = None self.tfile = tempfile.NamedTemporaryFile() self.did_pre = False self.did_post = False self.did_write = False def tearDown(self): testutils.GanetiTestCase.tearDown(self) if self.tmpdir: shutil.rmtree(self.tmpdir) def markPre(self, fd): self.did_pre = True def markPost(self, fd): self.did_post = True def markWrite(self, fd): self.did_write = True def testWrite(self): data = "abc" utils.WriteFile(self.tfile.name, data=data) 
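# utils.WriteFile is expected to write into a temporary file and atomically
# rename it over the destination (testNoLeftovers and testFailRename below
# rely on exactly that), so the read-back that follows never sees a
# half-written file.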
self.assertEqual(utils.ReadFile(self.tfile.name), data) def testWriteSimpleUnicode(self): data = u"abc" utils.WriteFile(self.tfile.name, data=data) self.assertEqual(utils.ReadFile(self.tfile.name), data) def testErrors(self): self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, data="test", fn=lambda fd: None) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, data="test", atime=0) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, mode=0400, keep_perms=utils.KP_ALWAYS) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, uid=0, keep_perms=utils.KP_ALWAYS) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, gid=0, keep_perms=utils.KP_ALWAYS) self.assertRaises(errors.ProgrammerError, utils.WriteFile, self.tfile.name, mode=0400, uid=0, keep_perms=utils.KP_ALWAYS) def testPreWrite(self): utils.WriteFile(self.tfile.name, data="", prewrite=self.markPre) self.assertTrue(self.did_pre) self.assertFalse(self.did_post) self.assertFalse(self.did_write) def testPostWrite(self): utils.WriteFile(self.tfile.name, data="", postwrite=self.markPost) self.assertFalse(self.did_pre) self.assertTrue(self.did_post) self.assertFalse(self.did_write) def testWriteFunction(self): utils.WriteFile(self.tfile.name, fn=self.markWrite) self.assertFalse(self.did_pre) self.assertFalse(self.did_post) self.assertTrue(self.did_write) def testDryRun(self): orig = "abc" self.tfile.write(orig) self.tfile.flush() utils.WriteFile(self.tfile.name, data="hello", dry_run=True) self.assertEqual(utils.ReadFile(self.tfile.name), orig) def testTimes(self): f = self.tfile.name for at, mt in [(0, 0), (1000, 1000), (2000, 3000), (int(time.time()), 5000)]: utils.WriteFile(f, data="hello", atime=at, mtime=mt) st = os.stat(f) self.assertEqual(st.st_atime, at) self.assertEqual(st.st_mtime, mt) def testNoClose(self): data = "hello" self.assertEqual(utils.WriteFile(self.tfile.name, data="abc"), None) fd = utils.WriteFile(self.tfile.name, data=data, close=False) try: os.lseek(fd, 0, 0) self.assertEqual(os.read(fd, 4096), data) finally: os.close(fd) def testNoLeftovers(self): self.tmpdir = tempfile.mkdtemp() self.assertEqual(utils.WriteFile(utils.PathJoin(self.tmpdir, "test"), data="abc"), None) self.assertEqual(os.listdir(self.tmpdir), ["test"]) def testFailRename(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") os.mkdir(target) self.assertRaises(OSError, utils.WriteFile, target, data="abc") self.assertTrue(os.path.isdir(target)) self.assertEqual(os.listdir(self.tmpdir), ["target"]) self.assertFalse(os.listdir(target)) def testFailRenameDryRun(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") os.mkdir(target) self.assertEqual(utils.WriteFile(target, data="abc", dry_run=True), None) self.assertTrue(os.path.isdir(target)) self.assertEqual(os.listdir(self.tmpdir), ["target"]) self.assertFalse(os.listdir(target)) def testBackup(self): self.tmpdir = tempfile.mkdtemp() testfile = utils.PathJoin(self.tmpdir, "test") self.assertEqual(utils.WriteFile(testfile, data="foo", backup=True), None) self.assertEqual(utils.ReadFile(testfile), "foo") self.assertEqual(os.listdir(self.tmpdir), ["test"]) # Write again assert os.path.isfile(testfile) self.assertEqual(utils.WriteFile(testfile, data="bar", backup=True), None) self.assertEqual(utils.ReadFile(testfile), "bar") 
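# After the second backup=True write the old contents ("foo") must survive
# in exactly one file matching "test.backup*" next to the rewritten "test"
# file, which is what the following glob and listdir checks verify.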
self.assertEqual(len(glob.glob("%s.backup*" % testfile)), 1) self.assertTrue("test" in os.listdir(self.tmpdir)) self.assertEqual(len(os.listdir(self.tmpdir)), 2) # Write again as dry-run assert os.path.isfile(testfile) self.assertEqual(utils.WriteFile(testfile, data="000", backup=True, dry_run=True), None) self.assertEqual(utils.ReadFile(testfile), "bar") self.assertEqual(len(glob.glob("%s.backup*" % testfile)), 1) self.assertTrue("test" in os.listdir(self.tmpdir)) self.assertEqual(len(os.listdir(self.tmpdir)), 2) def testFileMode(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") self.assertRaises(OSError, utils.WriteFile, target, data="data", keep_perms=utils.KP_ALWAYS) # All masks have only user bits set, to avoid interactions with umask utils.WriteFile(target, data="data", mode=0200) self.assertFileMode(target, 0200) utils.WriteFile(target, data="data", mode=0400, keep_perms=utils.KP_IF_EXISTS) self.assertFileMode(target, 0200) utils.WriteFile(target, data="data", keep_perms=utils.KP_ALWAYS) self.assertFileMode(target, 0200) utils.WriteFile(target, data="data", mode=0700) self.assertFileMode(target, 0700) def testNewFileMode(self): self.tmpdir = tempfile.mkdtemp() target = utils.PathJoin(self.tmpdir, "target") utils.WriteFile(target, data="data", mode=0400, keep_perms=utils.KP_IF_EXISTS) self.assertFileMode(target, 0400) class TestFileID(testutils.GanetiTestCase): def testEquality(self): name = self._CreateTempFile() oldi = utils.GetFileID(path=name) self.failUnless(utils.VerifyFileID(oldi, oldi)) def testUpdate(self): name = self._CreateTempFile() oldi = utils.GetFileID(path=name) fd = os.open(name, os.O_RDWR) try: newi = utils.GetFileID(fd=fd) self.failUnless(utils.VerifyFileID(oldi, newi)) self.failUnless(utils.VerifyFileID(newi, oldi)) finally: os.close(fd) def testWriteFile(self): name = self._CreateTempFile() oldi = utils.GetFileID(path=name) mtime = oldi[2] os.utime(name, (mtime + 10, mtime + 10)) self.assertRaises(errors.LockError, utils.SafeWriteFile, name, oldi, data="") os.utime(name, (mtime - 10, mtime - 10)) utils.SafeWriteFile(name, oldi, data="") oldi = utils.GetFileID(path=name) mtime = oldi[2] os.utime(name, (mtime + 10, mtime + 10)) # this doesn't raise, since we passed None utils.SafeWriteFile(name, None, data="") def testError(self): t = tempfile.NamedTemporaryFile() self.assertRaises(errors.ProgrammerError, utils.GetFileID, path=t.name, fd=t.fileno()) class TestRemoveFile(unittest.TestCase): """Test case for the RemoveFile function""" def setUp(self): """Create a temp dir and file for each case""" self.tmpdir = tempfile.mkdtemp("", "ganeti-unittest-") fd, self.tmpfile = tempfile.mkstemp("", "", self.tmpdir) os.close(fd) def tearDown(self): if os.path.exists(self.tmpfile): os.unlink(self.tmpfile) os.rmdir(self.tmpdir) def testIgnoreDirs(self): """Test that RemoveFile() ignores directories""" self.assertEqual(None, utils.RemoveFile(self.tmpdir)) def testIgnoreNotExisting(self): """Test that RemoveFile() ignores non-existing files""" utils.RemoveFile(self.tmpfile) utils.RemoveFile(self.tmpfile) def testRemoveFile(self): """Test that RemoveFile does remove a file""" utils.RemoveFile(self.tmpfile) if os.path.exists(self.tmpfile): self.fail("File '%s' not removed" % self.tmpfile) def testRemoveSymlink(self): """Test that RemoveFile does remove symlinks""" symlink = self.tmpdir + "/symlink" os.symlink("no-such-file", symlink) utils.RemoveFile(symlink) if os.path.exists(symlink): self.fail("File '%s' not removed" % symlink) 
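# Repeat with a symlink whose target exists: RemoveFile() must delete only
# the link itself and leave the target (self.tmpfile) in place for tearDown.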
os.symlink(self.tmpfile, symlink) utils.RemoveFile(symlink) if os.path.exists(symlink): self.fail("File '%s' not removed" % symlink) class TestRemoveDir(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): try: shutil.rmtree(self.tmpdir) except EnvironmentError: pass def testEmptyDir(self): utils.RemoveDir(self.tmpdir) self.assertFalse(os.path.isdir(self.tmpdir)) def testNonEmptyDir(self): self.tmpfile = os.path.join(self.tmpdir, "test1") open(self.tmpfile, "w").close() self.assertRaises(EnvironmentError, utils.RemoveDir, self.tmpdir) class TestRename(unittest.TestCase): """Test case for RenameFile""" def setUp(self): """Create a temporary directory""" self.tmpdir = tempfile.mkdtemp() self.tmpfile = os.path.join(self.tmpdir, "test1") # Touch the file open(self.tmpfile, "w").close() def tearDown(self): """Remove temporary directory""" shutil.rmtree(self.tmpdir) def testSimpleRename1(self): """Simple rename 1""" utils.RenameFile(self.tmpfile, os.path.join(self.tmpdir, "xyz")) self.assert_(os.path.isfile(os.path.join(self.tmpdir, "xyz"))) def testSimpleRename2(self): """Simple rename 2""" utils.RenameFile(self.tmpfile, os.path.join(self.tmpdir, "xyz"), mkdir=True) self.assert_(os.path.isfile(os.path.join(self.tmpdir, "xyz"))) def testRenameMkdir(self): """Rename with mkdir""" utils.RenameFile(self.tmpfile, os.path.join(self.tmpdir, "test/xyz"), mkdir=True) self.assert_(os.path.isdir(os.path.join(self.tmpdir, "test"))) self.assert_(os.path.isfile(os.path.join(self.tmpdir, "test/xyz"))) self.assertRaises(EnvironmentError, utils.RenameFile, os.path.join(self.tmpdir, "test/xyz"), os.path.join(self.tmpdir, "test/foo/bar/baz"), mkdir=True) self.assertTrue(os.path.exists(os.path.join(self.tmpdir, "test/xyz"))) self.assertFalse(os.path.exists(os.path.join(self.tmpdir, "test/foo/bar"))) self.assertFalse(os.path.exists(os.path.join(self.tmpdir, "test/foo/bar/baz"))) class TestMakedirs(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testNonExisting(self): path = utils.PathJoin(self.tmpdir, "foo") utils.Makedirs(path) self.assert_(os.path.isdir(path)) def testExisting(self): path = utils.PathJoin(self.tmpdir, "foo") os.mkdir(path) utils.Makedirs(path) self.assert_(os.path.isdir(path)) def testRecursiveNonExisting(self): path = utils.PathJoin(self.tmpdir, "foo/bar/baz") utils.Makedirs(path) self.assert_(os.path.isdir(path)) def testRecursiveExisting(self): path = utils.PathJoin(self.tmpdir, "B/moo/xyz") self.assertFalse(os.path.exists(path)) os.mkdir(utils.PathJoin(self.tmpdir, "B")) utils.Makedirs(path) self.assert_(os.path.isdir(path)) class TestEnsureDirs(unittest.TestCase): """Tests for EnsureDirs""" def setUp(self): self.dir = tempfile.mkdtemp() self.old_umask = os.umask(0777) def testEnsureDirs(self): utils.EnsureDirs([ (utils.PathJoin(self.dir, "foo"), 0777), (utils.PathJoin(self.dir, "bar"), 0000), ]) self.assertEquals(os.stat(utils.PathJoin(self.dir, "foo"))[0] & 0777, 0777) self.assertEquals(os.stat(utils.PathJoin(self.dir, "bar"))[0] & 0777, 0000) def tearDown(self): os.rmdir(utils.PathJoin(self.dir, "foo")) os.rmdir(utils.PathJoin(self.dir, "bar")) os.rmdir(self.dir) os.umask(self.old_umask) class TestIsNormAbsPath(unittest.TestCase): """Testing case for IsNormAbsPath""" def _pathTestHelper(self, path, result): if result: self.assert_(utils.IsNormAbsPath(path), msg="Path %s should result absolute and normalized" % path) else: self.assertFalse(utils.IsNormAbsPath(path), msg="Path %s 
should not result absolute and normalized" % path) def testBase(self): self._pathTestHelper("/etc", True) self._pathTestHelper("/srv", True) self._pathTestHelper("etc", False) self._pathTestHelper("/etc/../root", False) self._pathTestHelper("/etc/", False) def testSlashes(self): # Root directory self._pathTestHelper("/", True) # POSIX' "implementation-defined" double slashes self._pathTestHelper("//", True) # Three and more slashes count as one, so the path is not normalized for i in range(3, 10): self._pathTestHelper("/" * i, False) class TestIsBelowDir(unittest.TestCase): """Testing case for IsBelowDir""" def testExactlyTheSame(self): self.assertFalse(utils.IsBelowDir("/a/b", "/a/b")) self.assertFalse(utils.IsBelowDir("/a/b", "/a/b/")) self.assertFalse(utils.IsBelowDir("/a/b/", "/a/b")) self.assertFalse(utils.IsBelowDir("/a/b/", "/a/b/")) def testSamePrefix(self): self.assertTrue(utils.IsBelowDir("/a/b", "/a/b/c")) self.assertTrue(utils.IsBelowDir("/a/b/", "/a/b/e")) def testSamePrefixButDifferentDir(self): self.assertFalse(utils.IsBelowDir("/a/b", "/a/bc/d")) self.assertFalse(utils.IsBelowDir("/a/b/", "/a/bc/e")) def testSamePrefixButDirTraversal(self): self.assertFalse(utils.IsBelowDir("/a/b", "/a/b/../c")) self.assertFalse(utils.IsBelowDir("/a/b/", "/a/b/../d")) def testSamePrefixAndTraversal(self): self.assertTrue(utils.IsBelowDir("/a/b", "/a/b/c/../d")) self.assertTrue(utils.IsBelowDir("/a/b", "/a/b/c/./e")) self.assertTrue(utils.IsBelowDir("/a/b", "/a/b/../b/./e")) def testBothAbsPath(self): self.assertRaises(ValueError, utils.IsBelowDir, "/a/b/c", "d") self.assertRaises(ValueError, utils.IsBelowDir, "a/b/c", "/d") self.assertRaises(ValueError, utils.IsBelowDir, "a/b/c", "d") self.assertRaises(ValueError, utils.IsBelowDir, "", "/") self.assertRaises(ValueError, utils.IsBelowDir, "/", "") def testRoot(self): self.assertFalse(utils.IsBelowDir("/", "/")) for i in ["/a", "/tmp", "/tmp/foo/bar", "/tmp/"]: self.assertTrue(utils.IsBelowDir("/", i)) def testSlashes(self): # In POSIX a double slash is "implementation-defined". 
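# POSIX allows exactly two leading slashes to denote a distinct root, so
# "//" is treated as a different directory than "/", while three or more
# slashes are equivalent to a single "/" again, as asserted below.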
self.assertFalse(utils.IsBelowDir("//", "//")) self.assertFalse(utils.IsBelowDir("//", "/tmp")) self.assertTrue(utils.IsBelowDir("//tmp", "//tmp/x")) # Three (or more) slashes count as one self.assertFalse(utils.IsBelowDir("/", "///")) self.assertTrue(utils.IsBelowDir("/", "///tmp")) self.assertTrue(utils.IsBelowDir("/tmp", "///tmp/a/b")) class TestPathJoin(unittest.TestCase): """Testing case for PathJoin""" def testBasicItems(self): mlist = ["/a", "b", "c"] self.failUnlessEqual(utils.PathJoin(*mlist), "/".join(mlist)) def testNonAbsPrefix(self): self.failUnlessRaises(ValueError, utils.PathJoin, "a", "b") def testBackTrack(self): self.failUnlessRaises(ValueError, utils.PathJoin, "/a", "b/../c") def testMultiAbs(self): self.failUnlessRaises(ValueError, utils.PathJoin, "/a", "/b") class TestTailFile(testutils.GanetiTestCase): """Test case for the TailFile function""" def testEmpty(self): fname = self._CreateTempFile() self.failUnlessEqual(utils.TailFile(fname), []) self.failUnlessEqual(utils.TailFile(fname, lines=25), []) def testAllLines(self): data = ["test %d" % i for i in range(30)] for i in range(30): fname = self._CreateTempFile() fd = open(fname, "w") fd.write("\n".join(data[:i])) if i > 0: fd.write("\n") fd.close() self.failUnlessEqual(utils.TailFile(fname, lines=i), data[:i]) def testPartialLines(self): data = ["test %d" % i for i in range(30)] fname = self._CreateTempFile() fd = open(fname, "w") fd.write("\n".join(data)) fd.write("\n") fd.close() for i in range(1, 30): self.failUnlessEqual(utils.TailFile(fname, lines=i), data[-i:]) def testBigFile(self): data = ["test %d" % i for i in range(30)] fname = self._CreateTempFile() fd = open(fname, "w") fd.write("X" * 1048576) fd.write("\n") fd.write("\n".join(data)) fd.write("\n") fd.close() for i in range(1, 30): self.failUnlessEqual(utils.TailFile(fname, lines=i), data[-i:]) class TestPidFileFunctions(unittest.TestCase): """Tests for WritePidFile and ReadPidFile""" def setUp(self): self.dir = tempfile.mkdtemp() self.f_dpn = lambda name: os.path.join(self.dir, "%s.pid" % name) def testPidFileFunctions(self): pid_file = self.f_dpn("test") fd = utils.WritePidFile(self.f_dpn("test")) self.failUnless(os.path.exists(pid_file), "PID file should have been created") read_pid = utils.ReadPidFile(pid_file) self.failUnlessEqual(read_pid, os.getpid()) self.failUnless(utils.IsProcessAlive(read_pid)) self.failUnlessRaises(errors.PidFileLockError, utils.WritePidFile, self.f_dpn("test")) os.close(fd) utils.RemoveFile(self.f_dpn("test")) self.failIf(os.path.exists(pid_file), "PID file should not exist anymore") self.failUnlessEqual(utils.ReadPidFile(pid_file), 0, "ReadPidFile should return 0 for missing pid file") fh = open(pid_file, "w") fh.write("blah\n") fh.close() self.failUnlessEqual(utils.ReadPidFile(pid_file), 0, "ReadPidFile should return 0 for invalid pid file") # but now, even with the file existing, we should be able to lock it fd = utils.WritePidFile(self.f_dpn("test")) os.close(fd) utils.RemoveFile(self.f_dpn("test")) self.failIf(os.path.exists(pid_file), "PID file should not exist anymore") def testKill(self): pid_file = self.f_dpn("child") r_fd, w_fd = os.pipe() new_pid = os.fork() if new_pid == 0: #child utils.WritePidFile(self.f_dpn("child")) os.write(w_fd, "a") signal.pause() os._exit(0) return # else we are in the parent # wait until the child has written the pid file os.read(r_fd, 1) read_pid = utils.ReadPidFile(pid_file) self.failUnlessEqual(read_pid, new_pid) self.failUnless(utils.IsProcessAlive(new_pid)) # Try writing to locked 
file try: utils.WritePidFile(pid_file) except errors.PidFileLockError, err: errmsg = str(err) self.assertTrue(errmsg.endswith(" %s" % new_pid), msg=("Error message ('%s') didn't contain correct" " PID (%s)" % (errmsg, new_pid))) else: self.fail("Writing to locked file didn't fail") utils.KillProcess(new_pid, waitpid=True) self.failIf(utils.IsProcessAlive(new_pid)) utils.RemoveFile(self.f_dpn("child")) self.failUnlessRaises(errors.ProgrammerError, utils.KillProcess, 0) def testExceptionType(self): # Make sure the PID lock error is a subclass of LockError in case some code # depends on it self.assertTrue(issubclass(errors.PidFileLockError, errors.LockError)) def tearDown(self): shutil.rmtree(self.dir) class TestSshKeys(testutils.GanetiTestCase): """Test case for the AddAuthorizedKey function""" KEY_A = "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a" KEY_B = ('command="/usr/bin/fooserver -t --verbose",from="198.51.100.4" ' "ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b") def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpname = self._CreateTempFile() handle = open(self.tmpname, "w") try: handle.write("%s\n" % TestSshKeys.KEY_A) handle.write("%s\n" % TestSshKeys.KEY_B) finally: handle.close() def testAddingNewKey(self): utils.AddAuthorizedKey(self.tmpname, "ssh-dss AAAAB3NzaC1kc3MAAACB root@test") self.assertFileContent(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a\n" 'command="/usr/bin/fooserver -t --verbose",from="198.51.100.4"' " ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b\n" "ssh-dss AAAAB3NzaC1kc3MAAACB root@test\n") def testAddingAlmostButNotCompletelyTheSameKey(self): utils.AddAuthorizedKey(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@test") # Only significant fields are compared, therefore the key won't be # updated/added self.assertFileContent(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a\n" 'command="/usr/bin/fooserver -t --verbose",from="198.51.100.4"' " ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b\n") def testAddingExistingKeyWithSomeMoreSpaces(self): utils.AddAuthorizedKey(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a") utils.AddAuthorizedKey(self.tmpname, "ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22") self.assertFileContent(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a\n" 'command="/usr/bin/fooserver -t --verbose",from="198.51.100.4"' " ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b\n" "ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22\n") def testRemovingExistingKeyWithSomeMoreSpaces(self): utils.RemoveAuthorizedKey(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a") self.assertFileContent(self.tmpname, 'command="/usr/bin/fooserver -t --verbose",from="198.51.100.4"' " ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b\n") def testRemovingNonExistingKey(self): utils.RemoveAuthorizedKey(self.tmpname, "ssh-dss AAAAB3Nsdfj230xxjxJjsjwjsjdjU root@test") self.assertFileContent(self.tmpname, "ssh-dss AAAAB3NzaC1w5256closdj32mZaQU root@key-a\n" 'command="/usr/bin/fooserver -t --verbose",from="198.51.100.4"' " ssh-dss AAAAB3NzaC1w520smc01ms0jfJs22 root@key-b\n") class TestNewUUID(unittest.TestCase): """Test case for NewUUID""" def runTest(self): self.failUnless(utils.UUID_RE.match(utils.NewUUID())) def _MockStatResult(cb, mode, uid, gid): def _fn(path): if cb: cb() return { stat.ST_MODE: mode, stat.ST_UID: uid, stat.ST_GID: gid, } return _fn def _RaiseNoEntError(): raise EnvironmentError(errno.ENOENT, "not found") def _OtherStatRaise(): raise EnvironmentError() class 
TestPermissionEnforcements(unittest.TestCase): UID_A = 16024 UID_B = 25850 GID_A = 14028 GID_B = 29801 def setUp(self): self._chown_calls = [] self._chmod_calls = [] self._mkdir_calls = [] def tearDown(self): self.assertRaises(IndexError, self._mkdir_calls.pop) self.assertRaises(IndexError, self._chmod_calls.pop) self.assertRaises(IndexError, self._chown_calls.pop) def _FakeMkdir(self, path): self._mkdir_calls.append(path) def _FakeChown(self, path, uid, gid): self._chown_calls.append((path, uid, gid)) def _ChmodWrapper(self, cb): def _fn(path, mode): self._chmod_calls.append((path, mode)) if cb: cb() return _fn def _VerifyPerm(self, path, mode, uid=-1, gid=-1): self.assertEqual(path, "/ganeti-qa-non-test") self.assertEqual(mode, 0700) self.assertEqual(uid, self.UID_A) self.assertEqual(gid, self.GID_A) def testMakeDirWithPerm(self): is_dir_stat = _MockStatResult(None, stat.S_IFDIR, 0, 0) utils.MakeDirWithPerm("/ganeti-qa-non-test", 0700, self.UID_A, self.GID_A, _lstat_fn=is_dir_stat, _perm_fn=self._VerifyPerm) def testDirErrors(self): self.assertRaises(errors.GenericError, utils.MakeDirWithPerm, "/ganeti-qa-non-test", 0700, 0, 0, _lstat_fn=_MockStatResult(None, 0, 0, 0)) self.assertRaises(IndexError, self._mkdir_calls.pop) other_stat_raise = _MockStatResult(_OtherStatRaise, stat.S_IFDIR, 0, 0) self.assertRaises(errors.GenericError, utils.MakeDirWithPerm, "/ganeti-qa-non-test", 0700, 0, 0, _lstat_fn=other_stat_raise) self.assertRaises(IndexError, self._mkdir_calls.pop) non_exist_stat = _MockStatResult(_RaiseNoEntError, stat.S_IFDIR, 0, 0) utils.MakeDirWithPerm("/ganeti-qa-non-test", 0700, self.UID_A, self.GID_A, _lstat_fn=non_exist_stat, _mkdir_fn=self._FakeMkdir, _perm_fn=self._VerifyPerm) self.assertEqual(self._mkdir_calls.pop(0), "/ganeti-qa-non-test") def testEnforcePermissionNoEnt(self): self.assertRaises(errors.GenericError, utils.EnforcePermission, "/ganeti-qa-non-test", 0600, _chmod_fn=NotImplemented, _chown_fn=NotImplemented, _stat_fn=_MockStatResult(_RaiseNoEntError, 0, 0, 0)) def testEnforcePermissionNoEntMustNotExist(self): utils.EnforcePermission("/ganeti-qa-non-test", 0600, must_exist=False, _chmod_fn=NotImplemented, _chown_fn=NotImplemented, _stat_fn=_MockStatResult(_RaiseNoEntError, 0, 0, 0)) def testEnforcePermissionOtherErrorMustNotExist(self): self.assertRaises(errors.GenericError, utils.EnforcePermission, "/ganeti-qa-non-test", 0600, must_exist=False, _chmod_fn=NotImplemented, _chown_fn=NotImplemented, _stat_fn=_MockStatResult(_OtherStatRaise, 0, 0, 0)) def testEnforcePermissionNoChanges(self): utils.EnforcePermission("/ganeti-qa-non-test", 0600, _stat_fn=_MockStatResult(None, 0600, 0, 0), _chmod_fn=self._ChmodWrapper(None), _chown_fn=self._FakeChown) def testEnforcePermissionChangeMode(self): utils.EnforcePermission("/ganeti-qa-non-test", 0444, _stat_fn=_MockStatResult(None, 0600, 0, 0), _chmod_fn=self._ChmodWrapper(None), _chown_fn=self._FakeChown) self.assertEqual(self._chmod_calls.pop(0), ("/ganeti-qa-non-test", 0444)) def testEnforcePermissionSetUidGid(self): utils.EnforcePermission("/ganeti-qa-non-test", 0600, uid=self.UID_B, gid=self.GID_B, _stat_fn=_MockStatResult(None, 0600, self.UID_A, self.GID_A), _chmod_fn=self._ChmodWrapper(None), _chown_fn=self._FakeChown) self.assertEqual(self._chown_calls.pop(0), ("/ganeti-qa-non-test", self.UID_B, self.GID_B)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.hooks_unittest.py0000744000000000000000000004502212271422343021307 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright 
(C) 2006, 2007 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the hooks module""" import unittest import os import time import tempfile import os.path from ganeti import errors from ganeti import opcodes from ganeti import hooksmaster from ganeti import backend from ganeti import constants from ganeti import cmdlib from ganeti import rpc from ganeti import compat from ganeti import pathutils from ganeti.constants import HKR_SUCCESS, HKR_FAIL, HKR_SKIP from mocks import FakeConfig, FakeProc, FakeContext import testutils class FakeLU(cmdlib.LogicalUnit): HPATH = "test" def BuildHooksEnv(self): return {} def BuildHooksNodes(self): return ["a"], ["a"] class TestHooksRunner(unittest.TestCase): """Testing case for HooksRunner""" def setUp(self): self.torm = [] self.tmpdir = tempfile.mkdtemp() self.torm.append((self.tmpdir, True)) self.logdir = tempfile.mkdtemp() self.torm.append((self.logdir, True)) self.hpath = "fake" self.ph_dirs = {} for i in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): dname = "%s/%s-%s.d" % (self.tmpdir, self.hpath, i) os.mkdir(dname) self.torm.append((dname, True)) self.ph_dirs[i] = dname self.hr = backend.HooksRunner(hooks_base_dir=self.tmpdir) def tearDown(self): self.torm.reverse() for path, kind in self.torm: if kind: os.rmdir(path) else: os.unlink(path) def _rname(self, fname): return "/".join(fname.split("/")[-2:]) def testEmpty(self): """Test no hooks""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), []) def testSkipNonExec(self): """Test skip non-exec file""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/test" % self.ph_dirs[phase] f = open(fname, "w") f.close() self.torm.append((fname, False)) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_SKIP, "")]) def testSkipInvalidName(self): """Test skip script with invalid name""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/a.off" % self.ph_dirs[phase] f = open(fname, "w") f.write("#!/bin/sh\nexit 0\n") f.close() os.chmod(fname, 0700) self.torm.append((fname, False)) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_SKIP, "")]) def testSkipDir(self): """Test skip directory""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/testdir" % self.ph_dirs[phase] os.mkdir(fname) self.torm.append((fname, True)) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_SKIP, "")]) def testSuccess(self): """Test success execution""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/success" % self.ph_dirs[phase] f = open(fname, "w") f.write("#!/bin/sh\nexit 0\n") f.close() self.torm.append((fname, False)) 
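# The hook only runs once it is executable; without the chmod below the
# runner would report it as HKR_SKIP, as testSkipNonExec demonstrates above.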
os.chmod(fname, 0700) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_SUCCESS, "")]) def testSymlink(self): """Test running a symlink""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/success" % self.ph_dirs[phase] os.symlink("/bin/true", fname) self.torm.append((fname, False)) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_SUCCESS, "")]) def testFail(self): """Test failed execution""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fname = "%s/success" % self.ph_dirs[phase] f = open(fname, "w") f.write("#!/bin/sh\nexit 1\n") f.close() self.torm.append((fname, False)) os.chmod(fname, 0700) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), [(self._rname(fname), HKR_FAIL, "")]) def testCombined(self): """Test success, failure and skip all in one test""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): expect = [] for fbase, ecode, rs in [("00succ", 0, HKR_SUCCESS), ("10fail", 1, HKR_FAIL), ("20inv.", 0, HKR_SKIP), ]: fname = "%s/%s" % (self.ph_dirs[phase], fbase) f = open(fname, "w") f.write("#!/bin/sh\nexit %d\n" % ecode) f.close() self.torm.append((fname, False)) os.chmod(fname, 0700) expect.append((self._rname(fname), rs, "")) self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), expect) def testOrdering(self): for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): expect = [] for fbase in ["10s1", "00s0", "10sa", "80sc", "60sd", ]: fname = "%s/%s" % (self.ph_dirs[phase], fbase) os.symlink("/bin/true", fname) self.torm.append((fname, False)) expect.append((self._rname(fname), HKR_SUCCESS, "")) expect.sort() self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, {}), expect) def testEnv(self): """Test environment execution""" for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): fbase = "success" fname = "%s/%s" % (self.ph_dirs[phase], fbase) os.symlink("/usr/bin/env", fname) self.torm.append((fname, False)) env_snt = {"PHASE": phase} env_exp = "PHASE=%s" % phase self.failUnlessEqual(self.hr.RunHooks(self.hpath, phase, env_snt), [(self._rname(fname), HKR_SUCCESS, env_exp)]) def FakeHooksRpcSuccess(node_list, hpath, phase, env): """Fake call_hooks_runner function. @rtype: dict of node -> L{rpc.RpcResult} with a successful script result @return: script execution from all nodes """ rr = rpc.RpcResult return dict([(node, rr((True, [("utest", constants.HKR_SUCCESS, "ok")]), node=node, call="FakeScriptOk")) for node in node_list]) class TestHooksMaster(unittest.TestCase): """Testing case for HooksMaster""" def _call_false(*args): """Fake call_hooks_runner function which returns False.""" return False @staticmethod def _call_nodes_false(node_list, hpath, phase, env): """Fake call_hooks_runner function. @rtype: dict of node -> L{rpc.RpcResult} with an rpc error @return: rpc failure from all nodes """ return dict([(node, rpc.RpcResult("error", failed=True, node=node, call="FakeError")) for node in node_list]) @staticmethod def _call_script_fail(node_list, hpath, phase, env): """Fake call_hooks_runner function. 
@rtype: dict of node -> L{rpc.RpcResult} with a failed script result @return: script execution failure from all nodes """ rr = rpc.RpcResult return dict([(node, rr((True, [("utest", constants.HKR_FAIL, "err")]), node=node, call="FakeScriptFail")) for node in node_list]) def setUp(self): self.op = opcodes.OpCode() self.context = FakeContext() # WARNING: here we pass None as RpcRunner instance since we know # our usage via HooksMaster will not use lu.rpc self.lu = FakeLU(FakeProc(), self.op, self.context, None) def testTotalFalse(self): """Test complete rpc failure""" hm = hooksmaster.HooksMaster.BuildFromLu(self._call_false, self.lu) self.failUnlessRaises(errors.HooksFailure, hm.RunPhase, constants.HOOKS_PHASE_PRE) hm.RunPhase(constants.HOOKS_PHASE_POST) def testIndividualFalse(self): """Test individual node failure""" hm = hooksmaster.HooksMaster.BuildFromLu(self._call_nodes_false, self.lu) hm.RunPhase(constants.HOOKS_PHASE_PRE) #self.failUnlessRaises(errors.HooksFailure, # hm.RunPhase, constants.HOOKS_PHASE_PRE) hm.RunPhase(constants.HOOKS_PHASE_POST) def testScriptFalse(self): """Test individual script failure""" hm = hooksmaster.HooksMaster.BuildFromLu(self._call_script_fail, self.lu) self.failUnlessRaises(errors.HooksAbort, hm.RunPhase, constants.HOOKS_PHASE_PRE) hm.RunPhase(constants.HOOKS_PHASE_POST) def testScriptSucceed(self): """Test successful script execution""" hm = hooksmaster.HooksMaster.BuildFromLu(FakeHooksRpcSuccess, self.lu) for phase in (constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST): hm.RunPhase(phase) class FakeEnvLU(cmdlib.LogicalUnit): HPATH = "env_test_lu" HTYPE = constants.HTYPE_GROUP def __init__(self, *args): cmdlib.LogicalUnit.__init__(self, *args) self.hook_env = None def BuildHooksEnv(self): assert self.hook_env is not None return self.hook_env def BuildHooksNodes(self): return (["a"], ["a"]) class FakeNoHooksLU(cmdlib.NoHooksLU): pass class TestHooksRunnerEnv(unittest.TestCase): def setUp(self): self._rpcs = [] self.op = opcodes.OpTestDummy(result=False, messages=[], fail=False) self.lu = FakeEnvLU(FakeProc(), self.op, FakeContext(), None) def _HooksRpc(self, *args): self._rpcs.append(args) return FakeHooksRpcSuccess(*args) def _CheckEnv(self, env, phase, hpath): self.assertTrue(env["PATH"].startswith("/sbin")) self.assertEqual(env["GANETI_HOOKS_PHASE"], phase) self.assertEqual(env["GANETI_HOOKS_PATH"], hpath) self.assertEqual(env["GANETI_OP_CODE"], self.op.OP_ID) self.assertEqual(env["GANETI_HOOKS_VERSION"], str(constants.HOOKS_VERSION)) self.assertEqual(env["GANETI_DATA_DIR"], pathutils.DATA_DIR) if "GANETI_OBJECT_TYPE" in env: self.assertEqual(env["GANETI_OBJECT_TYPE"], constants.HTYPE_GROUP) else: self.assertTrue(self.lu.HTYPE is None) def testEmptyEnv(self): # Check pre-phase hook self.lu.hook_env = {} hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunPhase(constants.HOOKS_PHASE_PRE) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(phase, constants.HOOKS_PHASE_PRE) self._CheckEnv(env, constants.HOOKS_PHASE_PRE, self.lu.HPATH) # Check post-phase hook self.lu.hook_env = {} hm.RunPhase(constants.HOOKS_PHASE_POST) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self._CheckEnv(env, constants.HOOKS_PHASE_POST, self.lu.HPATH) self.assertRaises(IndexError, self._rpcs.pop) def 
testEnv(self): # Check pre-phase hook self.lu.hook_env = { "FOO": "pre-foo-value", } hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunPhase(constants.HOOKS_PHASE_PRE) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(phase, constants.HOOKS_PHASE_PRE) self.assertEqual(env["GANETI_FOO"], "pre-foo-value") self.assertFalse(compat.any(key.startswith("GANETI_POST") for key in env)) self._CheckEnv(env, constants.HOOKS_PHASE_PRE, self.lu.HPATH) # Check post-phase hook self.lu.hook_env = { "FOO": "post-value", "BAR": 123, } hm.RunPhase(constants.HOOKS_PHASE_POST) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self.assertEqual(env["GANETI_FOO"], "pre-foo-value") self.assertEqual(env["GANETI_POST_FOO"], "post-value") self.assertEqual(env["GANETI_POST_BAR"], "123") self.assertFalse("GANETI_BAR" in env) self._CheckEnv(env, constants.HOOKS_PHASE_POST, self.lu.HPATH) self.assertRaises(IndexError, self._rpcs.pop) # Check configuration update hook hm.RunConfigUpdate() (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(set(node_list), set([self.lu.cfg.GetMasterNodeName()])) self.assertEqual(hpath, constants.HOOKS_NAME_CFGUPDATE) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self._CheckEnv(env, constants.HOOKS_PHASE_POST, constants.HOOKS_NAME_CFGUPDATE) self.assertFalse(compat.any(key.startswith("GANETI_POST") for key in env)) self.assertEqual(env["GANETI_FOO"], "pre-foo-value") self.assertRaises(IndexError, self._rpcs.pop) def testConflict(self): for name in ["DATA_DIR", "OP_CODE"]: self.lu.hook_env = { name: "value" } # Test using a clean HooksMaster instance hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) for phase in [constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST]: self.assertRaises(AssertionError, hm.RunPhase, phase) self.assertRaises(IndexError, self._rpcs.pop) def testNoNodes(self): self.lu.hook_env = {} hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunPhase(constants.HOOKS_PHASE_PRE, node_names=[]) self.assertRaises(IndexError, self._rpcs.pop) def testSpecificNodes(self): self.lu.hook_env = {} nodes = [ "node1.example.com", "node93782.example.net", ] hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) for phase in [constants.HOOKS_PHASE_PRE, constants.HOOKS_PHASE_POST]: hm.RunPhase(phase, node_names=nodes) (node_list, hpath, rpc_phase, env) = self._rpcs.pop(0) self.assertEqual(set(node_list), set(nodes)) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(rpc_phase, phase) self._CheckEnv(env, phase, self.lu.HPATH) self.assertRaises(IndexError, self._rpcs.pop) def testRunConfigUpdateNoPre(self): self.lu.hook_env = { "FOO": "value", } hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunConfigUpdate() (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(set(node_list), set([self.lu.cfg.GetMasterNodeName()])) self.assertEqual(hpath, constants.HOOKS_NAME_CFGUPDATE) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self.assertEqual(env["GANETI_FOO"], "value") self.assertFalse(compat.any(key.startswith("GANETI_POST") for key in env)) self._CheckEnv(env, constants.HOOKS_PHASE_POST, constants.HOOKS_NAME_CFGUPDATE) self.assertRaises(IndexError, self._rpcs.pop) def testNoPreBeforePost(self): 
self.lu.hook_env = { "FOO": "value", } hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunPhase(constants.HOOKS_PHASE_POST) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) self.assertEqual(hpath, self.lu.HPATH) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self.assertEqual(env["GANETI_FOO"], "value") self.assertEqual(env["GANETI_POST_FOO"], "value") self._CheckEnv(env, constants.HOOKS_PHASE_POST, self.lu.HPATH) self.assertRaises(IndexError, self._rpcs.pop) def testNoHooksLU(self): self.lu = FakeNoHooksLU(FakeProc(), self.op, FakeContext(), None) self.assertRaises(AssertionError, self.lu.BuildHooksEnv) self.assertRaises(AssertionError, self.lu.BuildHooksNodes) hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) self.assertEqual(hm.pre_env, {}) self.assertRaises(IndexError, self._rpcs.pop) hm.RunPhase(constants.HOOKS_PHASE_PRE) self.assertRaises(IndexError, self._rpcs.pop) hm.RunPhase(constants.HOOKS_PHASE_POST) self.assertRaises(IndexError, self._rpcs.pop) hm.RunConfigUpdate() (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(set(node_list), set([self.lu.cfg.GetMasterNodeName()])) self.assertEqual(hpath, constants.HOOKS_NAME_CFGUPDATE) self.assertEqual(phase, constants.HOOKS_PHASE_POST) self.assertFalse(compat.any(key.startswith("GANETI_POST") for key in env)) self._CheckEnv(env, constants.HOOKS_PHASE_POST, constants.HOOKS_NAME_CFGUPDATE) self.assertRaises(IndexError, self._rpcs.pop) assert isinstance(self.lu, FakeNoHooksLU), "LU was replaced" class FakeEnvWithNodeNameLU(cmdlib.LogicalUnit): HPATH = "env_test_lu" HTYPE = constants.HTYPE_GROUP def __init__(self, *args): cmdlib.LogicalUnit.__init__(self, *args) def BuildHooksEnv(self): return {} def BuildHooksNodes(self): return (["a"], ["a"], ["explicit.node1.com", "explicit.node2.com"]) class TestHooksRunnerEnv(unittest.TestCase): def setUp(self): self._rpcs = [] self.op = opcodes.OpTestDummy(result=False, messages=[], fail=False) self.lu = FakeEnvWithNodeNameLU(FakeProc(), self.op, FakeContext(), None) def _HooksRpc(self, *args): self._rpcs.append(args) return FakeHooksRpcSuccess(*args) def testEmptyEnv(self): # Check pre-phase hook hm = hooksmaster.HooksMaster.BuildFromLu(self._HooksRpc, self.lu) hm.RunPhase(constants.HOOKS_PHASE_PRE) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com"])) # Check post-phase hook hm.RunPhase(constants.HOOKS_PHASE_POST) (node_list, hpath, phase, env) = self._rpcs.pop(0) self.assertEqual(node_list, set(["node_a.example.com", "explicit.node1.com", "explicit.node2.com"])) self.assertRaises(IndexError, self._rpcs.pop) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.rapi.testutils_unittest.py0000744000000000000000000001256012271422343023157 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.rapi.testutils""" import unittest from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import opcodes from ganeti import luxi from ganeti import rapi from ganeti import utils import ganeti.rapi.testutils import ganeti.rapi.client import testutils KNOWN_UNUSED_LUXI = compat.UniqueFrozenset([ luxi.REQ_SUBMIT_MANY_JOBS, luxi.REQ_ARCHIVE_JOB, luxi.REQ_AUTO_ARCHIVE_JOBS, luxi.REQ_CHANGE_JOB_PRIORITY, luxi.REQ_QUERY_EXPORTS, luxi.REQ_QUERY_CONFIG_VALUES, luxi.REQ_QUERY_NETWORKS, luxi.REQ_QUERY_TAGS, luxi.REQ_SET_DRAIN_FLAG, luxi.REQ_SET_WATCHER_PAUSE, ]) # Global variable for storing used LUXI calls _used_luxi_calls = None class TestHideInternalErrors(unittest.TestCase): def test(self): def inner(): raise errors.GenericError("error") fn = rapi.testutils._HideInternalErrors(inner) self.assertRaises(rapi.testutils.VerificationError, fn) class TestVerifyOpInput(unittest.TestCase): def testUnknownOpId(self): voi = rapi.testutils.VerifyOpInput self.assertRaises(rapi.testutils.VerificationError, voi, "UNK_OP_ID", None) def testUnknownParameter(self): voi = rapi.testutils.VerifyOpInput self.assertRaises(rapi.testutils.VerificationError, voi, opcodes.OpClusterRename.OP_ID, { "unk": "unk", }) def testWrongParameterValue(self): voi = rapi.testutils.VerifyOpInput self.assertRaises(rapi.testutils.VerificationError, voi, opcodes.OpClusterRename.OP_ID, { "name": object(), }) def testSuccess(self): voi = rapi.testutils.VerifyOpInput voi(opcodes.OpClusterRename.OP_ID, { "name": "new-name.example.com", }) class TestVerifyOpResult(unittest.TestCase): def testSuccess(self): vor = rapi.testutils.VerifyOpResult vor(opcodes.OpClusterVerify.OP_ID, { constants.JOB_IDS_KEY: [ (False, "error message"), ], }) def testWrongResult(self): vor = rapi.testutils.VerifyOpResult self.assertRaises(rapi.testutils.VerificationError, vor, opcodes.OpClusterVerify.OP_ID, []) def testNoResultCheck(self): vor = rapi.testutils.VerifyOpResult assert opcodes.OpTestDummy.OP_RESULT is None vor(opcodes.OpTestDummy.OP_ID, None) class TestInputTestClient(unittest.TestCase): def setUp(self): self.cl = rapi.testutils.InputTestClient() def tearDown(self): _used_luxi_calls.update(self.cl._GetLuxiCalls()) def testGetInfo(self): self.assertTrue(self.cl.GetInfo() is NotImplemented) def testPrepareExport(self): result = self.cl.PrepareExport("inst1.example.com", constants.EXPORT_MODE_LOCAL) self.assertTrue(result is NotImplemented) self.assertRaises(rapi.testutils.VerificationError, self.cl.PrepareExport, "inst1.example.com", "###invalid###") def testGetJobs(self): self.assertTrue(self.cl.GetJobs() is NotImplemented) def testQuery(self): result = self.cl.Query(constants.QR_NODE, ["name"]) self.assertTrue(result is NotImplemented) def testQueryFields(self): result = self.cl.QueryFields(constants.QR_INSTANCE) self.assertTrue(result is NotImplemented) def testCancelJob(self): self.assertTrue(self.cl.CancelJob("1") is NotImplemented) def testGetNodes(self): self.assertTrue(self.cl.GetNodes() is NotImplemented) def testGetInstances(self): self.assertTrue(self.cl.GetInstances() is NotImplemented) def testGetGroups(self): self.assertTrue(self.cl.GetGroups() is NotImplemented) def testWaitForJobChange(self): result = self.cl.WaitForJobChange("1", ["id"], None, None) 
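# Every InputTestClient method in this class follows the same contract:
# the call's input is verified against the opcode definition and
# NotImplemented is returned instead of a real result, since no cluster
# is ever contacted. The resulting test idiom (illustrative usage,
# mirroring the assertions in this class):
#
#   cl = rapi.testutils.InputTestClient()
#   assert cl.GetNodes() is NotImplemented          # valid input accepted
#   try:
#     cl.PrepareExport("inst1.example.com", "###invalid###")
#   except rapi.testutils.VerificationError:
#     pass                                          # bad input is rejected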
self.assertTrue(result is NotImplemented) class CustomTestRunner(unittest.TextTestRunner): def run(self, *args): global _used_luxi_calls assert _used_luxi_calls is None diff = (KNOWN_UNUSED_LUXI - luxi.REQ_ALL) assert not diff, "Non-existing LUXI calls listed as unused: %s" % diff _used_luxi_calls = set() try: # Run actual tests result = unittest.TextTestRunner.run(self, *args) diff = _used_luxi_calls & KNOWN_UNUSED_LUXI if diff: raise AssertionError("LUXI methods marked as unused were called: %s" % utils.CommaJoin(diff)) diff = (luxi.REQ_ALL - KNOWN_UNUSED_LUXI - _used_luxi_calls) if diff: raise AssertionError("The following LUXI methods were not used: %s" % utils.CommaJoin(diff)) finally: # Reset global variable _used_luxi_calls = None return result if __name__ == "__main__": testutils.GanetiTestProgram(testRunner=CustomTestRunner) ganeti-2.9.3/test/py/daemon-util_unittest.bash0000744000000000000000000000555112271422343021424 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e daemon_util=daemons/daemon-util err() { echo "$@" echo 'Aborting' exit 1 } if ! grep -q '^ENABLE_CONFD = ' lib/_autoconf.py; then err "Please update $0, confd enable feature is missing" fi if ! grep -q '^ENABLE_MOND = ' lib/_autoconf.py; then err "Please update $0, mond enable feature is missing" fi DAEMONS_LIST="noded masterd rapi" STOPDAEMONS_LIST="rapi masterd noded" if grep -q '^ENABLE_CONFD = True' lib/_autoconf.py; then DAEMONS_LIST="$DAEMONS_LIST confd luxid" STOPDAEMONS_LIST="luxid confd $STOPDAEMONS_LIST" fi if grep -q '^ENABLE_MOND = True' lib/_autoconf.py; then DAEMONS_LIST="$DAEMONS_LIST mond" STOPDAEMONS_LIST="mond $STOPDAEMONS_LIST" fi DAEMONS=$(echo $(for d in $DAEMONS_LIST; do echo "ganeti-$d"; done)) STOPDAEMONS=$(echo $(for d in $STOPDAEMONS_LIST; do echo "ganeti-$d"; done)) $daemon_util >/dev/null 2>&1 && err "daemon-util succeeded without command" $daemon_util this-is-an-unimplemented-command >/dev/null 2>&1 && err "daemon-util accepted unimplemented command" $daemon_util list_start_daemons >/dev/null 2>&1 && err "daemon-util accepted command with underscores" $daemon_util check-exitcode 0 || err "check-exitcode 0 failed" for i in 1 2 3 4 20 25 33; do $daemon_util check-exitcode $i >/dev/null 2>&1 && rc=0 || rc=$? test "$rc" == 1 || err "check-exitcode $i didn't return 1" done $daemon_util check-exitcode 11 >/dev/null 2>&1 || err "check-exitcode 11 (not master) didn't return 0" tmp=$(echo $($daemon_util list-start-daemons)) test "$tmp" == "$DAEMONS" || err "list-start-daemons didn't return correct list of daemons" tmp=$(echo $($daemon_util list-stop-daemons)) test "$tmp" == "$STOPDAEMONS" || err "list-stop-daemons didn't return correct list of daemons" $daemon_util is-daemon-name >/dev/null 2>&1 && err "is-daemon-name didn't require daemon name" for i in '' '.' 
'..' '-' 'not-a-daemon'; do $daemon_util is-daemon-name "$i" >/dev/null 2>&1 && err "is-daemon-name thinks '$i' is a daemon name" done for i in $DAEMONS; do $daemon_util is-daemon-name $i >/dev/null 2>&1 || err "is-daemon-name doesn't think '$i' is a daemon name" done ganeti-2.9.3/test/py/ganeti.utils.mlock_unittest.py0000744000000000000000000000300112244641676022433 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing utils.Mlockall This test is run in a separate process because it changes memory behaviour. """ import unittest from ganeti import utils from ganeti import errors import testutils # WARNING: The following tests modify the memory behaviour at runtime. Don't # add unrelated tests here. class TestMlockallWithCtypes(unittest.TestCase): """Whether Mlockall() works if ctypes is present. """ def test(self): if utils.ctypes: utils.Mlockall() class TestMlockallWithNoCtypes(unittest.TestCase): """Whether Mlockall() raises an error if ctypes is not present. """ def test(self): self.assertRaises(errors.NoCtypesError, utils.Mlockall, _ctypes=None) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.compat_unittest.py0000744000000000000000000000765312244641676021473 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
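# TestPartial below runs the same checks against both functools.partial
# and compat._partial, Ganeti's pure-Python fallback. For reference, the
# stdlib documents partial() as roughly equivalent to the following
# (a sketch of the documented semantics, not necessarily the exact
# compat implementation):
#
#   def partial(func, *args, **keywords):
#     def newfunc(*fargs, **fkeywords):
#       newkeywords = keywords.copy()
#       newkeywords.update(fkeywords)
#       return func(*(args + fargs), **newkeywords)
#     return newfunc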
"""Script for unittesting the compat module""" import unittest from ganeti import compat import testutils class TestPartial(testutils.GanetiTestCase): def test(self): # Test standard version self._Test(compat.partial) # Test our version self._Test(compat._partial) def _Test(self, fn): def _TestFunc1(x, power=2): return x ** power cubic = fn(_TestFunc1, power=3) self.assertEqual(cubic(1), 1) self.assertEqual(cubic(3), 27) self.assertEqual(cubic(4), 64) def _TestFunc2(*args, **kwargs): return (args, kwargs) self.assertEqualValues(fn(_TestFunc2, "Hello", "World")("Foo"), (("Hello", "World", "Foo"), {})) self.assertEqualValues(fn(_TestFunc2, "Hello", xyz=123)("Foo"), (("Hello", "Foo"), {"xyz": 123})) self.assertEqualValues(fn(_TestFunc2, xyz=123)("Foo", xyz=999), (("Foo", ), {"xyz": 999,})) class TestTryToRoman(testutils.GanetiTestCase): """test the compat.TryToRoman function""" def setUp(self): testutils.GanetiTestCase.setUp(self) # Save the compat.roman module so we can alter it with a fake... self.compat_roman_module = compat.roman def tearDown(self): # ...and restore it at the end of the test compat.roman = self.compat_roman_module testutils.GanetiTestCase.tearDown(self) def testAFewIntegers(self): # This test only works is the roman module is installed if compat.roman is not None: self.assertEquals(compat.TryToRoman(0), 0) self.assertEquals(compat.TryToRoman(1), "I") self.assertEquals(compat.TryToRoman(4), "IV") self.assertEquals(compat.TryToRoman(5), "V") def testWithNoRoman(self): # compat.roman is saved/restored in setUp/tearDown compat.roman = None self.assertEquals(compat.TryToRoman(0), 0) self.assertEquals(compat.TryToRoman(1), 1) self.assertEquals(compat.TryToRoman(4), 4) self.assertEquals(compat.TryToRoman(5), 5) def testStrings(self): self.assertEquals(compat.TryToRoman("astring"), "astring") self.assertEquals(compat.TryToRoman("5"), "5") def testDontConvert(self): self.assertEquals(compat.TryToRoman(0, convert=False), 0) self.assertEquals(compat.TryToRoman(1, convert=False), 1) self.assertEquals(compat.TryToRoman(7, convert=False), 7) self.assertEquals(compat.TryToRoman("astring", convert=False), "astring") self.assertEquals(compat.TryToRoman("19", convert=False), "19") class TestUniqueFrozenset(unittest.TestCase): def testDuplicates(self): for values in [["", ""], ["Hello", "World", "Hello"]]: self.assertRaises(ValueError, compat.UniqueFrozenset, values) def testEmpty(self): self.assertEqual(compat.UniqueFrozenset([]), frozenset([])) def testUnique(self): self.assertEqual(compat.UniqueFrozenset([1, 2, 3]), frozenset([1, 2, 3])) def testGenerator(self): seq = ("Foo%s" % i for i in range(10)) self.assertTrue(callable(seq.next)) self.assertFalse(isinstance(seq, (list, tuple))) self.assertEqual(compat.UniqueFrozenset(seq), frozenset(["Foo%s" % i for i in range(10)])) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.cmdlib.cluster_unittest.py0000744000000000000000000000544312271422343023101 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the cmdlib module 'cluster'""" import unittest from ganeti.cmdlib import cluster from ganeti import constants from ganeti import errors import testutils import mock class TestCheckFileStoragePath(unittest.TestCase): def setUp(self): unittest.TestCase.setUp(self) self.log_warning = mock.Mock() def enableFileStorage(self, file_storage_enabled): if file_storage_enabled: self.enabled_disk_templates = [constants.DT_FILE] else: # anything != 'file' would do here self.enabled_disk_templates = [constants.DT_DISKLESS] def testNone(self): self.enableFileStorage(True) self.assertRaises( errors.ProgrammerError, cluster.CheckFileStoragePathVsEnabledDiskTemplates, self.log_warning, None, self.enabled_disk_templates) def testNotEmptyAndEnabled(self): self.enableFileStorage(True) cluster.CheckFileStoragePathVsEnabledDiskTemplates( self.log_warning, "/some/path", self.enabled_disk_templates) def testNotEnabled(self): self.enableFileStorage(False) cluster.CheckFileStoragePathVsEnabledDiskTemplates( self.log_warning, "/some/path", self.enabled_disk_templates) self.assertTrue(self.log_warning.called) def testEmptyAndEnabled(self): self.enableFileStorage(True) self.assertRaises( errors.OpPrereqError, cluster.CheckFileStoragePathVsEnabledDiskTemplates, self.log_warning, "", self.enabled_disk_templates) def testEmptyAndDisabled(self): self.enableFileStorage(False) cluster.CheckFileStoragePathVsEnabledDiskTemplates( NotImplemented, "", self.enabled_disk_templates) class TestGetEnabledDiskTemplates(unittest.TestCase): def testNoNew(self): op_dts = [constants.DT_DISKLESS] old_dts = [constants.DT_DISKLESS] (enabled_dts, new_dts) =\ cluster.LUClusterSetParams._GetEnabledDiskTemplatesInner( op_dts, old_dts) self.assertEqual(enabled_dts, old_dts) self.assertEqual(new_dts, []) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.asyncnotifier_unittest.py0000744000000000000000000001524112244641676023055 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
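# The tests below create one pyinotify.WatchManager per notifier because
# each WatchManager owns the inotify file descriptor that asyncore polls.
# Bare-bones pyinotify wiring, outside the Ganeti wrappers (illustrative
# sketch; the watched path is hypothetical):
#
#   import pyinotify
#   wm = pyinotify.WatchManager()
#   notifier = pyinotify.Notifier(wm, pyinotify.ProcessEvent())
#   wm.add_watch("/tmp/watched-file", pyinotify.IN_MODIFY)
#   notifier.process_events()          # dispatches any queued events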
"""Script for unittesting the asyncnotifier module""" import unittest import signal import os import tempfile import shutil try: # pylint: disable=E0611 from pyinotify import pyinotify except ImportError: import pyinotify from ganeti import asyncnotifier from ganeti import daemon from ganeti import utils from ganeti import errors import testutils class _MyErrorLoggingAsyncNotifier(asyncnotifier.ErrorLoggingAsyncNotifier): def __init__(self, *args, **kwargs): asyncnotifier.ErrorLoggingAsyncNotifier.__init__(self, *args, **kwargs) self.error_count = 0 def handle_error(self): self.error_count += 1 raise class TestSingleFileEventHandler(testutils.GanetiTestCase): """Test daemon.Mainloop""" NOTIFIERS = [NOTIFIER_TERM, NOTIFIER_NORM, NOTIFIER_ERR] = range(3) def setUp(self): testutils.GanetiTestCase.setUp(self) self.mainloop = daemon.Mainloop() self.chk_files = [self._CreateTempFile() for i in self.NOTIFIERS] self.notified = [False for i in self.NOTIFIERS] # We need one watch manager per notifier, as those contain the file # descriptor which is monitored by asyncore self.wms = [pyinotify.WatchManager() for i in self.NOTIFIERS] self.cbk = [self.OnInotifyCallback(self, i) for i in self.NOTIFIERS] self.ihandler = [asyncnotifier.SingleFileEventHandler(wm, cb, cf) for (wm, cb, cf) in zip(self.wms, self.cbk, self.chk_files)] self.notifiers = [_MyErrorLoggingAsyncNotifier(wm, ih) for (wm, ih) in zip(self.wms, self.ihandler)] # TERM notifier is enabled by default, as we use it to get out of the loop self.ihandler[self.NOTIFIER_TERM].enable() def tearDown(self): # disable the inotifiers, before removing the files for i in self.ihandler: i.disable() testutils.GanetiTestCase.tearDown(self) # and unregister the fd's being polled for n in self.notifiers: n.del_channel() class OnInotifyCallback: def __init__(self, testobj, i): self.testobj = testobj self.notified = testobj.notified self.i = i def __call__(self, enabled): self.notified[self.i] = True if self.i == self.testobj.NOTIFIER_TERM: os.kill(os.getpid(), signal.SIGTERM) elif self.i == self.testobj.NOTIFIER_ERR: raise errors.GenericError("an error") def testReplace(self): utils.WriteFile(self.chk_files[self.NOTIFIER_TERM], data="dummy") self.mainloop.Run() self.assert_(self.notified[self.NOTIFIER_TERM]) self.assertFalse(self.notified[self.NOTIFIER_NORM]) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) def testEnableDisable(self): self.ihandler[self.NOTIFIER_TERM].enable() self.ihandler[self.NOTIFIER_TERM].disable() self.ihandler[self.NOTIFIER_TERM].disable() self.ihandler[self.NOTIFIER_TERM].enable() self.ihandler[self.NOTIFIER_TERM].disable() self.ihandler[self.NOTIFIER_TERM].enable() utils.WriteFile(self.chk_files[self.NOTIFIER_TERM], data="dummy") self.mainloop.Run() self.assert_(self.notified[self.NOTIFIER_TERM]) self.assertFalse(self.notified[self.NOTIFIER_NORM]) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) def testDoubleEnable(self): self.ihandler[self.NOTIFIER_TERM].enable() self.ihandler[self.NOTIFIER_TERM].enable() utils.WriteFile(self.chk_files[self.NOTIFIER_TERM], data="dummy") self.mainloop.Run() self.assert_(self.notified[self.NOTIFIER_TERM]) self.assertFalse(self.notified[self.NOTIFIER_NORM]) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) def testDefaultDisabled(self): 
utils.WriteFile(self.chk_files[self.NOTIFIER_NORM], data="dummy") utils.WriteFile(self.chk_files[self.NOTIFIER_TERM], data="dummy") self.mainloop.Run() self.assert_(self.notified[self.NOTIFIER_TERM]) # NORM notifier is disabled by default self.assertFalse(self.notified[self.NOTIFIER_NORM]) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) def testBothEnabled(self): self.ihandler[self.NOTIFIER_NORM].enable() utils.WriteFile(self.chk_files[self.NOTIFIER_NORM], data="dummy") utils.WriteFile(self.chk_files[self.NOTIFIER_TERM], data="dummy") self.mainloop.Run() self.assert_(self.notified[self.NOTIFIER_TERM]) self.assert_(self.notified[self.NOTIFIER_NORM]) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) def testError(self): self.ihandler[self.NOTIFIER_ERR].enable() utils.WriteFile(self.chk_files[self.NOTIFIER_ERR], data="dummy") self.assertRaises(errors.GenericError, self.mainloop.Run) self.assert_(self.notified[self.NOTIFIER_ERR]) self.assertEquals(self.notifiers[self.NOTIFIER_ERR].error_count, 1) self.assertEquals(self.notifiers[self.NOTIFIER_NORM].error_count, 0) self.assertEquals(self.notifiers[self.NOTIFIER_TERM].error_count, 0) class TestSingleFileEventHandlerError(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def test(self): wm = pyinotify.WatchManager() handler = asyncnotifier.SingleFileEventHandler(wm, None, utils.PathJoin(self.tmpdir, "nonexist")) self.assertRaises(errors.InotifyError, handler.enable) self.assertRaises(errors.InotifyError, handler.enable) handler.disable() self.assertRaises(errors.InotifyError, handler.enable) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.storage.bdev_unittest.py0000744000000000000000000002660312267470014022556 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
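# The RBD tests below feed canned "rbd showmapped" output to two parsers:
# a plain-text one (old and new column layouts) and a JSON one. The JSON
# parser's job, in miniature (hypothetical helper, not the real
# bdev code; assumes the output is a JSON list of mappings carrying
# "name" and "device" keys, which is what the canned data exercises):
#
#   import json
#   def _find_rbd_device(output, volume_name):
#     matches = [e["device"] for e in json.loads(output)
#                if e.get("name") == volume_name]
#     if len(matches) > 1:
#       raise ValueError("Volume %s mapped more than once" % volume_name)
#     return matches[0] if matches else None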
"""Script for unittesting the bdev module""" import os import random import unittest from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import objects from ganeti import utils from ganeti.storage import bdev import testutils class TestRADOSBlockDevice(testutils.GanetiTestCase): def setUp(self): """Set up input data""" testutils.GanetiTestCase.setUp(self) self.plain_output_old_ok = \ testutils.ReadTestData("bdev-rbd/plain_output_old_ok.txt") self.plain_output_old_no_matches = \ testutils.ReadTestData("bdev-rbd/plain_output_old_no_matches.txt") self.plain_output_old_extra_matches = \ testutils.ReadTestData("bdev-rbd/plain_output_old_extra_matches.txt") self.plain_output_old_empty = \ testutils.ReadTestData("bdev-rbd/plain_output_old_empty.txt") self.plain_output_new_ok = \ testutils.ReadTestData("bdev-rbd/plain_output_new_ok.txt") self.plain_output_new_no_matches = \ testutils.ReadTestData("bdev-rbd/plain_output_new_no_matches.txt") self.plain_output_new_extra_matches = \ testutils.ReadTestData("bdev-rbd/plain_output_new_extra_matches.txt") # This file is completely empty, and as such it's not shipped. self.plain_output_new_empty = "" self.json_output_ok = testutils.ReadTestData("bdev-rbd/json_output_ok.txt") self.json_output_no_matches = \ testutils.ReadTestData("bdev-rbd/json_output_no_matches.txt") self.json_output_extra_matches = \ testutils.ReadTestData("bdev-rbd/json_output_extra_matches.txt") self.json_output_empty = \ testutils.ReadTestData("bdev-rbd/json_output_empty.txt") self.output_invalid = testutils.ReadTestData("bdev-rbd/output_invalid.txt") self.volume_name = "d7ab910a-4933-4ffe-88d0-faf2ce31390a.rbd.disk0" def test_ParseRbdShowmappedJson(self): parse_function = bdev.RADOSBlockDevice._ParseRbdShowmappedJson self.assertEqual(parse_function(self.json_output_ok, self.volume_name), "/dev/rbd3") self.assertEqual(parse_function(self.json_output_empty, self.volume_name), None) self.assertEqual(parse_function(self.json_output_no_matches, self.volume_name), None) self.assertRaises(errors.BlockDeviceError, parse_function, self.json_output_extra_matches, self.volume_name) self.assertRaises(errors.BlockDeviceError, parse_function, self.output_invalid, self.volume_name) def test_ParseRbdShowmappedPlain(self): parse_function = bdev.RADOSBlockDevice._ParseRbdShowmappedPlain self.assertEqual(parse_function(self.plain_output_new_ok, self.volume_name), "/dev/rbd3") self.assertEqual(parse_function(self.plain_output_old_ok, self.volume_name), "/dev/rbd3") self.assertEqual(parse_function(self.plain_output_new_empty, self.volume_name), None) self.assertEqual(parse_function(self.plain_output_old_empty, self.volume_name), None) self.assertEqual(parse_function(self.plain_output_new_no_matches, self.volume_name), None) self.assertEqual(parse_function(self.plain_output_old_no_matches, self.volume_name), None) self.assertRaises(errors.BlockDeviceError, parse_function, self.plain_output_new_extra_matches, self.volume_name) self.assertRaises(errors.BlockDeviceError, parse_function, self.plain_output_old_extra_matches, self.volume_name) self.assertRaises(errors.BlockDeviceError, parse_function, self.output_invalid, self.volume_name) class TestExclusiveStoragePvs(unittest.TestCase): """Test cases for functions dealing with LVM PV and exclusive storage""" # Allowance for rounding _EPS = 1e-4 _MARGIN = constants.PART_MARGIN + constants.PART_RESERVED + _EPS @staticmethod def _GenerateRandomPvInfo(rnd, name, vg): # Granularity is .01 MiB size = rnd.randint(1024 * 100, 
10 * 1024 * 1024 * 100) if rnd.choice([False, True]): free = float(rnd.randint(0, size)) / 100.0 else: free = float(size) / 100.0 size = float(size) / 100.0 attr = "a-" return objects.LvmPvInfo(name=name, vg_name=vg, size=size, free=free, attributes=attr) def testGetStdPvSize(self): """Test cases for bdev.LogicalVolume._GetStdPvSize()""" rnd = random.Random(9517) for _ in range(0, 50): # Identical volumes pvi = self._GenerateRandomPvInfo(rnd, "disk", "myvg") onesize = bdev.LogicalVolume._GetStdPvSize([pvi]) self.assertTrue(onesize <= pvi.size) self.assertTrue(onesize > pvi.size * (1 - self._MARGIN)) for length in range(2, 10): n_size = bdev.LogicalVolume._GetStdPvSize([pvi] * length) self.assertEqual(onesize, n_size) # Mixed volumes for length in range(1, 10): pvlist = [self._GenerateRandomPvInfo(rnd, "disk", "myvg") for _ in range(0, length)] std_size = bdev.LogicalVolume._GetStdPvSize(pvlist) self.assertTrue(compat.all(std_size <= pvi.size for pvi in pvlist)) self.assertTrue(compat.any(std_size > pvi.size * (1 - self._MARGIN) for pvi in pvlist)) pvlist.append(pvlist[0]) p1_size = bdev.LogicalVolume._GetStdPvSize(pvlist) self.assertEqual(std_size, p1_size) def testComputeNumPvs(self): """Test cases for bdev.LogicalVolume._ComputeNumPvs()""" rnd = random.Random(8067) for _ in range(0, 1000): pvlist = [self._GenerateRandomPvInfo(rnd, "disk", "myvg")] lv_size = float(rnd.randint(10 * 100, 1024 * 1024 * 100)) / 100.0 num_pv = bdev.LogicalVolume._ComputeNumPvs(lv_size, pvlist) std_size = bdev.LogicalVolume._GetStdPvSize(pvlist) self.assertTrue(num_pv >= 1) self.assertTrue(num_pv * std_size >= lv_size) self.assertTrue((num_pv - 1) * std_size < lv_size * (1 + self._EPS)) def testGetEmptyPvNames(self): """Test cases for bdev.LogicalVolume._GetEmptyPvNames()""" rnd = random.Random(21126) for _ in range(0, 100): num_pvs = rnd.randint(1, 20) pvlist = [self._GenerateRandomPvInfo(rnd, "disk%d" % n, "myvg") for n in range(0, num_pvs)] for num_req in range(1, num_pvs + 2): epvs = bdev.LogicalVolume._GetEmptyPvNames(pvlist, num_req) epvs_set = compat.UniqueFrozenset(epvs) if len(epvs) > 1: self.assertEqual(len(epvs), len(epvs_set)) for pvi in pvlist: if pvi.name in epvs_set: self.assertEqual(pvi.size, pvi.free) else: # There should be no remaining empty PV when less than the # requeste number of PVs has been returned self.assertTrue(len(epvs) == num_req or pvi.free != pvi.size) class TestLogicalVolume(unittest.TestCase): """Tests for bdev.LogicalVolume.""" def testParseLvInfoLine(self): """Tests for LogicalVolume._ParseLvInfoLine.""" broken_lines = [ " toomuch#-wi-ao#253#3#4096.00#2#/dev/abc(20)", " -wi-ao#253#3#4096.00#/dev/abc(20)", " -wi-a#253#3#4096.00#2#/dev/abc(20)", " -wi-ao#25.3#3#4096.00#2#/dev/abc(20)", " -wi-ao#twenty#3#4096.00#2#/dev/abc(20)", " -wi-ao#253#3.1#4096.00#2#/dev/abc(20)", " -wi-ao#253#three#4096.00#2#/dev/abc(20)", " -wi-ao#253#3#four#2#/dev/abc(20)", " -wi-ao#253#3#4096..00#2#/dev/abc(20)", " -wi-ao#253#3#4096.00#2.0#/dev/abc(20)", " -wi-ao#253#3#4096.00#two#/dev/abc(20)", ] for broken in broken_lines: self.assertRaises(errors.BlockDeviceError, bdev.LogicalVolume._ParseLvInfoLine, broken, "#") # Examples of good lines from "lvs": # -wi-ao|253|3|4096.00|2|/dev/sdb(144),/dev/sdc(0) # -wi-a-|253|4|4096.00|1|/dev/sdb(208) true_out = [ ("-wi-ao", 253, 3, 4096.00, 2, ["/dev/abc"]), ("-wi-a-", 253, 7, 4096.00, 4, ["/dev/abc"]), ("-ri-a-", 253, 4, 4.00, 5, ["/dev/abc", "/dev/def"]), ("-wc-ao", 15, 18, 4096.00, 32, ["/dev/abc", "/dev/def", "/dev/ghi0"]), ] for exp in true_out: for sep in 
"#;|": pvs = ",".join("%s(%s)" % (d, i * 12) for (i, d) in enumerate(exp[-1])) lvs_line = (sep.join((" %s", "%d", "%d", "%.2f", "%d", "%s")) % (exp[0:-1] + (pvs,))) parsed = bdev.LogicalVolume._ParseLvInfoLine(lvs_line, sep) self.assertEqual(parsed, exp) @staticmethod def _FakeRunCmd(success, stdout): if success: exit_code = 0 else: exit_code = 1 return lambda cmd: utils.RunResult(exit_code, None, stdout, "", cmd, utils.process._TIMEOUT_NONE, 5) def testGetLvInfo(self): """Tests for LogicalVolume._GetLvInfo.""" self.assertRaises(errors.BlockDeviceError, bdev.LogicalVolume._GetLvInfo, "fake_path", _run_cmd=self._FakeRunCmd(False, "Fake error msg")) self.assertRaises(errors.BlockDeviceError, bdev.LogicalVolume._GetLvInfo, "fake_path", _run_cmd=self._FakeRunCmd(True, "")) self.assertRaises(errors.BlockDeviceError, bdev.LogicalVolume._GetLvInfo, "fake_path", _run_cmd=self._FakeRunCmd(True, "BadStdOut")) good_line = " -wi-ao|253|3|4096.00|2|/dev/abc(20)" fake_cmd = self._FakeRunCmd(True, good_line) good_res = bdev.LogicalVolume._GetLvInfo("fake_path", _run_cmd=fake_cmd) # If the same line is repeated, the result should be the same for lines in [ [good_line] * 2, [good_line] * 3, ]: fake_cmd = self._FakeRunCmd(True, "\n".join(lines)) same_res = bdev.LogicalVolume._GetLvInfo("fake_path", fake_cmd) self.assertEqual(same_res, good_res) # Complex multi-line examples one_line = " -wi-ao|253|3|4096.00|2|/dev/sda(20),/dev/sdb(50),/dev/sdc(0)" fake_cmd = self._FakeRunCmd(True, one_line) one_res = bdev.LogicalVolume._GetLvInfo("fake_path", _run_cmd=fake_cmd) # These should give the same results for multi_lines in [ (" -wi-ao|253|3|4096.00|2|/dev/sda(30),/dev/sdb(50)\n" " -wi-ao|253|3|4096.00|2|/dev/sdb(200),/dev/sdc(300)"), (" -wi-ao|253|3|4096.00|2|/dev/sda(0)\n" " -wi-ao|253|3|4096.00|2|/dev/sdb(20)\n" " -wi-ao|253|3|4096.00|2|/dev/sdc(30)"), (" -wi-ao|253|3|4096.00|2|/dev/sda(20)\n" " -wi-ao|253|3|4096.00|2|/dev/sdb(50),/dev/sdc(0)"), ]: fake_cmd = self._FakeRunCmd(True, multi_lines) multi_res = bdev.LogicalVolume._GetLvInfo("fake_path", _run_cmd=fake_cmd) self.assertEqual(multi_res, one_res) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.runtime_unittest.py0000744000000000000000000001341612271422343021651 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing ganeti.runtime""" from ganeti import constants from ganeti import errors from ganeti import runtime from ganeti import ht import testutils import unittest class _EntStub: def __init__(self, uid=None, gid=None): self.pw_uid = uid self.gr_gid = gid def _StubGetpwnam(user): users = { constants.MASTERD_USER: _EntStub(uid=0), constants.CONFD_USER: _EntStub(uid=1), constants.RAPI_USER: _EntStub(uid=2), constants.NODED_USER: _EntStub(uid=3), } return users[user] def _StubGetgrnam(group): groups = { constants.MASTERD_GROUP: _EntStub(gid=0), constants.CONFD_GROUP: _EntStub(gid=1), constants.RAPI_GROUP: _EntStub(gid=2), constants.DAEMONS_GROUP: _EntStub(gid=3), constants.ADMIN_GROUP: _EntStub(gid=4), constants.NODED_GROUP: _EntStub(gid=5), } return groups[group] def _RaisingStubGetpwnam(user): raise KeyError("user not found") def _RaisingStubGetgrnam(group): raise KeyError("group not found") class ResolverStubRaising(object): def __init__(self): raise errors.ConfigurationError("No entries") class TestErrors(unittest.TestCase): def setUp(self): self.resolver = runtime.GetentResolver(_getpwnam=_StubGetpwnam, _getgrnam=_StubGetgrnam) def testEverythingSuccessful(self): self.assertEqual(self.resolver.masterd_uid, _StubGetpwnam(constants.MASTERD_USER).pw_uid) self.assertEqual(self.resolver.masterd_gid, _StubGetgrnam(constants.MASTERD_GROUP).gr_gid) self.assertEqual(self.resolver.confd_uid, _StubGetpwnam(constants.CONFD_USER).pw_uid) self.assertEqual(self.resolver.confd_gid, _StubGetgrnam(constants.CONFD_GROUP).gr_gid) self.assertEqual(self.resolver.rapi_uid, _StubGetpwnam(constants.RAPI_USER).pw_uid) self.assertEqual(self.resolver.rapi_gid, _StubGetgrnam(constants.RAPI_GROUP).gr_gid) self.assertEqual(self.resolver.noded_uid, _StubGetpwnam(constants.NODED_USER).pw_uid) self.assertEqual(self.resolver.daemons_gid, _StubGetgrnam(constants.DAEMONS_GROUP).gr_gid) self.assertEqual(self.resolver.admin_gid, _StubGetgrnam(constants.ADMIN_GROUP).gr_gid) def testUserNotFound(self): self.assertRaises(errors.ConfigurationError, runtime.GetentResolver, _getpwnam=_RaisingStubGetpwnam, _getgrnam=_StubGetgrnam) def testGroupNotFound(self): self.assertRaises(errors.ConfigurationError, runtime.GetentResolver, _getpwnam=_StubGetpwnam, _getgrnam=_RaisingStubGetgrnam) def testUserNotFoundGetEnts(self): self.assertRaises(errors.ConfigurationError, runtime.GetEnts, resolver=ResolverStubRaising) def testLookupForUser(self): master_stub = _StubGetpwnam(constants.MASTERD_USER) rapi_stub = _StubGetpwnam(constants.RAPI_USER) self.assertEqual(self.resolver.LookupUid(master_stub.pw_uid), constants.MASTERD_USER) self.assertEqual(self.resolver.LookupUid(rapi_stub.pw_uid), constants.RAPI_USER) self.assertEqual(self.resolver.LookupUser(constants.MASTERD_USER), master_stub.pw_uid) self.assertEqual(self.resolver.LookupUser(constants.RAPI_USER), rapi_stub.pw_uid) def testLookupForGroup(self): master_stub = _StubGetgrnam(constants.MASTERD_GROUP) rapi_stub = _StubGetgrnam(constants.RAPI_GROUP) self.assertEqual(self.resolver.LookupGid(master_stub.gr_gid), constants.MASTERD_GROUP) self.assertEqual(self.resolver.LookupGid(rapi_stub.gr_gid), constants.RAPI_GROUP) def testLookupForUserNotFound(self): self.assertRaises(errors.ConfigurationError, self.resolver.LookupUid, 9999) self.assertRaises(errors.ConfigurationError, self.resolver.LookupUser, "does-not-exist-foo") def testLookupForGroupNotFound(self): self.assertRaises(errors.ConfigurationError, self.resolver.LookupGid, 9999) self.assertRaises(errors.ConfigurationError, 
self.resolver.LookupGroup, "does-not-exist-foo") class TestArchInfo(unittest.TestCase): EXP_TYPES = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.TNonEmptyString, ht.TNonEmptyString, ])) def setUp(self): self.assertTrue(runtime._arch is None) def tearDown(self): runtime._arch = None def testNotInitialized(self): self.assertRaises(errors.ProgrammerError, runtime.GetArchInfo) def testInitializeMultiple(self): runtime.InitArchInfo() self.assertRaises(errors.ProgrammerError, runtime.InitArchInfo) def testNormal(self): runtime.InitArchInfo() info = runtime.GetArchInfo() self.assertTrue(self.EXP_TYPES(info), msg=("Doesn't match expected type description: %s" % self.EXP_TYPES)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.storage.container_unittest.py0000744000000000000000000001077112267470014023617 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.storage.container""" import re import unittest import random from ganeti import constants from ganeti import utils from ganeti import compat from ganeti import errors from ganeti.storage import container import testutils class TestVGReduce(testutils.GanetiTestCase): VGNAME = "xenvg" LIST_CMD = container.LvmVgStorage.LIST_COMMAND VGREDUCE_CMD = container.LvmVgStorage.VGREDUCE_COMMAND def _runCmd(self, cmd, **kwargs): if not self.run_history: self.fail("Empty run results") exp_cmd, result = self.run_history.pop(0) self.assertEqual(cmd, exp_cmd) return result def testOldVersion(self): lvmvg = container.LvmVgStorage() stdout = testutils.ReadTestData("vgreduce-removemissing-2.02.02.txt") vgs_fail = testutils.ReadTestData("vgs-missing-pvs-2.02.02.txt") self.run_history = [ ([self.VGREDUCE_CMD, "--removemissing", self.VGNAME], utils.RunResult(0, None, stdout, "", "", None, None)), ([self.LIST_CMD, "--noheadings", "--nosuffix", self.VGNAME], utils.RunResult(0, None, "", "", "", None, None)), ] lvmvg._RemoveMissing(self.VGNAME, _runcmd_fn=self._runCmd) self.assertEqual(self.run_history, []) for ecode, out in [(1, ""), (0, vgs_fail)]: self.run_history = [ ([self.VGREDUCE_CMD, "--removemissing", self.VGNAME], utils.RunResult(0, None, stdout, "", "", None, None)), ([self.LIST_CMD, "--noheadings", "--nosuffix", self.VGNAME], utils.RunResult(ecode, None, out, "", "", None, None)), ] self.assertRaises(errors.StorageError, lvmvg._RemoveMissing, self.VGNAME, _runcmd_fn=self._runCmd) self.assertEqual(self.run_history, []) def testNewVersion(self): lvmvg = container.LvmVgStorage() stdout1 = testutils.ReadTestData("vgreduce-removemissing-2.02.66-fail.txt") stdout2 = testutils.ReadTestData("vgreduce-removemissing-2.02.66-ok.txt") vgs_fail = testutils.ReadTestData("vgs-missing-pvs-2.02.66.txt") # first: require --fail, check that it's used self.run_history = [ ([self.VGREDUCE_CMD, 
"--removemissing", self.VGNAME], utils.RunResult(0, None, stdout1, "", "", None, None)), ([self.VGREDUCE_CMD, "--removemissing", "--force", self.VGNAME], utils.RunResult(0, None, stdout2, "", "", None, None)), ([self.LIST_CMD, "--noheadings", "--nosuffix", self.VGNAME], utils.RunResult(0, None, "", "", "", None, None)), ] lvmvg._RemoveMissing(self.VGNAME, _runcmd_fn=self._runCmd) self.assertEqual(self.run_history, []) # second: make sure --fail is not used if not needed self.run_history = [ ([self.VGREDUCE_CMD, "--removemissing", self.VGNAME], utils.RunResult(0, None, stdout2, "", "", None, None)), ([self.LIST_CMD, "--noheadings", "--nosuffix", self.VGNAME], utils.RunResult(0, None, "", "", "", None, None)), ] lvmvg._RemoveMissing(self.VGNAME, _runcmd_fn=self._runCmd) self.assertEqual(self.run_history, []) # third: make sure we error out if vgs doesn't find the volume for ecode, out in [(1, ""), (0, vgs_fail)]: self.run_history = [ ([self.VGREDUCE_CMD, "--removemissing", self.VGNAME], utils.RunResult(0, None, stdout1, "", "", None, None)), ([self.VGREDUCE_CMD, "--removemissing", "--force", self.VGNAME], utils.RunResult(0, None, stdout2, "", "", None, None)), ([self.LIST_CMD, "--noheadings", "--nosuffix", self.VGNAME], utils.RunResult(ecode, None, out, "", "", None, None)), ] self.assertRaises(errors.StorageError, lvmvg._RemoveMissing, self.VGNAME, _runcmd_fn=self._runCmd) self.assertEqual(self.run_history, []) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/qa.qa_config_unittest.py0000744000000000000000000003337312244641676021266 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for testing qa.qa_config""" import unittest import tempfile import shutil import os import operator from ganeti import utils from ganeti import serializer from ganeti import constants from ganeti import compat from qa import qa_config from qa import qa_error import testutils class TestTestEnabled(unittest.TestCase): def testSimple(self): for name in ["test", ["foobar"], ["a", "b"]]: self.assertTrue(qa_config.TestEnabled(name, _cfg={})) for default in [False, True]: self.assertFalse(qa_config.TestEnabled("foo", _cfg={ "tests": { "default": default, "foo": False, }, })) self.assertTrue(qa_config.TestEnabled("bar", _cfg={ "tests": { "default": default, "bar": True, }, })) def testEitherWithDefault(self): names = qa_config.Either("one") self.assertTrue(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, }, })) self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": False, }, })) def testEither(self): names = [qa_config.Either(["one", "two"]), qa_config.Either("foo"), "hello", ["bar", "baz"]] self.assertTrue(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, }, })) self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": False, }, })) for name in ["foo", "bar", "baz", "hello"]: self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, name: False, }, })) self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, "one": False, "two": False, }, })) self.assertTrue(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, "one": False, "two": True, }, })) self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, "one": True, "two": True, "foo": False, }, })) def testEitherNestedWithAnd(self): names = qa_config.Either([["one", "two"], "foo"]) self.assertTrue(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, }, })) for name in ["one", "two"]: self.assertFalse(qa_config.TestEnabled(names, _cfg={ "tests": { "default": True, "foo": False, name: False, }, })) def testCallable(self): self.assertTrue(qa_config.TestEnabled([lambda: True], _cfg={})) for value in [None, False, "", 0]: self.assertFalse(qa_config.TestEnabled(lambda: value, _cfg={})) class TestQaConfigLoad(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testLoadNonExistent(self): filename = utils.PathJoin(self.tmpdir, "does.not.exist") self.assertRaises(EnvironmentError, qa_config._QaConfig.Load, filename) @staticmethod def _WriteConfig(filename, data): utils.WriteFile(filename, data=serializer.DumpJson(data)) def _CheckLoadError(self, filename, data, expected): self._WriteConfig(filename, data) try: qa_config._QaConfig.Load(filename) except qa_error.Error, err: self.assertTrue(str(err).startswith(expected)) else: self.fail("Exception was not raised") def testFailsValidation(self): filename = utils.PathJoin(self.tmpdir, "qa.json") testconfig = {} check_fn = compat.partial(self._CheckLoadError, filename, testconfig) # No cluster name check_fn("Cluster name is required") testconfig["name"] = "cluster.example.com" # No nodes check_fn("Need at least one node") testconfig["nodes"] = [ { "primary": "xen-test-0", "secondary": "192.0.2.1", }, ] # No instances check_fn("Need at least one instance") testconfig["instances"] = [ { "name": "xen-test-inst1", }, ] # Missing "disk" and "disk-growth" check_fn("Config option 'disks'") testconfig["disks"] = [] # Minimal accepted configuration self._WriteConfig(filename, testconfig) 
result = qa_config._QaConfig.Load(filename) self.assertTrue(result.get("nodes")) # Non-existent instance check script testconfig[qa_config._INSTANCE_CHECK_KEY] = \ utils.PathJoin(self.tmpdir, "instcheck") check_fn("Can't find instance check script") del testconfig[qa_config._INSTANCE_CHECK_KEY] # No enabled hypervisor testconfig[qa_config._ENABLED_HV_KEY] = None check_fn("No hypervisor is enabled") # Unknown hypervisor testconfig[qa_config._ENABLED_HV_KEY] = ["#unknownhv#"] check_fn("Unknown hypervisor(s) enabled:") del testconfig[qa_config._ENABLED_HV_KEY] # Invalid path for virtual cluster base directory testconfig[qa_config._VCLUSTER_MASTER_KEY] = "value" testconfig[qa_config._VCLUSTER_BASEDIR_KEY] = "./not//normalized/" check_fn("Path given in option 'vcluster-basedir' must be") # Inconsistent virtual cluster settings testconfig.pop(qa_config._VCLUSTER_MASTER_KEY) testconfig[qa_config._VCLUSTER_BASEDIR_KEY] = "/tmp" check_fn("All or none of the") testconfig[qa_config._VCLUSTER_MASTER_KEY] = "master.example.com" testconfig.pop(qa_config._VCLUSTER_BASEDIR_KEY) check_fn("All or none of the") # Accepted virtual cluster settings testconfig[qa_config._VCLUSTER_MASTER_KEY] = "master.example.com" testconfig[qa_config._VCLUSTER_BASEDIR_KEY] = "/tmp" self._WriteConfig(filename, testconfig) result = qa_config._QaConfig.Load(filename) self.assertEqual(result.GetVclusterSettings(), ("master.example.com", "/tmp")) class TestQaConfigWithSampleConfig(unittest.TestCase): """Tests using C{qa-sample.json}. This test case serves two purposes: - Ensure shipped C{qa-sample.json} file is considered a valid QA configuration - Test some functions of L{qa_config._QaConfig} without having to mock a whole configuration file """ def setUp(self): filename = "%s/qa/qa-sample.json" % testutils.GetSourceDir() self.config = qa_config._QaConfig.Load(filename) def testGetEnabledHypervisors(self): self.assertEqual(self.config.GetEnabledHypervisors(), [constants.DEFAULT_ENABLED_HYPERVISOR]) def testGetDefaultHypervisor(self): self.assertEqual(self.config.GetDefaultHypervisor(), constants.DEFAULT_ENABLED_HYPERVISOR) def testGetInstanceCheckScript(self): self.assertTrue(self.config.GetInstanceCheckScript() is None) def testGetAndGetItem(self): self.assertEqual(self.config["nodes"], self.config.get("nodes")) def testGetMasterNode(self): self.assertEqual(self.config.GetMasterNode(), self.config["nodes"][0]) def testGetVclusterSettings(self): # Shipped default settings should be to not use a virtual cluster self.assertEqual(self.config.GetVclusterSettings(), (None, None)) self.assertFalse(qa_config.UseVirtualCluster(_cfg=self.config)) class TestQaConfig(unittest.TestCase): def setUp(self): filename = \ testutils.TestDataFilename("qa-minimal-nodes-instances-only.json") self.config = qa_config._QaConfig.Load(filename) def testExclusiveStorage(self): self.assertRaises(AssertionError, self.config.GetExclusiveStorage) for value in [False, True, 0, 1, 30804, ""]: self.config.SetExclusiveStorage(value) self.assertEqual(self.config.GetExclusiveStorage(), bool(value)) def testIsTemplateSupported(self): enabled_dts = self.config.GetEnabledDiskTemplates() for e_s in [False, True]: self.config.SetExclusiveStorage(e_s) for template in constants.DISK_TEMPLATES: if (template not in enabled_dts or e_s and template not in constants.DTS_EXCL_STORAGE): self.assertFalse(self.config.IsTemplateSupported(template)) else: self.assertTrue(self.config.IsTemplateSupported(template)) def testInstanceConversion(self): 
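# The remaining tests lean on the bookkeeping carried by _QaInstance and
# _QaNode: instances track a "used" flag plus a disk template, and nodes
# track "added" and a "use_count" that acquisition and release move up
# and down, with AcquireNode preferring the least-used node. In outline
# (illustrative, matching the assertions below):
#
#   inst = qa_config.AcquireInstance(_cfg=cfg)   # inst.used -> True
#   inst.Release()                               # inst.used -> False
#   node = qa_config.AcquireNode(_cfg=cfg)       # bumps node.use_count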
self.assertTrue(isinstance(self.config["instances"][0], qa_config._QaInstance)) def testNodeConversion(self): self.assertTrue(isinstance(self.config["nodes"][0], qa_config._QaNode)) def testAcquireAndReleaseInstance(self): self.assertFalse(compat.any(map(operator.attrgetter("used"), self.config["instances"]))) inst = qa_config.AcquireInstance(_cfg=self.config) self.assertTrue(inst.used) self.assertTrue(inst.disk_template is None) inst.Release() self.assertFalse(inst.used) self.assertTrue(inst.disk_template is None) self.assertFalse(compat.any(map(operator.attrgetter("used"), self.config["instances"]))) def testAcquireInstanceTooMany(self): # Acquire all instances for _ in range(len(self.config["instances"])): inst = qa_config.AcquireInstance(_cfg=self.config) self.assertTrue(inst.used) self.assertTrue(inst.disk_template is None) # The next acquisition must fail self.assertRaises(qa_error.OutOfInstancesError, qa_config.AcquireInstance, _cfg=self.config) def testAcquireNodeNoneAdded(self): self.assertFalse(compat.any(map(operator.attrgetter("added"), self.config["nodes"]))) # First call must return master node node = qa_config.AcquireNode(_cfg=self.config) self.assertEqual(node, self.config.GetMasterNode()) # Next call with exclusion list fails self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode, exclude=[node], _cfg=self.config) def testAcquireNodeTooMany(self): # Mark all nodes as marked (master excluded) for node in self.config["nodes"]: if node != self.config.GetMasterNode(): node.MarkAdded() nodecount = len(self.config["nodes"]) self.assertTrue(nodecount > 1) acquired = [] for _ in range(nodecount): node = qa_config.AcquireNode(exclude=acquired, _cfg=self.config) if node == self.config.GetMasterNode(): self.assertFalse(node.added) else: self.assertTrue(node.added) self.assertEqual(node.use_count, 1) acquired.append(node) self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode, exclude=acquired, _cfg=self.config) def testAcquireNodeOrder(self): # Mark all nodes as marked (master excluded) for node in self.config["nodes"]: if node != self.config.GetMasterNode(): node.MarkAdded() nodecount = len(self.config["nodes"]) for iterations in [0, 1, 3, 100, 127, 7964]: acquired = [] for i in range(iterations): node = qa_config.AcquireNode(_cfg=self.config) self.assertTrue(node.use_count > 0) self.assertEqual(node.use_count, (i / nodecount + 1)) acquired.append((node.use_count, node.primary, node)) # Check if returned nodes were in correct order key_fn = lambda (a, b, c): (a, utils.NiceSortKey(b), c) self.assertEqual(acquired, sorted(acquired, key=key_fn)) # Release previously acquired nodes qa_config.ReleaseManyNodes(map(operator.itemgetter(2), acquired)) # Check if nodes were actually released for node in self.config["nodes"]: self.assertEqual(node.use_count, 0) self.assertTrue(node.added or node == self.config.GetMasterNode()) class TestRepresentation(unittest.TestCase): def _Check(self, target, part): self.assertTrue(part in repr(target).split()) def testQaInstance(self): inst = qa_config._QaInstance("inst1.example.com", []) self._Check(inst, "name=inst1.example.com") self._Check(inst, "nicmac=[]") # Default values self._Check(inst, "disk_template=None") self._Check(inst, "used=None") # Use instance inst.Use() self._Check(inst, "used=True") # Disk template inst.SetDiskTemplate(constants.DT_DRBD8) self._Check(inst, "disk_template=%s" % constants.DT_DRBD8) # Release instance inst.Release() self._Check(inst, "used=False") self._Check(inst, "disk_template=None") def 
testQaNode(self): node = qa_config._QaNode("primary.example.com", "192.0.2.1") self._Check(node, "primary=primary.example.com") self._Check(node, "secondary=192.0.2.1") self._Check(node, "added=False") self._Check(node, "use_count=0") # Mark as added node.MarkAdded() self._Check(node, "added=True") # Use node for i in range(1, 5): node.Use() self._Check(node, "use_count=%s" % i) # Release node for i in reversed(range(1, 5)): node.Release() self._Check(node, "use_count=%s" % (i - 1)) self._Check(node, "use_count=0") # Mark as added node.MarkRemoved() self._Check(node, "added=False") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.wrapper_unittest.py0000744000000000000000000001207112244641676023015 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.wrapper""" import errno import fcntl import os import socket import tempfile import unittest import shutil from ganeti import constants from ganeti import utils import testutils class TestSetCloseOnExecFlag(unittest.TestCase): """Tests for SetCloseOnExecFlag""" def setUp(self): self.tmpfile = tempfile.TemporaryFile() def testEnable(self): utils.SetCloseOnExecFlag(self.tmpfile.fileno(), True) self.failUnless(fcntl.fcntl(self.tmpfile.fileno(), fcntl.F_GETFD) & fcntl.FD_CLOEXEC) def testDisable(self): utils.SetCloseOnExecFlag(self.tmpfile.fileno(), False) self.failIf(fcntl.fcntl(self.tmpfile.fileno(), fcntl.F_GETFD) & fcntl.FD_CLOEXEC) class TestSetNonblockFlag(unittest.TestCase): def setUp(self): self.tmpfile = tempfile.TemporaryFile() def testEnable(self): utils.SetNonblockFlag(self.tmpfile.fileno(), True) self.failUnless(fcntl.fcntl(self.tmpfile.fileno(), fcntl.F_GETFL) & os.O_NONBLOCK) def testDisable(self): utils.SetNonblockFlag(self.tmpfile.fileno(), False) self.failIf(fcntl.fcntl(self.tmpfile.fileno(), fcntl.F_GETFL) & os.O_NONBLOCK) class TestIgnoreProcessNotFound(unittest.TestCase): @staticmethod def _WritePid(fd): os.write(fd, str(os.getpid())) os.close(fd) return True def test(self): (pid_read_fd, pid_write_fd) = os.pipe() # Start short-lived process which writes its PID to pipe self.assert_(utils.RunInSeparateProcess(self._WritePid, pid_write_fd)) os.close(pid_write_fd) # Read PID from pipe pid = int(os.read(pid_read_fd, 1024)) os.close(pid_read_fd) # Try to send signal to process which exited recently self.assertFalse(utils.IgnoreProcessNotFound(os.kill, pid, 0)) class TestIgnoreSignals(unittest.TestCase): """Test the IgnoreSignals decorator""" @staticmethod def _Raise(exception): raise exception @staticmethod def _Return(rval): return rval def testIgnoreSignals(self): sock_err_intr = socket.error(errno.EINTR, "Message") sock_err_inval = socket.error(errno.EINVAL, "Message") env_err_intr = 
EnvironmentError(errno.EINTR, "Message") env_err_inval = EnvironmentError(errno.EINVAL, "Message") self.assertRaises(socket.error, self._Raise, sock_err_intr) self.assertRaises(socket.error, self._Raise, sock_err_inval) self.assertRaises(EnvironmentError, self._Raise, env_err_intr) self.assertRaises(EnvironmentError, self._Raise, env_err_inval) self.assertEquals(utils.IgnoreSignals(self._Raise, sock_err_intr), None) self.assertEquals(utils.IgnoreSignals(self._Raise, env_err_intr), None) self.assertRaises(socket.error, utils.IgnoreSignals, self._Raise, sock_err_inval) self.assertRaises(EnvironmentError, utils.IgnoreSignals, self._Raise, env_err_inval) self.assertEquals(utils.IgnoreSignals(self._Return, True), True) self.assertEquals(utils.IgnoreSignals(self._Return, 33), 33) class TestIsExecutable(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testNonExisting(self): fname = utils.PathJoin(self.tmpdir, "file") assert not os.path.exists(fname) self.assertFalse(utils.IsExecutable(fname)) def testNoFile(self): path = utils.PathJoin(self.tmpdir, "something") os.mkdir(path) assert os.path.isdir(path) self.assertFalse(utils.IsExecutable(path)) def testExecutable(self): fname = utils.PathJoin(self.tmpdir, "file") utils.WriteFile(fname, data="#!/bin/bash", mode=0700) assert os.path.exists(fname) self.assertTrue(utils.IsExecutable(fname)) self.assertTrue(self._TestSymlink(fname)) def testFileNotExecutable(self): fname = utils.PathJoin(self.tmpdir, "file") utils.WriteFile(fname, data="#!/bin/bash", mode=0600) assert os.path.exists(fname) self.assertFalse(utils.IsExecutable(fname)) self.assertFalse(self._TestSymlink(fname)) def _TestSymlink(self, fname): assert os.path.exists(fname) linkname = utils.PathJoin(self.tmpdir, "cmd") os.symlink(fname, linkname) assert os.path.islink(linkname) return utils.IsExecutable(linkname) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti-cleaner_unittest.bash0000744000000000000000000001363112244641676022076 0ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. set -e -u set -o pipefail export PYTHON=${PYTHON:=python} GNTC=daemons/ganeti-cleaner CCE=tools/check-cert-expired err() { echo "$@" echo 'Aborting' exit 1 } upto() { echo "$(date '+%F %T'):" "$@" '...' 
} gencert() { local path=$1 validity=$2 VALIDITY=$validity $PYTHON \ ${TOP_SRCDIR:-.}/test/py/import-export_unittest-helper \ $path gencert } check_logfiles() { local n=$1 p=$2 path if [[ "$p" = master ]]; then path=$tmpls/log/ganeti/master-cleaner else path=$tmpls/log/ganeti/cleaner fi test -d $path || \ err "Log file directory '$path' not created" [[ "$(find $path -mindepth 1 | wc -l)" -le "$n" ]] || \ err "Found more than $n logfiles" } count_jobs() { local n=$1 local count=$(find $queuedir -mindepth 1 -type f | wc -l) [[ "$count" -eq "$n" ]] || err "Found $count jobs instead of $n" } count_watcher() { local suffix="$1" n=$2 local count=$(find $watcherdir -maxdepth 1 -type f \ -name "watcher.*-*-*-*.$suffix" | wc -l) [[ "$count" -eq "$n" ]] || \ err "Found $count watcher files with suffix '$suffix' instead of $n" } count_and_check_certs() { local n=$1 local count=$(find $cryptodir -mindepth 1 -type f -name cert | wc -l) [[ "$count" -eq "$n" ]] || err "Found $count certificates instead of $n" find $cryptodir -mindepth 1 -type d | \ while read dir; do [[ ( -e $dir/key && -e $dir/cert ) || ( ! -e $dir/cert && ! -e $dir/key ) ]] || \ err 'Inconsistent cert/key directory found' done } run_cleaner() { CHECK_CERT_EXPIRED=$CCE LOCALSTATEDIR=$tmpls $GNTC $1 } create_archived_jobs() { local i jobdir touchargs local jobarchive=$queuedir/archive local old_ts=$(date -d '25 days ago' +%Y%m%d%H%M) # Remove jobs from previous run find $jobarchive -mindepth 1 -type f | xargs -r rm i=0 for job_id in {1..50} 469581574 19857 1420164 494433 2448521 do jobdir=$jobarchive/$(( job_id / 10 )) test -d $jobdir || mkdir $jobdir if (( i % 3 == 0 || i % 7 == 0 )); then touchargs="-t $old_ts" else touchargs= fi touch $touchargs $jobdir/job-$job_id let ++i done } create_watcher_state() { local uuids=( 6792a0d5-f8b6-4531-8d8c-3680c86b8a53 ab74da37-f5f7-44c4-83ad-074159772593 fced2e48-ffff-43ae-919e-2b77d37ecafa 6e89ac57-2eb1-4a16-85a1-94daa815d643 8714e8f5-59c4-47db-b2cb-196ec37978e5 91763d73-e1f3-47c7-a735-57025d4e2a7d e27d3ff8-9546-4e86-86a4-04151223e140 aa3f63dd-be17-4ac8-bd01-d71790e124cb 05b6d7e2-003b-40d9-a6d6-ab61bf123a15 54c93e4c-61fe-40de-b47e-2a8e6c805d02 ) i=0 for uuid in ${uuids[@]}; do touch -d "$(( 5 * i )) days ago" \ $watcherdir/watcher.$uuid.{data,instance-status} let ++i done } create_certdirs() { local cert=$1; shift local certdir for name in "$@"; do certdir=$cryptodir/$name mkdir $certdir if [[ -n "$cert" ]]; then cp $cert $certdir/cert cp $cert $certdir/key fi done } tmpdir=$(mktemp -d) trap "rm -rf $tmpdir" EXIT # Temporary localstatedir tmpls=$tmpdir/var queuedir=$tmpls/lib/ganeti/queue cryptodir=$tmpls/run/ganeti/crypto watcherdir=$tmpls/lib/ganeti mkdir -p $tmpls/{lib,log,run}/ganeti $queuedir/archive $cryptodir maxlog=50 upto 'Checking log directory creation' test -d $tmpls/log/ganeti || err 'log/ganeti does not exist' test -d $tmpls/log/ganeti/cleaner && \ err 'log/ganeti/cleaner should not exist yet' run_cleaner node check_logfiles 1 node test -d $tmpls/log/ganeti/master-cleaner && \ err 'log/ganeti/master-cleaner should not exist yet' run_cleaner master check_logfiles 1 master upto 'Checking number of retained log files (master)' for (( i=0; i < (maxlog + 10); ++i )); do run_cleaner master check_logfiles 1 node check_logfiles $(( (i + 2) > $maxlog?$maxlog:(i + 2) )) master done upto 'Checking number of retained log files (node)' for (( i=0; i < (maxlog + 10); ++i )); do run_cleaner node check_logfiles $(( (i + 2) > $maxlog?$maxlog:(i + 2) )) node check_logfiles $maxlog master done upto 
'Removal of archived jobs (non-master)' create_archived_jobs count_jobs 55 test -f $tmpls/lib/ganeti/ssconf_master_node && \ err 'ssconf_master_node should not exist' run_cleaner node count_jobs 55 run_cleaner master count_jobs 55 upto 'Removal of archived jobs (master node)' create_archived_jobs count_jobs 55 echo $HOSTNAME > $tmpls/lib/ganeti/ssconf_master_node run_cleaner node count_jobs 55 run_cleaner master count_jobs 31 upto 'Certificate expiration' gencert $tmpdir/validcert 30 & vcpid=${!} gencert $tmpdir/expcert -30 & ecpid=${!} wait $vcpid $ecpid create_certdirs $tmpdir/validcert foo{a,b,c}123 trvRMH4Wvt OfDlh6Pc2n create_certdirs $tmpdir/expcert bar{x,y,z}999 fx0ljoImWr em3RBC0U8c create_certdirs '' empty{1,2,3} gd2HCvRc iFG55Z0a PP28v5kg count_and_check_certs 10 run_cleaner master count_and_check_certs 10 run_cleaner node count_and_check_certs 5 check_logfiles $maxlog node check_logfiles $maxlog master count_jobs 31 upto 'Watcher status files' create_watcher_state count_watcher data 10 count_watcher instance-status 10 run_cleaner master count_watcher data 10 count_watcher instance-status 10 run_cleaner node count_watcher data 5 count_watcher instance-status 5 exit 0 ganeti-2.9.3/test/py/ganeti.tools.ensure_dirs_unittest.py0000744000000000000000000000506512244641676023664 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
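# [Illustrative sketch, not part of the original tarball.] The testEntryOrder
# case below checks an ordering invariant of ensure_dirs.GetPaths(): a
# directory entry must be emitted before any file or queue directory living
# underneath it, so permissions can be fixed top-down. The real test also
# tracks the current directory and queue directories; a minimal restatement
# of just the parent-before-child rule, with made-up paths, might look like:

import os.path

def check_order(entries):
    """Assert that every "file" entry follows its parent "dir" entry."""
    seen_dirs = set()
    for (path, kind) in entries:
        if kind == "dir":
            seen_dirs.add(path)
        elif kind == "file":
            # The containing directory must have been listed already
            assert os.path.dirname(path) in seen_dirs, path

check_order([("/var/lib/demo", "dir"), ("/var/lib/demo/config", "file")])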
"""Script for testing ganeti.tools.ensure_dirs""" import unittest import os.path from ganeti import utils from ganeti.tools import ensure_dirs import testutils class TestGetPaths(unittest.TestCase): def testEntryOrder(self): paths = [(path[0], path[1]) for path in ensure_dirs.GetPaths()] # Directories for which permissions have been set seen = set() # Current directory (changes when an entry of type C{DIR} or C{QUEUE_DIR} # is encountered) current_dir = None for (path, pathtype) in paths: self.assertTrue(pathtype in ensure_dirs.ALL_TYPES) self.assertTrue(utils.IsNormAbsPath(path), msg=("Path '%s' is not absolute and/or normalized" % path)) dirname = os.path.dirname(path) if pathtype == ensure_dirs.DIR: self.assertFalse(path in seen, msg=("Directory '%s' was seen before" % path)) current_dir = path seen.add(path) elif pathtype == ensure_dirs.QUEUE_DIR: self.assertTrue(dirname in seen, msg=("Queue directory '%s' was not seen before" % path)) current_dir = path elif pathtype == ensure_dirs.FILE: self.assertFalse(current_dir is None) self.assertTrue(dirname in seen, msg=("Directory '%s' of path '%s' has not been seen" " yet" % (dirname, path))) self.assertTrue((utils.IsBelowDir(current_dir, path) and current_dir == dirname), msg=("File '%s' not below current directory '%s'" % (path, current_dir))) else: self.fail("Unknown path type '%s'" % (pathtype, )) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.mcpu_unittest.py0000744000000000000000000002161512271422343021132 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2009, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for unittesting the mcpu module""" import unittest import itertools from ganeti import compat from ganeti import mcpu from ganeti import opcodes from ganeti import cmdlib from ganeti import locking from ganeti import constants from ganeti.constants import \ LOCK_ATTEMPTS_TIMEOUT, \ LOCK_ATTEMPTS_MAXWAIT, \ LOCK_ATTEMPTS_MINWAIT import testutils REQ_BGL_WHITELIST = compat.UniqueFrozenset([ opcodes.OpClusterActivateMasterIp, opcodes.OpClusterDeactivateMasterIp, opcodes.OpClusterDestroy, opcodes.OpClusterPostInit, opcodes.OpClusterRename, opcodes.OpInstanceRename, opcodes.OpNodeAdd, opcodes.OpNodeRemove, opcodes.OpTestAllocator, ]) class TestLockAttemptTimeoutStrategy(unittest.TestCase): def testConstants(self): tpa = mcpu.LockAttemptTimeoutStrategy._TIMEOUT_PER_ATTEMPT self.assert_(len(tpa) > LOCK_ATTEMPTS_TIMEOUT / LOCK_ATTEMPTS_MAXWAIT) self.assert_(sum(tpa) >= LOCK_ATTEMPTS_TIMEOUT) self.assertTrue(LOCK_ATTEMPTS_TIMEOUT >= 1800, msg="Waiting less than half an hour per priority") self.assertTrue(LOCK_ATTEMPTS_TIMEOUT <= 3600, msg="Waiting more than an hour per priority") def testSimple(self): strat = mcpu.LockAttemptTimeoutStrategy(_random_fn=lambda: 0.5, _time_fn=lambda: 0.0) prev = None for i in range(len(strat._TIMEOUT_PER_ATTEMPT)): timeout = strat.NextAttempt() self.assert_(timeout is not None) self.assert_(timeout <= LOCK_ATTEMPTS_MAXWAIT) self.assert_(timeout >= LOCK_ATTEMPTS_MINWAIT) self.assert_(prev is None or timeout >= prev) prev = timeout for _ in range(10): self.assert_(strat.NextAttempt() is None) class TestDispatchTable(unittest.TestCase): def test(self): for opcls in opcodes.OP_MAPPING.values(): if not opcls.WITH_LU: continue self.assertTrue(opcls in mcpu.Processor.DISPATCH_TABLE, msg="%s missing handler class" % opcls) # Check against BGL whitelist lucls = mcpu.Processor.DISPATCH_TABLE[opcls] if lucls.REQ_BGL: self.assertTrue(opcls in REQ_BGL_WHITELIST, msg=("%s not whitelisted for BGL" % opcls.OP_ID)) else: self.assertFalse(opcls in REQ_BGL_WHITELIST, msg=("%s whitelisted for BGL, but doesn't use it" % opcls.OP_ID)) class TestProcessResult(unittest.TestCase): def setUp(self): self._submitted = [] self._count = itertools.count(200) def _Submit(self, jobs): job_ids = [self._count.next() for _ in jobs] self._submitted.extend(zip(job_ids, jobs)) return job_ids def testNoJobs(self): for i in [object(), [], False, True, None, 1, 929, {}]: self.assertEqual(mcpu._ProcessResult(NotImplemented, NotImplemented, i), i) def testDefaults(self): src = opcodes.OpTestDummy() res = mcpu._ProcessResult(self._Submit, src, cmdlib.ResultWithJobs([[ opcodes.OpTestDelay(), opcodes.OpTestDelay(), ], [ opcodes.OpTestDelay(), ]])) self.assertEqual(res, { constants.JOB_IDS_KEY: [200, 201], }) (_, (op1, op2)) = self._submitted.pop(0) (_, (op3, )) = self._submitted.pop(0) self.assertRaises(IndexError, self._submitted.pop) for op in [op1, op2, op3]: self.assertTrue("OP_TEST_DUMMY" in op.comment) self.assertFalse(hasattr(op, "priority")) self.assertFalse(hasattr(op, "debug_level")) def testParams(self): src = opcodes.OpTestDummy(priority=constants.OP_PRIO_HIGH, debug_level=3) res = mcpu._ProcessResult(self._Submit, src, cmdlib.ResultWithJobs([[ opcodes.OpTestDelay(priority=constants.OP_PRIO_LOW), ], [ opcodes.OpTestDelay(comment="foobar", debug_level=10), ]], other=True, value=range(10))) self.assertEqual(res, { constants.JOB_IDS_KEY: [200, 201], "other": True, "value": range(10), }) (_, (op1, )) = self._submitted.pop(0) (_, (op2, )) = self._submitted.pop(0) self.assertRaises(IndexError, 
self._submitted.pop) self.assertEqual(op1.priority, constants.OP_PRIO_LOW) self.assertTrue("OP_TEST_DUMMY" in op1.comment) self.assertEqual(op1.debug_level, 3) self.assertEqual(op2.priority, constants.OP_PRIO_HIGH) self.assertEqual(op2.comment, "foobar") self.assertEqual(op2.debug_level, 3) class _FakeLuWithLocks: def __init__(self, needed_locks, share_locks): self.needed_locks = needed_locks self.share_locks = share_locks class _FakeGlm: def __init__(self, owning_nal): self._owning_nal = owning_nal def check_owned(self, level, names): assert level == locking.LEVEL_NODE_ALLOC assert names == locking.NAL return self._owning_nal def owning_all(self, level): return False class TestVerifyLocks(unittest.TestCase): def testNoLocks(self): lu = _FakeLuWithLocks({}, {}) glm = _FakeGlm(False) mcpu._VerifyLocks(lu, glm, _mode_whitelist=NotImplemented, _nal_whitelist=NotImplemented) def testNotAllSameMode(self): for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: lu = _FakeLuWithLocks({ level: ["foo"], }, { level: 0, locking.LEVEL_NODE_ALLOC: 0, }) glm = _FakeGlm(False) mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[]) def testDifferentMode(self): for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: lu = _FakeLuWithLocks({ level: ["foo"], }, { level: 0, locking.LEVEL_NODE_ALLOC: 1, }) glm = _FakeGlm(False) try: mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[]) except AssertionError, err: self.assertTrue("using the same mode as nodes" in str(err)) else: self.fail("Exception not raised") # Once more with the whitelist mcpu._VerifyLocks(lu, glm, _mode_whitelist=[_FakeLuWithLocks], _nal_whitelist=[]) def testSameMode(self): for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: lu = _FakeLuWithLocks({ level: ["foo"], locking.LEVEL_NODE_ALLOC: locking.ALL_SET, }, { level: 1, locking.LEVEL_NODE_ALLOC: 1, }) glm = _FakeGlm(True) try: mcpu._VerifyLocks(lu, glm, _mode_whitelist=[_FakeLuWithLocks], _nal_whitelist=[]) except AssertionError, err: self.assertTrue("whitelisted to use different modes" in str(err)) else: self.fail("Exception not raised") # Once more without the whitelist mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[]) def testAllWithoutAllocLock(self): for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: lu = _FakeLuWithLocks({ level: locking.ALL_SET, }, { level: 0, locking.LEVEL_NODE_ALLOC: 0, }) glm = _FakeGlm(False) try: mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[]) except AssertionError, err: self.assertTrue("allocation lock must be used if" in str(err)) else: self.fail("Exception not raised") # Once more with the whitelist mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[_FakeLuWithLocks]) def testAllWithAllocLock(self): for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: lu = _FakeLuWithLocks({ level: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, }, { level: 0, locking.LEVEL_NODE_ALLOC: 0, }) glm = _FakeGlm(True) try: mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[_FakeLuWithLocks]) except AssertionError, err: self.assertTrue("whitelisted for not acquiring" in str(err)) else: self.fail("Exception not raised") # Once more without the whitelist mcpu._VerifyLocks(lu, glm, _mode_whitelist=[], _nal_whitelist=[]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/cli-test.bash0000744000000000000000000000030412244641676016776 0ustar00rootroot00000000000000#!/bin/bash export SCRIPTS=${TOP_BUILDDIR:-.}/scripts export 
DAEMONS=${TOP_BUILDDIR:-.}/daemons shelltest $SHELLTESTARGS \ ${TOP_SRCDIR:-.}/test/py/{gnt,ganeti}-*.test \ -- --hide-successes ganeti-2.9.3/test/py/ganeti.utils.filelock_unittest.py0000744000000000000000000001042112244641676023122 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.filelock""" import os import tempfile import unittest from ganeti import constants from ganeti import utils from ganeti import errors import testutils class _BaseFileLockTest: """Test case for the FileLock class""" def testSharedNonblocking(self): self.lock.Shared(blocking=False) self.lock.Close() def testExclusiveNonblocking(self): self.lock.Exclusive(blocking=False) self.lock.Close() def testUnlockNonblocking(self): self.lock.Unlock(blocking=False) self.lock.Close() def testSharedBlocking(self): self.lock.Shared(blocking=True) self.lock.Close() def testExclusiveBlocking(self): self.lock.Exclusive(blocking=True) self.lock.Close() def testUnlockBlocking(self): self.lock.Unlock(blocking=True) self.lock.Close() def testSharedExclusiveUnlock(self): self.lock.Shared(blocking=False) self.lock.Exclusive(blocking=False) self.lock.Unlock(blocking=False) self.lock.Close() def testExclusiveSharedUnlock(self): self.lock.Exclusive(blocking=False) self.lock.Shared(blocking=False) self.lock.Unlock(blocking=False) self.lock.Close() def testSimpleTimeout(self): # These will succeed on the first attempt, hence a short timeout self.lock.Shared(blocking=True, timeout=10.0) self.lock.Exclusive(blocking=False, timeout=10.0) self.lock.Unlock(blocking=True, timeout=10.0) self.lock.Close() @staticmethod def _TryLockInner(filename, shared, blocking): lock = utils.FileLock.Open(filename) if shared: fn = lock.Shared else: fn = lock.Exclusive try: # The timeout doesn't really matter as the parent process waits for us to # finish anyway. 
fn(blocking=blocking, timeout=0.01) except errors.LockError, err: return False return True def _TryLock(self, *args): return utils.RunInSeparateProcess(self._TryLockInner, self.tmpfile.name, *args) def testTimeout(self): for blocking in [True, False]: self.lock.Exclusive(blocking=True) self.failIf(self._TryLock(False, blocking)) self.failIf(self._TryLock(True, blocking)) self.lock.Shared(blocking=True) self.assert_(self._TryLock(True, blocking)) self.failIf(self._TryLock(False, blocking)) def testCloseShared(self): self.lock.Close() self.assertRaises(AssertionError, self.lock.Shared, blocking=False) def testCloseExclusive(self): self.lock.Close() self.assertRaises(AssertionError, self.lock.Exclusive, blocking=False) def testCloseUnlock(self): self.lock.Close() self.assertRaises(AssertionError, self.lock.Unlock, blocking=False) class TestFileLockWithFilename(testutils.GanetiTestCase, _BaseFileLockTest): TESTDATA = "Hello World\n" * 10 def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpfile = tempfile.NamedTemporaryFile() utils.WriteFile(self.tmpfile.name, data=self.TESTDATA) self.lock = utils.FileLock.Open(self.tmpfile.name) # Ensure "Open" didn't truncate file self.assertFileContent(self.tmpfile.name, self.TESTDATA) def tearDown(self): self.assertFileContent(self.tmpfile.name, self.TESTDATA) testutils.GanetiTestCase.tearDown(self) class TestFileLockWithFileObject(unittest.TestCase, _BaseFileLockTest): def setUp(self): self.tmpfile = tempfile.NamedTemporaryFile() self.lock = utils.FileLock(open(self.tmpfile.name, "w"), self.tmpfile.name) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.rapi.rlib2_unittest.py0000744000000000000000000016537312271422343022144 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
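# [Illustrative sketch, not part of the original tarball.] Nearly every test
# below follows the same pattern: build a handler with a _FakeClientFactory,
# invoke the HTTP method, pop the submitted job back out of the fake client,
# and assert on the generated opcode. Reduced to its essence with made-up
# names (the real tests use _FakeClientFactory and real opcode classes):

class _RecordingClient(object):
    def __init__(self):
        self.jobs = []

    def SubmitJob(self, ops):
        # Record the opcodes instead of contacting a real master daemon
        self.jobs.append(ops)
        return len(self.jobs)  # fake job id

def _fake_put(client, vg_name):
    # Stand-in for a handler PUT, e.g. R_2_cluster_modify.PUT turning
    # body data into an OpClusterSetParams opcode
    return client.SubmitJob([("OP_CLUSTER_SET_PARAMS", {"vg_name": vg_name})])

client = _RecordingClient()
job_id = _fake_put(client, "testvg")
assert job_id == 1
(opcode, params) = client.jobs.pop(0)[0]
assert opcode == "OP_CLUSTER_SET_PARAMS"
assert params["vg_name"] == "testvg"
assert not client.jobs  # nothing else was submitted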
"""Script for unittesting the RAPI rlib2 module """ import unittest import itertools import random from ganeti import constants from ganeti import opcodes from ganeti import compat from ganeti import http from ganeti import query from ganeti import luxi from ganeti import errors from ganeti import rapi from ganeti.rapi import rlib2 from ganeti.rapi import baserlib from ganeti.rapi import connector import testutils class _FakeRequestPrivateData: def __init__(self, body_data): self.body_data = body_data class _FakeRequest: def __init__(self, body_data): self.private = _FakeRequestPrivateData(body_data) def _CreateHandler(cls, items, queryargs, body_data, client_cls): return cls(items, queryargs, _FakeRequest(body_data), _client_cls=client_cls) class _FakeClient: def __init__(self, address=None): self._jobs = [] def GetNextSubmittedJob(self): return self._jobs.pop(0) def SubmitJob(self, ops): job_id = str(1 + int(random.random() * 1000000)) self._jobs.append((job_id, ops)) return job_id class _FakeClientFactory: def __init__(self, cls): self._client_cls = cls self._clients = [] def GetNextClient(self): return self._clients.pop(0) def __call__(self, address=None): cl = self._client_cls(address=address) self._clients.append(cl) return cl class TestConstants(unittest.TestCase): def testConsole(self): # Exporting the console field without authentication might expose # information assert "console" in query.INSTANCE_FIELDS self.assertTrue("console" not in rlib2.I_FIELDS) def testFields(self): checks = { constants.QR_INSTANCE: rlib2.I_FIELDS, constants.QR_NODE: rlib2.N_FIELDS, constants.QR_GROUP: rlib2.G_FIELDS, } for (qr, fields) in checks.items(): self.assertFalse(set(fields) - set(query.ALL_FIELDS[qr].keys())) class TestClientConnectError(unittest.TestCase): @staticmethod def _FailingClient(address=None): raise luxi.NoMasterError("test") def test(self): resources = [ rlib2.R_2_groups, rlib2.R_2_instances, rlib2.R_2_nodes, ] for cls in resources: handler = _CreateHandler(cls, ["name"], {}, None, self._FailingClient) self.assertRaises(http.HttpBadGateway, handler.GET) class TestJobSubmitError(unittest.TestCase): class _SubmitErrorClient: def __init__(self, address=None): pass @staticmethod def SubmitJob(ops): raise errors.JobQueueFull("test") def test(self): handler = _CreateHandler(rlib2.R_2_redist_config, [], {}, None, self._SubmitErrorClient) self.assertRaises(http.HttpServiceUnavailable, handler.PUT) class TestClusterModify(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_cluster_modify, [], {}, { "vg_name": "testvg", "candidate_pool_size": 100, }, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpClusterSetParams)) self.assertEqual(op.vg_name, "testvg") self.assertEqual(op.candidate_pool_size, 100) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testInvalidValue(self): for attr in ["vg_name", "candidate_pool_size", "beparams", "_-Unknown#"]: clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_cluster_modify, [], {}, { attr: True, }, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) self.assertRaises(IndexError, clfactory.GetNextClient) class TestRedistConfig(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_redist_config, 
[], {}, None, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpClusterRedistConf)) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestNodeMigrate(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_nodes_name_migrate, ["node1"], {}, { "iallocator": "fooalloc", }, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeMigrate)) self.assertEqual(op.node_name, "node1") self.assertEqual(op.iallocator, "fooalloc") self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testQueryArgsConflict(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_nodes_name_migrate, ["node2"], { "live": True, "mode": constants.HT_MIGRATION_NONLIVE, }, None, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) def testQueryArgsMode(self): clfactory = _FakeClientFactory(_FakeClient) queryargs = { "mode": [constants.HT_MIGRATION_LIVE], } handler = _CreateHandler(rlib2.R_2_nodes_name_migrate, ["node17292"], queryargs, None, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeMigrate)) self.assertEqual(op.node_name, "node17292") self.assertEqual(op.mode, constants.HT_MIGRATION_LIVE) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testQueryArgsLive(self): clfactory = _FakeClientFactory(_FakeClient) for live in [False, True]: queryargs = { "live": [str(int(live))], } handler = _CreateHandler(rlib2.R_2_nodes_name_migrate, ["node6940"], queryargs, None, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeMigrate)) self.assertEqual(op.node_name, "node6940") if live: self.assertEqual(op.mode, constants.HT_MIGRATION_LIVE) else: self.assertEqual(op.mode, constants.HT_MIGRATION_NONLIVE) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestNodeEvacuate(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_nodes_name_evacuate, ["node92"], { "dry-run": ["1"], }, { "mode": constants.IALLOCATOR_NEVAC_SEC, }, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeEvacuate)) self.assertEqual(op.node_name, "node92") self.assertEqual(op.mode, constants.IALLOCATOR_NEVAC_SEC) self.assertTrue(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestNodePowercycle(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_nodes_name_powercycle, ["node20744"], { "force": ["1"], }, None, clfactory) job_id = handler.POST() cl = 
clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodePowercycle)) self.assertEqual(op.node_name, "node20744") self.assertTrue(op.force) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestGroupAssignNodes(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_groups_name_assign_nodes, ["grp-a"], { "dry-run": ["1"], "force": ["1"], }, { "nodes": ["n2", "n3"], }, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupAssignNodes)) self.assertEqual(op.group_name, "grp-a") self.assertEqual(op.nodes, ["n2", "n3"]) self.assertTrue(op.dry_run) self.assertTrue(op.force) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceDelete(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name, ["inst30965"], { "dry-run": ["1"], }, {}, clfactory) job_id = handler.DELETE() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceRemove)) self.assertEqual(op.instance_name, "inst30965") self.assertTrue(op.dry_run) self.assertFalse(op.ignore_failures) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceInfo(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_info, ["inst31217"], { "static": ["1"], }, {}, clfactory) job_id = handler.GET() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceQueryData)) self.assertEqual(op.instances, ["inst31217"]) self.assertTrue(op.static) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceReboot(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_reboot, ["inst847"], { "dry-run": ["1"], "ignore_secondaries": ["1"], "reason": ["System update"], }, {}, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceReboot)) self.assertEqual(op.instance_name, "inst847") self.assertEqual(op.reboot_type, constants.INSTANCE_REBOOT_HARD) self.assertTrue(op.ignore_secondaries) self.assertTrue(op.dry_run) self.assertEqual(op.reason[0][0], constants.OPCODE_REASON_SRC_USER) self.assertEqual(op.reason[0][1], "System update") self.assertEqual(op.reason[1][0], "%s:%s" % (constants.OPCODE_REASON_SRC_RLIB2, "instances_name_reboot")) self.assertEqual(op.reason[1][1], "") self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceStartup(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_startup, ["inst31083"], { "force": ["1"], "no_remember": ["1"], "reason": ["Newly created instance"], }, 
{}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceStartup)) self.assertEqual(op.instance_name, "inst31083") self.assertTrue(op.no_remember) self.assertTrue(op.force) self.assertFalse(op.dry_run) self.assertEqual(op.reason[0][0], constants.OPCODE_REASON_SRC_USER) self.assertEqual(op.reason[0][1], "Newly created instance") self.assertEqual(op.reason[1][0], "%s:%s" % (constants.OPCODE_REASON_SRC_RLIB2, "instances_name_startup")) self.assertEqual(op.reason[1][1], "") self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceShutdown(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_shutdown, ["inst26791"], { "no_remember": ["0"], "reason": ["Not used anymore"], }, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceShutdown)) self.assertEqual(op.instance_name, "inst26791") self.assertFalse(op.no_remember) self.assertFalse(op.dry_run) self.assertEqual(op.reason[0][0], constants.OPCODE_REASON_SRC_USER) self.assertEqual(op.reason[0][1], "Not used anymore") self.assertEqual(op.reason[1][0], "%s:%s" % (constants.OPCODE_REASON_SRC_RLIB2, "instances_name_shutdown")) self.assertEqual(op.reason[1][1], "") self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceActivateDisks(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_activate_disks, ["xyz"], { "ignore_size": ["1"], }, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceActivateDisks)) self.assertEqual(op.instance_name, "xyz") self.assertTrue(op.ignore_size) self.assertFalse(hasattr(op, "dry_run")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceDeactivateDisks(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_deactivate_disks, ["inst22357"], {}, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceDeactivateDisks)) self.assertEqual(op.instance_name, "inst22357") self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceRecreateDisks(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_recreate_disks, ["inst22357"], {}, {}, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceRecreateDisks)) self.assertEqual(op.instance_name, "inst22357") self.assertFalse(hasattr(op, "dry_run")) 
self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceFailover(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_failover, ["inst12794"], {}, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceFailover)) self.assertEqual(op.instance_name, "inst12794") self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceDiskGrow(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) data = { "amount": 1024, } handler = _CreateHandler(rlib2.R_2_instances_name_disk_grow, ["inst10742", "3"], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceGrowDisk)) self.assertEqual(op.instance_name, "inst10742") self.assertEqual(op.disk, 3) self.assertEqual(op.amount, 1024) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestBackupPrepare(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) queryargs = { "mode": constants.EXPORT_MODE_REMOTE, } handler = _CreateHandler(rlib2.R_2_instances_name_prepare_export, ["inst17925"], queryargs, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpBackupPrepare)) self.assertEqual(op.instance_name, "inst17925") self.assertEqual(op.mode, constants.EXPORT_MODE_REMOTE) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestGroupRemove(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_groups_name, ["grp28575"], {}, {}, clfactory) job_id = handler.DELETE() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupRemove)) self.assertEqual(op.group_name, "grp28575") self.assertFalse(op.dry_run) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestStorageQuery(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) queryargs = { "storage_type": constants.ST_LVM_PV, "output_fields": "name,other", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage, ["node21075"], queryargs, {}, clfactory) job_id = handler.GET() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeQueryStorage)) self.assertEqual(op.nodes, ["node21075"]) self.assertEqual(op.storage_type, constants.ST_LVM_PV) self.assertEqual(op.output_fields, ["name", "other"]) self.assertFalse(hasattr(op, "dry_run")) 
self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testErrors(self): clfactory = _FakeClientFactory(_FakeClient) queryargs = { "output_fields": "name,other", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage, ["node10538"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.GET) queryargs = { "storage_type": constants.ST_LVM_VG, } handler = _CreateHandler(rlib2.R_2_nodes_name_storage, ["node21273"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.GET) queryargs = { "storage_type": "##unknown_storage##", "output_fields": "name,other", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage, ["node10315"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.GET) class TestStorageModify(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) for allocatable in [None, "1", "0"]: queryargs = { "storage_type": constants.ST_LVM_VG, "name": "pv-a", } if allocatable is not None: queryargs["allocatable"] = allocatable handler = _CreateHandler(rlib2.R_2_nodes_name_storage_modify, ["node9292"], queryargs, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeModifyStorage)) self.assertEqual(op.node_name, "node9292") self.assertEqual(op.storage_type, constants.ST_LVM_VG) self.assertEqual(op.name, "pv-a") if allocatable is None: self.assertFalse(op.changes) else: assert allocatable in ("0", "1") self.assertEqual(op.changes, { constants.SF_ALLOCATABLE: (allocatable == "1"), }) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testErrors(self): clfactory = _FakeClientFactory(_FakeClient) # No storage type queryargs = { "name": "xyz", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage_modify, ["node26016"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) # No name queryargs = { "storage_type": constants.ST_LVM_VG, } handler = _CreateHandler(rlib2.R_2_nodes_name_storage_modify, ["node21218"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) # Invalid value queryargs = { "storage_type": constants.ST_LVM_VG, "name": "pv-b", "allocatable": "noint", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage_modify, ["node30685"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) class TestStorageRepair(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) queryargs = { "storage_type": constants.ST_LVM_PV, "name": "pv16611", } handler = _CreateHandler(rlib2.R_2_nodes_name_storage_repair, ["node19265"], queryargs, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpRepairNodeStorage)) self.assertEqual(op.node_name, "node19265") self.assertEqual(op.storage_type, constants.ST_LVM_PV) self.assertEqual(op.name, "pv16611") self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testErrors(self): clfactory = _FakeClientFactory(_FakeClient) # No storage type queryargs = { "name": "xyz", } handler = 
_CreateHandler(rlib2.R_2_nodes_name_storage_repair, ["node11275"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) # No name queryargs = { "storage_type": constants.ST_LVM_VG, } handler = _CreateHandler(rlib2.R_2_nodes_name_storage_repair, ["node21218"], queryargs, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) class TestTags(unittest.TestCase): TAG_HANDLERS = [ rlib2.R_2_instances_name_tags, rlib2.R_2_nodes_name_tags, rlib2.R_2_groups_name_tags, rlib2.R_2_tags, ] def testSetAndDelete(self): clfactory = _FakeClientFactory(_FakeClient) for method, opcls in [("PUT", opcodes.OpTagsSet), ("DELETE", opcodes.OpTagsDel)]: for idx, handler in enumerate(self.TAG_HANDLERS): dry_run = bool(idx % 2) name = "test%s" % idx queryargs = { "tag": ["foo", "bar", "baz"], "dry-run": str(int(dry_run)), } handler = _CreateHandler(handler, [name], queryargs, {}, clfactory) job_id = getattr(handler, method)() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcls)) self.assertEqual(op.kind, handler.TAG_LEVEL) if handler.TAG_LEVEL == constants.TAG_CLUSTER: self.assertTrue(op.name is None) else: self.assertEqual(op.name, name) self.assertEqual(op.tags, ["foo", "bar", "baz"]) self.assertEqual(op.dry_run, dry_run) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceCreation(testutils.GanetiTestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst863.example.com" disk_variants = [ # No disks [], # Two disks [{"size": 5, }, {"size": 100, }], # Disk with mode [{"size": 123, "mode": constants.DISK_RDWR, }], ] nic_variants = [ # No NIC [], # Three NICs [{}, {}, {}], # Two NICs [ { "ip": "192.0.2.6", "mode": constants.NIC_MODE_ROUTED, "mac": "01:23:45:67:68:9A", }, { "mode": constants.NIC_MODE_BRIDGED, "link": "br1" }, ], ] beparam_variants = [ None, {}, { constants.BE_VCPUS: 2, }, { constants.BE_MAXMEM: 200, }, { constants.BE_MEMORY: 256, }, { constants.BE_VCPUS: 2, constants.BE_MAXMEM: 1024, constants.BE_MINMEM: 1024, constants.BE_AUTO_BALANCE: True, constants.BE_ALWAYS_FAILOVER: True, } ] hvparam_variants = [ None, { constants.HV_BOOT_ORDER: "anc", }, { constants.HV_KERNEL_PATH: "/boot/fookernel", constants.HV_ROOT_PATH: "/dev/hda1", }, ] for mode in [constants.INSTANCE_CREATE, constants.INSTANCE_IMPORT]: for nics in nic_variants: for disk_template in constants.DISK_TEMPLATES: for disks in disk_variants: for beparams in beparam_variants: for hvparams in hvparam_variants: for dry_run in [False, True]: queryargs = { "dry-run": str(int(dry_run)), } data = { rlib2._REQ_DATA_VERSION: 1, "name": name, "hypervisor": constants.HT_FAKE, "disks": disks, "nics": nics, "mode": mode, "disk_template": disk_template, "os": "debootstrap", } if beparams is not None: data["beparams"] = beparams if hvparams is not None: data["hvparams"] = hvparams handler = _CreateHandler(rlib2.R_2_instances, [], queryargs, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertRaises(IndexError, cl.GetNextSubmittedJob) self.assertTrue(isinstance(op, opcodes.OpInstanceCreate)) self.assertEqual(op.instance_name, name) self.assertEqual(op.mode, mode) self.assertEqual(op.disk_template, disk_template) 
self.assertEqual(op.dry_run, dry_run) self.assertEqual(len(op.disks), len(disks)) self.assertEqual(len(op.nics), len(nics)) for opdisk, disk in zip(op.disks, disks): for key in constants.IDISK_PARAMS: self.assertEqual(opdisk.get(key), disk.get(key)) self.assertFalse("unknown" in opdisk) for opnic, nic in zip(op.nics, nics): for key in constants.INIC_PARAMS: self.assertEqual(opnic.get(key), nic.get(key)) self.assertFalse("unknown" in opnic) self.assertFalse("foobar" in opnic) if beparams is None: self.assertFalse(hasattr(op, "beparams")) else: self.assertEqualValues(op.beparams, beparams) if hvparams is None: self.assertFalse(hasattr(op, "hvparams")) else: self.assertEqualValues(op.hvparams, hvparams) def testLegacyName(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst29128.example.com" data = { rlib2._REQ_DATA_VERSION: 1, "name": name, "disks": [], "nics": [], "mode": constants.INSTANCE_CREATE, "disk_template": constants.DT_PLAIN, } handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceCreate)) self.assertEqual(op.instance_name, name) self.assertFalse(hasattr(op, "name")) self.assertFalse(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) # Define both data["instance_name"] = "other.example.com" assert "name" in data and "instance_name" in data handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) def testLegacyOs(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst4673.example.com" os = "linux29206" data = { rlib2._REQ_DATA_VERSION: 1, "name": name, "os_type": os, "disks": [], "nics": [], "mode": constants.INSTANCE_CREATE, "disk_template": constants.DT_PLAIN, } handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceCreate)) self.assertEqual(op.instance_name, name) self.assertEqual(op.os_type, os) self.assertFalse(hasattr(op, "os")) self.assertFalse(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) # Define both data["os"] = "linux9584" assert "os" in data and "os_type" in data handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) def testErrors(self): clfactory = _FakeClientFactory(_FakeClient) # Test all required fields reqfields = { rlib2._REQ_DATA_VERSION: 1, "name": "inst1.example.com", "disks": [], "nics": [], "mode": constants.INSTANCE_CREATE, "disk_template": constants.DT_PLAIN, } for name in reqfields.keys(): data = dict(i for i in reqfields.iteritems() if i[0] != name) handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) # Invalid disks and nics for field in ["disks", "nics"]: invalid_values = [None, 1, "", {}, [1, 2, 3], ["hda1", "hda2"], [{"_unknown_": False, }]] for invvalue in invalid_values: data = reqfields.copy() data[field] = invvalue handler = _CreateHandler(rlib2.R_2_instances, [], {}, 
data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) def testVersion(self): clfactory = _FakeClientFactory(_FakeClient) # No version field data = { "name": "inst1.example.com", "disks": [], "nics": [], "mode": constants.INSTANCE_CREATE, "disk_template": constants.DT_PLAIN, } handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) # Old and incorrect versions for version in [0, -1, 10483, "Hello World"]: data[rlib2._REQ_DATA_VERSION] = version handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) # Correct version data[rlib2._REQ_DATA_VERSION] = 1 handler = _CreateHandler(rlib2.R_2_instances, [], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceCreate)) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestBackupExport(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "instmoo" data = { "mode": constants.EXPORT_MODE_REMOTE, "destination": [(1, 2, 3), (99, 99, 99)], "shutdown": True, "remove_instance": True, "x509_key_name": ["name", "hash"], "destination_x509_ca": "---cert---" } handler = _CreateHandler(rlib2.R_2_instances_name_export, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpBackupExport)) self.assertEqual(op.instance_name, name) self.assertEqual(op.mode, constants.EXPORT_MODE_REMOTE) self.assertEqual(op.target_node, [(1, 2, 3), (99, 99, 99)]) self.assertEqual(op.shutdown, True) self.assertEqual(op.remove_instance, True) self.assertEqual(op.x509_key_name, ["name", "hash"]) self.assertEqual(op.destination_x509_ca, "---cert---") self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst1" data = { "destination": "node2", "shutdown": False, } handler = _CreateHandler(rlib2.R_2_instances_name_export, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpBackupExport)) self.assertEqual(op.instance_name, name) self.assertEqual(op.target_node, "node2") self.assertFalse(hasattr(op, "mode")) self.assertFalse(hasattr(op, "remove_instance")) self.assertFalse(hasattr(op, "destination")) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testErrors(self): clfactory = _FakeClientFactory(_FakeClient) for value in ["True", "False"]: handler = _CreateHandler(rlib2.R_2_instances_name_export, ["err1"], {}, { "remove_instance": value, }, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) class TestInstanceMigrate(testutils.GanetiTestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = 
"instYooho6ek" for cleanup in [False, True]: for mode in constants.HT_MIGRATION_MODES: data = { "cleanup": cleanup, "mode": mode, } handler = _CreateHandler(rlib2.R_2_instances_name_migrate, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceMigrate)) self.assertEqual(op.instance_name, name) self.assertEqual(op.mode, mode) self.assertEqual(op.cleanup, cleanup) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "instnohZeex0" handler = _CreateHandler(rlib2.R_2_instances_name_migrate, [name], {}, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceMigrate)) self.assertEqual(op.instance_name, name) self.assertFalse(hasattr(op, "mode")) self.assertFalse(hasattr(op, "cleanup")) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestParseRenameInstanceRequest(testutils.GanetiTestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "instij0eeph7" for new_name in ["ua0aiyoo", "fai3ongi"]: for ip_check in [False, True]: for name_check in [False, True]: data = { "new_name": new_name, "ip_check": ip_check, "name_check": name_check, } handler = _CreateHandler(rlib2.R_2_instances_name_rename, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceRename)) self.assertEqual(op.instance_name, name) self.assertEqual(op.new_name, new_name) self.assertEqual(op.ip_check, ip_check) self.assertEqual(op.name_check, name_check) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "instahchie3t" for new_name in ["thag9mek", "quees7oh"]: data = { "new_name": new_name, } handler = _CreateHandler(rlib2.R_2_instances_name_rename, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceRename)) self.assertEqual(op.instance_name, name) self.assertEqual(op.new_name, new_name) self.assertFalse(hasattr(op, "ip_check")) self.assertFalse(hasattr(op, "name_check")) self.assertFalse(hasattr(op, "dry_run")) self.assertFalse(hasattr(op, "force")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestParseModifyInstanceRequest(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "instush8gah" test_disks = [ [], [(1, { constants.IDISK_MODE: constants.DISK_RDWR, })], ] for osparams in [{}, { "some": "value", "other": "Hello World", }]: for hvparams in [{}, { constants.HV_KERNEL_PATH: "/some/kernel", }]: for beparams in [{}, { 
constants.BE_MAXMEM: 128, }]: for force in [False, True]: for nics in [[], [(0, { constants.INIC_IP: "192.0.2.1", })]]: for disks in test_disks: for disk_template in constants.DISK_TEMPLATES: data = { "osparams": osparams, "hvparams": hvparams, "beparams": beparams, "nics": nics, "disks": disks, "force": force, "disk_template": disk_template, } handler = _CreateHandler(rlib2.R_2_instances_name_modify, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceSetParams)) self.assertEqual(op.instance_name, name) self.assertEqual(op.hvparams, hvparams) self.assertEqual(op.beparams, beparams) self.assertEqual(op.osparams, osparams) self.assertEqual(op.force, force) self.assertEqual(op.nics, nics) self.assertEqual(op.disks, disks) self.assertEqual(op.disk_template, disk_template) self.assertFalse(hasattr(op, "remote_node")) self.assertFalse(hasattr(op, "os_name")) self.assertFalse(hasattr(op, "force_variant")) self.assertFalse(hasattr(op, "dry_run")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "instir8aish31" handler = _CreateHandler(rlib2.R_2_instances_name_modify, [name], {}, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceSetParams)) self.assertEqual(op.instance_name, name) for i in ["hvparams", "beparams", "osparams", "force", "nics", "disks", "disk_template", "remote_node", "os_name", "force_variant"]: self.assertFalse(hasattr(op, i)) class TestParseInstanceReinstallRequest(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.Parse = rlib2._ParseInstanceReinstallRequest def _Check(self, ops, name): expcls = [ opcodes.OpInstanceShutdown, opcodes.OpInstanceReinstall, opcodes.OpInstanceStartup, ] self.assert_(compat.all(isinstance(op, exp) for op, exp in zip(ops, expcls))) self.assert_(compat.all(op.instance_name == name for op in ops)) def test(self): name = "shoo0tihohma" ops = self.Parse(name, {"os": "sys1", "start": True,}) self.assertEqual(len(ops), 3) self._Check(ops, name) self.assertEqual(ops[1].os_type, "sys1") self.assertFalse(ops[1].osparams) ops = self.Parse(name, {"os": "sys2", "start": False,}) self.assertEqual(len(ops), 2) self._Check(ops, name) self.assertEqual(ops[1].os_type, "sys2") osparams = { "reformat": "1", } ops = self.Parse(name, {"os": "sys4035", "start": True, "osparams": osparams,}) self.assertEqual(len(ops), 3) self._Check(ops, name) self.assertEqual(ops[1].os_type, "sys4035") self.assertEqual(ops[1].osparams, osparams) def testDefaults(self): name = "noolee0g" ops = self.Parse(name, {"os": "linux1"}) self.assertEqual(len(ops), 3) self._Check(ops, name) self.assertEqual(ops[1].os_type, "linux1") self.assertFalse(ops[1].osparams) def testErrors(self): self.assertRaises(http.HttpBadRequest, self.Parse, "foo", "not a dictionary") class TestGroupRename(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "group608242564" data = { "new_name": "ua0aiyoo15112", } handler = _CreateHandler(rlib2.R_2_groups_name_rename, [name], {}, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() 
self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupRename)) self.assertEqual(op.group_name, name) self.assertEqual(op.new_name, "ua0aiyoo15112") self.assertFalse(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDryRun(self): clfactory = _FakeClientFactory(_FakeClient) name = "group28548" data = { "new_name": "ua0aiyoo", } handler = _CreateHandler(rlib2.R_2_groups_name_rename, [name], { "dry-run": ["1"], }, data, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupRename)) self.assertEqual(op.group_name, name) self.assertEqual(op.new_name, "ua0aiyoo") self.assertTrue(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestInstanceReplaceDisks(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst22568" for disks in [range(1, 4), "1,2,3", "1, 2, 3"]: data = { "mode": constants.REPLACE_DISK_SEC, "disks": disks, "iallocator": "myalloc", } handler = _CreateHandler(rlib2.R_2_instances_name_replace_disks, [name], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceReplaceDisks)) self.assertEqual(op.instance_name, name) self.assertEqual(op.mode, constants.REPLACE_DISK_SEC) self.assertEqual(op.disks, [1, 2, 3]) self.assertEqual(op.iallocator, "myalloc") self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "inst11413" data = { "mode": constants.REPLACE_DISK_AUTO, } handler = _CreateHandler(rlib2.R_2_instances_name_replace_disks, [name], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpInstanceReplaceDisks)) self.assertEqual(op.instance_name, name) self.assertEqual(op.mode, constants.REPLACE_DISK_AUTO) self.assertFalse(hasattr(op, "iallocator")) self.assertFalse(hasattr(op, "disks")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testNoDisks(self): clfactory = _FakeClientFactory(_FakeClient) handler = _CreateHandler(rlib2.R_2_instances_name_replace_disks, ["inst20661"], {}, {}, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) for disks in [None, "", {}]: handler = _CreateHandler(rlib2.R_2_instances_name_replace_disks, ["inst20661"], {}, { "disks": disks, }, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) def testWrong(self): clfactory = _FakeClientFactory(_FakeClient) data = { "mode": constants.REPLACE_DISK_AUTO, "disks": "hello world", } handler = _CreateHandler(rlib2.R_2_instances_name_replace_disks, ["foo"], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) class TestGroupModify(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) name = "group6002" for policy in constants.VALID_ALLOC_POLICIES: data = { "alloc_policy": policy, } handler = _CreateHandler(rlib2.R_2_groups_name_modify, [name], {}, data, 
clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupSetParams)) self.assertEqual(op.group_name, name) self.assertEqual(op.alloc_policy, policy) self.assertFalse(hasattr(op, "dry_run")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testUnknownPolicy(self): clfactory = _FakeClientFactory(_FakeClient) data = { "alloc_policy": "_unknown_policy_", } handler = _CreateHandler(rlib2.R_2_groups_name_modify, ["xyz"], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.PUT) self.assertRaises(IndexError, clfactory.GetNextClient) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "group6679" handler = _CreateHandler(rlib2.R_2_groups_name_modify, [name], {}, {}, clfactory) job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupSetParams)) self.assertEqual(op.group_name, name) self.assertFalse(hasattr(op, "alloc_policy")) self.assertFalse(hasattr(op, "dry_run")) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestGroupAdd(unittest.TestCase): def test(self): name = "group3618" clfactory = _FakeClientFactory(_FakeClient) for policy in constants.VALID_ALLOC_POLICIES: data = { "group_name": name, "alloc_policy": policy, } handler = _CreateHandler(rlib2.R_2_groups, [], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupAdd)) self.assertEqual(op.group_name, name) self.assertEqual(op.alloc_policy, policy) self.assertFalse(op.dry_run) self.assertRaises(IndexError, cl.GetNextSubmittedJob) def testUnknownPolicy(self): clfactory = _FakeClientFactory(_FakeClient) data = { "alloc_policy": "_unknown_policy_", } handler = _CreateHandler(rlib2.R_2_groups, [], {}, data, clfactory) self.assertRaises(http.HttpBadRequest, handler.POST) self.assertRaises(IndexError, clfactory.GetNextClient) def testDefaults(self): clfactory = _FakeClientFactory(_FakeClient) name = "group15395" data = { "group_name": name, } handler = _CreateHandler(rlib2.R_2_groups, [], {}, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupAdd)) self.assertEqual(op.group_name, name) self.assertFalse(hasattr(op, "alloc_policy")) self.assertFalse(op.dry_run) def testLegacyName(self): clfactory = _FakeClientFactory(_FakeClient) name = "group29852" data = { "name": name, } handler = _CreateHandler(rlib2.R_2_groups, [], { "dry-run": ["1"], }, data, clfactory) job_id = handler.POST() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpGroupAdd)) self.assertEqual(op.group_name, name) self.assertFalse(hasattr(op, "alloc_policy")) self.assertTrue(op.dry_run) class TestNodeRole(unittest.TestCase): def test(self): clfactory = _FakeClientFactory(_FakeClient) for 
role in rlib2._NR_MAP.values(): handler = _CreateHandler(rlib2.R_2_nodes_name_role, ["node-z"], {}, role, clfactory) if role == rlib2._NR_MASTER: self.assertRaises(http.HttpBadRequest, handler.PUT) else: job_id = handler.PUT() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) (exp_job_id, (op, )) = cl.GetNextSubmittedJob() self.assertEqual(job_id, exp_job_id) self.assertTrue(isinstance(op, opcodes.OpNodeSetParams)) self.assertEqual(op.node_name, "node-z") self.assertFalse(op.force) self.assertFalse(hasattr(op, "dry_run")) if role == rlib2._NR_REGULAR: self.assertFalse(op.drained) self.assertFalse(op.offline) self.assertFalse(op.master_candidate) elif role == rlib2._NR_MASTER_CANDIDATE: self.assertFalse(op.drained) self.assertFalse(op.offline) self.assertTrue(op.master_candidate) elif role == rlib2._NR_DRAINED: self.assertTrue(op.drained) self.assertFalse(op.offline) self.assertFalse(op.master_candidate) elif role == rlib2._NR_OFFLINE: self.assertFalse(op.drained) self.assertTrue(op.offline) self.assertFalse(op.master_candidate) else: self.fail("Unknown role '%s'" % role) self.assertRaises(IndexError, cl.GetNextSubmittedJob) class TestSimpleResources(unittest.TestCase): def setUp(self): self.clfactory = _FakeClientFactory(_FakeClient) def tearDown(self): self.assertRaises(IndexError, self.clfactory.GetNextClient) def testFeatures(self): handler = _CreateHandler(rlib2.R_2_features, [], {}, None, self.clfactory) self.assertEqual(set(handler.GET()), rlib2.ALL_FEATURES) def testEmpty(self): for cls in [rlib2.R_root, rlib2.R_2]: handler = _CreateHandler(cls, [], {}, None, self.clfactory) self.assertTrue(handler.GET() is None) def testVersion(self): handler = _CreateHandler(rlib2.R_version, [], {}, None, self.clfactory) self.assertEqual(handler.GET(), constants.RAPI_VERSION) class TestClusterInfo(unittest.TestCase): class _ClusterInfoClient: def __init__(self, address=None): self.cluster_info = None def QueryClusterInfo(self): assert self.cluster_info is None self.cluster_info = object() return self.cluster_info def test(self): clfactory = _FakeClientFactory(self._ClusterInfoClient) handler = _CreateHandler(rlib2.R_2_info, [], {}, None, clfactory) result = handler.GET() cl = clfactory.GetNextClient() self.assertRaises(IndexError, clfactory.GetNextClient) self.assertEqual(result, cl.cluster_info) class TestInstancesMultiAlloc(unittest.TestCase): def testInstanceUpdate(self): clfactory = _FakeClientFactory(_FakeClient) data = { "instances": [{ "instance_name": "bar", "mode": "create", }, { "instance_name": "foo", "mode": "create", }], } handler = _CreateHandler(rlib2.R_2_instances_multi_alloc, [], {}, data, clfactory) (body, _) = handler.GetPostOpInput() self.assertTrue(compat.all([inst["OP_ID"] == handler.POST_OPCODE.OP_ID for inst in body["instances"]])) class TestPermissions(unittest.TestCase): def testEquality(self): self.assertEqual(rlib2.R_2_query.GET_ACCESS, rlib2.R_2_query.PUT_ACCESS) self.assertEqual(rlib2.R_2_query.GET_ACCESS, rlib2.R_2_instances_name_console.GET_ACCESS) def testMethodAccess(self): for handler in connector.CONNECTOR.values(): for method in baserlib._SUPPORTED_METHODS: access = baserlib.GetHandlerAccess(handler, method) self.assertFalse(access is None) self.assertFalse(set(access) - rapi.RAPI_ACCESS_ALL, msg=("Handler '%s' uses unknown access options for" " method %s" % (handler, method))) self.assertTrue(rapi.RAPI_ACCESS_READ not in access or rapi.RAPI_ACCESS_WRITE in access, msg=("Handler '%s' gives query, but not write access" " for 
method %s (the latter includes query and" " should therefore be given as well)" % (handler, method))) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.utils.retry_unittest.py0000744000000000000000000001621612244641676022507 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.utils.retry""" import unittest from ganeti import constants from ganeti import errors from ganeti import utils import testutils class TestRetry(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.retries = 0 self.called = 0 self.time = 1379601882.0 self.time_for_time_fn = 0 self.time_for_retry_and_succeed = 0 def _time_fn(self): self.time += self.time_for_time_fn return self.time def _wait_fn(self, delay): self.time += delay @staticmethod def _RaiseRetryAgain(): raise utils.RetryAgain() @staticmethod def _RaiseRetryAgainWithArg(args): raise utils.RetryAgain(*args) def _WrongNestedLoop(self): return utils.Retry(self._RaiseRetryAgain, 0.01, 0.02) def _RetryAndSucceed(self, retries): self.time += self.time_for_retry_and_succeed if self.retries < retries: self.retries += 1 raise utils.RetryAgain() else: return True def _SimpleRetryAndSucceed(self, retries): self.called += 1 if self.retries < retries: self.retries += 1 return False else: return True def testRaiseTimeout(self): self.failUnlessRaises(utils.RetryTimeout, utils.Retry, self._RaiseRetryAgain, 0.01, 0.02, wait_fn = self._wait_fn, _time_fn = self._time_fn) self.failUnlessRaises(utils.RetryTimeout, utils.Retry, self._RetryAndSucceed, 0.01, 0, args=[1], wait_fn = self._wait_fn, _time_fn = self._time_fn) self.failUnlessEqual(self.retries, 1) def testComplete(self): self.failUnlessEqual(utils.Retry(lambda: True, 0, 1, wait_fn = self._wait_fn, _time_fn = self._time_fn), True) self.failUnlessEqual(utils.Retry(self._RetryAndSucceed, 0, 1, args=[2], wait_fn = self._wait_fn, _time_fn = self._time_fn), True) self.failUnlessEqual(self.retries, 2) def testCompleteNontrivialTimes(self): self.time_for_time_fn = 0.01 self.time_for_retry_and_succeed = 0.1 self.failUnlessEqual(utils.Retry(self._RetryAndSucceed, 0, 1, args=[2], wait_fn = self._wait_fn, _time_fn = self._time_fn), True) self.failUnlessEqual(self.retries, 2) def testNestedLoop(self): try: self.failUnlessRaises(errors.ProgrammerError, utils.Retry, self._WrongNestedLoop, 0, 1, wait_fn = self._wait_fn, _time_fn = self._time_fn) except utils.RetryTimeout: self.fail("Didn't detect inner loop's exception") def testTimeoutArgument(self): retry_arg="my_important_debugging_message" try: utils.Retry(self._RaiseRetryAgainWithArg, 0.01, 0.02, args=[[retry_arg]], wait_fn = self._wait_fn, _time_fn = self._time_fn) except utils.RetryTimeout, err: self.failUnlessEqual(err.args, (retry_arg, )) else: 
self.fail("Expected timeout didn't happen") def testTimeout(self): self.time_for_time_fn = 0.01 self.time_for_retry_and_succeed = 10 try: utils.Retry(self._RetryAndSucceed, 1, 18, args=[2], wait_fn = self._wait_fn, _time_fn = self._time_fn) except utils.RetryTimeout, err: self.failUnlessEqual(err.args, ()) else: self.fail("Expected timeout didn't happen") def testNoTimeout(self): self.time_for_time_fn = 0.01 self.time_for_retry_and_succeed = 8 self.failUnlessEqual( utils.Retry(self._RetryAndSucceed, 1, 18, args=[2], wait_fn = self._wait_fn, _time_fn = self._time_fn), True) def testRaiseInnerWithExc(self): retry_arg="my_important_debugging_message" try: try: utils.Retry(self._RaiseRetryAgainWithArg, 0.01, 0.02, args=[[errors.GenericError(retry_arg, retry_arg)]], wait_fn = self._wait_fn, _time_fn = self._time_fn) except utils.RetryTimeout, err: err.RaiseInner() else: self.fail("Expected timeout didn't happen") except errors.GenericError, err: self.failUnlessEqual(err.args, (retry_arg, retry_arg)) else: self.fail("Expected GenericError didn't happen") def testRaiseInnerWithMsg(self): retry_arg="my_important_debugging_message" try: try: utils.Retry(self._RaiseRetryAgainWithArg, 0.01, 0.02, args=[[retry_arg, retry_arg]], wait_fn = self._wait_fn, _time_fn = self._time_fn) except utils.RetryTimeout, err: err.RaiseInner() else: self.fail("Expected timeout didn't happen") except utils.RetryTimeout, err: self.failUnlessEqual(err.args, (retry_arg, retry_arg)) else: self.fail("Expected RetryTimeout didn't happen") def testSimpleRetry(self): self.assertFalse(utils.SimpleRetry(True, lambda: False, 0.01, 0.02, wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertFalse(utils.SimpleRetry(lambda x: x, lambda: False, 0.01, 0.02, wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertTrue(utils.SimpleRetry(True, lambda: True, 0, 1, wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertTrue(utils.SimpleRetry(lambda x: x, lambda: True, 0, 1, wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertTrue(utils.SimpleRetry(True, self._SimpleRetryAndSucceed, 0, 1, args=[1], wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertEqual(self.retries, 1) self.assertEqual(self.called, 2) self.called = self.retries = 0 self.assertTrue(utils.SimpleRetry(True, self._SimpleRetryAndSucceed, 0, 1, args=[2], wait_fn = self._wait_fn, _time_fn = self._time_fn)) self.assertEqual(self.called, 3) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.objects_unittest.py0000744000000000000000000006575112271422343021630 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2008, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for unittesting the objects module""" import copy import unittest from ganeti import constants from ganeti import objects from ganeti import errors import testutils class SimpleObject(objects.ConfigObject): __slots__ = ["a", "b"] class TestDictState(unittest.TestCase): """Simple dict tansformation tests""" def testSimpleObjectToDict(self): o1 = SimpleObject(a="1") self.assertEquals(o1.ToDict(), {"a": "1"}) self.assertEquals(o1.__getstate__(), {"a": "1"}) self.assertEquals(o1.__getstate__(), o1.ToDict()) o1.a = 2 o1.b = 5 self.assertEquals(o1.ToDict(), {"a": 2, "b": 5}) o2 = SimpleObject.FromDict(o1.ToDict()) self.assertEquals(o1.ToDict(), {"a": 2, "b": 5}) class TestClusterObject(unittest.TestCase): """Tests done on a L{objects.Cluster}""" def setUp(self): hvparams = { constants.HT_FAKE: { "foo": "bar", "bar": "foo", "foobar": "barfoo", }, } os_hvp = { "lenny-image": { constants.HT_FAKE: { "foo": "baz", "foobar": "foobar", "blah": "blibb", "blubb": "blah", }, constants.HT_XEN_PVM: { "root_path": "/dev/sda5", "foo": "foobar", }, }, "ubuntu-hardy": { }, } ndparams = { constants.ND_OOB_PROGRAM: "/bin/cluster-oob", constants.ND_SPINDLE_COUNT: 1, constants.ND_EXCLUSIVE_STORAGE: False, } self.fake_cl = objects.Cluster(hvparams=hvparams, os_hvp=os_hvp, ndparams=ndparams) self.fake_cl.UpgradeConfig() def testGetHVDefaults(self): cl = self.fake_cl self.failUnlessEqual(cl.GetHVDefaults(constants.HT_FAKE), cl.hvparams[constants.HT_FAKE]) self.failUnlessEqual(cl.GetHVDefaults(None), {}) defaults = cl.GetHVDefaults(constants.HT_XEN_PVM, os_name="lenny-image") for param, value in cl.os_hvp["lenny-image"][constants.HT_XEN_PVM].items(): self.assertEqual(value, defaults[param]) def testFillHvFullMerge(self): inst_hvparams = { "blah": "blubb", } fake_dict = constants.HVC_DEFAULTS[constants.HT_FAKE].copy() fake_dict.update({ "foo": "baz", "bar": "foo", "foobar": "foobar", "blah": "blubb", "blubb": "blah", }) fake_inst = objects.Instance(name="foobar", os="lenny-image", hypervisor=constants.HT_FAKE, hvparams=inst_hvparams) self.assertEqual(fake_dict, self.fake_cl.FillHV(fake_inst)) def testFillHvGlobalParams(self): fake_inst = objects.Instance(name="foobar", os="ubuntu-hardy", hypervisor=constants.HT_FAKE, hvparams={}) self.assertEqual(self.fake_cl.hvparams[constants.HT_FAKE], self.fake_cl.FillHV(fake_inst)) def testFillHvInstParams(self): inst_hvparams = { "blah": "blubb", } fake_inst = objects.Instance(name="foobar", os="ubuntu-hardy", hypervisor=constants.HT_XEN_PVM, hvparams=inst_hvparams) filled_conf = self.fake_cl.FillHV(fake_inst) for param, value in constants.HVC_DEFAULTS[constants.HT_XEN_PVM].items(): if param == "blah": value = "blubb" self.assertEqual(value, filled_conf[param]) def testFillHvDefaultParams(self): fake_inst = objects.Instance(name="foobar", os="ubuntu-hardy", hypervisor=constants.HT_XEN_PVM, hvparams={}) self.assertEqual(constants.HVC_DEFAULTS[constants.HT_XEN_PVM], self.fake_cl.FillHV(fake_inst)) def testFillHvPartialParams(self): os = "lenny-image" fake_inst = objects.Instance(name="foobar", os=os, hypervisor=constants.HT_XEN_PVM, hvparams={}) filled_conf = self.fake_cl.FillHV(fake_inst) for param, value in self.fake_cl.os_hvp[os][constants.HT_XEN_PVM].items(): self.assertEqual(value, filled_conf[param]) def testFillNdParamsCluster(self): fake_node = objects.Node(name="test", ndparams={}, group="testgroup") fake_group = objects.NodeGroup(name="testgroup", ndparams={}) self.assertEqual(self.fake_cl.ndparams, self.fake_cl.FillND(fake_node, fake_group)) def 
testFillNdParamsNodeGroup(self): fake_node = objects.Node(name="test", ndparams={}, group="testgroup") group_ndparams = { constants.ND_OOB_PROGRAM: "/bin/group-oob", constants.ND_SPINDLE_COUNT: 10, constants.ND_EXCLUSIVE_STORAGE: True, } fake_group = objects.NodeGroup(name="testgroup", ndparams=group_ndparams) self.assertEqual(group_ndparams, self.fake_cl.FillND(fake_node, fake_group)) def testFillNdParamsNode(self): node_ndparams = { constants.ND_OOB_PROGRAM: "/bin/node-oob", constants.ND_SPINDLE_COUNT: 2, constants.ND_EXCLUSIVE_STORAGE: True, } fake_node = objects.Node(name="test", ndparams=node_ndparams, group="testgroup") fake_group = objects.NodeGroup(name="testgroup", ndparams={}) self.assertEqual(node_ndparams, self.fake_cl.FillND(fake_node, fake_group)) def testFillNdParamsAll(self): node_ndparams = { constants.ND_OOB_PROGRAM: "/bin/node-oob", constants.ND_SPINDLE_COUNT: 5, constants.ND_EXCLUSIVE_STORAGE: True, } fake_node = objects.Node(name="test", ndparams=node_ndparams, group="testgroup") group_ndparams = { constants.ND_OOB_PROGRAM: "/bin/group-oob", constants.ND_SPINDLE_COUNT: 4, } fake_group = objects.NodeGroup(name="testgroup", ndparams=group_ndparams) self.assertEqual(node_ndparams, self.fake_cl.FillND(fake_node, fake_group)) def testPrimaryHypervisor(self): assert self.fake_cl.enabled_hypervisors is None self.fake_cl.enabled_hypervisors = [constants.HT_XEN_HVM] self.assertEqual(self.fake_cl.primary_hypervisor, constants.HT_XEN_HVM) self.fake_cl.enabled_hypervisors = [constants.HT_XEN_PVM, constants.HT_KVM] self.assertEqual(self.fake_cl.primary_hypervisor, constants.HT_XEN_PVM) self.fake_cl.enabled_hypervisors = sorted(constants.HYPER_TYPES) self.assertEqual(self.fake_cl.primary_hypervisor, constants.HT_CHROOT) def testUpgradeConfig(self): # FIXME: This test is incomplete cluster = objects.Cluster() cluster.UpgradeConfig() cluster = objects.Cluster(ipolicy={"unknown_key": None}) self.assertRaises(errors.ConfigurationError, cluster.UpgradeConfig) def testUpgradeEnabledDiskTemplates(self): cfg = objects.ConfigData() cfg.cluster = objects.Cluster() cfg.cluster.volume_group_name = "myvg" instance1 = objects.Instance() instance1.disk_template = constants.DT_DISKLESS instance2 = objects.Instance() instance2.disk_template = constants.DT_RBD cfg.instances = { "myinstance1": instance1, "myinstance2": instance2 } nodegroup = objects.NodeGroup() nodegroup.ipolicy = {} nodegroup.ipolicy[constants.IPOLICY_DTS] = [instance1.disk_template, \ constants.DT_BLOCK] cfg.cluster.ipolicy = {} cfg.cluster.ipolicy[constants.IPOLICY_DTS] = \ [constants.DT_EXT, constants.DT_DISKLESS] cfg.nodegroups = { "mynodegroup": nodegroup } cfg._UpgradeEnabledDiskTemplates() expected_disk_templates = [constants.DT_DRBD8, constants.DT_PLAIN, instance1.disk_template, instance2.disk_template] self.assertEqual(set(expected_disk_templates), set(cfg.cluster.enabled_disk_templates)) self.assertEqual(set([instance1.disk_template]), set(cfg.cluster.ipolicy[constants.IPOLICY_DTS])) class TestClusterObjectTcpUdpPortPool(unittest.TestCase): def testNewCluster(self): self.assertTrue(objects.Cluster().tcpudp_port_pool is None) def testSerializingEmpty(self): self.assertEqual(objects.Cluster().ToDict(), { "tcpudp_port_pool": [], }) def testSerializing(self): cluster = objects.Cluster.FromDict({}) self.assertEqual(cluster.tcpudp_port_pool, set()) cluster.tcpudp_port_pool.add(3546) cluster.tcpudp_port_pool.add(62511) data = cluster.ToDict() self.assertEqual(data.keys(), ["tcpudp_port_pool"]) 
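# In memory the port pool is a set; ToDict() has to emit it as a list, since
# sets are not JSON-serializable. Set iteration order is undefined, hence the
# sorted() comparison below.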
self.assertEqual(sorted(data["tcpudp_port_pool"]), sorted([3546, 62511])) def testDeserializingEmpty(self): cluster = objects.Cluster.FromDict({}) self.assertEqual(cluster.tcpudp_port_pool, set()) def testDeserialize(self): cluster = objects.Cluster.FromDict({ "tcpudp_port_pool": [26214, 10039, 267], }) self.assertEqual(cluster.tcpudp_port_pool, set([26214, 10039, 267])) class TestOS(unittest.TestCase): ALL_DATA = [ "debootstrap", "debootstrap+default", "debootstrap++default", ] def testSplitNameVariant(self): for name in self.ALL_DATA: self.assertEqual(len(objects.OS.SplitNameVariant(name)), 2) def testVariant(self): self.assertEqual(objects.OS.GetVariant("debootstrap"), "") self.assertEqual(objects.OS.GetVariant("debootstrap+default"), "default") class TestInstance(unittest.TestCase): def _GenericCheck(self, inst): for i in [inst.all_nodes, inst.secondary_nodes]: self.assertTrue(isinstance(inst.all_nodes, (list, tuple)), msg="Data type doesn't guarantee order") self.assertTrue(inst.primary_node not in inst.secondary_nodes) self.assertEqual(inst.all_nodes[0], inst.primary_node, msg="Primary node not first node in list") def testNodesNoDisks(self): inst = objects.Instance(name="fakeinst.example.com", primary_node="pnode.example.com", disks=[ ]) self._GenericCheck(inst) self.assertEqual(len(inst.secondary_nodes), 0) self.assertEqual(set(inst.all_nodes), set([inst.primary_node])) self.assertEqual(inst.MapLVsByNode(), { inst.primary_node: [], }) def testNodesPlainDisks(self): inst = objects.Instance(name="fakeinstplain.example.com", primary_node="node3.example.com", disks=[ objects.Disk(dev_type=constants.DT_PLAIN, size=128, logical_id=("myxenvg", "disk25494")), objects.Disk(dev_type=constants.DT_PLAIN, size=512, logical_id=("myxenvg", "disk29071")), ]) self._GenericCheck(inst) self.assertEqual(len(inst.secondary_nodes), 0) self.assertEqual(set(inst.all_nodes), set([inst.primary_node])) self.assertEqual(inst.MapLVsByNode(), { inst.primary_node: ["myxenvg/disk25494", "myxenvg/disk29071"], }) def testNodesDrbdDisks(self): inst = objects.Instance(name="fakeinstdrbd.example.com", primary_node="node10.example.com", disks=[ objects.Disk(dev_type=constants.DT_DRBD8, size=786432, logical_id=("node10.example.com", "node15.example.com", 12300, 0, 0, "secret"), children=[ objects.Disk(dev_type=constants.DT_PLAIN, size=786432, logical_id=("myxenvg", "disk0")), objects.Disk(dev_type=constants.DT_PLAIN, size=128, logical_id=("myxenvg", "meta0")) ], iv_name="disk/0") ]) self._GenericCheck(inst) self.assertEqual(set(inst.secondary_nodes), set(["node15.example.com"])) self.assertEqual(set(inst.all_nodes), set([inst.primary_node, "node15.example.com"])) self.assertEqual(inst.MapLVsByNode(), { inst.primary_node: ["myxenvg/disk0", "myxenvg/meta0"], "node15.example.com": ["myxenvg/disk0", "myxenvg/meta0"], }) self.assertEqual(inst.FindDisk(0), inst.disks[0]) self.assertRaises(errors.OpPrereqError, inst.FindDisk, "hello") self.assertRaises(errors.OpPrereqError, inst.FindDisk, 100) self.assertRaises(errors.OpPrereqError, inst.FindDisk, 1) class TestNode(unittest.TestCase): def testEmpty(self): self.assertEqual(objects.Node().ToDict(), {}) self.assertTrue(isinstance(objects.Node.FromDict({}), objects.Node)) def testHvState(self): node = objects.Node(name="node18157.example.com", hv_state={ constants.HT_XEN_HVM: objects.NodeHvState(cpu_total=64), constants.HT_KVM: objects.NodeHvState(cpu_node=1), }) node2 = objects.Node.FromDict(node.ToDict()) # Make sure nothing can reference it anymore del node 
self.assertEqual(node2.name, "node18157.example.com") self.assertEqual(frozenset(node2.hv_state), frozenset([ constants.HT_XEN_HVM, constants.HT_KVM, ])) self.assertEqual(node2.hv_state[constants.HT_KVM].cpu_node, 1) self.assertEqual(node2.hv_state[constants.HT_XEN_HVM].cpu_total, 64) def testDiskState(self): node = objects.Node(name="node32087.example.com", disk_state={ constants.DT_PLAIN: { "lv32352": objects.NodeDiskState(total=128), "lv2082": objects.NodeDiskState(total=512), }, }) node2 = objects.Node.FromDict(node.ToDict()) # Make sure nothing can reference it anymore del node self.assertEqual(node2.name, "node32087.example.com") self.assertEqual(frozenset(node2.disk_state), frozenset([ constants.DT_PLAIN, ])) self.assertEqual(frozenset(node2.disk_state[constants.DT_PLAIN]), frozenset(["lv32352", "lv2082"])) self.assertEqual(node2.disk_state[constants.DT_PLAIN]["lv2082"].total, 512) self.assertEqual(node2.disk_state[constants.DT_PLAIN]["lv32352"].total, 128) def testFilterEsNdp(self): node1 = objects.Node(name="node11673.example.com", ndparams={ constants.ND_EXCLUSIVE_STORAGE: True, }) node2 = objects.Node(name="node11674.example.com", ndparams={ constants.ND_SPINDLE_COUNT: 3, constants.ND_EXCLUSIVE_STORAGE: False, }) self.assertTrue(constants.ND_EXCLUSIVE_STORAGE in node1.ndparams) node1.UpgradeConfig() self.assertFalse(constants.ND_EXCLUSIVE_STORAGE in node1.ndparams) self.assertTrue(constants.ND_EXCLUSIVE_STORAGE in node2.ndparams) self.assertTrue(constants.ND_SPINDLE_COUNT in node2.ndparams) node2.UpgradeConfig() self.assertFalse(constants.ND_EXCLUSIVE_STORAGE in node2.ndparams) self.assertTrue(constants.ND_SPINDLE_COUNT in node2.ndparams) class TestInstancePolicy(unittest.TestCase): def setUp(self): # Policies are big, and we want to see the difference in case of an error self.maxDiff = None def _AssertIPolicyIsFull(self, policy): self.assertEqual(frozenset(policy.keys()), constants.IPOLICY_ALL_KEYS) self.assertTrue(len(policy[constants.ISPECS_MINMAX]) > 0) for minmax in policy[constants.ISPECS_MINMAX]: self.assertEqual(frozenset(minmax.keys()), constants.ISPECS_MINMAX_KEYS) for key in constants.ISPECS_MINMAX_KEYS: self.assertEqual(frozenset(minmax[key].keys()), constants.ISPECS_PARAMETERS) self.assertEqual(frozenset(policy[constants.ISPECS_STD].keys()), constants.ISPECS_PARAMETERS) def testDefaultIPolicy(self): objects.InstancePolicy.CheckParameterSyntax(constants.IPOLICY_DEFAULTS, True) self._AssertIPolicyIsFull(constants.IPOLICY_DEFAULTS) def _AssertPolicyIsBad(self, ipolicy, do_check_std=None): if do_check_std is None: check_std_vals = [False, True] else: check_std_vals = [do_check_std] for check_std in check_std_vals: self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckISpecSyntax, ipolicy, check_std) def testCheckISpecSyntax(self): default_stdspec = constants.IPOLICY_DEFAULTS[constants.ISPECS_STD] incomplete_ipolicies = [ { constants.ISPECS_MINMAX: [], constants.ISPECS_STD: default_stdspec, }, { constants.ISPECS_MINMAX: [{}], constants.ISPECS_STD: default_stdspec, }, { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: NotImplemented, }], constants.ISPECS_STD: default_stdspec, }, { constants.ISPECS_MINMAX: [{ constants.ISPECS_MAX: NotImplemented, }], constants.ISPECS_STD: default_stdspec, }, { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: NotImplemented, constants.ISPECS_MAX: NotImplemented, }], }, ] for ipol in incomplete_ipolicies: self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckISpecSyntax, ipol, True) oldminmax = 
ipol[constants.ISPECS_MINMAX] if oldminmax: # Prepending valid specs shouldn't change the error ipol[constants.ISPECS_MINMAX] = ([constants.ISPECS_MINMAX_DEFAULTS] + oldminmax) self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckISpecSyntax, ipol, True) good_ipolicy = { constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 64, constants.ISPEC_CPU_COUNT: 1, constants.ISPEC_DISK_COUNT: 2, constants.ISPEC_DISK_SIZE: 64, constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 16384, constants.ISPEC_CPU_COUNT: 5, constants.ISPEC_DISK_COUNT: 12, constants.ISPEC_DISK_SIZE: 1024, constants.ISPEC_NIC_COUNT: 9, constants.ISPEC_SPINDLE_USE: 18, }, }, { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 32768, constants.ISPEC_CPU_COUNT: 8, constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: 1024, constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 65536, constants.ISPEC_CPU_COUNT: 10, constants.ISPEC_DISK_COUNT: 5, constants.ISPEC_DISK_SIZE: 1024 * 1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 12, }, }, ], } good_ipolicy[constants.ISPECS_STD] = copy.deepcopy( good_ipolicy[constants.ISPECS_MINMAX][0][constants.ISPECS_MAX]) # Check that it's really good before making it bad objects.InstancePolicy.CheckISpecSyntax(good_ipolicy, True) bad_ipolicy = copy.deepcopy(good_ipolicy) for minmax in bad_ipolicy[constants.ISPECS_MINMAX]: for (key, spec) in minmax.items(): for param in spec: oldv = spec[param] del spec[param] self._AssertPolicyIsBad(bad_ipolicy) if key == constants.ISPECS_MIN: spec[param] = minmax[constants.ISPECS_MAX][param] + 1 self._AssertPolicyIsBad(bad_ipolicy) spec[param] = oldv assert bad_ipolicy == good_ipolicy stdspec = bad_ipolicy[constants.ISPECS_STD] for param in stdspec: oldv = stdspec[param] del stdspec[param] self._AssertPolicyIsBad(bad_ipolicy, True) # Note that std spec is the same as a max spec stdspec[param] = oldv + 1 self._AssertPolicyIsBad(bad_ipolicy, True) stdspec[param] = oldv assert bad_ipolicy == good_ipolicy for minmax in good_ipolicy[constants.ISPECS_MINMAX]: for spec in minmax.values(): good_ipolicy[constants.ISPECS_STD] = spec objects.InstancePolicy.CheckISpecSyntax(good_ipolicy, True) def testCheckISpecParamSyntax(self): par = "my_parameter" for check_std in [True, False]: # Min and max only good_values = [(11, 11), (11, 40), (0, 0)] for (mn, mx) in good_values: minmax = dict((k, {}) for k in constants.ISPECS_MINMAX_KEYS) minmax[constants.ISPECS_MIN][par] = mn minmax[constants.ISPECS_MAX][par] = mx objects.InstancePolicy._CheckISpecParamSyntax(minmax, {}, par, check_std) minmax = dict((k, {}) for k in constants.ISPECS_MINMAX_KEYS) minmax[constants.ISPECS_MIN][par] = 11 minmax[constants.ISPECS_MAX][par] = 5 self.assertRaises(errors.ConfigurationError, objects.InstancePolicy._CheckISpecParamSyntax, minmax, {}, par, check_std) # Min, std, max good_values = [ (11, 11, 11), (11, 11, 40), (11, 40, 40), ] for (mn, st, mx) in good_values: minmax = { constants.ISPECS_MIN: {par: mn}, constants.ISPECS_MAX: {par: mx}, } stdspec = {par: st} objects.InstancePolicy._CheckISpecParamSyntax(minmax, stdspec, par, True) bad_values = [ (11, 11, 5, True), (40, 11, 11, True), (11, 80, 40, False), (11, 5, 40, False,), (11, 5, 5, True), (40, 40, 11, True), ] for (mn, st, mx, excp) in bad_values: minmax = { constants.ISPECS_MIN: {par: mn}, constants.ISPECS_MAX: {par: mx}, } stdspec = {par: st} if excp: 
self.assertRaises(errors.ConfigurationError, objects.InstancePolicy._CheckISpecParamSyntax, minmax, stdspec, par, True) else: ret = objects.InstancePolicy._CheckISpecParamSyntax(minmax, stdspec, par, True) self.assertFalse(ret) def testCheckDiskTemplates(self): invalid = "this_is_not_a_good_template" for dt in constants.DISK_TEMPLATES: objects.InstancePolicy.CheckDiskTemplates([dt]) objects.InstancePolicy.CheckDiskTemplates(list(constants.DISK_TEMPLATES)) bad_examples = [ [invalid], [constants.DT_DRBD8, invalid], list(constants.DISK_TEMPLATES) + [invalid], [], None, ] for dtl in bad_examples: self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckDiskTemplates, dtl) def testCheckParameterSyntax(self): invalid = "this_key_shouldnt_be_here" for check_std in [True, False]: objects.InstancePolicy.CheckParameterSyntax({}, check_std) policy = {invalid: None} self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckParameterSyntax, policy, check_std) for par in constants.IPOLICY_PARAMETERS: for val in ("blah", None, {}, [42]): policy = {par: val} self.assertRaises(errors.ConfigurationError, objects.InstancePolicy.CheckParameterSyntax, policy, check_std) def testFillIPolicyEmpty(self): policy = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, {}) objects.InstancePolicy.CheckParameterSyntax(policy, True) self.assertEqual(policy, constants.IPOLICY_DEFAULTS) def _AssertISpecsMerged(self, default_spec, diff_spec, merged_spec): for (param, value) in merged_spec.items(): if param in diff_spec: self.assertEqual(value, diff_spec[param]) else: self.assertEqual(value, default_spec[param]) def _AssertIPolicyMerged(self, default_pol, diff_pol, merged_pol): for (key, value) in merged_pol.items(): if key in diff_pol: if key == constants.ISPECS_STD: self._AssertISpecsMerged(default_pol[key], diff_pol[key], value) else: self.assertEqual(value, diff_pol[key]) else: self.assertEqual(value, default_pol[key]) def testFillIPolicy(self): partial_policies = [ {constants.IPOLICY_VCPU_RATIO: 3.14}, {constants.IPOLICY_SPINDLE_RATIO: 2.72}, {constants.IPOLICY_DTS: [constants.DT_FILE]}, {constants.ISPECS_STD: {constants.ISPEC_DISK_COUNT: 3}}, {constants.ISPECS_MINMAX: [constants.ISPECS_MINMAX_DEFAULTS, constants.ISPECS_MINMAX_DEFAULTS]} ] for diff_pol in partial_policies: policy = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, diff_pol) objects.InstancePolicy.CheckParameterSyntax(policy, True) self._AssertIPolicyIsFull(policy) self._AssertIPolicyMerged(constants.IPOLICY_DEFAULTS, diff_pol, policy) def testFillIPolicyKeepsUnknown(self): INVALID_KEY = "invalid_ipolicy_key" diff_pol = { INVALID_KEY: None, } policy = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, diff_pol) self.assertTrue(INVALID_KEY in policy) class TestDisk(unittest.TestCase): def addChild(self, disk): """Adds a child of the same device type as the parent.""" disk.children = [] child = objects.Disk() child.dev_type = disk.dev_type disk.children.append(child) def testUpgradeConfigDevTypeLegacy(self): for old, new in [("drbd8", constants.DT_DRBD8), ("lvm", constants.DT_PLAIN)]: disk = objects.Disk() disk.dev_type = old self.addChild(disk) disk.UpgradeConfig() self.assertEqual(new, disk.dev_type) self.assertEqual(new, disk.children[0].dev_type) def testUpgradeConfigDevTypeLegacyUnchanged(self): dev_types = [constants.DT_FILE, constants.DT_SHARED_FILE, constants.DT_BLOCK, constants.DT_EXT, constants.DT_RBD] for dev_type in dev_types: disk = objects.Disk() disk.dev_type = dev_type self.addChild(disk) disk.UpgradeConfig() 
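# Unlike the legacy "drbd8"/"lvm" names handled above, current dev_type
# values must pass through UpgradeConfig() unchanged, on the parent disk as
# well as on its child.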
self.assertEqual(dev_type, disk.dev_type) self.assertEqual(dev_type, disk.children[0].dev_type) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.netutils_unittest.py0000744000000000000000000004530612271422343022040 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the netutils module""" import os import re import shutil import socket import tempfile import unittest import testutils from ganeti import constants from ganeti import errors from ganeti import netutils from ganeti import serializer from ganeti import utils def _GetSocketCredentials(path): """Connect to a Unix socket and return remote credentials. """ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: sock.settimeout(10) sock.connect(path) return netutils.GetSocketCredentials(sock) finally: sock.close() class TestGetSocketCredentials(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.sockpath = utils.PathJoin(self.tmpdir, "sock") self.listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.listener.settimeout(10) self.listener.bind(self.sockpath) self.listener.listen(1) def tearDown(self): self.listener.shutdown(socket.SHUT_RDWR) self.listener.close() shutil.rmtree(self.tmpdir) def test(self): (c2pr, c2pw) = os.pipe() # Start child process child = os.fork() if child == 0: try: data = serializer.DumpJson(_GetSocketCredentials(self.sockpath)) os.write(c2pw, data) os.close(c2pw) os._exit(0) finally: os._exit(1) os.close(c2pw) # Wait for one connection (conn, _) = self.listener.accept() conn.recv(1) conn.close() # Wait for result result = os.read(c2pr, 4096) os.close(c2pr) # Check child's exit code (_, status) = os.waitpid(child, 0) self.assertFalse(os.WIFSIGNALED(status)) self.assertEqual(os.WEXITSTATUS(status), 0) # Check result (pid, uid, gid) = serializer.LoadJson(result) self.assertEqual(pid, os.getpid()) self.assertEqual(uid, os.getuid()) self.assertEqual(gid, os.getgid()) class TestHostname(unittest.TestCase): """Testing case for Hostname""" def testUppercase(self): data = "AbC.example.com" self.assertEqual(netutils.Hostname.GetNormalizedName(data), data.lower()) def testTooLongName(self): data = "a.b." 
+ "c" * 255 self.assertRaises(errors.OpPrereqError, netutils.Hostname.GetNormalizedName, data) def testTrailingDot(self): data = "a.b.c" self.assertEqual(netutils.Hostname.GetNormalizedName(data + "."), data) def testInvalidName(self): data = [ "a b", "a/b", ".a.b", "a..b", ] for value in data: self.assertRaises(errors.OpPrereqError, netutils.Hostname.GetNormalizedName, value) def testValidName(self): data = [ "a.b", "a-b", "a_b", "a.b.c", ] for value in data: self.assertEqual(netutils.Hostname.GetNormalizedName(value), value) class TestIPAddress(unittest.TestCase): def testIsValid(self): self.assert_(netutils.IPAddress.IsValid("0.0.0.0")) self.assert_(netutils.IPAddress.IsValid("127.0.0.1")) self.assert_(netutils.IPAddress.IsValid("::")) self.assert_(netutils.IPAddress.IsValid("::1")) def testNotIsValid(self): self.assertFalse(netutils.IPAddress.IsValid("0")) self.assertFalse(netutils.IPAddress.IsValid("1.1.1.256")) self.assertFalse(netutils.IPAddress.IsValid("a:g::1")) def testGetAddressFamily(self): fn = netutils.IPAddress.GetAddressFamily self.assertEqual(fn("127.0.0.1"), socket.AF_INET) self.assertEqual(fn("10.2.0.127"), socket.AF_INET) self.assertEqual(fn("::1"), socket.AF_INET6) self.assertEqual(fn("2001:db8::1"), socket.AF_INET6) self.assertRaises(errors.IPAddressError, fn, "0") def testValidateNetmask(self): for netmask in [0, 33]: self.assertFalse(netutils.IP4Address.ValidateNetmask(netmask)) for netmask in [1, 32]: self.assertTrue(netutils.IP4Address.ValidateNetmask(netmask)) for netmask in [0, 129]: self.assertFalse(netutils.IP6Address.ValidateNetmask(netmask)) for netmask in [1, 128]: self.assertTrue(netutils.IP6Address.ValidateNetmask(netmask)) def testGetClassFromX(self): self.assert_( netutils.IPAddress.GetClassFromIpVersion(constants.IP4_VERSION) == netutils.IP4Address) self.assert_( netutils.IPAddress.GetClassFromIpVersion(constants.IP6_VERSION) == netutils.IP6Address) self.assert_( netutils.IPAddress.GetClassFromIpFamily(socket.AF_INET) == netutils.IP4Address) self.assert_( netutils.IPAddress.GetClassFromIpFamily(socket.AF_INET6) == netutils.IP6Address) def testOwnLoopback(self): # FIXME: In a pure IPv6 environment this is no longer true self.assert_(netutils.IPAddress.Own("127.0.0.1"), "Should own 127.0.0.1 address") def testNotOwnAddress(self): self.assertFalse(netutils.IPAddress.Own("2001:db8::1"), "Should not own IP address 2001:db8::1") self.assertFalse(netutils.IPAddress.Own("192.0.2.1"), "Should not own IP address 192.0.2.1") def testFamilyVersionConversions(self): # IPAddress.GetAddressFamilyFromVersion self.assertEqual( netutils.IPAddress.GetAddressFamilyFromVersion(constants.IP4_VERSION), socket.AF_INET) self.assertEqual( netutils.IPAddress.GetAddressFamilyFromVersion(constants.IP6_VERSION), socket.AF_INET6) self.assertRaises(errors.ProgrammerError, netutils.IPAddress.GetAddressFamilyFromVersion, 3) # IPAddress.GetVersionFromAddressFamily self.assertEqual( netutils.IPAddress.GetVersionFromAddressFamily(socket.AF_INET), constants.IP4_VERSION) self.assertEqual( netutils.IPAddress.GetVersionFromAddressFamily(socket.AF_INET6), constants.IP6_VERSION) self.assertRaises(errors.ProgrammerError, netutils.IPAddress.GetVersionFromAddressFamily, socket.AF_UNIX) class TestIP4Address(unittest.TestCase): def testGetIPIntFromString(self): fn = netutils.IP4Address._GetIPIntFromString self.assertEqual(fn("0.0.0.0"), 0) self.assertEqual(fn("0.0.0.1"), 1) self.assertEqual(fn("127.0.0.1"), 2130706433) self.assertEqual(fn("192.0.2.129"), 3221226113) 
self.assertEqual(fn("255.255.255.255"), 2**32 - 1) self.assertNotEqual(fn("0.0.0.0"), 1) self.assertNotEqual(fn("0.0.0.0"), 1) def testIsValid(self): self.assert_(netutils.IP4Address.IsValid("0.0.0.0")) self.assert_(netutils.IP4Address.IsValid("127.0.0.1")) self.assert_(netutils.IP4Address.IsValid("192.0.2.199")) self.assert_(netutils.IP4Address.IsValid("255.255.255.255")) def testNotIsValid(self): self.assertFalse(netutils.IP4Address.IsValid("0")) self.assertFalse(netutils.IP4Address.IsValid("1")) self.assertFalse(netutils.IP4Address.IsValid("1.1.1")) self.assertFalse(netutils.IP4Address.IsValid("255.255.255.256")) self.assertFalse(netutils.IP4Address.IsValid("::1")) def testInNetwork(self): self.assert_(netutils.IP4Address.InNetwork("127.0.0.0/8", "127.0.0.1")) def testNotInNetwork(self): self.assertFalse(netutils.IP4Address.InNetwork("192.0.2.0/24", "127.0.0.1")) def testIsLoopback(self): self.assert_(netutils.IP4Address.IsLoopback("127.0.0.1")) def testNotIsLoopback(self): self.assertFalse(netutils.IP4Address.IsLoopback("192.0.2.1")) class TestIP6Address(unittest.TestCase): def testGetIPIntFromString(self): fn = netutils.IP6Address._GetIPIntFromString self.assertEqual(fn("::"), 0) self.assertEqual(fn("::1"), 1) self.assertEqual(fn("2001:db8::1"), 42540766411282592856903984951653826561L) self.assertEqual(fn("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"), 2**128-1) self.assertNotEqual(netutils.IP6Address._GetIPIntFromString("::2"), 1) def testIsValid(self): self.assert_(netutils.IP6Address.IsValid("::")) self.assert_(netutils.IP6Address.IsValid("::1")) self.assert_(netutils.IP6Address.IsValid("1" + (":1" * 7))) self.assert_(netutils.IP6Address.IsValid("ffff" + (":ffff" * 7))) self.assert_(netutils.IP6Address.IsValid("::")) def testNotIsValid(self): self.assertFalse(netutils.IP6Address.IsValid("0")) self.assertFalse(netutils.IP6Address.IsValid(":1")) self.assertFalse(netutils.IP6Address.IsValid("f" + (":f" * 6))) self.assertFalse(netutils.IP6Address.IsValid("fffg" + (":ffff" * 7))) self.assertFalse(netutils.IP6Address.IsValid("fffff" + (":ffff" * 7))) self.assertFalse(netutils.IP6Address.IsValid("1" + (":1" * 8))) self.assertFalse(netutils.IP6Address.IsValid("127.0.0.1")) def testInNetwork(self): self.assert_(netutils.IP6Address.InNetwork("::1/128", "::1")) def testNotInNetwork(self): self.assertFalse(netutils.IP6Address.InNetwork("2001:db8::1/128", "::1")) def testIsLoopback(self): self.assert_(netutils.IP6Address.IsLoopback("::1")) def testNotIsLoopback(self): self.assertFalse(netutils.IP6Address.IsLoopback("2001:db8::1")) class _BaseTcpPingTest: """Base class for TcpPing tests against listen(2)ing port""" family = None address = None def setUp(self): self.listener = socket.socket(self.family, socket.SOCK_STREAM) self.listener.bind((self.address, 0)) self.listenerport = self.listener.getsockname()[1] self.listener.listen(1) def tearDown(self): self.listener.shutdown(socket.SHUT_RDWR) del self.listener del self.listenerport def testTcpPingToLocalHostAccept(self): self.assert_(netutils.TcpPing(self.address, self.listenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=True, source=self.address, ), "failed to connect to test listener") self.assert_(netutils.TcpPing(self.address, self.listenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=True), "failed to connect to test listener (no source)") class TestIP4TcpPing(unittest.TestCase, _BaseTcpPingTest): """Testcase for IPv4 TCP version of ping - against listen(2)ing port""" family = socket.AF_INET address = 
constants.IP4_ADDRESS_LOCALHOST def setUp(self): unittest.TestCase.setUp(self) _BaseTcpPingTest.setUp(self) def tearDown(self): unittest.TestCase.tearDown(self) _BaseTcpPingTest.tearDown(self) class TestIP6TcpPing(unittest.TestCase, _BaseTcpPingTest): """Testcase for IPv6 TCP version of ping - against listen(2)ing port""" family = socket.AF_INET6 address = constants.IP6_ADDRESS_LOCALHOST def setUp(self): unittest.TestCase.setUp(self) _BaseTcpPingTest.setUp(self) def tearDown(self): unittest.TestCase.tearDown(self) _BaseTcpPingTest.tearDown(self) class _BaseTcpPingDeafTest: """Base class for TcpPing tests against non listen(2)ing port""" family = None address = None def setUp(self): self.deaflistener = socket.socket(self.family, socket.SOCK_STREAM) self.deaflistener.bind((self.address, 0)) self.deaflistenerport = self.deaflistener.getsockname()[1] def tearDown(self): del self.deaflistener del self.deaflistenerport def testTcpPingToLocalHostAcceptDeaf(self): self.assertFalse(netutils.TcpPing(self.address, self.deaflistenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=True, source=self.address, ), # need successful connect(2) "successfully connected to deaf listener") self.assertFalse(netutils.TcpPing(self.address, self.deaflistenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=True, ), # need successful connect(2) "successfully connected to deaf listener (no source)") def testTcpPingToLocalHostNoAccept(self): self.assert_(netutils.TcpPing(self.address, self.deaflistenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=False, source=self.address, ), # ECONNREFUSED is OK "failed to ping alive host on deaf port") self.assert_(netutils.TcpPing(self.address, self.deaflistenerport, timeout=constants.TCP_PING_TIMEOUT, live_port_needed=False, ), # ECONNREFUSED is OK "failed to ping alive host on deaf port (no source)") class TestIP4TcpPingDeaf(unittest.TestCase, _BaseTcpPingDeafTest): """Testcase for IPv4 TCP version of ping - against non listen(2)ing port""" family = socket.AF_INET address = constants.IP4_ADDRESS_LOCALHOST def setUp(self): unittest.TestCase.setUp(self) _BaseTcpPingDeafTest.setUp(self) def tearDown(self): unittest.TestCase.tearDown(self) _BaseTcpPingDeafTest.tearDown(self) class TestIP6TcpPingDeaf(unittest.TestCase, _BaseTcpPingDeafTest): """Testcase for IPv6 TCP version of ping - against non listen(2)ing port""" family = socket.AF_INET6 address = constants.IP6_ADDRESS_LOCALHOST def setUp(self): unittest.TestCase.setUp(self) _BaseTcpPingDeafTest.setUp(self) def tearDown(self): unittest.TestCase.tearDown(self) _BaseTcpPingDeafTest.tearDown(self) class TestFormatAddress(unittest.TestCase): """Testcase for FormatAddress""" def testFormatAddressUnixSocket(self): res1 = netutils.FormatAddress(("12352", 0, 0), family=socket.AF_UNIX) self.assertEqual(res1, "pid=12352, uid=0, gid=0") def testFormatAddressIP4(self): res1 = netutils.FormatAddress(("127.0.0.1", 1234), family=socket.AF_INET) self.assertEqual(res1, "127.0.0.1:1234") res2 = netutils.FormatAddress(("192.0.2.32", None), family=socket.AF_INET) self.assertEqual(res2, "192.0.2.32") def testFormatAddressIP6(self): res1 = netutils.FormatAddress(("::1", 1234), family=socket.AF_INET6) self.assertEqual(res1, "[::1]:1234") res2 = netutils.FormatAddress(("::1", None), family=socket.AF_INET6) self.assertEqual(res2, "[::1]") res2 = netutils.FormatAddress(("2001:db8::beef", "80"), family=socket.AF_INET6) self.assertEqual(res2, "[2001:db8::beef]:80") def testFormatAddressWithoutFamily(self): res1 = netutils.FormatAddress(("127.0.0.1", 1234)) self.assertEqual(res1, "127.0.0.1:1234") res2 = netutils.FormatAddress(("::1", 1234)) self.assertEqual(res2, "[::1]:1234") def testInvalidFormatAddress(self): self.assertRaises(errors.ParameterError, netutils.FormatAddress, "127.0.0.1") self.assertRaises(errors.ParameterError, netutils.FormatAddress, "127.0.0.1", family=socket.AF_INET) self.assertRaises(errors.ParameterError, netutils.FormatAddress, ("::1"), family=socket.AF_INET ) class TestIpParsing(testutils.GanetiTestCase): """Test the code that parses the ip command output""" def testIp4(self): valid_addresses = [constants.IP4_ADDRESS_ANY, constants.IP4_ADDRESS_LOCALHOST, "192.0.2.1", # RFC5737, IPv4 address blocks for docs "198.51.100.1", "203.0.113.1", ] for addr in valid_addresses: self.failUnless(re.search(netutils._IP_RE_TEXT, addr)) def testIp6(self): valid_addresses = [constants.IP6_ADDRESS_ANY, constants.IP6_ADDRESS_LOCALHOST, "0:0:0:0:0:0:0:1", # other form for IP6_ADDRESS_LOCALHOST "0:0:0:0:0:0:0:0", # other form for IP6_ADDRESS_ANY "2001:db8:85a3::8a2e:370:7334", # RFC3849 IP6 docs block "2001:0db8:85a3:0000:0000:8a2e:0370:7334", "0:0:0:0:0:FFFF:192.0.2.1", # IPv4-compatible IPv6 "::FFFF:192.0.2.1", "0:0:0:0:0:0:203.0.113.1", # IPv4-mapped IPv6 "::203.0.113.1", ] for addr in valid_addresses: self.failUnless(re.search(netutils._IP_RE_TEXT, addr)) def testParseIpCommandOutput(self): # IPv4-only, fake loopback interface tests = ["ip-addr-show-lo-ipv4.txt", "ip-addr-show-lo-oneline-ipv4.txt"] for test_file in tests: data = testutils.ReadTestData(test_file) addr = netutils._GetIpAddressesFromIpOutput(data) self.failUnless(len(addr[4]) == 1 and addr[4][0] == "127.0.0.1" and not addr[6]) # IPv6-only, fake loopback interface tests = ["ip-addr-show-lo-ipv6.txt", "ip-addr-show-lo-oneline-ipv6.txt"] for test_file in tests: data = testutils.ReadTestData(test_file) addr = netutils._GetIpAddressesFromIpOutput(data) self.failUnless(len(addr[6]) == 1 and addr[6][0] == "::1" and not addr[4]) # IPv4 and IPv6, fake loopback interface tests = ["ip-addr-show-lo.txt", "ip-addr-show-lo-oneline.txt"] for test_file in tests: data = testutils.ReadTestData(test_file) addr = netutils._GetIpAddressesFromIpOutput(data) self.failUnless(len(addr[6]) == 1 and addr[6][0] == "::1" and len(addr[4]) == 1 and addr[4][0] == "127.0.0.1") # IPv4 and IPv6, dummy interface data = testutils.ReadTestData("ip-addr-show-dummy0.txt") addr = netutils._GetIpAddressesFromIpOutput(data) self.failUnless(len(addr[6]) == 1 and addr[6][0] == "2001:db8:85a3::8a2e:370:7334" and len(addr[4]) == 1 and addr[4][0] == "192.0.2.1") if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.cmdlib_unittest.py0000744000000000000000000021543212271422343021422 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2008, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for unittesting the cmdlib module""" import os import unittest import tempfile import shutil import operator import itertools import copy from ganeti import constants from ganeti import mcpu from ganeti import cmdlib from ganeti.cmdlib import cluster from ganeti.cmdlib import group from ganeti.cmdlib import instance from ganeti.cmdlib import instance_storage from ganeti.cmdlib import instance_utils from ganeti.cmdlib import common from ganeti.cmdlib import query from ganeti import opcodes from ganeti import errors from ganeti import utils from ganeti import luxi from ganeti import ht from ganeti import objects from ganeti import compat from ganeti import rpc from ganeti import locking from ganeti import pathutils from ganeti.masterd import iallocator from ganeti.hypervisor import hv_xen import testutils import mocks class TestCertVerification(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testVerifyCertificate(self): cluster._VerifyCertificate(testutils.TestDataFilename("cert1.pem")) nonexist_filename = os.path.join(self.tmpdir, "does-not-exist") (errcode, msg) = cluster._VerifyCertificate(nonexist_filename) self.assertEqual(errcode, cluster.LUClusterVerifyConfig.ETYPE_ERROR) # Try to load non-certificate file invalid_cert = testutils.TestDataFilename("bdev-net.txt") (errcode, msg) = cluster._VerifyCertificate(invalid_cert) self.assertEqual(errcode, cluster.LUClusterVerifyConfig.ETYPE_ERROR) class TestOpcodeParams(testutils.GanetiTestCase): def testParamsStructures(self): for op in sorted(mcpu.Processor.DISPATCH_TABLE): lu = mcpu.Processor.DISPATCH_TABLE[op] lu_name = lu.__name__ self.failIf(hasattr(lu, "_OP_REQP"), msg=("LU '%s' has old-style _OP_REQP" % lu_name)) self.failIf(hasattr(lu, "_OP_DEFS"), msg=("LU '%s' has old-style _OP_DEFS" % lu_name)) self.failIf(hasattr(lu, "_OP_PARAMS"), msg=("LU '%s' has old-style _OP_PARAMS" % lu_name)) class TestIAllocatorChecks(testutils.GanetiTestCase): def testFunction(self): class TestLU(object): def __init__(self, opcode): self.cfg = mocks.FakeConfig() self.op = opcode class OpTest(opcodes.OpCode): OP_PARAMS = [ ("iallocator", None, ht.NoType, None), ("node", None, ht.NoType, None), ] default_iallocator = mocks.FakeConfig().GetDefaultIAllocator() other_iallocator = default_iallocator + "_not" op = OpTest() lu = TestLU(op) c_i = lambda: common.CheckIAllocatorOrNode(lu, "iallocator", "node") # Neither node nor iallocator given for n in (None, []): op.iallocator = None op.node = n c_i() self.assertEqual(lu.op.iallocator, default_iallocator) self.assertEqual(lu.op.node, n) # Both, iallocator and node given for a in ("test", constants.DEFAULT_IALLOCATOR_SHORTCUT): op.iallocator = a op.node = "test" self.assertRaises(errors.OpPrereqError, c_i) # Only iallocator given for n in (None, []): op.iallocator = other_iallocator op.node = n c_i() self.assertEqual(lu.op.iallocator, other_iallocator) self.assertEqual(lu.op.node, n) # Only node given op.iallocator = None op.node = "node" c_i() self.assertEqual(lu.op.iallocator, None) self.assertEqual(lu.op.node, "node") # Asked for default iallocator, no node given op.iallocator = constants.DEFAULT_IALLOCATOR_SHORTCUT op.node = None c_i() self.assertEqual(lu.op.iallocator, 
default_iallocator) self.assertEqual(lu.op.node, None) # No node, iallocator or default iallocator op.iallocator = None op.node = None lu.cfg.GetDefaultIAllocator = lambda: None self.assertRaises(errors.OpPrereqError, c_i) class TestLUTestJqueue(unittest.TestCase): def test(self): self.assert_(cmdlib.LUTestJqueue._CLIENT_CONNECT_TIMEOUT < (luxi.WFJC_TIMEOUT * 0.75), msg=("Client timeout too high, might not notice bugs" " in WaitForJobChange")) class TestLUQuery(unittest.TestCase): def test(self): self.assertEqual(sorted(query._QUERY_IMPL.keys()), sorted(constants.QR_VIA_OP)) assert constants.QR_NODE in constants.QR_VIA_OP assert constants.QR_INSTANCE in constants.QR_VIA_OP for i in constants.QR_VIA_OP: self.assert_(query._GetQueryImplementation(i)) self.assertRaises(errors.OpPrereqError, query._GetQueryImplementation, "") self.assertRaises(errors.OpPrereqError, query._GetQueryImplementation, "xyz") class TestLUGroupAssignNodes(unittest.TestCase): def testCheckAssignmentForSplitInstances(self): node_data = dict((n, objects.Node(name=n, group=g)) for (n, g) in [("n1a", "g1"), ("n1b", "g1"), ("n2a", "g2"), ("n2b", "g2"), ("n3a", "g3"), ("n3b", "g3"), ("n3c", "g3"), ]) def Instance(uuid, pnode, snode): if snode is None: disks = [] disk_template = constants.DT_DISKLESS else: disks = [objects.Disk(dev_type=constants.DT_DRBD8, logical_id=[pnode, snode, 1, 17, 17])] disk_template = constants.DT_DRBD8 return objects.Instance(name="%s-name" % uuid, uuid="%s" % uuid, primary_node=pnode, disks=disks, disk_template=disk_template) instance_data = dict((uuid, Instance(uuid, pnode, snode)) for uuid, pnode, snode in [("inst1a", "n1a", "n1b"), ("inst1b", "n1b", "n1a"), ("inst2a", "n2a", "n2b"), ("inst3a", "n3a", None), ("inst3b", "n3b", "n1b"), ("inst3c", "n3b", "n2b"), ]) # Test first with the existing state. (new, prev) = \ group.LUGroupAssignNodes.CheckAssignmentForSplitInstances([], node_data, instance_data) self.assertEqual([], new) self.assertEqual(set(["inst3b", "inst3c"]), set(prev)) # And now some changes. 
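    # Simulate moving node "n1b" from group g1 to g3: the DRBD mirrors of
    # inst1a and inst1b now straddle g1/g3 and are newly split, inst3b
    # (n3b/n1b) becomes intra-group again, and only inst3c stays split.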
(new, prev) = \ group.LUGroupAssignNodes.CheckAssignmentForSplitInstances([("n1b", "g3")], node_data, instance_data) self.assertEqual(set(["inst1a", "inst1b"]), set(new)) self.assertEqual(set(["inst3c"]), set(prev)) class TestClusterVerifySsh(unittest.TestCase): def testMultipleGroups(self): fn = cluster.LUClusterVerifyGroup._SelectSshCheckNodes mygroupnodes = [ objects.Node(name="node20", group="my", offline=False), objects.Node(name="node21", group="my", offline=False), objects.Node(name="node22", group="my", offline=False), objects.Node(name="node23", group="my", offline=False), objects.Node(name="node24", group="my", offline=False), objects.Node(name="node25", group="my", offline=False), objects.Node(name="node26", group="my", offline=True), ] nodes = [ objects.Node(name="node1", group="g1", offline=True), objects.Node(name="node2", group="g1", offline=False), objects.Node(name="node3", group="g1", offline=False), objects.Node(name="node4", group="g1", offline=True), objects.Node(name="node5", group="g1", offline=False), objects.Node(name="node10", group="xyz", offline=False), objects.Node(name="node11", group="xyz", offline=False), objects.Node(name="node40", group="alloff", offline=True), objects.Node(name="node41", group="alloff", offline=True), objects.Node(name="node50", group="aaa", offline=False), ] + mygroupnodes assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) (online, perhost) = fn(mygroupnodes, "my", nodes) self.assertEqual(online, ["node%s" % i for i in range(20, 26)]) self.assertEqual(set(perhost.keys()), set(online)) self.assertEqual(perhost, { "node20": ["node10", "node2", "node50"], "node21": ["node11", "node3", "node50"], "node22": ["node10", "node5", "node50"], "node23": ["node11", "node2", "node50"], "node24": ["node10", "node3", "node50"], "node25": ["node11", "node5", "node50"], }) def testSingleGroup(self): fn = cluster.LUClusterVerifyGroup._SelectSshCheckNodes nodes = [ objects.Node(name="node1", group="default", offline=True), objects.Node(name="node2", group="default", offline=False), objects.Node(name="node3", group="default", offline=False), objects.Node(name="node4", group="default", offline=True), ] assert not utils.FindDuplicates(map(operator.attrgetter("name"), nodes)) (online, perhost) = fn(nodes, "default", nodes) self.assertEqual(online, ["node2", "node3"]) self.assertEqual(set(perhost.keys()), set(online)) self.assertEqual(perhost, { "node2": [], "node3": [], }) class TestClusterVerifyFiles(unittest.TestCase): @staticmethod def _FakeErrorIf(errors, cond, ecode, item, msg, *args, **kwargs): assert ((ecode == constants.CV_ENODEFILECHECK and ht.TNonEmptyString(item)) or (ecode == constants.CV_ECLUSTERFILECHECK and item is None)) if args: msg = msg % args if cond: errors.append((item, msg)) def test(self): errors = [] nodeinfo = [ objects.Node(name="master.example.com", uuid="master-uuid", offline=False, vm_capable=True), objects.Node(name="node2.example.com", uuid="node2-uuid", offline=False, vm_capable=True), objects.Node(name="node3.example.com", uuid="node3-uuid", master_candidate=True, vm_capable=False), objects.Node(name="node4.example.com", uuid="node4-uuid", offline=False, vm_capable=True), objects.Node(name="nodata.example.com", uuid="nodata-uuid", offline=False, vm_capable=True), objects.Node(name="offline.example.com", uuid="offline-uuid", offline=True), ] files_all = set([ pathutils.CLUSTER_DOMAIN_SECRET_FILE, pathutils.RAPI_CERT_FILE, pathutils.RAPI_USERS_FILE, ]) files_opt = set([ pathutils.RAPI_USERS_FILE, 
hv_xen.XL_CONFIG_FILE, pathutils.VNC_PASSWORD_FILE, ]) files_mc = set([ pathutils.CLUSTER_CONF_FILE, ]) files_vm = set([ hv_xen.XEND_CONFIG_FILE, hv_xen.XL_CONFIG_FILE, pathutils.VNC_PASSWORD_FILE, ]) nvinfo = { "master-uuid": rpc.RpcResult(data=(True, { constants.NV_FILELIST: { pathutils.CLUSTER_CONF_FILE: "82314f897f38b35f9dab2f7c6b1593e0", pathutils.RAPI_CERT_FILE: "babbce8f387bc082228e544a2146fee4", pathutils.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", hv_xen.XEND_CONFIG_FILE: "b4a8a824ab3cac3d88839a9adeadf310", hv_xen.XL_CONFIG_FILE: "77935cee92afd26d162f9e525e3d49b9" }})), "node2-uuid": rpc.RpcResult(data=(True, { constants.NV_FILELIST: { pathutils.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", hv_xen.XEND_CONFIG_FILE: "b4a8a824ab3cac3d88839a9adeadf310", } })), "node3-uuid": rpc.RpcResult(data=(True, { constants.NV_FILELIST: { pathutils.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", pathutils.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", } })), "node4-uuid": rpc.RpcResult(data=(True, { constants.NV_FILELIST: { pathutils.RAPI_CERT_FILE: "97f0356500e866387f4b84233848cc4a", pathutils.CLUSTER_CONF_FILE: "conf-a6d4b13e407867f7a7b4f0f232a8f527", pathutils.CLUSTER_DOMAIN_SECRET_FILE: "cds-47b5b3f19202936bb4", pathutils.RAPI_USERS_FILE: "rapiusers-ea3271e8d810ef3", hv_xen.XL_CONFIG_FILE: "77935cee92afd26d162f9e525e3d49b9" } })), "nodata-uuid": rpc.RpcResult(data=(True, {})), "offline-uuid": rpc.RpcResult(offline=True), } assert set(nvinfo.keys()) == set(map(operator.attrgetter("uuid"), nodeinfo)) verify_lu = cluster.LUClusterVerifyGroup(mocks.FakeProc(), opcodes.OpClusterVerify(), mocks.FakeContext(), None) verify_lu._ErrorIf = compat.partial(self._FakeErrorIf, errors) # TODO: That's a bit hackish to mock only this single method. We should # build a better FakeConfig which provides such a feature already. 
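    # The stub below resolves a node UUID to its name with a plain scan of
    # the nodeinfo list; that is all _VerifyFiles needs in order to render
    # readable node names in its error messages.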
def GetNodeName(node_uuid): for node in nodeinfo: if node.uuid == node_uuid: return node.name return None verify_lu.cfg.GetNodeName = GetNodeName verify_lu._VerifyFiles(nodeinfo, "master-uuid", nvinfo, (files_all, files_opt, files_mc, files_vm)) self.assertEqual(sorted(errors), sorted([ (None, ("File %s found with 2 different checksums (variant 1 on" " node2.example.com, node3.example.com, node4.example.com;" " variant 2 on master.example.com)" % pathutils.RAPI_CERT_FILE)), (None, ("File %s is missing from node(s) node2.example.com" % pathutils.CLUSTER_DOMAIN_SECRET_FILE)), (None, ("File %s should not exist on node(s) node4.example.com" % pathutils.CLUSTER_CONF_FILE)), (None, ("File %s is missing from node(s) node4.example.com" % hv_xen.XEND_CONFIG_FILE)), (None, ("File %s is missing from node(s) node3.example.com" % pathutils.CLUSTER_CONF_FILE)), (None, ("File %s found with 2 different checksums (variant 1 on" " master.example.com; variant 2 on node4.example.com)" % pathutils.CLUSTER_CONF_FILE)), (None, ("File %s is optional, but it must exist on all or no nodes (not" " found on master.example.com, node2.example.com," " node3.example.com)" % pathutils.RAPI_USERS_FILE)), (None, ("File %s is optional, but it must exist on all or no nodes (not" " found on node2.example.com)" % hv_xen.XL_CONFIG_FILE)), ("nodata.example.com", "Node did not return file checksum data"), ])) class _FakeLU: def __init__(self, cfg=NotImplemented, proc=NotImplemented, rpc=NotImplemented): self.warning_log = [] self.info_log = [] self.cfg = cfg self.proc = proc self.rpc = rpc def LogWarning(self, text, *args): self.warning_log.append((text, args)) def LogInfo(self, text, *args): self.info_log.append((text, args)) class TestLoadNodeEvacResult(unittest.TestCase): def testSuccess(self): for moved in [[], [ ("inst20153.example.com", "grp2", ["nodeA4509", "nodeB2912"]), ]]: for early_release in [False, True]: for use_nodes in [False, True]: jobs = [ [opcodes.OpInstanceReplaceDisks().__getstate__()], [opcodes.OpInstanceMigrate().__getstate__()], ] alloc_result = (moved, [], jobs) assert iallocator._NEVAC_RESULT(alloc_result) lu = _FakeLU() result = common.LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes) if moved: (_, (info_args, )) = lu.info_log.pop(0) for (instname, instgroup, instnodes) in moved: self.assertTrue(instname in info_args) if use_nodes: for i in instnodes: self.assertTrue(i in info_args) else: self.assertTrue(instgroup in info_args) self.assertFalse(lu.info_log) self.assertFalse(lu.warning_log) for op in itertools.chain(*result): if hasattr(op.__class__, "early_release"): self.assertEqual(op.early_release, early_release) else: self.assertFalse(hasattr(op, "early_release")) def testFailed(self): alloc_result = ([], [ ("inst5191.example.com", "errormsg21178"), ], []) assert iallocator._NEVAC_RESULT(alloc_result) lu = _FakeLU() self.assertRaises(errors.OpExecError, common.LoadNodeEvacResult, lu, alloc_result, False, False) self.assertFalse(lu.info_log) (_, (args, )) = lu.warning_log.pop(0) self.assertTrue("inst5191.example.com" in args) self.assertTrue("errormsg21178" in args) self.assertFalse(lu.warning_log) class TestUpdateAndVerifySubDict(unittest.TestCase): def setUp(self): self.type_check = { "a": constants.VTYPE_INT, "b": constants.VTYPE_STRING, "c": constants.VTYPE_BOOL, "d": constants.VTYPE_STRING, } def test(self): old_test = { "foo": { "d": "blubb", "a": 321, }, "baz": { "a": 678, "b": "678", "c": True, }, } test = { "foo": { "a": 123, "b": "123", "c": True, }, "bar": { "a": 321, "b": 
"321", "c": False, }, } mv = { "foo": { "a": 123, "b": "123", "c": True, "d": "blubb" }, "bar": { "a": 321, "b": "321", "c": False, }, "baz": { "a": 678, "b": "678", "c": True, }, } verified = common._UpdateAndVerifySubDict(old_test, test, self.type_check) self.assertEqual(verified, mv) def testWrong(self): test = { "foo": { "a": "blubb", "b": "123", "c": True, }, "bar": { "a": 321, "b": "321", "c": False, }, } self.assertRaises(errors.TypeEnforcementError, common._UpdateAndVerifySubDict, {}, test, self.type_check) class TestHvStateHelper(unittest.TestCase): def testWithoutOpData(self): self.assertEqual(common.MergeAndVerifyHvState(None, NotImplemented), None) def testWithoutOldData(self): new = { constants.HT_XEN_PVM: { constants.HVST_MEMORY_TOTAL: 4096, }, } self.assertEqual(common.MergeAndVerifyHvState(new, None), new) def testWithWrongHv(self): new = { "i-dont-exist": { constants.HVST_MEMORY_TOTAL: 4096, }, } self.assertRaises(errors.OpPrereqError, common.MergeAndVerifyHvState, new, None) class TestDiskStateHelper(unittest.TestCase): def testWithoutOpData(self): self.assertEqual(common.MergeAndVerifyDiskState(None, NotImplemented), None) def testWithoutOldData(self): new = { constants.DT_PLAIN: { "xenvg": { constants.DS_DISK_RESERVED: 1024, }, }, } self.assertEqual(common.MergeAndVerifyDiskState(new, None), new) def testWithWrongStorageType(self): new = { "i-dont-exist": { "xenvg": { constants.DS_DISK_RESERVED: 1024, }, }, } self.assertRaises(errors.OpPrereqError, common.MergeAndVerifyDiskState, new, None) class TestComputeMinMaxSpec(unittest.TestCase): def setUp(self): self.ispecs = { constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_DISK_SIZE: 1024, }, constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 128, constants.ISPEC_DISK_COUNT: 1, }, } def testNoneValue(self): self.assertTrue(common._ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, self.ispecs, None) is None) def testAutoValue(self): self.assertTrue(common._ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, self.ispecs, constants.VALUE_AUTO) is None) def testNotDefined(self): self.assertTrue(common._ComputeMinMaxSpec(constants.ISPEC_NIC_COUNT, None, self.ispecs, 3) is None) def testNoMinDefined(self): self.assertTrue(common._ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, None, self.ispecs, 128) is None) def testNoMaxDefined(self): self.assertTrue(common._ComputeMinMaxSpec(constants.ISPEC_DISK_COUNT, None, self.ispecs, 16) is None) def testOutOfRange(self): for (name, val) in ((constants.ISPEC_MEM_SIZE, 64), (constants.ISPEC_MEM_SIZE, 768), (constants.ISPEC_DISK_SIZE, 4096), (constants.ISPEC_DISK_COUNT, 0)): min_v = self.ispecs[constants.ISPECS_MIN].get(name, val) max_v = self.ispecs[constants.ISPECS_MAX].get(name, val) self.assertEqual(common._ComputeMinMaxSpec(name, None, self.ispecs, val), "%s value %s is not in range [%s, %s]" % (name, val,min_v, max_v)) self.assertEqual(common._ComputeMinMaxSpec(name, "1", self.ispecs, val), "%s/1 value %s is not in range [%s, %s]" % (name, val,min_v, max_v)) def test(self): for (name, val) in ((constants.ISPEC_MEM_SIZE, 256), (constants.ISPEC_MEM_SIZE, 128), (constants.ISPEC_MEM_SIZE, 512), (constants.ISPEC_DISK_SIZE, 1024), (constants.ISPEC_DISK_SIZE, 0), (constants.ISPEC_DISK_COUNT, 1), (constants.ISPEC_DISK_COUNT, 5)): self.assertTrue(common._ComputeMinMaxSpec(name, None, self.ispecs, val) is None) def _ValidateComputeMinMaxSpec(name, *_): assert name in constants.ISPECS_PARAMETERS return None def _NoDiskComputeMinMaxSpec(name, *_): if name == 
constants.ISPEC_DISK_COUNT: return name else: return None class _SpecWrapper: def __init__(self, spec): self.spec = spec def ComputeMinMaxSpec(self, *args): return self.spec.pop(0) class TestComputeIPolicySpecViolation(unittest.TestCase): # Minimal policy accepted by _ComputeIPolicySpecViolation() _MICRO_IPOL = { constants.IPOLICY_DTS: [constants.DT_PLAIN, constants.DT_DISKLESS], constants.ISPECS_MINMAX: [NotImplemented], } def test(self): compute_fn = _ValidateComputeMinMaxSpec ret = common.ComputeIPolicySpecViolation(self._MICRO_IPOL, 1024, 1, 1, 1, [1024], 1, constants.DT_PLAIN, _compute_fn=compute_fn) self.assertEqual(ret, []) def testDiskFull(self): compute_fn = _NoDiskComputeMinMaxSpec ret = common.ComputeIPolicySpecViolation(self._MICRO_IPOL, 1024, 1, 1, 1, [1024], 1, constants.DT_PLAIN, _compute_fn=compute_fn) self.assertEqual(ret, [constants.ISPEC_DISK_COUNT]) def testDiskLess(self): compute_fn = _NoDiskComputeMinMaxSpec ret = common.ComputeIPolicySpecViolation(self._MICRO_IPOL, 1024, 1, 1, 1, [1024], 1, constants.DT_DISKLESS, _compute_fn=compute_fn) self.assertEqual(ret, []) def testWrongTemplates(self): compute_fn = _ValidateComputeMinMaxSpec ret = common.ComputeIPolicySpecViolation(self._MICRO_IPOL, 1024, 1, 1, 1, [1024], 1, constants.DT_DRBD8, _compute_fn=compute_fn) self.assertEqual(len(ret), 1) self.assertTrue("Disk template" in ret[0]) def testInvalidArguments(self): self.assertRaises(AssertionError, common.ComputeIPolicySpecViolation, self._MICRO_IPOL, 1024, 1, 1, 1, [], 1, constants.DT_PLAIN,) def testInvalidSpec(self): spec = _SpecWrapper([None, False, "foo", None, "bar", None]) compute_fn = spec.ComputeMinMaxSpec ret = common.ComputeIPolicySpecViolation(self._MICRO_IPOL, 1024, 1, 1, 1, [1024], 1, constants.DT_PLAIN, _compute_fn=compute_fn) self.assertEqual(ret, ["foo", "bar"]) self.assertFalse(spec.spec) def testWithIPolicy(self): mem_size = 2048 cpu_count = 2 disk_count = 1 disk_sizes = [512] nic_count = 1 spindle_use = 4 disk_template = "mytemplate" ispec = { constants.ISPEC_MEM_SIZE: mem_size, constants.ISPEC_CPU_COUNT: cpu_count, constants.ISPEC_DISK_COUNT: disk_count, constants.ISPEC_DISK_SIZE: disk_sizes[0], constants.ISPEC_NIC_COUNT: nic_count, constants.ISPEC_SPINDLE_USE: spindle_use, } ipolicy1 = { constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: ispec, constants.ISPECS_MAX: ispec, }], constants.IPOLICY_DTS: [disk_template], } ispec_copy = copy.deepcopy(ispec) ipolicy2 = { constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: ispec_copy, constants.ISPECS_MAX: ispec_copy, }, { constants.ISPECS_MIN: ispec, constants.ISPECS_MAX: ispec, }, ], constants.IPOLICY_DTS: [disk_template], } ipolicy3 = { constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: ispec, constants.ISPECS_MAX: ispec, }, { constants.ISPECS_MIN: ispec_copy, constants.ISPECS_MAX: ispec_copy, }, ], constants.IPOLICY_DTS: [disk_template], } def AssertComputeViolation(ipolicy, violations): ret = common.ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template) self.assertEqual(len(ret), violations) AssertComputeViolation(ipolicy1, 0) AssertComputeViolation(ipolicy2, 0) AssertComputeViolation(ipolicy3, 0) for par in constants.ISPECS_PARAMETERS: ispec[par] += 1 AssertComputeViolation(ipolicy1, 1) AssertComputeViolation(ipolicy2, 0) AssertComputeViolation(ipolicy3, 0) ispec[par] -= 2 AssertComputeViolation(ipolicy1, 1) AssertComputeViolation(ipolicy2, 0) AssertComputeViolation(ipolicy3, 0) ispec[par] += 1 # Restore ipolicy1[constants.IPOLICY_DTS] = 
["another_template"] AssertComputeViolation(ipolicy1, 1) class _StubComputeIPolicySpecViolation: def __init__(self, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template): self.mem_size = mem_size self.cpu_count = cpu_count self.disk_count = disk_count self.nic_count = nic_count self.disk_sizes = disk_sizes self.spindle_use = spindle_use self.disk_template = disk_template def __call__(self, _, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template): assert self.mem_size == mem_size assert self.cpu_count == cpu_count assert self.disk_count == disk_count assert self.nic_count == nic_count assert self.disk_sizes == disk_sizes assert self.spindle_use == spindle_use assert self.disk_template == disk_template return [] class _FakeConfigForComputeIPolicyInstanceViolation: def __init__(self, be, excl_stor): self.cluster = objects.Cluster(beparams={"default": be}) self.excl_stor = excl_stor def GetClusterInfo(self): return self.cluster def GetNodeInfo(self, _): return {} def GetNdParams(self, _): return { constants.ND_EXCLUSIVE_STORAGE: self.excl_stor, } class TestComputeIPolicyInstanceViolation(unittest.TestCase): def test(self): beparams = { constants.BE_MAXMEM: 2048, constants.BE_VCPUS: 2, constants.BE_SPINDLE_USE: 4, } disks = [objects.Disk(size=512, spindles=13)] cfg = _FakeConfigForComputeIPolicyInstanceViolation(beparams, False) instance = objects.Instance(beparams=beparams, disks=disks, nics=[], disk_template=constants.DT_PLAIN) stub = _StubComputeIPolicySpecViolation(2048, 2, 1, 0, [512], 4, constants.DT_PLAIN) ret = common.ComputeIPolicyInstanceViolation(NotImplemented, instance, cfg, _compute_fn=stub) self.assertEqual(ret, []) instance2 = objects.Instance(beparams={}, disks=disks, nics=[], disk_template=constants.DT_PLAIN) ret = common.ComputeIPolicyInstanceViolation(NotImplemented, instance2, cfg, _compute_fn=stub) self.assertEqual(ret, []) cfg_es = _FakeConfigForComputeIPolicyInstanceViolation(beparams, True) stub_es = _StubComputeIPolicySpecViolation(2048, 2, 1, 0, [512], 13, constants.DT_PLAIN) ret = common.ComputeIPolicyInstanceViolation(NotImplemented, instance, cfg_es, _compute_fn=stub_es) self.assertEqual(ret, []) ret = common.ComputeIPolicyInstanceViolation(NotImplemented, instance2, cfg_es, _compute_fn=stub_es) self.assertEqual(ret, []) class TestComputeIPolicyInstanceSpecViolation(unittest.TestCase): def test(self): ispec = { constants.ISPEC_MEM_SIZE: 2048, constants.ISPEC_CPU_COUNT: 2, constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [512], constants.ISPEC_NIC_COUNT: 0, constants.ISPEC_SPINDLE_USE: 1, } stub = _StubComputeIPolicySpecViolation(2048, 2, 1, 0, [512], 1, constants.DT_PLAIN) ret = instance._ComputeIPolicyInstanceSpecViolation(NotImplemented, ispec, constants.DT_PLAIN, _compute_fn=stub) self.assertEqual(ret, []) class _CallRecorder: def __init__(self, return_value=None): self.called = False self.return_value = return_value def __call__(self, *args): self.called = True return self.return_value class TestComputeIPolicyNodeViolation(unittest.TestCase): def setUp(self): self.recorder = _CallRecorder(return_value=[]) def testSameGroup(self): ret = instance_utils._ComputeIPolicyNodeViolation( NotImplemented, NotImplemented, "foo", "foo", NotImplemented, _compute_fn=self.recorder) self.assertFalse(self.recorder.called) self.assertEqual(ret, []) def testDifferentGroup(self): ret = instance_utils._ComputeIPolicyNodeViolation( NotImplemented, NotImplemented, "foo", "bar", NotImplemented, 
_compute_fn=self.recorder) self.assertTrue(self.recorder.called) self.assertEqual(ret, []) class _FakeConfigForTargetNodeIPolicy: def __init__(self, node_info=NotImplemented): self._node_info = node_info def GetNodeInfo(self, _): return self._node_info class TestCheckTargetNodeIPolicy(unittest.TestCase): def setUp(self): self.instance = objects.Instance(primary_node="blubb") self.target_node = objects.Node(group="bar") node_info = objects.Node(group="foo") fake_cfg = _FakeConfigForTargetNodeIPolicy(node_info=node_info) self.lu = _FakeLU(cfg=fake_cfg) def testNoViolation(self): compute_recoder = _CallRecorder(return_value=[]) instance.CheckTargetNodeIPolicy(self.lu, NotImplemented, self.instance, self.target_node, NotImplemented, _compute_fn=compute_recoder) self.assertTrue(compute_recoder.called) self.assertEqual(self.lu.warning_log, []) def testNoIgnore(self): compute_recoder = _CallRecorder(return_value=["mem_size not in range"]) self.assertRaises(errors.OpPrereqError, instance.CheckTargetNodeIPolicy, self.lu, NotImplemented, self.instance, self.target_node, NotImplemented, _compute_fn=compute_recoder) self.assertTrue(compute_recoder.called) self.assertEqual(self.lu.warning_log, []) def testIgnoreViolation(self): compute_recoder = _CallRecorder(return_value=["mem_size not in range"]) instance.CheckTargetNodeIPolicy(self.lu, NotImplemented, self.instance, self.target_node, NotImplemented, ignore=True, _compute_fn=compute_recoder) self.assertTrue(compute_recoder.called) msg = ("Instance does not meet target node group's (bar) instance policy:" " mem_size not in range") self.assertEqual(self.lu.warning_log, [(msg, ())]) class TestApplyContainerMods(unittest.TestCase): def testEmptyContainer(self): container = [] chgdesc = [] instance._ApplyContainerMods("test", container, chgdesc, [], None, None, None) self.assertEqual(container, []) self.assertEqual(chgdesc, []) def testAdd(self): container = [] chgdesc = [] mods = instance._PrepareContainerMods([ (constants.DDM_ADD, -1, "Hello"), (constants.DDM_ADD, -1, "World"), (constants.DDM_ADD, 0, "Start"), (constants.DDM_ADD, -1, "End"), ], None) instance._ApplyContainerMods("test", container, chgdesc, mods, None, None, None) self.assertEqual(container, ["Start", "Hello", "World", "End"]) self.assertEqual(chgdesc, []) mods = instance._PrepareContainerMods([ (constants.DDM_ADD, 0, "zero"), (constants.DDM_ADD, 3, "Added"), (constants.DDM_ADD, 5, "four"), (constants.DDM_ADD, 7, "xyz"), ], None) instance._ApplyContainerMods("test", container, chgdesc, mods, None, None, None) self.assertEqual(container, ["zero", "Start", "Hello", "Added", "World", "four", "End", "xyz"]) self.assertEqual(chgdesc, []) for idx in [-2, len(container) + 1]: mods = instance._PrepareContainerMods([ (constants.DDM_ADD, idx, "error"), ], None) self.assertRaises(IndexError, instance._ApplyContainerMods, "test", container, None, mods, None, None, None) def testRemoveError(self): for idx in [0, 1, 2, 100, -1, -4]: mods = instance._PrepareContainerMods([ (constants.DDM_REMOVE, idx, None), ], None) self.assertRaises(IndexError, instance._ApplyContainerMods, "test", [], None, mods, None, None, None) mods = instance._PrepareContainerMods([ (constants.DDM_REMOVE, 0, object()), ], None) self.assertRaises(AssertionError, instance._ApplyContainerMods, "test", [""], None, mods, None, None, None) def testAddError(self): for idx in range(-100, -1) + [100]: mods = instance._PrepareContainerMods([ (constants.DDM_ADD, idx, None), ], None) self.assertRaises(IndexError, 
instance._ApplyContainerMods, "test", [], None, mods, None, None, None) def testRemove(self): container = ["item 1", "item 2"] mods = instance._PrepareContainerMods([ (constants.DDM_ADD, -1, "aaa"), (constants.DDM_REMOVE, -1, None), (constants.DDM_ADD, -1, "bbb"), ], None) chgdesc = [] instance._ApplyContainerMods("test", container, chgdesc, mods, None, None, None) self.assertEqual(container, ["item 1", "item 2", "bbb"]) self.assertEqual(chgdesc, [ ("test/2", "remove"), ]) def testModify(self): container = ["item 1", "item 2"] mods = instance._PrepareContainerMods([ (constants.DDM_MODIFY, -1, "a"), (constants.DDM_MODIFY, 0, "b"), (constants.DDM_MODIFY, 1, "c"), ], None) chgdesc = [] instance._ApplyContainerMods("test", container, chgdesc, mods, None, None, None) self.assertEqual(container, ["item 1", "item 2"]) self.assertEqual(chgdesc, []) for idx in [-2, len(container) + 1]: mods = instance._PrepareContainerMods([ (constants.DDM_MODIFY, idx, "error"), ], None) self.assertRaises(IndexError, instance._ApplyContainerMods, "test", container, None, mods, None, None, None) class _PrivateData: def __init__(self): self.data = None @staticmethod def _CreateTestFn(idx, params, private): private.data = ("add", idx, params) return ((100 * idx, params), [ ("test/%s" % idx, hex(idx)), ]) @staticmethod def _ModifyTestFn(idx, item, params, private): private.data = ("modify", idx, params) return [ ("test/%s" % idx, "modify %s" % params), ] @staticmethod def _RemoveTestFn(idx, item, private): private.data = ("remove", idx, item) def testAddWithCreateFunction(self): container = [] chgdesc = [] mods = instance._PrepareContainerMods([ (constants.DDM_ADD, -1, "Hello"), (constants.DDM_ADD, -1, "World"), (constants.DDM_ADD, 0, "Start"), (constants.DDM_ADD, -1, "End"), (constants.DDM_REMOVE, 2, None), (constants.DDM_MODIFY, -1, "foobar"), (constants.DDM_REMOVE, 2, None), (constants.DDM_ADD, 1, "More"), ], self._PrivateData) instance._ApplyContainerMods("test", container, chgdesc, mods, self._CreateTestFn, self._ModifyTestFn, self._RemoveTestFn) self.assertEqual(container, [ (000, "Start"), (100, "More"), (000, "Hello"), ]) self.assertEqual(chgdesc, [ ("test/0", "0x0"), ("test/1", "0x1"), ("test/0", "0x0"), ("test/3", "0x3"), ("test/2", "remove"), ("test/2", "modify foobar"), ("test/2", "remove"), ("test/1", "0x1") ]) self.assertTrue(compat.all(op == private.data[0] for (op, _, _, private) in mods)) self.assertEqual([private.data for (op, _, _, private) in mods], [ ("add", 0, "Hello"), ("add", 1, "World"), ("add", 0, "Start"), ("add", 3, "End"), ("remove", 2, (100, "World")), ("modify", 2, "foobar"), ("remove", 2, (300, "End")), ("add", 1, "More"), ]) class _FakeConfigForGenDiskTemplate: def __init__(self, enabled_disk_templates): self._unique_id = itertools.count() self._drbd_minor = itertools.count(20) self._port = itertools.count(constants.FIRST_DRBD_PORT) self._secret = itertools.count() self._enabled_disk_templates = enabled_disk_templates def GetVGName(self): return "testvg" def GenerateUniqueID(self, ec_id): return "ec%s-uq%s" % (ec_id, self._unique_id.next()) def AllocateDRBDMinor(self, nodes, instance): return [self._drbd_minor.next() for _ in nodes] def AllocatePort(self): return self._port.next() def GenerateDRBDSecret(self, ec_id): return "ec%s-secret%s" % (ec_id, self._secret.next()) def GetInstanceInfo(self, _): return "foobar" def GetClusterInfo(self): cluster = objects.Cluster() cluster.enabled_disk_templates = self._enabled_disk_templates return cluster class _FakeProcForGenDiskTemplate: def 
GetECId(self): return 0 class TestGenerateDiskTemplate(unittest.TestCase): def _SetUpLUWithTemplates(self, enabled_disk_templates): self._enabled_disk_templates = enabled_disk_templates cfg = _FakeConfigForGenDiskTemplate(self._enabled_disk_templates) proc = _FakeProcForGenDiskTemplate() self.lu = _FakeLU(cfg=cfg, proc=proc) def setUp(self): nodegroup = objects.NodeGroup(name="ng") nodegroup.UpgradeConfig() self._enabled_disk_templates = list(constants.DISK_TEMPLATES) self._SetUpLUWithTemplates(self._enabled_disk_templates) self.nodegroup = nodegroup @staticmethod def GetDiskParams(): return copy.deepcopy(constants.DISK_DT_DEFAULTS) def testWrongDiskTemplate(self): gdt = instance.GenerateDiskTemplate disk_template = "##unknown##" assert disk_template not in constants.DISK_TEMPLATES self.assertRaises(errors.OpPrereqError, gdt, self.lu, disk_template, "inst26831.example.com", "node30113.example.com", [], [], NotImplemented, NotImplemented, 0, self.lu.LogInfo, self.GetDiskParams()) def testDiskless(self): gdt = instance.GenerateDiskTemplate result = gdt(self.lu, constants.DT_DISKLESS, "inst27734.example.com", "node30113.example.com", [], [], NotImplemented, NotImplemented, 0, self.lu.LogInfo, self.GetDiskParams()) self.assertEqual(result, []) def _TestTrivialDisk(self, template, disk_info, base_index, exp_dev_type, file_storage_dir=NotImplemented, file_driver=NotImplemented): gdt = instance.GenerateDiskTemplate map(lambda params: utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES), disk_info) # Check if non-empty list of secondaries is rejected self.assertRaises(errors.ProgrammerError, gdt, self.lu, template, "inst25088.example.com", "node185.example.com", ["node323.example.com"], [], NotImplemented, NotImplemented, base_index, self.lu.LogInfo, self.GetDiskParams()) result = gdt(self.lu, template, "inst21662.example.com", "node21741.example.com", [], disk_info, file_storage_dir, file_driver, base_index, self.lu.LogInfo, self.GetDiskParams()) for (idx, disk) in enumerate(result): self.assertTrue(isinstance(disk, objects.Disk)) self.assertEqual(disk.dev_type, exp_dev_type) self.assertEqual(disk.size, disk_info[idx][constants.IDISK_SIZE]) self.assertEqual(disk.mode, disk_info[idx][constants.IDISK_MODE]) self.assertTrue(disk.children is None) self._CheckIvNames(result, base_index, base_index + len(disk_info)) instance._UpdateIvNames(base_index, result) self._CheckIvNames(result, base_index, base_index + len(disk_info)) return result def _CheckIvNames(self, disks, base_index, end_index): self.assertEqual(map(operator.attrgetter("iv_name"), disks), ["disk/%s" % i for i in range(base_index, end_index)]) def testPlain(self): disk_info = [{ constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR, }, { constants.IDISK_SIZE: 4096, constants.IDISK_VG: "othervg", constants.IDISK_MODE: constants.DISK_RDWR, }] result = self._TestTrivialDisk(constants.DT_PLAIN, disk_info, 3, constants.DT_PLAIN) self.assertEqual(map(operator.attrgetter("logical_id"), result), [ ("testvg", "ec0-uq0.disk3"), ("othervg", "ec0-uq1.disk4"), ]) def testFile(self): # anything != DT_FILE would do here self._SetUpLUWithTemplates([constants.DT_PLAIN]) self.assertRaises(errors.OpPrereqError, self._TestTrivialDisk, constants.DT_FILE, [], 0, NotImplemented) self.assertRaises(errors.OpPrereqError, self._TestTrivialDisk, constants.DT_SHARED_FILE, [], 0, NotImplemented) for disk_template in [constants.DT_FILE, constants.DT_SHARED_FILE]: disk_info = [{ constants.IDISK_SIZE: 80 * 1024, constants.IDISK_MODE: 
constants.DISK_RDONLY, }, { constants.IDISK_SIZE: 4096, constants.IDISK_MODE: constants.DISK_RDWR, }, { constants.IDISK_SIZE: 6 * 1024, constants.IDISK_MODE: constants.DISK_RDWR, }] self._SetUpLUWithTemplates([disk_template]) result = self._TestTrivialDisk(disk_template, disk_info, 2, disk_template, file_storage_dir="/tmp", file_driver=constants.FD_BLKTAP) self.assertEqual(map(operator.attrgetter("logical_id"), result), [ (constants.FD_BLKTAP, "/tmp/disk2"), (constants.FD_BLKTAP, "/tmp/disk3"), (constants.FD_BLKTAP, "/tmp/disk4"), ]) def testBlock(self): disk_info = [{ constants.IDISK_SIZE: 8 * 1024, constants.IDISK_MODE: constants.DISK_RDWR, constants.IDISK_ADOPT: "/tmp/some/block/dev", }] result = self._TestTrivialDisk(constants.DT_BLOCK, disk_info, 10, constants.DT_BLOCK) self.assertEqual(map(operator.attrgetter("logical_id"), result), [ (constants.BLOCKDEV_DRIVER_MANUAL, "/tmp/some/block/dev"), ]) def testRbd(self): disk_info = [{ constants.IDISK_SIZE: 8 * 1024, constants.IDISK_MODE: constants.DISK_RDONLY, }, { constants.IDISK_SIZE: 100 * 1024, constants.IDISK_MODE: constants.DISK_RDWR, }] result = self._TestTrivialDisk(constants.DT_RBD, disk_info, 0, constants.DT_RBD) self.assertEqual(map(operator.attrgetter("logical_id"), result), [ ("rbd", "ec0-uq0.rbd.disk0"), ("rbd", "ec0-uq1.rbd.disk1"), ]) def testDrbd8(self): gdt = instance.GenerateDiskTemplate drbd8_defaults = constants.DISK_LD_DEFAULTS[constants.DT_DRBD8] drbd8_default_metavg = drbd8_defaults[constants.LDP_DEFAULT_METAVG] disk_info = [{ constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR, }, { constants.IDISK_SIZE: 100 * 1024, constants.IDISK_MODE: constants.DISK_RDONLY, constants.IDISK_METAVG: "metavg", }, { constants.IDISK_SIZE: 4096, constants.IDISK_MODE: constants.DISK_RDWR, constants.IDISK_VG: "vgxyz", }, ] exp_logical_ids = [[ (self.lu.cfg.GetVGName(), "ec0-uq0.disk0_data"), (drbd8_default_metavg, "ec0-uq0.disk0_meta"), ], [ (self.lu.cfg.GetVGName(), "ec0-uq1.disk1_data"), ("metavg", "ec0-uq1.disk1_meta"), ], [ ("vgxyz", "ec0-uq2.disk2_data"), (drbd8_default_metavg, "ec0-uq2.disk2_meta"), ]] assert len(exp_logical_ids) == len(disk_info) map(lambda params: utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES), disk_info) # Check if empty list of secondaries is rejected self.assertRaises(errors.ProgrammerError, gdt, self.lu, constants.DT_DRBD8, "inst827.example.com", "node1334.example.com", [], disk_info, NotImplemented, NotImplemented, 0, self.lu.LogInfo, self.GetDiskParams()) result = gdt(self.lu, constants.DT_DRBD8, "inst827.example.com", "node1334.example.com", ["node12272.example.com"], disk_info, NotImplemented, NotImplemented, 0, self.lu.LogInfo, self.GetDiskParams()) for (idx, disk) in enumerate(result): self.assertTrue(isinstance(disk, objects.Disk)) self.assertEqual(disk.dev_type, constants.DT_DRBD8) self.assertEqual(disk.size, disk_info[idx][constants.IDISK_SIZE]) self.assertEqual(disk.mode, disk_info[idx][constants.IDISK_MODE]) for child in disk.children: self.assertTrue(isinstance(disk, objects.Disk)) self.assertEqual(child.dev_type, constants.DT_PLAIN) self.assertTrue(child.children is None) self.assertEqual(map(operator.attrgetter("logical_id"), disk.children), exp_logical_ids[idx]) self.assertEqual(len(disk.children), 2) self.assertEqual(disk.children[0].size, disk.size) self.assertEqual(disk.children[1].size, constants.DRBD_META_SIZE) self._CheckIvNames(result, 0, len(disk_info)) instance._UpdateIvNames(0, result) self._CheckIvNames(result, 0, len(disk_info)) 
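    # Each DRBD8 logical_id is the 6-tuple (primary_node, secondary_node,
    # port, minor_a, minor_b, secret); the fake config hands out ports
    # sequentially from FIRST_DRBD_PORT, minors from 20 upwards and secrets
    # named "ec0-secretN", which the expected tuples below mirror.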
self.assertEqual(map(operator.attrgetter("logical_id"), result), [ ("node1334.example.com", "node12272.example.com", constants.FIRST_DRBD_PORT, 20, 21, "ec0-secret0"), ("node1334.example.com", "node12272.example.com", constants.FIRST_DRBD_PORT + 1, 22, 23, "ec0-secret1"), ("node1334.example.com", "node12272.example.com", constants.FIRST_DRBD_PORT + 2, 24, 25, "ec0-secret2"), ]) class _ConfigForDiskWipe: def __init__(self, exp_node_uuid): self._exp_node_uuid = exp_node_uuid def SetDiskID(self, device, node_uuid): assert isinstance(device, objects.Disk) assert node_uuid == self._exp_node_uuid def GetNodeName(self, node_uuid): assert node_uuid == self._exp_node_uuid return "name.of.expected.node" class _RpcForDiskWipe: def __init__(self, exp_node, pause_cb, wipe_cb): self._exp_node = exp_node self._pause_cb = pause_cb self._wipe_cb = wipe_cb def call_blockdev_pause_resume_sync(self, node, disks, pause): assert node == self._exp_node return rpc.RpcResult(data=self._pause_cb(disks, pause)) def call_blockdev_wipe(self, node, bdev, offset, size): assert node == self._exp_node return rpc.RpcResult(data=self._wipe_cb(bdev, offset, size)) class _DiskPauseTracker: def __init__(self): self.history = [] def __call__(self, (disks, instance), pause): assert not (set(disks) - set(instance.disks)) self.history.extend((i.logical_id, i.size, pause) for i in disks) return (True, [True] * len(disks)) class _DiskWipeProgressTracker: def __init__(self, start_offset): self._start_offset = start_offset self.progress = {} def __call__(self, (disk, _), offset, size): assert isinstance(offset, (long, int)) assert isinstance(size, (long, int)) max_chunk_size = (disk.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT) assert offset >= self._start_offset assert (offset + size) <= disk.size assert size > 0 assert size <= constants.MAX_WIPE_CHUNK assert size <= max_chunk_size assert offset == self._start_offset or disk.logical_id in self.progress # Keep track of progress cur_progress = self.progress.setdefault(disk.logical_id, self._start_offset) assert cur_progress == offset # Record progress self.progress[disk.logical_id] += size return (True, None) class TestWipeDisks(unittest.TestCase): def _FailingPauseCb(self, (disks, _), pause): self.assertEqual(len(disks), 3) self.assertTrue(pause) # Simulate an RPC error return (False, "error") def testPauseFailure(self): node_name = "node1372.example.com" lu = _FakeLU(rpc=_RpcForDiskWipe(node_name, self._FailingPauseCb, NotImplemented), cfg=_ConfigForDiskWipe(node_name)) disks = [ objects.Disk(dev_type=constants.DT_PLAIN), objects.Disk(dev_type=constants.DT_PLAIN), objects.Disk(dev_type=constants.DT_PLAIN), ] inst = objects.Instance(name="inst21201", primary_node=node_name, disk_template=constants.DT_PLAIN, disks=disks) self.assertRaises(errors.OpExecError, instance.WipeDisks, lu, inst) def _FailingWipeCb(self, (disk, _), offset, size): # This should only ever be called for the first disk self.assertEqual(disk.logical_id, "disk0") return (False, None) def testFailingWipe(self): node_uuid = "node13445-uuid" pt = _DiskPauseTracker() lu = _FakeLU(rpc=_RpcForDiskWipe(node_uuid, pt, self._FailingWipeCb), cfg=_ConfigForDiskWipe(node_uuid)) disks = [ objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk0", size=100 * 1024), objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk1", size=500 * 1024), objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk2", size=256), ] inst = objects.Instance(name="inst562", primary_node=node_uuid, disk_template=constants.DT_PLAIN, 
                           disks=disks)

    try:
      instance.WipeDisks(lu, inst)
    except errors.OpExecError, err:
      # The wipe must have failed on the first disk at offset 0
      self.assertTrue("Could not wipe disk 0 at offset 0" in str(err))
    else:
      self.fail("Did not raise exception")

    # Check if all disks were paused and resumed
    self.assertEqual(pt.history, [
      ("disk0", 100 * 1024, True),
      ("disk1", 500 * 1024, True),
      ("disk2", 256, True),
      ("disk0", 100 * 1024, False),
      ("disk1", 500 * 1024, False),
      ("disk2", 256, False),
      ])

  def _PrepareWipeTest(self, start_offset, disks):
    node_name = "node-with-offset%s.example.com" % start_offset
    pauset = _DiskPauseTracker()
    progresst = _DiskWipeProgressTracker(start_offset)

    lu = _FakeLU(rpc=_RpcForDiskWipe(node_name, pauset, progresst),
                 cfg=_ConfigForDiskWipe(node_name))

    instance = objects.Instance(name="inst3560",
                                primary_node=node_name,
                                disk_template=constants.DT_PLAIN,
                                disks=disks)

    return (lu, instance, pauset, progresst)

  def testNormalWipe(self):
    disks = [
      objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk0",
                   size=1024),
      objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk1",
                   size=500 * 1024),
      objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk2",
                   size=128),
      objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk3",
                   size=constants.MAX_WIPE_CHUNK),
      ]

    (lu, inst, pauset, progresst) = self._PrepareWipeTest(0, disks)

    instance.WipeDisks(lu, inst)

    self.assertEqual(pauset.history, [
      ("disk0", 1024, True),
      ("disk1", 500 * 1024, True),
      ("disk2", 128, True),
      ("disk3", constants.MAX_WIPE_CHUNK, True),
      ("disk0", 1024, False),
      ("disk1", 500 * 1024, False),
      ("disk2", 128, False),
      ("disk3", constants.MAX_WIPE_CHUNK, False),
      ])

    # Ensure the complete disk has been wiped
    self.assertEqual(progresst.progress,
                     dict((i.logical_id, i.size) for i in disks))

  def testWipeWithStartOffset(self):
    for start_offset in [0, 280, 8895, 1563204]:
      disks = [
        objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk0",
                     size=128),
        objects.Disk(dev_type=constants.DT_PLAIN, logical_id="disk1",
                     size=start_offset + (100 * 1024)),
        ]

      (lu, inst, pauset, progresst) = \
        self._PrepareWipeTest(start_offset, disks)

      # Test start offset with only one disk
      instance.WipeDisks(lu, inst, disks=[(1, disks[1], start_offset)])

      # Only the second disk may have been paused and wiped
      self.assertEqual(pauset.history, [
        ("disk1", start_offset + (100 * 1024), True),
        ("disk1", start_offset + (100 * 1024), False),
        ])
      self.assertEqual(progresst.progress, {
        "disk1": disks[1].size,
        })


class TestDiskSizeInBytesToMebibytes(unittest.TestCase):
  def testLessThanOneMebibyte(self):
    for i in [1, 2, 7, 512, 1000, 1023]:
      lu = _FakeLU()
      result = instance_storage._DiskSizeInBytesToMebibytes(lu, i)
      self.assertEqual(result, 1)
      self.assertEqual(len(lu.warning_log), 1)
      self.assertEqual(len(lu.warning_log[0]), 2)
      (_, (warnsize, )) = lu.warning_log[0]
      self.assertEqual(warnsize, (1024 * 1024) - i)

  def testEven(self):
    for i in [1, 2, 7, 512, 1000, 1023]:
      lu = _FakeLU()
      result = instance_storage._DiskSizeInBytesToMebibytes(lu,
                                                            i * 1024 * 1024)
      self.assertEqual(result, i)
      self.assertFalse(lu.warning_log)

  def testLargeNumber(self):
    for i in [1, 2, 7, 512, 1000, 1023, 2724, 12420]:
      for j in [1, 2, 486, 326, 986, 1023]:
        lu = _FakeLU()
        size = (1024 * 1024 * i) + j
        result = instance_storage._DiskSizeInBytesToMebibytes(lu, size)
        self.assertEqual(result, i + 1, msg="Amount was not rounded up")
        self.assertEqual(len(lu.warning_log), 1)
        self.assertEqual(len(lu.warning_log[0]), 2)
        (_, (warnsize, )) = lu.warning_log[0]
        self.assertEqual(warnsize, (1024 * 1024) - j)


class TestCopyLockList(unittest.TestCase):
  def test(self):
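    # CopyLockList passes None and locking.ALL_SET through unchanged, but
    # returns a fresh list object for real name lists, so callers may
    # mutate the copy without affecting the original (hence the identity
    # check below).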
self.assertEqual(instance.CopyLockList([]), []) self.assertEqual(instance.CopyLockList(None), None) self.assertEqual(instance.CopyLockList(locking.ALL_SET), locking.ALL_SET) names = ["foo", "bar"] output = instance.CopyLockList(names) self.assertEqual(names, output) self.assertNotEqual(id(names), id(output), msg="List was not copied") class TestCheckOpportunisticLocking(unittest.TestCase): class OpTest(opcodes.OpCode): OP_PARAMS = [ opcodes._POpportunisticLocking, opcodes._PIAllocFromDesc(""), ] @classmethod def _MakeOp(cls, **kwargs): op = cls.OpTest(**kwargs) op.Validate(True) return op def testMissingAttributes(self): self.assertRaises(AttributeError, instance._CheckOpportunisticLocking, object()) def testDefaults(self): op = self._MakeOp() instance._CheckOpportunisticLocking(op) def test(self): for iallocator in [None, "something", "other"]: for opplock in [False, True]: op = self._MakeOp(iallocator=iallocator, opportunistic_locking=opplock) if opplock and not iallocator: self.assertRaises(errors.OpPrereqError, instance._CheckOpportunisticLocking, op) else: instance._CheckOpportunisticLocking(op) class _OpTestVerifyErrors(opcodes.OpCode): OP_PARAMS = [ opcodes._PDebugSimulateErrors, opcodes._PErrorCodes, opcodes._PIgnoreErrors, ] class _LuTestVerifyErrors(cluster._VerifyErrors): def __init__(self, **kwargs): cluster._VerifyErrors.__init__(self) self.op = _OpTestVerifyErrors(**kwargs) self.op.Validate(True) self.msglist = [] self._feedback_fn = self.msglist.append self.bad = False def DispatchCallError(self, which, *args, **kwargs): if which: self._Error(*args, **kwargs) else: self._ErrorIf(True, *args, **kwargs) def CallErrorIf(self, c, *args, **kwargs): self._ErrorIf(c, *args, **kwargs) class TestVerifyErrors(unittest.TestCase): # Fake cluster-verify error code structures; we use two arbitary real error # codes to pass validation of ignore_errors (_, _ERR1ID, _) = constants.CV_ECLUSTERCFG _NODESTR = "node" _NODENAME = "mynode" _ERR1CODE = (_NODESTR, _ERR1ID, "Error one") (_, _ERR2ID, _) = constants.CV_ECLUSTERCERT _INSTSTR = "instance" _INSTNAME = "myinstance" _ERR2CODE = (_INSTSTR, _ERR2ID, "Error two") # Arguments used to call _Error() or _ErrorIf() _ERR1ARGS = (_ERR1CODE, _NODENAME, "Error1 is %s", "an error") _ERR2ARGS = (_ERR2CODE, _INSTNAME, "Error2 has no argument") # Expected error messages _ERR1MSG = _ERR1ARGS[2] % _ERR1ARGS[3] _ERR2MSG = _ERR2ARGS[2] def testNoError(self): lu = _LuTestVerifyErrors() lu.CallErrorIf(False, self._ERR1CODE, *self._ERR1ARGS) self.assertFalse(lu.bad) self.assertFalse(lu.msglist) def _InitTest(self, **kwargs): self.lu1 = _LuTestVerifyErrors(**kwargs) self.lu2 = _LuTestVerifyErrors(**kwargs) def _CallError(self, *args, **kwargs): # Check that _Error() and _ErrorIf() produce the same results self.lu1.DispatchCallError(True, *args, **kwargs) self.lu2.DispatchCallError(False, *args, **kwargs) self.assertEqual(self.lu1.bad, self.lu2.bad) self.assertEqual(self.lu1.msglist, self.lu2.msglist) # Test-specific checks are made on one LU return self.lu1 def _checkMsgCommon(self, logstr, errmsg, itype, item, warning): self.assertTrue(errmsg in logstr) if warning: self.assertTrue("WARNING" in logstr) else: self.assertTrue("ERROR" in logstr) self.assertTrue(itype in logstr) self.assertTrue(item in logstr) def _checkMsg1(self, logstr, warning=False): self._checkMsgCommon(logstr, self._ERR1MSG, self._NODESTR, self._NODENAME, warning) def _checkMsg2(self, logstr, warning=False): self._checkMsgCommon(logstr, self._ERR2MSG, self._INSTSTR, self._INSTNAME, warning) def 
testPlain(self): self._InitTest() lu = self._CallError(*self._ERR1ARGS) self.assertTrue(lu.bad) self.assertEqual(len(lu.msglist), 1) self._checkMsg1(lu.msglist[0]) def testMultiple(self): self._InitTest() self._CallError(*self._ERR1ARGS) lu = self._CallError(*self._ERR2ARGS) self.assertTrue(lu.bad) self.assertEqual(len(lu.msglist), 2) self._checkMsg1(lu.msglist[0]) self._checkMsg2(lu.msglist[1]) def testIgnore(self): self._InitTest(ignore_errors=[self._ERR1ID]) lu = self._CallError(*self._ERR1ARGS) self.assertFalse(lu.bad) self.assertEqual(len(lu.msglist), 1) self._checkMsg1(lu.msglist[0], warning=True) def testWarning(self): self._InitTest() lu = self._CallError(*self._ERR1ARGS, code=_LuTestVerifyErrors.ETYPE_WARNING) self.assertFalse(lu.bad) self.assertEqual(len(lu.msglist), 1) self._checkMsg1(lu.msglist[0], warning=True) def testWarning2(self): self._InitTest() self._CallError(*self._ERR1ARGS) lu = self._CallError(*self._ERR2ARGS, code=_LuTestVerifyErrors.ETYPE_WARNING) self.assertTrue(lu.bad) self.assertEqual(len(lu.msglist), 2) self._checkMsg1(lu.msglist[0]) self._checkMsg2(lu.msglist[1], warning=True) def testDebugSimulate(self): lu = _LuTestVerifyErrors(debug_simulate_errors=True) lu.CallErrorIf(False, *self._ERR1ARGS) self.assertTrue(lu.bad) self.assertEqual(len(lu.msglist), 1) self._checkMsg1(lu.msglist[0]) def testErrCodes(self): self._InitTest(error_codes=True) lu = self._CallError(*self._ERR1ARGS) self.assertTrue(lu.bad) self.assertEqual(len(lu.msglist), 1) self._checkMsg1(lu.msglist[0]) self.assertTrue(self._ERR1ID in lu.msglist[0]) class TestGetUpdatedIPolicy(unittest.TestCase): """Tests for cmdlib._GetUpdatedIPolicy()""" _OLD_CLUSTER_POLICY = { constants.IPOLICY_VCPU_RATIO: 1.5, constants.ISPECS_MINMAX: [ { constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 32768, constants.ISPEC_CPU_COUNT: 8, constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: 1024, constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 65536, constants.ISPEC_CPU_COUNT: 10, constants.ISPEC_DISK_COUNT: 5, constants.ISPEC_DISK_SIZE: 1024 * 1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 12, }, }, constants.ISPECS_MINMAX_DEFAULTS, ], constants.ISPECS_STD: constants.IPOLICY_DEFAULTS[constants.ISPECS_STD], } _OLD_GROUP_POLICY = { constants.IPOLICY_SPINDLE_RATIO: 2.5, constants.ISPECS_MINMAX: [{ constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 128, constants.ISPEC_CPU_COUNT: 1, constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: 1024, constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 32768, constants.ISPEC_CPU_COUNT: 8, constants.ISPEC_DISK_COUNT: 5, constants.ISPEC_DISK_SIZE: 1024 * 1024, constants.ISPEC_NIC_COUNT: 3, constants.ISPEC_SPINDLE_USE: 12, }, }], } def _TestSetSpecs(self, old_policy, isgroup): diff_minmax = [{ constants.ISPECS_MIN: { constants.ISPEC_MEM_SIZE: 64, constants.ISPEC_CPU_COUNT: 1, constants.ISPEC_DISK_COUNT: 2, constants.ISPEC_DISK_SIZE: 64, constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1, }, constants.ISPECS_MAX: { constants.ISPEC_MEM_SIZE: 16384, constants.ISPEC_CPU_COUNT: 10, constants.ISPEC_DISK_COUNT: 12, constants.ISPEC_DISK_SIZE: 1024, constants.ISPEC_NIC_COUNT: 9, constants.ISPEC_SPINDLE_USE: 18, }, }] diff_std = { constants.ISPEC_DISK_COUNT: 10, constants.ISPEC_DISK_SIZE: 512, } diff_policy = { constants.ISPECS_MINMAX: diff_minmax } if not isgroup: diff_policy[constants.ISPECS_STD] = diff_std new_policy = 
common.GetUpdatedIPolicy(old_policy, diff_policy, group_policy=isgroup) self.assertTrue(constants.ISPECS_MINMAX in new_policy) self.assertEqual(new_policy[constants.ISPECS_MINMAX], diff_minmax) for key in old_policy: if not key in diff_policy: self.assertTrue(key in new_policy) self.assertEqual(new_policy[key], old_policy[key]) if not isgroup: new_std = new_policy[constants.ISPECS_STD] for key in diff_std: self.assertTrue(key in new_std) self.assertEqual(new_std[key], diff_std[key]) old_std = old_policy.get(constants.ISPECS_STD, {}) for key in old_std: self.assertTrue(key in new_std) if key not in diff_std: self.assertEqual(new_std[key], old_std[key]) def _TestSet(self, old_policy, diff_policy, isgroup): new_policy = common.GetUpdatedIPolicy(old_policy, diff_policy, group_policy=isgroup) for key in diff_policy: self.assertTrue(key in new_policy) self.assertEqual(new_policy[key], diff_policy[key]) for key in old_policy: if not key in diff_policy: self.assertTrue(key in new_policy) self.assertEqual(new_policy[key], old_policy[key]) def testSet(self): diff_policy = { constants.IPOLICY_VCPU_RATIO: 3, constants.IPOLICY_DTS: [constants.DT_FILE], } self._TestSet(self._OLD_GROUP_POLICY, diff_policy, True) self._TestSetSpecs(self._OLD_GROUP_POLICY, True) self._TestSet({}, diff_policy, True) self._TestSetSpecs({}, True) self._TestSet(self._OLD_CLUSTER_POLICY, diff_policy, False) self._TestSetSpecs(self._OLD_CLUSTER_POLICY, False) def testUnset(self): old_policy = self._OLD_GROUP_POLICY diff_policy = { constants.IPOLICY_SPINDLE_RATIO: constants.VALUE_DEFAULT, } new_policy = common.GetUpdatedIPolicy(old_policy, diff_policy, group_policy=True) for key in diff_policy: self.assertFalse(key in new_policy) for key in old_policy: if not key in diff_policy: self.assertTrue(key in new_policy) self.assertEqual(new_policy[key], old_policy[key]) self.assertRaises(errors.OpPrereqError, common.GetUpdatedIPolicy, old_policy, diff_policy, group_policy=False) def testUnsetEmpty(self): old_policy = {} for key in constants.IPOLICY_ALL_KEYS: diff_policy = { key: constants.VALUE_DEFAULT, } new_policy = common.GetUpdatedIPolicy(old_policy, diff_policy, group_policy=True) self.assertEqual(new_policy, old_policy) def _TestInvalidKeys(self, old_policy, isgroup): INVALID_KEY = "this_key_shouldnt_be_allowed" INVALID_DICT = { INVALID_KEY: 3, } invalid_policy = INVALID_DICT self.assertRaises(errors.OpPrereqError, common.GetUpdatedIPolicy, old_policy, invalid_policy, group_policy=isgroup) invalid_ispecs = { constants.ISPECS_MINMAX: [INVALID_DICT], } self.assertRaises(errors.TypeEnforcementError, common.GetUpdatedIPolicy, old_policy, invalid_ispecs, group_policy=isgroup) if isgroup: invalid_for_group = { constants.ISPECS_STD: constants.IPOLICY_DEFAULTS[constants.ISPECS_STD], } self.assertRaises(errors.OpPrereqError, common.GetUpdatedIPolicy, old_policy, invalid_for_group, group_policy=isgroup) good_ispecs = self._OLD_CLUSTER_POLICY[constants.ISPECS_MINMAX] invalid_ispecs = copy.deepcopy(good_ispecs) invalid_policy = { constants.ISPECS_MINMAX: invalid_ispecs, } for minmax in invalid_ispecs: for key in constants.ISPECS_MINMAX_KEYS: ispec = minmax[key] ispec[INVALID_KEY] = None self.assertRaises(errors.TypeEnforcementError, common.GetUpdatedIPolicy, old_policy, invalid_policy, group_policy=isgroup) del ispec[INVALID_KEY] for par in constants.ISPECS_PARAMETERS: oldv = ispec[par] ispec[par] = "this_is_not_good" self.assertRaises(errors.TypeEnforcementError, common.GetUpdatedIPolicy, old_policy, invalid_policy, group_policy=isgroup) 
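        # Restore the original valid value so that each parameter is
        # checked in isolation on the next iteration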
ispec[par] = oldv # This is to make sure that no two errors were present during the tests common.GetUpdatedIPolicy(old_policy, invalid_policy, group_policy=isgroup) def testInvalidKeys(self): self._TestInvalidKeys(self._OLD_GROUP_POLICY, True) self._TestInvalidKeys(self._OLD_CLUSTER_POLICY, False) def testInvalidValues(self): for par in (constants.IPOLICY_PARAMETERS | frozenset([constants.IPOLICY_DTS])): bad_policy = { par: "invalid_value", } self.assertRaises(errors.OpPrereqError, common.GetUpdatedIPolicy, {}, bad_policy, group_policy=True) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.vcluster_unittest.py0000744000000000000000000002124712244641676022052 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.vcluster""" import os import unittest from ganeti import utils from ganeti import compat from ganeti import vcluster from ganeti import pathutils import testutils _ENV_DOES_NOT_EXIST = "GANETI_TEST_DOES_NOT_EXIST" _ENV_TEST = "GANETI_TESTVAR" class _EnvVarTest(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) os.environ.pop(_ENV_DOES_NOT_EXIST, None) os.environ.pop(_ENV_TEST, None) class TestGetRootDirectory(_EnvVarTest): def test(self): assert os.getenv(_ENV_TEST) is None self.assertEqual(vcluster._GetRootDirectory(_ENV_DOES_NOT_EXIST), "") self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "") # Absolute path os.environ[_ENV_TEST] = "/tmp/xy11" self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "/tmp/xy11") # Relative path os.environ[_ENV_TEST] = "foobar" self.assertRaises(RuntimeError, vcluster._GetRootDirectory, _ENV_TEST) class TestGetHostname(_EnvVarTest): def test(self): assert os.getenv(_ENV_TEST) is None self.assertEqual(vcluster._GetRootDirectory(_ENV_DOES_NOT_EXIST), "") self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "") os.environ[_ENV_TEST] = "some.host.example.com" self.assertEqual(vcluster._GetHostname(_ENV_TEST), "some.host.example.com") class TestCheckHostname(_EnvVarTest): def test(self): for i in ["/", "/tmp"]: self.assertRaises(RuntimeError, vcluster._CheckHostname, i) class TestPreparePaths(_EnvVarTest): def testInvalidParameters(self): self.assertRaises(RuntimeError, vcluster._PreparePaths, None, "host.example.com") self.assertRaises(RuntimeError, vcluster._PreparePaths, "/tmp/", "") def testNonNormalizedRootDir(self): self.assertRaises(AssertionError, vcluster._PreparePaths, "/tmp////xyz//", "host.example.com") def testInvalidHostname(self): self.assertRaises(RuntimeError, vcluster._PreparePaths, "/tmp", "/") def testPathHostnameMismatch(self): self.assertRaises(RuntimeError, vcluster._PreparePaths, "/tmp/host.example.com", "server.example.com") def testNoVirtCluster(self): for i in ["", None]: 
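# ---------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the tests above pin
# down the merge semantics expected from common.GetUpdatedIPolicy().  The
# helper below (name and code are hypothetical) models only the set/unset
# behaviour; the real function additionally validates keys and value types,
# raising errors.OpPrereqError and errors.TypeEnforcementError as tested.

def _GetUpdatedIPolicySketch(old_policy, diff_policy, unset_marker="default"):
  """Merge diff_policy into old_policy, dropping keys set to unset_marker."""
  new_policy = dict(old_policy)
  for key, value in diff_policy.items():
    if value == unset_marker:
      # Mirrors constants.VALUE_DEFAULT: revert the key to its built-in
      # default by removing the override entirely (compare testUnset above)
      new_policy.pop(key, None)
    else:
      new_policy[key] = value
  return new_policy

# e.g. _GetUpdatedIPolicySketch({"spindle-ratio": 2.5},
#                               {"spindle-ratio": "default"}) yields {},
# just as testUnset expects the key to vanish from the group policy.
# ---------------------------------------------------------------------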
ganeti-2.9.3/test/py/ganeti.vcluster_unittest.py0000744000000000000000000002124712244641676022052 0ustar00rootroot00000000000000#!/usr/bin/python
#
# Copyright (C) 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Script for testing ganeti.vcluster"""

import os
import unittest

from ganeti import utils
from ganeti import compat
from ganeti import vcluster
from ganeti import pathutils

import testutils


_ENV_DOES_NOT_EXIST = "GANETI_TEST_DOES_NOT_EXIST"
_ENV_TEST = "GANETI_TESTVAR"


class _EnvVarTest(testutils.GanetiTestCase):
  def setUp(self):
    testutils.GanetiTestCase.setUp(self)
    os.environ.pop(_ENV_DOES_NOT_EXIST, None)
    os.environ.pop(_ENV_TEST, None)


class TestGetRootDirectory(_EnvVarTest):
  def test(self):
    assert os.getenv(_ENV_TEST) is None

    self.assertEqual(vcluster._GetRootDirectory(_ENV_DOES_NOT_EXIST), "")
    self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "")

    # Absolute path
    os.environ[_ENV_TEST] = "/tmp/xy11"
    self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "/tmp/xy11")

    # Relative path
    os.environ[_ENV_TEST] = "foobar"
    self.assertRaises(RuntimeError, vcluster._GetRootDirectory, _ENV_TEST)


class TestGetHostname(_EnvVarTest):
  def test(self):
    assert os.getenv(_ENV_TEST) is None

    self.assertEqual(vcluster._GetRootDirectory(_ENV_DOES_NOT_EXIST), "")
    self.assertEqual(vcluster._GetRootDirectory(_ENV_TEST), "")

    os.environ[_ENV_TEST] = "some.host.example.com"
    self.assertEqual(vcluster._GetHostname(_ENV_TEST),
                     "some.host.example.com")


class TestCheckHostname(_EnvVarTest):
  def test(self):
    for i in ["/", "/tmp"]:
      self.assertRaises(RuntimeError, vcluster._CheckHostname, i)


class TestPreparePaths(_EnvVarTest):
  def testInvalidParameters(self):
    self.assertRaises(RuntimeError, vcluster._PreparePaths,
                      None, "host.example.com")
    self.assertRaises(RuntimeError, vcluster._PreparePaths,
                      "/tmp/", "")

  def testNonNormalizedRootDir(self):
    self.assertRaises(AssertionError, vcluster._PreparePaths,
                      "/tmp////xyz//", "host.example.com")

  def testInvalidHostname(self):
    self.assertRaises(RuntimeError, vcluster._PreparePaths, "/tmp", "/")

  def testPathHostnameMismatch(self):
    self.assertRaises(RuntimeError, vcluster._PreparePaths,
                      "/tmp/host.example.com", "server.example.com")

  def testNoVirtCluster(self):
    for i in ["", None]:
      self.assertEqual(vcluster._PreparePaths(i, i), ("", "", None))

  def testVirtCluster(self):
    self.assertEqual(vcluster._PreparePaths("/tmp/host.example.com",
                                            "host.example.com"),
                     ("/tmp", "/tmp/host.example.com", "host.example.com"))


class TestMakeNodeRoot(unittest.TestCase):
  def test(self):
    self.assertRaises(RuntimeError, vcluster.MakeNodeRoot, "/tmp", "/")

    for i in ["/tmp", "/tmp/", "/tmp///"]:
      self.assertEqual(vcluster.MakeNodeRoot(i, "other.example.com"),
                       "/tmp/other.example.com")


class TestEnvironmentForHost(unittest.TestCase):
  def test(self):
    self.assertEqual(vcluster.EnvironmentForHost("host.example.com",
                                                 _basedir=None), {})
    for i in ["host.example.com", "other.example.com"]:
      self.assertEqual(vcluster.EnvironmentForHost(i, _basedir="/tmp"), {
        vcluster._ROOTDIR_ENVNAME: "/tmp/%s" % i,
        vcluster._HOSTNAME_ENVNAME: i,
        })


class TestExchangeNodeRoot(unittest.TestCase):
  def test(self):
    result = vcluster.ExchangeNodeRoot("node1.example.com", "/tmp/file",
                                       _basedir=None, _noderoot=None)
    self.assertEqual(result, "/tmp/file")

    self.assertRaises(RuntimeError, vcluster.ExchangeNodeRoot,
                      "node1.example.com", "/tmp/node1.example.com",
                      _basedir="/tmp",
                      _noderoot="/tmp/nodeZZ.example.com")

    result = vcluster.ExchangeNodeRoot("node2.example.com",
                                       "/tmp/node1.example.com/file",
                                       _basedir="/tmp",
                                       _noderoot="/tmp/node1.example.com")
    self.assertEqual(result, "/tmp/node2.example.com/file")


class TestAddNodePrefix(unittest.TestCase):
  def testRelativePath(self):
    self.assertRaises(AssertionError, vcluster.AddNodePrefix,
                      "foobar", _noderoot=None)

  def testRelativeNodeRoot(self):
    self.assertRaises(AssertionError, vcluster.AddNodePrefix,
                      "/tmp", _noderoot="foobar")

  def test(self):
    path = vcluster.AddNodePrefix("/file/path",
                                  _noderoot="/tmp/node1.example.com/")
    self.assertEqual(path, "/tmp/node1.example.com/file/path")

    self.assertEqual(vcluster.AddNodePrefix("/file/path", _noderoot=""),
                     "/file/path")


class TestRemoveNodePrefix(unittest.TestCase):
  def testRelativePath(self):
    self.assertRaises(AssertionError, vcluster._RemoveNodePrefix,
                      "foobar", _noderoot=None)

  def testOutsideNodeRoot(self):
    self.assertRaises(RuntimeError, vcluster._RemoveNodePrefix,
                      "/file/path", _noderoot="/tmp/node1.example.com")
    self.assertRaises(RuntimeError, vcluster._RemoveNodePrefix,
                      "/tmp/xyzfile", _noderoot="/tmp/xyz")

  def test(self):
    path = vcluster._RemoveNodePrefix("/tmp/node1.example.com/file/path",
                                      _noderoot="/tmp/node1.example.com")
    self.assertEqual(path, "/file/path")

    path = vcluster._RemoveNodePrefix("/file/path", _noderoot=None)
    self.assertEqual(path, "/file/path")


class TestMakeVirtualPath(unittest.TestCase):
  def testRelativePath(self):
    self.assertRaises(AssertionError, vcluster.MakeVirtualPath,
                      "foobar", _noderoot=None)

  def testOutsideNodeRoot(self):
    self.assertRaises(RuntimeError, vcluster.MakeVirtualPath,
                      "/file/path", _noderoot="/tmp/node1.example.com")

  def testWithNodeRoot(self):
    path = vcluster.MakeVirtualPath("/tmp/node1.example.com/tmp/file",
                                    _noderoot="/tmp/node1.example.com")
    self.assertEqual(path, "%s/tmp/file" % vcluster._VIRT_PATH_PREFIX)

  def testNormal(self):
    self.assertEqual(vcluster.MakeVirtualPath("/tmp/file", _noderoot=None),
                     "/tmp/file")

  def testWhitelisted(self):
    mvp = vcluster.MakeVirtualPath
    for path in vcluster._VPATH_WHITELIST:
      self.assertEqual(mvp(path), path)
      self.assertEqual(mvp(path, _noderoot=None), path)
      self.assertEqual(mvp(path, _noderoot="/tmp"), path)


class TestLocalizeVirtualPath(unittest.TestCase):
  def testWrongPrefix(self):
    self.assertRaises(RuntimeError, vcluster.LocalizeVirtualPath,
                      "/tmp/some/path", _noderoot="/tmp/node1.example.com")

  def testCorrectPrefixRelativePath(self):
    self.assertRaises(AssertionError, vcluster.LocalizeVirtualPath,
                      vcluster._VIRT_PATH_PREFIX + "foobar",
                      _noderoot="/tmp/node1.example.com")

  def testWithNodeRoot(self):
    lvp = vcluster.LocalizeVirtualPath

    virtpath1 = "%s/tmp/file" % vcluster._VIRT_PATH_PREFIX
    virtpath2 = "%s////tmp////file" % vcluster._VIRT_PATH_PREFIX

    for i in [virtpath1, virtpath2]:
      result = lvp(i, _noderoot="/tmp/node1.example.com")
      self.assertEqual(result, "/tmp/node1.example.com/tmp/file")

  def testNormal(self):
    self.assertEqual(vcluster.LocalizeVirtualPath("/tmp/file",
                                                  _noderoot=None),
                     "/tmp/file")

  def testWhitelisted(self):
    lvp = vcluster.LocalizeVirtualPath
    for path in vcluster._VPATH_WHITELIST:
      self.assertEqual(lvp(path), path)
      self.assertEqual(lvp(path, _noderoot=None), path)
      self.assertEqual(lvp(path, _noderoot="/tmp"), path)


class TestVirtualPathPrefix(unittest.TestCase):
  def test(self):
    self.assertTrue(os.path.isabs(vcluster._VIRT_PATH_PREFIX))
    self.assertEqual(os.path.normcase(vcluster._VIRT_PATH_PREFIX),
                     vcluster._VIRT_PATH_PREFIX)


if __name__ == "__main__":
  testutils.GanetiTestProgram()
"""Script for testing ganeti.utils.nodesetup""" import os import tempfile import unittest from ganeti import constants from ganeti import utils import testutils class TestEtcHosts(testutils.GanetiTestCase): """Test functions modifying /etc/hosts""" def setUp(self): testutils.GanetiTestCase.setUp(self) self.tmpname = self._CreateTempFile() handle = open(self.tmpname, "w") try: handle.write("# This is a test file for /etc/hosts\n") handle.write("127.0.0.1\tlocalhost\n") handle.write("192.0.2.1 router gw\n") finally: handle.close() os.chmod(self.tmpname, 0644) def testSettingNewIp(self): utils.SetEtcHostsEntry(self.tmpname, "198.51.100.4", "myhost.example.com", ["myhost"]) self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1 router gw\n" "198.51.100.4\tmyhost.example.com myhost\n") self.assertFileMode(self.tmpname, 0644) def testSettingExistingIp(self): utils.SetEtcHostsEntry(self.tmpname, "192.0.2.1", "myhost.example.com", ["myhost"]) self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1\tmyhost.example.com myhost\n") self.assertFileMode(self.tmpname, 0644) def testSettingDuplicateName(self): utils.SetEtcHostsEntry(self.tmpname, "198.51.100.4", "myhost", ["myhost"]) self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1 router gw\n" "198.51.100.4\tmyhost\n") self.assertFileMode(self.tmpname, 0644) def testSettingOrdering(self): utils.SetEtcHostsEntry(self.tmpname, "127.0.0.1", "localhost.localdomain", ["localhost"]) self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost.localdomain localhost\n" "192.0.2.1 router gw\n") self.assertFileMode(self.tmpname, 0644) def testRemovingExistingHost(self): utils.RemoveEtcHostsEntry(self.tmpname, "router") self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1 gw\n") self.assertFileMode(self.tmpname, 0644) def testRemovingSingleExistingHost(self): utils.RemoveEtcHostsEntry(self.tmpname, "localhost") self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "192.0.2.1 router gw\n") self.assertFileMode(self.tmpname, 0644) def testRemovingNonExistingHost(self): utils.RemoveEtcHostsEntry(self.tmpname, "myhost") self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1 router gw\n") self.assertFileMode(self.tmpname, 0644) def testRemovingAlias(self): utils.RemoveEtcHostsEntry(self.tmpname, "gw") self.assertFileContent(self.tmpname, "# This is a test file for /etc/hosts\n" "127.0.0.1\tlocalhost\n" "192.0.2.1 router\n") self.assertFileMode(self.tmpname, 0644) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.jqueue_unittest.py0000744000000000000000000030044512271422343021465 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
ganeti-2.9.3/test/py/ganeti.jqueue_unittest.py0000744000000000000000000030044512271422343021465 0ustar00rootroot00000000000000#!/usr/bin/python
#
# Copyright (C) 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Script for testing ganeti.jqueue"""

import os
import sys
import unittest
import tempfile
import shutil
import errno
import itertools
import random
import operator

try:
  # pylint: disable=E0611
  from pyinotify import pyinotify
except ImportError:
  import pyinotify

from ganeti import constants
from ganeti import utils
from ganeti import errors
from ganeti import jqueue
from ganeti import opcodes
from ganeti import compat
from ganeti import mcpu
from ganeti import query
from ganeti import workerpool

import testutils


class _FakeJob:
  def __init__(self, job_id, status):
    self.id = job_id
    self.writable = False
    self._status = status
    self._log = []

  def SetStatus(self, status):
    self._status = status

  def AddLogEntry(self, msg):
    self._log.append((len(self._log), msg))

  def CalcStatus(self):
    return self._status

  def GetInfo(self, fields):
    result = []

    for name in fields:
      if name == "status":
        result.append(self._status)
      else:
        raise Exception("Unknown field")

    return result

  def GetLogEntries(self, newer_than):
    assert newer_than is None or newer_than >= 0

    if newer_than is None:
      return self._log

    return self._log[newer_than:]


class TestJobChangesChecker(unittest.TestCase):
  def testStatus(self):
    job = _FakeJob(9094, constants.JOB_STATUS_QUEUED)
    checker = jqueue._JobChangesChecker(["status"], None, None)
    self.assertEqual(checker(job), ([constants.JOB_STATUS_QUEUED], []))

    job.SetStatus(constants.JOB_STATUS_RUNNING)
    self.assertEqual(checker(job), ([constants.JOB_STATUS_RUNNING], []))

    job.SetStatus(constants.JOB_STATUS_SUCCESS)
    self.assertEqual(checker(job), ([constants.JOB_STATUS_SUCCESS], []))

    # job.id is used by checker
    self.assertEqual(job.id, 9094)

  def testStatusWithPrev(self):
    job = _FakeJob(12807, constants.JOB_STATUS_QUEUED)
    checker = jqueue._JobChangesChecker(["status"],
                                        [constants.JOB_STATUS_QUEUED], None)
    self.assert_(checker(job) is None)

    job.SetStatus(constants.JOB_STATUS_RUNNING)
    self.assertEqual(checker(job), ([constants.JOB_STATUS_RUNNING], []))

  def testFinalStatus(self):
    for status in constants.JOBS_FINALIZED:
      job = _FakeJob(2178711, status)
      checker = jqueue._JobChangesChecker(["status"], [status], None)
      # There won't be any changes in this status, hence it should signal
      # a change immediately
      self.assertEqual(checker(job), ([status], []))

  def testLog(self):
    job = _FakeJob(9094, constants.JOB_STATUS_RUNNING)
    checker = jqueue._JobChangesChecker(["status"], None, None)
    self.assertEqual(checker(job), ([constants.JOB_STATUS_RUNNING], []))

    job.AddLogEntry("Hello World")
    (job_info, log_entries) = checker(job)
    self.assertEqual(job_info, [constants.JOB_STATUS_RUNNING])
    self.assertEqual(log_entries, [[0, "Hello World"]])

    checker2 = jqueue._JobChangesChecker(["status"], job_info,
                                         len(log_entries))
    self.assert_(checker2(job) is None)

    job.AddLogEntry("Foo Bar")
    job.SetStatus(constants.JOB_STATUS_ERROR)
    (job_info, log_entries) = checker2(job)
    self.assertEqual(job_info, [constants.JOB_STATUS_ERROR])
    self.assertEqual(log_entries, [[1, "Foo Bar"]])

    checker3 = jqueue._JobChangesChecker(["status"], None, None)
    (job_info, log_entries) = checker3(job)
    self.assertEqual(job_info, [constants.JOB_STATUS_ERROR])
    self.assertEqual(log_entries, [[0, "Hello World"], [1, "Foo Bar"]])


class TestJobChangesWaiter(unittest.TestCase):
  def setUp(self):
    self.tmpdir = tempfile.mkdtemp()
    self.filename =
utils.PathJoin(self.tmpdir, "job-1") utils.WriteFile(self.filename, data="") def tearDown(self): shutil.rmtree(self.tmpdir) def _EnsureNotifierClosed(self, notifier): try: os.fstat(notifier._fd) except EnvironmentError, err: self.assertEqual(err.errno, errno.EBADF) else: self.fail("File descriptor wasn't closed") def testClose(self): for wait in [False, True]: waiter = jqueue._JobFileChangesWaiter(self.filename) try: if wait: waiter.Wait(0.001) finally: waiter.Close() # Ensure file descriptor was closed self._EnsureNotifierClosed(waiter._notifier) def testChangingFile(self): waiter = jqueue._JobFileChangesWaiter(self.filename) try: self.assertFalse(waiter.Wait(0.1)) utils.WriteFile(self.filename, data="changed") self.assert_(waiter.Wait(60)) finally: waiter.Close() self._EnsureNotifierClosed(waiter._notifier) def testChangingFile2(self): waiter = jqueue._JobChangesWaiter(self.filename) try: self.assertFalse(waiter._filewaiter) self.assert_(waiter.Wait(0.1)) self.assert_(waiter._filewaiter) # File waiter is now used, but there have been no changes self.assertFalse(waiter.Wait(0.1)) utils.WriteFile(self.filename, data="changed") self.assert_(waiter.Wait(60)) finally: waiter.Close() self._EnsureNotifierClosed(waiter._filewaiter._notifier) class _FailingWatchManager(pyinotify.WatchManager): """Subclass of L{pyinotify.WatchManager} which always fails to register. """ def add_watch(self, filename, mask): assert mask == (pyinotify.EventsCodes.ALL_FLAGS["IN_MODIFY"] | pyinotify.EventsCodes.ALL_FLAGS["IN_IGNORED"]) return { filename: -1, } class TestWaitForJobChangesHelper(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.filename = utils.PathJoin(self.tmpdir, "job-2614226563") utils.WriteFile(self.filename, data="") def tearDown(self): shutil.rmtree(self.tmpdir) def _LoadWaitingJob(self): return _FakeJob(2614226563, constants.JOB_STATUS_WAITING) def _LoadLostJob(self): return None def testNoChanges(self): wfjc = jqueue._WaitForJobChangesHelper() # No change self.assertEqual(wfjc(self.filename, self._LoadWaitingJob, ["status"], [constants.JOB_STATUS_WAITING], None, 0.1), constants.JOB_NOTCHANGED) # No previous information self.assertEqual(wfjc(self.filename, self._LoadWaitingJob, ["status"], None, None, 1.0), ([constants.JOB_STATUS_WAITING], [])) def testLostJob(self): wfjc = jqueue._WaitForJobChangesHelper() self.assert_(wfjc(self.filename, self._LoadLostJob, ["status"], None, None, 1.0) is None) def testNonExistentFile(self): wfjc = jqueue._WaitForJobChangesHelper() filename = utils.PathJoin(self.tmpdir, "does-not-exist") self.assertFalse(os.path.exists(filename)) result = wfjc(filename, self._LoadLostJob, ["status"], None, None, 1.0, _waiter_cls=compat.partial(jqueue._JobChangesWaiter, _waiter_cls=NotImplemented)) self.assertTrue(result is None) def testInotifyError(self): jobfile_waiter_cls = \ compat.partial(jqueue._JobFileChangesWaiter, _inotify_wm_cls=_FailingWatchManager) jobchange_waiter_cls = \ compat.partial(jqueue._JobChangesWaiter, _waiter_cls=jobfile_waiter_cls) wfjc = jqueue._WaitForJobChangesHelper() # Test if failing to watch a job file (e.g. 
due to # fs.inotify.max_user_watches being too low) raises errors.InotifyError self.assertRaises(errors.InotifyError, wfjc, self.filename, self._LoadWaitingJob, ["status"], [constants.JOB_STATUS_WAITING], None, 1.0, _waiter_cls=jobchange_waiter_cls) class TestEncodeOpError(unittest.TestCase): def test(self): encerr = jqueue._EncodeOpError(errors.LockError("Test 1")) self.assert_(isinstance(encerr, tuple)) self.assertRaises(errors.LockError, errors.MaybeRaise, encerr) encerr = jqueue._EncodeOpError(errors.GenericError("Test 2")) self.assert_(isinstance(encerr, tuple)) self.assertRaises(errors.GenericError, errors.MaybeRaise, encerr) encerr = jqueue._EncodeOpError(NotImplementedError("Foo")) self.assert_(isinstance(encerr, tuple)) self.assertRaises(errors.OpExecError, errors.MaybeRaise, encerr) encerr = jqueue._EncodeOpError("Hello World") self.assert_(isinstance(encerr, tuple)) self.assertRaises(errors.OpExecError, errors.MaybeRaise, encerr) class TestQueuedOpCode(unittest.TestCase): def testDefaults(self): def _Check(op): self.assertFalse(hasattr(op.input, "dry_run")) self.assertEqual(op.priority, constants.OP_PRIO_DEFAULT) self.assertFalse(op.log) self.assert_(op.start_timestamp is None) self.assert_(op.exec_timestamp is None) self.assert_(op.end_timestamp is None) self.assert_(op.result is None) self.assertEqual(op.status, constants.OP_STATUS_QUEUED) op1 = jqueue._QueuedOpCode(opcodes.OpTestDelay()) _Check(op1) op2 = jqueue._QueuedOpCode.Restore(op1.Serialize()) _Check(op2) self.assertEqual(op1.Serialize(), op2.Serialize()) def testPriority(self): def _Check(op): assert constants.OP_PRIO_DEFAULT != constants.OP_PRIO_HIGH, \ "Default priority equals high priority; test can't work" self.assertEqual(op.priority, constants.OP_PRIO_HIGH) self.assertEqual(op.status, constants.OP_STATUS_QUEUED) inpop = opcodes.OpTagsGet(priority=constants.OP_PRIO_HIGH) op1 = jqueue._QueuedOpCode(inpop) _Check(op1) op2 = jqueue._QueuedOpCode.Restore(op1.Serialize()) _Check(op2) self.assertEqual(op1.Serialize(), op2.Serialize()) class TestQueuedJob(unittest.TestCase): def testNoOpCodes(self): self.assertRaises(errors.GenericError, jqueue._QueuedJob, None, 1, [], False) def testDefaults(self): job_id = 4260 ops = [ opcodes.OpTagsGet(), opcodes.OpTestDelay(), ] def _Check(job): self.assertTrue(job.writable) self.assertEqual(job.id, job_id) self.assertEqual(job.log_serial, 0) self.assert_(job.received_timestamp) self.assert_(job.start_timestamp is None) self.assert_(job.end_timestamp is None) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assert_(repr(job).startswith("<")) self.assertEqual(len(job.ops), len(ops)) self.assert_(compat.all(inp.__getstate__() == op.input.__getstate__() for (inp, op) in zip(ops, job.ops))) self.assertRaises(errors.OpPrereqError, job.GetInfo, ["unknown-field"]) self.assertEqual(job.GetInfo(["summary"]), [[op.input.Summary() for op in job.ops]]) self.assertFalse(job.archived) job1 = jqueue._QueuedJob(None, job_id, ops, True) _Check(job1) job2 = jqueue._QueuedJob.Restore(None, job1.Serialize(), True, False) _Check(job2) self.assertEqual(job1.Serialize(), job2.Serialize()) def testWritable(self): job = jqueue._QueuedJob(None, 1, [opcodes.OpTestDelay()], False) self.assertFalse(job.writable) job = jqueue._QueuedJob(None, 1, [opcodes.OpTestDelay()], True) self.assertTrue(job.writable) def testArchived(self): job = jqueue._QueuedJob(None, 1, [opcodes.OpTestDelay()], False) self.assertFalse(job.archived) newjob 
= jqueue._QueuedJob.Restore(None, job.Serialize(), True, True) self.assertTrue(newjob.archived) newjob2 = jqueue._QueuedJob.Restore(None, newjob.Serialize(), True, False) self.assertFalse(newjob2.archived) def testPriority(self): job_id = 4283 ops = [ opcodes.OpTagsGet(priority=constants.OP_PRIO_DEFAULT), opcodes.OpTestDelay(), ] def _Check(job): self.assertEqual(job.id, job_id) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(repr(job).startswith("<")) job = jqueue._QueuedJob(None, job_id, ops, True) _Check(job) self.assert_(compat.all(op.priority == constants.OP_PRIO_DEFAULT for op in job.ops)) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) # Increase first job.ops[0].priority -= 1 _Check(job) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT - 1) # Mark opcode as finished job.ops[0].status = constants.OP_STATUS_SUCCESS _Check(job) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) # Increase second job.ops[1].priority -= 10 self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT - 10) # Test increasing first job.ops[0].status = constants.OP_STATUS_RUNNING job.ops[0].priority -= 19 self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT - 20) def _JobForPriority(self, job_id): ops = [ opcodes.OpTagsGet(), opcodes.OpTestDelay(), opcodes.OpTagsGet(), opcodes.OpTestDelay(), ] job = jqueue._QueuedJob(None, job_id, ops, True) self.assertTrue(compat.all(op.priority == constants.OP_PRIO_DEFAULT for op in job.ops)) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) return job def testChangePriorityAllQueued(self): job = self._JobForPriority(24984) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertTrue(compat.all(op.status == constants.OP_STATUS_QUEUED for op in job.ops)) result = job.ChangePriority(-10) self.assertEqual(job.CalcPriority(), -10) self.assertTrue(compat.all(op.priority == -10 for op in job.ops)) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(result, (True, ("Priorities of pending opcodes for job 24984 have" " been changed to -10"))) def testChangePriorityAllFinished(self): job = self._JobForPriority(16405) for (idx, op) in enumerate(job.ops): if idx > 2: op.status = constants.OP_STATUS_ERROR else: op.status = constants.OP_STATUS_SUCCESS self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_ERROR) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) result = job.ChangePriority(-10) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertTrue(compat.all(op.priority == constants.OP_PRIO_DEFAULT for op in job.ops)) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(map(operator.attrgetter("status"), job.ops), [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_ERROR, ]) self.assertEqual(result, (False, "Job 16405 is finished")) def testChangePriorityCancelling(self): job = self._JobForPriority(31572) for (idx, op) in enumerate(job.ops): if idx > 1: op.status = constants.OP_STATUS_CANCELING else: op.status = constants.OP_STATUS_SUCCESS self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELING) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) result = job.ChangePriority(5) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertTrue(compat.all(op.priority == 
constants.OP_PRIO_DEFAULT for op in job.ops)) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(map(operator.attrgetter("status"), job.ops), [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_CANCELING, constants.OP_STATUS_CANCELING, ]) self.assertEqual(result, (False, "Job 31572 is cancelling")) def testChangePriorityFirstRunning(self): job = self._JobForPriority(1716215889) for (idx, op) in enumerate(job.ops): if idx == 0: op.status = constants.OP_STATUS_RUNNING else: op.status = constants.OP_STATUS_QUEUED self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) result = job.ChangePriority(7) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertEqual(map(operator.attrgetter("priority"), job.ops), [constants.OP_PRIO_DEFAULT, 7, 7, 7]) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(map(operator.attrgetter("status"), job.ops), [ constants.OP_STATUS_RUNNING, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED, ]) self.assertEqual(result, (True, ("Priorities of pending opcodes for job" " 1716215889 have been changed to 7"))) def testChangePriorityLastRunning(self): job = self._JobForPriority(1308) for (idx, op) in enumerate(job.ops): if idx == (len(job.ops) - 1): op.status = constants.OP_STATUS_RUNNING else: op.status = constants.OP_STATUS_SUCCESS self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) result = job.ChangePriority(-3) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertTrue(compat.all(op.priority == constants.OP_PRIO_DEFAULT for op in job.ops)) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(map(operator.attrgetter("status"), job.ops), [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_RUNNING, ]) self.assertEqual(result, (False, "Job 1308 had no pending opcodes")) def testChangePrioritySecondOpcodeRunning(self): job = self._JobForPriority(27701) self.assertEqual(len(job.ops), 4) job.ops[0].status = constants.OP_STATUS_SUCCESS job.ops[1].status = constants.OP_STATUS_RUNNING job.ops[2].status = constants.OP_STATUS_QUEUED job.ops[3].status = constants.OP_STATUS_QUEUED self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) result = job.ChangePriority(-19) self.assertEqual(job.CalcPriority(), -19) self.assertEqual(map(operator.attrgetter("priority"), job.ops), [constants.OP_PRIO_DEFAULT, constants.OP_PRIO_DEFAULT, -19, -19]) self.assertFalse(compat.any(hasattr(op.input, "priority") for op in job.ops)) self.assertEqual(map(operator.attrgetter("status"), job.ops), [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_RUNNING, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED, ]) self.assertEqual(result, (True, ("Priorities of pending opcodes for job" " 27701 have been changed to -19"))) def testChangePriorityWithInconsistentJob(self): job = self._JobForPriority(30097) self.assertEqual(len(job.ops), 4) # This job is invalid (as it has two opcodes marked as running) and make # the call fail because an unprocessed opcode precedes a running one (which # should never happen in reality) job.ops[0].status = constants.OP_STATUS_SUCCESS job.ops[1].status = constants.OP_STATUS_RUNNING job.ops[2].status = constants.OP_STATUS_QUEUED 
job.ops[3].status = constants.OP_STATUS_RUNNING self.assertRaises(AssertionError, job.ChangePriority, 19) def testCalcStatus(self): def _Queued(ops): # The default status is "queued" self.assert_(compat.all(op.status == constants.OP_STATUS_QUEUED for op in ops)) def _Waitlock1(ops): ops[0].status = constants.OP_STATUS_WAITING def _Waitlock2(ops): ops[0].status = constants.OP_STATUS_SUCCESS ops[1].status = constants.OP_STATUS_SUCCESS ops[2].status = constants.OP_STATUS_WAITING def _Running(ops): ops[0].status = constants.OP_STATUS_SUCCESS ops[1].status = constants.OP_STATUS_RUNNING for op in ops[2:]: op.status = constants.OP_STATUS_QUEUED def _Canceling1(ops): ops[0].status = constants.OP_STATUS_SUCCESS ops[1].status = constants.OP_STATUS_SUCCESS for op in ops[2:]: op.status = constants.OP_STATUS_CANCELING def _Canceling2(ops): for op in ops: op.status = constants.OP_STATUS_CANCELING def _Canceled(ops): for op in ops: op.status = constants.OP_STATUS_CANCELED def _Error1(ops): for idx, op in enumerate(ops): if idx > 3: op.status = constants.OP_STATUS_ERROR else: op.status = constants.OP_STATUS_SUCCESS def _Error2(ops): for op in ops: op.status = constants.OP_STATUS_ERROR def _Success(ops): for op in ops: op.status = constants.OP_STATUS_SUCCESS tests = { constants.JOB_STATUS_QUEUED: [_Queued], constants.JOB_STATUS_WAITING: [_Waitlock1, _Waitlock2], constants.JOB_STATUS_RUNNING: [_Running], constants.JOB_STATUS_CANCELING: [_Canceling1, _Canceling2], constants.JOB_STATUS_CANCELED: [_Canceled], constants.JOB_STATUS_ERROR: [_Error1, _Error2], constants.JOB_STATUS_SUCCESS: [_Success], } def _NewJob(): job = jqueue._QueuedJob(None, 1, [opcodes.OpTestDelay() for _ in range(10)], True) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(compat.all(op.status == constants.OP_STATUS_QUEUED for op in job.ops)) return job for status in constants.JOB_STATUS_ALL: sttests = tests[status] assert sttests for fn in sttests: job = _NewJob() fn(job.ops) self.assertEqual(job.CalcStatus(), status) class _FakeDependencyManager: def __init__(self): self._checks = [] self._notifications = [] self._waiting = set() def AddCheckResult(self, job, dep_job_id, dep_status, result): self._checks.append((job, dep_job_id, dep_status, result)) def CountPendingResults(self): return len(self._checks) def CountWaitingJobs(self): return len(self._waiting) def GetNextNotification(self): return self._notifications.pop(0) def JobWaiting(self, job): return job in self._waiting def CheckAndRegister(self, job, dep_job_id, dep_status): (exp_job, exp_dep_job_id, exp_dep_status, result) = self._checks.pop(0) assert exp_job == job assert exp_dep_job_id == dep_job_id assert exp_dep_status == dep_status (result_status, _) = result if result_status == jqueue._JobDependencyManager.WAIT: self._waiting.add(job) elif result_status == jqueue._JobDependencyManager.CONTINUE: self._waiting.remove(job) return result def NotifyWaiters(self, job_id): self._notifications.append(job_id) class _DisabledFakeDependencyManager: def JobWaiting(self, _): return False def CheckAndRegister(self, *args): assert False, "Should not be called" def NotifyWaiters(self, _): pass class _FakeQueueForProc: def __init__(self, depmgr=None): self._acquired = False self._updates = [] self._submitted = [] self._accepting_jobs = True self._submit_count = itertools.count(1000) if depmgr: self.depmgr = depmgr else: self.depmgr = _DisabledFakeDependencyManager() def IsAcquired(self): return self._acquired def GetNextUpdate(self): return self._updates.pop(0) 
def GetNextSubmittedJob(self): return self._submitted.pop(0) def acquire(self, shared=0): assert shared == 1 self._acquired = True def release(self): assert self._acquired self._acquired = False def UpdateJobUnlocked(self, job, replicate=True): assert self._acquired, "Lock not acquired while updating job" self._updates.append((job, bool(replicate))) def SubmitManyJobs(self, jobs): assert not self._acquired, "Lock acquired while submitting jobs" job_ids = [self._submit_count.next() for _ in jobs] self._submitted.extend(zip(job_ids, jobs)) return job_ids def StopAcceptingJobs(self): self._accepting_jobs = False def AcceptingJobsUnlocked(self): return self._accepting_jobs class _FakeExecOpCodeForProc: def __init__(self, queue, before_start, after_start): self._queue = queue self._before_start = before_start self._after_start = after_start def __call__(self, op, cbs, timeout=None): assert isinstance(op, opcodes.OpTestDummy) assert not self._queue.IsAcquired(), \ "Queue lock not released when executing opcode" if self._before_start: self._before_start(timeout, cbs.CurrentPriority()) cbs.NotifyStart() if self._after_start: self._after_start(op, cbs) # Check again after the callbacks assert not self._queue.IsAcquired() if op.fail: raise errors.OpExecError("Error requested (%s)" % op.result) if hasattr(op, "submit_jobs") and op.submit_jobs is not None: return cbs.SubmitManyJobs(op.submit_jobs) return op.result class _JobProcessorTestUtils: def _CreateJob(self, queue, job_id, ops): job = jqueue._QueuedJob(queue, job_id, ops, True) self.assertFalse(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(len(ops), len(job.ops)) self.assert_(compat.all(op.input == inp for (op, inp) in zip(job.ops, ops))) self.assertEqual(job.GetInfo(["ops"]), [[op.__getstate__() for op in ops]]) return job class TestJobProcessor(unittest.TestCase, _JobProcessorTestUtils): def _GenericCheckJob(self, job): assert compat.all(isinstance(op.input, opcodes.OpTestDummy) for op in job.ops) self.assertEqual(job.GetInfo(["opstart", "opexec", "opend"]), [[op.start_timestamp for op in job.ops], [op.exec_timestamp for op in job.ops], [op.end_timestamp for op in job.ops]]) self.assertEqual(job.GetInfo(["received_ts", "start_ts", "end_ts"]), [job.received_timestamp, job.start_timestamp, job.end_timestamp]) self.assert_(job.start_timestamp) self.assert_(job.end_timestamp) self.assertEqual(job.start_timestamp, job.ops[0].start_timestamp) def testSuccess(self): queue = _FakeQueueForProc() for (job_id, opcount) in [(25351, 1), (6637, 3), (24644, 10), (32207, 100)]: ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(opcount)] # Create job job = self._CreateJob(queue, job_id, ops) def _BeforeStart(timeout, priority): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertFalse(job.cur_opctx) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertFalse(job.cur_opctx) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) for idx in range(len(ops)): self.assertRaises(IndexError, queue.GetNextUpdate) result = jqueue._JobProcessor(queue, opexec, 
job)() self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) if idx == len(ops) - 1: # Last opcode self.assertEqual(result, jqueue._JobProcessor.FINISHED) else: self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opresult"]), [[op.input.result for op in job.ops]]) self.assertEqual(job.GetInfo(["opstatus"]), [len(job.ops) * [constants.OP_STATUS_SUCCESS]]) self.assert_(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) self._GenericCheckJob(job) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testOpcodeError(self): queue = _FakeQueueForProc() testdata = [ (17077, 1, 0, 0), (1782, 5, 2, 2), (18179, 10, 9, 9), (4744, 10, 3, 8), (23816, 100, 39, 45), ] for (job_id, opcount, failfrom, failto) in testdata: # Prepare opcodes ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=(failfrom <= i and i <= failto)) for i in range(opcount)] # Create job job = self._CreateJob(queue, str(job_id), ops) opexec = _FakeExecOpCodeForProc(queue, None, None) for idx in range(len(ops)): self.assertRaises(IndexError, queue.GetNextUpdate) result = jqueue._JobProcessor(queue, opexec, job)() # queued to waitlock self.assertEqual(queue.GetNextUpdate(), (job, True)) # waitlock to running self.assertEqual(queue.GetNextUpdate(), (job, True)) # Opcode result self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) if idx in (failfrom, len(ops) - 1): # Last opcode self.assertEqual(result, jqueue._JobProcessor.FINISHED) break self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertRaises(IndexError, queue.GetNextUpdate) # Check job status self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_ERROR) self.assertEqual(job.GetInfo(["id"]), [job_id]) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_ERROR]) # Check opcode status data = zip(job.ops, job.GetInfo(["opstatus"])[0], job.GetInfo(["opresult"])[0]) for idx, (op, opstatus, opresult) in enumerate(data): if idx < failfrom: assert not op.input.fail self.assertEqual(opstatus, constants.OP_STATUS_SUCCESS) self.assertEqual(opresult, op.input.result) elif idx <= failto: assert op.input.fail self.assertEqual(opstatus, constants.OP_STATUS_ERROR) self.assertRaises(errors.OpExecError, errors.MaybeRaise, opresult) else: assert not op.input.fail self.assertEqual(opstatus, constants.OP_STATUS_ERROR) self.assertRaises(errors.OpExecError, errors.MaybeRaise, opresult) self.assert_(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops[:failfrom])) self._GenericCheckJob(job) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testCancelWhileInQueue(self): queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 17045 job = self._CreateJob(queue, 
job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) # Mark as cancelled (success, _) = job.Cancel() self.assert_(success) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(job.start_timestamp) self.assertTrue(job.end_timestamp) self.assert_(compat.all(op.status == constants.OP_STATUS_CANCELED for op in job.ops)) # Serialize to check for differences before_proc = job.Serialize() # Simulate processor called in workerpool opexec = _FakeExecOpCodeForProc(queue, None, None) self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assertFalse(job.start_timestamp) self.assertTrue(job.end_timestamp) self.assertFalse(compat.any(op.start_timestamp or op.end_timestamp for op in job.ops)) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_CANCELED for _ in job.ops], ["Job canceled by request" for _ in job.ops]]) # Must not have changed or written self.assertEqual(before_proc, job.Serialize()) self.assertRaises(IndexError, queue.GetNextUpdate) def testCancelWhileWaitlockInQueue(self): queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 8645 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) job.ops[0].status = constants.OP_STATUS_WAITING assert len(job.ops) == 5 self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) # Mark as cancelling (success, _) = job.Cancel() self.assert_(success) self.assertRaises(IndexError, queue.GetNextUpdate) self.assert_(compat.all(op.status == constants.OP_STATUS_CANCELING for op in job.ops)) opexec = _FakeExecOpCodeForProc(queue, None, None) self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assertFalse(job.start_timestamp) self.assert_(job.end_timestamp) self.assertFalse(compat.any(op.start_timestamp or op.end_timestamp for op in job.ops)) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_CANCELED for _ in job.ops], ["Job canceled by request" for _ in job.ops]]) def testCancelWhileWaitlock(self): queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 11009 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) def _BeforeStart(timeout, priority): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) # Mark as cancelled (success, _) = job.Cancel() self.assert_(success) self.assert_(compat.all(op.status == constants.OP_STATUS_CANCELING for op in job.ops)) self.assertRaises(IndexError, queue.GetNextUpdate) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) self.assertRaises(IndexError, queue.GetNextUpdate) 
self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assert_(job.start_timestamp) self.assert_(job.end_timestamp) self.assertFalse(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_CANCELED for _ in job.ops], ["Job canceled by request" for _ in job.ops]]) def _TestCancelWhileSomething(self, cb): queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 24314 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) def _BeforeStart(timeout, priority): self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) # Mark as cancelled (success, _) = job.Cancel() self.assert_(success) self.assert_(compat.all(op.status == constants.OP_STATUS_CANCELING for op in job.ops)) cb(queue) def _AfterStart(op, cbs): self.fail("Should not reach this") opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assert_(job.start_timestamp) self.assert_(job.end_timestamp) self.assertFalse(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_CANCELED for _ in job.ops], ["Job canceled by request" for _ in job.ops]]) return queue def testCancelWhileWaitlockWithTimeout(self): def fn(_): # Fake an acquire attempt timing out raise mcpu.LockAcquireTimeout() self._TestCancelWhileSomething(fn) def testCancelDuringQueueShutdown(self): queue = self._TestCancelWhileSomething(lambda q: q.StopAcceptingJobs()) self.assertFalse(queue.AcceptingJobsUnlocked()) def testCancelWhileRunning(self): # Tests canceling a job with finished opcodes and more, unprocessed ones queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(3)] # Create job job_id = 28492 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) opexec = _FakeExecOpCodeForProc(queue, None, None) # Run one opcode self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Job goes back to queued self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED], ["Res0", None, None]]) # Mark as cancelled (success, _) = job.Cancel() self.assert_(success) # Try processing another opcode (this will actually cancel the job) self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["id"]), [job_id]) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, 
constants.OP_STATUS_CANCELED, constants.OP_STATUS_CANCELED], ["Res0", "Job canceled by request", "Job canceled by request"]]) def _TestQueueShutdown(self, queue, opexec, job, runcount): self.assertTrue(queue.AcceptingJobsUnlocked()) # Simulate shutdown queue.StopAcceptingJobs() self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Check result self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_QUEUED]) self.assertFalse(job.cur_opctx) self.assertTrue(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(job.start_timestamp, job.ops[0].start_timestamp) self.assertTrue(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops[:runcount])) self.assertFalse(job.ops[runcount].end_timestamp) self.assertFalse(compat.any(op.start_timestamp or op.end_timestamp for op in job.ops[(runcount + 1):])) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [(([constants.OP_STATUS_SUCCESS] * runcount) + ([constants.OP_STATUS_QUEUED] * (len(job.ops) - runcount))), (["Res%s" % i for i in range(runcount)] + ([None] * (len(job.ops) - runcount)))]) # Must have been written and replicated self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) def testQueueShutdownWhileRunning(self): # Tests shutting down the queue while a job is running queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(3)] # Create job job_id = 2718211587 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) opexec = _FakeExecOpCodeForProc(queue, None, None) self.assertRaises(IndexError, queue.GetNextUpdate) # Run one opcode self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Job goes back to queued self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED], ["Res0", None, None]]) self.assertFalse(job.cur_opctx) # Writes for waiting, running and result for _ in range(3): self.assertEqual(queue.GetNextUpdate(), (job, True)) # Run second opcode self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Job goes back to queued self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED], ["Res0", "Res1", None]]) self.assertFalse(job.cur_opctx) # Writes for waiting, running and result for _ in range(3): self.assertEqual(queue.GetNextUpdate(), (job, True)) self._TestQueueShutdown(queue, opexec, job, 2) def testQueueShutdownWithLockTimeout(self): # Tests shutting down while a lock acquire times out queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(3)] # Create job job_id = 1304231178 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) acquire_timeout = False def _BeforeStart(timeout, priority): self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) if acquire_timeout: raise mcpu.LockAcquireTimeout() opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, None) self.assertRaises(IndexError, queue.GetNextUpdate) # Run one opcode 
self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Job goes back to queued self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED], ["Res0", None, None]]) self.assertFalse(job.cur_opctx) # Writes for waiting, running and result for _ in range(3): self.assertEqual(queue.GetNextUpdate(), (job, True)) # The next opcode should have expiring lock acquires acquire_timeout = True self._TestQueueShutdown(queue, opexec, job, 1) def testQueueShutdownWhileInQueue(self): # This should never happen in reality (no new jobs are started by the # workerpool once a shutdown has been initiated), but it's better to test # the job processor for this scenario queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 2031 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertTrue(compat.all(op.status == constants.OP_STATUS_QUEUED for op in job.ops)) opexec = _FakeExecOpCodeForProc(queue, None, None) self._TestQueueShutdown(queue, opexec, job, 0) def testQueueShutdownWhileWaitlockInQueue(self): queue = _FakeQueueForProc() ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(5)] # Create job job_id = 53125685 job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) job.ops[0].status = constants.OP_STATUS_WAITING assert len(job.ops) == 5 self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertRaises(IndexError, queue.GetNextUpdate) opexec = _FakeExecOpCodeForProc(queue, None, None) self._TestQueueShutdown(queue, opexec, job, 0) def testPartiallyRun(self): # Tests calling the processor on a job that's been partially run before the # program was restarted queue = _FakeQueueForProc() opexec = _FakeExecOpCodeForProc(queue, None, None) for job_id, successcount in [(30697, 1), (2552, 4), (12489, 9)]: ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(10)] # Create job job = self._CreateJob(queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) for _ in range(successcount): self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.GetInfo(["opstatus"]), [[constants.OP_STATUS_SUCCESS for _ in range(successcount)] + [constants.OP_STATUS_QUEUED for _ in range(len(ops) - successcount)]]) self.assert_(job.ops_iter) # Serialize and restore (simulates program restart) newjob = jqueue._QueuedJob.Restore(queue, job.Serialize(), True, False) self.assertFalse(newjob.ops_iter) self._TestPartial(newjob, successcount) def _TestPartial(self, job, successcount): self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.start_timestamp, job.ops[0].start_timestamp) queue = _FakeQueueForProc() opexec = _FakeExecOpCodeForProc(queue, None, None) for remaining in reversed(range(len(job.ops) - successcount)): result = jqueue._JobProcessor(queue, opexec, job)() self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertEqual(queue.GetNextUpdate(), (job, True)) 
self.assertRaises(IndexError, queue.GetNextUpdate) if remaining == 0: # Last opcode self.assertEqual(result, jqueue._JobProcessor.FINISHED) break self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opresult"]), [[op.input.result for op in job.ops]]) self.assertEqual(job.GetInfo(["opstatus"]), [[constants.OP_STATUS_SUCCESS for _ in job.ops]]) self.assert_(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) self._GenericCheckJob(job) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) # ... also after being restored job2 = jqueue._QueuedJob.Restore(queue, job.Serialize(), True, False) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job2)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testProcessorOnRunningJob(self): ops = [opcodes.OpTestDummy(result="result", fail=False)] queue = _FakeQueueForProc() opexec = _FakeExecOpCodeForProc(queue, None, None) # Create job job = self._CreateJob(queue, 9571, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) job.ops[0].status = constants.OP_STATUS_RUNNING assert len(job.ops) == 1 self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) # Calling on running job must fail self.assertRaises(errors.ProgrammerError, jqueue._JobProcessor(queue, opexec, job)) def testLogMessages(self): # Tests the "Feedback" callback function queue = _FakeQueueForProc() messages = { 1: [ (None, "Hello"), (None, "World"), (constants.ELOG_MESSAGE, "there"), ], 4: [ (constants.ELOG_JQUEUE_TEST, (1, 2, 3)), (constants.ELOG_JQUEUE_TEST, ("other", "type")), ], } ops = [opcodes.OpTestDummy(result="Logtest%s" % i, fail=False, messages=messages.get(i, [])) for i in range(5)] # Create job job = self._CreateJob(queue, 29386, ops) def _BeforeStart(timeout, priority): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertRaises(AssertionError, cbs.Feedback, "too", "many", "arguments") for (log_type, msg) in op.messages: self.assertRaises(IndexError, queue.GetNextUpdate) if log_type: cbs.Feedback(log_type, msg) else: cbs.Feedback(msg) # Check for job update without replication self.assertEqual(queue.GetNextUpdate(), (job, False)) self.assertRaises(IndexError, queue.GetNextUpdate) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) for remaining in reversed(range(len(job.ops))): self.assertRaises(IndexError, queue.GetNextUpdate) result = jqueue._JobProcessor(queue, opexec, job)() self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) if remaining == 0: # Last opcode self.assertEqual(result, jqueue._JobProcessor.FINISHED) break self.assertEqual(result, 
jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["opresult"]), [[op.input.result for op in job.ops]]) logmsgcount = sum(len(m) for m in messages.values()) self._CheckLogMessages(job, logmsgcount) # Serialize and restore (simulates program restart) newjob = jqueue._QueuedJob.Restore(queue, job.Serialize(), True, False) self._CheckLogMessages(newjob, logmsgcount) # Check each message prevserial = -1 for idx, oplog in enumerate(job.GetInfo(["oplog"])[0]): for (serial, timestamp, log_type, msg) in oplog: (exptype, expmsg) = messages.get(idx).pop(0) if exptype: self.assertEqual(log_type, exptype) else: self.assertEqual(log_type, constants.ELOG_MESSAGE) self.assertEqual(expmsg, msg) self.assert_(serial > prevserial) prevserial = serial def _CheckLogMessages(self, job, count): # Check serial self.assertEqual(job.log_serial, count) # No filter self.assertEqual(job.GetLogEntries(None), [entry for entries in job.GetInfo(["oplog"])[0] if entries for entry in entries]) # Filter with serial assert count > 3 self.assert_(job.GetLogEntries(3)) self.assertEqual(job.GetLogEntries(3), [entry for entries in job.GetInfo(["oplog"])[0] if entries for entry in entries][3:]) # No log message after highest serial self.assertFalse(job.GetLogEntries(count)) self.assertFalse(job.GetLogEntries(count + 3)) def testSubmitManyJobs(self): queue = _FakeQueueForProc() job_id = 15656 ops = [ opcodes.OpTestDummy(result="Res0", fail=False, submit_jobs=[]), opcodes.OpTestDummy(result="Res1", fail=False, submit_jobs=[ [opcodes.OpTestDummy(result="r1j0", fail=False)], ]), opcodes.OpTestDummy(result="Res2", fail=False, submit_jobs=[ [opcodes.OpTestDummy(result="r2j0o0", fail=False), opcodes.OpTestDummy(result="r2j0o1", fail=False), opcodes.OpTestDummy(result="r2j0o2", fail=False), opcodes.OpTestDummy(result="r2j0o3", fail=False)], [opcodes.OpTestDummy(result="r2j1", fail=False)], [opcodes.OpTestDummy(result="r2j3o0", fail=False), opcodes.OpTestDummy(result="r2j3o1", fail=False)], ]), ] # Create job job = self._CreateJob(queue, job_id, ops) def _BeforeStart(timeout, priority): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertFalse(job.cur_opctx) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertFalse(job.cur_opctx) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) for idx in range(len(ops)): self.assertRaises(IndexError, queue.GetNextUpdate) result = jqueue._JobProcessor(queue, opexec, job)() self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) if idx == len(ops) - 1: # Last opcode self.assertEqual(result, jqueue._JobProcessor.FINISHED) else: self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertRaises(IndexError, queue.GetNextUpdate) for idx, submitted_ops in enumerate(job_ops for 
op in ops for job_ops in op.submit_jobs): self.assertEqual(queue.GetNextSubmittedJob(), (1000 + idx, submitted_ops)) self.assertRaises(IndexError, queue.GetNextSubmittedJob) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opresult"]), [[[], [1000], [1001, 1002, 1003]]]) self.assertEqual(job.GetInfo(["opstatus"]), [len(job.ops) * [constants.OP_STATUS_SUCCESS]]) self._GenericCheckJob(job) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testJobDependency(self): depmgr = _FakeDependencyManager() queue = _FakeQueueForProc(depmgr=depmgr) self.assertEqual(queue.depmgr, depmgr) prev_job_id = 22113 prev_job_id2 = 28102 job_id = 29929 ops = [ opcodes.OpTestDummy(result="Res0", fail=False, depends=[ [prev_job_id2, None], [prev_job_id, None], ]), opcodes.OpTestDummy(result="Res1", fail=False), ] # Create job job = self._CreateJob(queue, job_id, ops) def _BeforeStart(timeout, priority): if attempt == 0 or attempt > 5: # Job should only be updated when it wasn't waiting for another job self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertFalse(job.cur_opctx) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertFalse(job.cur_opctx) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) counter = itertools.count() while True: attempt = counter.next() self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) if attempt < 2: depmgr.AddCheckResult(job, prev_job_id2, None, (jqueue._JobDependencyManager.WAIT, "wait2")) elif attempt == 2: depmgr.AddCheckResult(job, prev_job_id2, None, (jqueue._JobDependencyManager.CONTINUE, "cont")) # The processor will ask for the next dependency immediately depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.WAIT, "wait")) elif attempt < 5: depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.WAIT, "wait")) elif attempt == 5: depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.CONTINUE, "cont")) if attempt == 2: self.assertEqual(depmgr.CountPendingResults(), 2) elif attempt > 5: self.assertEqual(depmgr.CountPendingResults(), 0) else: self.assertEqual(depmgr.CountPendingResults(), 1) result = jqueue._JobProcessor(queue, opexec, job)() if attempt == 0 or attempt >= 5: # Job should only be updated if there was an actual change self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(depmgr.CountPendingResults()) if attempt < 5: # Simulate waiting for other job self.assertEqual(result, jqueue._JobProcessor.WAITDEP) self.assertTrue(job.cur_opctx) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertRaises(IndexError, depmgr.GetNextNotification) 
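# A "depends" entry, as used in the ops list above, is a [job_id, statuses]
# pair; None for the statuses element appears to accept any finalized status
# of the dependency.  A hedged sketch of a dependency that additionally
# requires the other job to have succeeded (hypothetical, mirroring the
# dep_status lists used by TestJobDependencyManager later in this file):
#
#   opcodes.OpTestDummy(result="Res", fail=False,
#                       depends=[[prev_job_id,
#                                 [constants.JOB_STATUS_SUCCESS]]])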
self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) continue if result == jqueue._JobProcessor.FINISHED: # Last opcode self.assertFalse(job.cur_opctx) break self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opresult"]), [[op.input.result for op in job.ops]]) self.assertEqual(job.GetInfo(["opstatus"]), [len(job.ops) * [constants.OP_STATUS_SUCCESS]]) self.assertTrue(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) self._GenericCheckJob(job) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertFalse(depmgr.CountPendingResults()) self.assertFalse(depmgr.CountWaitingJobs()) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testJobDependencyCancel(self): depmgr = _FakeDependencyManager() queue = _FakeQueueForProc(depmgr=depmgr) self.assertEqual(queue.depmgr, depmgr) prev_job_id = 13623 job_id = 30876 ops = [ opcodes.OpTestDummy(result="Res0", fail=False), opcodes.OpTestDummy(result="Res1", fail=False, depends=[ [prev_job_id, None], ]), opcodes.OpTestDummy(result="Res2", fail=False), ] # Create job job = self._CreateJob(queue, job_id, ops) def _BeforeStart(timeout, priority): if attempt == 0 or attempt > 5: # Job should only be updated when it wasn't waiting for another job self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertFalse(job.cur_opctx) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertFalse(job.cur_opctx) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) counter = itertools.count() while True: attempt = counter.next() self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) if attempt == 0: # This will handle the first opcode pass elif attempt < 4: depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.WAIT, "wait")) elif attempt == 4: # Other job was cancelled depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.CANCEL, "cancel")) if attempt == 0: self.assertEqual(depmgr.CountPendingResults(), 0) else: self.assertEqual(depmgr.CountPendingResults(), 1) result = jqueue._JobProcessor(queue, opexec, job)() if attempt <= 1 or attempt >= 4: # Job should only be updated if there was an actual change self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(depmgr.CountPendingResults()) if attempt > 0 and attempt < 4: # Simulate waiting for other job self.assertEqual(result, 
jqueue._JobProcessor.WAITDEP) self.assertTrue(job.cur_opctx) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertRaises(IndexError, depmgr.GetNextNotification) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) continue if result == jqueue._JobProcessor.FINISHED: # Last opcode self.assertFalse(job.cur_opctx) break self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_CANCELED) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_CANCELED]) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_CANCELED, constants.OP_STATUS_CANCELED], ["Res0", "Job canceled by request", "Job canceled by request"]]) self._GenericCheckJob(job) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertFalse(depmgr.CountPendingResults()) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) def testJobDependencyWrongstatus(self): depmgr = _FakeDependencyManager() queue = _FakeQueueForProc(depmgr=depmgr) self.assertEqual(queue.depmgr, depmgr) prev_job_id = 9741 job_id = 11763 ops = [ opcodes.OpTestDummy(result="Res0", fail=False), opcodes.OpTestDummy(result="Res1", fail=False, depends=[ [prev_job_id, None], ]), opcodes.OpTestDummy(result="Res2", fail=False), ] # Create job job = self._CreateJob(queue, job_id, ops) def _BeforeStart(timeout, priority): if attempt == 0 or attempt > 5: # Job should only be updated when it wasn't waiting for another job self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertFalse(job.cur_opctx) def _AfterStart(op, cbs): self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) self.assertFalse(job.cur_opctx) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) opexec = _FakeExecOpCodeForProc(queue, _BeforeStart, _AfterStart) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) counter = itertools.count() while True: attempt = counter.next() self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) if attempt == 0: # This will handle the first opcode pass elif attempt < 4: depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.WAIT, "wait")) elif attempt == 4: # Other job failed depmgr.AddCheckResult(job, prev_job_id, None, (jqueue._JobDependencyManager.WRONGSTATUS, "w")) if attempt == 0: self.assertEqual(depmgr.CountPendingResults(), 0) else: self.assertEqual(depmgr.CountPendingResults(), 1) result = jqueue._JobProcessor(queue, opexec, job)() if attempt <= 1 or attempt >= 4: # Job should only be updated if there was an actual change self.assertEqual(queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertFalse(depmgr.CountPendingResults()) if attempt > 
0 and attempt < 4: # Simulate waiting for other job self.assertEqual(result, jqueue._JobProcessor.WAITDEP) self.assertTrue(job.cur_opctx) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) self.assertRaises(IndexError, depmgr.GetNextNotification) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) continue if result == jqueue._JobProcessor.FINISHED: # Last opcode self.assertFalse(job.cur_opctx) break self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertEqual(result, jqueue._JobProcessor.DEFER) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_ERROR) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_ERROR]) self.assertEqual(job.GetInfo(["opstatus"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_ERROR, constants.OP_STATUS_ERROR]]) (opresult, ) = job.GetInfo(["opresult"]) self.assertEqual(len(opresult), len(ops)) self.assertEqual(opresult[0], "Res0") self.assertTrue(errors.GetEncodedError(opresult[1])) self.assertTrue(errors.GetEncodedError(opresult[2])) self._GenericCheckJob(job) self.assertRaises(IndexError, queue.GetNextUpdate) self.assertRaises(IndexError, depmgr.GetNextNotification) self.assertFalse(depmgr.CountPendingResults()) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, queue.GetNextUpdate) class TestEvaluateJobProcessorResult(unittest.TestCase): def testFinished(self): depmgr = _FakeDependencyManager() job = _IdOnlyFakeJob(30953) jqueue._EvaluateJobProcessorResult(depmgr, job, jqueue._JobProcessor.FINISHED) self.assertEqual(depmgr.GetNextNotification(), job.id) self.assertRaises(IndexError, depmgr.GetNextNotification) def testDefer(self): depmgr = _FakeDependencyManager() job = _IdOnlyFakeJob(11326, priority=5463) try: jqueue._EvaluateJobProcessorResult(depmgr, job, jqueue._JobProcessor.DEFER) except workerpool.DeferTask, err: self.assertEqual(err.priority, 5463) else: self.fail("Didn't raise exception") self.assertRaises(IndexError, depmgr.GetNextNotification) def testWaitdep(self): depmgr = _FakeDependencyManager() job = _IdOnlyFakeJob(21317) jqueue._EvaluateJobProcessorResult(depmgr, job, jqueue._JobProcessor.WAITDEP) self.assertRaises(IndexError, depmgr.GetNextNotification) def testOther(self): depmgr = _FakeDependencyManager() job = _IdOnlyFakeJob(5813) self.assertRaises(errors.ProgrammerError, jqueue._EvaluateJobProcessorResult, depmgr, job, "Other result") self.assertRaises(IndexError, depmgr.GetNextNotification) class _FakeTimeoutStrategy: def __init__(self, timeouts): self.timeouts = timeouts self.attempts = 0 self.last_timeout = None def NextAttempt(self): self.attempts += 1 if self.timeouts: timeout = self.timeouts.pop(0) else: timeout = None self.last_timeout = timeout return timeout class TestJobProcessorTimeouts(unittest.TestCase, _JobProcessorTestUtils): def setUp(self): self.queue = _FakeQueueForProc() self.job = None self.curop = None self.opcounter = None self.timeout_strategy = None self.retries = 0 self.prev_tsop = None self.prev_prio = None self.prev_status = None self.lock_acq_prio = None self.gave_lock = None self.done_lock_before_blocking = False def _BeforeStart(self, timeout, priority): job = self.job # If status has changed, job must've been written if self.prev_status != self.job.ops[self.curop].status:
self.assertEqual(self.queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, self.queue.GetNextUpdate) self.assertFalse(self.queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) ts = self.timeout_strategy self.assert_(timeout is None or isinstance(timeout, (int, float))) self.assertEqual(timeout, ts.last_timeout) self.assertEqual(priority, job.ops[self.curop].priority) self.gave_lock = True self.lock_acq_prio = priority if (self.curop == 3 and job.ops[self.curop].priority == constants.OP_PRIO_HIGHEST + 3): # Give locks before running into blocking acquire assert self.retries == 7 self.retries = 0 self.done_lock_before_blocking = True return if self.retries > 0: self.assert_(timeout is not None) self.retries -= 1 self.gave_lock = False raise mcpu.LockAcquireTimeout() if job.ops[self.curop].priority == constants.OP_PRIO_HIGHEST: assert self.retries == 0, "Didn't exhaust all retries at highest priority" assert not ts.timeouts self.assert_(timeout is None) def _AfterStart(self, op, cbs): job = self.job # Setting to "running" requires an update self.assertEqual(self.queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, self.queue.GetNextUpdate) self.assertFalse(self.queue.IsAcquired()) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_RUNNING) # Job is running, cancelling shouldn't be possible (success, _) = job.Cancel() self.assertFalse(success) def _NextOpcode(self): self.curop = self.opcounter.next() self.prev_prio = self.job.ops[self.curop].priority self.prev_status = self.job.ops[self.curop].status def _NewTimeoutStrategy(self): job = self.job self.assertEqual(self.retries, 0) if self.prev_tsop == self.curop: # Still on the same opcode, priority must've been increased self.assertEqual(self.prev_prio, job.ops[self.curop].priority + 1) if self.curop == 1: # Normal retry timeouts = range(10, 31, 10) self.retries = len(timeouts) - 1 elif self.curop == 2: # Let this run into a blocking acquire timeouts = range(11, 61, 12) self.retries = len(timeouts) elif self.curop == 3: # Wait for priority to increase, but give lock before blocking acquire timeouts = range(12, 100, 14) self.retries = len(timeouts) self.assertFalse(self.done_lock_before_blocking) elif self.curop == 4: self.assert_(self.done_lock_before_blocking) # Timeouts, but no need to retry timeouts = range(10, 31, 10) self.retries = 0 elif self.curop == 5: # Normal retry timeouts = range(19, 100, 11) self.retries = len(timeouts) else: timeouts = [] self.retries = 0 assert len(job.ops) == 10 assert self.retries <= len(timeouts) ts = _FakeTimeoutStrategy(timeouts) self.timeout_strategy = ts self.prev_tsop = self.curop self.prev_prio = job.ops[self.curop].priority return ts def testTimeout(self): ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(10)] # Create job job_id = 15801 job = self._CreateJob(self.queue, job_id, ops) self.job = job self.opcounter = itertools.count(0) opexec = _FakeExecOpCodeForProc(self.queue, self._BeforeStart, self._AfterStart) tsf = self._NewTimeoutStrategy self.assertFalse(self.done_lock_before_blocking) while True: proc = jqueue._JobProcessor(self.queue, opexec, job, _timeout_strategy_factory=tsf) self.assertRaises(IndexError, self.queue.GetNextUpdate) if self.curop is not None: self.prev_status = self.job.ops[self.curop].status self.lock_acq_prio = None result = proc(_nextop_fn=self._NextOpcode) assert self.curop is not None # Input priority should never be set or modified self.assertFalse(compat.any(hasattr(op.input, 
"priority") for op in job.ops)) if result == jqueue._JobProcessor.FINISHED or self.gave_lock: # Got lock and/or job is done, result must've been written self.assertFalse(job.cur_opctx) self.assertEqual(self.queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, self.queue.GetNextUpdate) self.assertEqual(self.lock_acq_prio, job.ops[self.curop].priority) self.assert_(job.ops[self.curop].exec_timestamp) if result == jqueue._JobProcessor.FINISHED: self.assertFalse(job.cur_opctx) break self.assertEqual(result, jqueue._JobProcessor.DEFER) if self.curop == 0: self.assertEqual(job.ops[self.curop].start_timestamp, job.start_timestamp) if self.gave_lock: # Opcode finished, but job not yet done self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) else: # Did not get locks self.assert_(job.cur_opctx) self.assertEqual(job.cur_opctx._timeout_strategy._fn, self.timeout_strategy.NextAttempt) self.assertFalse(job.ops[self.curop].exec_timestamp) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_WAITING) # If priority has changed since acquiring locks, the job must've been # updated if self.lock_acq_prio != job.ops[self.curop].priority: self.assertEqual(self.queue.GetNextUpdate(), (job, True)) self.assertRaises(IndexError, self.queue.GetNextUpdate) self.assert_(job.start_timestamp) self.assertFalse(job.end_timestamp) self.assertEqual(self.curop, len(job.ops) - 1) self.assertEqual(self.job, job) self.assertEqual(self.opcounter.next(), len(job.ops)) self.assert_(self.done_lock_before_blocking) self.assertRaises(IndexError, self.queue.GetNextUpdate) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opresult"]), [[op.input.result for op in job.ops]]) self.assertEqual(job.GetInfo(["opstatus"]), [len(job.ops) * [constants.OP_STATUS_SUCCESS]]) self.assert_(compat.all(op.start_timestamp and op.end_timestamp for op in job.ops)) # Calling the processor on a finished job should be a no-op self.assertEqual(jqueue._JobProcessor(self.queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertRaises(IndexError, self.queue.GetNextUpdate) class TestJobProcessorChangePriority(unittest.TestCase, _JobProcessorTestUtils): def setUp(self): self.queue = _FakeQueueForProc() self.opexecprio = [] def _BeforeStart(self, timeout, priority): self.assertFalse(self.queue.IsAcquired()) self.opexecprio.append(priority) def testChangePriorityWhileRunning(self): # Tests changing the priority on a job while it has finished opcodes # (successful) and more, unprocessed ones ops = [opcodes.OpTestDummy(result="Res%s" % i, fail=False) for i in range(3)] # Create job job_id = 3499 job = self._CreateJob(self.queue, job_id, ops) self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) opexec = _FakeExecOpCodeForProc(self.queue, self._BeforeStart, None) # Run first opcode self.assertEqual(jqueue._JobProcessor(self.queue, opexec, job)(), jqueue._JobProcessor.DEFER) # Job goes back to queued self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED, constants.OP_STATUS_QUEUED], ["Res0", None, None]]) self.assertEqual(self.opexecprio.pop(0), constants.OP_PRIO_DEFAULT) self.assertRaises(IndexError, self.opexecprio.pop, 0) # Change priority self.assertEqual(job.ChangePriority(-10), (True, ("Priorities of pending 
opcodes for job 3499 have" " been changed to -10"))) self.assertEqual(job.CalcPriority(), -10) # Process second opcode self.assertEqual(jqueue._JobProcessor(self.queue, opexec, job)(), jqueue._JobProcessor.DEFER) self.assertEqual(self.opexecprio.pop(0), -10) self.assertRaises(IndexError, self.opexecprio.pop, 0) # Check status self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_QUEUED) self.assertEqual(job.CalcPriority(), -10) self.assertEqual(job.GetInfo(["id"]), [job_id]) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_QUEUED]) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_QUEUED], ["Res0", "Res1", None]]) # Change priority once more self.assertEqual(job.ChangePriority(5), (True, ("Priorities of pending opcodes for job 3499 have" " been changed to 5"))) self.assertEqual(job.CalcPriority(), 5) # Process third opcode self.assertEqual(jqueue._JobProcessor(self.queue, opexec, job)(), jqueue._JobProcessor.FINISHED) self.assertEqual(self.opexecprio.pop(0), 5) self.assertRaises(IndexError, self.opexecprio.pop, 0) # Check status self.assertEqual(job.CalcStatus(), constants.JOB_STATUS_SUCCESS) self.assertEqual(job.CalcPriority(), constants.OP_PRIO_DEFAULT) self.assertEqual(job.GetInfo(["id"]), [job_id]) self.assertEqual(job.GetInfo(["status"]), [constants.JOB_STATUS_SUCCESS]) self.assertEqual(job.GetInfo(["opstatus", "opresult"]), [[constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS], ["Res0", "Res1", "Res2"]]) self.assertEqual(map(operator.attrgetter("priority"), job.ops), [constants.OP_PRIO_DEFAULT, -10, 5]) class _IdOnlyFakeJob: def __init__(self, job_id, priority=NotImplemented): self.id = str(job_id) self._priority = priority def CalcPriority(self): return self._priority class TestJobDependencyManager(unittest.TestCase): def setUp(self): self._status = [] self._queue = [] self.jdm = jqueue._JobDependencyManager(self._GetStatus, self._Enqueue) def _GetStatus(self, job_id): (exp_job_id, result) = self._status.pop(0) self.assertEqual(exp_job_id, job_id) return result def _Enqueue(self, jobs): self.assertFalse(self.jdm._lock.is_owned(), msg=("Must not own manager lock while re-adding jobs" " (potential deadlock)")) self._queue.append(jobs) def testNotFinalizedThenCancel(self): job = _IdOnlyFakeJob(17697) job_id = str(28625) self._status.append((job_id, constants.JOB_STATUS_RUNNING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, []) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self.assertEqual(self.jdm.GetLockInfo([query.LQ_PENDING]), [ ("job/28625", None, None, [("job", [job.id])]) ]) self._status.append((job_id, constants.JOB_STATUS_CANCELED)) (result, _) = self.jdm.CheckAndRegister(job, job_id, []) self.assertEqual(result, self.jdm.CANCEL) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) self.assertFalse(self.jdm.GetLockInfo([query.LQ_PENDING])) def testNotFinalizedThenQueued(self): # This can happen on a queue shutdown job = _IdOnlyFakeJob(1320) job_id = str(22971) for i in range(5): if i > 2: self._status.append((job_id, constants.JOB_STATUS_QUEUED)) else: self._status.append((job_id, constants.JOB_STATUS_RUNNING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, []) self.assertEqual(result, self.jdm.WAIT) 
self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self.assertEqual(self.jdm.GetLockInfo([query.LQ_PENDING]), [ ("job/22971", None, None, [("job", [job.id])]) ]) def testRequireCancel(self): job = _IdOnlyFakeJob(5278) job_id = str(9610) dep_status = [constants.JOB_STATUS_CANCELED] self._status.append((job_id, constants.JOB_STATUS_WAITING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self.assertEqual(self.jdm.GetLockInfo([query.LQ_PENDING]), [ ("job/9610", None, None, [("job", [job.id])]) ]) self._status.append((job_id, constants.JOB_STATUS_CANCELED)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) self.assertFalse(self.jdm.GetLockInfo([query.LQ_PENDING])) def testRequireError(self): job = _IdOnlyFakeJob(21459) job_id = str(25519) dep_status = [constants.JOB_STATUS_ERROR] self._status.append((job_id, constants.JOB_STATUS_WAITING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self._status.append((job_id, constants.JOB_STATUS_ERROR)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) self.assertFalse(self.jdm.GetLockInfo([query.LQ_PENDING])) def testRequireMultiple(self): dep_status = list(constants.JOBS_FINALIZED) for end_status in dep_status: job = _IdOnlyFakeJob(21343) job_id = str(14609) self._status.append((job_id, constants.JOB_STATUS_WAITING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self.assertEqual(self.jdm.GetLockInfo([query.LQ_PENDING]), [ ("job/14609", None, None, [("job", [job.id])]) ]) self._status.append((job_id, end_status)) (result, _) = self.jdm.CheckAndRegister(job, job_id, dep_status) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) self.assertFalse(self.jdm.GetLockInfo([query.LQ_PENDING])) def testNotify(self): job = _IdOnlyFakeJob(8227) job_id = str(4113) self._status.append((job_id, constants.JOB_STATUS_RUNNING)) (result, _) = self.jdm.CheckAndRegister(job, job_id, []) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self.jdm.NotifyWaiters(job_id) self.assertFalse(self._status) self.assertFalse(self.jdm._waiters) self.assertFalse(self.jdm.JobWaiting(job)) self.assertEqual(self._queue, [set([job])]) def testWrongStatus(self): job = _IdOnlyFakeJob(10102) job_id = str(1271) self._status.append((job_id, 
constants.JOB_STATUS_QUEUED)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self._status.append((job_id, constants.JOB_STATUS_ERROR)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.WRONGSTATUS) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) def testCorrectStatus(self): job = _IdOnlyFakeJob(24273) job_id = str(23885) self._status.append((job_id, constants.JOB_STATUS_QUEUED)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set([job]), }) self._status.append((job_id, constants.JOB_STATUS_SUCCESS)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) def testFinalizedRightAway(self): job = _IdOnlyFakeJob(224) job_id = str(3081) self._status.append((job_id, constants.JOB_STATUS_SUCCESS)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, { job_id: set(), }) # Force cleanup self.jdm.NotifyWaiters("0") self.assertFalse(self.jdm._waiters) self.assertFalse(self._status) self.assertFalse(self._queue) def testMultipleWaiting(self): # Use a deterministic random generator rnd = random.Random(21402) job_ids = map(str, rnd.sample(range(1, 10000), 150)) waiters = dict((job_ids.pop(), set(map(_IdOnlyFakeJob, [job_ids.pop() for _ in range(rnd.randint(1, 20))]))) for _ in range(10)) # Ensure there are no duplicate job IDs assert not utils.FindDuplicates(waiters.keys() + [job.id for jobs in waiters.values() for job in jobs]) # Register all jobs as waiters for job_id, job in [(job_id, job) for (job_id, jobs) in waiters.items() for job in jobs]: self._status.append((job_id, constants.JOB_STATUS_QUEUED)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.WAIT) self.assertFalse(self._status) self.assertFalse(self._queue) self.assertTrue(self.jdm.JobWaiting(job)) self.assertEqual(self.jdm._waiters, waiters) def _MakeSet((name, mode, owner_names, pending)): return (name, mode, owner_names, [(pendmode, set(pend)) for (pendmode, pend) in pending]) def _CheckLockInfo(): info = self.jdm.GetLockInfo([query.LQ_PENDING]) self.assertEqual(sorted(map(_MakeSet, info)), sorted([ ("job/%s" % job_id, None, None, [("job", set([job.id for job in jobs]))]) for job_id, jobs in waiters.items() if jobs ])) _CheckLockInfo() # Notify in random order for job_id in rnd.sample(waiters, len(waiters)): # Remove from pending waiter list jobs = waiters.pop(job_id) for job in jobs: self._status.append((job_id, constants.JOB_STATUS_SUCCESS)) (result, _) = self.jdm.CheckAndRegister(job, job_id, [constants.JOB_STATUS_SUCCESS]) self.assertEqual(result, self.jdm.CONTINUE) self.assertFalse(self._status) 
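# Summary of the protocol exercised here (inferred from these assertions):
# CheckAndRegister() returns a (result, message) pair, where result is one of
# the manager's constants -- CONTINUE (dependency already finalized as
# required), WAIT (park the job until NotifyWaiters(dep_job_id) re-queues it
# through the _Enqueue callback), CANCEL, WRONGSTATUS or ERROR.  A sketch of
# caller-side handling (hypothetical, not part of this test class):
#
#   (result, msg) = jdm.CheckAndRegister(job, dep_job_id,
#                                        [constants.JOB_STATUS_SUCCESS])
#   if result == jdm.WAIT:
#     pass  # job stays waiting; jdm.NotifyWaiters(dep_job_id) releases it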
self.assertFalse(self._queue) self.assertFalse(self.jdm.JobWaiting(job)) _CheckLockInfo() self.assertFalse(self.jdm.GetLockInfo([query.LQ_PENDING])) assert not waiters def testSelfDependency(self): job = _IdOnlyFakeJob(18937) self._status.append((job.id, constants.JOB_STATUS_SUCCESS)) (result, _) = self.jdm.CheckAndRegister(job, job.id, []) self.assertEqual(result, self.jdm.ERROR) def testJobDisappears(self): job = _IdOnlyFakeJob(30540) job_id = str(23769) def _FakeStatus(_): raise errors.JobLost("#msg#") jdm = jqueue._JobDependencyManager(_FakeStatus, None) (result, _) = jdm.CheckAndRegister(job, job_id, []) self.assertEqual(result, self.jdm.ERROR) self.assertFalse(jdm.JobWaiting(job)) self.assertFalse(jdm.GetLockInfo([query.LQ_PENDING])) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/import-export_unittest-helper0000744000000000000000000000506512244641676022414 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Helpers for testing import-export daemon""" import os import sys import errno import time from ganeti import constants from ganeti import utils from ganeti import objects from ganeti import serializer RETRY_INTERVAL = (0.1, 1.1, 1) TIMEOUT = int(os.getenv("TIMEOUT", 30)) VALIDITY = int(os.getenv("VALIDITY", 1)) def Log(msg, *args): if args: line = msg % args else: line = msg sys.stderr.write("%0.6f, pid %s: %s\n" % (time.time(), os.getpid(), line)) sys.stderr.flush() def _GetImportExportData(filename): try: data = utils.ReadFile(filename) except EnvironmentError, err: Log("%s = %s", filename, err) if err.errno != errno.ENOENT: raise raise utils.RetryAgain() Log("%s = %s", filename, data.strip()) return objects.ImportExportStatus.FromDict(serializer.LoadJson(data)) def _CheckConnected(filename): if not _GetImportExportData(filename).connected: Log("Not connected") raise utils.RetryAgain() Log("Connected") def _CheckListenPort(filename): port = _GetImportExportData(filename).listen_port if not port: Log("No port") raise utils.RetryAgain() Log("Listening on %s", port) return port def WaitForListenPort(filename): return utils.Retry(_CheckListenPort, RETRY_INTERVAL, TIMEOUT, args=(filename, )) def WaitForConnected(filename): utils.Retry(_CheckConnected, RETRY_INTERVAL, TIMEOUT, args=(filename, )) def main(): (filename, what) = sys.argv[1:] Log("Running helper for %s %s", filename, what) if what == "listen-port": print WaitForListenPort(filename) elif what == "connected": WaitForConnected(filename) elif what == "gencert": utils.GenerateSelfSignedSslCert(filename, validity=VALIDITY) else: raise Exception("Unknown command '%s'" % what) if __name__ == "__main__": main() ganeti-2.9.3/test/py/gnt-cli.test0000644000000000000000000000305312244641676016654 0ustar00rootroot00000000000000# test the various gnt-commands for common options 
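# Format note (inferred from the cases below; this resembles the
# shelltestrunner syntax): each block is a shell command followed by
# expectations -- ">>>/RE/" requires stdout to match the regular expression
# RE, ">>>2" introduces the expected stderr, and ">>>= N" requires exit
# code N.  For example:
#
#   $SCRIPTS/gnt-node --help
#   >>>/Usage:/
#   >>>= 0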
$SCRIPTS/gnt-node --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-node UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-node --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-instance --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-instance UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-instance --version >>>/^gnt-instance/ >>>2 >>>= 0 $SCRIPTS/gnt-os --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-os UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-os --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-group --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-group UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-group --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-job --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-job UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-job --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-cluster --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-cluster UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-cluster --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-backup --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-backup UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-backup --version >>>/^gnt-/ >>>2 >>>= 0 $SCRIPTS/gnt-debug --help >>>/Usage:/ >>>2 >>>= 0 $SCRIPTS/gnt-debug UNKNOWN >>>/Usage:/ >>>2 >>>= 1 $SCRIPTS/gnt-debug --version >>>/^gnt-/ >>>2 >>>= 0 # test that verifies all sub-commands can be run with --help, checking # that optparse doesn't reject the options list set -e; for c in scripts/gnt-*; do for i in $($c --help|grep '^ [^ ]'|awk '{print $1}'); do echo Checking command ${c##/}/$i; $c $i --help >/dev/null; done; done >>>= 0 ganeti-2.9.3/test/py/ganeti.rpc_unittest.py0000744000000000000000000010466712271422343020763 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.rpc""" import os import sys import unittest import random import tempfile from ganeti import constants from ganeti import compat from ganeti import rpc from ganeti import rpc_defs from ganeti import http from ganeti import errors from ganeti import serializer from ganeti import objects from ganeti import backend import testutils import mocks class _FakeRequestProcessor: def __init__(self, response_fn): self._response_fn = response_fn self.reqcount = 0 def __call__(self, reqs, lock_monitor_cb=None): assert lock_monitor_cb is None or callable(lock_monitor_cb) for req in reqs: self.reqcount += 1 self._response_fn(req) def GetFakeSimpleStoreClass(fn): class FakeSimpleStore: GetNodePrimaryIPList = fn GetPrimaryIPFamily = lambda _: None return FakeSimpleStore def _RaiseNotImplemented(): """Simple wrapper to raise NotImplementedError. 
""" raise NotImplementedError class TestRpcProcessor(unittest.TestCase): def _FakeAddressLookup(self, map): return lambda node_list: [map.get(node) for node in node_list] def _GetVersionResponse(self, req): self.assertEqual(req.host, "127.0.0.1") self.assertEqual(req.port, 24094) self.assertEqual(req.path, "/version") self.assertEqual(req.read_timeout, constants.RPC_TMO_URGENT) req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, 123)) def testVersionSuccess(self): resolver = rpc._StaticResolver(["127.0.0.1"]) http_proc = _FakeRequestProcessor(self._GetVersionResponse) proc = rpc._RpcProcessor(resolver, 24094) result = proc(["localhost"], "version", {"localhost": ""}, 60, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), ["localhost"]) lhresp = result["localhost"] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, "localhost") self.assertFalse(lhresp.fail_msg) self.assertEqual(lhresp.payload, 123) self.assertEqual(lhresp.call, "version") lhresp.Raise("should not raise") self.assertEqual(http_proc.reqcount, 1) def _ReadTimeoutResponse(self, req): self.assertEqual(req.host, "192.0.2.13") self.assertEqual(req.port, 19176) self.assertEqual(req.path, "/version") self.assertEqual(req.read_timeout, 12356) req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, -1)) def testReadTimeout(self): resolver = rpc._StaticResolver(["192.0.2.13"]) http_proc = _FakeRequestProcessor(self._ReadTimeoutResponse) proc = rpc._RpcProcessor(resolver, 19176) host = "node31856" body = {host: ""} result = proc([host], "version", body, 12356, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), [host]) lhresp = result[host] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, host) self.assertFalse(lhresp.fail_msg) self.assertEqual(lhresp.payload, -1) self.assertEqual(lhresp.call, "version") lhresp.Raise("should not raise") self.assertEqual(http_proc.reqcount, 1) def testOfflineNode(self): resolver = rpc._StaticResolver([rpc._OFFLINE]) http_proc = _FakeRequestProcessor(NotImplemented) proc = rpc._RpcProcessor(resolver, 30668) host = "n17296" body = {host: ""} result = proc([host], "version", body, 60, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), [host]) lhresp = result[host] self.assertTrue(lhresp.offline) self.assertEqual(lhresp.node, host) self.assertTrue(lhresp.fail_msg) self.assertFalse(lhresp.payload) self.assertEqual(lhresp.call, "version") # With a message self.assertRaises(errors.OpExecError, lhresp.Raise, "should raise") # No message self.assertRaises(errors.OpExecError, lhresp.Raise, None) self.assertEqual(http_proc.reqcount, 0) def _GetMultiVersionResponse(self, req): self.assert_(req.host.startswith("node")) self.assertEqual(req.port, 23245) self.assertEqual(req.path, "/version") req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, 987)) def testMultiVersionSuccess(self): nodes = ["node%s" % i for i in range(50)] body = dict((n, "") for n in nodes) resolver = rpc._StaticResolver(nodes) http_proc = _FakeRequestProcessor(self._GetMultiVersionResponse) proc = rpc._RpcProcessor(resolver, 23245) result = proc(nodes, "version", body, 60, NotImplemented, _req_process_fn=http_proc) self.assertEqual(sorted(result.keys()), sorted(nodes)) for name in nodes: lhresp = result[name] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, name) self.assertFalse(lhresp.fail_msg) 
self.assertEqual(lhresp.payload, 987) self.assertEqual(lhresp.call, "version") lhresp.Raise("should not raise") self.assertEqual(http_proc.reqcount, len(nodes)) def _GetVersionResponseFail(self, errinfo, req): self.assertEqual(req.path, "/version") req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((False, errinfo)) def testVersionFailure(self): resolver = rpc._StaticResolver(["aef9ur4i.example.com"]) proc = rpc._RpcProcessor(resolver, 5903) for errinfo in [None, "Unknown error"]: http_proc = \ _FakeRequestProcessor(compat.partial(self._GetVersionResponseFail, errinfo)) host = "aef9ur4i.example.com" body = {host: ""} result = proc(body.keys(), "version", body, 60, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), [host]) lhresp = result[host] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, host) self.assert_(lhresp.fail_msg) self.assertFalse(lhresp.payload) self.assertEqual(lhresp.call, "version") self.assertRaises(errors.OpExecError, lhresp.Raise, "failed") self.assertEqual(http_proc.reqcount, 1) def _GetHttpErrorResponse(self, httperrnodes, failnodes, req): self.assertEqual(req.path, "/vg_list") self.assertEqual(req.port, 15165) if req.host in httperrnodes: req.success = False req.error = "Node set up for HTTP errors" elif req.host in failnodes: req.success = True req.resp_status_code = 404 req.resp_body = serializer.DumpJson({ "code": 404, "message": "Method not found", "explain": "Explanation goes here", }) else: req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, hash(req.host))) def testHttpError(self): nodes = ["uaf6pbbv%s" % i for i in range(50)] body = dict((n, "") for n in nodes) resolver = rpc._StaticResolver(nodes) httperrnodes = set(nodes[1::7]) self.assertEqual(len(httperrnodes), 7) failnodes = set(nodes[2::3]) - httperrnodes self.assertEqual(len(failnodes), 14) self.assertEqual(len(set(nodes) - failnodes - httperrnodes), 29) proc = rpc._RpcProcessor(resolver, 15165) http_proc = \ _FakeRequestProcessor(compat.partial(self._GetHttpErrorResponse, httperrnodes, failnodes)) result = proc(nodes, "vg_list", body, constants.RPC_TMO_URGENT, NotImplemented, _req_process_fn=http_proc) self.assertEqual(sorted(result.keys()), sorted(nodes)) for name in nodes: lhresp = result[name] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, name) self.assertEqual(lhresp.call, "vg_list") if name in httperrnodes: self.assert_(lhresp.fail_msg) self.assertRaises(errors.OpExecError, lhresp.Raise, "failed") elif name in failnodes: self.assert_(lhresp.fail_msg) self.assertRaises(errors.OpPrereqError, lhresp.Raise, "failed", prereq=True, ecode=errors.ECODE_INVAL) else: self.assertFalse(lhresp.fail_msg) self.assertEqual(lhresp.payload, hash(name)) lhresp.Raise("should not raise") self.assertEqual(http_proc.reqcount, len(nodes)) def _GetInvalidResponseA(self, req): self.assertEqual(req.path, "/version") req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson(("This", "is", "an", "invalid", "response", "!", 1, 2, 3)) def _GetInvalidResponseB(self, req): self.assertEqual(req.path, "/version") req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson("invalid response") def testInvalidResponse(self): resolver = rpc._StaticResolver(["oqo7lanhly.example.com"]) proc = rpc._RpcProcessor(resolver, 19978) for fn in [self._GetInvalidResponseA, self._GetInvalidResponseB]: http_proc = _FakeRequestProcessor(fn) host = 
"oqo7lanhly.example.com" body = {host: ""} result = proc([host], "version", body, 60, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), [host]) lhresp = result[host] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, host) self.assert_(lhresp.fail_msg) self.assertFalse(lhresp.payload) self.assertEqual(lhresp.call, "version") self.assertRaises(errors.OpExecError, lhresp.Raise, "failed") self.assertEqual(http_proc.reqcount, 1) def _GetBodyTestResponse(self, test_data, req): self.assertEqual(req.host, "192.0.2.84") self.assertEqual(req.port, 18700) self.assertEqual(req.path, "/upload_file") self.assertEqual(serializer.LoadJson(req.post_data), test_data) req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, None)) def testResponseBody(self): test_data = { "Hello": "World", "xyz": range(10), } resolver = rpc._StaticResolver(["192.0.2.84"]) http_proc = _FakeRequestProcessor(compat.partial(self._GetBodyTestResponse, test_data)) proc = rpc._RpcProcessor(resolver, 18700) host = "node19759" body = {host: serializer.DumpJson(test_data)} result = proc([host], "upload_file", body, 30, NotImplemented, _req_process_fn=http_proc) self.assertEqual(result.keys(), [host]) lhresp = result[host] self.assertFalse(lhresp.offline) self.assertEqual(lhresp.node, host) self.assertFalse(lhresp.fail_msg) self.assertEqual(lhresp.payload, None) self.assertEqual(lhresp.call, "upload_file") lhresp.Raise("should not raise") self.assertEqual(http_proc.reqcount, 1) class TestSsconfResolver(unittest.TestCase): def testSsconfLookup(self): addr_list = ["192.0.2.%d" % n for n in range(0, 255, 13)] node_list = ["node%d.example.com" % n for n in range(0, 255, 13)] node_addr_list = [" ".join(t) for t in zip(node_list, addr_list)] ssc = GetFakeSimpleStoreClass(lambda _: node_addr_list) result = rpc._SsconfResolver(True, node_list, NotImplemented, ssc=ssc, nslookup_fn=NotImplemented) self.assertEqual(result, zip(node_list, addr_list, node_list)) def testNsLookup(self): addr_list = ["192.0.2.%d" % n for n in range(0, 255, 13)] node_list = ["node%d.example.com" % n for n in range(0, 255, 13)] ssc = GetFakeSimpleStoreClass(lambda _: []) node_addr_map = dict(zip(node_list, addr_list)) nslookup_fn = lambda name, family=None: node_addr_map.get(name) result = rpc._SsconfResolver(True, node_list, NotImplemented, ssc=ssc, nslookup_fn=nslookup_fn) self.assertEqual(result, zip(node_list, addr_list, node_list)) def testDisabledSsconfIp(self): addr_list = ["192.0.2.%d" % n for n in range(0, 255, 13)] node_list = ["node%d.example.com" % n for n in range(0, 255, 13)] ssc = GetFakeSimpleStoreClass(_RaiseNotImplemented) node_addr_map = dict(zip(node_list, addr_list)) nslookup_fn = lambda name, family=None: node_addr_map.get(name) result = rpc._SsconfResolver(False, node_list, NotImplemented, ssc=ssc, nslookup_fn=nslookup_fn) self.assertEqual(result, zip(node_list, addr_list, node_list)) def testBothLookups(self): addr_list = ["192.0.2.%d" % n for n in range(0, 255, 13)] node_list = ["node%d.example.com" % n for n in range(0, 255, 13)] n = len(addr_list) / 2 node_addr_list = [" ".join(t) for t in zip(node_list[n:], addr_list[n:])] ssc = GetFakeSimpleStoreClass(lambda _: node_addr_list) node_addr_map = dict(zip(node_list[:n], addr_list[:n])) nslookup_fn = lambda name, family=None: node_addr_map.get(name) result = rpc._SsconfResolver(True, node_list, NotImplemented, ssc=ssc, nslookup_fn=nslookup_fn) self.assertEqual(result, zip(node_list, addr_list, node_list)) def 
testAddressLookupIPv6(self): addr_list = ["2001:db8::%d" % n for n in range(0, 255, 11)] node_list = ["node%d.example.com" % n for n in range(0, 255, 11)] node_addr_list = [" ".join(t) for t in zip(node_list, addr_list)] ssc = GetFakeSimpleStoreClass(lambda _: node_addr_list) result = rpc._SsconfResolver(True, node_list, NotImplemented, ssc=ssc, nslookup_fn=NotImplemented) self.assertEqual(result, zip(node_list, addr_list, node_list)) class TestStaticResolver(unittest.TestCase): def test(self): addresses = ["192.0.2.%d" % n for n in range(0, 123, 7)] nodes = ["node%s.example.com" % n for n in range(0, 123, 7)] res = rpc._StaticResolver(addresses) self.assertEqual(res(nodes, NotImplemented), zip(nodes, addresses, nodes)) def testWrongLength(self): res = rpc._StaticResolver([]) self.assertRaises(AssertionError, res, ["abc"], NotImplemented) class TestNodeConfigResolver(unittest.TestCase): @staticmethod def _GetSingleOnlineNode(uuid): assert uuid == "node90-uuid" return objects.Node(name="node90.example.com", uuid=uuid, offline=False, primary_ip="192.0.2.90") @staticmethod def _GetSingleOfflineNode(uuid): assert uuid == "node100-uuid" return objects.Node(name="node100.example.com", uuid=uuid, offline=True, primary_ip="192.0.2.100") def testSingleOnline(self): self.assertEqual(rpc._NodeConfigResolver(self._GetSingleOnlineNode, NotImplemented, ["node90-uuid"], None), [("node90.example.com", "192.0.2.90", "node90-uuid")]) def testSingleOffline(self): self.assertEqual(rpc._NodeConfigResolver(self._GetSingleOfflineNode, NotImplemented, ["node100-uuid"], None), [("node100.example.com", rpc._OFFLINE, "node100-uuid")]) def testSingleOfflineWithAcceptOffline(self): fn = self._GetSingleOfflineNode assert fn("node100-uuid").offline self.assertEqual(rpc._NodeConfigResolver(fn, NotImplemented, ["node100-uuid"], rpc_defs.ACCEPT_OFFLINE_NODE), [("node100.example.com", "192.0.2.100", "node100-uuid")]) for i in [False, True, "", "Hello", 0, 1]: self.assertRaises(AssertionError, rpc._NodeConfigResolver, fn, NotImplemented, ["node100.example.com"], i) def testUnknownSingleNode(self): self.assertEqual(rpc._NodeConfigResolver(lambda _: None, NotImplemented, ["node110.example.com"], None), [("node110.example.com", "node110.example.com", "node110.example.com")]) def testMultiEmpty(self): self.assertEqual(rpc._NodeConfigResolver(NotImplemented, lambda: {}, [], None), []) def testMultiSomeOffline(self): nodes = dict(("node%s-uuid" % i, objects.Node(name="node%s.example.com" % i, offline=((i % 3) == 0), primary_ip="192.0.2.%s" % i, uuid="node%s-uuid" % i)) for i in range(1, 255)) # Resolve no names self.assertEqual(rpc._NodeConfigResolver(NotImplemented, lambda: nodes, [], None), []) # Offline, online and unknown hosts self.assertEqual(rpc._NodeConfigResolver(NotImplemented, lambda: nodes, ["node3-uuid", "node92-uuid", "node54-uuid", "unknown.example.com",], None), [ ("node3.example.com", rpc._OFFLINE, "node3-uuid"), ("node92.example.com", "192.0.2.92", "node92-uuid"), ("node54.example.com", rpc._OFFLINE, "node54-uuid"), ("unknown.example.com", "unknown.example.com", "unknown.example.com"), ]) class TestCompress(unittest.TestCase): def test(self): for data in ["", "Hello", "Hello World!\nnew\nlines"]: self.assertEqual(rpc._Compress(data), (constants.RPC_ENCODING_NONE, data)) for data in [512 * " ", 5242 * "Hello World!\n"]: compressed = rpc._Compress(data) self.assertEqual(len(compressed), 2) self.assertEqual(backend._Decompress(compressed), data) def testDecompression(self): self.assertRaises(AssertionError, 
backend._Decompress, "") self.assertRaises(AssertionError, backend._Decompress, [""]) self.assertRaises(AssertionError, backend._Decompress, ("unknown compression", "data")) self.assertRaises(Exception, backend._Decompress, (constants.RPC_ENCODING_ZLIB_BASE64, "invalid zlib data")) class TestRpcClientBase(unittest.TestCase): def testNoHosts(self): cdef = ("test_call", NotImplemented, None, constants.RPC_TMO_SLOW, [], None, None, NotImplemented) http_proc = _FakeRequestProcessor(NotImplemented) client = rpc._RpcClientBase(rpc._StaticResolver([]), NotImplemented, _req_process_fn=http_proc) self.assertEqual(client._Call(cdef, [], []), {}) # Test wrong number of arguments self.assertRaises(errors.ProgrammerError, client._Call, cdef, [], [0, 1, 2]) def testTimeout(self): def _CalcTimeout((arg1, arg2)): return arg1 + arg2 def _VerifyRequest(exp_timeout, req): self.assertEqual(req.read_timeout, exp_timeout) req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, hex(req.read_timeout))) resolver = rpc._StaticResolver([ "192.0.2.1", "192.0.2.2", ]) nodes = [ "node1.example.com", "node2.example.com", ] tests = [(100, None, 100), (30, None, 30)] tests.extend((_CalcTimeout, i, i + 300) for i in [0, 5, 16485, 30516]) for timeout, arg1, exp_timeout in tests: cdef = ("test_call", NotImplemented, None, timeout, [ ("arg1", None, NotImplemented), ("arg2", None, NotImplemented), ], None, None, NotImplemented) http_proc = _FakeRequestProcessor(compat.partial(_VerifyRequest, exp_timeout)) client = rpc._RpcClientBase(resolver, NotImplemented, _req_process_fn=http_proc) result = client._Call(cdef, nodes, [arg1, 300]) self.assertEqual(len(result), len(nodes)) self.assertTrue(compat.all(not res.fail_msg and res.payload == hex(exp_timeout) for res in result.values())) def testArgumentEncoder(self): (AT1, AT2) = range(1, 3) resolver = rpc._StaticResolver([ "192.0.2.5", "192.0.2.6", ]) nodes = [ "node5.example.com", "node6.example.com", ] encoders = { AT1: hex, AT2: hash, } cdef = ("test_call", NotImplemented, None, constants.RPC_TMO_NORMAL, [ ("arg0", None, NotImplemented), ("arg1", AT1, NotImplemented), ("arg1", AT2, NotImplemented), ], None, None, NotImplemented) def _VerifyRequest(req): req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, req.post_data)) http_proc = _FakeRequestProcessor(_VerifyRequest) for num in [0, 3796, 9032119]: client = rpc._RpcClientBase(resolver, encoders.get, _req_process_fn=http_proc) result = client._Call(cdef, nodes, ["foo", num, "Hello%s" % num]) self.assertEqual(len(result), len(nodes)) for res in result.values(): self.assertFalse(res.fail_msg) self.assertEqual(serializer.LoadJson(res.payload), ["foo", hex(num), hash("Hello%s" % num)]) def testPostProc(self): def _VerifyRequest(nums, req): req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, nums)) resolver = rpc._StaticResolver([ "192.0.2.90", "192.0.2.95", ]) nodes = [ "node90.example.com", "node95.example.com", ] def _PostProc(res): self.assertFalse(res.fail_msg) res.payload = sum(res.payload) return res cdef = ("test_call", NotImplemented, None, constants.RPC_TMO_NORMAL, [], None, _PostProc, NotImplemented) # Seeded random generator rnd = random.Random(20299) for i in [0, 4, 74, 1391]: nums = [rnd.randint(0, 1000) for _ in range(i)] http_proc = _FakeRequestProcessor(compat.partial(_VerifyRequest, nums)) client = rpc._RpcClientBase(resolver, NotImplemented, _req_process_fn=http_proc) result = 
client._Call(cdef, nodes, []) self.assertEqual(len(result), len(nodes)) for res in result.values(): self.assertFalse(res.fail_msg) self.assertEqual(res.payload, sum(nums)) def testPreProc(self): def _VerifyRequest(req): req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, req.post_data)) resolver = rpc._StaticResolver([ "192.0.2.30", "192.0.2.35", ]) nodes = [ "node30.example.com", "node35.example.com", ] def _PreProc(node, data): self.assertEqual(len(data), 1) return data[0] + node cdef = ("test_call", NotImplemented, None, constants.RPC_TMO_NORMAL, [ ("arg0", None, NotImplemented), ], _PreProc, None, NotImplemented) http_proc = _FakeRequestProcessor(_VerifyRequest) client = rpc._RpcClientBase(resolver, NotImplemented, _req_process_fn=http_proc) for prefix in ["foo", "bar", "baz"]: result = client._Call(cdef, nodes, [prefix]) self.assertEqual(len(result), len(nodes)) for (idx, (node, res)) in enumerate(result.items()): self.assertFalse(res.fail_msg) self.assertEqual(serializer.LoadJson(res.payload), prefix + node) def testResolverOptions(self): def _VerifyRequest(req): req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, req.post_data)) nodes = [ "node30.example.com", "node35.example.com", ] def _Resolver(expected, hosts, options): self.assertEqual(hosts, nodes) self.assertEqual(options, expected) return zip(hosts, nodes, hosts) def _DynamicResolverOptions((arg0, )): return sum(arg0) tests = [ (None, None, None), (rpc_defs.ACCEPT_OFFLINE_NODE, None, rpc_defs.ACCEPT_OFFLINE_NODE), (False, None, False), (True, None, True), (0, None, 0), (_DynamicResolverOptions, [1, 2, 3], 6), (_DynamicResolverOptions, range(4, 19), 165), ] for (resolver_opts, arg0, expected) in tests: cdef = ("test_call", NotImplemented, resolver_opts, constants.RPC_TMO_NORMAL, [ ("arg0", None, NotImplemented), ], None, None, NotImplemented) http_proc = _FakeRequestProcessor(_VerifyRequest) client = rpc._RpcClientBase(compat.partial(_Resolver, expected), NotImplemented, _req_process_fn=http_proc) result = client._Call(cdef, nodes, [arg0]) self.assertEqual(len(result), len(nodes)) for (idx, (node, res)) in enumerate(result.items()): self.assertFalse(res.fail_msg) class _FakeConfigForRpcRunner: GetAllNodesInfo = NotImplemented def __init__(self, cluster=NotImplemented): self._cluster = cluster def GetNodeInfo(self, name): return objects.Node(name=name) def GetClusterInfo(self): return self._cluster def GetInstanceDiskParams(self, _): return constants.DISK_DT_DEFAULTS class TestRpcRunner(unittest.TestCase): def testUploadFile(self): data = 1779 * "Hello World\n" tmpfile = tempfile.NamedTemporaryFile() tmpfile.write(data) tmpfile.flush() st = os.stat(tmpfile.name) def _VerifyRequest(req): (uldata, ) = serializer.LoadJson(req.post_data) self.assertEqual(len(uldata), 7) self.assertEqual(uldata[0], tmpfile.name) self.assertEqual(list(uldata[1]), list(rpc._Compress(data))) self.assertEqual(uldata[2], st.st_mode) self.assertEqual(uldata[3], "user%s" % os.getuid()) self.assertEqual(uldata[4], "group%s" % os.getgid()) self.assertTrue(uldata[5] is not None) self.assertEqual(uldata[6], st.st_mtime) req.success = True req.resp_status_code = http.HTTP_OK req.resp_body = serializer.DumpJson((True, None)) http_proc = _FakeRequestProcessor(_VerifyRequest) std_runner = rpc.RpcRunner(_FakeConfigForRpcRunner(), None, _req_process_fn=http_proc, _getents=mocks.FakeGetentResolver) cfg_runner = rpc.ConfigRunner(None, ["192.0.2.13"], _req_process_fn=http_proc, 
_getents=mocks.FakeGetentResolver) nodes = [ "node1.example.com", ] for runner in [std_runner, cfg_runner]: result = runner.call_upload_file(nodes, tmpfile.name) self.assertEqual(len(result), len(nodes)) for (idx, (node, res)) in enumerate(result.items()): self.assertFalse(res.fail_msg) def testEncodeInstance(self): cluster = objects.Cluster(hvparams={ constants.HT_KVM: { constants.HV_CDROM_IMAGE_PATH: "foo", }, }, beparams={ constants.PP_DEFAULT: { constants.BE_MAXMEM: 8192, }, }, os_hvp={}, osparams={ "linux": { "role": "unknown", }, }) cluster.UpgradeConfig() inst = objects.Instance(name="inst1.example.com", hypervisor=constants.HT_KVM, os="linux", hvparams={ constants.HV_CDROM_IMAGE_PATH: "bar", constants.HV_ROOT_PATH: "/tmp", }, beparams={ constants.BE_MINMEM: 128, constants.BE_MAXMEM: 256, }, nics=[ objects.NIC(nicparams={ constants.NIC_MODE: "mymode", }), ], disk_template=constants.DT_PLAIN, disks=[ objects.Disk(dev_type=constants.DT_PLAIN, size=4096, logical_id=("vg", "disk6120")), objects.Disk(dev_type=constants.DT_PLAIN, size=1024, logical_id=("vg", "disk8508")), ]) inst.UpgradeConfig() cfg = _FakeConfigForRpcRunner(cluster=cluster) runner = rpc.RpcRunner(cfg, None, _req_process_fn=NotImplemented, _getents=mocks.FakeGetentResolver) def _CheckBasics(result): self.assertEqual(result["name"], "inst1.example.com") self.assertEqual(result["os"], "linux") self.assertEqual(result["beparams"][constants.BE_MINMEM], 128) self.assertEqual(len(result["nics"]), 1) self.assertEqual(result["nics"][0]["nicparams"][constants.NIC_MODE], "mymode") # Generic object serialization result = runner._encoder((rpc_defs.ED_OBJECT_DICT, inst)) _CheckBasics(result) self.assertEqual(len(result["hvparams"]), 2) result = runner._encoder((rpc_defs.ED_OBJECT_DICT_LIST, 5 * [inst])) map(_CheckBasics, result) map(lambda r: self.assertEqual(len(r["hvparams"]), 2), result) # Just an instance result = runner._encoder((rpc_defs.ED_INST_DICT, inst)) _CheckBasics(result) self.assertEqual(result["beparams"][constants.BE_MAXMEM], 256) self.assertEqual(result["hvparams"][constants.HV_CDROM_IMAGE_PATH], "bar") self.assertEqual(result["hvparams"][constants.HV_ROOT_PATH], "/tmp") self.assertEqual(result["osparams"], { "role": "unknown", }) self.assertEqual(len(result["hvparams"]), len(constants.HVC_DEFAULTS[constants.HT_KVM])) # Instance with OS parameters result = runner._encoder((rpc_defs.ED_INST_DICT_OSP_DP, (inst, { "role": "webserver", "other": "field", }))) _CheckBasics(result) self.assertEqual(result["beparams"][constants.BE_MAXMEM], 256) self.assertEqual(result["hvparams"][constants.HV_CDROM_IMAGE_PATH], "bar") self.assertEqual(result["hvparams"][constants.HV_ROOT_PATH], "/tmp") self.assertEqual(result["osparams"], { "role": "webserver", "other": "field", }) # Instance with hypervisor and backend parameters result = runner._encoder((rpc_defs.ED_INST_DICT_HVP_BEP_DP, (inst, { constants.HV_BOOT_ORDER: "xyz", }, { constants.BE_VCPUS: 100, constants.BE_MAXMEM: 4096, }))) _CheckBasics(result) self.assertEqual(result["beparams"][constants.BE_MAXMEM], 4096) self.assertEqual(result["beparams"][constants.BE_VCPUS], 100) self.assertEqual(result["hvparams"][constants.HV_BOOT_ORDER], "xyz") self.assertEqual(result["disks"], [{ "dev_type": constants.DT_PLAIN, "size": 4096, "logical_id": ("vg", "disk6120"), "params": constants.DISK_DT_DEFAULTS[inst.disk_template], }, { "dev_type": constants.DT_PLAIN, "size": 1024, "logical_id": ("vg", "disk8508"), "params": constants.DISK_DT_DEFAULTS[inst.disk_template], }]) 
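# Why the hvparams assertions just above hold: the ED_INST_DICT encoder
# fills the instance's sparse overrides on top of the cluster-level
# defaults, conceptually (a sketch, not ganeti's exact helper):
#
#   filled = constants.HVC_DEFAULTS[inst.hypervisor].copy()
#   filled.update(cluster.hvparams.get(inst.hypervisor, {}))
#   filled.update(inst.hvparams)
#
# so len(result["hvparams"]) matches the full default set even though the
# instance itself only overrides two values.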
    self.assertTrue(compat.all(disk.params == {} for disk in inst.disks),
                    msg="Configuration objects were modified")


class TestLegacyNodeInfo(unittest.TestCase):
  KEY_BOOT = "bootid"
  KEY_VG0 = "name"
  KEY_VG1 = "storage_free"
  KEY_VG2 = "storage_size"
  KEY_HV = "cpu_count"
  KEY_SP1 = "spindles_free"
  KEY_SP2 = "spindles_total"
  KEY_ST = "type" # key for storage type
  VAL_BOOT = 0
  VAL_VG0 = "xy"
  VAL_VG1 = 11
  VAL_VG2 = 12
  VAL_VG3 = "lvm-vg"
  VAL_HV = 2
  VAL_SP0 = "ab"
  VAL_SP1 = 31
  VAL_SP2 = 32
  VAL_SP3 = "lvm-pv"
  DICT_VG = {
    KEY_VG0: VAL_VG0,
    KEY_VG1: VAL_VG1,
    KEY_VG2: VAL_VG2,
    KEY_ST: VAL_VG3,
    }
  DICT_HV = {KEY_HV: VAL_HV}
  DICT_SP = {
    KEY_ST: VAL_SP3,
    KEY_VG0: VAL_SP0,
    KEY_VG1: VAL_SP1,
    KEY_VG2: VAL_SP2,
    }
  STD_LST = [VAL_BOOT, [DICT_VG, DICT_SP], [DICT_HV]]
  STD_DICT = {
    KEY_BOOT: VAL_BOOT,
    KEY_VG0: VAL_VG0,
    KEY_VG1: VAL_VG1,
    KEY_VG2: VAL_VG2,
    KEY_HV: VAL_HV,
    }

  def testStandard(self):
    result = rpc.MakeLegacyNodeInfo(self.STD_LST)
    self.assertEqual(result, self.STD_DICT)

  def testSpindlesRequired(self):
    my_lst = [self.VAL_BOOT, [], [self.DICT_HV]]
    self.assertRaises(errors.OpExecError, rpc.MakeLegacyNodeInfo,
                      my_lst, require_spindles=True)

  def testNoSpindlesRequired(self):
    my_lst = [self.VAL_BOOT, [], [self.DICT_HV]]
    result = rpc.MakeLegacyNodeInfo(my_lst, require_spindles=False)
    self.assertEqual(result, {self.KEY_BOOT: self.VAL_BOOT,
                              self.KEY_HV: self.VAL_HV})
    result = rpc.MakeLegacyNodeInfo(self.STD_LST, require_spindles=False)
    self.assertEqual(result, self.STD_DICT)


class TestAddDefaultStorageInfoToLegacyNodeInfo(unittest.TestCase):
  def setUp(self):
    self.free_storage_file = 23
    self.total_storage_file = 42
    self.free_storage_lvm = 69
    self.total_storage_lvm = 666
    self.node_info = [{"name": "myfile", "type": constants.ST_FILE,
                       "storage_free": self.free_storage_file,
                       "storage_size": self.total_storage_file},
                      {"name": "myvg", "type": constants.ST_LVM_VG,
                       "storage_free": self.free_storage_lvm,
                       "storage_size": self.total_storage_lvm},
                      {"name": "myspindle", "type": constants.ST_LVM_PV,
                       "storage_free": 33, "storage_size": 44}]

  def testAddDefaultStorageInfoToLegacyNodeInfo(self):
    result = {}
    rpc._AddDefaultStorageInfoToLegacyNodeInfo(result, self.node_info)
    self.assertEqual(self.free_storage_file, result["storage_free"])
    self.assertEqual(self.total_storage_file, result["storage_size"])

  def testAddDefaultStorageInfoToLegacyNodeInfoNoDefaults(self):
    result = {}
    rpc._AddDefaultStorageInfoToLegacyNodeInfo(result, self.node_info[-1:])
    self.assertFalse("storage_free" in result)
    self.assertFalse("storage_size" in result)


if __name__ == "__main__":
  testutils.GanetiTestProgram()
ganeti-2.9.3/test/py/ganeti.hypervisor.hv_chroot_unittest.py0000744000000000000000000000325212271422343024367 0ustar00rootroot00000000000000#!/usr/bin/python
#
# Copyright (C) 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
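# The TestLegacyNodeInfo cases in the rpc tests above pin down how the
# new-style node info triple [bootid, [vg_info, spindle_info, ...],
# [hv_info]] collapses into one flat legacy dict. A minimal sketch of that
# translation, assuming only the key names and the "lvm-vg" type tag used
# in those tests (illustrative, not rpc.MakeLegacyNodeInfo itself):
def MakeLegacyNodeInfoSketch(data):
  (bootid, storage_list, (hv_info, )) = data
  result = {"bootid": bootid}
  result.update(hv_info)
  for info in storage_list:
    # Only the volume-group entry contributes storage keys; its "type"
    # discriminator is dropped from the flattened result.
    if info.get("type") == "lvm-vg":
      result.update((k, v) for (k, v) in info.items() if k != "type")
  return result

# For TestLegacyNodeInfo.STD_LST this yields exactly STD_DICT.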
"""Script for testing ganeti.hypervisor.hv_chroot""" import unittest import tempfile import shutil from ganeti import constants from ganeti import objects from ganeti import hypervisor from ganeti.hypervisor import hv_chroot import testutils class TestConsole(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def test(self): instance = objects.Instance(name="fake.example.com", primary_node="node837-uuid") node = objects.Node(name="node837", uuid="node837-uuid") cons = hv_chroot.ChrootManager.GetInstanceConsole(instance, node, {}, {}, root_dir=self.tmpdir) self.assertTrue(cons.Validate()) self.assertEqual(cons.kind, constants.CONS_SSH) self.assertEqual(cons.host, node.name) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.hypervisor.hv_fake_unittest.py0000744000000000000000000000254012271422343023776 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.hypervisor.hv_fake""" import unittest from ganeti import constants from ganeti import objects from ganeti import hypervisor from ganeti.hypervisor import hv_fake import testutils class TestConsole(unittest.TestCase): def test(self): instance = objects.Instance(name="fake.example.com") node = objects.Node(name="fakenode.example.com") cons = hv_fake.FakeHypervisor.GetInstanceConsole(instance, node, {}, {}) self.assertTrue(cons.Validate()) self.assertEqual(cons.kind, constants.CONS_MESSAGE) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.storage.filestorage_unittest.py0000744000000000000000000001613012271422343024131 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Script for unittesting the ganeti.storage.filestorage module""" import os import shutil import tempfile import unittest from ganeti import errors from ganeti.storage import filestorage from ganeti import utils import testutils class TestFileStorageSpaceInfo(unittest.TestCase): def testSpaceInfoPathInvalid(self): """Tests that an error is raised when the given path is not existing. """ self.assertRaises(errors.CommandError, filestorage.GetFileStorageSpaceInfo, "/path/does/not/exist/") def testSpaceInfoPathValid(self): """Smoke test run on a directory that exists for sure. """ filestorage.GetFileStorageSpaceInfo("/") class TestCheckFileStoragePath(unittest.TestCase): def _WriteAllowedFile(self, allowed_paths_filename, allowed_paths): allowed_paths_file = open(allowed_paths_filename, 'w') allowed_paths_file.write('\n'.join(allowed_paths)) allowed_paths_file.close() def setUp(self): self.tmpdir = tempfile.mkdtemp() self.allowed_paths = [os.path.join(self.tmpdir, "allowed")] for path in self.allowed_paths: os.mkdir(path) self.allowed_paths_filename = os.path.join(self.tmpdir, "allowed-path-file") self._WriteAllowedFile(self.allowed_paths_filename, self.allowed_paths) def tearDown(self): shutil.rmtree(self.tmpdir) def testCheckFileStoragePathExistance(self): filestorage._CheckFileStoragePathExistance(self.tmpdir) def testCheckFileStoragePathExistanceFail(self): path = os.path.join(self.tmpdir, "does/not/exist") self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePathExistance, path) def testCheckFileStoragePathNotWritable(self): path = os.path.join(self.tmpdir, "isnotwritable/") os.mkdir(path) os.chmod(path, 0) self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePathExistance, path) os.chmod(path, 777) def testCheckFileStoragePath(self): path = os.path.join(self.allowed_paths[0], "allowedsubdir") os.mkdir(path) result = filestorage.CheckFileStoragePath( path, _allowed_paths_file=self.allowed_paths_filename) self.assertEqual(None, result) def testCheckFileStoragePathNotAllowed(self): path = os.path.join(self.tmpdir, "notallowed") result = filestorage.CheckFileStoragePath( path, _allowed_paths_file=self.allowed_paths_filename) self.assertTrue("not acceptable" in result) class TestLoadAllowedFileStoragePaths(testutils.GanetiTestCase): def testDevNull(self): self.assertEqual(filestorage._LoadAllowedFileStoragePaths("/dev/null"), []) def testNonExistantFile(self): filename = "/tmp/this/file/does/not/exist" assert not os.path.exists(filename) self.assertEqual(filestorage._LoadAllowedFileStoragePaths(filename), []) def test(self): tmpfile = self._CreateTempFile() utils.WriteFile(tmpfile, data=""" # This is a test file /tmp /srv/storage relative/path """) self.assertEqual(filestorage._LoadAllowedFileStoragePaths(tmpfile), [ "/tmp", "/srv/storage", "relative/path", ]) class TestComputeWrongFileStoragePathsInternal(unittest.TestCase): def testPaths(self): paths = filestorage._GetForbiddenFileStoragePaths() for path in ["/bin", "/usr/local/sbin", "/lib64", "/etc", "/sys"]: self.assertTrue(path in paths) self.assertEqual(set(map(os.path.normpath, paths)), paths) def test(self): vfsp = filestorage._ComputeWrongFileStoragePaths self.assertEqual(vfsp([]), []) self.assertEqual(vfsp(["/tmp"]), []) self.assertEqual(vfsp(["/bin/ls"]), ["/bin/ls"]) self.assertEqual(vfsp(["/bin"]), ["/bin"]) self.assertEqual(vfsp(["/usr/sbin/vim", "/srv/file-storage"]), ["/usr/sbin/vim"]) class TestComputeWrongFileStoragePaths(testutils.GanetiTestCase): def test(self): tmpfile = 
self._CreateTempFile() utils.WriteFile(tmpfile, data=""" /tmp x/y///z/relative # This is a test file /srv/storage /bin /usr/local/lib32/ relative/path """) self.assertEqual( filestorage.ComputeWrongFileStoragePaths(_filename=tmpfile), ["/bin", "/usr/local/lib32", "relative/path", "x/y/z/relative", ]) class TestCheckFileStoragePathInternal(unittest.TestCase): def testNonAbsolute(self): for i in ["", "tmp", "foo/bar/baz"]: self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePath, i, ["/tmp"]) self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePath, "/tmp", ["tmp", "xyz"]) def testNoAllowed(self): self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePath, "/tmp", []) def testNoAdditionalPathComponent(self): self.assertRaises(errors.FileStoragePathError, filestorage._CheckFileStoragePath, "/tmp/foo", ["/tmp/foo"]) def testAllowed(self): filestorage._CheckFileStoragePath("/tmp/foo/a", ["/tmp/foo"]) filestorage._CheckFileStoragePath("/tmp/foo/a/x", ["/tmp/foo"]) class TestCheckFileStoragePathExistance(testutils.GanetiTestCase): def testNonExistantFile(self): filename = "/tmp/this/file/does/not/exist" assert not os.path.exists(filename) self.assertRaises(errors.FileStoragePathError, filestorage.CheckFileStoragePathAcceptance, "/bin/", _filename=filename) self.assertRaises(errors.FileStoragePathError, filestorage.CheckFileStoragePathAcceptance, "/srv/file-storage", _filename=filename) def testAllowedPath(self): tmpfile = self._CreateTempFile() utils.WriteFile(tmpfile, data=""" /srv/storage """) filestorage.CheckFileStoragePathAcceptance( "/srv/storage/inst1", _filename=tmpfile) # No additional path component self.assertRaises(errors.FileStoragePathError, filestorage.CheckFileStoragePathAcceptance, "/srv/storage", _filename=tmpfile) # Forbidden path self.assertRaises(errors.FileStoragePathError, filestorage.CheckFileStoragePathAcceptance, "/usr/lib64/xyz", _filename=tmpfile) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.ht_unittest.py0000744000000000000000000002234212244641676020613 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
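# The ht module tested below is a small library of composable
# type-checking predicates. A minimal sketch of that combinator style,
# with local stand-ins rather than ganeti's implementations:
def TString(val):
  return isinstance(val, basestring)

def TAnd(*checks):
  # Combinators return new predicates, so checks compose freely.
  return lambda val: all(fn(val) for fn in checks)

def TListOf(check):
  return lambda val: isinstance(val, list) and all(check(i) for i in val)

# Example: a non-empty list of strings.
TNonEmptyStringList = TAnd(TListOf(TString), lambda val: bool(val))
assert TNonEmptyStringList(["a", "b"])
assert not TNonEmptyStringList([])
assert not TNonEmptyStringList([1, 2])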
"""Script for testing ganeti.ht""" import unittest from ganeti import constants from ganeti import ht import testutils class TestTypeChecks(unittest.TestCase): def testNone(self): self.assertFalse(ht.TNotNone(None)) self.assertTrue(ht.TNone(None)) for val in [0, True, "", "Hello World", [], range(5)]: self.assertTrue(ht.TNotNone(val)) self.assertFalse(ht.TNone(val)) def testBool(self): self.assertTrue(ht.TBool(True)) self.assertTrue(ht.TBool(False)) for val in [0, None, "", [], "Hello"]: self.assertFalse(ht.TBool(val)) for val in [True, -449, 1, 3, "x", "abc", [1, 2]]: self.assertTrue(ht.TTrue(val)) for val in [False, 0, None, []]: self.assertFalse(ht.TTrue(val)) def testInt(self): for val in [-100, -3, 0, 16, 128, 923874]: self.assertTrue(ht.TInt(val)) self.assertTrue(ht.TNumber(val)) for val in [False, True, None, "", [], "Hello", 0.0, 0.23, -3818.163]: self.assertFalse(ht.TInt(val)) for val in range(0, 100, 4): self.assertTrue(ht.TNonNegativeInt(val)) neg = -(val + 1) self.assertFalse(ht.TNonNegativeInt(neg)) self.assertFalse(ht.TPositiveInt(neg)) self.assertFalse(ht.TNonNegativeInt(0.1 + val)) self.assertFalse(ht.TPositiveInt(0.1 + val)) for val in [0, 0.1, 0.9, -0.3]: self.assertFalse(ht.TPositiveInt(val)) for val in range(1, 100, 4): self.assertTrue(ht.TPositiveInt(val)) self.assertFalse(ht.TPositiveInt(0.1 + val)) def testFloat(self): for val in [-100.21, -3.0, 0.0, 16.12, 128.3433, 923874.928]: self.assertTrue(ht.TFloat(val)) self.assertTrue(ht.TNumber(val)) for val in [False, True, None, "", [], "Hello", 0, 28, -1, -3281]: self.assertFalse(ht.TFloat(val)) def testNumber(self): for val in [-100, -3, 0, 16, 128, 923874, -100.21, -3.0, 0.0, 16.12, 128.3433, 923874.928]: self.assertTrue(ht.TNumber(val)) for val in [False, True, None, "", [], "Hello", "1"]: self.assertFalse(ht.TNumber(val)) def testString(self): for val in ["", "abc", "Hello World", "123", u"", u"\u272C", u"abc"]: self.assertTrue(ht.TString(val)) for val in [False, True, None, [], 0, 1, 5, -193, 93.8582]: self.assertFalse(ht.TString(val)) def testElemOf(self): fn = ht.TElemOf(range(10)) self.assertTrue(fn(0)) self.assertTrue(fn(3)) self.assertTrue(fn(9)) self.assertFalse(fn(-1)) self.assertFalse(fn(100)) fn = ht.TElemOf([]) self.assertFalse(fn(0)) self.assertFalse(fn(100)) self.assertFalse(fn(True)) fn = ht.TElemOf(["Hello", "World"]) self.assertTrue(fn("Hello")) self.assertTrue(fn("World")) self.assertFalse(fn("e")) def testList(self): for val in [[], range(10), ["Hello", "World", "!"]]: self.assertTrue(ht.TList(val)) for val in [False, True, None, {}, 0, 1, 5, -193, 93.8582]: self.assertFalse(ht.TList(val)) def testDict(self): for val in [{}, dict.fromkeys(range(10)), {"Hello": [], "World": "!"}]: self.assertTrue(ht.TDict(val)) for val in [False, True, None, [], 0, 1, 5, -193, 93.8582]: self.assertFalse(ht.TDict(val)) def testIsLength(self): fn = ht.TIsLength(10) self.assertTrue(fn(range(10))) self.assertFalse(fn(range(1))) self.assertFalse(fn(range(100))) def testAnd(self): fn = ht.TAnd(ht.TNotNone, ht.TString) self.assertTrue(fn("")) self.assertFalse(fn(1)) self.assertFalse(fn(None)) def testOr(self): fn = ht.TMaybe(ht.TAnd(ht.TString, ht.TIsLength(5))) self.assertTrue(fn("12345")) self.assertTrue(fn(None)) self.assertFalse(fn(1)) self.assertFalse(fn("")) self.assertFalse(fn("abc")) def testMap(self): self.assertTrue(ht.TMap(str, ht.TString)(123)) self.assertTrue(ht.TMap(int, ht.TInt)("9999")) self.assertFalse(ht.TMap(lambda x: x + 100, ht.TString)(123)) def testNonEmptyString(self): 
self.assertTrue(ht.TNonEmptyString("xyz")) self.assertTrue(ht.TNonEmptyString("Hello World")) self.assertFalse(ht.TNonEmptyString("")) self.assertFalse(ht.TNonEmptyString(None)) self.assertFalse(ht.TNonEmptyString([])) def testMaybeString(self): self.assertTrue(ht.TMaybeString("xyz")) self.assertTrue(ht.TMaybeString("Hello World")) self.assertTrue(ht.TMaybeString(None)) self.assertFalse(ht.TMaybeString("")) self.assertFalse(ht.TMaybeString([])) def testMaybeBool(self): self.assertTrue(ht.TMaybeBool(False)) self.assertTrue(ht.TMaybeBool(True)) self.assertTrue(ht.TMaybeBool(None)) self.assertFalse(ht.TMaybeBool([])) self.assertFalse(ht.TMaybeBool("0")) self.assertFalse(ht.TMaybeBool("False")) def testListOf(self): fn = ht.TListOf(ht.TNonEmptyString) self.assertTrue(fn([])) self.assertTrue(fn(["x"])) self.assertTrue(fn(["Hello", "World"])) self.assertFalse(fn(None)) self.assertFalse(fn(False)) self.assertFalse(fn(range(3))) self.assertFalse(fn(["x", None])) def testDictOf(self): fn = ht.TDictOf(ht.TNonEmptyString, ht.TInt) self.assertTrue(fn({})) self.assertTrue(fn({"x": 123, "y": 999})) self.assertFalse(fn(None)) self.assertFalse(fn({1: "x"})) self.assertFalse(fn({"x": ""})) self.assertFalse(fn({"x": None})) self.assertFalse(fn({"": 8234})) def testStrictDictRequireAllExclusive(self): fn = ht.TStrictDict(True, True, { "a": ht.TInt, }) self.assertFalse(fn(1)) self.assertFalse(fn(None)) self.assertFalse(fn({})) self.assertFalse(fn({"a": "Hello", })) self.assertFalse(fn({"unknown": 999,})) self.assertFalse(fn({"unknown": None,})) self.assertTrue(fn({"a": 123, })) self.assertTrue(fn({"a": -5, })) fn = ht.TStrictDict(True, True, { "a": ht.TInt, "x": ht.TString, }) self.assertFalse(fn({})) self.assertFalse(fn({"a": -5, })) self.assertTrue(fn({"a": 123, "x": "", })) self.assertFalse(fn({"a": 123, "x": None, })) def testStrictDictExclusive(self): fn = ht.TStrictDict(False, True, { "a": ht.TInt, "b": ht.TList, }) self.assertTrue(fn({})) self.assertTrue(fn({"a": 123, })) self.assertTrue(fn({"b": range(4), })) self.assertFalse(fn({"b": 123, })) self.assertFalse(fn({"foo": {}, })) self.assertFalse(fn({"bar": object(), })) def testStrictDictRequireAll(self): fn = ht.TStrictDict(True, False, { "a": ht.TInt, "m": ht.TInt, }) self.assertTrue(fn({"a": 1, "m": 2, "bar": object(), })) self.assertFalse(fn({})) self.assertFalse(fn({"a": 1, "bar": object(), })) self.assertFalse(fn({"a": 1, "m": [], "bar": object(), })) def testStrictDict(self): fn = ht.TStrictDict(False, False, { "a": ht.TInt, }) self.assertTrue(fn({})) self.assertFalse(fn({"a": ""})) self.assertTrue(fn({"a": 11})) self.assertTrue(fn({"other": 11})) self.assertTrue(fn({"other": object()})) def testJobId(self): for i in [0, 1, 4395, 2347625220]: self.assertTrue(ht.TJobId(i)) self.assertTrue(ht.TJobId(str(i))) self.assertFalse(ht.TJobId(-(i + 1))) for i in ["", "-", ".", ",", "a", "99j", "job-123", "\t", " 83 ", None, [], {}, object()]: self.assertFalse(ht.TJobId(i)) def testRelativeJobId(self): for i in [-1, -93, -4395]: self.assertTrue(ht.TRelativeJobId(i)) self.assertFalse(ht.TRelativeJobId(str(i))) for i in [0, 1, 2, 10, 9289, "", "0", "-1", "-999"]: self.assertFalse(ht.TRelativeJobId(i)) self.assertFalse(ht.TRelativeJobId(str(i))) def testItems(self): self.assertRaises(AssertionError, ht.TItems, []) fn = ht.TItems([ht.TString]) self.assertFalse(fn([0])) self.assertFalse(fn([None])) self.assertTrue(fn(["Hello"])) self.assertTrue(fn(["Hello", "World"])) self.assertTrue(fn(["Hello", 0, 1, 2, "anything"])) fn = ht.TItems([ht.TAny, ht.TInt, 
ht.TAny]) self.assertTrue(fn(["Hello", 0, []])) self.assertTrue(fn(["Hello", 893782])) self.assertTrue(fn([{}, -938210858947, None])) self.assertFalse(fn(["Hello", []])) def testInstanceOf(self): fn = ht.TInstanceOf(self.__class__) self.assertTrue(fn(self)) self.assertTrue(str(fn).startswith("Instance of ")) self.assertFalse(fn(None)) def testMaybeValueNone(self): fn = ht.TMaybeValueNone(ht.TInt) self.assertTrue(fn(None)) self.assertTrue(fn(0)) self.assertTrue(fn(constants.VALUE_NONE)) self.assertFalse(fn("")) self.assertFalse(fn([])) self.assertFalse(fn(constants.VALUE_DEFAULT)) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/test/py/ganeti.backend_unittest.py0000744000000000000000000007000312271422343021550 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2010, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script for testing ganeti.backend""" import mock import os import shutil import tempfile import testutils import unittest from ganeti import backend from ganeti import constants from ganeti import errors from ganeti import hypervisor from ganeti import netutils from ganeti import utils class TestX509Certificates(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def test(self): (name, cert_pem) = backend.CreateX509Certificate(300, cryptodir=self.tmpdir) self.assertEqual(utils.ReadFile(os.path.join(self.tmpdir, name, backend._X509_CERT_FILE)), cert_pem) self.assert_(0 < os.path.getsize(os.path.join(self.tmpdir, name, backend._X509_KEY_FILE))) (name2, cert_pem2) = \ backend.CreateX509Certificate(300, cryptodir=self.tmpdir) backend.RemoveX509Certificate(name, cryptodir=self.tmpdir) backend.RemoveX509Certificate(name2, cryptodir=self.tmpdir) self.assertEqual(utils.ListVisibleFiles(self.tmpdir), []) def testNonEmpty(self): (name, _) = backend.CreateX509Certificate(300, cryptodir=self.tmpdir) utils.WriteFile(utils.PathJoin(self.tmpdir, name, "hello-world"), data="Hello World") self.assertRaises(backend.RPCFail, backend.RemoveX509Certificate, name, cryptodir=self.tmpdir) self.assertEqual(utils.ListVisibleFiles(self.tmpdir), [name]) class TestNodeVerify(testutils.GanetiTestCase): def setUp(self): testutils.GanetiTestCase.setUp(self) self._mock_hv = None def _GetHypervisor(self, hv_name): self._mock_hv = hypervisor.GetHypervisor(hv_name) self._mock_hv.ValidateParameters = mock.Mock() self._mock_hv.Verify = mock.Mock() return self._mock_hv def testMasterIPLocalhost(self): # this a real functional test, but requires localhost to be reachable local_data = (netutils.Hostname.GetSysName(), constants.IP4_ADDRESS_LOCALHOST) result = backend.VerifyNode({constants.NV_MASTERIP: local_data}, None, {}) self.failUnless(constants.NV_MASTERIP in result, "Master IP data not returned") self.failUnless(result[constants.NV_MASTERIP], "Cannot 
reach localhost") def testMasterIPUnreachable(self): # Network 192.0.2.0/24 is reserved for test/documentation as per # RFC 5737 bad_data = ("master.example.com", "192.0.2.1") # we just test that whatever TcpPing returns, VerifyNode returns too netutils.TcpPing = lambda a, b, source=None: False result = backend.VerifyNode({constants.NV_MASTERIP: bad_data}, None, {}) self.failUnless(constants.NV_MASTERIP in result, "Master IP data not returned") self.failIf(result[constants.NV_MASTERIP], "Result from netutils.TcpPing corrupted") def testVerifyHvparams(self): test_hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} test_what = {constants.NV_HVPARAMS: \ [("mynode", constants.HT_XEN_PVM, test_hvparams)]} result = {} backend._VerifyHvparams(test_what, True, result, get_hv_fn=self._GetHypervisor) self._mock_hv.ValidateParameters.assert_called_with(test_hvparams) def testVerifyHypervisors(self): hvname = constants.HT_XEN_PVM hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} all_hvparams = {hvname: hvparams} test_what = {constants.NV_HYPERVISOR: [hvname]} result = {} backend._VerifyHypervisors( test_what, True, result, all_hvparams=all_hvparams, get_hv_fn=self._GetHypervisor) self._mock_hv.Verify.assert_called_with(hvparams=hvparams) def _DefRestrictedCmdOwner(): return (os.getuid(), os.getgid()) class TestVerifyRestrictedCmdName(unittest.TestCase): def testAcceptableName(self): for i in ["foo", "bar", "z1", "000first", "hello-world"]: for fn in [lambda s: s, lambda s: s.upper(), lambda s: s.title()]: (status, msg) = backend._VerifyRestrictedCmdName(fn(i)) self.assertTrue(status) self.assertTrue(msg is None) def testEmptyAndSpace(self): for i in ["", " ", "\t", "\n"]: (status, msg) = backend._VerifyRestrictedCmdName(i) self.assertFalse(status) self.assertEqual(msg, "Missing command name") def testNameWithSlashes(self): for i in ["/", "./foo", "../moo", "some/name"]: (status, msg) = backend._VerifyRestrictedCmdName(i) self.assertFalse(status) self.assertEqual(msg, "Invalid command name") def testForbiddenCharacters(self): for i in ["#", ".", "..", "bash -c ls", "'"]: (status, msg) = backend._VerifyRestrictedCmdName(i) self.assertFalse(status) self.assertEqual(msg, "Command name contains forbidden characters") class TestVerifyRestrictedCmdDirectory(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testCanNotStat(self): tmpname = utils.PathJoin(self.tmpdir, "foobar") self.assertFalse(os.path.exists(tmpname)) (status, msg) = \ backend._VerifyRestrictedCmdDirectory(tmpname, _owner=NotImplemented) self.assertFalse(status) self.assertTrue(msg.startswith("Can't stat(2) '")) def testTooPermissive(self): tmpname = utils.PathJoin(self.tmpdir, "foobar") os.mkdir(tmpname) for mode in [0777, 0706, 0760, 0722]: os.chmod(tmpname, mode) self.assertTrue(os.path.isdir(tmpname)) (status, msg) = \ backend._VerifyRestrictedCmdDirectory(tmpname, _owner=NotImplemented) self.assertFalse(status) self.assertTrue(msg.startswith("Permissions on '")) def testNoDirectory(self): tmpname = utils.PathJoin(self.tmpdir, "foobar") utils.WriteFile(tmpname, data="empty\n") self.assertTrue(os.path.isfile(tmpname)) (status, msg) = \ backend._VerifyRestrictedCmdDirectory(tmpname, _owner=_DefRestrictedCmdOwner()) self.assertFalse(status) self.assertTrue(msg.endswith("is not a directory")) def testNormal(self): tmpname = utils.PathJoin(self.tmpdir, "foobar") os.mkdir(tmpname) os.chmod(tmpname, 0755) self.assertTrue(os.path.isdir(tmpname)) (status, msg) = \ 
backend._VerifyRestrictedCmdDirectory(tmpname, _owner=_DefRestrictedCmdOwner()) self.assertTrue(status) self.assertTrue(msg is None) class TestVerifyRestrictedCmd(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testCanNotStat(self): tmpname = utils.PathJoin(self.tmpdir, "helloworld") self.assertFalse(os.path.exists(tmpname)) (status, msg) = \ backend._VerifyRestrictedCmd(self.tmpdir, "helloworld", _owner=NotImplemented) self.assertFalse(status) self.assertTrue(msg.startswith("Can't stat(2) '")) def testNotExecutable(self): tmpname = utils.PathJoin(self.tmpdir, "cmdname") utils.WriteFile(tmpname, data="empty\n") (status, msg) = \ backend._VerifyRestrictedCmd(self.tmpdir, "cmdname", _owner=_DefRestrictedCmdOwner()) self.assertFalse(status) self.assertTrue(msg.startswith("access(2) thinks '")) def testExecutable(self): tmpname = utils.PathJoin(self.tmpdir, "cmdname") utils.WriteFile(tmpname, data="empty\n", mode=0700) (status, executable) = \ backend._VerifyRestrictedCmd(self.tmpdir, "cmdname", _owner=_DefRestrictedCmdOwner()) self.assertTrue(status) self.assertEqual(executable, tmpname) class TestPrepareRestrictedCmd(unittest.TestCase): _TEST_PATH = "/tmp/some/test/path" def testDirFails(self): def fn(path): self.assertEqual(path, self._TEST_PATH) return (False, "test error 31420") (status, msg) = \ backend._PrepareRestrictedCmd(self._TEST_PATH, "cmd21152", _verify_dir=fn, _verify_name=NotImplemented, _verify_cmd=NotImplemented) self.assertFalse(status) self.assertEqual(msg, "test error 31420") def testNameFails(self): def fn(cmd): self.assertEqual(cmd, "cmd4617") return (False, "test error 591") (status, msg) = \ backend._PrepareRestrictedCmd(self._TEST_PATH, "cmd4617", _verify_dir=lambda _: (True, None), _verify_name=fn, _verify_cmd=NotImplemented) self.assertFalse(status) self.assertEqual(msg, "test error 591") def testCommandFails(self): def fn(path, cmd): self.assertEqual(path, self._TEST_PATH) self.assertEqual(cmd, "cmd17577") return (False, "test error 25524") (status, msg) = \ backend._PrepareRestrictedCmd(self._TEST_PATH, "cmd17577", _verify_dir=lambda _: (True, None), _verify_name=lambda _: (True, None), _verify_cmd=fn) self.assertFalse(status) self.assertEqual(msg, "test error 25524") def testSuccess(self): def fn(path, cmd): return (True, utils.PathJoin(path, cmd)) (status, executable) = \ backend._PrepareRestrictedCmd(self._TEST_PATH, "cmd22633", _verify_dir=lambda _: (True, None), _verify_name=lambda _: (True, None), _verify_cmd=fn) self.assertTrue(status) self.assertEqual(executable, utils.PathJoin(self._TEST_PATH, "cmd22633")) def _SleepForRestrictedCmd(duration): assert duration > 5 def _GenericRestrictedCmdError(cmd): return "Executing command '%s' failed" % cmd class TestRunRestrictedCmd(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def testNonExistantLockDirectory(self): lockfile = utils.PathJoin(self.tmpdir, "does", "not", "exist") sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) self.assertFalse(os.path.exists(lockfile)) self.assertRaises(backend.RPCFail, backend.RunRestrictedCmd, "test", _lock_timeout=NotImplemented, _lock_file=lockfile, _path=NotImplemented, _sleep_fn=sleep_fn, _prepare_fn=NotImplemented, _runcmd_fn=NotImplemented, _enabled=True) self.assertEqual(sleep_fn.Count(), 1) @staticmethod def _TryLock(lockfile): sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) result = False try: 
backend.RunRestrictedCmd("test22717", _lock_timeout=0.1, _lock_file=lockfile, _path=NotImplemented, _sleep_fn=sleep_fn, _prepare_fn=NotImplemented, _runcmd_fn=NotImplemented, _enabled=True) except backend.RPCFail, err: assert str(err) == _GenericRestrictedCmdError("test22717"), \ "Did not fail with generic error message" result = True assert sleep_fn.Count() == 1 return result def testLockHeldByOtherProcess(self): lockfile = utils.PathJoin(self.tmpdir, "lock") lock = utils.FileLock.Open(lockfile) lock.Exclusive(blocking=True, timeout=1.0) try: self.assertTrue(utils.RunInSeparateProcess(self._TryLock, lockfile)) finally: lock.Close() @staticmethod def _PrepareRaisingException(path, cmd): assert cmd == "test23122" raise Exception("test") def testPrepareRaisesException(self): lockfile = utils.PathJoin(self.tmpdir, "lock") sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) prepare_fn = testutils.CallCounter(self._PrepareRaisingException) try: backend.RunRestrictedCmd("test23122", _lock_timeout=1.0, _lock_file=lockfile, _path=NotImplemented, _runcmd_fn=NotImplemented, _sleep_fn=sleep_fn, _prepare_fn=prepare_fn, _enabled=True) except backend.RPCFail, err: self.assertEqual(str(err), _GenericRestrictedCmdError("test23122")) else: self.fail("Didn't fail") self.assertEqual(sleep_fn.Count(), 1) self.assertEqual(prepare_fn.Count(), 1) @staticmethod def _PrepareFails(path, cmd): assert cmd == "test29327" return ("some error message", None) def testPrepareFails(self): lockfile = utils.PathJoin(self.tmpdir, "lock") sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) prepare_fn = testutils.CallCounter(self._PrepareFails) try: backend.RunRestrictedCmd("test29327", _lock_timeout=1.0, _lock_file=lockfile, _path=NotImplemented, _runcmd_fn=NotImplemented, _sleep_fn=sleep_fn, _prepare_fn=prepare_fn, _enabled=True) except backend.RPCFail, err: self.assertEqual(str(err), _GenericRestrictedCmdError("test29327")) else: self.fail("Didn't fail") self.assertEqual(sleep_fn.Count(), 1) self.assertEqual(prepare_fn.Count(), 1) @staticmethod def _SuccessfulPrepare(path, cmd): return (True, utils.PathJoin(path, cmd)) def testRunCmdFails(self): lockfile = utils.PathJoin(self.tmpdir, "lock") def fn(args, env=NotImplemented, reset_env=NotImplemented, postfork_fn=NotImplemented): self.assertEqual(args, [utils.PathJoin(self.tmpdir, "test3079")]) self.assertEqual(env, {}) self.assertTrue(reset_env) self.assertTrue(callable(postfork_fn)) trylock = utils.FileLock.Open(lockfile) try: # See if lockfile is still held self.assertRaises(EnvironmentError, trylock.Exclusive, blocking=False) # Call back to release lock postfork_fn(NotImplemented) # See if lockfile can be acquired trylock.Exclusive(blocking=False) finally: trylock.Close() # Simulate a failed command return utils.RunResult(constants.EXIT_FAILURE, None, "stdout", "stderr406328567", utils.ShellQuoteArgs(args), NotImplemented, NotImplemented) sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) prepare_fn = testutils.CallCounter(self._SuccessfulPrepare) runcmd_fn = testutils.CallCounter(fn) try: backend.RunRestrictedCmd("test3079", _lock_timeout=1.0, _lock_file=lockfile, _path=self.tmpdir, _runcmd_fn=runcmd_fn, _sleep_fn=sleep_fn, _prepare_fn=prepare_fn, _enabled=True) except backend.RPCFail, err: self.assertTrue(str(err).startswith("Restricted command 'test3079'" " failed:")) self.assertTrue("stderr406328567" in str(err), msg="Error did not include output") else: self.fail("Didn't fail") self.assertEqual(sleep_fn.Count(), 0) self.assertEqual(prepare_fn.Count(), 
1) self.assertEqual(runcmd_fn.Count(), 1) def testRunCmdSucceeds(self): lockfile = utils.PathJoin(self.tmpdir, "lock") def fn(args, env=NotImplemented, reset_env=NotImplemented, postfork_fn=NotImplemented): self.assertEqual(args, [utils.PathJoin(self.tmpdir, "test5667")]) self.assertEqual(env, {}) self.assertTrue(reset_env) # Call back to release lock postfork_fn(NotImplemented) # Simulate a successful command return utils.RunResult(constants.EXIT_SUCCESS, None, "stdout14463", "", utils.ShellQuoteArgs(args), NotImplemented, NotImplemented) sleep_fn = testutils.CallCounter(_SleepForRestrictedCmd) prepare_fn = testutils.CallCounter(self._SuccessfulPrepare) runcmd_fn = testutils.CallCounter(fn) result = backend.RunRestrictedCmd("test5667", _lock_timeout=1.0, _lock_file=lockfile, _path=self.tmpdir, _runcmd_fn=runcmd_fn, _sleep_fn=sleep_fn, _prepare_fn=prepare_fn, _enabled=True) self.assertEqual(result, "stdout14463") self.assertEqual(sleep_fn.Count(), 0) self.assertEqual(prepare_fn.Count(), 1) self.assertEqual(runcmd_fn.Count(), 1) def testCommandsDisabled(self): try: backend.RunRestrictedCmd("test", _lock_timeout=NotImplemented, _lock_file=NotImplemented, _path=NotImplemented, _sleep_fn=NotImplemented, _prepare_fn=NotImplemented, _runcmd_fn=NotImplemented, _enabled=False) except backend.RPCFail, err: self.assertEqual(str(err), "Restricted commands disabled at configure time") else: self.fail("Did not raise exception") class TestSetWatcherPause(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.filename = utils.PathJoin(self.tmpdir, "pause") def tearDown(self): shutil.rmtree(self.tmpdir) def testUnsetNonExisting(self): self.assertFalse(os.path.exists(self.filename)) backend.SetWatcherPause(None, _filename=self.filename) self.assertFalse(os.path.exists(self.filename)) def testSetNonNumeric(self): for i in ["", [], {}, "Hello World", "0", "1.0"]: self.assertFalse(os.path.exists(self.filename)) try: backend.SetWatcherPause(i, _filename=self.filename) except backend.RPCFail, err: self.assertEqual(str(err), "Duration must be numeric") else: self.fail("Did not raise exception") self.assertFalse(os.path.exists(self.filename)) def testSet(self): self.assertFalse(os.path.exists(self.filename)) for i in range(10): backend.SetWatcherPause(i, _filename=self.filename) self.assertEqual(utils.ReadFile(self.filename), "%s\n" % i) self.assertEqual(os.stat(self.filename).st_mode & 0777, 0644) class TestGetBlockDevSymlinkPath(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmpdir) def _Test(self, name, idx): self.assertEqual(backend._GetBlockDevSymlinkPath(name, idx, _dir=self.tmpdir), ("%s/%s%s%s" % (self.tmpdir, name, constants.DISK_SEPARATOR, idx))) def test(self): for idx in range(100): self._Test("inst1.example.com", idx) class TestGetInstanceList(unittest.TestCase): def setUp(self): self._test_hv = self._TestHypervisor() self._test_hv.ListInstances = mock.Mock( return_value=["instance1", "instance2", "instance3"] ) class _TestHypervisor(hypervisor.hv_base.BaseHypervisor): def __init__(self): hypervisor.hv_base.BaseHypervisor.__init__(self) def _GetHypervisor(self, name): return self._test_hv def testHvparams(self): fake_hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} hvparams = {constants.HT_FAKE: fake_hvparams} backend.GetInstanceList([constants.HT_FAKE], all_hvparams=hvparams, get_hv_fn=self._GetHypervisor) self._test_hv.ListInstances.assert_called_with(hvparams=fake_hvparams) class 
TestGetHvInfo(unittest.TestCase): def setUp(self): self._test_hv = self._TestHypervisor() self._test_hv.GetNodeInfo = mock.Mock() class _TestHypervisor(hypervisor.hv_base.BaseHypervisor): def __init__(self): hypervisor.hv_base.BaseHypervisor.__init__(self) def _GetHypervisor(self, name): return self._test_hv def testGetHvInfoAllNone(self): result = backend._GetHvInfoAll(None) self.assertTrue(result is None) def testGetHvInfoAll(self): hvname = constants.HT_XEN_PVM hvparams = {constants.HV_XEN_CMD: constants.XEN_CMD_XL} hv_specs = [(hvname, hvparams)] backend._GetHvInfoAll(hv_specs, self._GetHypervisor) self._test_hv.GetNodeInfo.assert_called_with(hvparams=hvparams) class TestApplyStorageInfoFunction(unittest.TestCase): _STORAGE_KEY = "some_key" _SOME_ARGS = ["some_args"] def setUp(self): self.mock_storage_fn = mock.Mock() def testApplyValidStorageType(self): storage_type = constants.ST_LVM_VG info_fn_orig = backend._STORAGE_TYPE_INFO_FN backend._STORAGE_TYPE_INFO_FN = { storage_type: self.mock_storage_fn } backend._ApplyStorageInfoFunction( storage_type, self._STORAGE_KEY, self._SOME_ARGS) self.mock_storage_fn.assert_called_with(self._STORAGE_KEY, self._SOME_ARGS) backend._STORAGE_TYPE_INFO_FN = info_fn_orig def testApplyInValidStorageType(self): storage_type = "invalid_storage_type" info_fn_orig = backend._STORAGE_TYPE_INFO_FN backend._STORAGE_TYPE_INFO_FN = {} self.assertRaises(KeyError, backend._ApplyStorageInfoFunction, storage_type, self._STORAGE_KEY, self._SOME_ARGS) backend._STORAGE_TYPE_INFO_FN = info_fn_orig def testApplyNotImplementedStorageType(self): storage_type = "not_implemented_storage_type" info_fn_orig = backend._STORAGE_TYPE_INFO_FN backend._STORAGE_TYPE_INFO_FN = {storage_type: None} self.assertRaises(NotImplementedError, backend._ApplyStorageInfoFunction, storage_type, self._STORAGE_KEY, self._SOME_ARGS) backend._STORAGE_TYPE_INFO_FN = info_fn_orig class TestGetLvmVgSpaceInfo(unittest.TestCase): def testValid(self): path = "somepath" excl_stor = True orig_fn = backend._GetVgInfo backend._GetVgInfo = mock.Mock() backend._GetLvmVgSpaceInfo(path, [excl_stor]) backend._GetVgInfo.assert_called_with(path, excl_stor) backend._GetVgInfo = orig_fn def testNoExclStorageNotBool(self): path = "somepath" excl_stor = "123" self.assertRaises(errors.ProgrammerError, backend._GetLvmVgSpaceInfo, path, [excl_stor]) def testNoExclStorageNotInList(self): path = "somepath" excl_stor = "123" self.assertRaises(errors.ProgrammerError, backend._GetLvmVgSpaceInfo, path, excl_stor) class TestGetLvmPvSpaceInfo(unittest.TestCase): def testValid(self): path = "somepath" excl_stor = True orig_fn = backend._GetVgSpindlesInfo backend._GetVgSpindlesInfo = mock.Mock() backend._GetLvmPvSpaceInfo(path, [excl_stor]) backend._GetVgSpindlesInfo.assert_called_with(path, excl_stor) backend._GetVgSpindlesInfo = orig_fn class TestCheckStorageParams(unittest.TestCase): def testParamsNone(self): self.assertRaises(errors.ProgrammerError, backend._CheckStorageParams, None, NotImplemented) def testParamsWrongType(self): self.assertRaises(errors.ProgrammerError, backend._CheckStorageParams, "string", NotImplemented) def testParamsEmpty(self): backend._CheckStorageParams([], 0) def testParamsValidNumber(self): backend._CheckStorageParams(["a", True], 2) def testParamsInvalidNumber(self): self.assertRaises(errors.ProgrammerError, backend._CheckStorageParams, ["b", False], 3) class TestGetVgSpindlesInfo(unittest.TestCase): def setUp(self): self.vg_free = 13 self.vg_size = 31 self.mock_fn = 
mock.Mock(return_value=(self.vg_free, self.vg_size)) def testValidInput(self): name = "myvg" excl_stor = True result = backend._GetVgSpindlesInfo(name, excl_stor, info_fn=self.mock_fn) self.mock_fn.assert_called_with(name) self.assertEqual(name, result["name"]) self.assertEqual(constants.ST_LVM_PV, result["type"]) self.assertEqual(self.vg_free, result["storage_free"]) self.assertEqual(self.vg_size, result["storage_size"]) def testNoExclStor(self): name = "myvg" excl_stor = False result = backend._GetVgSpindlesInfo(name, excl_stor, info_fn=self.mock_fn) self.mock_fn.assert_not_called() self.assertEqual(name, result["name"]) self.assertEqual(constants.ST_LVM_PV, result["type"]) self.assertEqual(0, result["storage_free"]) self.assertEqual(0, result["storage_size"]) class TestGetVgSpindlesInfo(unittest.TestCase): def testValidInput(self): self.vg_free = 13 self.vg_size = 31 self.mock_fn = mock.Mock(return_value=[(self.vg_free, self.vg_size)]) name = "myvg" excl_stor = True result = backend._GetVgInfo(name, excl_stor, info_fn=self.mock_fn) self.mock_fn.assert_called_with([name], excl_stor) self.assertEqual(name, result["name"]) self.assertEqual(constants.ST_LVM_VG, result["type"]) self.assertEqual(self.vg_free, result["storage_free"]) self.assertEqual(self.vg_size, result["storage_size"]) def testNoExclStor(self): name = "myvg" excl_stor = True self.mock_fn = mock.Mock(return_value=None) result = backend._GetVgInfo(name, excl_stor, info_fn=self.mock_fn) self.mock_fn.assert_called_with([name], excl_stor) self.assertEqual(name, result["name"]) self.assertEqual(constants.ST_LVM_VG, result["type"]) self.assertEqual(None, result["storage_free"]) self.assertEqual(None, result["storage_size"]) class TestGetNodeInfo(unittest.TestCase): _SOME_RESULT = None def testApplyStorageInfoFunction(self): orig_fn = backend._ApplyStorageInfoFunction backend._ApplyStorageInfoFunction = mock.Mock( return_value=self._SOME_RESULT) storage_units = [(st, st + "_key", [st + "_params"]) for st in constants.STORAGE_TYPES] backend.GetNodeInfo(storage_units, None) call_args_list = backend._ApplyStorageInfoFunction.call_args_list self.assertEqual(len(constants.STORAGE_TYPES), len(call_args_list)) for call in call_args_list: storage_type, storage_key, storage_params = call[0] self.assertEqual(storage_type + "_key", storage_key) self.assertEqual([storage_type + "_params"], storage_params) self.assertTrue(storage_type in constants.STORAGE_TYPES) backend._ApplyStorageInfoFunction = orig_fn class TestSpaceReportingConstants(unittest.TestCase): """Ensures consistency between STS_REPORT and backend. These tests ensure, that the constant 'STS_REPORT' is consitent with the implementation of invoking space reporting functions in backend.py. Once space reporting is available for all types, the constant can be removed and these tests as well. """ def testAllReportingTypesHaveAReportingFunction(self): for storage_type in constants.STS_REPORT: self.assertTrue(backend._STORAGE_TYPE_INFO_FN[storage_type] is not None) def testAllNotReportingTypesDoneHaveFunction(self): non_reporting_types = set(constants.STORAGE_TYPES)\ - set(constants.STS_REPORT) for storage_type in non_reporting_types: self.assertEqual(None, backend._STORAGE_TYPE_INFO_FN[storage_type]) if __name__ == "__main__": testutils.GanetiTestProgram() ganeti-2.9.3/COPYING0000644000000000000000000004312212230001635014017 0ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs.
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. ganeti-2.9.3/devel/0000755000000000000000000000000012271445544014101 5ustar00rootroot00000000000000ganeti-2.9.3/devel/webserver0000744000000000000000000000311112244641676016031 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. import sys import BaseHTTPServer import SimpleHTTPServer def main(): if len(sys.argv) == 2: host = "127.0.0.1" (_, port) = sys.argv elif len(sys.argv) == 3: (_, port, host) = sys.argv else: sys.stderr.write("Usage: %s <port> [<host>]\n" % sys.argv[0]) sys.stderr.write("\n") sys.stderr.write("Provides an HTTP server on the specified TCP port") sys.stderr.write(" exporting the current working directory. Binds to") sys.stderr.write(" localhost by default.\n") sys.exit(1) try: port = int(port) except (ValueError, TypeError), err: sys.stderr.write("Invalid port '%s': %s\n" % (port, err)) sys.exit(1) handler = SimpleHTTPServer.SimpleHTTPRequestHandler server = BaseHTTPServer.HTTPServer((host, port), handler) server.serve_forever() if __name__ == "__main__": main() ganeti-2.9.3/devel/upload0000744000000000000000000000753412244641676015316 0ustar00rootroot00000000000000#!/bin/bash # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is a test script to ease development and testing on test clusters. # It should not be used to update production environments. # Usage: upload node-{1,2,3} # it will upload the python libraries to # $prefix/lib/python2.X/dist-packages/ganeti and the command line utils to # $prefix/sbin. It needs passwordless root login to the nodes. set -e -u usage() { echo "Usage: $0 [--no-restart] [--no-cron] [--no-debug] hosts..."
>&2 exit $1 } declare -r SED="sed -f autotools/replace_vars.sed" NO_RESTART= NO_CRON= NO_DEBUG= hosts= while [ "$#" -gt 0 ]; do opt="$1" case "$opt" in --no-restart) NO_RESTART=1 ;; --no-cron) NO_CRON=1 ;; --no-debug) NO_DEBUG=1 ;; -h|--help) usage 0 ;; -*) echo "Unknown option: $opt" >&2 usage 1 ;; *) hosts="$hosts $opt" ;; esac shift done if [ -z "$hosts" ]; then usage 1 fi set ${hosts} make regen-vcs-version TXD=`mktemp -d` trap 'rm -rf $TXD' EXIT if [[ -f /proc/cpuinfo ]]; then cpu_count=$(grep -E -c '^processor[[:space:]]*:' /proc/cpuinfo) make_args=-j$(( cpu_count + 1 )) else make_args= fi # Make sure that directories will get correct permissions umask 0022 # install ganeti as a real tree make $make_args install DESTDIR="$TXD" # at this point, make has been finished, so the configuration is # fixed; we can read the prefix vars/etc. PREFIX="$(echo @PREFIX@ | $SED)" SYSCONFDIR="$(echo @SYSCONFDIR@ | $SED)" LIBDIR="$(echo @LIBDIR@ | $SED)" PKGLIBDIR="$(echo @PKGLIBDIR@ | $SED)" # copy additional needed files [ -f doc/examples/ganeti.initd ] && \ install -D --mode=0755 doc/examples/ganeti.initd \ "$TXD/$SYSCONFDIR/init.d/ganeti" [ -f doc/examples/ganeti.logrotate ] && \ install -D --mode=0755 doc/examples/ganeti.logrotate \ "$TXD/$SYSCONFDIR/logrotate.d/ganeti" [ -f doc/examples/ganeti-master-role.ocf ] && \ install -D --mode=0755 doc/examples/ganeti-master-role.ocf \ "$TXD/$LIBDIR/ocf/resource.d/ganeti/ganeti-master-role" [ -f doc/examples/ganeti-node-role.ocf ] && \ install -D --mode=0755 doc/examples/ganeti-node-role.ocf \ "$TXD/$LIBDIR/ocf/resource.d/ganeti/ganeti-node-role" [ -f doc/examples/ganeti.default-debug -a -z "$NO_DEBUG" ] && \ install -D --mode=0644 doc/examples/ganeti.default-debug \ "$TXD/$SYSCONFDIR/default/ganeti" [ -f doc/examples/bash_completion-debug ] && \ install -D --mode=0644 doc/examples/bash_completion-debug \ "$TXD/$SYSCONFDIR/bash_completion.d/ganeti" if [ -f doc/examples/ganeti.cron -a -z "$NO_CRON" ]; then install -D --mode=0644 doc/examples/ganeti.cron \ "$TXD/$SYSCONFDIR/cron.d/ganeti" fi echo --- ( cd "$TXD" && find; ) echo --- # and now put it under $prefix on the target node(s) for host; do echo Uploading code to ${host}... rsync -v -rlKDc \ -e "ssh -oBatchMode=yes" \ --exclude="*.py[oc]" --exclude="*.pdf" --exclude="*.html" \ "$TXD/" \ root@${host}:/ & done wait if test -z "${NO_RESTART}"; then for host; do echo Restarting ganeti-noded on ${host}... ssh -oBatchMode=yes root@${host} $SYSCONFDIR/init.d/ganeti restart & done wait fi ganeti-2.9.3/INSTALL0000644000000000000000000002567712271422343014044 0ustar00rootroot00000000000000Ganeti quick installation guide =============================== Please note that a more detailed installation procedure is described in the :doc:`install`. Refer to it if you are setting up Ganeti the first time. This quick installation guide is mainly meant as reference for experienced users. A glossary of terms can be found in the :doc:`glossary`. Software Requirements --------------------- .. highlight:: shell-example Before installing, please verify that you have the following programs: - `Xen Hypervisor `_, version 3.0 or above, if running on Xen - `KVM Hypervisor `_, version 72 or above, if running on KVM. In order to use advanced features, such as live migration, virtio, etc, an even newer version is recommended (qemu-kvm versions 0.11.X and above have shown good behavior). 
- `DRBD `_, kernel module and userspace utils, version 8.0.7 or above; note that Ganeti doesn't yet support version 8.4 - `RBD `_, kernel modules (``rbd.ko``/``libceph.ko``) and userspace utils (``ceph-common``) - `LVM2 `_ - `OpenSSH `_ - `bridge utilities `_ - `iproute2 `_ - `arping `_ (part of iputils) - `ndisc6 `_ (if using IPv6) - `Python `_, version 2.6 or above, not 3.0 - `Python OpenSSL bindings `_ - `simplejson Python module `_ - `pyparsing Python module `_, version 1.4.6 or above - `pyinotify Python module `_ - `PycURL Python module `_ - `socat `_, see :ref:`note <socat-note>` below - `Paramiko `_, if you want to use ``ganeti-listrunner`` - `affinity Python module `_, optional python package for supporting CPU pinning under KVM - `qemu-img `_, if you want to use ``ovfconverter`` - `fping `_ - `Python IP address manipulation library `_ - `Bitarray Python library `_ - `GNU Make `_ These programs are supplied as part of most Linux distributions, so usually they can be installed via the standard package manager. Also many of them will already be installed on a standard machine. On Debian/Ubuntu, you can use this command line to install all required packages, except for RBD, DRBD and Xen:: $ apt-get install lvm2 ssh bridge-utils iproute iputils-arping make \ ndisc6 python python-openssl openssl \ python-pyparsing python-simplejson python-bitarray \ python-pyinotify python-pycurl python-ipaddr socat fping For older distributions (e.g. Debian Squeeze) the package names are different:: $ apt-get install lvm2 ssh bridge-utils iproute iputils-arping make \ ndisc6 python python-pyopenssl openssl \ python-pyparsing python-simplejson python-bitarray \ python-pyinotify python-pycurl python-ipaddr socat fping If bitarray is missing, it can be installed with ``easy_install``:: $ easy_install bitarray Note that this does not install optional packages:: $ apt-get install python-paramiko python-affinity qemu-img If some of the python packages are not available in your system, you can try installing them using the ``easy_install`` command. For example:: $ apt-get install python-setuptools python-dev $ cd / && sudo easy_install \ affinity \ bitarray \ ipaddr On Fedora, to install all required packages except RBD, DRBD and Xen:: $ yum install openssh openssh-clients bridge-utils iproute ndisc6 make \ pyOpenSSL pyparsing python-simplejson python-inotify \ python-lxm socat fping python-bitarray python-ipaddr For optional packages use the command:: $ yum install python-paramiko python-affinity qemu-img If you want to build from source, please see doc/devnotes.rst for more dependencies. .. _socat-note: .. note:: Ganeti's import/export functionality uses ``socat`` with OpenSSL for transferring data between nodes. By default, OpenSSL 0.9.8 and above employ transparent compression of all data using zlib if supported by both sides of a connection. In cases where a lot of data is transferred, this can lead to increased CPU usage. Additionally, Ganeti already compresses all data using ``gzip`` where it makes sense (for inter-cluster instance moves). To remedy this situation, patches implementing a new ``socat`` option for disabling OpenSSL compression have been contributed and will likely be included in the next feature release. Until then, users or distributions need to apply the patches on their own.
Ganeti will use the option if it's detected by the ``configure`` script; auto-detection can be disabled by explicitly passing ``--enable-socat-compress`` (use the option to disable compression) or ``--disable-socat-compress`` (don't use the option). The patches and more information can be found on http://www.dest-unreach.org/socat/contrib/socat-opensslcompress.html. Haskell requirements ~~~~~~~~~~~~~~~~~~~~ Starting with Ganeti 2.7, the Haskell GHC compiler and a few base libraries are required in order to build Ganeti (but not to run and deploy Ganeti on production machines). More specifically: - `GHC `_ version 6.12 or higher - or even better, `The Haskell Platform `_ which gives you a simple way to bootstrap Haskell - `json `_, a JSON library - `network `_, a basic network library - `parallel `_, a parallel programming library (note: tested with up to version 3.x) - `bytestring `_ and `utf8-string `_ libraries; these usually come with the GHC compiler - `deepseq `_ - `curl `_, tested with versions 1.3.4 and above - `hslogger `_, version 1.1 and above (note that Debian Squeeze only has version 1.0.9) Some of these are also available as packages in Debian/Ubuntu:: $ apt-get install ghc libghc-json-dev libghc-network-dev \ libghc-parallel-dev libghc-deepseq-dev \ libghc-utf8-string-dev libghc-curl-dev \ libghc-hslogger-dev Or in older versions of these distributions (using GHC 6.x):: $ apt-get install ghc6 libghc6-json-dev libghc6-network-dev \ libghc6-parallel-dev libghc6-deepseq-dev \ libghc6-curl-dev In Fedora, some of them are available via packages as well:: $ yum install ghc ghc-json-devel ghc-network-devel \ ghc-parallel-devel ghc-deepseq-devel If using a distribution which does not provide them, first install the Haskell platform. You can also install ``cabal`` manually:: $ apt-get install cabal-install $ cabal update Then install the additional libraries (only the ones not available in your distribution packages) via ``cabal``:: $ cabal install json network parallel utf8-string curl hslogger Haskell optional features ~~~~~~~~~~~~~~~~~~~~~~~~~ Optionally, more functionality can be enabled if your build machine has a few more Haskell libraries enabled: the ``ganeti-confd`` and ``ganeti-luxid`` daemons (``--enable-confd``) and the monitoring daemon (``--enable-mond``). The list of extra dependencies for these is: - `Crypto `_, tested with version 4.2.4 - `text `_ - `hinotify `_, tested with version 0.3.2 - `regex-pcre `_, bindings for the ``pcre`` library - `attoparsec `_ - `vector `_ - `snap-server `_, version 0.8.1 and above - `process `_, version 1.0.1.1 and above These libraries are available in Debian Wheezy (but not in Squeeze), so you can use either apt:: $ apt-get install libghc-crypto-dev libghc-text-dev \ libghc-hinotify-dev libghc-regex-pcre-dev \ libghc-attoparsec-dev libghc-vector-dev \ libghc-snap-server-dev or ``cabal``, after installing a required non-Haskell dependency:: $ apt-get install libpcre3-dev libcurl4-openssl-dev $ cabal install Crypto text hinotify==0.3.2 regex-pcre \ attoparsec vector snap-server to install them.
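If in doubt about which compiler version is actually installed on the build machine, it can be queried directly before deciding how to proceed (the exact output format varies slightly between GHC releases):: $ ghc --version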
In case you still use ghc-6.12, note that ``cabal`` would automatically try to install newer versions of some of the libraries snap-server depends on that cannot be compiled with ghc-6.12, so you have to install snap-server on its own, explicitly forcing the installation of compatible versions:: $ cabal install MonadCatchIO-transformers==0.2.2.0 mtl==2.0.1.0 \ hashable==1.1.2.0 case-insensitive==0.3 parsec==3.0.1 \ network==2.3 snap-server==0.8.1 The most recent Fedora doesn't provide ``crypto`` or ``inotify``, so these need to be installed using ``cabal`` if desired. The other packages can be installed via ``yum``:: $ yum install ghc-hslogger-devel ghc-text-devel \ ghc-regex-pcre-devel .. _cabal-note: .. note:: If one of the cabal packages fails to install due to unfulfilled dependencies, you can try enabling symlinks in ``~/.cabal/config``. Make sure that your ``~/.cabal/bin`` directory (or whatever else is defined as ``bindir``) is in your ``PATH``. Installation of the software ---------------------------- To install, simply run the following command:: $ ./configure --localstatedir=/var --sysconfdir=/etc && \ make && \ make install This will install the software under ``/usr/local``. You then need to copy ``doc/examples/ganeti.initd`` to ``/etc/init.d/ganeti`` and integrate it into your boot sequence (``chkconfig``, ``update-rc.d``, etc.). Cluster initialisation ---------------------- Before initialising the cluster, on each node you need to create the following directories: - ``/etc/ganeti`` - ``/var/lib/ganeti`` - ``/var/log/ganeti`` - ``/srv/ganeti`` - ``/srv/ganeti/os`` - ``/srv/ganeti/export`` After this, use ``gnt-cluster init``. .. vim: set textwidth=72 syntax=rst : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/README0000644000000000000000000000027512271422343013656 0ustar00rootroot00000000000000Ganeti 2.9 ========== For installation instructions, read the INSTALL and the doc/install.rst files. For a brief introduction, read the ganeti(7) manpage and the other pages it suggests. ganeti-2.9.3/lib/0000755000000000000000000000000012271445544013550 5ustar00rootroot00000000000000ganeti-2.9.3/lib/asyncnotifier.py0000644000000000000000000001421512244641676017006 0ustar00rootroot00000000000000# # # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Asynchronous pyinotify implementation""" import asyncore import logging try: # pylint: disable=E0611 from pyinotify import pyinotify except ImportError: import pyinotify from ganeti import daemon from ganeti import errors # We contributed the AsyncNotifier class back to python-pyinotify, and it's # part of their codebase since version 0.8.7. This code can be removed once # we'll be ready to depend on python-pyinotify >= 0.8.7 class AsyncNotifier(asyncore.file_dispatcher): """An asyncore dispatcher for inotify events.
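A minimal usage sketch (illustrative only; ``MyHandler`` stands for any pyinotify.ProcessEvent subclass and is not part of this module):: wm = pyinotify.WatchManager() notifier = AsyncNotifier(wm, default_proc_fun=MyHandler()) asyncore.loop() # dispatches inotify events together with any other asyncore sockets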
""" # pylint: disable=W0622,W0212 def __init__(self, watch_manager, default_proc_fun=None, map=None): """Initializes this class. This is a a special asyncore file_dispatcher that actually wraps a pyinotify Notifier, making it asyncronous. """ if default_proc_fun is None: default_proc_fun = pyinotify.ProcessEvent() self.notifier = pyinotify.Notifier(watch_manager, default_proc_fun) # here we need to steal the file descriptor from the notifier, so we can # use it in the global asyncore select, and avoid calling the # check_events() function of the notifier (which doesn't allow us to select # together with other file descriptors) self.fd = self.notifier._fd asyncore.file_dispatcher.__init__(self, self.fd, map) def handle_read(self): self.notifier.read_events() self.notifier.process_events() class ErrorLoggingAsyncNotifier(AsyncNotifier, daemon.GanetiBaseAsyncoreDispatcher): """An asyncnotifier that can survive errors in the callbacks. We define this as a separate class, since we don't want to make AsyncNotifier diverge from what we contributed upstream. """ class FileEventHandlerBase(pyinotify.ProcessEvent): """Base class for file event handlers. @ivar watch_manager: Inotify watch manager """ def __init__(self, watch_manager): """Initializes this class. @type watch_manager: pyinotify.WatchManager @param watch_manager: inotify watch manager """ # pylint: disable=W0231 # no need to call the parent's constructor self.watch_manager = watch_manager def process_default(self, event): logging.error("Received unhandled inotify event: %s", event) def AddWatch(self, filename, mask): """Adds a file watch. @param filename: Path to file @param mask: Inotify event mask @return: Result """ result = self.watch_manager.add_watch(filename, mask) ret = result.get(filename, -1) if ret <= 0: raise errors.InotifyError("Could not add inotify watcher (error code %s);" " increasing fs.inotify.max_user_watches sysctl" " might be necessary" % ret) return result[filename] def RemoveWatch(self, handle): """Removes a handle from the watcher. @param handle: Inotify handle @return: Whether removal was successful """ result = self.watch_manager.rm_watch(handle) return result[handle] class SingleFileEventHandler(FileEventHandlerBase): """Handle modify events for a single file. """ def __init__(self, watch_manager, callback, filename): """Constructor for SingleFileEventHandler @type watch_manager: pyinotify.WatchManager @param watch_manager: inotify watch manager @type callback: function accepting a boolean @param callback: function to call when an inotify event happens @type filename: string @param filename: config file to watch """ FileEventHandlerBase.__init__(self, watch_manager) self._callback = callback self._filename = filename self._watch_handle = None def enable(self): """Watch the given file. """ if self._watch_handle is not None: return # Different Pyinotify versions have the flag constants at different places, # hence not accessing them directly mask = (pyinotify.EventsCodes.ALL_FLAGS["IN_MODIFY"] | pyinotify.EventsCodes.ALL_FLAGS["IN_IGNORED"]) self._watch_handle = self.AddWatch(self._filename, mask) def disable(self): """Stop watching the given file. 
""" if self._watch_handle is not None and self.RemoveWatch(self._watch_handle): self._watch_handle = None # pylint: disable=C0103 # this overrides a method in pyinotify.ProcessEvent def process_IN_IGNORED(self, event): # Since we monitor a single file rather than the directory it resides in, # when that file is replaced with another one (which is what happens when # utils.WriteFile, the most normal way of updating files in ganeti, is # called) we're going to receive an IN_IGNORED event from inotify, because # of the file removal (which is contextual with the replacement). In such a # case we'll need to create a watcher for the "new" file. This can be done # by the callback by calling "enable" again on us. logging.debug("Received 'ignored' inotify event for %s", event.path) self._watch_handle = None self._callback(False) # pylint: disable=C0103 # this overrides a method in pyinotify.ProcessEvent def process_IN_MODIFY(self, event): # This gets called when the monitored file is modified. Note that this # doesn't usually happen in Ganeti, as most of the time we're just # replacing any file with a new one, at filesystem level, rather than # actually changing it. (see utils.WriteFile) logging.debug("Received 'modify' inotify event for %s", event.path) self._callback(True) ganeti-2.9.3/lib/compat.py0000644000000000000000000001044412244641676015414 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module containing backported language/library functionality. """ import itertools import operator try: # pylint: disable=F0401 import functools except ImportError: functools = None try: # pylint: disable=F0401 import roman except ImportError: roman = None # compat.md5_hash and compat.sha1_hash can be called to generate and md5 and a # sha1 hashing modules, under python 2.4, 2.5 and 2.6, even though some changes # went on. compat.sha1 is python-version specific and is used for python # modules (hmac, for example) which have changed their behavior as well from # one version to the other. try: # Yes, these don't always exist, that's why we're testing # Yes, we're not using the imports in this module. from hashlib import md5 as md5_hash # pylint: disable=W0611,E0611,F0401 from hashlib import sha1 as sha1_hash # pylint: disable=W0611,E0611,F0401 # this additional version is needed for compatibility with the hmac module sha1 = sha1_hash except ImportError: from md5 import new as md5_hash import sha sha1 = sha sha1_hash = sha.new def _all(seq): """Returns True if all elements in the iterable are True. """ for _ in itertools.ifilterfalse(bool, seq): return False return True def _any(seq): """Returns True if any element of the iterable are True. 
""" for _ in itertools.ifilter(bool, seq): return True return False try: # pylint: disable=E0601 # pylint: disable=W0622 all = all except NameError: all = _all try: # pylint: disable=E0601 # pylint: disable=W0622 any = any except NameError: any = _any def partition(seq, pred=bool): # pylint: disable=W0622 """Partition a list in two, based on the given predicate. """ return (list(itertools.ifilter(pred, seq)), list(itertools.ifilterfalse(pred, seq))) # Even though we're using Python's built-in "partial" function if available, # this one is always defined for testing. def _partial(func, *args, **keywords): # pylint: disable=W0622 """Decorator with partial application of arguments and keywords. This function was copied from Python's documentation. """ def newfunc(*fargs, **fkeywords): newkeywords = keywords.copy() newkeywords.update(fkeywords) return func(*(args + fargs), **newkeywords) # pylint: disable=W0142 newfunc.func = func newfunc.args = args newfunc.keywords = keywords return newfunc if functools is None: partial = _partial else: partial = functools.partial def TryToRoman(val, convert=True): """Try to convert a value to roman numerals If the roman module could be loaded convert the given value to a roman numeral. Gracefully fail back to leaving the value untouched. @type val: integer @param val: value to convert @type convert: boolean @param convert: if False, don't try conversion at all @rtype: string or typeof(val) @return: roman numeral for val, or val if conversion didn't succeed """ if roman is not None and convert: try: return roman.toRoman(val) except roman.RomanError: return val else: return val def UniqueFrozenset(seq): """Makes C{frozenset} from sequence after checking for duplicate elements. @raise ValueError: When there are duplicate elements """ if isinstance(seq, (list, tuple)): items = seq else: items = list(seq) result = frozenset(items) if len(items) != len(result): raise ValueError("Duplicate values found") return result #: returns the first element of a list-like value fst = operator.itemgetter(0) #: returns the second element of a list-like value snd = operator.itemgetter(1) ganeti-2.9.3/lib/cli.py0000644000000000000000000042242712271422343014674 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Module dealing with command line parsing""" import sys import textwrap import os.path import time import logging import errno import itertools import shlex from cStringIO import StringIO from ganeti import utils from ganeti import errors from ganeti import constants from ganeti import opcodes from ganeti import luxi from ganeti import ssconf from ganeti import rpc from ganeti import ssh from ganeti import compat from ganeti import netutils from ganeti import qlang from ganeti import objects from ganeti import pathutils from optparse import (OptionParser, TitledHelpFormatter, Option, OptionValueError) __all__ = [ # Command line options "ABSOLUTE_OPT", "ADD_UIDS_OPT", "ADD_RESERVED_IPS_OPT", "ALLOCATABLE_OPT", "ALLOC_POLICY_OPT", "ALL_OPT", "ALLOW_FAILOVER_OPT", "AUTO_PROMOTE_OPT", "AUTO_REPLACE_OPT", "BACKEND_OPT", "BLK_OS_OPT", "CAPAB_MASTER_OPT", "CAPAB_VM_OPT", "CLEANUP_OPT", "CLUSTER_DOMAIN_SECRET_OPT", "CONFIRM_OPT", "CP_SIZE_OPT", "DEBUG_OPT", "DEBUG_SIMERR_OPT", "DISKIDX_OPT", "DISK_OPT", "DISK_PARAMS_OPT", "DISK_TEMPLATE_OPT", "DRAINED_OPT", "DRY_RUN_OPT", "DRBD_HELPER_OPT", "DST_NODE_OPT", "EARLY_RELEASE_OPT", "ENABLED_HV_OPT", "ENABLED_DISK_TEMPLATES_OPT", "ERROR_CODES_OPT", "FAILURE_ONLY_OPT", "FIELDS_OPT", "FILESTORE_DIR_OPT", "FILESTORE_DRIVER_OPT", "FORCE_FILTER_OPT", "FORCE_OPT", "FORCE_VARIANT_OPT", "GATEWAY_OPT", "GATEWAY6_OPT", "GLOBAL_FILEDIR_OPT", "HID_OS_OPT", "GLOBAL_SHARED_FILEDIR_OPT", "HVLIST_OPT", "HVOPTS_OPT", "HYPERVISOR_OPT", "IALLOCATOR_OPT", "DEFAULT_IALLOCATOR_OPT", "IDENTIFY_DEFAULTS_OPT", "IGNORE_CONSIST_OPT", "IGNORE_ERRORS_OPT", "IGNORE_FAILURES_OPT", "IGNORE_OFFLINE_OPT", "IGNORE_REMOVE_FAILURES_OPT", "IGNORE_SECONDARIES_OPT", "IGNORE_SIZE_OPT", "INCLUDEDEFAULTS_OPT", "INTERVAL_OPT", "MAC_PREFIX_OPT", "MAINTAIN_NODE_HEALTH_OPT", "MASTER_NETDEV_OPT", "MASTER_NETMASK_OPT", "MC_OPT", "MIGRATION_MODE_OPT", "MODIFY_ETCHOSTS_OPT", "NET_OPT", "NETWORK_OPT", "NETWORK6_OPT", "NEW_CLUSTER_CERT_OPT", "NEW_CLUSTER_DOMAIN_SECRET_OPT", "NEW_CONFD_HMAC_KEY_OPT", "NEW_RAPI_CERT_OPT", "NEW_PRIMARY_OPT", "NEW_SECONDARY_OPT", "NEW_SPICE_CERT_OPT", "NIC_PARAMS_OPT", "NOCONFLICTSCHECK_OPT", "NODE_FORCE_JOIN_OPT", "NODE_LIST_OPT", "NODE_PLACEMENT_OPT", "NODEGROUP_OPT", "NODE_PARAMS_OPT", "NODE_POWERED_OPT", "NODRBD_STORAGE_OPT", "NOHDR_OPT", "NOIPCHECK_OPT", "NO_INSTALL_OPT", "NONAMECHECK_OPT", "NOLVM_STORAGE_OPT", "NOMODIFY_ETCHOSTS_OPT", "NOMODIFY_SSH_SETUP_OPT", "NONICS_OPT", "NONLIVE_OPT", "NONPLUS1_OPT", "NORUNTIME_CHGS_OPT", "NOSHUTDOWN_OPT", "NOSTART_OPT", "NOSSH_KEYCHECK_OPT", "NOVOTING_OPT", "NO_REMEMBER_OPT", "NWSYNC_OPT", "OFFLINE_INST_OPT", "ONLINE_INST_OPT", "ON_PRIMARY_OPT", "ON_SECONDARY_OPT", "OFFLINE_OPT", "OSPARAMS_OPT", "OS_OPT", "OS_SIZE_OPT", "OOB_TIMEOUT_OPT", "POWER_DELAY_OPT", "PREALLOC_WIPE_DISKS_OPT", "PRIMARY_IP_VERSION_OPT", "PRIMARY_ONLY_OPT", "PRINT_JOBID_OPT", "PRIORITY_OPT", "RAPI_CERT_OPT", "READD_OPT", "REASON_OPT", "REBOOT_TYPE_OPT", "REMOVE_INSTANCE_OPT", "REMOVE_RESERVED_IPS_OPT", "REMOVE_UIDS_OPT", "RESERVED_LVS_OPT", "RUNTIME_MEM_OPT", "ROMAN_OPT", "SECONDARY_IP_OPT", "SECONDARY_ONLY_OPT", "SELECT_OS_OPT", "SEP_OPT", "SHOWCMD_OPT", "SHOW_MACHINE_OPT", "SHUTDOWN_TIMEOUT_OPT", "SINGLE_NODE_OPT", "SPECS_CPU_COUNT_OPT", "SPECS_DISK_COUNT_OPT", "SPECS_DISK_SIZE_OPT", "SPECS_MEM_SIZE_OPT", "SPECS_NIC_COUNT_OPT", "SPLIT_ISPECS_OPTS", "IPOLICY_STD_SPECS_OPT", "IPOLICY_DISK_TEMPLATES", "IPOLICY_VCPU_RATIO", "SPICE_CACERT_OPT", "SPICE_CERT_OPT", "SRC_DIR_OPT", "SRC_NODE_OPT", "SUBMIT_OPT", "SUBMIT_OPTS", "STARTUP_PAUSED_OPT", "STATIC_OPT", 
"SYNC_OPT", "TAG_ADD_OPT", "TAG_SRC_OPT", "TIMEOUT_OPT", "TO_GROUP_OPT", "UIDPOOL_OPT", "USEUNITS_OPT", "USE_EXTERNAL_MIP_SCRIPT", "USE_REPL_NET_OPT", "VERBOSE_OPT", "VG_NAME_OPT", "WFSYNC_OPT", "YES_DOIT_OPT", "DISK_STATE_OPT", "HV_STATE_OPT", "IGNORE_IPOLICY_OPT", "INSTANCE_POLICY_OPTS", # Generic functions for CLI programs "ConfirmOperation", "CreateIPolicyFromOpts", "GenericMain", "GenericInstanceCreate", "GenericList", "GenericListFields", "GetClient", "GetOnlineNodes", "JobExecutor", "JobSubmittedException", "ParseTimespec", "RunWhileClusterStopped", "SubmitOpCode", "SubmitOrSend", "UsesRPC", # Formatting functions "ToStderr", "ToStdout", "FormatError", "FormatQueryResult", "FormatParamsDictInfo", "FormatPolicyInfo", "PrintIPolicyCommand", "PrintGenericInfo", "GenerateTable", "AskUser", "FormatTimestamp", "FormatLogMessage", # Tags functions "ListTags", "AddTags", "RemoveTags", # command line options support infrastructure "ARGS_MANY_INSTANCES", "ARGS_MANY_NODES", "ARGS_MANY_GROUPS", "ARGS_MANY_NETWORKS", "ARGS_NONE", "ARGS_ONE_INSTANCE", "ARGS_ONE_NODE", "ARGS_ONE_GROUP", "ARGS_ONE_OS", "ARGS_ONE_NETWORK", "ArgChoice", "ArgCommand", "ArgFile", "ArgGroup", "ArgHost", "ArgInstance", "ArgJobId", "ArgNetwork", "ArgNode", "ArgOs", "ArgExtStorage", "ArgSuggest", "ArgUnknown", "OPT_COMPL_INST_ADD_NODES", "OPT_COMPL_MANY_NODES", "OPT_COMPL_ONE_IALLOCATOR", "OPT_COMPL_ONE_INSTANCE", "OPT_COMPL_ONE_NODE", "OPT_COMPL_ONE_NODEGROUP", "OPT_COMPL_ONE_NETWORK", "OPT_COMPL_ONE_OS", "OPT_COMPL_ONE_EXTSTORAGE", "cli_option", "FixHvParams", "SplitNodeOption", "CalculateOSNames", "ParseFields", "COMMON_CREATE_OPTS", ] NO_PREFIX = "no_" UN_PREFIX = "-" #: Priorities (sorted) _PRIORITY_NAMES = [ ("low", constants.OP_PRIO_LOW), ("normal", constants.OP_PRIO_NORMAL), ("high", constants.OP_PRIO_HIGH), ] #: Priority dictionary for easier lookup # TODO: Replace this and _PRIORITY_NAMES with a single sorted dictionary once # we migrate to Python 2.6 _PRIONAME_TO_VALUE = dict(_PRIORITY_NAMES) # Query result status for clients (QR_NORMAL, QR_UNKNOWN, QR_INCOMPLETE) = range(3) #: Maximum batch size for ChooseJob _CHOOSE_BATCH = 25 # constants used to create InstancePolicy dictionary TISPECS_GROUP_TYPES = { constants.ISPECS_MIN: constants.VTYPE_INT, constants.ISPECS_MAX: constants.VTYPE_INT, } TISPECS_CLUSTER_TYPES = { constants.ISPECS_MIN: constants.VTYPE_INT, constants.ISPECS_MAX: constants.VTYPE_INT, constants.ISPECS_STD: constants.VTYPE_INT, } #: User-friendly names for query2 field types _QFT_NAMES = { constants.QFT_UNKNOWN: "Unknown", constants.QFT_TEXT: "Text", constants.QFT_BOOL: "Boolean", constants.QFT_NUMBER: "Number", constants.QFT_UNIT: "Storage size", constants.QFT_TIMESTAMP: "Timestamp", constants.QFT_OTHER: "Custom", } class _Argument: def __init__(self, min=0, max=None): # pylint: disable=W0622 self.min = min self.max = max def __repr__(self): return ("<%s min=%s max=%s>" % (self.__class__.__name__, self.min, self.max)) class ArgSuggest(_Argument): """Suggesting argument. Value can be any of the ones passed to the constructor. """ # pylint: disable=W0622 def __init__(self, min=0, max=None, choices=None): _Argument.__init__(self, min=min, max=max) self.choices = choices def __repr__(self): return ("<%s min=%s max=%s choices=%r>" % (self.__class__.__name__, self.min, self.max, self.choices)) class ArgChoice(ArgSuggest): """Choice argument. Value can be any of the ones passed to the constructor. Like L{ArgSuggest}, but value must be one of the choices. 
""" class ArgUnknown(_Argument): """Unknown argument to program (e.g. determined at runtime). """ class ArgInstance(_Argument): """Instances argument. """ class ArgNode(_Argument): """Node argument. """ class ArgNetwork(_Argument): """Network argument. """ class ArgGroup(_Argument): """Node group argument. """ class ArgJobId(_Argument): """Job ID argument. """ class ArgFile(_Argument): """File path argument. """ class ArgCommand(_Argument): """Command argument. """ class ArgHost(_Argument): """Host argument. """ class ArgOs(_Argument): """OS argument. """ class ArgExtStorage(_Argument): """ExtStorage argument. """ ARGS_NONE = [] ARGS_MANY_INSTANCES = [ArgInstance()] ARGS_MANY_NETWORKS = [ArgNetwork()] ARGS_MANY_NODES = [ArgNode()] ARGS_MANY_GROUPS = [ArgGroup()] ARGS_ONE_INSTANCE = [ArgInstance(min=1, max=1)] ARGS_ONE_NETWORK = [ArgNetwork(min=1, max=1)] ARGS_ONE_NODE = [ArgNode(min=1, max=1)] # TODO ARGS_ONE_GROUP = [ArgGroup(min=1, max=1)] ARGS_ONE_OS = [ArgOs(min=1, max=1)] def _ExtractTagsObject(opts, args): """Extract the tag type object. Note that this function will modify its args parameter. """ if not hasattr(opts, "tag_type"): raise errors.ProgrammerError("tag_type not passed to _ExtractTagsObject") kind = opts.tag_type if kind == constants.TAG_CLUSTER: retval = kind, None elif kind in (constants.TAG_NODEGROUP, constants.TAG_NODE, constants.TAG_NETWORK, constants.TAG_INSTANCE): if not args: raise errors.OpPrereqError("no arguments passed to the command", errors.ECODE_INVAL) name = args.pop(0) retval = kind, name else: raise errors.ProgrammerError("Unhandled tag type '%s'" % kind) return retval def _ExtendTags(opts, args): """Extend the args if a source file has been given. This function will extend the tags with the contents of the file passed in the 'tags_source' attribute of the opts parameter. A file named '-' will be replaced by stdin. """ fname = opts.tags_source if fname is None: return if fname == "-": new_fh = sys.stdin else: new_fh = open(fname, "r") new_data = [] try: # we don't use the nice 'new_data = [line.strip() for line in fh]' # because of python bug 1633941 while True: line = new_fh.readline() if not line: break new_data.append(line.strip()) finally: new_fh.close() args.extend(new_data) def ListTags(opts, args): """List the tags on a given object. This is a generic implementation that knows how to deal with all three cases of tag objects (cluster, node, instance). The opts argument is expected to contain a tag_type field denoting what object type we work on. """ kind, name = _ExtractTagsObject(opts, args) cl = GetClient(query=True) result = cl.QueryTags(kind, name) result = list(result) result.sort() for tag in result: ToStdout(tag) def AddTags(opts, args): """Add tags on a given object. This is a generic implementation that knows how to deal with all three cases of tag objects (cluster, node, instance). The opts argument is expected to contain a tag_type field denoting what object type we work on. """ kind, name = _ExtractTagsObject(opts, args) _ExtendTags(opts, args) if not args: raise errors.OpPrereqError("No tags to be added", errors.ECODE_INVAL) op = opcodes.OpTagsSet(kind=kind, name=name, tags=args) SubmitOrSend(op, opts) def RemoveTags(opts, args): """Remove tags from a given object. This is a generic implementation that knows how to deal with all three cases of tag objects (cluster, node, instance). The opts argument is expected to contain a tag_type field denoting what object type we work on. 
""" kind, name = _ExtractTagsObject(opts, args) _ExtendTags(opts, args) if not args: raise errors.OpPrereqError("No tags to be removed", errors.ECODE_INVAL) op = opcodes.OpTagsDel(kind=kind, name=name, tags=args) SubmitOrSend(op, opts) def check_unit(option, opt, value): # pylint: disable=W0613 """OptParsers custom converter for units. """ try: return utils.ParseUnit(value) except errors.UnitParseError, err: raise OptionValueError("option %s: %s" % (opt, err)) def _SplitKeyVal(opt, data, parse_prefixes): """Convert a KeyVal string into a dict. This function will convert a key=val[,...] string into a dict. Empty values will be converted specially: keys which have the prefix 'no_' will have the value=False and the prefix stripped, keys with the prefix "-" will have value=None and the prefix stripped, and the others will have value=True. @type opt: string @param opt: a string holding the option name for which we process the data, used in building error messages @type data: string @param data: a string of the format key=val,key=val,... @type parse_prefixes: bool @param parse_prefixes: whether to handle prefixes specially @rtype: dict @return: {key=val, key=val} @raises errors.ParameterError: if there are duplicate keys """ kv_dict = {} if data: for elem in utils.UnescapeAndSplit(data, sep=","): if "=" in elem: key, val = elem.split("=", 1) elif parse_prefixes: if elem.startswith(NO_PREFIX): key, val = elem[len(NO_PREFIX):], False elif elem.startswith(UN_PREFIX): key, val = elem[len(UN_PREFIX):], None else: key, val = elem, True else: raise errors.ParameterError("Missing value for key '%s' in option %s" % (elem, opt)) if key in kv_dict: raise errors.ParameterError("Duplicate key '%s' in option %s" % (key, opt)) kv_dict[key] = val return kv_dict def _SplitIdentKeyVal(opt, value, parse_prefixes): """Helper function to parse "ident:key=val,key=val" options. @type opt: string @param opt: option name, used in error messages @type value: string @param value: expected to be in the format "ident:key=val,key=val,..." @type parse_prefixes: bool @param parse_prefixes: whether to handle prefixes specially (see L{_SplitKeyVal}) @rtype: tuple @return: (ident, {key=val, key=val}) @raises errors.ParameterError: in case of duplicates or other parsing errors """ if ":" not in value: ident, rest = value, "" else: ident, rest = value.split(":", 1) if parse_prefixes and ident.startswith(NO_PREFIX): if rest: msg = "Cannot pass options when removing parameter groups: %s" % value raise errors.ParameterError(msg) retval = (ident[len(NO_PREFIX):], False) elif (parse_prefixes and ident.startswith(UN_PREFIX) and (len(ident) <= len(UN_PREFIX) or not ident[len(UN_PREFIX)].isdigit())): if rest: msg = "Cannot pass options when removing parameter groups: %s" % value raise errors.ParameterError(msg) retval = (ident[len(UN_PREFIX):], None) else: kv_dict = _SplitKeyVal(opt, rest, parse_prefixes) retval = (ident, kv_dict) return retval def check_ident_key_val(option, opt, value): # pylint: disable=W0613 """Custom parser for ident:key=val,key=val options. This will store the parsed values as a tuple (ident, {key: val}). As such, multiple uses of this option via action=append is possible. """ return _SplitIdentKeyVal(opt, value, True) def check_key_val(option, opt, value): # pylint: disable=W0613 """Custom parser class for key=val,key=val options. This will store the parsed values as a dict {key: val}. 
""" return _SplitKeyVal(opt, value, True) def _SplitListKeyVal(opt, value): retval = {} for elem in value.split("/"): if not elem: raise errors.ParameterError("Empty section in option '%s'" % opt) (ident, valdict) = _SplitIdentKeyVal(opt, elem, False) if ident in retval: msg = ("Duplicated parameter '%s' in parsing %s: %s" % (ident, opt, elem)) raise errors.ParameterError(msg) retval[ident] = valdict return retval def check_multilist_ident_key_val(_, opt, value): """Custom parser for "ident:key=val,key=val/ident:key=val//ident:.." options. @rtype: list of dictionary @return: [{ident: {key: val, key: val}, ident: {key: val}}, {ident:..}] """ retval = [] for line in value.split("//"): retval.append(_SplitListKeyVal(opt, line)) return retval def check_bool(option, opt, value): # pylint: disable=W0613 """Custom parser for yes/no options. This will store the parsed value as either True or False. """ value = value.lower() if value == constants.VALUE_FALSE or value == "no": return False elif value == constants.VALUE_TRUE or value == "yes": return True else: raise errors.ParameterError("Invalid boolean value '%s'" % value) def check_list(option, opt, value): # pylint: disable=W0613 """Custom parser for comma-separated lists. """ # we have to make this explicit check since "".split(",") is [""], # not an empty list :( if not value: return [] else: return utils.UnescapeAndSplit(value) def check_maybefloat(option, opt, value): # pylint: disable=W0613 """Custom parser for float numbers which might be also defaults. """ value = value.lower() if value == constants.VALUE_DEFAULT: return value else: return float(value) # completion_suggestion is normally a list. Using numeric values not evaluating # to False for dynamic completion. (OPT_COMPL_MANY_NODES, OPT_COMPL_ONE_NODE, OPT_COMPL_ONE_INSTANCE, OPT_COMPL_ONE_OS, OPT_COMPL_ONE_EXTSTORAGE, OPT_COMPL_ONE_IALLOCATOR, OPT_COMPL_ONE_NETWORK, OPT_COMPL_INST_ADD_NODES, OPT_COMPL_ONE_NODEGROUP) = range(100, 109) OPT_COMPL_ALL = compat.UniqueFrozenset([ OPT_COMPL_MANY_NODES, OPT_COMPL_ONE_NODE, OPT_COMPL_ONE_INSTANCE, OPT_COMPL_ONE_OS, OPT_COMPL_ONE_EXTSTORAGE, OPT_COMPL_ONE_IALLOCATOR, OPT_COMPL_ONE_NETWORK, OPT_COMPL_INST_ADD_NODES, OPT_COMPL_ONE_NODEGROUP, ]) class CliOption(Option): """Custom option class for optparse. 
""" ATTRS = Option.ATTRS + [ "completion_suggest", ] TYPES = Option.TYPES + ( "multilistidentkeyval", "identkeyval", "keyval", "unit", "bool", "list", "maybefloat", ) TYPE_CHECKER = Option.TYPE_CHECKER.copy() TYPE_CHECKER["multilistidentkeyval"] = check_multilist_ident_key_val TYPE_CHECKER["identkeyval"] = check_ident_key_val TYPE_CHECKER["keyval"] = check_key_val TYPE_CHECKER["unit"] = check_unit TYPE_CHECKER["bool"] = check_bool TYPE_CHECKER["list"] = check_list TYPE_CHECKER["maybefloat"] = check_maybefloat # optparse.py sets make_option, so we do it for our own option class, too cli_option = CliOption _YORNO = "yes|no" DEBUG_OPT = cli_option("-d", "--debug", default=0, action="count", help="Increase debugging level") NOHDR_OPT = cli_option("--no-headers", default=False, action="store_true", dest="no_headers", help="Don't display column headers") SEP_OPT = cli_option("--separator", default=None, action="store", dest="separator", help=("Separator between output fields" " (defaults to one space)")) USEUNITS_OPT = cli_option("--units", default=None, dest="units", choices=("h", "m", "g", "t"), help="Specify units for output (one of h/m/g/t)") FIELDS_OPT = cli_option("-o", "--output", dest="output", action="store", type="string", metavar="FIELDS", help="Comma separated list of output fields") FORCE_OPT = cli_option("-f", "--force", dest="force", action="store_true", default=False, help="Force the operation") CONFIRM_OPT = cli_option("--yes", dest="confirm", action="store_true", default=False, help="Do not require confirmation") IGNORE_OFFLINE_OPT = cli_option("--ignore-offline", dest="ignore_offline", action="store_true", default=False, help=("Ignore offline nodes and do as much" " as possible")) TAG_ADD_OPT = cli_option("--tags", dest="tags", default=None, help="Comma-separated list of instance" " tags") TAG_SRC_OPT = cli_option("--from", dest="tags_source", default=None, help="File with tag names") SUBMIT_OPT = cli_option("--submit", dest="submit_only", default=False, action="store_true", help=("Submit the job and return the job ID, but" " don't wait for the job to finish")) PRINT_JOBID_OPT = cli_option("--print-jobid", dest="print_jobid", default=False, action="store_true", help=("Additionally print the job as first line" " on stdout (for scripting).")) SYNC_OPT = cli_option("--sync", dest="do_locking", default=False, action="store_true", help=("Grab locks while doing the queries" " in order to ensure more consistent results")) DRY_RUN_OPT = cli_option("--dry-run", default=False, action="store_true", help=("Do not execute the operation, just run the" " check steps and verify if it could be" " executed")) VERBOSE_OPT = cli_option("-v", "--verbose", default=False, action="store_true", help="Increase the verbosity of the operation") DEBUG_SIMERR_OPT = cli_option("--debug-simulate-errors", default=False, action="store_true", dest="simulate_errors", help="Debugging option that makes the operation" " treat most runtime checks as failed") NWSYNC_OPT = cli_option("--no-wait-for-sync", dest="wait_for_sync", default=True, action="store_false", help="Don't wait for sync (DANGEROUS!)") WFSYNC_OPT = cli_option("--wait-for-sync", dest="wait_for_sync", default=False, action="store_true", help="Wait for disks to sync") ONLINE_INST_OPT = cli_option("--online", dest="online_inst", action="store_true", default=False, help="Enable offline instance") OFFLINE_INST_OPT = cli_option("--offline", dest="offline_inst", action="store_true", default=False, help="Disable down instance") DISK_TEMPLATE_OPT = 
cli_option("-t", "--disk-template", dest="disk_template", help=("Custom disk setup (%s)" % utils.CommaJoin(constants.DISK_TEMPLATES)), default=None, metavar="TEMPL", choices=list(constants.DISK_TEMPLATES)) NONICS_OPT = cli_option("--no-nics", default=False, action="store_true", help="Do not create any network cards for" " the instance") FILESTORE_DIR_OPT = cli_option("--file-storage-dir", dest="file_storage_dir", help="Relative path under default cluster-wide" " file storage dir to store file-based disks", default=None, metavar="") FILESTORE_DRIVER_OPT = cli_option("--file-driver", dest="file_driver", help="Driver to use for image files", default=None, metavar="", choices=list(constants.FILE_DRIVER)) IALLOCATOR_OPT = cli_option("-I", "--iallocator", metavar="", help="Select nodes for the instance automatically" " using the iallocator plugin", default=None, type="string", completion_suggest=OPT_COMPL_ONE_IALLOCATOR) DEFAULT_IALLOCATOR_OPT = cli_option("-I", "--default-iallocator", metavar="", help="Set the default instance" " allocator plugin", default=None, type="string", completion_suggest=OPT_COMPL_ONE_IALLOCATOR) OS_OPT = cli_option("-o", "--os-type", dest="os", help="What OS to run", metavar="", completion_suggest=OPT_COMPL_ONE_OS) OSPARAMS_OPT = cli_option("-O", "--os-parameters", dest="osparams", type="keyval", default={}, help="OS parameters") FORCE_VARIANT_OPT = cli_option("--force-variant", dest="force_variant", action="store_true", default=False, help="Force an unknown variant") NO_INSTALL_OPT = cli_option("--no-install", dest="no_install", action="store_true", default=False, help="Do not install the OS (will" " enable no-start)") NORUNTIME_CHGS_OPT = cli_option("--no-runtime-changes", dest="allow_runtime_chgs", default=True, action="store_false", help="Don't allow runtime changes") BACKEND_OPT = cli_option("-B", "--backend-parameters", dest="beparams", type="keyval", default={}, help="Backend parameters") HVOPTS_OPT = cli_option("-H", "--hypervisor-parameters", type="keyval", default={}, dest="hvparams", help="Hypervisor parameters") DISK_PARAMS_OPT = cli_option("-D", "--disk-parameters", dest="diskparams", help="Disk template parameters, in the format" " template:option=value,option=value,...", type="identkeyval", action="append", default=[]) SPECS_MEM_SIZE_OPT = cli_option("--specs-mem-size", dest="ispecs_mem_size", type="keyval", default={}, help="Memory size specs: list of key=value," " where key is one of min, max, std" " (in MB or using a unit)") SPECS_CPU_COUNT_OPT = cli_option("--specs-cpu-count", dest="ispecs_cpu_count", type="keyval", default={}, help="CPU count specs: list of key=value," " where key is one of min, max, std") SPECS_DISK_COUNT_OPT = cli_option("--specs-disk-count", dest="ispecs_disk_count", type="keyval", default={}, help="Disk count specs: list of key=value," " where key is one of min, max, std") SPECS_DISK_SIZE_OPT = cli_option("--specs-disk-size", dest="ispecs_disk_size", type="keyval", default={}, help="Disk size specs: list of key=value," " where key is one of min, max, std" " (in MB or using a unit)") SPECS_NIC_COUNT_OPT = cli_option("--specs-nic-count", dest="ispecs_nic_count", type="keyval", default={}, help="NIC count specs: list of key=value," " where key is one of min, max, std") IPOLICY_BOUNDS_SPECS_STR = "--ipolicy-bounds-specs" IPOLICY_BOUNDS_SPECS_OPT = cli_option(IPOLICY_BOUNDS_SPECS_STR, dest="ipolicy_bounds_specs", type="multilistidentkeyval", default=None, help="Complete instance specs limits") IPOLICY_STD_SPECS_STR = 
"--ipolicy-std-specs" IPOLICY_STD_SPECS_OPT = cli_option(IPOLICY_STD_SPECS_STR, dest="ipolicy_std_specs", type="keyval", default=None, help="Complte standard instance specs") IPOLICY_DISK_TEMPLATES = cli_option("--ipolicy-disk-templates", dest="ipolicy_disk_templates", type="list", default=None, help="Comma-separated list of" " enabled disk templates") IPOLICY_VCPU_RATIO = cli_option("--ipolicy-vcpu-ratio", dest="ipolicy_vcpu_ratio", type="maybefloat", default=None, help="The maximum allowed vcpu-to-cpu ratio") IPOLICY_SPINDLE_RATIO = cli_option("--ipolicy-spindle-ratio", dest="ipolicy_spindle_ratio", type="maybefloat", default=None, help=("The maximum allowed instances to" " spindle ratio")) HYPERVISOR_OPT = cli_option("-H", "--hypervisor-parameters", dest="hypervisor", help="Hypervisor and hypervisor options, in the" " format hypervisor:option=value,option=value,...", default=None, type="identkeyval") HVLIST_OPT = cli_option("-H", "--hypervisor-parameters", dest="hvparams", help="Hypervisor and hypervisor options, in the" " format hypervisor:option=value,option=value,...", default=[], action="append", type="identkeyval") NOIPCHECK_OPT = cli_option("--no-ip-check", dest="ip_check", default=True, action="store_false", help="Don't check that the instance's IP" " is alive") NONAMECHECK_OPT = cli_option("--no-name-check", dest="name_check", default=True, action="store_false", help="Don't check that the instance's name" " is resolvable") NET_OPT = cli_option("--net", help="NIC parameters", default=[], dest="nics", action="append", type="identkeyval") DISK_OPT = cli_option("--disk", help="Disk parameters", default=[], dest="disks", action="append", type="identkeyval") DISKIDX_OPT = cli_option("--disks", dest="disks", default=None, help="Comma-separated list of disks" " indices to act on (e.g. 
0,2) (optional," " defaults to all disks)") OS_SIZE_OPT = cli_option("-s", "--os-size", dest="sd_size", help="Enforces a single-disk configuration using the" " given disk size, in MiB unless a suffix is used", default=None, type="unit", metavar="") IGNORE_CONSIST_OPT = cli_option("--ignore-consistency", dest="ignore_consistency", action="store_true", default=False, help="Ignore the consistency of the disks on" " the secondary") ALLOW_FAILOVER_OPT = cli_option("--allow-failover", dest="allow_failover", action="store_true", default=False, help="If migration is not possible fallback to" " failover") NONLIVE_OPT = cli_option("--non-live", dest="live", default=True, action="store_false", help="Do a non-live migration (this usually means" " freeze the instance, save the state, transfer and" " only then resume running on the secondary node)") MIGRATION_MODE_OPT = cli_option("--migration-mode", dest="migration_mode", default=None, choices=list(constants.HT_MIGRATION_MODES), help="Override default migration mode (choose" " either live or non-live") NODE_PLACEMENT_OPT = cli_option("-n", "--node", dest="node", help="Target node and optional secondary node", metavar="[:]", completion_suggest=OPT_COMPL_INST_ADD_NODES) NODE_LIST_OPT = cli_option("-n", "--node", dest="nodes", default=[], action="append", metavar="", help="Use only this node (can be used multiple" " times, if not given defaults to all nodes)", completion_suggest=OPT_COMPL_ONE_NODE) NODEGROUP_OPT_NAME = "--node-group" NODEGROUP_OPT = cli_option("-g", NODEGROUP_OPT_NAME, dest="nodegroup", help="Node group (name or uuid)", metavar="", default=None, type="string", completion_suggest=OPT_COMPL_ONE_NODEGROUP) SINGLE_NODE_OPT = cli_option("-n", "--node", dest="node", help="Target node", metavar="", completion_suggest=OPT_COMPL_ONE_NODE) NOSTART_OPT = cli_option("--no-start", dest="start", default=True, action="store_false", help="Don't start the instance after creation") SHOWCMD_OPT = cli_option("--show-cmd", dest="show_command", action="store_true", default=False, help="Show command instead of executing it") CLEANUP_OPT = cli_option("--cleanup", dest="cleanup", default=False, action="store_true", help="Instead of performing the migration/failover," " try to recover from a failed cleanup. This is safe" " to run even if the instance is healthy, but it" " will create extra replication traffic and " " disrupt briefly the replication (like during the" " migration/failover") STATIC_OPT = cli_option("-s", "--static", dest="static", action="store_true", default=False, help="Only show configuration data, not runtime data") ALL_OPT = cli_option("--all", dest="show_all", default=False, action="store_true", help="Show info on all instances on the cluster." 
" This can take a long time to run, use wisely") SELECT_OS_OPT = cli_option("--select-os", dest="select_os", action="store_true", default=False, help="Interactive OS reinstall, lists available" " OS templates for selection") IGNORE_FAILURES_OPT = cli_option("--ignore-failures", dest="ignore_failures", action="store_true", default=False, help="Remove the instance from the cluster" " configuration even if there are failures" " during the removal process") IGNORE_REMOVE_FAILURES_OPT = cli_option("--ignore-remove-failures", dest="ignore_remove_failures", action="store_true", default=False, help="Remove the instance from the" " cluster configuration even if there" " are failures during the removal" " process") REMOVE_INSTANCE_OPT = cli_option("--remove-instance", dest="remove_instance", action="store_true", default=False, help="Remove the instance from the cluster") DST_NODE_OPT = cli_option("-n", "--target-node", dest="dst_node", help="Specifies the new node for the instance", metavar="NODE", default=None, completion_suggest=OPT_COMPL_ONE_NODE) NEW_SECONDARY_OPT = cli_option("-n", "--new-secondary", dest="dst_node", help="Specifies the new secondary node", metavar="NODE", default=None, completion_suggest=OPT_COMPL_ONE_NODE) NEW_PRIMARY_OPT = cli_option("--new-primary", dest="new_primary_node", help="Specifies the new primary node", metavar="", default=None, completion_suggest=OPT_COMPL_ONE_NODE) ON_PRIMARY_OPT = cli_option("-p", "--on-primary", dest="on_primary", default=False, action="store_true", help="Replace the disk(s) on the primary" " node (applies only to internally mirrored" " disk templates, e.g. %s)" % utils.CommaJoin(constants.DTS_INT_MIRROR)) ON_SECONDARY_OPT = cli_option("-s", "--on-secondary", dest="on_secondary", default=False, action="store_true", help="Replace the disk(s) on the secondary" " node (applies only to internally mirrored" " disk templates, e.g. %s)" % utils.CommaJoin(constants.DTS_INT_MIRROR)) AUTO_PROMOTE_OPT = cli_option("--auto-promote", dest="auto_promote", default=False, action="store_true", help="Lock all nodes and auto-promote as needed" " to MC status") AUTO_REPLACE_OPT = cli_option("-a", "--auto", dest="auto", default=False, action="store_true", help="Automatically replace faulty disks" " (applies only to internally mirrored" " disk templates, e.g. 
%s)" % utils.CommaJoin(constants.DTS_INT_MIRROR)) IGNORE_SIZE_OPT = cli_option("--ignore-size", dest="ignore_size", default=False, action="store_true", help="Ignore current recorded size" " (useful for forcing activation when" " the recorded size is wrong)") SRC_NODE_OPT = cli_option("--src-node", dest="src_node", help="Source node", metavar="", completion_suggest=OPT_COMPL_ONE_NODE) SRC_DIR_OPT = cli_option("--src-dir", dest="src_dir", help="Source directory", metavar="") SECONDARY_IP_OPT = cli_option("-s", "--secondary-ip", dest="secondary_ip", help="Specify the secondary ip for the node", metavar="ADDRESS", default=None) READD_OPT = cli_option("--readd", dest="readd", default=False, action="store_true", help="Readd old node after replacing it") NOSSH_KEYCHECK_OPT = cli_option("--no-ssh-key-check", dest="ssh_key_check", default=True, action="store_false", help="Disable SSH key fingerprint checking") NODE_FORCE_JOIN_OPT = cli_option("--force-join", dest="force_join", default=False, action="store_true", help="Force the joining of a node") MC_OPT = cli_option("-C", "--master-candidate", dest="master_candidate", type="bool", default=None, metavar=_YORNO, help="Set the master_candidate flag on the node") OFFLINE_OPT = cli_option("-O", "--offline", dest="offline", metavar=_YORNO, type="bool", default=None, help=("Set the offline flag on the node" " (cluster does not communicate with offline" " nodes)")) DRAINED_OPT = cli_option("-D", "--drained", dest="drained", metavar=_YORNO, type="bool", default=None, help=("Set the drained flag on the node" " (excluded from allocation operations)")) CAPAB_MASTER_OPT = cli_option("--master-capable", dest="master_capable", type="bool", default=None, metavar=_YORNO, help="Set the master_capable flag on the node") CAPAB_VM_OPT = cli_option("--vm-capable", dest="vm_capable", type="bool", default=None, metavar=_YORNO, help="Set the vm_capable flag on the node") ALLOCATABLE_OPT = cli_option("--allocatable", dest="allocatable", type="bool", default=None, metavar=_YORNO, help="Set the allocatable flag on a volume") NOLVM_STORAGE_OPT = cli_option("--no-lvm-storage", dest="lvm_storage", help="Disable support for lvm based instances" " (cluster-wide)", action="store_false", default=True) ENABLED_HV_OPT = cli_option("--enabled-hypervisors", dest="enabled_hypervisors", help="Comma-separated list of hypervisors", type="string", default=None) ENABLED_DISK_TEMPLATES_OPT = cli_option("--enabled-disk-templates", dest="enabled_disk_templates", help="Comma-separated list of " "disk templates", type="string", default=None) NIC_PARAMS_OPT = cli_option("-N", "--nic-parameters", dest="nicparams", type="keyval", default={}, help="NIC parameters") CP_SIZE_OPT = cli_option("-C", "--candidate-pool-size", default=None, dest="candidate_pool_size", type="int", help="Set the candidate pool size") VG_NAME_OPT = cli_option("--vg-name", dest="vg_name", help=("Enables LVM and specifies the volume group" " name (cluster-wide) for disk allocation" " [%s]" % constants.DEFAULT_VG), metavar="VG", default=None) YES_DOIT_OPT = cli_option("--yes-do-it", "--ya-rly", dest="yes_do_it", help="Destroy cluster", action="store_true") NOVOTING_OPT = cli_option("--no-voting", dest="no_voting", help="Skip node agreement check (dangerous)", action="store_true", default=False) MAC_PREFIX_OPT = cli_option("-m", "--mac-prefix", dest="mac_prefix", help="Specify the mac prefix for the instance IP" " addresses, in the format XX:XX:XX", metavar="PREFIX", default=None) MASTER_NETDEV_OPT = cli_option("--master-netdev", 
dest="master_netdev", help="Specify the node interface (cluster-wide)" " on which the master IP address will be added" " (cluster init default: %s)" % constants.DEFAULT_BRIDGE, metavar="NETDEV", default=None) MASTER_NETMASK_OPT = cli_option("--master-netmask", dest="master_netmask", help="Specify the netmask of the master IP", metavar="NETMASK", default=None) USE_EXTERNAL_MIP_SCRIPT = cli_option("--use-external-mip-script", dest="use_external_mip_script", help="Specify whether to run a" " user-provided script for the master" " IP address turnup and" " turndown operations", type="bool", metavar=_YORNO, default=None) GLOBAL_FILEDIR_OPT = cli_option("--file-storage-dir", dest="file_storage_dir", help="Specify the default directory (cluster-" "wide) for storing the file-based disks [%s]" % pathutils.DEFAULT_FILE_STORAGE_DIR, metavar="DIR", default=None) GLOBAL_SHARED_FILEDIR_OPT = cli_option( "--shared-file-storage-dir", dest="shared_file_storage_dir", help="Specify the default directory (cluster-wide) for storing the" " shared file-based disks [%s]" % pathutils.DEFAULT_SHARED_FILE_STORAGE_DIR, metavar="SHAREDDIR", default=None) NOMODIFY_ETCHOSTS_OPT = cli_option("--no-etc-hosts", dest="modify_etc_hosts", help="Don't modify %s" % pathutils.ETC_HOSTS, action="store_false", default=True) MODIFY_ETCHOSTS_OPT = \ cli_option("--modify-etc-hosts", dest="modify_etc_hosts", metavar=_YORNO, default=None, type="bool", help="Defines whether the cluster should autonomously modify" " and keep in sync the /etc/hosts file of the nodes") NOMODIFY_SSH_SETUP_OPT = cli_option("--no-ssh-init", dest="modify_ssh_setup", help="Don't initialize SSH keys", action="store_false", default=True) ERROR_CODES_OPT = cli_option("--error-codes", dest="error_codes", help="Enable parseable error messages", action="store_true", default=False) NONPLUS1_OPT = cli_option("--no-nplus1-mem", dest="skip_nplusone_mem", help="Skip N+1 memory redundancy tests", action="store_true", default=False) REBOOT_TYPE_OPT = cli_option("-t", "--type", dest="reboot_type", help="Type of reboot: soft/hard/full", default=constants.INSTANCE_REBOOT_HARD, metavar="", choices=list(constants.REBOOT_TYPES)) IGNORE_SECONDARIES_OPT = cli_option("--ignore-secondaries", dest="ignore_secondaries", default=False, action="store_true", help="Ignore errors from secondaries") NOSHUTDOWN_OPT = cli_option("--noshutdown", dest="shutdown", action="store_false", default=True, help="Don't shutdown the instance (unsafe)") TIMEOUT_OPT = cli_option("--timeout", dest="timeout", type="int", default=constants.DEFAULT_SHUTDOWN_TIMEOUT, help="Maximum time to wait") SHUTDOWN_TIMEOUT_OPT = cli_option("--shutdown-timeout", dest="shutdown_timeout", type="int", default=constants.DEFAULT_SHUTDOWN_TIMEOUT, help="Maximum time to wait for instance" " shutdown") INTERVAL_OPT = cli_option("--interval", dest="interval", type="int", default=None, help=("Number of seconds between repetions of the" " command")) EARLY_RELEASE_OPT = cli_option("--early-release", dest="early_release", default=False, action="store_true", help="Release the locks on the secondary" " node(s) early") NEW_CLUSTER_CERT_OPT = cli_option("--new-cluster-certificate", dest="new_cluster_cert", default=False, action="store_true", help="Generate a new cluster certificate") RAPI_CERT_OPT = cli_option("--rapi-certificate", dest="rapi_cert", default=None, help="File containing new RAPI certificate") NEW_RAPI_CERT_OPT = cli_option("--new-rapi-certificate", dest="new_rapi_cert", default=None, action="store_true", help=("Generate a new 
self-signed RAPI" " certificate")) SPICE_CERT_OPT = cli_option("--spice-certificate", dest="spice_cert", default=None, help="File containing new SPICE certificate") SPICE_CACERT_OPT = cli_option("--spice-ca-certificate", dest="spice_cacert", default=None, help="File containing the certificate of the CA" " which signed the SPICE certificate") NEW_SPICE_CERT_OPT = cli_option("--new-spice-certificate", dest="new_spice_cert", default=None, action="store_true", help=("Generate a new self-signed SPICE" " certificate")) NEW_CONFD_HMAC_KEY_OPT = cli_option("--new-confd-hmac-key", dest="new_confd_hmac_key", default=False, action="store_true", help=("Create a new HMAC key for %s" % constants.CONFD)) CLUSTER_DOMAIN_SECRET_OPT = cli_option("--cluster-domain-secret", dest="cluster_domain_secret", default=None, help=("Load new new cluster domain" " secret from file")) NEW_CLUSTER_DOMAIN_SECRET_OPT = cli_option("--new-cluster-domain-secret", dest="new_cluster_domain_secret", default=False, action="store_true", help=("Create a new cluster domain" " secret")) USE_REPL_NET_OPT = cli_option("--use-replication-network", dest="use_replication_network", help="Whether to use the replication network" " for talking to the nodes", action="store_true", default=False) MAINTAIN_NODE_HEALTH_OPT = \ cli_option("--maintain-node-health", dest="maintain_node_health", metavar=_YORNO, default=None, type="bool", help="Configure the cluster to automatically maintain node" " health, by shutting down unknown instances, shutting down" " unknown DRBD devices, etc.") IDENTIFY_DEFAULTS_OPT = \ cli_option("--identify-defaults", dest="identify_defaults", default=False, action="store_true", help="Identify which saved instance parameters are equal to" " the current cluster defaults and set them as such, instead" " of marking them as overridden") UIDPOOL_OPT = cli_option("--uid-pool", default=None, action="store", dest="uid_pool", help=("A list of user-ids or user-id" " ranges separated by commas")) ADD_UIDS_OPT = cli_option("--add-uids", default=None, action="store", dest="add_uids", help=("A list of user-ids or user-id" " ranges separated by commas, to be" " added to the user-id pool")) REMOVE_UIDS_OPT = cli_option("--remove-uids", default=None, action="store", dest="remove_uids", help=("A list of user-ids or user-id" " ranges separated by commas, to be" " removed from the user-id pool")) RESERVED_LVS_OPT = cli_option("--reserved-lvs", default=None, action="store", dest="reserved_lvs", help=("A comma-separated list of reserved" " logical volumes names, that will be" " ignored by cluster verify")) ROMAN_OPT = cli_option("--roman", dest="roman_integers", default=False, action="store_true", help="Use roman numbers for positive integers") DRBD_HELPER_OPT = cli_option("--drbd-usermode-helper", dest="drbd_helper", action="store", default=None, help="Specifies usermode helper for DRBD") NODRBD_STORAGE_OPT = cli_option("--no-drbd-storage", dest="drbd_storage", action="store_false", default=True, help="Disable support for DRBD") PRIMARY_IP_VERSION_OPT = \ cli_option("--primary-ip-version", default=constants.IP4_VERSION, action="store", dest="primary_ip_version", metavar="%d|%d" % (constants.IP4_VERSION, constants.IP6_VERSION), help="Cluster-wide IP version for primary IP") SHOW_MACHINE_OPT = cli_option("-M", "--show-machine-names", default=False, action="store_true", help="Show machine name for every line in output") FAILURE_ONLY_OPT = cli_option("--failure-only", default=False, action="store_true", help=("Hide successful results and show 
failures" " only (determined by the exit code)")) REASON_OPT = cli_option("--reason", default=None, help="The reason for executing the command") def _PriorityOptionCb(option, _, value, parser): """Callback for processing C{--priority} option. """ value = _PRIONAME_TO_VALUE[value] setattr(parser.values, option.dest, value) PRIORITY_OPT = cli_option("--priority", default=None, dest="priority", metavar="|".join(name for name, _ in _PRIORITY_NAMES), choices=_PRIONAME_TO_VALUE.keys(), action="callback", type="choice", callback=_PriorityOptionCb, help="Priority for opcode processing") HID_OS_OPT = cli_option("--hidden", dest="hidden", type="bool", default=None, metavar=_YORNO, help="Sets the hidden flag on the OS") BLK_OS_OPT = cli_option("--blacklisted", dest="blacklisted", type="bool", default=None, metavar=_YORNO, help="Sets the blacklisted flag on the OS") PREALLOC_WIPE_DISKS_OPT = cli_option("--prealloc-wipe-disks", default=None, type="bool", metavar=_YORNO, dest="prealloc_wipe_disks", help=("Wipe disks prior to instance" " creation")) NODE_PARAMS_OPT = cli_option("--node-parameters", dest="ndparams", type="keyval", default=None, help="Node parameters") ALLOC_POLICY_OPT = cli_option("--alloc-policy", dest="alloc_policy", action="store", metavar="POLICY", default=None, help="Allocation policy for the node group") NODE_POWERED_OPT = cli_option("--node-powered", default=None, type="bool", metavar=_YORNO, dest="node_powered", help="Specify if the SoR for node is powered") OOB_TIMEOUT_OPT = cli_option("--oob-timeout", dest="oob_timeout", type="int", default=constants.OOB_TIMEOUT, help="Maximum time to wait for out-of-band helper") POWER_DELAY_OPT = cli_option("--power-delay", dest="power_delay", type="float", default=constants.OOB_POWER_DELAY, help="Time in seconds to wait between power-ons") FORCE_FILTER_OPT = cli_option("-F", "--filter", dest="force_filter", action="store_true", default=False, help=("Whether command argument should be treated" " as filter")) NO_REMEMBER_OPT = cli_option("--no-remember", dest="no_remember", action="store_true", default=False, help="Perform but do not record the change" " in the configuration") PRIMARY_ONLY_OPT = cli_option("-p", "--primary-only", default=False, action="store_true", help="Evacuate primary instances only") SECONDARY_ONLY_OPT = cli_option("-s", "--secondary-only", default=False, action="store_true", help="Evacuate secondary instances only" " (applies only to internally mirrored" " disk templates, e.g. 
%s)" % utils.CommaJoin(constants.DTS_INT_MIRROR)) STARTUP_PAUSED_OPT = cli_option("--paused", dest="startup_paused", action="store_true", default=False, help="Pause instance at startup") TO_GROUP_OPT = cli_option("--to", dest="to", metavar="", help="Destination node group (name or uuid)", default=None, action="append", completion_suggest=OPT_COMPL_ONE_NODEGROUP) IGNORE_ERRORS_OPT = cli_option("-I", "--ignore-errors", default=[], action="append", dest="ignore_errors", choices=list(constants.CV_ALL_ECODES_STRINGS), help="Error code to be ignored") DISK_STATE_OPT = cli_option("--disk-state", default=[], dest="disk_state", action="append", help=("Specify disk state information in the" " format" " storage_type/identifier:option=value,...;" " note this is unused for now"), type="identkeyval") HV_STATE_OPT = cli_option("--hypervisor-state", default=[], dest="hv_state", action="append", help=("Specify hypervisor state information in the" " format hypervisor:option=value,...;" " note this is unused for now"), type="identkeyval") IGNORE_IPOLICY_OPT = cli_option("--ignore-ipolicy", dest="ignore_ipolicy", action="store_true", default=False, help="Ignore instance policy violations") RUNTIME_MEM_OPT = cli_option("-m", "--runtime-memory", dest="runtime_mem", help="Sets the instance's runtime memory," " ballooning it up or down to the new value", default=None, type="unit", metavar="") ABSOLUTE_OPT = cli_option("--absolute", dest="absolute", action="store_true", default=False, help="Marks the grow as absolute instead of the" " (default) relative mode") NETWORK_OPT = cli_option("--network", action="store", default=None, dest="network", help="IP network in CIDR notation") GATEWAY_OPT = cli_option("--gateway", action="store", default=None, dest="gateway", help="IP address of the router (gateway)") ADD_RESERVED_IPS_OPT = cli_option("--add-reserved-ips", action="store", default=None, dest="add_reserved_ips", help="Comma-separated list of" " reserved IPs to add") REMOVE_RESERVED_IPS_OPT = cli_option("--remove-reserved-ips", action="store", default=None, dest="remove_reserved_ips", help="Comma-delimited list of" " reserved IPs to remove") NETWORK6_OPT = cli_option("--network6", action="store", default=None, dest="network6", help="IP network in CIDR notation") GATEWAY6_OPT = cli_option("--gateway6", action="store", default=None, dest="gateway6", help="IP6 address of the router (gateway)") NOCONFLICTSCHECK_OPT = cli_option("--no-conflicts-check", dest="conflicts_check", default=True, action="store_false", help="Don't check for conflicting IPs") INCLUDEDEFAULTS_OPT = cli_option("--include-defaults", dest="include_defaults", default=False, action="store_true", help="Include default values") #: Options provided by all commands COMMON_OPTS = [DEBUG_OPT, REASON_OPT] # options related to asynchronous job handling SUBMIT_OPTS = [ SUBMIT_OPT, PRINT_JOBID_OPT, ] # common options for creating instances. add and import then add their own # specific ones. 
COMMON_CREATE_OPTS = [ BACKEND_OPT, DISK_OPT, DISK_TEMPLATE_OPT, FILESTORE_DIR_OPT, FILESTORE_DRIVER_OPT, HYPERVISOR_OPT, IALLOCATOR_OPT, NET_OPT, NODE_PLACEMENT_OPT, NOIPCHECK_OPT, NOCONFLICTSCHECK_OPT, NONAMECHECK_OPT, NONICS_OPT, NWSYNC_OPT, OSPARAMS_OPT, OS_SIZE_OPT, SUBMIT_OPT, PRINT_JOBID_OPT, TAG_ADD_OPT, DRY_RUN_OPT, PRIORITY_OPT, ] # common instance policy options INSTANCE_POLICY_OPTS = [ IPOLICY_BOUNDS_SPECS_OPT, IPOLICY_DISK_TEMPLATES, IPOLICY_VCPU_RATIO, IPOLICY_SPINDLE_RATIO, ] # instance policy split specs options SPLIT_ISPECS_OPTS = [ SPECS_CPU_COUNT_OPT, SPECS_DISK_COUNT_OPT, SPECS_DISK_SIZE_OPT, SPECS_MEM_SIZE_OPT, SPECS_NIC_COUNT_OPT, ] class _ShowUsage(Exception): """Exception class for L{_ParseArgs}. """ def __init__(self, exit_error): """Initializes instances of this class. @type exit_error: bool @param exit_error: Whether to report failure on exit """ Exception.__init__(self) self.exit_error = exit_error class _ShowVersion(Exception): """Exception class for L{_ParseArgs}. """ def _ParseArgs(binary, argv, commands, aliases, env_override): """Parser for the command line arguments. This function parses the arguments and returns the function which must be executed together with its (modified) arguments. @param binary: Script name @param argv: Command line arguments @param commands: Dictionary containing command definitions @param aliases: dictionary with command aliases {"alias": "target", ...} @param env_override: list of env variables allowed for default args @raise _ShowUsage: If usage description should be shown @raise _ShowVersion: If version should be shown """ assert not (env_override - set(commands)) assert not (set(aliases.keys()) & set(commands.keys())) if len(argv) > 1: cmd = argv[1] else: # No option or command given raise _ShowUsage(exit_error=True) if cmd == "--version": raise _ShowVersion() elif cmd == "--help": raise _ShowUsage(exit_error=False) elif not (cmd in commands or cmd in aliases): raise _ShowUsage(exit_error=True) # get command, unalias it, and look it up in commands if cmd in aliases: if aliases[cmd] not in commands: raise errors.ProgrammerError("Alias '%s' maps to non-existing" " command '%s'" % (cmd, aliases[cmd])) cmd = aliases[cmd] if cmd in env_override: args_env_name = ("%s_%s" % (binary.replace("-", "_"), cmd)).upper() env_args = os.environ.get(args_env_name) if env_args: argv = utils.InsertAtPos(argv, 2, shlex.split(env_args)) func, args_def, parser_opts, usage, description = commands[cmd] parser = OptionParser(option_list=parser_opts + COMMON_OPTS, description=description, formatter=TitledHelpFormatter(), usage="%%prog %s %s" % (cmd, usage)) parser.disable_interspersed_args() options, args = parser.parse_args(args=argv[2:]) if not _CheckArguments(cmd, args_def, args): return None, None, None return func, options, args def _FormatUsage(binary, commands): """Generates a nice description of all commands. @param binary: Script name @param commands: Dictionary containing command definitions """ # compute the max line length for cmd + usage mlen = min(60, max(map(len, commands))) yield "Usage: %s {command} [options...] 
[argument...]" % binary yield "%s --help to see details, or man %s" % (binary, binary) yield "" yield "Commands:" # and format a nice command list for (cmd, (_, _, _, _, help_text)) in sorted(commands.items()): help_lines = textwrap.wrap(help_text, 79 - 3 - mlen) yield " %-*s - %s" % (mlen, cmd, help_lines.pop(0)) for line in help_lines: yield " %-*s %s" % (mlen, "", line) yield "" def _CheckArguments(cmd, args_def, args): """Verifies the arguments using the argument definition. Algorithm: 1. Abort with error if values specified by user but none expected. 1. For each argument in definition 1. Keep running count of minimum number of values (min_count) 1. Keep running count of maximum number of values (max_count) 1. If it has an unlimited number of values 1. Abort with error if it's not the last argument in the definition 1. If last argument has limited number of values 1. Abort with error if number of values doesn't match or is too large 1. Abort with error if user didn't pass enough values (min_count) """ if args and not args_def: ToStderr("Error: Command %s expects no arguments", cmd) return False min_count = None max_count = None check_max = None last_idx = len(args_def) - 1 for idx, arg in enumerate(args_def): if min_count is None: min_count = arg.min elif arg.min is not None: min_count += arg.min if max_count is None: max_count = arg.max elif arg.max is not None: max_count += arg.max if idx == last_idx: check_max = (arg.max is not None) elif arg.max is None: raise errors.ProgrammerError("Only the last argument can have max=None") if check_max: # Command with exact number of arguments if (min_count is not None and max_count is not None and min_count == max_count and len(args) != min_count): ToStderr("Error: Command %s expects %d argument(s)", cmd, min_count) return False # Command with limited number of arguments if max_count is not None and len(args) > max_count: ToStderr("Error: Command %s expects only %d argument(s)", cmd, max_count) return False # Command with some required arguments if min_count is not None and len(args) < min_count: ToStderr("Error: Command %s expects at least %d argument(s)", cmd, min_count) return False return True def SplitNodeOption(value): """Splits the value of a --node option. """ if value and ":" in value: return value.split(":", 1) else: return (value, None) def CalculateOSNames(os_name, os_variants): """Calculates all the names an OS can be called, according to its variants. @type os_name: string @param os_name: base name of the os @type os_variants: list or None @param os_variants: list of supported variants @rtype: list @return: list of valid names """ if os_variants: return ["%s+%s" % (os_name, v) for v in os_variants] else: return [os_name] def ParseFields(selected, default): """Parses the values of "--field"-like options. @type selected: string or None @param selected: User-selected options @type default: list @param default: Default fields """ if selected is None: return default if selected.startswith("+"): return default + selected[1:].split(",") return selected.split(",") UsesRPC = rpc.RunWithRPC def AskUser(text, choices=None): """Ask the user a question. @param text: the question to ask @param choices: list with elements tuples (input_char, return_value, description); if not given, it will default to: [('y', True, 'Perform the operation'), ('n', False, 'Do no do the operation')]; note that the '?' char is reserved for help @return: one of the return values from the choices list; if input is not possible (i.e. 
not running with a tty, we return the last entry from the list """ if choices is None: choices = [("y", True, "Perform the operation"), ("n", False, "Do not perform the operation")] if not choices or not isinstance(choices, list): raise errors.ProgrammerError("Invalid choices argument to AskUser") for entry in choices: if not isinstance(entry, tuple) or len(entry) < 3 or entry[0] == "?": raise errors.ProgrammerError("Invalid choices element to AskUser") answer = choices[-1][1] new_text = [] for line in text.splitlines(): new_text.append(textwrap.fill(line, 70, replace_whitespace=False)) text = "\n".join(new_text) try: f = file("/dev/tty", "a+") except IOError: return answer try: chars = [entry[0] for entry in choices] chars[-1] = "[%s]" % chars[-1] chars.append("?") maps = dict([(entry[0], entry[1]) for entry in choices]) while True: f.write(text) f.write("\n") f.write("/".join(chars)) f.write(": ") line = f.readline(2).strip().lower() if line in maps: answer = maps[line] break elif line == "?": for entry in choices: f.write(" %s - %s\n" % (entry[0], entry[2])) f.write("\n") continue finally: f.close() return answer class JobSubmittedException(Exception): """Job was submitted, client should exit. This exception has one argument, the ID of the job that was submitted. The handler should print this ID. This is not an error, just a structured way to exit from clients. """ def SendJob(ops, cl=None): """Function to submit an opcode without waiting for the results. @type ops: list @param ops: list of opcodes @type cl: luxi.Client @param cl: the luxi client to use for communicating with the master; if None, a new client will be created """ if cl is None: cl = GetClient() job_id = cl.SubmitJob(ops) return job_id def GenericPollJob(job_id, cbs, report_cbs): """Generic job-polling function. @type job_id: number @param job_id: Job ID @type cbs: Instance of L{JobPollCbBase} @param cbs: Data callbacks @type report_cbs: Instance of L{JobPollReportCbBase} @param report_cbs: Reporting callbacks """ prev_job_info = None prev_logmsg_serial = None status = None while True: result = cbs.WaitForJobChangeOnce(job_id, ["status"], prev_job_info, prev_logmsg_serial) if not result: # job not found, go away! 
raise errors.JobLost("Job with id %s lost" % job_id) if result == constants.JOB_NOTCHANGED: report_cbs.ReportNotChanged(job_id, status) # Wait again continue # Split result, a tuple of (field values, log entries) (job_info, log_entries) = result (status, ) = job_info if log_entries: for log_entry in log_entries: (serial, timestamp, log_type, message) = log_entry report_cbs.ReportLogMessage(job_id, serial, timestamp, log_type, message) prev_logmsg_serial = max(prev_logmsg_serial, serial) # TODO: Handle canceled and archived jobs elif status in (constants.JOB_STATUS_SUCCESS, constants.JOB_STATUS_ERROR, constants.JOB_STATUS_CANCELING, constants.JOB_STATUS_CANCELED): break prev_job_info = job_info jobs = cbs.QueryJobs([job_id], ["status", "opstatus", "opresult"]) if not jobs: raise errors.JobLost("Job with id %s lost" % job_id) status, opstatus, result = jobs[0] if status == constants.JOB_STATUS_SUCCESS: return result if status in (constants.JOB_STATUS_CANCELING, constants.JOB_STATUS_CANCELED): raise errors.OpExecError("Job was canceled") has_ok = False for idx, (status, msg) in enumerate(zip(opstatus, result)): if status == constants.OP_STATUS_SUCCESS: has_ok = True elif status == constants.OP_STATUS_ERROR: errors.MaybeRaise(msg) if has_ok: raise errors.OpExecError("partial failure (opcode %d): %s" % (idx, msg)) raise errors.OpExecError(str(msg)) # default failure mode raise errors.OpExecError(result) class JobPollCbBase: """Base class for L{GenericPollJob} callbacks. """ def __init__(self): """Initializes this class. """ def WaitForJobChangeOnce(self, job_id, fields, prev_job_info, prev_log_serial): """Waits for changes on a job. """ raise NotImplementedError() def QueryJobs(self, job_ids, fields): """Returns the selected fields for the selected job IDs. @type job_ids: list of numbers @param job_ids: Job IDs @type fields: list of strings @param fields: Fields """ raise NotImplementedError() class JobPollReportCbBase: """Base class for L{GenericPollJob} reporting callbacks. """ def __init__(self): """Initializes this class. """ def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg): """Handles a log message. """ raise NotImplementedError() def ReportNotChanged(self, job_id, status): """Called for if a job hasn't changed in a while. @type job_id: number @param job_id: Job ID @type status: string or None @param status: Job status if available """ raise NotImplementedError() class _LuxiJobPollCb(JobPollCbBase): def __init__(self, cl): """Initializes this class. """ JobPollCbBase.__init__(self) self.cl = cl def WaitForJobChangeOnce(self, job_id, fields, prev_job_info, prev_log_serial): """Waits for changes on a job. """ return self.cl.WaitForJobChangeOnce(job_id, fields, prev_job_info, prev_log_serial) def QueryJobs(self, job_ids, fields): """Returns the selected fields for the selected job IDs. """ return self.cl.QueryJobs(job_ids, fields) class FeedbackFnJobPollReportCb(JobPollReportCbBase): def __init__(self, feedback_fn): """Initializes this class. """ JobPollReportCbBase.__init__(self) self.feedback_fn = feedback_fn assert callable(feedback_fn) def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg): """Handles a log message. """ self.feedback_fn((timestamp, log_type, log_msg)) def ReportNotChanged(self, job_id, status): """Called if a job hasn't changed in a while. """ # Ignore class StdioJobPollReportCb(JobPollReportCbBase): def __init__(self): """Initializes this class. 
""" JobPollReportCbBase.__init__(self) self.notified_queued = False self.notified_waitlock = False def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg): """Handles a log message. """ ToStdout("%s %s", time.ctime(utils.MergeTime(timestamp)), FormatLogMessage(log_type, log_msg)) def ReportNotChanged(self, job_id, status): """Called if a job hasn't changed in a while. """ if status is None: return if status == constants.JOB_STATUS_QUEUED and not self.notified_queued: ToStderr("Job %s is waiting in queue", job_id) self.notified_queued = True elif status == constants.JOB_STATUS_WAITING and not self.notified_waitlock: ToStderr("Job %s is trying to acquire all necessary locks", job_id) self.notified_waitlock = True def FormatLogMessage(log_type, log_msg): """Formats a job message according to its type. """ if log_type != constants.ELOG_MESSAGE: log_msg = str(log_msg) return utils.SafeEncode(log_msg) def PollJob(job_id, cl=None, feedback_fn=None, reporter=None): """Function to poll for the result of a job. @type job_id: job identified @param job_id: the job to poll for results @type cl: luxi.Client @param cl: the luxi client to use for communicating with the master; if None, a new client will be created """ if cl is None: cl = GetClient() if reporter is None: if feedback_fn: reporter = FeedbackFnJobPollReportCb(feedback_fn) else: reporter = StdioJobPollReportCb() elif feedback_fn: raise errors.ProgrammerError("Can't specify reporter and feedback function") return GenericPollJob(job_id, _LuxiJobPollCb(cl), reporter) def SubmitOpCode(op, cl=None, feedback_fn=None, opts=None, reporter=None): """Legacy function to submit an opcode. This is just a simple wrapper over the construction of the processor instance. It should be extended to better handle feedback and interaction functions. """ if cl is None: cl = GetClient() SetGenericOpcodeOpts([op], opts) job_id = SendJob([op], cl=cl) if hasattr(opts, "print_jobid") and opts.print_jobid: ToStdout("%d" % job_id) op_results = PollJob(job_id, cl=cl, feedback_fn=feedback_fn, reporter=reporter) return op_results[0] def SubmitOrSend(op, opts, cl=None, feedback_fn=None): """Wrapper around SubmitOpCode or SendJob. This function will decide, based on the 'opts' parameter, whether to submit and wait for the result of the opcode (and return it), or whether to just send the job and print its identifier. It is used in order to simplify the implementation of the '--submit' option. It will also process the opcodes if we're sending the via SendJob (otherwise SubmitOpCode does it). """ if opts and opts.submit_only: job = [op] SetGenericOpcodeOpts(job, opts) job_id = SendJob(job, cl=cl) if opts.print_jobid: ToStdout("%d" % job_id) raise JobSubmittedException(job_id) else: return SubmitOpCode(op, cl=cl, feedback_fn=feedback_fn, opts=opts) def _InitReasonTrail(op, opts): """Builds the first part of the reason trail Builds the initial part of the reason trail, adding the user provided reason (if it exists) and the name of the command starting the operation. 
@param op: the opcode the reason trail will be added to @param opts: the command line options selected by the user """ assert len(sys.argv) >= 2 trail = [] if opts.reason: trail.append((constants.OPCODE_REASON_SRC_USER, opts.reason, utils.EpochNano())) binary = os.path.basename(sys.argv[0]) source = "%s:%s" % (constants.OPCODE_REASON_SRC_CLIENT, binary) command = sys.argv[1] trail.append((source, command, utils.EpochNano())) op.reason = trail def SetGenericOpcodeOpts(opcode_list, options): """Processor for generic options. This function updates the given opcodes based on generic command line options (like debug, dry-run, etc.). @param opcode_list: list of opcodes @param options: command line options or None @return: None (in-place modification) """ if not options: return for op in opcode_list: op.debug_level = options.debug if hasattr(options, "dry_run"): op.dry_run = options.dry_run if getattr(options, "priority", None) is not None: op.priority = options.priority _InitReasonTrail(op, options) def GetClient(query=False): """Connects to the a luxi socket and returns a client. @type query: boolean @param query: this signifies that the client will only be used for queries; if the build-time parameter enable-split-queries is enabled, then the client will be connected to the query socket instead of the masterd socket """ override_socket = os.getenv(constants.LUXI_OVERRIDE, "") if override_socket: if override_socket == constants.LUXI_OVERRIDE_MASTER: address = pathutils.MASTER_SOCKET elif override_socket == constants.LUXI_OVERRIDE_QUERY: address = pathutils.QUERY_SOCKET else: address = override_socket elif query and constants.ENABLE_SPLIT_QUERY: address = pathutils.QUERY_SOCKET else: address = None # TODO: Cache object? try: client = luxi.Client(address=address) except luxi.NoMasterError: ss = ssconf.SimpleStore() # Try to read ssconf file try: ss.GetMasterNode() except errors.ConfigurationError: raise errors.OpPrereqError("Cluster not initialized or this machine is" " not part of a cluster", errors.ECODE_INVAL) master, myself = ssconf.GetMasterAndMyself(ss=ss) if master != myself: raise errors.OpPrereqError("This is not the master node, please connect" " to node '%s' and rerun the command" % master, errors.ECODE_INVAL) raise return client def FormatError(err): """Return a formatted error message for a given error. This function takes an exception instance and returns a tuple consisting of two values: first, the recommended exit code, and second, a string describing the error message (not newline-terminated). 
""" retcode = 1 obuf = StringIO() msg = str(err) if isinstance(err, errors.ConfigurationError): txt = "Corrupt configuration file: %s" % msg logging.error(txt) obuf.write(txt + "\n") obuf.write("Aborting.") retcode = 2 elif isinstance(err, errors.HooksAbort): obuf.write("Failure: hooks execution failed:\n") for node, script, out in err.args[0]: if out: obuf.write(" node: %s, script: %s, output: %s\n" % (node, script, out)) else: obuf.write(" node: %s, script: %s (no output)\n" % (node, script)) elif isinstance(err, errors.HooksFailure): obuf.write("Failure: hooks general failure: %s" % msg) elif isinstance(err, errors.ResolverError): this_host = netutils.Hostname.GetSysName() if err.args[0] == this_host: msg = "Failure: can't resolve my own hostname ('%s')" else: msg = "Failure: can't resolve hostname '%s'" obuf.write(msg % err.args[0]) elif isinstance(err, errors.OpPrereqError): if len(err.args) == 2: obuf.write("Failure: prerequisites not met for this" " operation:\nerror type: %s, error details:\n%s" % (err.args[1], err.args[0])) else: obuf.write("Failure: prerequisites not met for this" " operation:\n%s" % msg) elif isinstance(err, errors.OpExecError): obuf.write("Failure: command execution error:\n%s" % msg) elif isinstance(err, errors.TagError): obuf.write("Failure: invalid tag(s) given:\n%s" % msg) elif isinstance(err, errors.JobQueueDrainError): obuf.write("Failure: the job queue is marked for drain and doesn't" " accept new requests\n") elif isinstance(err, errors.JobQueueFull): obuf.write("Failure: the job queue is full and doesn't accept new" " job submissions until old jobs are archived\n") elif isinstance(err, errors.TypeEnforcementError): obuf.write("Parameter Error: %s" % msg) elif isinstance(err, errors.ParameterError): obuf.write("Failure: unknown/wrong parameter name '%s'" % msg) elif isinstance(err, luxi.NoMasterError): if err.args[0] == pathutils.MASTER_SOCKET: daemon = "the master daemon" elif err.args[0] == pathutils.QUERY_SOCKET: daemon = "the config daemon" else: daemon = "socket '%s'" % str(err.args[0]) obuf.write("Cannot communicate with %s.\nIs the process running" " and listening for connections?" % daemon) elif isinstance(err, luxi.TimeoutError): obuf.write("Timeout while talking to the master daemon. Jobs might have" " been submitted and will continue to run even if the call" " timed out. Useful commands in this situation are \"gnt-job" " list\", \"gnt-job cancel\" and \"gnt-job watch\". Error:\n") obuf.write(msg) elif isinstance(err, luxi.PermissionError): obuf.write("It seems you don't have permissions to connect to the" " master daemon.\nPlease retry as a different user.") elif isinstance(err, luxi.ProtocolError): obuf.write("Unhandled protocol error while talking to the master daemon:\n" "%s" % msg) elif isinstance(err, errors.JobLost): obuf.write("Error checking job status: %s" % msg) elif isinstance(err, errors.QueryFilterParseError): obuf.write("Error while parsing query filter: %s\n" % err.args[0]) obuf.write("\n".join(err.GetDetails())) elif isinstance(err, errors.GenericError): obuf.write("Unhandled Ganeti error: %s" % msg) elif isinstance(err, JobSubmittedException): obuf.write("JobID: %s\n" % err.args[0]) retcode = 0 else: obuf.write("Unhandled exception: %s" % msg) return retcode, obuf.getvalue().rstrip("\n") def GenericMain(commands, override=None, aliases=None, env_override=frozenset()): """Generic main function for all the gnt-* commands. @param commands: a dictionary with a special structure, see the design doc for command line handling. 
@param override: if not None, we expect a dictionary with keys that will override command line options; this can be used to pass options from the scripts to generic functions @param aliases: dictionary with command aliases {'alias': 'target, ...} @param env_override: list of environment names which are allowed to submit default args for commands """ # save the program name and the entire command line for later logging if sys.argv: binary = os.path.basename(sys.argv[0]) if not binary: binary = sys.argv[0] if len(sys.argv) >= 2: logname = utils.ShellQuoteArgs([binary, sys.argv[1]]) else: logname = binary cmdline = utils.ShellQuoteArgs([binary] + sys.argv[1:]) else: binary = "" cmdline = "" if aliases is None: aliases = {} try: (func, options, args) = _ParseArgs(binary, sys.argv, commands, aliases, env_override) except _ShowVersion: ToStdout("%s (ganeti %s) %s", binary, constants.VCS_VERSION, constants.RELEASE_VERSION) return constants.EXIT_SUCCESS except _ShowUsage, err: for line in _FormatUsage(binary, commands): ToStdout(line) if err.exit_error: return constants.EXIT_FAILURE else: return constants.EXIT_SUCCESS except errors.ParameterError, err: result, err_msg = FormatError(err) ToStderr(err_msg) return 1 if func is None: # parse error return 1 if override is not None: for key, val in override.iteritems(): setattr(options, key, val) utils.SetupLogging(pathutils.LOG_COMMANDS, logname, debug=options.debug, stderr_logging=True) logging.info("Command line: %s", cmdline) try: result = func(options, args) except (errors.GenericError, luxi.ProtocolError, JobSubmittedException), err: result, err_msg = FormatError(err) logging.exception("Error during command processing") ToStderr(err_msg) except KeyboardInterrupt: result = constants.EXIT_FAILURE ToStderr("Aborted. Note that if the operation created any jobs, they" " might have been submitted and" " will continue to run in the background.") except IOError, err: if err.errno == errno.EPIPE: # our terminal went away, we'll exit sys.exit(constants.EXIT_FAILURE) else: raise return result def ParseNicOption(optvalue): """Parses the value of the --net option(s). """ try: nic_max = max(int(nidx[0]) + 1 for nidx in optvalue) except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid NIC index passed: %s" % str(err), errors.ECODE_INVAL) nics = [{}] * nic_max for nidx, ndict in optvalue: nidx = int(nidx) if not isinstance(ndict, dict): raise errors.OpPrereqError("Invalid nic/%d value: expected dict," " got %s" % (nidx, ndict), errors.ECODE_INVAL) utils.ForceDictType(ndict, constants.INIC_PARAMS_TYPES) nics[nidx] = ndict return nics def FixHvParams(hvparams): # In Ganeti 2.8.4 the separator for the usb_devices hvparam was changed from # comma to space because commas cannot be accepted on the command line # (they already act as the separator between different hvparams). Still, # RAPI should be able to accept commas for backwards compatibility. # Therefore, we convert spaces into commas here, and we keep the old # parsing logic everywhere else. try: new_usb_devices = hvparams[constants.HV_USB_DEVICES].replace(" ", ",") hvparams[constants.HV_USB_DEVICES] = new_usb_devices except KeyError: #No usb_devices, no modification required pass def GenericInstanceCreate(mode, opts, args): """Add an instance to the cluster via either creation or import. 
@param mode: constants.INSTANCE_CREATE or constants.INSTANCE_IMPORT @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the new instance name @rtype: int @return: the desired exit code """ instance = args[0] (pnode, snode) = SplitNodeOption(opts.node) hypervisor = None hvparams = {} if opts.hypervisor: hypervisor, hvparams = opts.hypervisor if opts.nics: nics = ParseNicOption(opts.nics) elif opts.no_nics: # no nics nics = [] elif mode == constants.INSTANCE_CREATE: # default of one nic, all auto nics = [{}] else: # mode == import nics = [] if opts.disk_template == constants.DT_DISKLESS: if opts.disks or opts.sd_size is not None: raise errors.OpPrereqError("Diskless instance but disk" " information passed", errors.ECODE_INVAL) disks = [] else: if (not opts.disks and not opts.sd_size and mode == constants.INSTANCE_CREATE): raise errors.OpPrereqError("No disk information specified", errors.ECODE_INVAL) if opts.disks and opts.sd_size is not None: raise errors.OpPrereqError("Please use either the '--disk' or" " '-s' option", errors.ECODE_INVAL) if opts.sd_size is not None: opts.disks = [(0, {constants.IDISK_SIZE: opts.sd_size})] if opts.disks: try: disk_max = max(int(didx[0]) + 1 for didx in opts.disks) except ValueError, err: raise errors.OpPrereqError("Invalid disk index passed: %s" % str(err), errors.ECODE_INVAL) disks = [{}] * disk_max else: disks = [] for didx, ddict in opts.disks: didx = int(didx) if not isinstance(ddict, dict): msg = "Invalid disk/%d value: expected dict, got %s" % (didx, ddict) raise errors.OpPrereqError(msg, errors.ECODE_INVAL) elif constants.IDISK_SIZE in ddict: if constants.IDISK_ADOPT in ddict: raise errors.OpPrereqError("Only one of 'size' and 'adopt' allowed" " (disk %d)" % didx, errors.ECODE_INVAL) try: ddict[constants.IDISK_SIZE] = \ utils.ParseUnit(ddict[constants.IDISK_SIZE]) except ValueError, err: raise errors.OpPrereqError("Invalid disk size for disk %d: %s" % (didx, err), errors.ECODE_INVAL) elif constants.IDISK_ADOPT in ddict: if constants.IDISK_SPINDLES in ddict: raise errors.OpPrereqError("spindles is not a valid option when" " adopting a disk", errors.ECODE_INVAL) if mode == constants.INSTANCE_IMPORT: raise errors.OpPrereqError("Disk adoption not allowed for instance" " import", errors.ECODE_INVAL) ddict[constants.IDISK_SIZE] = 0 else: raise errors.OpPrereqError("Missing size or adoption source for" " disk %d" % didx, errors.ECODE_INVAL) disks[didx] = ddict if opts.tags is not None: tags = opts.tags.split(",") else: tags = [] utils.ForceDictType(opts.beparams, constants.BES_PARAMETER_COMPAT) utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES) FixHvParams(hvparams) if mode == constants.INSTANCE_CREATE: start = opts.start os_type = opts.os force_variant = opts.force_variant src_node = None src_path = None no_install = opts.no_install identify_defaults = False elif mode == constants.INSTANCE_IMPORT: start = False os_type = None force_variant = False src_node = opts.src_node src_path = opts.src_dir no_install = None identify_defaults = opts.identify_defaults else: raise errors.ProgrammerError("Invalid creation mode %s" % mode) op = opcodes.OpInstanceCreate(instance_name=instance, disks=disks, disk_template=opts.disk_template, nics=nics, conflicts_check=opts.conflicts_check, pnode=pnode, snode=snode, ip_check=opts.ip_check, name_check=opts.name_check, wait_for_sync=opts.wait_for_sync, file_storage_dir=opts.file_storage_dir, file_driver=opts.file_driver, iallocator=opts.iallocator, 
hypervisor=hypervisor, hvparams=hvparams, beparams=opts.beparams, osparams=opts.osparams, mode=mode, start=start, os_type=os_type, force_variant=force_variant, src_node=src_node, src_path=src_path, tags=tags, no_install=no_install, identify_defaults=identify_defaults, ignore_ipolicy=opts.ignore_ipolicy) SubmitOrSend(op, opts) return 0 class _RunWhileClusterStoppedHelper: """Helper class for L{RunWhileClusterStopped} to simplify state management """ def __init__(self, feedback_fn, cluster_name, master_node, online_nodes): """Initializes this class. @type feedback_fn: callable @param feedback_fn: Feedback function @type cluster_name: string @param cluster_name: Cluster name @type master_node: string @param master_node Master node name @type online_nodes: list @param online_nodes: List of names of online nodes """ self.feedback_fn = feedback_fn self.cluster_name = cluster_name self.master_node = master_node self.online_nodes = online_nodes self.ssh = ssh.SshRunner(self.cluster_name) self.nonmaster_nodes = [name for name in online_nodes if name != master_node] assert self.master_node not in self.nonmaster_nodes def _RunCmd(self, node_name, cmd): """Runs a command on the local or a remote machine. @type node_name: string @param node_name: Machine name @type cmd: list @param cmd: Command """ if node_name is None or node_name == self.master_node: # No need to use SSH result = utils.RunCmd(cmd) else: result = self.ssh.Run(node_name, constants.SSH_LOGIN_USER, utils.ShellQuoteArgs(cmd)) if result.failed: errmsg = ["Failed to run command %s" % result.cmd] if node_name: errmsg.append("on node %s" % node_name) errmsg.append(": exitcode %s and error %s" % (result.exit_code, result.output)) raise errors.OpExecError(" ".join(errmsg)) def Call(self, fn, *args): """Call function while all daemons are stopped. @type fn: callable @param fn: Function to be called """ # Pause watcher by acquiring an exclusive lock on watcher state file self.feedback_fn("Blocking watcher") watcher_block = utils.FileLock.Open(pathutils.WATCHER_LOCK_FILE) try: # TODO: Currently, this just blocks. There's no timeout. # TODO: Should it be a shared lock? watcher_block.Exclusive(blocking=True) # Stop master daemons, so that no new jobs can come in and all running # ones are finished self.feedback_fn("Stopping master daemons") self._RunCmd(None, [pathutils.DAEMON_UTIL, "stop-master"]) try: # Stop daemons on all nodes for node_name in self.online_nodes: self.feedback_fn("Stopping daemons on %s" % node_name) self._RunCmd(node_name, [pathutils.DAEMON_UTIL, "stop-all"]) # All daemons are shut down now try: return fn(self, *args) except Exception, err: _, errmsg = FormatError(err) logging.exception("Caught exception") self.feedback_fn(errmsg) raise finally: # Start cluster again, master node last for node_name in self.nonmaster_nodes + [self.master_node]: self.feedback_fn("Starting daemons on %s" % node_name) self._RunCmd(node_name, [pathutils.DAEMON_UTIL, "start-all"]) finally: # Resume watcher watcher_block.Close() def RunWhileClusterStopped(feedback_fn, fn, *args): """Calls a function while all cluster daemons are stopped. 
@type feedback_fn: callable @param feedback_fn: Feedback function @type fn: callable @param fn: Function to be called when daemons are stopped """ feedback_fn("Gathering cluster information") # This ensures we're running on the master daemon cl = GetClient() (cluster_name, master_node) = \ cl.QueryConfigValues(["cluster_name", "master_node"]) online_nodes = GetOnlineNodes([], cl=cl) # Don't keep a reference to the client. The master daemon will go away. del cl assert master_node in online_nodes return _RunWhileClusterStoppedHelper(feedback_fn, cluster_name, master_node, online_nodes).Call(fn, *args) def GenerateTable(headers, fields, separator, data, numfields=None, unitfields=None, units=None): """Prints a table with headers and different fields. @type headers: dict @param headers: dictionary mapping field names to headers for the table @type fields: list @param fields: the field names corresponding to each row in the data field @param separator: the separator to be used; if this is None, the default 'smart' algorithm is used which computes optimal field width, otherwise just the separator is used between each field @type data: list @param data: a list of lists, each sublist being one row to be output @type numfields: list @param numfields: a list with the fields that hold numeric values and thus should be right-aligned @type unitfields: list @param unitfields: a list with the fields that hold numeric values that should be formatted with the units field @type units: string or None @param units: the units we should use for formatting, or None for automatic choice (human-readable for non-separator usage, otherwise megabytes); this is a one-letter string """ if units is None: if separator: units = "m" else: units = "h" if numfields is None: numfields = [] if unitfields is None: unitfields = [] numfields = utils.FieldSet(*numfields) # pylint: disable=W0142 unitfields = utils.FieldSet(*unitfields) # pylint: disable=W0142 format_fields = [] for field in fields: if headers and field not in headers: # TODO: handle better unknown fields (either revert to old # style of raising exception, or deal more intelligently with # variable fields) headers[field] = field if separator is not None: format_fields.append("%s") elif numfields.Matches(field): format_fields.append("%*s") else: format_fields.append("%-*s") if separator is None: mlens = [0 for name in fields] format_str = " ".join(format_fields) else: format_str = separator.replace("%", "%%").join(format_fields) for row in data: if row is None: continue for idx, val in enumerate(row): if unitfields.Matches(fields[idx]): try: val = int(val) except (TypeError, ValueError): pass else: val = row[idx] = utils.FormatUnit(val, units) val = row[idx] = str(val) if separator is None: mlens[idx] = max(mlens[idx], len(val)) result = [] if headers: args = [] for idx, name in enumerate(fields): hdr = headers[name] if separator is None: mlens[idx] = max(mlens[idx], len(hdr)) args.append(mlens[idx]) args.append(hdr) result.append(format_str % tuple(args)) if separator is None: assert len(mlens) == len(fields) if fields and not numfields.Matches(fields[-1]): mlens[-1] = 0 for line in data: args = [] if line is None: line = ["-" for _ in fields] for idx in range(len(fields)): if separator is None: args.append(mlens[idx]) args.append(line[idx]) result.append(format_str % tuple(args)) return result def _FormatBool(value): """Formats a boolean value as a string. 
""" if value: return "Y" return "N" #: Default formatting for query results; (callback, align right) _DEFAULT_FORMAT_QUERY = { constants.QFT_TEXT: (str, False), constants.QFT_BOOL: (_FormatBool, False), constants.QFT_NUMBER: (str, True), constants.QFT_TIMESTAMP: (utils.FormatTime, False), constants.QFT_OTHER: (str, False), constants.QFT_UNKNOWN: (str, False), } def _GetColumnFormatter(fdef, override, unit): """Returns formatting function for a field. @type fdef: L{objects.QueryFieldDefinition} @type override: dict @param override: Dictionary for overriding field formatting functions, indexed by field name, contents like L{_DEFAULT_FORMAT_QUERY} @type unit: string @param unit: Unit used for formatting fields of type L{constants.QFT_UNIT} @rtype: tuple; (callable, bool) @return: Returns the function to format a value (takes one parameter) and a boolean for aligning the value on the right-hand side """ fmt = override.get(fdef.name, None) if fmt is not None: return fmt assert constants.QFT_UNIT not in _DEFAULT_FORMAT_QUERY if fdef.kind == constants.QFT_UNIT: # Can't keep this information in the static dictionary return (lambda value: utils.FormatUnit(value, unit), True) fmt = _DEFAULT_FORMAT_QUERY.get(fdef.kind, None) if fmt is not None: return fmt raise NotImplementedError("Can't format column type '%s'" % fdef.kind) class _QueryColumnFormatter: """Callable class for formatting fields of a query. """ def __init__(self, fn, status_fn, verbose): """Initializes this class. @type fn: callable @param fn: Formatting function @type status_fn: callable @param status_fn: Function to report fields' status @type verbose: boolean @param verbose: whether to use verbose field descriptions or not """ self._fn = fn self._status_fn = status_fn self._verbose = verbose def __call__(self, data): """Returns a field's string representation. """ (status, value) = data # Report status self._status_fn(status) if status == constants.RS_NORMAL: return self._fn(value) assert value is None, \ "Found value %r for abnormal status %s" % (value, status) return FormatResultError(status, self._verbose) def FormatResultError(status, verbose): """Formats result status other than L{constants.RS_NORMAL}. @param status: The result status @type verbose: boolean @param verbose: Whether to return the verbose text @return: Text of result status """ assert status != constants.RS_NORMAL, \ "FormatResultError called with status equal to constants.RS_NORMAL" try: (verbose_text, normal_text) = constants.RSS_DESCRIPTION[status] except KeyError: raise NotImplementedError("Unknown status %s" % status) else: if verbose: return verbose_text return normal_text def FormatQueryResult(result, unit=None, format_override=None, separator=None, header=False, verbose=False): """Formats data in L{objects.QueryResponse}. 
@type result: L{objects.QueryResponse} @param result: result of query operation @type unit: string @param unit: Unit used for formatting fields of type L{constants.QFT_UNIT}, see L{utils.text.FormatUnit} @type format_override: dict @param format_override: Dictionary for overriding field formatting functions, indexed by field name, contents like L{_DEFAULT_FORMAT_QUERY} @type separator: string or None @param separator: String used to separate fields @type header: bool @param header: Whether to output header row @type verbose: boolean @param verbose: whether to use verbose field descriptions or not """ if unit is None: if separator: unit = "m" else: unit = "h" if format_override is None: format_override = {} stats = dict.fromkeys(constants.RS_ALL, 0) def _RecordStatus(status): if status in stats: stats[status] += 1 columns = [] for fdef in result.fields: assert fdef.title and fdef.name (fn, align_right) = _GetColumnFormatter(fdef, format_override, unit) columns.append(TableColumn(fdef.title, _QueryColumnFormatter(fn, _RecordStatus, verbose), align_right)) table = FormatTable(result.data, columns, header, separator) # Collect statistics assert len(stats) == len(constants.RS_ALL) assert compat.all(count >= 0 for count in stats.values()) # Determine overall status. If there was no data, unknown fields must be # detected via the field definitions. if (stats[constants.RS_UNKNOWN] or (not result.data and _GetUnknownFields(result.fields))): status = QR_UNKNOWN elif compat.any(count > 0 for key, count in stats.items() if key != constants.RS_NORMAL): status = QR_INCOMPLETE else: status = QR_NORMAL return (status, table) def _GetUnknownFields(fdefs): """Returns list of unknown fields included in C{fdefs}. @type fdefs: list of L{objects.QueryFieldDefinition} """ return [fdef for fdef in fdefs if fdef.kind == constants.QFT_UNKNOWN] def _WarnUnknownFields(fdefs): """Prints a warning to stderr if a query included unknown fields. @type fdefs: list of L{objects.QueryFieldDefinition} """ unknown = _GetUnknownFields(fdefs) if unknown: ToStderr("Warning: Queried for unknown fields %s", utils.CommaJoin(fdef.name for fdef in unknown)) return True return False def GenericList(resource, fields, names, unit, separator, header, cl=None, format_override=None, verbose=False, force_filter=False, namefield=None, qfilter=None, isnumeric=False): """Generic implementation for listing all items of a resource. 
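
  A sketch of a typical call (resource and field names are illustrative)::

    GenericList(constants.QR_INSTANCE, ["name", "status"], None,
                None, None, True, namefield="name")
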
  @param resource: One of L{constants.QR_VIA_LUXI}
  @type fields: list of strings
  @param fields: List of fields to query for
  @type names: list of strings
  @param names: Names of items to query for
  @type unit: string or None
  @param unit: Unit used for formatting fields of type L{constants.QFT_UNIT} or
    None for automatic choice (human-readable for non-separator usage,
    otherwise megabytes); this is a one-letter string
  @type separator: string or None
  @param separator: String used to separate fields
  @type header: bool
  @param header: Whether to show header row
  @type force_filter: bool
  @param force_filter: Whether to always treat names as filter
  @type format_override: dict
  @param format_override: Dictionary for overriding field formatting functions,
    indexed by field name, contents like L{_DEFAULT_FORMAT_QUERY}
  @type verbose: boolean
  @param verbose: whether to use verbose field descriptions or not
  @type namefield: string
  @param namefield: Name of field to use for simple filters (see
    L{qlang.MakeFilter} for details)
  @type qfilter: list or None
  @param qfilter: Query filter (in addition to names)
  @type isnumeric: bool
  @param isnumeric: Whether the namefield's type is numeric, and therefore any
    simple filters built by namefield should use integer values to reflect that

  """
  if not names:
    names = None
  namefilter = qlang.MakeFilter(names, force_filter, namefield=namefield,
                                isnumeric=isnumeric)

  if qfilter is None:
    qfilter = namefilter
  elif namefilter is not None:
    qfilter = [qlang.OP_AND, namefilter, qfilter]

  if cl is None:
    cl = GetClient()

  response = cl.Query(resource, fields, qfilter)

  found_unknown = _WarnUnknownFields(response.fields)

  (status, data) = FormatQueryResult(response, unit=unit, separator=separator,
                                     header=header,
                                     format_override=format_override,
                                     verbose=verbose)

  for line in data:
    ToStdout(line)

  assert ((found_unknown and status == QR_UNKNOWN) or
          (not found_unknown and status != QR_UNKNOWN))

  if status == QR_UNKNOWN:
    return constants.EXIT_UNKNOWN_FIELD

  # TODO: Should the list command fail if not all data could be collected?
  return constants.EXIT_SUCCESS


def _FieldDescValues(fdef):
  """Helper function for L{GenericListFields} to get query field description.

  @type fdef: L{objects.QueryFieldDefinition}
  @rtype: list

  """
  return [
    fdef.name,
    _QFT_NAMES.get(fdef.kind, fdef.kind),
    fdef.title,
    fdef.doc,
    ]


def GenericListFields(resource, fields, separator, header, cl=None):
  """Generic implementation for listing fields for a resource.

  @param resource: One of L{constants.QR_VIA_LUXI}
  @type fields: list of strings
  @param fields: List of fields to query for
  @type separator: string or None
  @param separator: String used to separate fields
  @type header: bool
  @param header: Whether to show header row

  """
  if cl is None:
    cl = GetClient()

  if not fields:
    fields = None

  response = cl.QueryFields(resource, fields)

  found_unknown = _WarnUnknownFields(response.fields)

  columns = [
    TableColumn("Name", str, False),
    TableColumn("Type", str, False),
    TableColumn("Title", str, False),
    TableColumn("Description", str, False),
    ]

  rows = map(_FieldDescValues, response.fields)

  for line in FormatTable(rows, columns, header, separator):
    ToStdout(line)

  if found_unknown:
    return constants.EXIT_UNKNOWN_FIELD

  return constants.EXIT_SUCCESS


class TableColumn:
  """Describes a column for L{FormatTable}.

  """
  def __init__(self, title, fn, align_right):
    """Initializes this class.
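
    For example, L{GenericListFields} builds its columns as
    C{TableColumn("Name", str, False)} and so on.
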
    @type title: string
    @param title: Column title
    @type fn: callable
    @param fn: Formatting function
    @type align_right: bool
    @param align_right: Whether to align values on the right-hand side

    """
    self.title = title
    self.format = fn
    self.align_right = align_right


def _GetColFormatString(width, align_right):
  """Returns the format string for a field.

  """
  if align_right:
    sign = ""
  else:
    sign = "-"

  return "%%%s%ss" % (sign, width)


def FormatTable(rows, columns, header, separator):
  """Formats data as a table.

  @type rows: list of lists
  @param rows: Row data, one list per row
  @type columns: list of L{TableColumn}
  @param columns: Column descriptions
  @type header: bool
  @param header: Whether to show header row
  @type separator: string or None
  @param separator: String used to separate columns

  """
  if header:
    data = [[col.title for col in columns]]
    colwidth = [len(col.title) for col in columns]
  else:
    data = []
    colwidth = [0 for _ in columns]

  # Format row data
  for row in rows:
    assert len(row) == len(columns)

    formatted = [col.format(value) for value, col in zip(row, columns)]

    if separator is None:
      # Update column widths
      for idx, (oldwidth, value) in enumerate(zip(colwidth, formatted)):
        # Modifying a list's items while iterating is fine
        colwidth[idx] = max(oldwidth, len(value))

    data.append(formatted)

  if separator is not None:
    # Return early if a separator is used
    return [separator.join(row) for row in data]

  if columns and not columns[-1].align_right:
    # Avoid unnecessary spaces at end of line
    colwidth[-1] = 0

  # Build format string
  fmt = " ".join([_GetColFormatString(width, col.align_right)
                  for col, width in zip(columns, colwidth)])

  return [fmt % tuple(row) for row in data]


def FormatTimestamp(ts):
  """Formats a given timestamp.

  @type ts: timestamp
  @param ts: a timeval-type timestamp, a tuple of seconds and microseconds

  @rtype: string
  @return: a string with the formatted timestamp

  """
  if not isinstance(ts, (tuple, list)) or len(ts) != 2:
    return "?"

  (sec, usecs) = ts
  return utils.FormatTime(sec, usecs=usecs)


def ParseTimespec(value):
  """Parse a time specification.

  The following suffixes will be recognized:

    - s: seconds
    - m: minutes
    - h: hours
    - d: days
    - w: weeks

  Without any suffix, the value will be taken to be in seconds.

  """
  value = str(value)
  if not value:
    raise errors.OpPrereqError("Empty time specification passed",
                               errors.ECODE_INVAL)
  suffix_map = {
    "s": 1,
    "m": 60,
    "h": 3600,
    "d": 86400,
    "w": 604800,
    }
  if value[-1] not in suffix_map:
    try:
      value = int(value)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid time specification '%s'" % value,
                                 errors.ECODE_INVAL)
  else:
    multiplier = suffix_map[value[-1]]
    value = value[:-1]
    if not value:
      # no data left after stripping the suffix
      raise errors.OpPrereqError("Invalid time specification (only"
                                 " suffix passed)", errors.ECODE_INVAL)
    try:
      value = int(value) * multiplier
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid time specification '%s'" % value,
                                 errors.ECODE_INVAL)
  return value


def GetOnlineNodes(nodes, cl=None, nowarn=False, secondary_ips=False,
                   filter_master=False, nodegroup=None):
  """Returns the names of online nodes.

  This function will also log a warning on stderr with the names of
  the offline nodes.
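
  A minimal usage sketch, as done by L{RunWhileClusterStopped} when
  gathering cluster information::

    online_nodes = GetOnlineNodes([], cl=GetClient())
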
@param nodes: if not empty, use only this subset of nodes (minus the offline ones) @param cl: if not None, luxi client to use @type nowarn: boolean @param nowarn: by default, this function will output a note with the offline nodes that are skipped; if this parameter is True the note is not displayed @type secondary_ips: boolean @param secondary_ips: if True, return the secondary IPs instead of the names, useful for doing network traffic over the replication interface (if any) @type filter_master: boolean @param filter_master: if True, do not return the master node in the list (useful in coordination with secondary_ips where we cannot check our node name against the list) @type nodegroup: string @param nodegroup: If set, only return nodes in this node group """ if cl is None: cl = GetClient() qfilter = [] if nodes: qfilter.append(qlang.MakeSimpleFilter("name", nodes)) if nodegroup is not None: qfilter.append([qlang.OP_OR, [qlang.OP_EQUAL, "group", nodegroup], [qlang.OP_EQUAL, "group.uuid", nodegroup]]) if filter_master: qfilter.append([qlang.OP_NOT, [qlang.OP_TRUE, "master"]]) if qfilter: if len(qfilter) > 1: final_filter = [qlang.OP_AND] + qfilter else: assert len(qfilter) == 1 final_filter = qfilter[0] else: final_filter = None result = cl.Query(constants.QR_NODE, ["name", "offline", "sip"], final_filter) def _IsOffline(row): (_, (_, offline), _) = row return offline def _GetName(row): ((_, name), _, _) = row return name def _GetSip(row): (_, _, (_, sip)) = row return sip (offline, online) = compat.partition(result.data, _IsOffline) if offline and not nowarn: ToStderr("Note: skipping offline node(s): %s" % utils.CommaJoin(map(_GetName, offline))) if secondary_ips: fn = _GetSip else: fn = _GetName return map(fn, online) def _ToStream(stream, txt, *args): """Write a message to a stream, bypassing the logging system @type stream: file object @param stream: the file to which we should write @type txt: str @param txt: the message """ try: if args: args = tuple(args) stream.write(txt % args) else: stream.write(txt) stream.write("\n") stream.flush() except IOError, err: if err.errno == errno.EPIPE: # our terminal went away, we'll exit sys.exit(constants.EXIT_FAILURE) else: raise def ToStdout(txt, *args): """Write a message to stdout only, bypassing the logging system This is just a wrapper over _ToStream. @type txt: str @param txt: the message """ _ToStream(sys.stdout, txt, *args) def ToStderr(txt, *args): """Write a message to stderr only, bypassing the logging system This is just a wrapper over _ToStream. @type txt: str @param txt: the message """ _ToStream(sys.stderr, txt, *args) class JobExecutor(object): """Class which manages the submission and execution of multiple jobs. Note that instances of this class should not be reused between GetResults() calls. """ def __init__(self, cl=None, verbose=True, opts=None, feedback_fn=None): self.queue = [] if cl is None: cl = GetClient() self.cl = cl self.verbose = verbose self.jobs = [] self.opts = opts self.feedback_fn = feedback_fn self._counter = itertools.count() @staticmethod def _IfName(name, fmt): """Helper function for formatting name. """ if name: return fmt % name return "" def QueueJob(self, name, *ops): """Record a job for later submit. @type name: string @param name: a description of the job, will be used in WaitJobSet """ SetGenericOpcodeOpts(ops, self.opts) self.queue.append((self._counter.next(), name, ops)) def AddJobId(self, name, status, job_id): """Adds a job ID to the internal queue. 
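
    This complements L{QueueJob}: entries added here are treated as
    already-submitted jobs and are waited on by L{GetResults}, alongside
    those submitted via L{SubmitPending}.
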
""" self.jobs.append((self._counter.next(), status, job_id, name)) def SubmitPending(self, each=False): """Submit all pending jobs. """ if each: results = [] for (_, _, ops) in self.queue: # SubmitJob will remove the success status, but raise an exception if # the submission fails, so we'll notice that anyway. results.append([True, self.cl.SubmitJob(ops)[0]]) else: results = self.cl.SubmitManyJobs([ops for (_, _, ops) in self.queue]) for ((status, data), (idx, name, _)) in zip(results, self.queue): self.jobs.append((idx, status, data, name)) def _ChooseJob(self): """Choose a non-waiting/queued job to poll next. """ assert self.jobs, "_ChooseJob called with empty job list" result = self.cl.QueryJobs([i[2] for i in self.jobs[:_CHOOSE_BATCH]], ["status"]) assert result for job_data, status in zip(self.jobs, result): if (isinstance(status, list) and status and status[0] in (constants.JOB_STATUS_QUEUED, constants.JOB_STATUS_WAITING, constants.JOB_STATUS_CANCELING)): # job is still present and waiting continue # good candidate found (either running job or lost job) self.jobs.remove(job_data) return job_data # no job found return self.jobs.pop(0) def GetResults(self): """Wait for and return the results of all jobs. @rtype: list @return: list of tuples (success, job results), in the same order as the submitted jobs; if a job has failed, instead of the result there will be the error message """ if not self.jobs: self.SubmitPending() results = [] if self.verbose: ok_jobs = [row[2] for row in self.jobs if row[1]] if ok_jobs: ToStdout("Submitted jobs %s", utils.CommaJoin(ok_jobs)) # first, remove any non-submitted jobs self.jobs, failures = compat.partition(self.jobs, lambda x: x[1]) for idx, _, jid, name in failures: ToStderr("Failed to submit job%s: %s", self._IfName(name, " for %s"), jid) results.append((idx, False, jid)) while self.jobs: (idx, _, jid, name) = self._ChooseJob() ToStdout("Waiting for job %s%s ...", jid, self._IfName(name, " for %s")) try: job_result = PollJob(jid, cl=self.cl, feedback_fn=self.feedback_fn) success = True except errors.JobLost, err: _, job_result = FormatError(err) ToStderr("Job %s%s has been archived, cannot check its result", jid, self._IfName(name, " for %s")) success = False except (errors.GenericError, luxi.ProtocolError), err: _, job_result = FormatError(err) success = False # the error message will always be shown, verbose or not ToStderr("Job %s%s has failed: %s", jid, self._IfName(name, " for %s"), job_result) results.append((idx, success, job_result)) # sort based on the index, then drop it results.sort() results = [i[1:] for i in results] return results def WaitOrShow(self, wait): """Wait for job results or only print the job IDs. @type wait: boolean @param wait: whether to wait or not """ if wait: return self.GetResults() else: if not self.jobs: self.SubmitPending() for _, status, result, name in self.jobs: if status: ToStdout("%s: %s", result, name) else: ToStderr("Failure for %s: %s", name, result) return [row[1:3] for row in self.jobs] def FormatParamsDictInfo(param_dict, actual): """Formats a parameter dictionary. 
@type param_dict: dict @param param_dict: the own parameters @type actual: dict @param actual: the current parameter set (including defaults) @rtype: dict @return: dictionary where the value of each parameter is either a fully formatted string or a dictionary containing formatted strings """ ret = {} for (key, data) in actual.items(): if isinstance(data, dict) and data: ret[key] = FormatParamsDictInfo(param_dict.get(key, {}), data) else: ret[key] = str(param_dict.get(key, "default (%s)" % data)) return ret def _FormatListInfoDefault(data, def_data): if data is not None: ret = utils.CommaJoin(data) else: ret = "default (%s)" % utils.CommaJoin(def_data) return ret def FormatPolicyInfo(custom_ipolicy, eff_ipolicy, iscluster): """Formats an instance policy. @type custom_ipolicy: dict @param custom_ipolicy: own policy @type eff_ipolicy: dict @param eff_ipolicy: effective policy (including defaults); ignored for cluster @type iscluster: bool @param iscluster: the policy is at cluster level @rtype: list of pairs @return: formatted data, suitable for L{PrintGenericInfo} """ if iscluster: eff_ipolicy = custom_ipolicy minmax_out = [] custom_minmax = custom_ipolicy.get(constants.ISPECS_MINMAX) if custom_minmax: for (k, minmax) in enumerate(custom_minmax): minmax_out.append([ ("%s/%s" % (key, k), FormatParamsDictInfo(minmax[key], minmax[key])) for key in constants.ISPECS_MINMAX_KEYS ]) else: for (k, minmax) in enumerate(eff_ipolicy[constants.ISPECS_MINMAX]): minmax_out.append([ ("%s/%s" % (key, k), FormatParamsDictInfo({}, minmax[key])) for key in constants.ISPECS_MINMAX_KEYS ]) ret = [("bounds specs", minmax_out)] if iscluster: stdspecs = custom_ipolicy[constants.ISPECS_STD] ret.append( (constants.ISPECS_STD, FormatParamsDictInfo(stdspecs, stdspecs)) ) ret.append( ("allowed disk templates", _FormatListInfoDefault(custom_ipolicy.get(constants.IPOLICY_DTS), eff_ipolicy[constants.IPOLICY_DTS])) ) ret.extend([ (key, str(custom_ipolicy.get(key, "default (%s)" % eff_ipolicy[key]))) for key in constants.IPOLICY_PARAMETERS ]) return ret def _PrintSpecsParameters(buf, specs): values = ("%s=%s" % (par, val) for (par, val) in sorted(specs.items())) buf.write(",".join(values)) def PrintIPolicyCommand(buf, ipolicy, isgroup): """Print the command option used to generate the given instance policy. Currently only the parts dealing with specs are supported. @type buf: StringIO @param buf: stream to write into @type ipolicy: dict @param ipolicy: instance policy @type isgroup: bool @param isgroup: whether the policy is at group level """ if not isgroup: stdspecs = ipolicy.get("std") if stdspecs: buf.write(" %s " % IPOLICY_STD_SPECS_STR) _PrintSpecsParameters(buf, stdspecs) minmaxes = ipolicy.get("minmax", []) first = True for minmax in minmaxes: minspecs = minmax.get("min") maxspecs = minmax.get("max") if minspecs and maxspecs: if first: buf.write(" %s " % IPOLICY_BOUNDS_SPECS_STR) first = False else: buf.write("//") buf.write("min:") _PrintSpecsParameters(buf, minspecs) buf.write("/max:") _PrintSpecsParameters(buf, maxspecs) def ConfirmOperation(names, list_type, text, extra=""): """Ask the user to confirm an operation on a list of list_type. This function is used to request confirmation for doing an operation on a given list of list_type. @type names: list @param names: the list of names that we display when we ask for confirmation @type list_type: str @param list_type: Human readable name for elements in the list (e.g. 
nodes) @type text: str @param text: the operation that the user should confirm @rtype: boolean @return: True or False depending on user's confirmation. """ count = len(names) msg = ("The %s will operate on %d %s.\n%s" "Do you want to continue?" % (text, count, list_type, extra)) affected = (("\nAffected %s:\n" % list_type) + "\n".join([" %s" % name for name in names])) choices = [("y", True, "Yes, execute the %s" % text), ("n", False, "No, abort the %s" % text)] if count > 20: choices.insert(1, ("v", "v", "View the list of affected %s" % list_type)) question = msg else: question = msg + affected choice = AskUser(question, choices) if choice == "v": choices.pop(1) choice = AskUser(msg + affected, choices) return choice def _MaybeParseUnit(elements): """Parses and returns an array of potential values with units. """ parsed = {} for k, v in elements.items(): if v == constants.VALUE_DEFAULT: parsed[k] = v else: parsed[k] = utils.ParseUnit(v) return parsed def _InitISpecsFromSplitOpts(ipolicy, ispecs_mem_size, ispecs_cpu_count, ispecs_disk_count, ispecs_disk_size, ispecs_nic_count, group_ipolicy, fill_all): try: if ispecs_mem_size: ispecs_mem_size = _MaybeParseUnit(ispecs_mem_size) if ispecs_disk_size: ispecs_disk_size = _MaybeParseUnit(ispecs_disk_size) except (TypeError, ValueError, errors.UnitParseError), err: raise errors.OpPrereqError("Invalid disk (%s) or memory (%s) size" " in policy: %s" % (ispecs_disk_size, ispecs_mem_size, err), errors.ECODE_INVAL) # prepare ipolicy dict ispecs_transposed = { constants.ISPEC_MEM_SIZE: ispecs_mem_size, constants.ISPEC_CPU_COUNT: ispecs_cpu_count, constants.ISPEC_DISK_COUNT: ispecs_disk_count, constants.ISPEC_DISK_SIZE: ispecs_disk_size, constants.ISPEC_NIC_COUNT: ispecs_nic_count, } # first, check that the values given are correct if group_ipolicy: forced_type = TISPECS_GROUP_TYPES else: forced_type = TISPECS_CLUSTER_TYPES for specs in ispecs_transposed.values(): assert type(specs) is dict utils.ForceDictType(specs, forced_type) # then transpose ispecs = { constants.ISPECS_MIN: {}, constants.ISPECS_MAX: {}, constants.ISPECS_STD: {}, } for (name, specs) in ispecs_transposed.iteritems(): assert name in constants.ISPECS_PARAMETERS for key, val in specs.items(): # {min: .. 
,max: .., std: ..} assert key in ispecs ispecs[key][name] = val minmax_out = {} for key in constants.ISPECS_MINMAX_KEYS: if fill_all: minmax_out[key] = \ objects.FillDict(constants.ISPECS_MINMAX_DEFAULTS[key], ispecs[key]) else: minmax_out[key] = ispecs[key] ipolicy[constants.ISPECS_MINMAX] = [minmax_out] if fill_all: ipolicy[constants.ISPECS_STD] = \ objects.FillDict(constants.IPOLICY_DEFAULTS[constants.ISPECS_STD], ispecs[constants.ISPECS_STD]) else: ipolicy[constants.ISPECS_STD] = ispecs[constants.ISPECS_STD] def _ParseSpecUnit(spec, keyname): ret = spec.copy() for k in [constants.ISPEC_DISK_SIZE, constants.ISPEC_MEM_SIZE]: if k in ret: try: ret[k] = utils.ParseUnit(ret[k]) except (TypeError, ValueError, errors.UnitParseError), err: raise errors.OpPrereqError(("Invalid parameter %s (%s) in %s instance" " specs: %s" % (k, ret[k], keyname, err)), errors.ECODE_INVAL) return ret def _ParseISpec(spec, keyname, required): ret = _ParseSpecUnit(spec, keyname) utils.ForceDictType(ret, constants.ISPECS_PARAMETER_TYPES) missing = constants.ISPECS_PARAMETERS - frozenset(ret.keys()) if required and missing: raise errors.OpPrereqError("Missing parameters in ipolicy spec %s: %s" % (keyname, utils.CommaJoin(missing)), errors.ECODE_INVAL) return ret def _GetISpecsInAllowedValues(minmax_ispecs, allowed_values): ret = None if (minmax_ispecs and allowed_values and len(minmax_ispecs) == 1 and len(minmax_ispecs[0]) == 1): for (key, spec) in minmax_ispecs[0].items(): # This loop is executed exactly once if key in allowed_values and not spec: ret = key return ret def _InitISpecsFromFullOpts(ipolicy_out, minmax_ispecs, std_ispecs, group_ipolicy, allowed_values): found_allowed = _GetISpecsInAllowedValues(minmax_ispecs, allowed_values) if found_allowed is not None: ipolicy_out[constants.ISPECS_MINMAX] = found_allowed elif minmax_ispecs is not None: minmax_out = [] for mmpair in minmax_ispecs: mmpair_out = {} for (key, spec) in mmpair.items(): if key not in constants.ISPECS_MINMAX_KEYS: msg = "Invalid key in bounds instance specifications: %s" % key raise errors.OpPrereqError(msg, errors.ECODE_INVAL) mmpair_out[key] = _ParseISpec(spec, key, True) minmax_out.append(mmpair_out) ipolicy_out[constants.ISPECS_MINMAX] = minmax_out if std_ispecs is not None: assert not group_ipolicy # This is not an option for gnt-group ipolicy_out[constants.ISPECS_STD] = _ParseISpec(std_ispecs, "std", False) def CreateIPolicyFromOpts(ispecs_mem_size=None, ispecs_cpu_count=None, ispecs_disk_count=None, ispecs_disk_size=None, ispecs_nic_count=None, minmax_ispecs=None, std_ispecs=None, ipolicy_disk_templates=None, ipolicy_vcpu_ratio=None, ipolicy_spindle_ratio=None, group_ipolicy=False, allowed_values=None, fill_all=False): """Creation of instance policy based on command line options. 
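
  A sketch of a cluster-level call (option values illustrative)::

    ipolicy = CreateIPolicyFromOpts(ipolicy_vcpu_ratio=4.0, fill_all=True)

  in which case all unspecified values are filled in from
  L{constants.IPOLICY_DEFAULTS}.
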
@param fill_all: whether for cluster policies we should ensure that all values are filled """ assert not (fill_all and allowed_values) split_specs = (ispecs_mem_size or ispecs_cpu_count or ispecs_disk_count or ispecs_disk_size or ispecs_nic_count) if (split_specs and (minmax_ispecs is not None or std_ispecs is not None)): raise errors.OpPrereqError("A --specs-xxx option cannot be specified" " together with any --ipolicy-xxx-specs option", errors.ECODE_INVAL) ipolicy_out = objects.MakeEmptyIPolicy() if split_specs: assert fill_all _InitISpecsFromSplitOpts(ipolicy_out, ispecs_mem_size, ispecs_cpu_count, ispecs_disk_count, ispecs_disk_size, ispecs_nic_count, group_ipolicy, fill_all) elif (minmax_ispecs is not None or std_ispecs is not None): _InitISpecsFromFullOpts(ipolicy_out, minmax_ispecs, std_ispecs, group_ipolicy, allowed_values) if ipolicy_disk_templates is not None: if allowed_values and ipolicy_disk_templates in allowed_values: ipolicy_out[constants.IPOLICY_DTS] = ipolicy_disk_templates else: ipolicy_out[constants.IPOLICY_DTS] = list(ipolicy_disk_templates) if ipolicy_vcpu_ratio is not None: ipolicy_out[constants.IPOLICY_VCPU_RATIO] = ipolicy_vcpu_ratio if ipolicy_spindle_ratio is not None: ipolicy_out[constants.IPOLICY_SPINDLE_RATIO] = ipolicy_spindle_ratio assert not (frozenset(ipolicy_out.keys()) - constants.IPOLICY_ALL_KEYS) if not group_ipolicy and fill_all: ipolicy_out = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, ipolicy_out) return ipolicy_out def _SerializeGenericInfo(buf, data, level, afterkey=False): """Formatting core of L{PrintGenericInfo}. @param buf: (string) stream to accumulate the result into @param data: data to format @type level: int @param level: depth in the data hierarchy, used for indenting @type afterkey: bool @param afterkey: True when we are in the middle of a line after a key (used to properly add newlines or indentation) """ baseind = " " if isinstance(data, dict): if not data: buf.write("\n") else: if afterkey: buf.write("\n") doindent = True else: doindent = False for key in sorted(data): if doindent: buf.write(baseind * level) else: doindent = True buf.write(key) buf.write(": ") _SerializeGenericInfo(buf, data[key], level + 1, afterkey=True) elif isinstance(data, list) and len(data) > 0 and isinstance(data[0], tuple): # list of tuples (an ordered dictionary) if afterkey: buf.write("\n") doindent = True else: doindent = False for (key, val) in data: if doindent: buf.write(baseind * level) else: doindent = True buf.write(key) buf.write(": ") _SerializeGenericInfo(buf, val, level + 1, afterkey=True) elif isinstance(data, list): if not data: buf.write("\n") else: if afterkey: buf.write("\n") doindent = True else: doindent = False for item in data: if doindent: buf.write(baseind * level) else: doindent = True buf.write("-") buf.write(baseind[1:]) _SerializeGenericInfo(buf, item, level + 1) else: # This branch should be only taken for strings, but it's practically # impossible to guarantee that no other types are produced somewhere buf.write(str(data)) buf.write("\n") def PrintGenericInfo(data): """Print information formatted according to the hierarchy. The output is a valid YAML string. @param data: the data to print. 
It's a hierarchical structure whose elements can be: - dictionaries, where keys are strings and values are of any of the types listed here - lists of pairs (key, value), where key is a string and value is of any of the types listed here; it's a way to encode ordered dictionaries - lists of any of the types listed here - strings """ buf = StringIO() _SerializeGenericInfo(buf, data, 0) ToStdout(buf.getvalue().rstrip("\n")) ganeti-2.9.3/lib/backend.py0000644000000000000000000042410212271422343015504 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Functions used by the node daemon @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in the L{UploadFile} function @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted in the L{_CleanDirectory} function """ # pylint: disable=E1103 # E1103: %s %r has no %r member (but some types could not be # inferred), because the _TryOSFromDisk returns either (True, os_obj) # or (False, "string") which confuses pylint import os import os.path import shutil import time import stat import errno import re import random import logging import tempfile import zlib import base64 import signal from ganeti import errors from ganeti import utils from ganeti import ssh from ganeti import hypervisor from ganeti import constants from ganeti.storage import bdev from ganeti.storage import drbd from ganeti.storage import filestorage from ganeti import objects from ganeti import ssconf from ganeti import serializer from ganeti import netutils from ganeti import runtime from ganeti import compat from ganeti import pathutils from ganeti import vcluster from ganeti import ht from ganeti.storage.base import BlockDev from ganeti.storage.drbd import DRBD8 from ganeti import hooksmaster _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ pathutils.DATA_DIR, pathutils.JOB_QUEUE_ARCHIVE_DIR, pathutils.QUEUE_DIR, pathutils.CRYPTO_KEYS_DIR, ]) _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 _X509_KEY_FILE = "key" _X509_CERT_FILE = "cert" _IES_STATUS_FILE = "status" _IES_PID_FILE = "pid" _IES_CA_FILE = "ca" #: Valid LVS output line regex _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") # Actions for the master setup script _MASTER_START = "start" _MASTER_STOP = "stop" #: Maximum file permissions for restricted command directory and executables _RCMD_MAX_MODE = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH) #: Delay before returning an error for restricted commands _RCMD_INVALID_DELAY = 10 #: How long to wait to acquire lock for restricted commands (shorter than #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many #: command requests arrive _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8 class 
RPCFail(Exception):
  """Class denoting RPC failure.

  Its argument is the error message.

  """


def _GetInstReasonFilename(instance_name):
  """Path of the file containing the reason of the instance status change.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)


def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: None

  """
  json = serializer.DumpJson(trail)
  filename = _GetInstReasonFilename(instance_name)
  utils.WriteFile(filename, data=json)


def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and turned
  into a 'failed' return type. As such, this function is a useful shortcut
  for logging the error and returning it to the master daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args

  if "log" not in kwargs or kwargs["log"]:  # if we should log this error
    if "exc" in kwargs and kwargs["exc"]:
      logging.exception(msg)
    else:
      logging.error(msg)

  raise RPCFail(msg)


def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()


def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed by the SshRunner
      constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)


def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")


def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)


def _BuildUploadFileList():
  """Build the list of allowed upload files.

  This is abstracted so that it's built only once at module import time.
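
  The result is a frozenset of absolute paths; each hypervisor's ancillary
  files (as reported by its C{GetAncillaryFiles} method) are included as
  well.
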
""" allowed_files = set([ pathutils.CLUSTER_CONF_FILE, pathutils.ETC_HOSTS, pathutils.SSH_KNOWN_HOSTS_FILE, pathutils.VNC_PASSWORD_FILE, pathutils.RAPI_CERT_FILE, pathutils.SPICE_CERT_FILE, pathutils.SPICE_CACERT_FILE, pathutils.RAPI_USERS_FILE, pathutils.CONFD_HMAC_KEY, pathutils.CLUSTER_DOMAIN_SECRET_FILE, ]) for hv_name in constants.HYPER_TYPES: hv_class = hypervisor.GetHypervisorClass(hv_name) allowed_files.update(hv_class.GetAncillaryFiles()[0]) assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ "Allowed file storage paths should never be uploaded via RPC" return frozenset(allowed_files) _ALLOWED_UPLOAD_FILES = _BuildUploadFileList() def JobQueuePurge(): """Removes job queue files and archived jobs. @rtype: tuple @return: True, None """ _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR) def GetMasterInfo(): """Returns master information. This is an utility function to compute master information, either for consumption here or from the node daemon. @rtype: tuple @return: master_netdev, master_ip, master_name, primary_ip_family, master_netmask @raise RPCFail: in case of errors """ try: cfg = _GetConfig() master_netdev = cfg.GetMasterNetdev() master_ip = cfg.GetMasterIP() master_netmask = cfg.GetMasterNetmask() master_node = cfg.GetMasterNode() primary_ip_family = cfg.GetPrimaryIPFamily() except errors.ConfigurationError, err: _Fail("Cluster configuration incomplete: %s", err, exc=True) return (master_netdev, master_ip, master_node, primary_ip_family, master_netmask) def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): """Decorator that runs hooks before and after the decorated function. @type hook_opcode: string @param hook_opcode: opcode of the hook @type hooks_path: string @param hooks_path: path of the hooks @type env_builder_fn: function @param env_builder_fn: function that returns a dictionary containing the environment variables for the hooks. Will get all the parameters of the decorated function. @raise RPCFail: in case of pre-hook failure """ def decorator(fn): def wrapper(*args, **kwargs): _, myself = ssconf.GetMasterAndMyself() nodes = ([myself], [myself]) # these hooks run locally env_fn = compat.partial(env_builder_fn, *args, **kwargs) cfg = _GetConfig() hr = HooksRunner() hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, hr.RunLocalHooks, None, env_fn, logging.warning, cfg.GetClusterName(), cfg.GetMasterNode()) hm.RunPhase(constants.HOOKS_PHASE_PRE) result = fn(*args, **kwargs) hm.RunPhase(constants.HOOKS_PHASE_POST) return result return wrapper return decorator def _BuildMasterIpEnv(master_params, use_external_mip_script=None): """Builds environment variables for master IP hooks. @type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master @type use_external_mip_script: boolean @param use_external_mip_script: whether to use an external master IP address setup script (unused, but necessary per the implementation of the _RunLocalHooks decorator) """ # pylint: disable=W0613 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) env = { "MASTER_NETDEV": master_params.netdev, "MASTER_IP": master_params.ip, "MASTER_NETMASK": str(master_params.netmask), "CLUSTER_IP_VERSION": str(ver), } return env def _RunMasterSetupScript(master_params, action, use_external_mip_script): """Execute the master IP address setup script. 
@type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master @type action: string @param action: action to pass to the script. Must be one of L{backend._MASTER_START} or L{backend._MASTER_STOP} @type use_external_mip_script: boolean @param use_external_mip_script: whether to use an external master IP address setup script @raise backend.RPCFail: if there are errors during the execution of the script """ env = _BuildMasterIpEnv(master_params) if use_external_mip_script: setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT else: setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT result = utils.RunCmd([setup_script, action], env=env, reset_env=True) if result.failed: _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % (action, result.exit_code, result.output), log=True) @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", _BuildMasterIpEnv) def ActivateMasterIp(master_params, use_external_mip_script): """Activate the IP address of the master daemon. @type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master @type use_external_mip_script: boolean @param use_external_mip_script: whether to use an external master IP address setup script @raise RPCFail: in case of errors during the IP startup """ _RunMasterSetupScript(master_params, _MASTER_START, use_external_mip_script) def StartMasterDaemons(no_voting): """Activate local node as master node. The function will start the master daemons (ganeti-masterd and ganeti-rapi). @type no_voting: boolean @param no_voting: whether to start ganeti-masterd without a node vote but still non-interactively @rtype: None """ if no_voting: masterd_args = "--no-voting --yes-do-it" else: masterd_args = "" env = { "EXTRA_MASTERD_ARGS": masterd_args, } result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) if result.failed: msg = "Can't start Ganeti master: %s" % result.output logging.error(msg) _Fail(msg) @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", _BuildMasterIpEnv) def DeactivateMasterIp(master_params, use_external_mip_script): """Deactivate the master IP on this node. @type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master @type use_external_mip_script: boolean @param use_external_mip_script: whether to use an external master IP address setup script @raise RPCFail: in case of errors during the IP turndown """ _RunMasterSetupScript(master_params, _MASTER_STOP, use_external_mip_script) def StopMasterDaemons(): """Stop the master daemons on this node. Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node. @rtype: None """ # TODO: log and report back to the caller the error failures; we # need to decide in which case we fail the RPC for this result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) if result.failed: logging.error("Could not stop Ganeti master, command %s had exitcode %s" " and error %s", result.cmd, result.exit_code, result.output) def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev): """Change the netmask of the master IP. 
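
  The change is applied via two C{ip} invocations, roughly (addresses
  illustrative)::

    ip address add 192.0.2.1/25 dev eth0 label eth0:0
    ip address del 192.0.2.1/24 dev eth0 label eth0:0
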
  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
                         "%s/%s" % (master_ip, netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not set the new netmask on the master IP address")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
                         "%s/%s" % (master_ip, old_netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not bring down the master IP address with the old netmask")


def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry

  """
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but parameter is"
            " present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")


def LeaveCluster(modify_ssh_setup):
  """Cleans up and removes the current node.

  This function cleans up and prepares the current node to be removed from
  the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to shutdown
  the node daemon.

  @param modify_ssh_setup: boolean

  """
  _CleanDirectory(pathutils.DATA_DIR)
  _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  try:
    utils.RemoveFile(pathutils.CONFD_HMAC_KEY)
    utils.RemoveFile(pathutils.RAPI_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CACERT_FILE)
    utils.RemoveFile(pathutils.NODED_CERT_FILE)
  except: # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop", constants.CONFD])
  if result.failed:
    logging.error("Command %s failed with exitcode %s and error %s",
                  result.cmd, result.exit_code, result.output)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")


def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting is provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if not len(params) == num_params:
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))


def _CheckLvmStorageParams(params):
  """Performs sanity check for the 'exclusive storage' flag.
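
  For LVM storage units the parameter list is expected to hold exactly one
  boolean, the exclusive-storage flag, e.g. C{[True]}.
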
@see: C{_CheckStorageParams} """ _CheckStorageParams(params, 1) excl_stor = params[0] if not isinstance(params[0], bool): raise errors.ProgrammerError("Exclusive storage parameter is not" " boolean: '%s'." % excl_stor) return excl_stor def _GetLvmVgSpaceInfo(name, params): """Wrapper around C{_GetVgInfo} which checks the storage parameters. @type name: string @param name: name of the volume group @type params: list @param params: list of storage parameters, which in this case should be containing only one for exclusive storage """ excl_stor = _CheckLvmStorageParams(params) return _GetVgInfo(name, excl_stor) def _GetVgInfo( name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo): """Retrieves information about a LVM volume group. """ # TODO: GetVGInfo supports returning information for multiple VGs at once vginfo = info_fn([name], excl_stor) if vginfo: vg_free = int(round(vginfo[0][0], 0)) vg_size = int(round(vginfo[0][1], 0)) else: vg_free = None vg_size = None return { "type": constants.ST_LVM_VG, "name": name, "storage_free": vg_free, "storage_size": vg_size, } def _GetLvmPvSpaceInfo(name, params): """Wrapper around C{_GetVgSpindlesInfo} with sanity checks. @see: C{_GetLvmVgSpaceInfo} """ excl_stor = _CheckLvmStorageParams(params) return _GetVgSpindlesInfo(name, excl_stor) def _GetVgSpindlesInfo( name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo): """Retrieves information about spindles in an LVM volume group. @type name: string @param name: VG name @type excl_stor: bool @param excl_stor: exclusive storage @rtype: dict @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name, free spindles, total spindles respectively """ if excl_stor: (vg_free, vg_size) = info_fn(name) else: vg_free = 0 vg_size = 0 return { "type": constants.ST_LVM_PV, "name": name, "storage_free": vg_free, "storage_size": vg_size, } def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor): """Retrieves node information from a hypervisor. The information returned depends on the hypervisor. Common items: - vg_size is the size of the configured volume group in MiB - vg_free is the free size of the volume group in MiB - memory_dom0 is the memory allocated for domain0 in MiB - memory_free is the currently available (free) ram in MiB - memory_total is the total number of ram in MiB - hv_version: the hypervisor version, if available @type hvparams: dict of string @param hvparams: the hypervisor's hvparams """ return get_hv_fn(name).GetNodeInfo(hvparams=hvparams) def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor): """Retrieves node information for all hypervisors. See C{_GetHvInfo} for information on the output. @type hv_specs: list of pairs (string, dict of strings) @param hv_specs: list of pairs of a hypervisor's name and its hvparams """ if hv_specs is None: return None result = [] for hvname, hvparams in hv_specs: result.append(_GetHvInfo(hvname, hvparams, get_hv_fn)) return result def _GetNamedNodeInfo(names, fn): """Calls C{fn} for all names in C{names} and returns a dictionary. @rtype: None or dict """ if names is None: return None else: return map(fn, names) def GetNodeInfo(storage_units, hv_specs): """Gives back a hash with different information about the node. @type storage_units: list of tuples (string, string, list) @param storage_units: List of tuples (storage unit, identifier, parameters) to ask for disk space information. In case of lvm-vg, the identifier is the VG name. 
The parameters can contain additional, storage-type-specific parameters, for example exclusive storage for lvm storage. @type hv_specs: list of pairs (string, dict of strings) @param hv_specs: list of pairs of a hypervisor's name and its hvparams @rtype: tuple; (string, None/dict, None/dict) @return: Tuple containing boot ID, volume group information and hypervisor information """ bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") storage_info = _GetNamedNodeInfo( storage_units, (lambda (storage_type, storage_key, storage_params): _ApplyStorageInfoFunction(storage_type, storage_key, storage_params))) hv_info = _GetHvInfoAll(hv_specs) return (bootid, storage_info, hv_info) def _GetFileStorageSpaceInfo(path, params): """Wrapper around filestorage.GetSpaceInfo. The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo and ignore the *args parameter to not leak it into the filestorage module's code. @see: C{filestorage.GetFileStorageSpaceInfo} for description of the parameters. """ _CheckStorageParams(params, 0) return filestorage.GetFileStorageSpaceInfo(path) # FIXME: implement storage reporting for all missing storage types. _STORAGE_TYPE_INFO_FN = { constants.ST_BLOCK: None, constants.ST_DISKLESS: None, constants.ST_EXT: None, constants.ST_FILE: _GetFileStorageSpaceInfo, constants.ST_LVM_PV: _GetLvmPvSpaceInfo, constants.ST_LVM_VG: _GetLvmVgSpaceInfo, constants.ST_RADOS: None, } def _ApplyStorageInfoFunction(storage_type, storage_key, *args): """Looks up and applies the correct function to calculate free and total storage for the given storage type. @type storage_type: string @param storage_type: the storage type for which the storage shall be reported. @type storage_key: string @param storage_key: identifier of a storage unit, e.g. the volume group name of an LVM storage unit @type args: any @param args: various parameters that can be used for storage reporting. These parameters and their semantics vary from storage type to storage type and are just propagated in this function. @return: the results of the application of the storage space function (see _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that storage type @raises NotImplementedError: for storage types who don't support space reporting yet """ fn = _STORAGE_TYPE_INFO_FN[storage_type] if fn is not None: return fn(storage_key, *args) else: raise NotImplementedError def _CheckExclusivePvs(pvi_list): """Check that PVs are not shared among LVs @type pvi_list: list of L{objects.LvmPvInfo} objects @param pvi_list: information about the PVs @rtype: list of tuples (string, list of strings) @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) """ res = [] for pvi in pvi_list: if len(pvi.lv_list) > 1: res.append((pvi.name, pvi.lv_list)) return res def _VerifyHypervisors(what, vm_capable, result, all_hvparams, get_hv_fn=hypervisor.GetHypervisor): """Verifies the hypervisor. Appends the results to the 'results' list. 
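
  A sketch of the relevant input/output shapes (hypervisor name
  illustrative)::

    what = {constants.NV_HYPERVISOR: ["xen-pvm"]}
    # after the call, result[constants.NV_HYPERVISOR]["xen-pvm"] holds the
    # hypervisor's Verify() outcome, or an error string if the check raised
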
@type what: C{dict} @param what: a dictionary of things to check @type vm_capable: boolean @param vm_capable: whether or not this node is vm capable @type result: dict @param result: dictionary of verification results; results of the verifications in this function will be added here @type all_hvparams: dict of dict of string @param all_hvparams: dictionary mapping hypervisor names to hvparams @type get_hv_fn: function @param get_hv_fn: function to retrieve the hypervisor, to improve testability """ if not vm_capable: return if constants.NV_HYPERVISOR in what: result[constants.NV_HYPERVISOR] = {} for hv_name in what[constants.NV_HYPERVISOR]: hvparams = all_hvparams[hv_name] try: val = get_hv_fn(hv_name).Verify(hvparams=hvparams) except errors.HypervisorError, err: val = "Error while checking hypervisor: %s" % str(err) result[constants.NV_HYPERVISOR][hv_name] = val def _VerifyHvparams(what, vm_capable, result, get_hv_fn=hypervisor.GetHypervisor): """Verifies the hvparams. Appends the results to the 'results' list. @type what: C{dict} @param what: a dictionary of things to check @type vm_capable: boolean @param vm_capable: whether or not this node is vm capable @type result: dict @param result: dictionary of verification results; results of the verifications in this function will be added here @type get_hv_fn: function @param get_hv_fn: function to retrieve the hypervisor, to improve testability """ if not vm_capable: return if constants.NV_HVPARAMS in what: result[constants.NV_HVPARAMS] = [] for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: try: logging.info("Validating hv %s, %s", hv_name, hvparms) get_hv_fn(hv_name).ValidateParameters(hvparms) except errors.HypervisorError, err: result[constants.NV_HVPARAMS].append((source, hv_name, str(err))) def _VerifyInstanceList(what, vm_capable, result, all_hvparams): """Verifies the instance list. @type what: C{dict} @param what: a dictionary of things to check @type vm_capable: boolean @param vm_capable: whether or not this node is vm capable @type result: dict @param result: dictionary of verification results; results of the verifications in this function will be added here @type all_hvparams: dict of dict of string @param all_hvparams: dictionary mapping hypervisor names to hvparams """ if constants.NV_INSTANCELIST in what and vm_capable: # GetInstanceList can fail try: val = GetInstanceList(what[constants.NV_INSTANCELIST], all_hvparams=all_hvparams) except RPCFail, err: val = str(err) result[constants.NV_INSTANCELIST] = val def _VerifyNodeInfo(what, vm_capable, result, all_hvparams): """Verifies the node info. @type what: C{dict} @param what: a dictionary of things to check @type vm_capable: boolean @param vm_capable: whether or not this node is vm capable @type result: dict @param result: dictionary of verification results; results of the verifications in this function will be added here @type all_hvparams: dict of dict of string @param all_hvparams: dictionary mapping hypervisor names to hvparams """ if constants.NV_HVINFO in what and vm_capable: hvname = what[constants.NV_HVINFO] hyper = hypervisor.GetHypervisor(hvname) hvparams = all_hvparams[hvname] result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams) def VerifyNode(what, cluster_name, all_hvparams): """Verify the status of the local node. Based on the input L{what} parameter, various checks are done on the local node. If the I{filelist} key is present, this list of files is checksummed and the file/checksum pairs are returned. 

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_VMNODES, [])

  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_NODELIST in what:
    (nodes, bynode) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes
    val = {}
    for node in nodes:
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
      if not success:
        val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of the function)
    master_name, master_ip = what[constants.NV_MASTERIP]
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stat()'ing out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)

    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesystem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result
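# Illustrative sketch (not part of the original module): a minimal 'what'
# dictionary a caller might pass to VerifyNode. The node name is an
# assumption made purely for the example.
def _ExampleVerifyNodeRequest():
  """Builds a sample 'what' dict exercising a few VerifyNode checks."""
  return {
    constants.NV_FILELIST: [pathutils.CLUSTER_CONF_FILE],
    constants.NV_NODELIST: (["node2.example.com"], {}),
    constants.NV_VERSION: None,
    constants.NV_TIME: None,
  }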
def GetBlockDevSizes(devices):
  """Return the size of the given block devices.

  @type devices: list
  @param devices: list of block device nodes to query
  @rtype: dict
  @return:
    dictionary of all block devices under /dev (key). The value is their
    size in MiB.

    {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}

  """
  DEV_PREFIX = "/dev/"
  blockdevs = {}

  for devpath in devices:
    if not utils.IsBelowDir(DEV_PREFIX, devpath):
      continue

    try:
      st = os.stat(devpath)
    except EnvironmentError, err:
      logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
      continue

    if stat.S_ISBLK(st.st_mode):
      result = utils.RunCmd(["blockdev", "--getsize64", devpath])
      if result.failed:
        # We don't want to fail, just do not list this device as available
        logging.warning("Cannot get size for block device %s", devpath)
        continue

      size = int(result.stdout) / (1024 * 1024)
      blockdevs[devpath] = size
  return blockdevs


def GetVolumeList(vg_names):
  """Compute list of logical volumes and their size.

  @type vg_names: list
  @param vg_names: the volume groups whose LVs we should list, or
      empty for all volume groups
  @rtype: dict
  @return:
      dictionary of all partitions (key) with value being a tuple of
      their size (in MiB), inactive and online status::

        {'xenvg/test1': ('20.06', True, True)}

      in case of errors, a string is returned with the error
      details.

  """
  lvs = {}
  sep = "|"
  if not vg_names:
    vg_names = []
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  for line in result.stdout.splitlines():
    line = line.strip()
    match = _LVSLINE_REGEX.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    vg_name, name, size, attr = match.groups()
    inactive = attr[4] == "-"
    online = attr[5] == "o"
    virtual = attr[0] == "v"
    if virtual:
      # we don't want to report such volumes as existing, since they
      # don't really hold data
      continue
    lvs[vg_name + "/" + name] = (size, inactive, online)

  return lvs


def ListVolumeGroups():
  """List the volume groups and their size.

  @rtype: dict
  @return: dictionary with keys volume name and values the
      size of the volume

  """
  return utils.ListVolumeGroups()


def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    In case of errors, we return an empty list and log the
    error.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s",
          result.output)

  def parse_dev(dev):
    return dev.split("(")[0]

  def handle_dev(dev):
    return [parse_dev(x) for x in dev.split(",")]

  def map_line(line):
    line = [v.strip() for v in line]
    return [{"name": line[0], "size": line[1],
             "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])]

  all_devs = []
  for line in result.stdout.splitlines():
    if line.count("|") >= 3:
      all_devs.extend(map_line(line.split("|")))
    else:
      logging.warning("Strange line in the output from lvs: '%s'", line)
  return all_devs
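# Illustrative sketch (not part of the original module): decoding the lv_attr
# string used by GetVolumeList above. The sample attr value is an assumption;
# see lvs(8) for the authoritative field meanings.
def _ExampleDecodeLvAttr(attr="-wi-ao"):
  """Returns (virtual, inactive, online) flags for a sample lv_attr value."""
  return (attr[0] == "v",   # volume type: 'v' means virtual
          attr[4] == "-",   # state: '-' means inactive, 'a' active
          attr[5] == "o")   # device: 'o' means open/online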
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @rtype: boolean
  @return: C{True} if all of them exist, C{False} otherwise

  """
  missing = []
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      missing.append(bridge)

  if missing:
    _Fail("Missing bridges %s", utils.CommaJoin(missing))


def GetInstanceListForHypervisor(hname, hvparams=None,
                                 get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances of the given hypervisor.

  @type hname: string
  @param hname: name of the hypervisor
  @type hvparams: dict of strings
  @param hvparams: hypervisor parameters for the given hypervisor
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
    name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
  results = []
  try:
    hv = get_hv_fn(hname)
    names = hv.ListInstances(hvparams=hvparams)
    results.extend(names)
  except errors.HypervisorError, err:
    _Fail("Error enumerating instances (hypervisor %s): %s",
          hname, err, exc=True)
  return results


def GetInstanceList(hypervisor_list, all_hvparams=None,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor types to respective
    cluster-wide hypervisor parameters
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
    name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
  results = []
  for hname in hypervisor_list:
    hvparams = all_hvparams[hname]
    results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
                                                get_hv_fn=get_hv_fn))
  return results


def GetInstanceInfo(instance, hname, hvparams=None):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance
  @type hvparams: dict of strings
  @param hvparams: the instance's hvparams

  @rtype: dict
  @return: dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus (int)

  """
  output = {}

  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
                                                          hvparams=hvparams)
  if iinfo is not None:
    output["memory"] = iinfo[2]
    output["vcpus"] = iinfo[3]
    output["state"] = iinfo[4]
    output["time"] = iinfo[5]

  return output


def GetInstanceMigratable(instance):
  """Computes whether an instance can be migrated.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.

  @raise RPCFail: if the instance is not running on this node

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  iname = instance.name
  if iname not in hyper.ListInstances(instance.hvparams):
    _Fail("Instance %s is not running", iname)

  for idx in range(len(instance.disks)):
    link_name = _GetBlockDevSymlinkPath(iname, idx)
    if not os.path.islink(link_name):
      logging.warning("Instance %s is missing symlink %s for disk %d",
                      iname, link_name, idx)
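# Illustrative sketch (not part of the original module): the get_hv_fn
# parameters above exist so tests can inject a fake hypervisor instead of
# hypervisor.GetHypervisor. _FakeHypervisor is a hypothetical stand-in.
class _FakeHypervisor:
  def __init__(self, instances):
    self._instances = instances

  def ListInstances(self, hvparams=None):
    return list(self._instances)

# GetInstanceListForHypervisor("fake",
#     get_hv_fn=lambda _: _FakeHypervisor(["instance1.example.com"]))
# would then return the fake instance list without touching a real hypervisor.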
def GetAllInstancesInfo(hypervisor_list, all_hvparams):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: mapping of hypervisor names to hvparams

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus

  """
  output = {}

  for hname in hypervisor_list:
    hvparams = all_hvparams[hname]
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams)
    if iinfo:
      for name, _, memory, vcpus, state, times in iinfo:
        value = {
          "memory": memory,
          "vcpus": vcpus,
          "state": state,
          "time": times,
          }
        if name in output:
          # we only check static parameters, like memory and vcpus,
          # and not state and time which can change between the
          # invocations of the different hypervisors
          for key in "memory", "vcpus":
            if value[key] != output[name][key]:
              _Fail("Instance %s is running twice"
                    " with different parameters", name)
        output[name] = value

  return output


def _InstanceLogName(kind, os_name, instance, component):
  """Compute the OS log filename for a given instance and operation.

  The instance name and os name are passed in as strings since not all
  operations have these as part of an instance object.

  @type kind: string
  @param kind: the operation type (e.g. add, import, etc.)
  @type os_name: string
  @param os_name: the os name
  @type instance: string
  @param instance: the name of the instance being imported/added/etc.
  @type component: string or None
  @param component: the name of the component of the instance being
      transferred

  """
  # TODO: Use tempfile.mkstemp to create unique filename
  if component:
    assert "/" not in component
    c_msg = "-%s" % component
  else:
    c_msg = ""
  base = ("%s-%s-%s%s-%s.log" %
          (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
  return utils.PathJoin(pathutils.LOG_OS_DIR, base)


def InstanceOsAdd(instance, reinstall, debug):
  """Add an OS to an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  inst_os = OSFromDisk(instance.os)

  create_env = OSEnvironment(instance, inst_os, debug)
  if reinstall:
    create_env["INSTANCE_REINSTALL"] = "1"

  logfile = _InstanceLogName("add", instance.os, instance.name, None)

  result = utils.RunCmd([inst_os.create_script], env=create_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", result.cmd, result.fail_reason, logfile,
                  result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    _Fail("OS create script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
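# Illustrative sketch (not part of the original module): the log file names
# produced by _InstanceLogName above. The OS name and instance name are
# assumptions made for the example; <ts> stands for the timestamp suffix.
#
#   _InstanceLogName("add", "debootstrap", "instance1.example.com", None)
#     -> <LOG_OS_DIR>/add-debootstrap-instance1.example.com-<ts>.log
#   _InstanceLogName("import", "debootstrap", "instance1.example.com", "disk0")
#     -> <LOG_OS_DIR>/import-debootstrap-instance1.example.com-disk0-<ts>.log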
def RunRenameInstance(instance, old_name, debug):
  """Run the OS rename script for an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be renamed
  @type old_name: string
  @param old_name: previous instance name
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: boolean
  @return: the success of the operation

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance, inst_os, debug)
  rename_env["OLD_INSTANCE_NAME"] = old_name

  logfile = _InstanceLogName("rename", instance.os,
                             "%s-%s" % (old_name, instance.name), None)

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)

  if result.failed:
    logging.error("os rename command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    _Fail("OS rename script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)


def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
  """Returns the symlink path for a block device.

  """
  if _dir is None:
    _dir = pathutils.DISK_LINKS_DIR

  return utils.PathJoin(_dir,
                        ("%s%s%s" %
                         (instance_name, constants.DISK_SEPARATOR, idx)))


def _SymlinkBlockDev(instance_name, device_path, idx):
  """Set up symlinks to an instance's block device.

  This is an auxiliary function run when an instance is started (on the
  primary node) or when an instance is migrated (on the target node).

  @param instance_name: the name of the target instance
  @param device_path: path of the physical block device, on the node
  @param idx: the disk index
  @return: absolute path to the disk's symlink

  """
  link_name = _GetBlockDevSymlinkPath(instance_name, idx)
  try:
    os.symlink(device_path, link_name)
  except OSError, err:
    if err.errno == errno.EEXIST:
      if (not os.path.islink(link_name) or
          os.readlink(link_name) != device_path):
        os.remove(link_name)
        os.symlink(device_path, link_name)
    else:
      raise

  return link_name


def _RemoveBlockDevLinks(instance_name, disks):
  """Remove the block device symlinks belonging to the given instance.

  """
  for idx, _ in enumerate(disks):
    link_name = _GetBlockDevSymlinkPath(instance_name, idx)
    if os.path.islink(link_name):
      try:
        os.remove(link_name)
      except OSError:
        logging.exception("Can't remove symlink '%s'", link_name)


def _GatherAndLinkBlockDevs(instance):
  """Set up an instance's block device(s).

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should assemble
  @rtype: list
  @return: list of (disk_object, device_path)

  """
  block_devices = []
  for idx, disk in enumerate(instance.disks):
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))

    device.Open()
    try:
      link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
    except OSError, e:
      raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
                                    e.strerror)

    block_devices.append((disk, link_name))

  return block_devices
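# Illustrative sketch (not part of the original module): the symlink layout
# produced by _SymlinkBlockDev above. The instance name and device path are
# assumptions made for the example.
def _ExampleDiskLinkPath():
  """Returns the symlink path for disk 0 of a sample instance."""
  # e.g. <DISK_LINKS_DIR>/instance1.example.com:0 -> /dev/xenvg/<lv-name>
  return _GetBlockDevSymlinkPath("instance1.example.com", 0)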
def StartInstance(instance, startup_paused, reason, store_reason=True):
  """Start an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type startup_paused: bool
  @param startup_paused: pause instance at startup?
  @type reason: list of reasons
  @param reason: the reason trail for this startup
  @type store_reason: boolean
  @param store_reason: whether to store the startup reason trail on file
  @rtype: None

  """
  running_instances = GetInstanceListForHypervisor(instance.hypervisor,
                                                   instance.hvparams)

  if instance.name in running_instances:
    logging.info("Instance %s already running, not starting", instance.name)
    return

  try:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    hyper.StartInstance(instance, block_devices, startup_paused)
    if store_reason:
      _StoreInstReasonTrail(instance.name, reason)
  except errors.BlockDeviceError, err:
    _Fail("Block device error: %s", err, exc=True)
  except errors.HypervisorError, err:
    _RemoveBlockDevLinks(instance.name, instance.disks)
    _Fail("Hypervisor error: %s", err, exc=True)


def InstanceShutdown(instance, timeout, reason, store_reason=True):
  """Shut an instance down.

  @note: this function uses polling with a hardcoded timeout.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type timeout: integer
  @param timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this shutdown
  @type store_reason: boolean
  @param store_reason: whether to store the shutdown reason trail on file
  @rtype: None

  """
  hv_name = instance.hypervisor
  hyper = hypervisor.GetHypervisor(hv_name)
  iname = instance.name

  if instance.name not in hyper.ListInstances(instance.hvparams):
    logging.info("Instance %s not running, doing nothing", iname)
    return

  class _TryShutdown:
    def __init__(self):
      self.tried_once = False

    def __call__(self):
      if iname not in hyper.ListInstances(instance.hvparams):
        return

      try:
        hyper.StopInstance(instance, retry=self.tried_once)
        if store_reason:
          _StoreInstReasonTrail(instance.name, reason)
      except errors.HypervisorError, err:
        if iname not in hyper.ListInstances(instance.hvparams):
          # if the instance is no longer existing, consider this a
          # success and go to cleanup
          return

        _Fail("Failed to stop instance %s: %s", iname, err)

      self.tried_once = True

      raise utils.RetryAgain()

  try:
    utils.Retry(_TryShutdown(), 5, timeout)
  except utils.RetryTimeout:
    # the shutdown did not succeed
    logging.error("Shutdown of '%s' unsuccessful, forcing", iname)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError, err:
      if iname in hyper.ListInstances(instance.hvparams):
        # only raise an error if the instance still exists, otherwise
        # the error could simply be "instance ... unknown"!
        _Fail("Failed to force stop instance %s: %s", iname, err)

    time.sleep(1)

    if iname in hyper.ListInstances(instance.hvparams):
      _Fail("Could not shutdown instance %s even by destroy", iname)

  try:
    hyper.CleanupInstance(instance.name)
  except errors.HypervisorError, err:
    logging.warning("Failed to execute post-shutdown cleanup step: %s", err)

  _RemoveBlockDevLinks(iname, instance.disks)
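# Illustrative sketch (not part of the original module): the retry pattern
# used by InstanceShutdown above, reduced to its skeleton. The callable
# raises utils.RetryAgain() to be re-invoked until the timeout expires.
def _ExampleRetryLoop(check_fn, timeout=30.0):
  """Polls check_fn every 5 seconds until it succeeds or timeout expires."""
  def _poll():
    if check_fn():
      return            # done; utils.Retry returns normally
    raise utils.RetryAgain()

  try:
    utils.Retry(_poll, 5, timeout)
    return True
  except utils.RetryTimeout:
    return False        # caller falls back to a forced action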
def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
  """Reboot an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object to reboot
  @type reboot_type: str
  @param reboot_type: the type of reboot, one of the following
    constants:
      - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
        instance OS, do not recreate the VM
      - L{constants.INSTANCE_REBOOT_HARD}: tear down and
        restart the VM (at the hypervisor level)
      - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
        not accepted here, since that mode is handled differently, in
        cmdlib, and translates into full stop and start of the
        instance (instead of a call_instance_reboot RPC)
  @type shutdown_timeout: integer
  @param shutdown_timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this reboot
  @rtype: None

  """
  running_instances = GetInstanceListForHypervisor(instance.hypervisor,
                                                   instance.hvparams)

  if instance.name not in running_instances:
    _Fail("Cannot reboot instance %s that is not running", instance.name)

  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError, err:
      _Fail("Failed to soft reboot instance %s: %s", instance.name, err)
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
      result = StartInstance(instance, False, reason, store_reason=False)
      _StoreInstReasonTrail(instance.name, reason)
      return result
    except errors.HypervisorError, err:
      _Fail("Failed to hard reboot instance %s: %s", instance.name, err)
  else:
    _Fail("Invalid reboot_type received: %s", reboot_type)


def InstanceBalloonMemory(instance, memory):
  """Resize an instance's memory.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type memory: int
  @param memory: new memory amount in MB
  @rtype: None

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  running = hyper.ListInstances(instance.hvparams)
  if instance.name not in running:
    logging.info("Instance %s is not running, cannot balloon", instance.name)
    return
  try:
    hyper.BalloonInstanceMemory(instance, memory)
  except errors.HypervisorError, err:
    _Fail("Failed to balloon instance memory: %s", err, exc=True)


def MigrationInfo(instance):
  """Gather information about an instance to be migrated.

  @type instance: L{objects.Instance}
  @param instance: the instance definition

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    info = hyper.MigrationInfo(instance)
  except errors.HypervisorError, err:
    _Fail("Failed to fetch migration information: %s", err, exc=True)
  return info


def AcceptInstance(instance, info, target):
  """Prepare the node to accept an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type target: string
  @param target: target host (usually ip), on this node

  """
  # TODO: why is this required only for DTS_EXT_MIRROR?
  if instance.disk_template in constants.DTS_EXT_MIRROR:
    # Create the symlinks, as the disks are not active
    # in any way
    try:
      _GatherAndLinkBlockDevs(instance)
    except errors.BlockDeviceError, err:
      _Fail("Block device error: %s", err, exc=True)

  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hyper.AcceptInstance(instance, info, target)
  except errors.HypervisorError, err:
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _RemoveBlockDevLinks(instance.name, instance.disks)
    _Fail("Failed to accept instance: %s", err, exc=True)


def FinalizeMigrationDst(instance, info, success):
  """Finalize any preparation to accept an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type success: boolean
  @param success: whether the migration was a success or a failure

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hyper.FinalizeMigrationDst(instance, info, success)
  except errors.HypervisorError, err:
    _Fail("Failed to finalize migration on the target node: %s", err,
          exc=True)


def MigrateInstance(cluster_name, instance, target, live):
  """Migrates an instance to another node.

  @type cluster_name: string
  @param cluster_name: name of the cluster
  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type target: string
  @param target: the target node name
  @type live: boolean
  @param live: whether the migration should be done live or not (the
      interpretation of this parameter is left to the hypervisor)
  @raise RPCFail: if migration fails for some reason

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hyper.MigrateInstance(cluster_name, instance, target, live)
  except errors.HypervisorError, err:
    _Fail("Failed to migrate instance: %s", err, exc=True)


def FinalizeMigrationSource(instance, success, live):
  """Finalize the instance migration on the source node.

  @type instance: L{objects.Instance}
  @param instance: the instance definition of the migrated instance
  @type success: bool
  @param success: whether the migration succeeded or not
  @type live: bool
  @param live: whether the user requested a live migration or not
  @raise RPCFail: If the execution fails for some reason

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hyper.FinalizeMigrationSource(instance, success, live)
  except Exception, err:  # pylint: disable=W0703
    _Fail("Failed to finalize the migration on the source node: %s", err,
          exc=True)


def GetMigrationStatus(instance):
  """Get the migration status.

  @type instance: L{objects.Instance}
  @param instance: the instance that is being migrated
  @rtype: L{objects.MigrationStatus}
  @return: the status of the current migration (one of
           L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
           progress info that can be retrieved from the hypervisor
  @raise RPCFail: If the migration status cannot be retrieved

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    return hyper.GetMigrationStatus(instance)
  except Exception, err:  # pylint: disable=W0703
    _Fail("Failed to get migration status: %s", err, exc=True)
def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
  """Creates a block device for an instance.

  @type disk: L{objects.Disk}
  @param disk: the object describing the disk we should create
  @type size: int
  @param size: the size of the physical underlying device, in MiB
  @type owner: str
  @param owner: the name of the instance for which disk is created,
      used for device cache data
  @type on_primary: boolean
  @param on_primary: indicates if it is the primary node or not
  @type info: string
  @param info: string that will be sent to the physical device
      creation, used for example to set (LVM) tags on LVs
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active

  @return: the new unique_id of the device (this can sometimes be
      computed only after creation), or None. On secondary nodes,
      it's not required to return anything.

  """
  # TODO: remove the obsolete "size" argument
  # pylint: disable=W0613
  clist = []
  if disk.children:
    for child in disk.children:
      try:
        crdev = _RecursiveAssembleBD(child, owner, on_primary)
      except errors.BlockDeviceError, err:
        _Fail("Can't assemble device %s: %s", child, err)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        try:
          # pylint: disable=E1103
          crdev.Open()
        except errors.BlockDeviceError, err:
          _Fail("Can't make child '%s' read-write: %s", child, err)
      clist.append(crdev)

  try:
    device = bdev.Create(disk, clist, excl_stor)
  except errors.BlockDeviceError, err:
    _Fail("Can't create block device: %s", err)

  if on_primary or disk.AssembleOnSecondary():
    try:
      device.Assemble()
    except errors.BlockDeviceError, err:
      _Fail("Can't assemble device after creation, unusual event: %s", err)
    if on_primary or disk.OpenOnSecondary():
      try:
        device.Open(force=True)
      except errors.BlockDeviceError, err:
        _Fail("Can't make device r/w after creation, unusual event: %s", err)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  return device.unique_id


def _WipeDevice(path, offset, size):
  """This function actually wipes the device.

  @param path: The path to the device to wipe
  @param offset: The offset in MiB in the file
  @param size: The size in MiB to write

  """
  # Internal sizes are always in Mebibytes; if the following "dd" command
  # should use a different block size the offset and size given to this
  # function must be adjusted accordingly before being passed to "dd".
  block_size = 1024 * 1024
  cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset,
         "bs=%s" % block_size, "oflag=direct", "of=%s" % path,
         "count=%d" % size]
  result = utils.RunCmd(cmd)

  if result.failed:
    _Fail("Wipe command '%s' exited with error: %s; output: %s", result.cmd,
          result.fail_reason, result.output)


def BlockdevWipe(disk, offset, size):
  """Wipes a block device.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to wipe
  @type offset: int
  @param offset: The offset in MiB in the file
  @type size: int
  @param size: The size in MiB to write

  """
  try:
    rdev = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    rdev = None

  if not rdev:
    _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name)

  # Do cross verify some of the parameters
  if offset < 0:
    _Fail("Negative offset")
  if size < 0:
    _Fail("Negative size")
  if offset > rdev.size:
    _Fail("Offset is bigger than device size")
  if (offset + size) > rdev.size:
    _Fail("The provided offset and size to wipe is bigger than device size")

  _WipeDevice(rdev.dev_path, offset, size)
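# Illustrative sketch (not part of the original module): the argv _WipeDevice
# builds for a 128 MiB wipe starting at a 512 MiB offset. The device path is
# an assumption made for the example; since bs is 1 MiB, seek and count are
# used directly in MiB.
def _ExampleWipeCommand():
  """Returns the command _WipeDevice would run for a sample wipe."""
  return [constants.DD_CMD, "if=/dev/zero", "seek=512",
          "bs=%s" % (1024 * 1024), "oflag=direct",
          "of=/dev/xenvg/disk0", "count=128"]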
def BlockdevPauseResumeSync(disks, pause):
  """Pause or resume the sync of the block device.

  @type disks: list of L{objects.Disk}
  @param disks: the disks object we want to pause/resume
  @type pause: bool
  @param pause: Whether to pause or resume

  """
  success = []
  for disk in disks:
    try:
      rdev = _RecursiveFindBD(disk)
    except errors.BlockDeviceError:
      rdev = None

    if not rdev:
      success.append((False, ("Cannot change sync for device %s:"
                              " device not found" % disk.iv_name)))
      continue

    result = rdev.PauseResumeSync(pause)

    if result:
      success.append((result, None))
    else:
      if pause:
        msg = "Pause"
      else:
        msg = "Resume"
      success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))

  return success


def BlockdevRemove(disk):
  """Remove a block device.

  @note: This is intended to be called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk object we should remove
  @rtype: boolean
  @return: the success of the operation

  """
  msgs = []
  try:
    rdev = _RecursiveFindBD(disk)
  except errors.BlockDeviceError, err:
    # probably can't attach
    logging.info("Can't attach to device %s in remove", disk)
    rdev = None
  if rdev is not None:
    r_path = rdev.dev_path
    try:
      rdev.Remove()
    except errors.BlockDeviceError, err:
      msgs.append(str(err))
    if not msgs:
      DevCacheManager.RemoveCache(r_path)

  if disk.children:
    for child in disk.children:
      try:
        BlockdevRemove(child)
      except RPCFail, err:
        msgs.append(str(err))

  if msgs:
    _Fail("; ".join(msgs))


def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  @note: this function is called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk we try to assemble
  @type owner: str
  @param owner: the name of the instance which owns the disk
  @type as_primary: boolean
  @param as_primary: if we should make the block device
      read/write

  @return: the assembled device or None (in case no device
      was assembled)
  @raise errors.BlockDeviceError: in case there is an error
      during the activation of the children or the device
      itself

  """
  children = []
  if disk.children:
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        if children.count(None) >= mcn:
          raise
        cdev = None
        logging.error("Error in child activation (but continuing): %s",
                      str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.Assemble(disk, children)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)
  else:
    result = True
  return result


def BlockdevAssemble(disk, owner, as_primary, idx):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  @rtype: str or boolean
  @return: a C{/dev/...} path for primary nodes, and
      C{True} for secondary nodes

  """
  try:
    result = _RecursiveAssembleBD(disk, owner, as_primary)
    if isinstance(result, BlockDev):
      # pylint: disable=E1103
      result = result.dev_path
      if as_primary:
        _SymlinkBlockDev(owner, result, idx)
  except errors.BlockDeviceError, err:
    _Fail("Error while assembling disk: %s", err, exc=True)
  except OSError, err:
    _Fail("Error while symlinking disk: %s", err, exc=True)

  return result


def BlockdevShutdown(disk):
  """Shut down a block device.

  First, if the device is assembled (Attach() is successful), then
  the device is shutdown. Then the children of the device are
  shutdown.

  This function is called recursively.

  Note that, as opposed to assemble, we don't cache the children: the
  shutdown of one device doesn't require that the devices above it in
  the tree are active.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk we should
      shutdown
  @rtype: None

  """
  msgs = []
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    r_path = r_dev.dev_path
    try:
      r_dev.Shutdown()
      DevCacheManager.RemoveCache(r_path)
    except errors.BlockDeviceError, err:
      msgs.append(str(err))

  if disk.children:
    for child in disk.children:
      try:
        BlockdevShutdown(child)
      except RPCFail, err:
        msgs.append(str(err))

  if msgs:
    _Fail("; ".join(msgs))


def BlockdevAddchildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk to which we should add children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should add
  @rtype: None

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in add children", parent_cdev)
  new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
  if new_bdevs.count(None) > 0:
    _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
  parent_bdev.AddChildren(new_bdevs)


def BlockdevRemovechildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk from which we should remove children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should remove
  @rtype: None

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in remove children", parent_cdev)
  devs = []
  for disk in new_cdevs:
    rpath = disk.StaticDevPath()
    if rpath is None:
      bd = _RecursiveFindBD(disk)
      if bd is None:
        _Fail("Can't find device %s while removing children", disk)
      else:
        devs.append(bd.dev_path)
    else:
      if not utils.IsNormAbsPath(rpath):
        _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
      devs.append(rpath)
  parent_bdev.RemoveChildren(devs)


def BlockdevGetmirrorstatus(disks):
  """Get the mirroring status of a list of devices.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks which we should query
  @rtype: list
  @return: List of L{objects.BlockDevStatus}, one for each disk
  @raise errors.BlockDeviceError: if any of the disks cannot be
      found

  """
  stats = []
  for dsk in disks:
    rbd = _RecursiveFindBD(dsk)
    if rbd is None:
      _Fail("Can't find device %s", dsk)

    stats.append(rbd.CombinedSyncStatus())

  return stats


def BlockdevGetmirrorstatusMulti(disks):
  """Get the mirroring status of a list of devices.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks which we should query
  @rtype: list
  @return: List of tuples, (bool, status), one for each disk; bool denotes
    success/failure, status is L{objects.BlockDevStatus} on success, string
    otherwise

  """
  result = []
  for disk in disks:
    try:
      rbd = _RecursiveFindBD(disk)
      if rbd is None:
        result.append((False, "Can't find device %s" % disk))
        continue

      status = rbd.CombinedSyncStatus()
    except errors.BlockDeviceError, err:
      logging.exception("Error while getting disk status")
      result.append((False, str(err)))
    else:
      result.append((True, status))

  assert len(disks) == len(result)

  return result
def _RecursiveFindBD(disk):
  """Check if a device is activated.

  If so, return information about the real device.

  @type disk: L{objects.Disk}
  @param disk: the disk object we need to find

  @return: None if the device can't be found,
      otherwise the device instance

  """
  children = []
  if disk.children:
    for chdisk in disk.children:
      children.append(_RecursiveFindBD(chdisk))

  return bdev.FindDevice(disk, children)


def _OpenRealBD(disk):
  """Opens the underlying block device of a disk.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to open

  """
  real_disk = _RecursiveFindBD(disk)
  if real_disk is None:
    _Fail("Block device '%s' is not set up", disk)

  real_disk.Open()

  return real_disk


def BlockdevFind(disk):
  """Check if a device is activated.

  If it is, return information about the real device.

  @type disk: L{objects.Disk}
  @param disk: the disk to find
  @rtype: None or objects.BlockDevStatus
  @return: None if the disk cannot be found, otherwise its current
      sync status

  """
  try:
    rbd = _RecursiveFindBD(disk)
  except errors.BlockDeviceError, err:
    _Fail("Failed to find device: %s", err, exc=True)

  if rbd is None:
    return None

  return rbd.GetSyncStatus()


def BlockdevGetdimensions(disks):
  """Computes the size of the given disks.

  If a disk is not found, returns None instead.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks to compute the size for
  @rtype: list
  @return: list with elements None if the disk cannot be found,
      otherwise the pair (size, spindles), where spindles is None if the
      device doesn't support that

  """
  result = []
  for cf in disks:
    try:
      rbd = _RecursiveFindBD(cf)
    except errors.BlockDeviceError:
      result.append(None)
      continue
    if rbd is None:
      result.append(None)
    else:
      result.append(rbd.GetActualDimensions())
  return result


def BlockdevExport(disk, dest_node_ip, dest_path, cluster_name):
  """Export a block device to a remote node.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk to export
  @type dest_node_ip: str
  @param dest_node_ip: the destination node IP to export to
  @type dest_path: str
  @param dest_path: the destination path on the target node
  @type cluster_name: str
  @param cluster_name: the cluster name, needed for SSH hostalias
  @rtype: None

  """
  real_disk = _OpenRealBD(disk)

  # the block size on the read dd is 1MiB to match our units
  expcmd = utils.BuildShellCmd("set -e; set -o pipefail; "
                               "dd if=%s bs=1048576 count=%s",
                               real_disk.dev_path, str(disk.size))

  # we set here a smaller block size as, due to ssh buffering, more
  # than 64-128k will mostly be ignored; we use nocreat to fail if the
  # device is not already there or we pass a wrong path; we use
  # notrunc to not attempt truncation on an LV device; we use oflag=dsync
  # to not buffer too much memory; this means that at best, we flush
  # every 64k, which will not be very fast
  destcmd = utils.BuildShellCmd("dd of=%s conv=nocreat,notrunc bs=65536"
                                " oflag=dsync", dest_path)

  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node_ip,
                                                   constants.SSH_LOGIN_USER,
                                                   destcmd)

  # all commands have been checked, so we're safe to combine them
  command = "|".join([expcmd, utils.ShellQuoteArgs(remotecmd)])

  result = utils.RunCmd(["bash", "-c", command])

  if result.failed:
    _Fail("Disk copy command '%s' returned error: %s"
          " output: %s", command, result.fail_reason, result.output)
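# Illustrative sketch (not part of the original module): the shape of the
# pipeline BlockdevExport builds. The device path, size, target IP and
# destination path are assumptions made for the example; the exact ssh
# options come from the cluster's SSH runner.
#
#   set -e; set -o pipefail; dd if=/dev/xenvg/disk0 bs=1048576 count=1024 \
#     | ssh <runner-options> 192.0.2.10 \
#       'dd of=/srv/ganeti/export/disk0 conv=nocreat,notrunc bs=65536 \
#          oflag=dsync'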
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  @type file_name: str
  @param file_name: the target file name
  @type data: str
  @param data: the new contents of the file
  @type mode: int
  @param mode: the mode to give the file (can be None)
  @type uid: string
  @param uid: the owner of the file
  @type gid: string
  @param gid: the group of the file
  @type atime: float
  @param atime: the atime to set on the file (can be None)
  @type mtime: float
  @param mtime: the mtime to set on the file (can be None)
  @rtype: None

  """
  file_name = vcluster.LocalizeVirtualPath(file_name)

  if not os.path.isabs(file_name):
    _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)

  if file_name not in _ALLOWED_UPLOAD_FILES:
    _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
          file_name)

  raw_data = _Decompress(data)

  if not (isinstance(uid, basestring) and isinstance(gid, basestring)):
    _Fail("Invalid username/groupname type")

  getents = runtime.GetEnts()
  uid = getents.LookupUser(uid)
  gid = getents.LookupGroup(gid)

  utils.SafeWriteFile(file_name, None,
                      data=raw_data, mode=mode, uid=uid, gid=gid,
                      atime=atime, mtime=mtime)


def RunOob(oob_program, command, node, timeout):
  """Executes oob_program with given command on given node.

  @param oob_program: The path to the executable oob_program
  @param command: The command to invoke on oob_program
  @param node: The node given as an argument to the program
  @param timeout: Timeout after which we kill the oob program

  @return: stdout
  @raise RPCFail: If execution fails for some reason

  """
  result = utils.RunCmd([oob_program, command, node], timeout=timeout)

  if result.failed:
    _Fail("'%s' failed with reason '%s'; output: %s", result.cmd,
          result.fail_reason, result.output)

  return result.stdout


def _OSOndiskAPIVersion(os_dir):
  """Compute and return the API version of a given OS.

  This function will try to read the API version of the OS residing in
  the 'os_dir' directory.

  @type os_dir: str
  @param os_dir: the directory in which we should look for the OS

  @rtype: tuple
  @return: tuple (status, data) with status denoting the validity and
      data holding either the valid versions or an error message

  """
  api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)

  try:
    st = os.stat(api_file)
  except EnvironmentError, err:
    return False, ("Required file '%s' not found under path %s: %s" %
                   (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err)))

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    return False, ("File '%s' in %s is not a regular file" %
                   (constants.OS_API_FILE, os_dir))

  try:
    api_versions = utils.ReadFile(api_file).splitlines()
  except EnvironmentError, err:
    return False, ("Error while reading the API version file at %s: %s" %
                   (api_file, utils.ErrnoOrStr(err)))

  try:
    api_versions = [int(version.strip()) for version in api_versions]
  except (TypeError, ValueError), err:
    return False, ("API version(s) can't be converted to integer: %s" %
                   str(err))

  return True, api_versions
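# Illustrative sketch (not part of the original module): the parsing done by
# _OSOndiskAPIVersion above, applied to sample file contents. A real
# ganeti_api_version file simply holds one integer version per line.
def _ExampleParseApiVersions(contents="20\n15\n"):
  """Returns the integer API versions from sample file contents."""
  return [int(version.strip()) for version in contents.splitlines()]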
def DiagnoseOS(top_dirs=None):
  """Compute the validity for all OSes.

  @type top_dirs: list
  @param top_dirs: the list of directories in which to
      search (if not given defaults to
      L{pathutils.OS_SEARCH_PATH})
  @rtype: list of L{objects.OS}
  @return: a list of tuples (name, path, status, diagnose, variants,
      parameters, api_version) for all (potential) OSes under all
      search paths, where:
          - name is the (potential) OS name
          - path is the full path to the OS
          - status True/False is the validity of the OS
          - diagnose is the error message for an invalid OS, otherwise empty
          - variants is a list of supported OS variants, if any
          - parameters is a list of (name, help) parameters, if any
          - api_version is a list of supported OS API versions

  """
  if top_dirs is None:
    top_dirs = pathutils.OS_SEARCH_PATH

  result = []
  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
      try:
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logging.exception("Can't list the OS directory %s: %s",
                          dir_name, err)
        break
      for name in f_names:
        os_path = utils.PathJoin(dir_name, name)
        status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
        if status:
          diagnose = ""
          variants = os_inst.supported_variants
          parameters = os_inst.supported_parameters
          api_versions = os_inst.api_versions
        else:
          diagnose = os_inst
          variants = parameters = api_versions = []
        result.append((name, os_path, status, diagnose, variants,
                       parameters, api_versions))

  return result


def _TryOSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name.

  @type base_dir: string
  @keyword base_dir: Base directory containing OS installations.
                     Defaults to a search in all the OS_SEARCH_PATH dirs.
  @rtype: tuple
  @return: success and either the OS instance if we find a valid one,
      or error message

  """
  if base_dir is None:
    os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir)
  else:
    os_dir = utils.FindFile(name, [base_dir], os.path.isdir)

  if os_dir is None:
    return False, "Directory for OS %s not found in search path" % name

  status, api_versions = _OSOndiskAPIVersion(os_dir)
  if not status:
    # push the error up
    return status, api_versions

  if not constants.OS_API_VERSIONS.intersection(api_versions):
    return False, ("API version mismatch for path '%s': found %s, want %s."
                   % (os_dir, api_versions, constants.OS_API_VERSIONS))

  # OS Files dictionary, we will populate it with the absolute path
  # names; if the value is True, then it is a required file, otherwise
  # an optional one
  os_files = dict.fromkeys(constants.OS_SCRIPTS, True)

  if max(api_versions) >= constants.OS_API_V15:
    os_files[constants.OS_VARIANTS_FILE] = False

  if max(api_versions) >= constants.OS_API_V20:
    os_files[constants.OS_PARAMETERS_FILE] = True
  else:
    del os_files[constants.OS_SCRIPT_VERIFY]

  for (filename, required) in os_files.items():
    os_files[filename] = utils.PathJoin(os_dir, filename)

    try:
      st = os.stat(os_files[filename])
    except EnvironmentError, err:
      if err.errno == errno.ENOENT and not required:
        del os_files[filename]
        continue
      return False, ("File '%s' under path '%s' is missing (%s)" %
                     (filename, os_dir, utils.ErrnoOrStr(err)))

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      return False, ("File '%s' under path '%s' is not a regular file" %
                     (filename, os_dir))

    if filename in constants.OS_SCRIPTS:
      if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
        return False, ("File '%s' under path '%s' is not executable" %
                       (filename, os_dir))

  variants = []
  if constants.OS_VARIANTS_FILE in os_files:
    variants_file = os_files[constants.OS_VARIANTS_FILE]
    try:
      variants = \
        utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file))
    except EnvironmentError, err:
      # we accept missing files, but not other errors
      if err.errno != errno.ENOENT:
        return False, ("Error while reading the OS variants file at %s: %s" %
                       (variants_file, utils.ErrnoOrStr(err)))

  parameters = []
  if constants.OS_PARAMETERS_FILE in os_files:
    parameters_file = os_files[constants.OS_PARAMETERS_FILE]
    try:
      parameters = utils.ReadFile(parameters_file).splitlines()
    except EnvironmentError, err:
      return False, ("Error while reading the OS parameters file at %s: %s" %
                     (parameters_file, utils.ErrnoOrStr(err)))
    parameters = [v.split(None, 1) for v in parameters]

  os_obj = objects.OS(name=name, path=os_dir,
                      create_script=os_files[constants.OS_SCRIPT_CREATE],
                      export_script=os_files[constants.OS_SCRIPT_EXPORT],
                      import_script=os_files[constants.OS_SCRIPT_IMPORT],
                      rename_script=os_files[constants.OS_SCRIPT_RENAME],
                      verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
                                                 None),
                      supported_variants=variants,
                      supported_parameters=parameters,
                      api_versions=api_versions)
  return True, os_obj


def OSFromDisk(name, base_dir=None):
  """Create an OS instance from disk.

  This function will return an OS instance if the given name is a
  valid OS name. Otherwise, it will raise an appropriate
  L{RPCFail} exception, detailing why this is not a valid OS.

  This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise
  an exception but returns true/false status data.

  @type base_dir: string
  @keyword base_dir: Base directory containing OS installations.
                     Defaults to a search in all the OS_SEARCH_PATH dirs.
  @rtype: L{objects.OS}
  @return: the OS instance if we find a valid one
  @raise RPCFail: if we don't find a valid OS

  """
  name_only = objects.OS.GetName(name)
  status, payload = _TryOSFromDisk(name_only, base_dir)

  if not status:
    _Fail(payload)

  return payload
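# Illustrative sketch (not part of the original module): the on-disk layout
# _TryOSFromDisk expects for an API-version-20 OS definition. The OS name
# "debootstrap" and the search path are assumptions made for the example.
#
#   /srv/ganeti/os/debootstrap/
#     ganeti_api_version   (e.g. "20")
#     create  export  import  rename  verify   (executable scripts)
#     parameters.list      (required for API >= 20)
#     variants.list        (optional for API >= 15)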
def OSCoreEnv(os_name, inst_os, os_params, debug=0):
  """Calculate the basic environment for an os script.

  @type os_name: str
  @param os_name: full operating system name (including variant)
  @type inst_os: L{objects.OS}
  @param inst_os: operating system for which the environment is being built
  @type os_params: dict
  @param os_params: the OS parameters
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise errors.BlockDeviceError: if the block device
      cannot be found

  """
  result = {}
  api_version = \
    max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
  result["OS_API_VERSION"] = "%d" % api_version
  result["OS_NAME"] = inst_os.name
  result["DEBUG_LEVEL"] = "%d" % debug

  # OS variants
  if api_version >= constants.OS_API_V15 and inst_os.supported_variants:
    variant = objects.OS.GetVariant(os_name)
    if not variant:
      variant = inst_os.supported_variants[0]
  else:
    variant = ""
  result["OS_VARIANT"] = variant

  # OS params
  for pname, pvalue in os_params.items():
    result["OSP_%s" % pname.upper()] = pvalue

  # Set a default path otherwise programs called by OS scripts (or
  # even hooks called from OS scripts) might break, and we don't want
  # to have each script require setting a PATH variable
  result["PATH"] = constants.HOOKS_PATH

  return result


def OSEnvironment(instance, inst_os, debug=0):
  """Calculate the environment for an os script.

  @type instance: L{objects.Instance}
  @param instance: target instance for the os script run
  @type inst_os: L{objects.OS}
  @param inst_os: operating system for which the environment is being built
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise errors.BlockDeviceError: if the block device
      cannot be found

  """
  result = OSCoreEnv(instance.os, inst_os, instance.osparams, debug=debug)

  for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]:
    result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))

  result["HYPERVISOR"] = instance.hypervisor
  result["DISK_COUNT"] = "%d" % len(instance.disks)
  result["NIC_COUNT"] = "%d" % len(instance.nics)
  result["INSTANCE_SECONDARY_NODES"] = \
      ("%s" % " ".join(instance.secondary_nodes))

  # Disks
  for idx, disk in enumerate(instance.disks):
    real_disk = _OpenRealBD(disk)
    result["DISK_%d_PATH" % idx] = real_disk.dev_path
    result["DISK_%d_ACCESS" % idx] = disk.mode
    result["DISK_%d_UUID" % idx] = disk.uuid
    if disk.name:
      result["DISK_%d_NAME" % idx] = disk.name
    if constants.HV_DISK_TYPE in instance.hvparams:
      result["DISK_%d_FRONTEND_TYPE" % idx] = \
        instance.hvparams[constants.HV_DISK_TYPE]
    if disk.dev_type in constants.DTS_BLOCK:
      result["DISK_%d_BACKEND_TYPE" % idx] = "block"
    elif disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]:
      result["DISK_%d_BACKEND_TYPE" % idx] = \
        "file:%s" % disk.physical_id[0]

  # NICs
  for idx, nic in enumerate(instance.nics):
    result["NIC_%d_MAC" % idx] = nic.mac
    result["NIC_%d_UUID" % idx] = nic.uuid
    if nic.name:
      result["NIC_%d_NAME" % idx] = nic.name
    if nic.ip:
      result["NIC_%d_IP" % idx] = nic.ip
    result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE]
    if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK]
    if nic.nicparams[constants.NIC_LINK]:
      result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK]
    if nic.netinfo:
      nobj = objects.Network.FromDict(nic.netinfo)
      result.update(nobj.HooksDict("NIC_%d_" % idx))
    if constants.HV_NIC_TYPE in instance.hvparams:
      result["NIC_%d_FRONTEND_TYPE" % idx] = \
        instance.hvparams[constants.HV_NIC_TYPE]

  # HV/BE params
  for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
    for key, value in source.items():
      result["INSTANCE_%s_%s" % (kind, key)] = str(value)

  return result


def DiagnoseExtStorage(top_dirs=None):
  """Compute the validity for all ExtStorage Providers.

  @type top_dirs: list
  @param top_dirs: the list of directories in which to
      search (if not given defaults to
      L{pathutils.ES_SEARCH_PATH})
  @rtype: list of L{objects.ExtStorage}
  @return: a list of tuples (name, path, status, diagnose, parameters)
      for all (potential) ExtStorage Providers under all
      search paths, where:
          - name is the (potential) ExtStorage Provider
          - path is the full path to the ExtStorage Provider
          - status True/False is the validity of the ExtStorage Provider
          - diagnose is the error message for an invalid ExtStorage Provider,
            otherwise empty
          - parameters is a list of (name, help) parameters, if any

  """
  if top_dirs is None:
    top_dirs = pathutils.ES_SEARCH_PATH

  result = []
  for dir_name in top_dirs:
    if os.path.isdir(dir_name):
      try:
        f_names = utils.ListVisibleFiles(dir_name)
      except EnvironmentError, err:
        logging.exception("Can't list the ExtStorage directory %s: %s",
                          dir_name, err)
        break
      for name in f_names:
        es_path = utils.PathJoin(dir_name, name)
        status, es_inst = bdev.ExtStorageFromDisk(name, base_dir=dir_name)
        if status:
          diagnose = ""
          parameters = es_inst.supported_parameters
        else:
          diagnose = es_inst
          parameters = []
        result.append((name, es_path, status, diagnose, parameters))

  return result


def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
  """Grow a stack of block devices.

  This function is called recursively, with the children being
  resized first.

  @type disk: L{objects.Disk}
  @param disk: the disk to be grown
  @type amount: integer
  @param amount: the amount (in mebibytes) to grow with
  @type dryrun: boolean
  @param dryrun: whether to execute the operation in simulation mode
      only, without actually increasing the size
  @param backingstore: whether to execute the operation on backing storage
      only, or on "logical" storage only; e.g. DRBD is logical storage,
      whereas LVM, file, RBD are backing storage
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active
  @rtype: (status, result)
  @return: a tuple with the status of the operation (True/False), and
      the errors message if status is False

  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is None:
    _Fail("Cannot find block device %s", disk)

  try:
    r_dev.Grow(amount, dryrun, backingstore, excl_stor)
  except errors.BlockDeviceError, err:
    _Fail("Failed to grow block device: %s", err, exc=True)


def BlockdevSnapshot(disk):
  """Create a snapshot copy of a block device.

  This function is called recursively, and the snapshot is actually created
  just for the leaf lvm backend device.

  @type disk: L{objects.Disk}
  @param disk: the disk to be snapshotted
  @rtype: string
  @return: snapshot disk ID as (vg, lv)

  """
  if disk.dev_type == constants.DT_DRBD8:
    if not disk.children:
      _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
            disk.unique_id)
    return BlockdevSnapshot(disk.children[0])
  elif disk.dev_type == constants.DT_PLAIN:
    r_dev = _RecursiveFindBD(disk)
    if r_dev is not None:
      # FIXME: choose a saner value for the snapshot size
      # let's stay on the safe side and ask for the full size, for now
      return r_dev.Snapshot(disk.size)
    else:
      _Fail("Cannot find block device %s", disk)
  else:
    _Fail("Cannot snapshot non-lvm block device '%s' of type '%s'",
          disk.unique_id, disk.dev_type)
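# Illustrative sketch (not part of the original module): the recursion in
# BlockdevSnapshot above for a DRBD disk backed by a plain LV. The disk
# objects are hypothetical.
#
#   BlockdevSnapshot(drbd_disk)
#     -> BlockdevSnapshot(drbd_disk.children[0])   # the LVM backing device
#       -> r_dev.Snapshot(disk.size)               # returns the (vg, lv) pair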
This function sets 'info' metadata on block devices. Initial information is set at device creation; this function should be used for example after renames. @type disk: L{objects.Disk} @param disk: the disk on which to set the information @type info: string @param info: new 'info' metadata @rtype: (status, result) @return: a tuple with the status of the operation (True/False), and the error message if status is False """ r_dev = _RecursiveFindBD(disk) if r_dev is None: _Fail("Cannot find block device %s", disk) try: r_dev.SetInfo(info) except errors.BlockDeviceError, err: _Fail("Failed to set information on block device: %s", err, exc=True) def FinalizeExport(instance, snap_disks): """Write out the export configuration information. @type instance: L{objects.Instance} @param instance: the instance which we export, used for saving configuration @type snap_disks: list of L{objects.Disk} @param snap_disks: list of snapshot block devices, which will be used to get the actual name of the dump file @rtype: None """ destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) config = objects.SerializableConfigParser() config.add_section(constants.INISECT_EXP) config.set(constants.INISECT_EXP, "version", "0") config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) config.set(constants.INISECT_EXP, "source", instance.primary_node) config.set(constants.INISECT_EXP, "os", instance.os) config.set(constants.INISECT_EXP, "compression", "none") config.add_section(constants.INISECT_INS) config.set(constants.INISECT_INS, "name", instance.name) config.set(constants.INISECT_INS, "maxmem", "%d" % instance.beparams[constants.BE_MAXMEM]) config.set(constants.INISECT_INS, "minmem", "%d" % instance.beparams[constants.BE_MINMEM]) # "memory" is deprecated, but useful for exporting to old ganeti versions config.set(constants.INISECT_INS, "memory", "%d" % instance.beparams[constants.BE_MAXMEM]) config.set(constants.INISECT_INS, "vcpus", "%d" % instance.beparams[constants.BE_VCPUS]) config.set(constants.INISECT_INS, "disk_template", instance.disk_template) config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) nic_total = 0 for nic_count, nic in enumerate(instance.nics): nic_total += 1 config.set(constants.INISECT_INS, "nic%d_mac" % nic_count, "%s" % nic.mac) config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) config.set(constants.INISECT_INS, "nic%d_network" % nic_count, "%s" % nic.network) for param in constants.NICS_PARAMETER_TYPES: config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), "%s" % nic.nicparams.get(param, None)) # TODO: redundant: on load can read nics until it doesn't exist config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) disk_total = 0 for disk_count, disk in enumerate(snap_disks): if disk: disk_total += 1 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, ("%s" % disk.iv_name)) config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, ("%s" % disk.physical_id[1])) config.set(constants.INISECT_INS, "disk%d_size" % disk_count, ("%d" % disk.size)) config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) # New-style hypervisor/backend parameters config.add_section(constants.INISECT_HYP) for name, value in instance.hvparams.items(): if name not in constants.HVC_GLOBALS: config.set(constants.INISECT_HYP, name, str(value)) config.add_section(constants.INISECT_BEP) for name,
value in instance.beparams.items(): config.set(constants.INISECT_BEP, name, str(value)) config.add_section(constants.INISECT_OSP) for name, value in instance.osparams.items(): config.set(constants.INISECT_OSP, name, str(value)) utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), data=config.Dumps()) shutil.rmtree(finaldestdir, ignore_errors=True) shutil.move(destdir, finaldestdir) def ExportInfo(dest): """Get export configuration information. @type dest: str @param dest: directory containing the export @rtype: L{objects.SerializableConfigParser} @return: a serializable config file containing the export info """ cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) config = objects.SerializableConfigParser() config.read(cff) if (not config.has_section(constants.INISECT_EXP) or not config.has_section(constants.INISECT_INS)): _Fail("Export info file doesn't have the required fields") return config.Dumps() def ListExports(): """Return a list of exports currently available on this machine. @rtype: list @return: list of the exports """ if os.path.isdir(pathutils.EXPORT_DIR): return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) else: _Fail("No exports directory") def RemoveExport(export): """Remove an existing export from the node. @type export: str @param export: the name of the export to remove @rtype: None """ target = utils.PathJoin(pathutils.EXPORT_DIR, export) try: shutil.rmtree(target) except EnvironmentError, err: _Fail("Error while removing the export: %s", err, exc=True) def BlockdevRename(devlist): """Rename a list of block devices. @type devlist: list of tuples @param devlist: list of tuples of the form (disk, new_unique_id); disk is an L{objects.Disk} object describing the current disk, and new_unique_id is the name we rename it to @rtype: boolean @return: True if all renames succeeded, False otherwise """ msgs = [] result = True for disk, unique_id in devlist: dev = _RecursiveFindBD(disk) if dev is None: msgs.append("Can't find device %s in rename" % str(disk)) result = False continue try: old_rpath = dev.dev_path dev.Rename(unique_id) new_rpath = dev.dev_path if old_rpath != new_rpath: DevCacheManager.RemoveCache(old_rpath) # FIXME: we should add the new cache information here, like: # DevCacheManager.UpdateCache(new_rpath, owner, ...) # but we don't have the owner here - maybe parse from existing # cache? for now, we only lose lvm data when we rename, which # is less critical than DRBD or MD except errors.BlockDeviceError, err: msgs.append("Can't rename device '%s' to '%s': %s" % (dev, unique_id, err)) logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) result = False if not result: _Fail("; ".join(msgs)) def _TransformFileStorageDir(fs_dir): """Checks whether the given file_storage_dir is valid. Checks whether the given fs_dir is within the cluster-wide default file_storage_dir or the shared_file_storage_dir, which are stored in SimpleStore. Only paths under those directories are allowed. @type fs_dir: str @param fs_dir: the path to check @return: the normalized path if valid, None otherwise """ filestorage.CheckFileStoragePath(fs_dir) return os.path.normpath(fs_dir) def CreateFileStorageDir(file_storage_dir): """Create file storage directory.
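A usage sketch (the path is hypothetical and must lie below one of the cluster-wide allowed file storage directories, otherwise the call fails)::

  CreateFileStorageDir("/srv/ganeti/file-storage/instance1")
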
@type file_storage_dir: str @param file_storage_dir: directory to create @rtype: tuple @return: tuple with first element a boolean indicating whether dir creation was successful or not """ file_storage_dir = _TransformFileStorageDir(file_storage_dir) if os.path.exists(file_storage_dir): if not os.path.isdir(file_storage_dir): _Fail("Specified storage dir '%s' is not a directory", file_storage_dir) else: try: os.makedirs(file_storage_dir, 0750) except OSError, err: _Fail("Cannot create file storage directory '%s': %s", file_storage_dir, err, exc=True) def RemoveFileStorageDir(file_storage_dir): """Remove file storage directory. Remove it only if it's empty. If not, log an error and return. @type file_storage_dir: str @param file_storage_dir: the directory we should cleanup @rtype: tuple (success,) @return: tuple of one element, C{success}, denoting whether the operation was successful """ file_storage_dir = _TransformFileStorageDir(file_storage_dir) if os.path.exists(file_storage_dir): if not os.path.isdir(file_storage_dir): _Fail("Specified storage directory '%s' is not a directory", file_storage_dir) # deletes dir only if empty, otherwise we want to fail the rpc call try: os.rmdir(file_storage_dir) except OSError, err: _Fail("Cannot remove file storage directory '%s': %s", file_storage_dir, err) def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir): """Rename the file storage directory. @type old_file_storage_dir: str @param old_file_storage_dir: the current path @type new_file_storage_dir: str @param new_file_storage_dir: the name we should rename to @rtype: tuple (success,) @return: tuple of one element, C{success}, denoting whether the operation was successful """ old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) if not os.path.exists(new_file_storage_dir): if os.path.isdir(old_file_storage_dir): try: os.rename(old_file_storage_dir, new_file_storage_dir) except OSError, err: _Fail("Cannot rename '%s' to '%s': %s", old_file_storage_dir, new_file_storage_dir, err) else: _Fail("Specified storage dir '%s' is not a directory", old_file_storage_dir) else: if os.path.exists(old_file_storage_dir): _Fail("Cannot rename '%s' to '%s': both locations exist", old_file_storage_dir, new_file_storage_dir) def _EnsureJobQueueFile(file_name): """Checks whether the given filename is in the queue directory. @type file_name: str @param file_name: the file name we should check @rtype: None @raises RPCFail: if the file is not valid """ if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): _Fail("Passed job queue file '%s' does not belong to" " the queue directory '%s'", file_name, pathutils.QUEUE_DIR) def JobQueueUpdate(file_name, content): """Updates a file in the queue directory. This is just a wrapper over L{utils.io.WriteFile}, with proper checking. @type file_name: str @param file_name: the job file name @type content: str @param content: the new job contents @rtype: boolean @return: the success of the operation """ file_name = vcluster.LocalizeVirtualPath(file_name) _EnsureJobQueueFile(file_name) getents = runtime.GetEnts() # Write and replace the file atomically utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) def JobQueueRename(old, new): """Renames a job queue file. This is just a wrapper over os.rename with proper checking.
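A sketch of a typical call (the paths are illustrative; both must be below L{pathutils.QUEUE_DIR} or the call fails)::

  JobQueueRename("/var/lib/ganeti/queue/job-123",
                 "/var/lib/ganeti/queue/archive/0/job-123")
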
@type old: str @param old: the old (actual) file name @type new: str @param new: the desired file name @rtype: tuple @return: the success of the operation and payload """ old = vcluster.LocalizeVirtualPath(old) new = vcluster.LocalizeVirtualPath(new) _EnsureJobQueueFile(old) _EnsureJobQueueFile(new) getents = runtime.GetEnts() utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid) def BlockdevClose(instance_name, disks): """Closes the given block devices. This means they will be switched to secondary mode (in case of DRBD). @param instance_name: if the argument is not empty, the symlinks of this instance will be removed @type disks: list of L{objects.Disk} @param disks: the list of disks to be closed @rtype: tuple (success, message) @return: a tuple of success and message, where success indicates the success of the operation, and message contains the error details in case we failed """ bdevs = [] for cf in disks: rd = _RecursiveFindBD(cf) if rd is None: _Fail("Can't find device %s", cf) bdevs.append(rd) msg = [] for rd in bdevs: try: rd.Close() except errors.BlockDeviceError, err: msg.append(str(err)) if msg: _Fail("Can't make devices secondary: %s", ",".join(msg)) else: if instance_name: _RemoveBlockDevLinks(instance_name, disks) def ValidateHVParams(hvname, hvparams): """Validates the given hypervisor parameters. @type hvname: string @param hvname: the hypervisor name @type hvparams: dict @param hvparams: the hypervisor parameters to be validated @rtype: None """ try: hv_type = hypervisor.GetHypervisor(hvname) hv_type.ValidateParameters(hvparams) except errors.HypervisorError, err: _Fail(str(err), log=False) def _CheckOSPList(os_obj, parameters): """Check whether a list of parameters is supported by the OS. @type os_obj: L{objects.OS} @param os_obj: OS object to check @type parameters: list @param parameters: the list of parameters to check """ supported = [v[0] for v in os_obj.supported_parameters] delta = frozenset(parameters).difference(supported) if delta: _Fail("The following parameters are not supported" " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta))) def ValidateOS(required, osname, checks, osparams): """Validate the given OS' parameters.
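A usage sketch (the OS name and parameters are hypothetical)::

  # run only the parameter check against a variant of the OS
  ValidateOS(True, "debootstrap+default",
             [constants.OS_VALIDATE_PARAMETERS], {"dhcp": "yes"})
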
@type required: boolean @param required: whether absence of the OS should translate into failure or not @type osname: string @param osname: the OS to be validated @type checks: list @param checks: list of the checks to run (currently only 'parameters') @type osparams: dict @param osparams: dictionary with OS parameters @rtype: boolean @return: True if the validation passed, or False if the OS was not found and L{required} was false """ if not constants.OS_VALIDATE_CALLS.issuperset(checks): _Fail("Unknown checks required for OS %s: %s", osname, set(checks).difference(constants.OS_VALIDATE_CALLS)) name_only = objects.OS.GetName(osname) status, tbv = _TryOSFromDisk(name_only, None) if not status: if required: _Fail(tbv) else: return False if max(tbv.api_versions) < constants.OS_API_V20: return True if constants.OS_VALIDATE_PARAMETERS in checks: _CheckOSPList(tbv, osparams.keys()) validate_env = OSCoreEnv(osname, tbv, osparams) result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, cwd=tbv.path, reset_env=True) if result.failed: logging.error("os validate command '%s' returned error: %s output: %s", result.cmd, result.fail_reason, result.output) _Fail("OS validation script failed (%s), output: %s", result.fail_reason, result.output, log=False) return True def DemoteFromMC(): """Demotes the current node from master candidate role. """ # try to ensure we're not the master by mistake master, myself = ssconf.GetMasterAndMyself() if master == myself: _Fail("ssconf status shows I'm the master node, will not demote") result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) if not result.failed: _Fail("The master daemon is running, will not demote") try: if os.path.isfile(pathutils.CLUSTER_CONF_FILE): utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) except EnvironmentError, err: if err.errno != errno.ENOENT: _Fail("Error while backing up cluster file: %s", err, exc=True) utils.RemoveFile(pathutils.CLUSTER_CONF_FILE) def _GetX509Filenames(cryptodir, name): """Returns the full paths for the private key and certificate. """ return (utils.PathJoin(cryptodir, name), utils.PathJoin(cryptodir, name, _X509_KEY_FILE), utils.PathJoin(cryptodir, name, _X509_CERT_FILE)) def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR): """Creates a new X509 certificate for SSL/TLS. @type validity: int @param validity: Validity in seconds @rtype: tuple; (string, string) @return: Certificate name and public part """ (key_pem, cert_pem) = \ utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), min(validity, _MAX_SSL_CERT_VALIDITY)) cert_dir = tempfile.mkdtemp(dir=cryptodir, prefix="x509-%s-" % utils.TimestampForFilename()) try: name = os.path.basename(cert_dir) assert len(name) > 5 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) utils.WriteFile(key_file, mode=0400, data=key_pem) utils.WriteFile(cert_file, mode=0400, data=cert_pem) # Never return private key as it shouldn't leave the node return (name, cert_pem) except Exception: shutil.rmtree(cert_dir, ignore_errors=True) raise def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR): """Removes an X509 certificate.
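Typically paired with L{CreateX509Certificate}; a sketch::

  (name, _) = CreateX509Certificate(3600)  # valid for at most one hour
  # ... use the certificate for a remote import/export ...
  RemoveX509Certificate(name)
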
@type name: string @param name: Certificate name """ (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) utils.RemoveFile(key_file) utils.RemoveFile(cert_file) try: os.rmdir(cert_dir) except EnvironmentError, err: _Fail("Cannot remove certificate directory '%s': %s", cert_dir, err) def _GetImportExportIoCommand(instance, mode, ieio, ieargs): """Returns the command for the requested input/output. @type instance: L{objects.Instance} @param instance: The instance object @param mode: Import/export mode @param ieio: Input/output type @param ieargs: Input/output arguments """ assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) env = None prefix = None suffix = None exp_size = None if ieio == constants.IEIO_FILE: (filename, ) = ieargs if not utils.IsNormAbsPath(filename): _Fail("Path '%s' is not normalized or absolute", filename) real_filename = os.path.realpath(filename) directory = os.path.dirname(real_filename) if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): _Fail("File '%s' is not under exports directory '%s': %s", filename, pathutils.EXPORT_DIR, real_filename) # Create directory utils.Makedirs(directory, mode=0750) quoted_filename = utils.ShellQuote(filename) if mode == constants.IEM_IMPORT: suffix = "> %s" % quoted_filename elif mode == constants.IEM_EXPORT: suffix = "< %s" % quoted_filename # Retrieve file size try: st = os.stat(filename) except EnvironmentError, err: logging.error("Can't stat(2) %s: %s", filename, err) else: exp_size = utils.BytesToMebibyte(st.st_size) elif ieio == constants.IEIO_RAW_DISK: (disk, ) = ieargs real_disk = _OpenRealBD(disk) if mode == constants.IEM_IMPORT: # we set here a smaller block size as, due to transport buffering, more # than 64-128k will be mostly ignored; we use nocreat to fail if the device # is not already there or we pass a wrong path; we use notrunc to not # attempt truncation on an LV device; we use oflag=dsync to not buffer too # much memory; this means that at best, we flush every 64k, which will # not be very fast suffix = utils.BuildShellCmd(("| dd of=%s conv=nocreat,notrunc" " bs=%s oflag=dsync"), real_disk.dev_path, str(64 * 1024)) elif mode == constants.IEM_EXPORT: # the block size on the read dd is 1MiB to match our units prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |", real_disk.dev_path, str(1024 * 1024), # 1 MB str(disk.size)) exp_size = disk.size elif ieio == constants.IEIO_SCRIPT: (disk, disk_index, ) = ieargs assert isinstance(disk_index, (int, long)) real_disk = _OpenRealBD(disk) inst_os = OSFromDisk(instance.os) env = OSEnvironment(instance, inst_os) if mode == constants.IEM_IMPORT: env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index] env["IMPORT_INDEX"] = str(disk_index) script = inst_os.import_script elif mode == constants.IEM_EXPORT: env["EXPORT_DEVICE"] = real_disk.dev_path env["EXPORT_INDEX"] = str(disk_index) script = inst_os.export_script # TODO: Pass special environment only to script script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) if mode == constants.IEM_IMPORT: suffix = "| %s" % script_cmd elif mode == constants.IEM_EXPORT: prefix = "%s |" % script_cmd # Let script predict size exp_size = constants.IE_CUSTOM_SIZE else: _Fail("Invalid %s I/O mode %r", mode, ieio) return (env, prefix, suffix, exp_size) def _CreateImportExportStatusDir(prefix): """Creates status directory for import/export.
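The basename of the created directory later doubles as the import/export name returned by L{StartImportExportDaemon}; the name has the form C{<prefix>-<timestamp>-<random>}, with the random part added by C{tempfile.mkdtemp}. A sketch (the prefix is illustrative)::

  status_dir = _CreateImportExportStatusDir("import-disk0")
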
""" return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, prefix=("%s-%s-" % (prefix, utils.TimestampForFilename()))) def StartImportExportDaemon(mode, opts, host, port, instance, component, ieio, ieioargs): """Starts an import or export daemon. @param mode: Import/output mode @type opts: L{objects.ImportExportOptions} @param opts: Daemon options @type host: string @param host: Remote host for export (None for import) @type port: int @param port: Remote port for export (None for import) @type instance: L{objects.Instance} @param instance: Instance object @type component: string @param component: which part of the instance is transferred now, e.g. 'disk/0' @param ieio: Input/output type @param ieioargs: Input/output arguments """ if mode == constants.IEM_IMPORT: prefix = "import" if not (host is None and port is None): _Fail("Can not specify host or port on import") elif mode == constants.IEM_EXPORT: prefix = "export" if host is None or port is None: _Fail("Host and port must be specified for an export") else: _Fail("Invalid mode %r", mode) if (opts.key_name is None) ^ (opts.ca_pem is None): _Fail("Cluster certificate can only be used for both key and CA") (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ _GetImportExportIoCommand(instance, mode, ieio, ieioargs) if opts.key_name is None: # Use server.pem key_path = pathutils.NODED_CERT_FILE cert_path = pathutils.NODED_CERT_FILE assert opts.ca_pem is None else: (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, opts.key_name) assert opts.ca_pem is not None for i in [key_path, cert_path]: if not os.path.exists(i): _Fail("File '%s' does not exist" % i) status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) try: status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) if opts.ca_pem is None: # Use server.pem ca = utils.ReadFile(pathutils.NODED_CERT_FILE) else: ca = opts.ca_pem # Write CA file utils.WriteFile(ca_file, data=ca, mode=0400) cmd = [ pathutils.IMPORT_EXPORT_DAEMON, status_file, mode, "--key=%s" % key_path, "--cert=%s" % cert_path, "--ca=%s" % ca_file, ] if host: cmd.append("--host=%s" % host) if port: cmd.append("--port=%s" % port) if opts.ipv6: cmd.append("--ipv6") else: cmd.append("--ipv4") if opts.compress: cmd.append("--compress=%s" % opts.compress) if opts.magic: cmd.append("--magic=%s" % opts.magic) if exp_size is not None: cmd.append("--expected-size=%s" % exp_size) if cmd_prefix: cmd.append("--cmd-prefix=%s" % cmd_prefix) if cmd_suffix: cmd.append("--cmd-suffix=%s" % cmd_suffix) if mode == constants.IEM_EXPORT: # Retry connection a few times when connecting to remote peer cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) elif opts.connect_timeout is not None: assert mode == constants.IEM_IMPORT # Overall timeout for establishing connection while listening cmd.append("--connect-timeout=%s" % opts.connect_timeout) logfile = _InstanceLogName(prefix, instance.os, instance.name, component) # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has # support for receiving a file descriptor for output utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, output=logfile) # The import/export name is simply the status directory name return os.path.basename(status_dir) except Exception: shutil.rmtree(status_dir, ignore_errors=True) raise def GetImportExportStatus(names): """Returns 
import/export daemon status. @type names: sequence @param names: List of names @rtype: List of dicts @return: Returns a list of the state of each named import/export or None if a status couldn't be read """ result = [] for name in names: status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, _IES_STATUS_FILE) try: data = utils.ReadFile(status_file) except EnvironmentError, err: if err.errno != errno.ENOENT: raise data = None if not data: result.append(None) continue result.append(serializer.LoadJson(data)) return result def AbortImportExport(name): """Sends SIGTERM to a running import/export daemon. """ logging.info("Abort import/export %s", name) status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) if pid: logging.info("Import/export %s is running with PID %s, sending SIGTERM", name, pid) utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM) def CleanupImportExport(name): """Cleanup after an import or export. If the import/export daemon is still running it's killed. Afterwards the whole status directory is removed. """ logging.info("Finalizing import/export %s", name) status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) if pid: logging.info("Import/export %s is still running with PID %s", name, pid) utils.KillProcess(pid, waitpid=False) shutil.rmtree(status_dir, ignore_errors=True) def _SetPhysicalId(target_node_uuid, nodes_ip, disks): """Sets the correct physical ID on all passed disks. """ for cf in disks: cf.SetPhysicalID(target_node_uuid, nodes_ip) def _FindDisks(target_node_uuid, nodes_ip, disks): """Sets the physical ID on disks and returns the block devices. """ _SetPhysicalId(target_node_uuid, nodes_ip, disks) bdevs = [] for cf in disks: rd = _RecursiveFindBD(cf) if rd is None: _Fail("Can't find device %s", cf) bdevs.append(rd) return bdevs def DrbdDisconnectNet(target_node_uuid, nodes_ip, disks): """Disconnects the network on a list of drbd devices. """ bdevs = _FindDisks(target_node_uuid, nodes_ip, disks) # disconnect disks for rd in bdevs: try: rd.DisconnectNet() except errors.BlockDeviceError, err: _Fail("Can't change network configuration to standalone mode: %s", err, exc=True) def DrbdAttachNet(target_node_uuid, nodes_ip, disks, instance_name, multimaster): """Attaches the network on a list of drbd devices. """ bdevs = _FindDisks(target_node_uuid, nodes_ip, disks) if multimaster: for idx, rd in enumerate(bdevs): try: _SymlinkBlockDev(instance_name, rd.dev_path, idx) except EnvironmentError, err: _Fail("Can't create symlink: %s", err) # reconnect disks, switch to new master configuration and if # needed primary mode for rd in bdevs: try: rd.AttachNet(multimaster) except errors.BlockDeviceError, err: _Fail("Can't change network configuration: %s", err) # wait until the disks are connected; we need to retry the re-attach # if the device becomes standalone, as this might happen if the one # node disconnects and reconnects in a different mode before the # other node reconnects; in this case, one or both of the nodes will # decide it has wrong configuration and switch to standalone def _Attach(): all_connected = True for rd in bdevs: stats = rd.GetProcStatus() if multimaster: # In the multimaster case we have to wait explicitly until # the resource is Connected and UpToDate/UpToDate, because # we promote *both nodes* to primary directly afterwards. 
# Being in resync is not enough, since there is a race during which we # may promote a node with an Outdated disk to primary, effectively # tearing down the connection. all_connected = (all_connected and stats.is_connected and stats.is_disk_uptodate and stats.peer_disk_uptodate) else: all_connected = (all_connected and (stats.is_connected or stats.is_in_resync)) if stats.is_standalone: # peer had different config info and this node became # standalone, even though this should not happen with the # new staged way of changing disk configs try: rd.AttachNet(multimaster) except errors.BlockDeviceError, err: _Fail("Can't change network configuration: %s", err) if not all_connected: raise utils.RetryAgain() try: # Start with a delay of 100 milliseconds and go up to 5 seconds utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60) except utils.RetryTimeout: _Fail("Timeout in disk reconnecting") if multimaster: # change to primary mode for rd in bdevs: try: rd.Open() except errors.BlockDeviceError, err: _Fail("Can't change to primary mode: %s", err) def DrbdWaitSync(target_node_uuid, nodes_ip, disks): """Wait until DRBDs have synchronized. """ def _helper(rd): stats = rd.GetProcStatus() if not (stats.is_connected or stats.is_in_resync): raise utils.RetryAgain() return stats bdevs = _FindDisks(target_node_uuid, nodes_ip, disks) min_resync = 100 alldone = True for rd in bdevs: try: # poll each second for 15 seconds stats = utils.Retry(_helper, 1, 15, args=[rd]) except utils.RetryTimeout: stats = rd.GetProcStatus() # last check if not (stats.is_connected or stats.is_in_resync): _Fail("DRBD device %s is not in sync: stats=%s", rd, stats) alldone = alldone and (not stats.is_in_resync) if stats.sync_percent is not None: min_resync = min(min_resync, stats.sync_percent) return (alldone, min_resync) def DrbdNeedsActivation(target_node_uuid, nodes_ip, disks): """Checks which of the passed disks need activation and returns their UUIDs. """ _SetPhysicalId(target_node_uuid, nodes_ip, disks) faulty_disks = [] for disk in disks: rd = _RecursiveFindBD(disk) if rd is None: faulty_disks.append(disk) continue stats = rd.GetProcStatus() if stats.is_standalone or stats.is_diskless: faulty_disks.append(disk) return [disk.uuid for disk in faulty_disks] def GetDrbdUsermodeHelper(): """Returns DRBD usermode helper currently configured. """ try: return drbd.DRBD8.GetUsermodeHelper() except errors.BlockDeviceError, err: _Fail(str(err)) def PowercycleNode(hypervisor_type, hvparams=None): """Hard-powercycle the node. Because we need to return first, and schedule the powercycle in the background, we won't be able to report failures nicely. """ hyper = hypervisor.GetHypervisor(hypervisor_type) try: pid = os.fork() except OSError: # if we can't fork, we'll pretend that we're in the child process pid = 0 if pid > 0: return "Reboot scheduled in 5 seconds" # ensure the child is running in RAM try: utils.Mlockall() except Exception: # pylint: disable=W0703 pass time.sleep(5) hyper.PowercycleNode(hvparams=hvparams) def _VerifyRestrictedCmdName(cmd): """Verifies a restricted command name.
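A sketch of accepted and rejected names (assuming the first name matches L{constants.EXT_PLUGIN_MASK})::

  _VerifyRestrictedCmdName("collect_logs")  # (True, None)
  _VerifyRestrictedCmdName("../collect")    # (False, "Invalid command name")
  _VerifyRestrictedCmdName("")              # (False, "Missing command name")
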
@type cmd: string @param cmd: Command name @rtype: tuple; (boolean, string or None) @return: The tuple's first element is the status; if C{False}, the second element is an error message string, otherwise it's C{None} """ if not cmd.strip(): return (False, "Missing command name") if os.path.basename(cmd) != cmd: return (False, "Invalid command name") if not constants.EXT_PLUGIN_MASK.match(cmd): return (False, "Command name contains forbidden characters") return (True, None) def _CommonRestrictedCmdCheck(path, owner): """Common checks for restricted command file system directories and files. @type path: string @param path: Path to check @param owner: C{None} or tuple containing UID and GID @rtype: tuple; (boolean, string or C{os.stat} result) @return: The tuple's first element is the status; if C{False}, the second element is an error message string, otherwise it's the result of C{os.stat} """ if owner is None: # Default to root as owner owner = (0, 0) try: st = os.stat(path) except EnvironmentError, err: return (False, "Can't stat(2) '%s': %s" % (path, err)) if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): return (False, "Permissions on '%s' are too permissive" % path) if (st.st_uid, st.st_gid) != owner: (owner_uid, owner_gid) = owner return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) return (True, st) def _VerifyRestrictedCmdDirectory(path, _owner=None): """Verifies restricted command directory. @type path: string @param path: Path to check @rtype: tuple; (boolean, string or None) @return: The tuple's first element is the status; if C{False}, the second element is an error message string, otherwise it's C{None} """ (status, value) = _CommonRestrictedCmdCheck(path, _owner) if not status: return (False, value) if not stat.S_ISDIR(value.st_mode): return (False, "Path '%s' is not a directory" % path) return (True, None) def _VerifyRestrictedCmd(path, cmd, _owner=None): """Verifies a whole restricted command and returns its executable filename. @type path: string @param path: Directory containing restricted commands @type cmd: string @param cmd: Command name @rtype: tuple; (boolean, string) @return: The tuple's first element is the status; if C{False}, the second element is an error message string, otherwise the second element is the absolute path to the executable """ executable = utils.PathJoin(path, cmd) (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) if not status: return (False, msg) if not utils.IsExecutable(executable): return (False, "access(2) thinks '%s' can't be executed" % executable) return (True, executable) def _PrepareRestrictedCmd(path, cmd, _verify_dir=_VerifyRestrictedCmdDirectory, _verify_name=_VerifyRestrictedCmdName, _verify_cmd=_VerifyRestrictedCmd): """Performs a number of tests on a restricted command. 
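The checks run in order: directory, command name, executable file; a sketch (the command name is hypothetical)::

  (status, value) = _PrepareRestrictedCmd(pathutils.RESTRICTED_COMMANDS_DIR,
                                          "collect_logs")
  # on success status is True and value is the absolute executable path;
  # on failure status is False and value is an error message
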
@type path: string @param path: Directory containing restricted commands @type cmd: string @param cmd: Command name @return: Same as L{_VerifyRestrictedCmd} """ # Verify the directory first (status, msg) = _verify_dir(path) if status: # Check command if everything was alright (status, msg) = _verify_name(cmd) if not status: return (False, msg) # Check actual executable return _verify_cmd(path, cmd) def RunRestrictedCmd(cmd, _lock_timeout=_RCMD_LOCK_TIMEOUT, _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, _path=pathutils.RESTRICTED_COMMANDS_DIR, _sleep_fn=time.sleep, _prepare_fn=_PrepareRestrictedCmd, _runcmd_fn=utils.RunCmd, _enabled=constants.ENABLE_RESTRICTED_COMMANDS): """Executes a restricted command after performing strict tests. @type cmd: string @param cmd: Command name @rtype: string @return: Command output @raise RPCFail: In case of an error """ logging.info("Preparing to run restricted command '%s'", cmd) if not _enabled: _Fail("Restricted commands disabled at configure time") lock = None try: cmdresult = None try: lock = utils.FileLock.Open(_lock_file) lock.Exclusive(blocking=True, timeout=_lock_timeout) (status, value) = _prepare_fn(_path, cmd) if status: cmdresult = _runcmd_fn([value], env={}, reset_env=True, postfork_fn=lambda _: lock.Unlock()) else: logging.error(value) except Exception: # pylint: disable=W0703 # Keep original error in log logging.exception("Caught exception") if cmdresult is None: logging.info("Sleeping for %0.1f seconds before returning", _RCMD_INVALID_DELAY) _sleep_fn(_RCMD_INVALID_DELAY) # Do not include original error message in returned error _Fail("Executing command '%s' failed" % cmd) elif cmdresult.failed or cmdresult.fail_reason: _Fail("Restricted command '%s' failed: %s; output: %s", cmd, cmdresult.fail_reason, cmdresult.output) else: return cmdresult.output finally: if lock is not None: # Release lock at last lock.Close() lock = None def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE): """Creates or removes the watcher pause file. @type until: None or number @param until: Unix timestamp saying until when the watcher shouldn't run """ if until is None: logging.info("Received request to no longer pause watcher") utils.RemoveFile(_filename) else: logging.info("Received request to pause watcher until %s", until) if not ht.TNumber(until): _Fail("Duration must be numeric") utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644) class HooksRunner(object): """Hook runner. This class is instantiated on the node side (ganeti-noded) and not on the master side. """ def __init__(self, hooks_base_dir=None): """Constructor for hooks runner. @type hooks_base_dir: str or None @param hooks_base_dir: if not None, this overrides the L{pathutils.HOOKS_BASE_DIR} (useful for unittests) """ if hooks_base_dir is None: hooks_base_dir = pathutils.HOOKS_BASE_DIR # yeah, _BASE_DIR is not valid for attributes, we use it like a # constant self._BASE_DIR = hooks_base_dir # pylint: disable=C0103 def RunLocalHooks(self, node_list, hpath, phase, env): """Check that the hooks will be run only locally and then run them. """ assert len(node_list) == 1 node = node_list[0] _, myself = ssconf.GetMasterAndMyself() assert node == myself results = self.RunHooks(hpath, phase, env) # Return values in the form expected by HooksMaster return {node: (None, False, results)} def RunHooks(self, hpath, phase, env): """Run the scripts in the hooks directory. 
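For example, for C{hpath="instance-start"} and the pre phase, the scripts under C{<hooks base dir>/instance-start-pre.d/} are run; a sketch (the environment contents are illustrative)::

  runner = HooksRunner()
  results = runner.RunHooks("instance-start", constants.HOOKS_PHASE_PRE,
                            {"GANETI_INSTANCE_NAME": "instance1"})
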
@type hpath: str @param hpath: the path to the hooks directory which holds the scripts @type phase: str @param phase: either L{constants.HOOKS_PHASE_PRE} or L{constants.HOOKS_PHASE_POST} @type env: dict @param env: dictionary with the environment for the hook @rtype: list @return: list of 3-element tuples: - script path - script result, either L{constants.HKR_SUCCESS} or L{constants.HKR_FAIL} - output of the script @raise errors.ProgrammerError: for invalid input parameters """ if phase == constants.HOOKS_PHASE_PRE: suffix = "pre" elif phase == constants.HOOKS_PHASE_POST: suffix = "post" else: _Fail("Unknown hooks phase '%s'", phase) subdir = "%s-%s.d" % (hpath, suffix) dir_name = utils.PathJoin(self._BASE_DIR, subdir) results = [] if not os.path.isdir(dir_name): # for non-existing/non-dirs, we simply exit instead of logging a # warning at every operation return results runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) for (relname, relstatus, runresult) in runparts_results: if relstatus == constants.RUNPARTS_SKIP: rrval = constants.HKR_SKIP output = "" elif relstatus == constants.RUNPARTS_ERR: rrval = constants.HKR_FAIL output = "Hook script execution error: %s" % runresult elif relstatus == constants.RUNPARTS_RUN: if runresult.failed: rrval = constants.HKR_FAIL else: rrval = constants.HKR_SUCCESS output = utils.SafeEncode(runresult.output.strip()) results.append(("%s/%s" % (subdir, relname), rrval, output)) return results class IAllocatorRunner(object): """IAllocator runner. This class is instantiated on the node side (ganeti-noded) and not on the master side. """ @staticmethod def Run(name, idata): """Run an iallocator script. @type name: str @param name: the iallocator script name @type idata: str @param idata: the allocator input data @rtype: tuple @return: two element tuple of: - status - either error message or stdout of allocator (for success) """ alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH, os.path.isfile) if alloc_script is None: _Fail("iallocator module '%s' not found in the search path", name) fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.") try: os.write(fd, idata) os.close(fd) result = utils.RunCmd([alloc_script, fin_name]) if result.failed: _Fail("iallocator module '%s' failed: %s, output '%s'", name, result.fail_reason, result.output) finally: os.unlink(fin_name) return result.stdout class DevCacheManager(object): """Simple class for managing a cache of block device information. """ _DEV_PREFIX = "/dev/" _ROOT_DIR = pathutils.BDEV_CACHE_DIR @classmethod def _ConvertPath(cls, dev_path): """Converts a /dev/name path to the cache file name. This replaces slashes with underscores and strips the /dev prefix. It then returns the full path to the cache file. @type dev_path: str @param dev_path: the C{/dev/} path name @rtype: str @return: the converted path name """ if dev_path.startswith(cls._DEV_PREFIX): dev_path = dev_path[len(cls._DEV_PREFIX):] dev_path = dev_path.replace("/", "_") fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path) return fpath @classmethod def UpdateCache(cls, dev_path, owner, on_primary, iv_name): """Updates the cache information for a given device. 
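A sketch (names illustrative); for C{/dev/xenvg/disk0} this writes a one-line file C{bdev_xenvg_disk0} below the cache directory::

  DevCacheManager.UpdateCache("/dev/xenvg/disk0", "instance1", True, "disk/0")
  # resulting cache file content: "instance1 primary disk/0"
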
@type dev_path: str @param dev_path: the pathname of the device @type owner: str @param owner: the owner (instance name) of the device @type on_primary: bool @param on_primary: whether this is the primary node or not @type iv_name: str @param iv_name: the instance-visible name of the device, as in objects.Disk.iv_name @rtype: None """ if dev_path is None: logging.error("DevCacheManager.UpdateCache got a None dev_path") return fpath = cls._ConvertPath(dev_path) if on_primary: state = "primary" else: state = "secondary" if iv_name is None: iv_name = "not_visible" fdata = "%s %s %s\n" % (str(owner), state, iv_name) try: utils.WriteFile(fpath, data=fdata) except EnvironmentError, err: logging.exception("Can't update bdev cache for %s: %s", dev_path, err) @classmethod def RemoveCache(cls, dev_path): """Remove data for a dev_path. This is just a wrapper over L{utils.io.RemoveFile} with a converted path name and logging. @type dev_path: str @param dev_path: the pathname of the device @rtype: None """ if dev_path is None: logging.error("DevCacheManager.RemoveCache got a None dev_path") return fpath = cls._ConvertPath(dev_path) try: utils.RemoveFile(fpath) except EnvironmentError, err: logging.exception("Can't remove the bdev cache file for %s: %s", dev_path, err) ganeti-2.9.3/lib/watcher/0000755000000000000000000000000012271445544015205 5ustar00rootroot00000000000000ganeti-2.9.3/lib/watcher/nodemaint.py0000644000000000000000000001133712267470014017535 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module doing node maintenance for Ganeti watcher. """ import logging from ganeti import constants from ganeti import errors from ganeti import hypervisor from ganeti import netutils from ganeti import ssconf from ganeti import utils from ganeti import confd from ganeti.storage import drbd import ganeti.confd.client # pylint: disable=W0611 class NodeMaintenance(object): """Talks to confd daemons and possibly shuts down instances/drbd devices. """ def __init__(self): self.store_cb = confd.client.StoreResultCallback() self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb) self.confd_client = confd.client.GetConfdClient(self.filter_cb) @staticmethod def ShouldRun(): """Checks whether node maintenance should run. """ try: return ssconf.SimpleStore().GetMaintainNodeHealth() except errors.ConfigurationError, err: logging.error("Configuration error, not activating node maintenance: %s", err) return False @staticmethod def GetRunningInstances(): """Compute list of hypervisor/running instances.
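A sketch of the return value (instance names are illustrative)::

  [("instance1.example.com", "xen-pvm"), ("instance2.example.com", "kvm")]
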
""" hyp_list = ssconf.SimpleStore().GetHypervisorList() hvparams = ssconf.SimpleStore().GetHvparams() results = [] for hv_name in hyp_list: try: hv = hypervisor.GetHypervisor(hv_name) ilist = hv.ListInstances(hvparams=hvparams) results.extend([(iname, hv_name) for iname in ilist]) except: # pylint: disable=W0702 logging.error("Error while listing instances for hypervisor %s", hv_name, exc_info=True) return results @staticmethod def GetUsedDRBDs(): """Get list of used DRBD minors. """ return drbd.DRBD8.GetUsedDevs() @classmethod def DoMaintenance(cls, role): """Maintain the instance list. """ if role == constants.CONFD_NODE_ROLE_OFFLINE: inst_running = cls.GetRunningInstances() cls.ShutdownInstances(inst_running) drbd_running = cls.GetUsedDRBDs() cls.ShutdownDRBD(drbd_running) else: logging.debug("Not doing anything for role %s", role) @staticmethod def ShutdownInstances(inst_running): """Shutdown running instances. """ names_running = set([i[0] for i in inst_running]) if names_running: logging.info("Following instances should not be running," " shutting them down: %s", utils.CommaJoin(names_running)) # this dictionary will collapse duplicate instance names (only # xen pvm/vhm) into a single key, which is fine i2h = dict(inst_running) for name in names_running: hv_name = i2h[name] hv = hypervisor.GetHypervisor(hv_name) hv.StopInstance(None, force=True, name=name) @staticmethod def ShutdownDRBD(drbd_running): """Shutdown active DRBD devices. """ if drbd_running: logging.info("Following DRBD minors should not be active," " shutting them down: %s", utils.CommaJoin(drbd_running)) for minor in drbd_running: drbd.DRBD8.ShutdownAll(minor) def Exec(self): """Check node status versus cluster desired state. """ if not constants.ENABLE_CONFD: logging.warning("Confd use not enabled, cannot do maintenance") return my_name = netutils.Hostname.GetSysName() req = \ confd.client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_ROLE_BYNAME, query=my_name) self.confd_client.SendRequest(req, async=False, coverage=-1) timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt) if not timed_out: # should have a valid response status, result = self.store_cb.GetResponse(req.rsalt) assert status, "Missing result but received replies" if not self.filter_cb.consistent[req.rsalt]: logging.warning("Inconsistent replies, not doing anything") return self.DoMaintenance(result.server_reply.answer) else: logging.warning("Confd query timed out, cannot do maintenance actions") ganeti-2.9.3/lib/watcher/__init__.py0000644000000000000000000006055712271422343017323 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Tool to restart erroneously downed virtual machines. 
This program and set of classes implement a watchdog to restart virtual machines in a Ganeti cluster that have crashed or been killed by a node reboot. Run from cron or similar. """ import os import os.path import sys import time import logging import operator import errno from optparse import OptionParser from ganeti import utils from ganeti import constants from ganeti import compat from ganeti import errors from ganeti import opcodes from ganeti import cli from ganeti import luxi from ganeti import rapi from ganeti import netutils from ganeti import qlang from ganeti import objects from ganeti import ssconf from ganeti import ht from ganeti import pathutils import ganeti.rapi.client # pylint: disable=W0611 from ganeti.rapi.client import UsesRapiClient from ganeti.watcher import nodemaint from ganeti.watcher import state MAXTRIES = 5 BAD_STATES = compat.UniqueFrozenset([ constants.INSTST_ERRORDOWN, ]) HELPLESS_STATES = compat.UniqueFrozenset([ constants.INSTST_NODEDOWN, constants.INSTST_NODEOFFLINE, ]) NOTICE = "NOTICE" ERROR = "ERROR" #: Number of seconds to wait between starting child processes for node groups CHILD_PROCESS_DELAY = 1.0 #: How many seconds to wait for instance status file lock INSTANCE_STATUS_LOCK_TIMEOUT = 10.0 class NotMasterError(errors.GenericError): """Exception raised when this host is not the master.""" def ShouldPause(): """Check whether we should pause. """ return bool(utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)) def StartNodeDaemons(): """Start all the daemons that should be running on all nodes. """ # on master or not, try to start the node daemon utils.EnsureDaemon(constants.NODED) # start confd as well. On non candidates it will be in disabled mode. if constants.ENABLE_CONFD: utils.EnsureDaemon(constants.CONFD) # start mond as well: all nodes need monitoring if constants.ENABLE_MOND: utils.EnsureDaemon(constants.MOND) def RunWatcherHooks(): """Run the watcher hooks. """ hooks_dir = utils.PathJoin(pathutils.HOOKS_BASE_DIR, constants.HOOKS_NAME_WATCHER) if not os.path.isdir(hooks_dir): return try: results = utils.RunParts(hooks_dir) except Exception, err: # pylint: disable=W0703 logging.exception("RunParts %s failed: %s", hooks_dir, err) return for (relname, status, runresult) in results: if status == constants.RUNPARTS_SKIP: logging.debug("Watcher hook %s: skipped", relname) elif status == constants.RUNPARTS_ERR: logging.warning("Watcher hook %s: error (%s)", relname, runresult) elif status == constants.RUNPARTS_RUN: if runresult.failed: logging.warning("Watcher hook %s: failed (exit: %d) (output: %s)", relname, runresult.exit_code, runresult.output) else: logging.debug("Watcher hook %s: success (output: %s)", relname, runresult.output) else: raise errors.ProgrammerError("Unknown status %s returned by RunParts", status) class Instance(object): """Abstraction for a Virtual Machine instance. """ def __init__(self, name, status, disks_active, snodes): self.name = name self.status = status self.disks_active = disks_active self.snodes = snodes def Restart(self, cl): """Encapsulates the start of an instance. """ op = opcodes.OpInstanceStartup(instance_name=self.name, force=False) cli.SubmitOpCode(op, cl=cl) def ActivateDisks(self, cl): """Encapsulates the activation of all disks of an instance. """ op = opcodes.OpInstanceActivateDisks(instance_name=self.name) cli.SubmitOpCode(op, cl=cl) class Node: """Data container representing cluster node. """ def __init__(self, name, bootid, offline, secondaries): """Initializes this class. 
""" self.name = name self.bootid = bootid self.offline = offline self.secondaries = secondaries def _CheckInstances(cl, notepad, instances): """Make a pass over the list of instances, restarting downed ones. """ notepad.MaintainInstanceList(instances.keys()) started = set() for inst in instances.values(): if inst.status in BAD_STATES: n = notepad.NumberOfRestartAttempts(inst.name) if n > MAXTRIES: logging.warning("Not restarting instance '%s', retries exhausted", inst.name) continue if n == MAXTRIES: notepad.RecordRestartAttempt(inst.name) logging.error("Could not restart instance '%s' after %s attempts," " giving up", inst.name, MAXTRIES) continue try: logging.info("Restarting instance '%s' (attempt #%s)", inst.name, n + 1) inst.Restart(cl) except Exception: # pylint: disable=W0703 logging.exception("Error while restarting instance '%s'", inst.name) else: started.add(inst.name) notepad.RecordRestartAttempt(inst.name) else: if notepad.NumberOfRestartAttempts(inst.name): notepad.RemoveInstance(inst.name) if inst.status not in HELPLESS_STATES: logging.info("Restart of instance '%s' succeeded", inst.name) return started def _CheckDisks(cl, notepad, nodes, instances, started): """Check all nodes for restarted ones. """ check_nodes = [] for node in nodes.values(): old = notepad.GetNodeBootID(node.name) if not node.bootid: # Bad node, not returning a boot id if not node.offline: logging.debug("Node '%s' missing boot ID, skipping secondary checks", node.name) continue if old != node.bootid: # Node's boot ID has changed, probably through a reboot check_nodes.append(node) if check_nodes: # Activate disks for all instances with any of the checked nodes as a # secondary node. for node in check_nodes: for instance_name in node.secondaries: try: inst = instances[instance_name] except KeyError: logging.info("Can't find instance '%s', maybe it was ignored", instance_name) continue if not inst.disks_active: logging.info("Skipping disk activation for instance with not" " activated disks '%s'", inst.name) continue if inst.name in started: # we already tried to start the instance, which should have # activated its drives (if they can be at all) logging.debug("Skipping disk activation for instance '%s' as" " it was already started", inst.name) continue try: logging.info("Activating disks for instance '%s'", inst.name) inst.ActivateDisks(cl) except Exception: # pylint: disable=W0703 logging.exception("Error while activating disks for instance '%s'", inst.name) # Keep changed boot IDs for node in check_nodes: notepad.SetNodeBootID(node.name, node.bootid) def _CheckForOfflineNodes(nodes, instance): """Checks if given instances has any secondary in offline status. @param instance: The instance object @return: True if any of the secondary is offline, False otherwise """ return compat.any(nodes[node_name].offline for node_name in instance.snodes) def _VerifyDisks(cl, uuid, nodes, instances): """Run a per-group "gnt-cluster verify-disks". """ job_id = cl.SubmitJob([opcodes.OpGroupVerifyDisks(group_name=uuid)]) ((_, offline_disk_instances, _), ) = \ cli.PollJob(job_id, cl=cl, feedback_fn=logging.debug) cl.ArchiveJob(job_id) if not offline_disk_instances: # nothing to do logging.debug("Verify-disks reported no offline disks, nothing to do") return logging.debug("Will activate disks for instance(s) %s", utils.CommaJoin(offline_disk_instances)) # We submit only one job, and wait for it. Not optimal, but this puts less # load on the job queue. 
job = [] for name in offline_disk_instances: try: inst = instances[name] except KeyError: logging.info("Can't find instance '%s', maybe it was ignored", name) continue if inst.status in HELPLESS_STATES or _CheckForOfflineNodes(nodes, inst): logging.info("Skipping instance '%s' because it is in a helpless state" " or has offline secondaries", name) continue job.append(opcodes.OpInstanceActivateDisks(instance_name=name)) if job: job_id = cli.SendJob(job, cl=cl) try: cli.PollJob(job_id, cl=cl, feedback_fn=logging.debug) except Exception: # pylint: disable=W0703 logging.exception("Error while activating disks") def IsRapiResponding(hostname): """Connects to RAPI port and does a simple test. Connects to RAPI port of hostname and does a simple test. At this time, the test is GetVersion. @type hostname: string @param hostname: hostname of the node to connect to. @rtype: bool @return: Whether RAPI is working properly """ curl_config = rapi.client.GenericCurlConfig() rapi_client = rapi.client.GanetiRapiClient(hostname, curl_config_fn=curl_config) try: master_version = rapi_client.GetVersion() except rapi.client.CertificateError, err: logging.warning("RAPI certificate error: %s", err) return False except rapi.client.GanetiApiError, err: logging.warning("RAPI error: %s", err) return False else: logging.debug("Reported RAPI version %s", master_version) return master_version == constants.RAPI_VERSION def ParseOptions(): """Parse the command line options. @return: (options, args) as from OptionParser.parse_args() """ parser = OptionParser(description="Ganeti cluster watcher", usage="%prog [-d]", version="%%prog (ganeti) %s" % constants.RELEASE_VERSION) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.NODEGROUP_OPT) parser.add_option("-A", "--job-age", dest="job_age", default=6 * 3600, help="Autoarchive jobs older than this age (default" " 6 hours)") parser.add_option("--ignore-pause", dest="ignore_pause", default=False, action="store_true", help="Ignore cluster pause setting") parser.add_option("--wait-children", dest="wait_children", action="store_true", help="Wait for child processes") parser.add_option("--no-wait-children", dest="wait_children", action="store_false", help="Don't wait for child processes") # See optparse documentation for why default values are not set by options parser.set_defaults(wait_children=True) options, args = parser.parse_args() options.job_age = cli.ParseTimespec(options.job_age) if args: parser.error("No arguments expected") return (options, args) def _WriteInstanceStatus(filename, data): """Writes the per-group instance status file. The entries are sorted. @type filename: string @param filename: Path to instance status file @type data: list of tuple; (instance name as string, status as string) @param data: Instance name and status """ logging.debug("Updating instance status file '%s' with %s instances", filename, len(data)) utils.WriteFile(filename, data="".join(map(compat.partial(operator.mod, "%s %s\n"), sorted(data)))) def _UpdateInstanceStatus(filename, instances): """Writes an instance status file from L{Instance} objects. @type filename: string @param filename: Path to status file @type instances: list of L{Instance} """ _WriteInstanceStatus(filename, [(inst.name, inst.status) for inst in instances]) def _ReadInstanceStatus(filename): """Reads an instance status file. 
@type filename: string @param filename: Path to status file @rtype: tuple; (None or number, list of lists containing instance name and status) @return: File's mtime and instance status contained in the file; mtime is C{None} if file can't be read """ logging.debug("Reading per-group instance status from '%s'", filename) statcb = utils.FileStatHelper() try: content = utils.ReadFile(filename, preread=statcb) except EnvironmentError, err: if err.errno == errno.ENOENT: logging.error("Can't read '%s', does not exist (yet)", filename) else: logging.exception("Unable to read '%s', ignoring", filename) return (None, None) else: return (statcb.st.st_mtime, [line.split(None, 1) for line in content.splitlines()]) def _MergeInstanceStatus(filename, pergroup_filename, groups): """Merges all per-group instance status files into a global one. @type filename: string @param filename: Path to global instance status file @type pergroup_filename: string @param pergroup_filename: Path to per-group status files, must contain "%s" to be replaced with group UUID @type groups: sequence @param groups: UUIDs of known groups """ # Lock global status file in exclusive mode lock = utils.FileLock.Open(filename) try: lock.Exclusive(blocking=True, timeout=INSTANCE_STATUS_LOCK_TIMEOUT) except errors.LockError, err: # All per-group processes will lock and update the file. None of them # should take longer than 10 seconds (the value of # INSTANCE_STATUS_LOCK_TIMEOUT). logging.error("Can't acquire lock on instance status file '%s', not" " updating: %s", filename, err) return logging.debug("Acquired exclusive lock on '%s'", filename) data = {} # Load instance status from all groups for group_uuid in groups: (mtime, instdata) = _ReadInstanceStatus(pergroup_filename % group_uuid) if mtime is not None: for (instance_name, status) in instdata: data.setdefault(instance_name, []).append((mtime, status)) # Select last update based on file mtime inststatus = [(instance_name, sorted(status, reverse=True)[0][1]) for (instance_name, status) in data.items()] # Write the global status file. Don't touch file after it's been # updated--there is no lock anymore. _WriteInstanceStatus(filename, inststatus) def GetLuxiClient(try_restart): """Tries to connect to the master daemon. @type try_restart: bool @param try_restart: Whether to attempt to restart the master daemon """ try: return cli.GetClient() except errors.OpPrereqError, err: # this is, from cli.GetClient, a not-master case raise NotMasterError("Not on master node (%s)" % err) except luxi.NoMasterError, err: if not try_restart: raise logging.warning("Master daemon seems to be down (%s), trying to restart", err) if not utils.EnsureDaemon(constants.MASTERD): raise errors.GenericError("Can't start the master daemon") # Retry the connection return cli.GetClient() def _StartGroupChildren(cl, wait): """Starts a new instance of the watcher for every node group. """ assert not compat.any(arg.startswith(cli.NODEGROUP_OPT_NAME) for arg in sys.argv) result = cl.QueryGroups([], ["name", "uuid"], False) children = [] for (idx, (name, uuid)) in enumerate(result): args = sys.argv + [cli.NODEGROUP_OPT_NAME, uuid] if idx > 0: # Let's not kill the system time.sleep(CHILD_PROCESS_DELAY) logging.debug("Spawning child for group '%s' (%s), arguments %s", name, uuid, args) try: # TODO: Should utils.StartDaemon be used instead? 
pid = os.spawnv(os.P_NOWAIT, args[0], args) except Exception: # pylint: disable=W0703 logging.exception("Failed to start child for group '%s' (%s)", name, uuid) else: logging.debug("Started with PID %s", pid) children.append(pid) if wait: for pid in children: logging.debug("Waiting for child PID %s", pid) try: result = utils.RetryOnSignal(os.waitpid, pid, 0) except EnvironmentError, err: result = str(err) logging.debug("Child PID %s exited with status %s", pid, result) def _ArchiveJobs(cl, age): """Archives old jobs. """ (arch_count, left_count) = cl.AutoArchiveJobs(age) logging.debug("Archived %s jobs, left %s", arch_count, left_count) def _CheckMaster(cl): """Ensures current host is master node. """ (master, ) = cl.QueryConfigValues(["master_node"]) if master != netutils.Hostname.GetSysName(): raise NotMasterError("This is not the master node") @UsesRapiClient def _GlobalWatcher(opts): """Main function for global watcher. At the end child processes are spawned for every node group. """ StartNodeDaemons() RunWatcherHooks() # Run node maintenance in all cases, even if master, so that old masters can # be properly cleaned up if nodemaint.NodeMaintenance.ShouldRun(): # pylint: disable=E0602 nodemaint.NodeMaintenance().Exec() # pylint: disable=E0602 try: client = GetLuxiClient(True) except NotMasterError: # Don't proceed on non-master nodes return constants.EXIT_SUCCESS # we are on master now utils.EnsureDaemon(constants.RAPI) # If RAPI isn't responding to queries, try one restart logging.debug("Attempting to talk to remote API on %s", constants.IP4_ADDRESS_LOCALHOST) if not IsRapiResponding(constants.IP4_ADDRESS_LOCALHOST): logging.warning("Couldn't get answer from remote API, restarting daemon") utils.StopDaemon(constants.RAPI) utils.EnsureDaemon(constants.RAPI) logging.debug("Second attempt to talk to remote API") if not IsRapiResponding(constants.IP4_ADDRESS_LOCALHOST): logging.fatal("RAPI is not responding") logging.debug("Successfully talked to remote API") _CheckMaster(client) _ArchiveJobs(client, opts.job_age) # Spawn child processes for all node groups _StartGroupChildren(client, opts.wait_children) return constants.EXIT_SUCCESS def _GetGroupData(cl, uuid): """Retrieves instances and nodes per node group.
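@rtype: tuple
@return: C{(nodes, instances)}, two dictionaries mapping node names to L{Node} objects and instance names to L{Instance} objects; split instances, i.e. instances whose secondary nodes live in other groups, are logged and skipped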
""" job = [ # Get all primary instances in group opcodes.OpQuery(what=constants.QR_INSTANCE, fields=["name", "status", "disks_active", "snodes", "pnode.group.uuid", "snodes.group.uuid"], qfilter=[qlang.OP_EQUAL, "pnode.group.uuid", uuid], use_locking=True), # Get all nodes in group opcodes.OpQuery(what=constants.QR_NODE, fields=["name", "bootid", "offline"], qfilter=[qlang.OP_EQUAL, "group.uuid", uuid], use_locking=True), ] job_id = cl.SubmitJob(job) results = map(objects.QueryResponse.FromDict, cli.PollJob(job_id, cl=cl, feedback_fn=logging.debug)) cl.ArchiveJob(job_id) results_data = map(operator.attrgetter("data"), results) # Ensure results are tuples with two values assert compat.all(map(ht.TListOf(ht.TListOf(ht.TIsLength(2))), results_data)) # Extract values ignoring result status (raw_instances, raw_nodes) = [[map(compat.snd, values) for values in res] for res in results_data] secondaries = {} instances = [] # Load all instances for (name, status, disks_active, snodes, pnode_group_uuid, snodes_group_uuid) in raw_instances: if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid): logging.error("Ignoring split instance '%s', primary group %s, secondary" " groups %s", name, pnode_group_uuid, utils.CommaJoin(snodes_group_uuid)) else: instances.append(Instance(name, status, disks_active, snodes)) for node in snodes: secondaries.setdefault(node, set()).add(name) # Load all nodes nodes = [Node(name, bootid, offline, secondaries.get(name, set())) for (name, bootid, offline) in raw_nodes] return (dict((node.name, node) for node in nodes), dict((inst.name, inst) for inst in instances)) def _LoadKnownGroups(): """Returns a list of all node groups known by L{ssconf}. """ groups = ssconf.SimpleStore().GetNodegroupList() result = list(line.split(None, 1)[0] for line in groups if line.strip()) if not compat.all(map(utils.UUID_RE.match, result)): raise errors.GenericError("Ssconf contains invalid group UUID") return result def _GroupWatcher(opts): """Main function for per-group watcher process. 
""" group_uuid = opts.nodegroup.lower() if not utils.UUID_RE.match(group_uuid): raise errors.GenericError("Node group parameter (%s) must be given a UUID," " got '%s'" % (cli.NODEGROUP_OPT_NAME, group_uuid)) logging.info("Watcher for node group '%s'", group_uuid) known_groups = _LoadKnownGroups() # Check if node group is known if group_uuid not in known_groups: raise errors.GenericError("Node group '%s' is not known by ssconf" % group_uuid) # Group UUID has been verified and should not contain any dangerous # characters state_path = pathutils.WATCHER_GROUP_STATE_FILE % group_uuid inst_status_path = pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE % group_uuid logging.debug("Using state file %s", state_path) # Global watcher statefile = state.OpenStateFile(state_path) # pylint: disable=E0602 if not statefile: return constants.EXIT_FAILURE notepad = state.WatcherState(statefile) # pylint: disable=E0602 try: # Connect to master daemon client = GetLuxiClient(False) _CheckMaster(client) (nodes, instances) = _GetGroupData(client, group_uuid) # Update per-group instance status file _UpdateInstanceStatus(inst_status_path, instances.values()) _MergeInstanceStatus(pathutils.INSTANCE_STATUS_FILE, pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE, known_groups) started = _CheckInstances(client, notepad, instances) _CheckDisks(client, notepad, nodes, instances, started) _VerifyDisks(client, group_uuid, nodes, instances) except Exception, err: logging.info("Not updating status file due to failure: %s", err) raise else: # Save changes for next run notepad.Save(state_path) return constants.EXIT_SUCCESS def Main(): """Main function. """ (options, _) = ParseOptions() utils.SetupLogging(pathutils.LOG_WATCHER, sys.argv[0], debug=options.debug, stderr_logging=options.debug) if ShouldPause() and not options.ignore_pause: logging.debug("Pause has been set, exiting") return constants.EXIT_SUCCESS # Try to acquire global watcher lock in shared mode lock = utils.FileLock.Open(pathutils.WATCHER_LOCK_FILE) try: lock.Shared(blocking=False) except (EnvironmentError, errors.LockError), err: logging.error("Can't acquire lock on %s: %s", pathutils.WATCHER_LOCK_FILE, err) return constants.EXIT_SUCCESS if options.nodegroup is None: fn = _GlobalWatcher else: # Per-nodegroup watcher fn = _GroupWatcher try: return fn(options) except (SystemExit, KeyboardInterrupt): raise except NotMasterError: logging.debug("Not master, exiting") return constants.EXIT_NOTMASTER except errors.ResolverError, err: logging.error("Cannot resolve hostname '%s', exiting", err.args[0]) return constants.EXIT_NODESETUP_ERROR except errors.JobQueueFull: logging.error("Job queue is full, can't query cluster state") except errors.JobQueueDrainError: logging.error("Job queue is drained, can't maintain cluster state") except Exception, err: logging.exception(str(err)) return constants.EXIT_FAILURE return constants.EXIT_SUCCESS ganeti-2.9.3/lib/watcher/state.py0000644000000000000000000001407412271422343016675 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module keeping state for Ganeti watcher. """ import os import time import logging from ganeti import utils from ganeti import serializer from ganeti import errors # Delete any record that is older than 8 hours; this value is based on # the fact that the current retry counter is 5, and watcher runs every # 5 minutes, so it takes around half an hour to exceed the retry # counter, so 8 hours (16*1/2h) seems like a reasonable reset time RETRY_EXPIRATION = 8 * 3600 KEY_RESTART_COUNT = "restart_count" KEY_RESTART_WHEN = "restart_when" KEY_BOOT_ID = "bootid" def OpenStateFile(path): """Opens the state file and acquires a lock on it. @type path: string @param path: Path to state file """ # The two-step dance below is necessary to allow both opening existing # file read/write and creating if not existing. Vanilla open will truncate # an existing file -or- allow creating if not existing. statefile_fd = os.open(path, os.O_RDWR | os.O_CREAT) # Try to acquire lock on state file. If this fails, another watcher instance # might already be running or another program is temporarily blocking the # watcher from running. try: utils.LockFile(statefile_fd) except errors.LockError, err: logging.error("Can't acquire lock on state file %s: %s", path, err) return None return os.fdopen(statefile_fd, "w+") class WatcherState(object): """Interface to a state file recording restart attempts. """ def __init__(self, statefile): """Open, lock, read and parse the file. @type statefile: file @param statefile: State file object """ self.statefile = statefile try: state_data = self.statefile.read() if not state_data: self._data = {} else: self._data = serializer.Load(state_data) except Exception, msg: # pylint: disable=W0703 # Ignore errors while loading the file and treat it as empty self._data = {} logging.warning(("Invalid state file. Using defaults." " Error message: %s"), msg) if "instance" not in self._data: self._data["instance"] = {} if "node" not in self._data: self._data["node"] = {} self._orig_data = serializer.Dump(self._data) def Save(self, filename): """Save state to file, then unlock and close it. """ assert self.statefile serialized_form = serializer.Dump(self._data) if self._orig_data == serialized_form: logging.debug("Data didn't change, just touching status file") os.utime(filename, None) return # We need to make sure the file is locked before renaming it, otherwise # starting ganeti-watcher again at the same time will create a conflict. fd = utils.WriteFile(filename, data=serialized_form, prewrite=utils.LockFile, close=False) self.statefile = os.fdopen(fd, "w+") def Close(self): """Unlock configuration file and close it. """ assert self.statefile # Files are automatically unlocked when closing them self.statefile.close() self.statefile = None def GetNodeBootID(self, name): """Returns the last boot ID of a node or None. """ ndata = self._data["node"] if name in ndata and KEY_BOOT_ID in ndata[name]: return ndata[name][KEY_BOOT_ID] return None def SetNodeBootID(self, name, bootid): """Sets the boot ID of a node. """ assert bootid ndata = self._data["node"] ndata.setdefault(name, {})[KEY_BOOT_ID] = bootid def NumberOfRestartAttempts(self, instance_name): """Returns number of previous restart attempts. 
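The counter is incremented by L{RecordRestartAttempt} and disappears when a record expires or is removed (see L{MaintainInstanceList} and L{RemoveInstance}). Illustrative use, with C{notepad}, the instance name and C{retry_limit} being placeholders:

  if notepad.NumberOfRestartAttempts("instance1.example.com") < retry_limit:
    notepad.RecordRestartAttempt("instance1.example.com")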
@type instance_name: string @param instance_name: the name of the instance to look up """ idata = self._data["instance"] if instance_name in idata: return idata[instance_name][KEY_RESTART_COUNT] return 0 def MaintainInstanceList(self, instances): """Perform maintenance on the recorded instances. @type instances: list of string @param instances: the list of currently existing instances """ idict = self._data["instance"] # First, delete obsolete instances obsolete_instances = set(idict).difference(instances) for inst in obsolete_instances: logging.debug("Forgetting obsolete instance %s", inst) idict.pop(inst, None) # Second, delete expired records earliest = time.time() - RETRY_EXPIRATION expired_instances = [i for i in idict if idict[i][KEY_RESTART_WHEN] < earliest] for inst in expired_instances: logging.debug("Expiring record for instance %s", inst) idict.pop(inst, None) def RecordRestartAttempt(self, instance_name): """Record a restart attempt. @type instance_name: string @param instance_name: the name of the instance being restarted """ idata = self._data["instance"] inst = idata.setdefault(instance_name, {}) inst[KEY_RESTART_WHEN] = time.time() inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1 def RemoveInstance(self, instance_name): """Update state to reflect that a machine is running. This method removes the record for a named instance (as we only track instances that are down). @type instance_name: string @param instance_name: the name of the instance to remove from books """ idata = self._data["instance"] idata.pop(instance_name, None) ganeti-2.9.3/lib/rapi/0000755000000000000000000000000012271445544014503 5ustar00rootroot00000000000000ganeti-2.9.3/lib/rapi/connector.py0000644000000000000000000002222612244641676017057 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Remote API connection map. """ # pylint: disable=C0103 # C0103: Invalid name, since the R_* names are not conforming import re import urlparse from ganeti import constants from ganeti import http from ganeti import utils from ganeti.rapi import rlib2 _NAME_PATTERN = r"[\w\._-]+" _DISK_PATTERN = r"\d+" # the connection map is created at the end of this file CONNECTOR = {} class Mapper: """Map resource to method. """ def __init__(self, connector=None): """Resource mapper constructor. @param connector: a dictionary mapping URL path regexps to methods """ if connector is None: connector = CONNECTOR self._connector = connector def getController(self, uri): """Find method for a given URI. @param uri: string with URI @return: None if no method is found or a tuple containing the following fields: - method: name of method mapped to URI - items: a list of variable items in the path - args: a dictionary with additional parameters from URL """ if "?"
in uri: (path, query) = uri.split("?", 1) args = urlparse.parse_qs(query) else: path = uri query = None args = {} # Try to find handler for request path result = utils.FindMatch(self._connector, path) if result is None: raise http.HttpNotFound() (handler, groups) = result return (handler, groups, args) def _ConvertPattern(value): """Converts URI pattern into a regular expression group. Used by L{_CompileHandlerPath}. """ if isinstance(value, UriPattern): return "(%s)" % value.content else: return value def _CompileHandlerPath(*args): """Compiles path for RAPI resource into regular expression. @return: Compiled regular expression object """ return re.compile("^%s$" % "".join(map(_ConvertPattern, args))) class UriPattern(object): __slots__ = [ "content", ] def __init__(self, content): self.content = content def GetHandlers(node_name_pattern, instance_name_pattern, group_name_pattern, network_name_pattern, job_id_pattern, disk_pattern, query_res_pattern, translate=None): """Returns all supported resources and their handlers. C{node_name_pattern} and the other C{*_pattern} parameters are wrapped in L{UriPattern} and, if used in a URI, passed to the function specified using C{translate}. C{translate} receives 1..N parameters which are either plain strings or instances of L{UriPattern} and returns a dictionary key suitable for the caller of C{GetHandlers}. The default implementation in L{_CompileHandlerPath} returns a compiled regular expression in which each pattern is a group. @rtype: dict """ if translate is None: translate_fn = _CompileHandlerPath else: translate_fn = translate node_name = UriPattern(node_name_pattern) instance_name = UriPattern(instance_name_pattern) group_name = UriPattern(group_name_pattern) network_name = UriPattern(network_name_pattern) job_id = UriPattern(job_id_pattern) disk = UriPattern(disk_pattern) query_res = UriPattern(query_res_pattern) # Important note: New resources should always be added under /2. During a # discussion in July 2010 it was decided that having per-resource versions # is more flexible and future-compatible than versioning the whole remote # API. # TODO: Consider a different data structure where all keys are of the same # type. Strings are faster to look up in a dictionary than iterating and # matching regular expressions, therefore maybe two separate dictionaries # should be used. 
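# As an illustration: with the default _CompileHandlerPath translation, the
# entry translate_fn("/2/nodes/", node_name) below produces the key
# re.compile(r"^/2/nodes/([\w\._-]+)$"): node_name wraps _NAME_PATTERN in a
# UriPattern, which _ConvertPattern turns into a capturing group, and that
# group captures the node name for the R_2_nodes_name handler.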
return { "/": rlib2.R_root, "/2": rlib2.R_2, "/version": rlib2.R_version, "/2/nodes": rlib2.R_2_nodes, translate_fn("/2/nodes/", node_name): rlib2.R_2_nodes_name, translate_fn("/2/nodes/", node_name, "/powercycle"): rlib2.R_2_nodes_name_powercycle, translate_fn("/2/nodes/", node_name, "/tags"): rlib2.R_2_nodes_name_tags, translate_fn("/2/nodes/", node_name, "/role"): rlib2.R_2_nodes_name_role, translate_fn("/2/nodes/", node_name, "/evacuate"): rlib2.R_2_nodes_name_evacuate, translate_fn("/2/nodes/", node_name, "/migrate"): rlib2.R_2_nodes_name_migrate, translate_fn("/2/nodes/", node_name, "/modify"): rlib2.R_2_nodes_name_modify, translate_fn("/2/nodes/", node_name, "/storage"): rlib2.R_2_nodes_name_storage, translate_fn("/2/nodes/", node_name, "/storage/modify"): rlib2.R_2_nodes_name_storage_modify, translate_fn("/2/nodes/", node_name, "/storage/repair"): rlib2.R_2_nodes_name_storage_repair, "/2/instances": rlib2.R_2_instances, translate_fn("/2/instances/", instance_name): rlib2.R_2_instances_name, translate_fn("/2/instances/", instance_name, "/info"): rlib2.R_2_instances_name_info, translate_fn("/2/instances/", instance_name, "/tags"): rlib2.R_2_instances_name_tags, translate_fn("/2/instances/", instance_name, "/reboot"): rlib2.R_2_instances_name_reboot, translate_fn("/2/instances/", instance_name, "/reinstall"): rlib2.R_2_instances_name_reinstall, translate_fn("/2/instances/", instance_name, "/replace-disks"): rlib2.R_2_instances_name_replace_disks, translate_fn("/2/instances/", instance_name, "/shutdown"): rlib2.R_2_instances_name_shutdown, translate_fn("/2/instances/", instance_name, "/startup"): rlib2.R_2_instances_name_startup, translate_fn("/2/instances/", instance_name, "/activate-disks"): rlib2.R_2_instances_name_activate_disks, translate_fn("/2/instances/", instance_name, "/deactivate-disks"): rlib2.R_2_instances_name_deactivate_disks, translate_fn("/2/instances/", instance_name, "/recreate-disks"): rlib2.R_2_instances_name_recreate_disks, translate_fn("/2/instances/", instance_name, "/prepare-export"): rlib2.R_2_instances_name_prepare_export, translate_fn("/2/instances/", instance_name, "/export"): rlib2.R_2_instances_name_export, translate_fn("/2/instances/", instance_name, "/migrate"): rlib2.R_2_instances_name_migrate, translate_fn("/2/instances/", instance_name, "/failover"): rlib2.R_2_instances_name_failover, translate_fn("/2/instances/", instance_name, "/rename"): rlib2.R_2_instances_name_rename, translate_fn("/2/instances/", instance_name, "/modify"): rlib2.R_2_instances_name_modify, translate_fn("/2/instances/", instance_name, "/disk/", disk, "/grow"): rlib2.R_2_instances_name_disk_grow, translate_fn("/2/instances/", instance_name, "/console"): rlib2.R_2_instances_name_console, "/2/networks": rlib2.R_2_networks, translate_fn("/2/networks/", network_name): rlib2.R_2_networks_name, translate_fn("/2/networks/", network_name, "/connect"): rlib2.R_2_networks_name_connect, translate_fn("/2/networks/", network_name, "/disconnect"): rlib2.R_2_networks_name_disconnect, translate_fn("/2/networks/", network_name, "/modify"): rlib2.R_2_networks_name_modify, translate_fn("/2/networks/", network_name, "/tags"): rlib2.R_2_networks_name_tags, "/2/groups": rlib2.R_2_groups, translate_fn("/2/groups/", group_name): rlib2.R_2_groups_name, translate_fn("/2/groups/", group_name, "/modify"): rlib2.R_2_groups_name_modify, translate_fn("/2/groups/", group_name, "/rename"): rlib2.R_2_groups_name_rename, translate_fn("/2/groups/", group_name, "/assign-nodes"): rlib2.R_2_groups_name_assign_nodes, 
translate_fn("/2/groups/", group_name, "/tags"): rlib2.R_2_groups_name_tags, "/2/jobs": rlib2.R_2_jobs, translate_fn("/2/jobs/", job_id): rlib2.R_2_jobs_id, translate_fn("/2/jobs/", job_id, "/wait"): rlib2.R_2_jobs_id_wait, "/2/instances-multi-alloc": rlib2.R_2_instances_multi_alloc, "/2/tags": rlib2.R_2_tags, "/2/info": rlib2.R_2_info, "/2/os": rlib2.R_2_os, "/2/redistribute-config": rlib2.R_2_redist_config, "/2/features": rlib2.R_2_features, "/2/modify": rlib2.R_2_cluster_modify, translate_fn("/2/query/", query_res): rlib2.R_2_query, translate_fn("/2/query/", query_res, "/fields"): rlib2.R_2_query_fields, } CONNECTOR.update(GetHandlers(_NAME_PATTERN, _NAME_PATTERN, _NAME_PATTERN, _NAME_PATTERN, constants.JOB_ID_TEMPLATE, _DISK_PATTERN, _NAME_PATTERN)) ganeti-2.9.3/lib/rapi/client.py0000644000000000000000000017712112271422343016334 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti RAPI client. @attention: To use the RAPI client, the application B{must} call C{pycurl.global_init} during initialization and C{pycurl.global_cleanup} before exiting the process. This is very important in multi-threaded programs. See curl_global_init(3) and curl_global_cleanup(3) for details. The decorator L{UsesRapiClient} can be used. """ # No Ganeti-specific modules should be imported. The RAPI client is supposed to # be standalone. 
import logging import simplejson import socket import urllib import threading import pycurl import time try: from cStringIO import StringIO except ImportError: from StringIO import StringIO GANETI_RAPI_PORT = 5080 GANETI_RAPI_VERSION = 2 HTTP_DELETE = "DELETE" HTTP_GET = "GET" HTTP_PUT = "PUT" HTTP_POST = "POST" HTTP_OK = 200 HTTP_NOT_FOUND = 404 HTTP_APP_JSON = "application/json" REPLACE_DISK_PRI = "replace_on_primary" REPLACE_DISK_SECONDARY = "replace_on_secondary" REPLACE_DISK_CHG = "replace_new_secondary" REPLACE_DISK_AUTO = "replace_auto" NODE_EVAC_PRI = "primary-only" NODE_EVAC_SEC = "secondary-only" NODE_EVAC_ALL = "all" NODE_ROLE_DRAINED = "drained" NODE_ROLE_MASTER_CANDIATE = "master-candidate" NODE_ROLE_MASTER = "master" NODE_ROLE_OFFLINE = "offline" NODE_ROLE_REGULAR = "regular" JOB_STATUS_QUEUED = "queued" JOB_STATUS_WAITING = "waiting" JOB_STATUS_CANCELING = "canceling" JOB_STATUS_RUNNING = "running" JOB_STATUS_CANCELED = "canceled" JOB_STATUS_SUCCESS = "success" JOB_STATUS_ERROR = "error" JOB_STATUS_PENDING = frozenset([ JOB_STATUS_QUEUED, JOB_STATUS_WAITING, JOB_STATUS_CANCELING, ]) JOB_STATUS_FINALIZED = frozenset([ JOB_STATUS_CANCELED, JOB_STATUS_SUCCESS, JOB_STATUS_ERROR, ]) JOB_STATUS_ALL = frozenset([ JOB_STATUS_RUNNING, ]) | JOB_STATUS_PENDING | JOB_STATUS_FINALIZED # Legacy name JOB_STATUS_WAITLOCK = JOB_STATUS_WAITING # Internal constants _REQ_DATA_VERSION_FIELD = "__version__" _QPARAM_DRY_RUN = "dry-run" _QPARAM_FORCE = "force" # Feature strings INST_CREATE_REQV1 = "instance-create-reqv1" INST_REINSTALL_REQV1 = "instance-reinstall-reqv1" NODE_MIGRATE_REQV1 = "node-migrate-reqv1" NODE_EVAC_RES1 = "node-evac-res1" # Old feature constant names in case they're referenced by users of this module _INST_CREATE_REQV1 = INST_CREATE_REQV1 _INST_REINSTALL_REQV1 = INST_REINSTALL_REQV1 _NODE_MIGRATE_REQV1 = NODE_MIGRATE_REQV1 _NODE_EVAC_RES1 = NODE_EVAC_RES1 #: Resolver errors ECODE_RESOLVER = "resolver_error" #: Not enough resources (iallocator failure, disk space, memory, etc.) ECODE_NORES = "insufficient_resources" #: Temporarily out of resources; operation can be tried again ECODE_TEMP_NORES = "temp_insufficient_resources" #: Wrong arguments (at syntax level) ECODE_INVAL = "wrong_input" #: Wrong entity state ECODE_STATE = "wrong_state" #: Entity not found ECODE_NOENT = "unknown_entity" #: Entity already exists ECODE_EXISTS = "already_exists" #: Resource not unique (e.g. MAC or IP duplication) ECODE_NOTUNIQUE = "resource_not_unique" #: Internal cluster error ECODE_FAULT = "internal_error" #: Environment error (e.g. node disk error) ECODE_ENVIRON = "environment_error" #: List of all failure types ECODE_ALL = frozenset([ ECODE_RESOLVER, ECODE_NORES, ECODE_TEMP_NORES, ECODE_INVAL, ECODE_STATE, ECODE_NOENT, ECODE_EXISTS, ECODE_NOTUNIQUE, ECODE_FAULT, ECODE_ENVIRON, ]) # Older pycURL versions don't have all error constants try: _CURLE_SSL_CACERT = pycurl.E_SSL_CACERT _CURLE_SSL_CACERT_BADFILE = pycurl.E_SSL_CACERT_BADFILE except AttributeError: _CURLE_SSL_CACERT = 60 _CURLE_SSL_CACERT_BADFILE = 77 _CURL_SSL_CERT_ERRORS = frozenset([ _CURLE_SSL_CACERT, _CURLE_SSL_CACERT_BADFILE, ]) class Error(Exception): """Base error class for this module. """ pass class GanetiApiError(Error): """Generic error raised from Ganeti API. """ def __init__(self, msg, code=None): Error.__init__(self, msg) self.code = code class CertificateError(GanetiApiError): """Raised when a problem is found with the SSL certificate.
""" pass def _AppendIf(container, condition, value): """Appends to a list if a condition evaluates to truth. """ if condition: container.append(value) return condition def _AppendDryRunIf(container, condition): """Appends a "dry-run" parameter if a condition evaluates to truth. """ return _AppendIf(container, condition, (_QPARAM_DRY_RUN, 1)) def _AppendForceIf(container, condition): """Appends a "force" parameter if a condition evaluates to truth. """ return _AppendIf(container, condition, (_QPARAM_FORCE, 1)) def _SetItemIf(container, condition, item, value): """Sets an item if a condition evaluates to truth. """ if condition: container[item] = value return condition def UsesRapiClient(fn): """Decorator for code using RAPI client to initialize pycURL. """ def wrapper(*args, **kwargs): # curl_global_init(3) and curl_global_cleanup(3) must be called with only # one thread running. This check is just a safety measure -- it doesn't # cover all cases. assert threading.activeCount() == 1, \ "Found active threads when initializing pycURL" pycurl.global_init(pycurl.GLOBAL_ALL) try: return fn(*args, **kwargs) finally: pycurl.global_cleanup() return wrapper def GenericCurlConfig(verbose=False, use_signal=False, use_curl_cabundle=False, cafile=None, capath=None, proxy=None, verify_hostname=False, connect_timeout=None, timeout=None, _pycurl_version_fn=pycurl.version_info): """Curl configuration function generator. @type verbose: bool @param verbose: Whether to set cURL to verbose mode @type use_signal: bool @param use_signal: Whether to allow cURL to use signals @type use_curl_cabundle: bool @param use_curl_cabundle: Whether to use cURL's default CA bundle @type cafile: string @param cafile: In which file we can find the certificates @type capath: string @param capath: In which directory we can find the certificates @type proxy: string @param proxy: Proxy to use, None for default behaviour and empty string for disabling proxies (see curl_easy_setopt(3)) @type verify_hostname: bool @param verify_hostname: Whether to verify the remote peer certificate's commonName @type connect_timeout: number @param connect_timeout: Timeout for establishing connection in seconds @type timeout: number @param timeout: Timeout for complete transfer in seconds (see curl_easy_setopt(3)). """ if use_curl_cabundle and (cafile or capath): raise Error("Can not use default CA bundle when CA file or path is set") def _ConfigCurl(curl, logger): """Configures a cURL object @type curl: pycurl.Curl @param curl: cURL object """ logger.debug("Using cURL version %s", pycurl.version) # pycurl.version_info returns a tuple with information about the used # version of libcurl. Item 5 is the SSL library linked to it. # e.g.: (3, '7.18.0', 463360, 'x86_64-pc-linux-gnu', 1581, 'GnuTLS/2.0.4', # 0, '1.2.3.3', ...) 
sslver = _pycurl_version_fn()[5] if not sslver: raise Error("No SSL support in cURL") lcsslver = sslver.lower() if lcsslver.startswith("openssl/"): pass elif lcsslver.startswith("nss/"): # TODO: investigate compatibility beyond a simple test pass elif lcsslver.startswith("gnutls/"): if capath: raise Error("cURL linked against GnuTLS has no support for a" " CA path (%s)" % (pycurl.version, )) else: raise NotImplementedError("cURL uses unsupported SSL version '%s'" % sslver) curl.setopt(pycurl.VERBOSE, verbose) curl.setopt(pycurl.NOSIGNAL, not use_signal) # Whether to verify remote peer's CN if verify_hostname: # curl_easy_setopt(3): "When CURLOPT_SSL_VERIFYHOST is 2, that # certificate must indicate that the server is the server to which you # meant to connect, or the connection fails. [...] When the value is 1, # the certificate must contain a Common Name field, but it doesn't matter # what name it says. [...]" curl.setopt(pycurl.SSL_VERIFYHOST, 2) else: curl.setopt(pycurl.SSL_VERIFYHOST, 0) if cafile or capath or use_curl_cabundle: # Require certificates to be checked curl.setopt(pycurl.SSL_VERIFYPEER, True) if cafile: curl.setopt(pycurl.CAINFO, str(cafile)) if capath: curl.setopt(pycurl.CAPATH, str(capath)) # Not changing anything for using default CA bundle else: # Disable SSL certificate verification curl.setopt(pycurl.SSL_VERIFYPEER, False) if proxy is not None: curl.setopt(pycurl.PROXY, str(proxy)) # Timeouts if connect_timeout is not None: curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout) if timeout is not None: curl.setopt(pycurl.TIMEOUT, timeout) return _ConfigCurl class GanetiRapiClient(object): # pylint: disable=R0904 """Ganeti RAPI client. """ USER_AGENT = "Ganeti RAPI Client" _json_encoder = simplejson.JSONEncoder(sort_keys=True) def __init__(self, host, port=GANETI_RAPI_PORT, username=None, password=None, logger=logging, curl_config_fn=None, curl_factory=None): """Initializes this class. @type host: string @param host: the ganeti cluster master to interact with @type port: int @param port: the port on which the RAPI is running (default is 5080) @type username: string @param username: the username to connect with @type password: string @param password: the password to connect with @type curl_config_fn: callable @param curl_config_fn: Function to configure C{pycurl.Curl} object @param logger: Logging object """ self._username = username self._password = password self._logger = logger self._curl_config_fn = curl_config_fn self._curl_factory = curl_factory try: socket.inet_pton(socket.AF_INET6, host) address = "[%s]:%s" % (host, port) except socket.error: address = "%s:%s" % (host, port) self._base_url = "https://%s" % address if username is not None: if password is None: raise Error("Password not specified") elif password: raise Error("Specified password without username") def _CreateCurl(self): """Creates a cURL object. 
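Note that the defaults set here disable certificate verification (C{SSL_VERIFYPEER} and C{SSL_VERIFYHOST} are cleared); a C{curl_config_fn} such as the one produced by L{GenericCurlConfig} runs last and may override these settings.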
""" # Create pycURL object if no factory is provided if self._curl_factory: curl = self._curl_factory() else: curl = pycurl.Curl() # Default cURL settings curl.setopt(pycurl.VERBOSE, False) curl.setopt(pycurl.FOLLOWLOCATION, False) curl.setopt(pycurl.MAXREDIRS, 5) curl.setopt(pycurl.NOSIGNAL, True) curl.setopt(pycurl.USERAGENT, self.USER_AGENT) curl.setopt(pycurl.SSL_VERIFYHOST, 0) curl.setopt(pycurl.SSL_VERIFYPEER, False) curl.setopt(pycurl.HTTPHEADER, [ "Accept: %s" % HTTP_APP_JSON, "Content-type: %s" % HTTP_APP_JSON, ]) assert ((self._username is None and self._password is None) ^ (self._username is not None and self._password is not None)) if self._username: # Setup authentication curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) curl.setopt(pycurl.USERPWD, str("%s:%s" % (self._username, self._password))) # Call external configuration function if self._curl_config_fn: self._curl_config_fn(curl, self._logger) return curl @staticmethod def _EncodeQuery(query): """Encode query values for RAPI URL. @type query: list of two-tuples @param query: Query arguments @rtype: list @return: Query list with encoded values """ result = [] for name, value in query: if value is None: result.append((name, "")) elif isinstance(value, bool): # Boolean values must be encoded as 0 or 1 result.append((name, int(value))) elif isinstance(value, (list, tuple, dict)): raise ValueError("Invalid query data type %r" % type(value).__name__) else: result.append((name, value)) return result def _SendRequest(self, method, path, query, content): """Sends an HTTP request. This constructs a full URL, encodes and decodes HTTP bodies, and handles invalid responses in a pythonic way. @type method: string @param method: HTTP method to use @type path: string @param path: HTTP URL path @type query: list of two-tuples @param query: query arguments to pass to urllib.urlencode @type content: str or None @param content: HTTP body content @rtype: str @return: JSON-Decoded response @raises CertificateError: If an invalid SSL certificate is found @raises GanetiApiError: If an invalid response is returned """ assert path.startswith("/") curl = self._CreateCurl() if content is not None: encoded_content = self._json_encoder.encode(content) else: encoded_content = "" # Build URL urlparts = [self._base_url, path] if query: urlparts.append("?") urlparts.append(urllib.urlencode(self._EncodeQuery(query))) url = "".join(urlparts) self._logger.debug("Sending request %s %s (content=%r)", method, url, encoded_content) # Buffer for response encoded_resp_body = StringIO() # Configure cURL curl.setopt(pycurl.CUSTOMREQUEST, str(method)) curl.setopt(pycurl.URL, str(url)) curl.setopt(pycurl.POSTFIELDS, str(encoded_content)) curl.setopt(pycurl.WRITEFUNCTION, encoded_resp_body.write) try: # Send request and wait for response try: curl.perform() except pycurl.error, err: if err.args[0] in _CURL_SSL_CERT_ERRORS: raise CertificateError("SSL certificate error %s" % err, code=err.args[0]) raise GanetiApiError(str(err), code=err.args[0]) finally: # Reset settings to not keep references to large objects in memory # between requests curl.setopt(pycurl.POSTFIELDS, "") curl.setopt(pycurl.WRITEFUNCTION, lambda _: None) # Get HTTP response code http_code = curl.getinfo(pycurl.RESPONSE_CODE) # Was anything written to the response buffer? 
if encoded_resp_body.tell(): response_content = simplejson.loads(encoded_resp_body.getvalue()) else: response_content = None if http_code != HTTP_OK: if isinstance(response_content, dict): msg = ("%s %s: %s" % (response_content["code"], response_content["message"], response_content["explain"])) else: msg = str(response_content) raise GanetiApiError(msg, code=http_code) return response_content def GetVersion(self): """Gets the Remote API version running on the cluster. @rtype: int @return: Ganeti Remote API version """ return self._SendRequest(HTTP_GET, "/version", None, None) def GetFeatures(self): """Gets the list of optional features supported by RAPI server. @rtype: list @return: List of optional features """ try: return self._SendRequest(HTTP_GET, "/%s/features" % GANETI_RAPI_VERSION, None, None) except GanetiApiError, err: # Older RAPI servers don't support this resource if err.code == HTTP_NOT_FOUND: return [] raise def GetOperatingSystems(self): """Gets the Operating Systems running in the Ganeti cluster. @rtype: list of str @return: operating systems """ return self._SendRequest(HTTP_GET, "/%s/os" % GANETI_RAPI_VERSION, None, None) def GetInfo(self): """Gets info about the cluster. @rtype: dict @return: information about the cluster """ return self._SendRequest(HTTP_GET, "/%s/info" % GANETI_RAPI_VERSION, None, None) def RedistributeConfig(self): """Tells the cluster to redistribute its configuration files. @rtype: string @return: job id """ return self._SendRequest(HTTP_PUT, "/%s/redistribute-config" % GANETI_RAPI_VERSION, None, None) def ModifyCluster(self, **kwargs): """Modifies cluster parameters. More details for parameters can be found in the RAPI documentation. @rtype: string @return: job id """ body = kwargs return self._SendRequest(HTTP_PUT, "/%s/modify" % GANETI_RAPI_VERSION, None, body) def GetClusterTags(self): """Gets the cluster tags. @rtype: list of str @return: cluster tags """ return self._SendRequest(HTTP_GET, "/%s/tags" % GANETI_RAPI_VERSION, None, None) def AddClusterTags(self, tags, dry_run=False): """Adds tags to the cluster. @type tags: list of str @param tags: tags to add to the cluster @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, "/%s/tags" % GANETI_RAPI_VERSION, query, None) def DeleteClusterTags(self, tags, dry_run=False): """Deletes tags from the cluster. @type tags: list of str @param tags: tags to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, "/%s/tags" % GANETI_RAPI_VERSION, query, None) def GetInstances(self, bulk=False): """Gets information about instances on the cluster. @type bulk: bool @param bulk: whether to return all information about all instances @rtype: list of dict or list of str @return: if bulk is True, info about the instances, else a list of instances """ query = [] _AppendIf(query, bulk, ("bulk", 1)) instances = self._SendRequest(HTTP_GET, "/%s/instances" % GANETI_RAPI_VERSION, query, None) if bulk: return instances else: return [i["id"] for i in instances] def GetInstance(self, instance): """Gets information about an instance. 
@type instance: str @param instance: instance whose info to return @rtype: dict @return: info about the instance """ return self._SendRequest(HTTP_GET, ("/%s/instances/%s" % (GANETI_RAPI_VERSION, instance)), None, None) def GetInstanceInfo(self, instance, static=None): """Gets information about an instance. @type instance: string @param instance: Instance name @rtype: string @return: Job ID """ if static is not None: query = [("static", static)] else: query = None return self._SendRequest(HTTP_GET, ("/%s/instances/%s/info" % (GANETI_RAPI_VERSION, instance)), query, None) @staticmethod def _UpdateWithKwargs(base, **kwargs): """Updates the base with params from kwargs. @param base: The base dict, filled with required fields @note: This is an inplace update of base """ conflicts = set(kwargs.iterkeys()) & set(base.iterkeys()) if conflicts: raise GanetiApiError("Required fields can not be specified as" " keywords: %s" % ", ".join(conflicts)) base.update((key, value) for key, value in kwargs.iteritems() if key != "dry_run") def InstanceAllocation(self, mode, name, disk_template, disks, nics, **kwargs): """Generates an instance allocation as used by multiallocate. More details for parameters can be found in the RAPI documentation. It is the same as used by CreateInstance. @type mode: string @param mode: Instance creation mode @type name: string @param name: Hostname of the instance to create @type disk_template: string @param disk_template: Disk template for instance (e.g. plain, diskless, file, or drbd) @type disks: list of dicts @param disks: List of disk definitions @type nics: list of dicts @param nics: List of NIC definitions @return: A dict with the generated entry """ # All required fields for request data version 1 alloc = { "mode": mode, "name": name, "disk_template": disk_template, "disks": disks, "nics": nics, } self._UpdateWithKwargs(alloc, **kwargs) return alloc def InstancesMultiAlloc(self, instances, **kwargs): """Tries to allocate multiple instances. More details for parameters can be found in the RAPI documentation. @param instances: A list of L{InstanceAllocation} results """ query = [] body = { "instances": instances, } self._UpdateWithKwargs(body, **kwargs) _AppendDryRunIf(query, kwargs.get("dry_run")) return self._SendRequest(HTTP_POST, "/%s/instances-multi-alloc" % GANETI_RAPI_VERSION, query, body) def CreateInstance(self, mode, name, disk_template, disks, nics, **kwargs): """Creates a new instance. More details for parameters can be found in the RAPI documentation. @type mode: string @param mode: Instance creation mode @type name: string @param name: Hostname of the instance to create @type disk_template: string @param disk_template: Disk template for instance (e.g. plain, diskless, file, or drbd) @type disks: list of dicts @param disks: List of disk definitions @type nics: list of dicts @param nics: List of NIC definitions @type dry_run: bool @keyword dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, kwargs.get("dry_run")) if _INST_CREATE_REQV1 in self.GetFeatures(): body = self.InstanceAllocation(mode, name, disk_template, disks, nics, **kwargs) body[_REQ_DATA_VERSION_FIELD] = 1 else: raise GanetiApiError("Server does not support new-style (version 1)" " instance creation requests") return self._SendRequest(HTTP_POST, "/%s/instances" % GANETI_RAPI_VERSION, query, body) def DeleteInstance(self, instance, dry_run=False): """Deletes an instance. 
@type instance: str @param instance: the instance to delete @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/instances/%s" % (GANETI_RAPI_VERSION, instance)), query, None) def ModifyInstance(self, instance, **kwargs): """Modifies an instance. More details for parameters can be found in the RAPI documentation. @type instance: string @param instance: Instance name @rtype: string @return: job id """ body = kwargs return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/modify" % (GANETI_RAPI_VERSION, instance)), None, body) def ActivateInstanceDisks(self, instance, ignore_size=None): """Activates an instance's disks. @type instance: string @param instance: Instance name @type ignore_size: bool @param ignore_size: Whether to ignore recorded size @rtype: string @return: job id """ query = [] _AppendIf(query, ignore_size, ("ignore_size", 1)) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/activate-disks" % (GANETI_RAPI_VERSION, instance)), query, None) def DeactivateInstanceDisks(self, instance): """Deactivates an instance's disks. @type instance: string @param instance: Instance name @rtype: string @return: job id """ return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/deactivate-disks" % (GANETI_RAPI_VERSION, instance)), None, None) def RecreateInstanceDisks(self, instance, disks=None, nodes=None): """Recreate an instance's disks. @type instance: string @param instance: Instance name @type disks: list of int @param disks: List of disk indexes @type nodes: list of string @param nodes: New instance nodes, if relocation is desired @rtype: string @return: job id """ body = {} _SetItemIf(body, disks is not None, "disks", disks) _SetItemIf(body, nodes is not None, "nodes", nodes) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/recreate-disks" % (GANETI_RAPI_VERSION, instance)), None, body) def GrowInstanceDisk(self, instance, disk, amount, wait_for_sync=None): """Grows a disk of an instance. More details for parameters can be found in the RAPI documentation. @type instance: string @param instance: Instance name @type disk: integer @param disk: Disk index @type amount: integer @param amount: Grow disk by this amount (MiB) @type wait_for_sync: bool @param wait_for_sync: Wait for disk to synchronize @rtype: string @return: job id """ body = { "amount": amount, } _SetItemIf(body, wait_for_sync is not None, "wait_for_sync", wait_for_sync) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/disk/%s/grow" % (GANETI_RAPI_VERSION, instance, disk)), None, body) def GetInstanceTags(self, instance): """Gets tags for an instance. @type instance: str @param instance: instance whose tags to return @rtype: list of str @return: tags for the instance """ return self._SendRequest(HTTP_GET, ("/%s/instances/%s/tags" % (GANETI_RAPI_VERSION, instance)), None, None) def AddInstanceTags(self, instance, tags, dry_run=False): """Adds tags to an instance. @type instance: str @param instance: instance to add tags to @type tags: list of str @param tags: tags to add to the instance @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/tags" % (GANETI_RAPI_VERSION, instance)), query, None) def DeleteInstanceTags(self, instance, tags, dry_run=False): """Deletes tags from an instance. 
@type instance: str @param instance: instance to delete tags from @type tags: list of str @param tags: tags to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/instances/%s/tags" % (GANETI_RAPI_VERSION, instance)), query, None) def RebootInstance(self, instance, reboot_type=None, ignore_secondaries=None, dry_run=False, reason=None): """Reboots an instance. @type instance: str @param instance: instance to reboot @type reboot_type: str @param reboot_type: one of: hard, soft, full @type ignore_secondaries: bool @param ignore_secondaries: if True, ignores errors for the secondary node while re-assembling disks (in hard-reboot mode only) @type dry_run: bool @param dry_run: whether to perform a dry run @type reason: string @param reason: the reason for the reboot @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) _AppendIf(query, reboot_type, ("type", reboot_type)) _AppendIf(query, ignore_secondaries is not None, ("ignore_secondaries", ignore_secondaries)) _AppendIf(query, reason, ("reason", reason)) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/reboot" % (GANETI_RAPI_VERSION, instance)), query, None) def ShutdownInstance(self, instance, dry_run=False, no_remember=False, reason=None, **kwargs): """Shuts down an instance. @type instance: str @param instance: the instance to shut down @type dry_run: bool @param dry_run: whether to perform a dry run @type no_remember: bool @param no_remember: if true, will not record the state change @type reason: string @param reason: the reason for the shutdown @rtype: string @return: job id """ query = [] body = kwargs _AppendDryRunIf(query, dry_run) _AppendIf(query, no_remember, ("no_remember", 1)) _AppendIf(query, reason, ("reason", reason)) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/shutdown" % (GANETI_RAPI_VERSION, instance)), query, body) def StartupInstance(self, instance, dry_run=False, no_remember=False, reason=None): """Starts up an instance. @type instance: str @param instance: the instance to start up @type dry_run: bool @param dry_run: whether to perform a dry run @type no_remember: bool @param no_remember: if true, will not record the state change @type reason: string @param reason: the reason for the startup @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) _AppendIf(query, no_remember, ("no_remember", 1)) _AppendIf(query, reason, ("reason", reason)) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/startup" % (GANETI_RAPI_VERSION, instance)), query, None) def ReinstallInstance(self, instance, os=None, no_startup=False, osparams=None): """Reinstalls an instance. @type instance: str @param instance: The instance to reinstall @type os: str or None @param os: The operating system to reinstall. 
If None, the instance's current operating system will be installed again @type no_startup: bool @param no_startup: If true, do not start the instance automatically after the reinstall @rtype: string @return: job id """ if _INST_REINSTALL_REQV1 in self.GetFeatures(): body = { "start": not no_startup, } _SetItemIf(body, os is not None, "os", os) _SetItemIf(body, osparams is not None, "osparams", osparams) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/reinstall" % (GANETI_RAPI_VERSION, instance)), None, body) # Use old request format if osparams: raise GanetiApiError("Server does not support specifying OS parameters" " for instance reinstallation") query = [] _AppendIf(query, os, ("os", os)) _AppendIf(query, no_startup, ("nostartup", 1)) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/reinstall" % (GANETI_RAPI_VERSION, instance)), query, None) def ReplaceInstanceDisks(self, instance, disks=None, mode=REPLACE_DISK_AUTO, remote_node=None, iallocator=None): """Replaces disks on an instance. @type instance: str @param instance: instance whose disks to replace @type disks: list of ints @param disks: Indexes of disks to replace @type mode: str @param mode: replacement mode to use (defaults to replace_auto) @type remote_node: str or None @param remote_node: new secondary node to use (for use with replace_new_secondary mode) @type iallocator: str or None @param iallocator: instance allocator plugin to use (for use with replace_auto mode) @rtype: string @return: job id """ query = [ ("mode", mode), ] # TODO: Convert to body parameters if disks is not None: _AppendIf(query, True, ("disks", ",".join(str(idx) for idx in disks))) _AppendIf(query, remote_node is not None, ("remote_node", remote_node)) _AppendIf(query, iallocator is not None, ("iallocator", iallocator)) return self._SendRequest(HTTP_POST, ("/%s/instances/%s/replace-disks" % (GANETI_RAPI_VERSION, instance)), query, None) def PrepareExport(self, instance, mode): """Prepares an instance for an export. @type instance: string @param instance: Instance name @type mode: string @param mode: Export mode @rtype: string @return: Job ID """ query = [("mode", mode)] return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/prepare-export" % (GANETI_RAPI_VERSION, instance)), query, None) def ExportInstance(self, instance, mode, destination, shutdown=None, remove_instance=None, x509_key_name=None, destination_x509_ca=None): """Exports an instance. @type instance: string @param instance: Instance name @type mode: string @param mode: Export mode @rtype: string @return: Job ID """ body = { "destination": destination, "mode": mode, } _SetItemIf(body, shutdown is not None, "shutdown", shutdown) _SetItemIf(body, remove_instance is not None, "remove_instance", remove_instance) _SetItemIf(body, x509_key_name is not None, "x509_key_name", x509_key_name) _SetItemIf(body, destination_x509_ca is not None, "destination_x509_ca", destination_x509_ca) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/export" % (GANETI_RAPI_VERSION, instance)), None, body) def MigrateInstance(self, instance, mode=None, cleanup=None, target_node=None): """Migrates an instance.
@type instance: string @param instance: Instance name @type mode: string @param mode: Migration mode @type cleanup: bool @param cleanup: Whether to clean up a previously failed migration @type target_node: string @param target_node: Target Node for externally mirrored instances @rtype: string @return: job id """ body = {} _SetItemIf(body, mode is not None, "mode", mode) _SetItemIf(body, cleanup is not None, "cleanup", cleanup) _SetItemIf(body, target_node is not None, "target_node", target_node) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/migrate" % (GANETI_RAPI_VERSION, instance)), None, body) def FailoverInstance(self, instance, iallocator=None, ignore_consistency=None, target_node=None): """Does a failover of an instance. @type instance: string @param instance: Instance name @type iallocator: string @param iallocator: Iallocator for deciding the target node for shared-storage instances @type ignore_consistency: bool @param ignore_consistency: Whether to ignore disk consistency @type target_node: string @param target_node: Target node for shared-storage instances @rtype: string @return: job id """ body = {} _SetItemIf(body, iallocator is not None, "iallocator", iallocator) _SetItemIf(body, ignore_consistency is not None, "ignore_consistency", ignore_consistency) _SetItemIf(body, target_node is not None, "target_node", target_node) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/failover" % (GANETI_RAPI_VERSION, instance)), None, body) def RenameInstance(self, instance, new_name, ip_check=None, name_check=None): """Changes the name of an instance. @type instance: string @param instance: Instance name @type new_name: string @param new_name: New instance name @type ip_check: bool @param ip_check: Whether to ensure instance's IP address is inactive @type name_check: bool @param name_check: Whether to ensure instance's name is resolvable @rtype: string @return: job id """ body = { "new_name": new_name, } _SetItemIf(body, ip_check is not None, "ip_check", ip_check) _SetItemIf(body, name_check is not None, "name_check", name_check) return self._SendRequest(HTTP_PUT, ("/%s/instances/%s/rename" % (GANETI_RAPI_VERSION, instance)), None, body) def GetInstanceConsole(self, instance): """Request information for connecting to instance's console. @type instance: string @param instance: Instance name @rtype: dict @return: dictionary containing information about instance's console """ return self._SendRequest(HTTP_GET, ("/%s/instances/%s/console" % (GANETI_RAPI_VERSION, instance)), None, None) def GetJobs(self, bulk=False): """Gets all jobs for the cluster. @type bulk: bool @param bulk: Whether to return detailed information about jobs. @rtype: list of int @return: List of job ids for the cluster or list of dicts with detailed information about the jobs if bulk parameter was true. """ query = [] _AppendIf(query, bulk, ("bulk", 1)) if bulk: return self._SendRequest(HTTP_GET, "/%s/jobs" % GANETI_RAPI_VERSION, query, None) else: return [int(j["id"]) for j in self._SendRequest(HTTP_GET, "/%s/jobs" % GANETI_RAPI_VERSION, None, None)] def GetJobStatus(self, job_id): """Gets the status of a job. @type job_id: string @param job_id: job id whose status to query @rtype: dict @return: job status """ return self._SendRequest(HTTP_GET, "/%s/jobs/%s" % (GANETI_RAPI_VERSION, job_id), None, None) def WaitForJobCompletion(self, job_id, period=5, retries=-1): """Polls cluster for job status until completion. Completion is defined as any of the following states listed in L{JOB_STATUS_FINALIZED}. 
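Illustrative use (the client object C{rapi} and the instance name are assumptions):

  job_id = rapi.RebootInstance("instance1.example.com")
  success = rapi.WaitForJobCompletion(job_id, period=5, retries=24)

With C{period=5} and C{retries=24} this polls for roughly two minutes before giving up and returning C{False}.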
@type job_id: string @param job_id: job id to watch @type period: int @param period: how often to poll for status (optional, default 5s) @type retries: int @param retries: how many times to poll before giving up (optional, default -1 means unlimited) @rtype: bool @return: C{True} if the job succeeded or C{False} if it failed or the polling timed out @deprecated: It is recommended to use L{WaitForJobChange} wherever possible; L{WaitForJobChange} returns immediately after a job changed and does not use polling """ while retries != 0: job_result = self.GetJobStatus(job_id) if job_result and job_result["status"] == JOB_STATUS_SUCCESS: return True elif not job_result or job_result["status"] in JOB_STATUS_FINALIZED: return False if period: time.sleep(period) if retries > 0: retries -= 1 return False def WaitForJobChange(self, job_id, fields, prev_job_info, prev_log_serial): """Waits for job changes. @type job_id: string @param job_id: Job ID for which to wait @return: C{None} if no changes have been detected and a dict with two keys, C{job_info} and C{log_entries} otherwise. @rtype: dict """ body = { "fields": fields, "previous_job_info": prev_job_info, "previous_log_serial": prev_log_serial, } return self._SendRequest(HTTP_GET, "/%s/jobs/%s/wait" % (GANETI_RAPI_VERSION, job_id), None, body) def CancelJob(self, job_id, dry_run=False): """Cancels a job. @type job_id: string @param job_id: id of the job to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: tuple @return: tuple containing the result, and a message (bool, string) """ query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, "/%s/jobs/%s" % (GANETI_RAPI_VERSION, job_id), query, None) def GetNodes(self, bulk=False): """Gets all nodes in the cluster. @type bulk: bool @param bulk: whether to return all information about all nodes @rtype: list of dict or str @return: if bulk is true, info about nodes in the cluster, else list of nodes in the cluster """ query = [] _AppendIf(query, bulk, ("bulk", 1)) nodes = self._SendRequest(HTTP_GET, "/%s/nodes" % GANETI_RAPI_VERSION, query, None) if bulk: return nodes else: return [n["id"] for n in nodes] def GetNode(self, node): """Gets information about a node. @type node: str @param node: node whose info to return @rtype: dict @return: info about the node """ return self._SendRequest(HTTP_GET, "/%s/nodes/%s" % (GANETI_RAPI_VERSION, node), None, None) def EvacuateNode(self, node, iallocator=None, remote_node=None, dry_run=False, early_release=None, mode=None, accept_old=False): """Evacuates instances from a Ganeti node.
@type node: str @param node: node to evacuate @type iallocator: str or None @param iallocator: instance allocator to use @type remote_node: str @param remote_node: node to evacuate to @type dry_run: bool @param dry_run: whether to perform a dry run @type early_release: bool @param early_release: whether to enable parallelization @type mode: string @param mode: Node evacuation mode @type accept_old: bool @param accept_old: Whether caller is ready to accept old-style (pre-2.5) results @rtype: string, or a list for pre-2.5 results @return: Job ID or, if C{accept_old} is set and server is pre-2.5, list of (job ID, instance name, new secondary node); if dry_run was specified, then the actual move jobs were not submitted and the job IDs will be C{None} @raises GanetiApiError: if an iallocator and remote_node are both specified """ if iallocator and remote_node: raise GanetiApiError("Only one of iallocator or remote_node can be used") query = [] _AppendDryRunIf(query, dry_run) if _NODE_EVAC_RES1 in self.GetFeatures(): # Server supports body parameters body = {} _SetItemIf(body, iallocator is not None, "iallocator", iallocator) _SetItemIf(body, remote_node is not None, "remote_node", remote_node) _SetItemIf(body, early_release is not None, "early_release", early_release) _SetItemIf(body, mode is not None, "mode", mode) else: # Pre-2.5 request format body = None if not accept_old: raise GanetiApiError("Server is version 2.4 or earlier and caller does" " not accept old-style results (parameter" " accept_old)") # Pre-2.5 servers can only evacuate secondaries if mode is not None and mode != NODE_EVAC_SEC: raise GanetiApiError("Server can only evacuate secondary instances") _AppendIf(query, iallocator, ("iallocator", iallocator)) _AppendIf(query, remote_node, ("remote_node", remote_node)) _AppendIf(query, early_release, ("early_release", 1)) return self._SendRequest(HTTP_POST, ("/%s/nodes/%s/evacuate" % (GANETI_RAPI_VERSION, node)), query, body) def MigrateNode(self, node, mode=None, dry_run=False, iallocator=None, target_node=None): """Migrates all primary instances from a node. @type node: str @param node: node to migrate @type mode: string @param mode: if passed, it will overwrite the live migration type, otherwise the hypervisor default will be used @type dry_run: bool @param dry_run: whether to perform a dry run @type iallocator: string @param iallocator: instance allocator to use @type target_node: string @param target_node: Target node for shared-storage instances @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) if _NODE_MIGRATE_REQV1 in self.GetFeatures(): body = {} _SetItemIf(body, mode is not None, "mode", mode) _SetItemIf(body, iallocator is not None, "iallocator", iallocator) _SetItemIf(body, target_node is not None, "target_node", target_node) assert len(query) <= 1 return self._SendRequest(HTTP_POST, ("/%s/nodes/%s/migrate" % (GANETI_RAPI_VERSION, node)), query, body) else: # Use old request format if target_node is not None: raise GanetiApiError("Server does not support specifying target node" " for node migration") _AppendIf(query, mode is not None, ("mode", mode)) return self._SendRequest(HTTP_POST, ("/%s/nodes/%s/migrate" % (GANETI_RAPI_VERSION, node)), query, None) def GetNodeRole(self, node): """Gets the current role for a node. 
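The role is returned as a plain string, one of C{"master"}, C{"master-candidate"}, C{"regular"}, C{"drained"} or C{"offline"} (the same values accepted by L{SetNodeRole}); a short sketch with illustrative names::

  if client.GetNodeRole("node1.example.com") == "drained":
    client.SetNodeRole("node1.example.com", "regular")
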
@type node: str @param node: node whose role to return @rtype: str @return: the current role for a node """ return self._SendRequest(HTTP_GET, ("/%s/nodes/%s/role" % (GANETI_RAPI_VERSION, node)), None, None) def SetNodeRole(self, node, role, force=False, auto_promote=None): """Sets the role for a node. @type node: str @param node: the node whose role to set @type role: str @param role: the role to set for the node @type force: bool @param force: whether to force the role change @type auto_promote: bool @param auto_promote: Whether node(s) should be promoted to master candidate if necessary @rtype: string @return: job id """ query = [] _AppendForceIf(query, force) _AppendIf(query, auto_promote is not None, ("auto-promote", auto_promote)) return self._SendRequest(HTTP_PUT, ("/%s/nodes/%s/role" % (GANETI_RAPI_VERSION, node)), query, role) def PowercycleNode(self, node, force=False): """Powercycles a node. @type node: string @param node: Node name @type force: bool @param force: Whether to force the operation @rtype: string @return: job id """ query = [] _AppendForceIf(query, force) return self._SendRequest(HTTP_POST, ("/%s/nodes/%s/powercycle" % (GANETI_RAPI_VERSION, node)), query, None) def ModifyNode(self, node, **kwargs): """Modifies a node. More details for parameters can be found in the RAPI documentation. @type node: string @param node: Node name @rtype: string @return: job id """ return self._SendRequest(HTTP_POST, ("/%s/nodes/%s/modify" % (GANETI_RAPI_VERSION, node)), None, kwargs) def GetNodeStorageUnits(self, node, storage_type, output_fields): """Gets the storage units for a node. @type node: str @param node: the node whose storage units to return @type storage_type: str @param storage_type: storage type whose units to return @type output_fields: str @param output_fields: storage type fields to return @rtype: string @return: job id where results can be retrieved """ query = [ ("storage_type", storage_type), ("output_fields", output_fields), ] return self._SendRequest(HTTP_GET, ("/%s/nodes/%s/storage" % (GANETI_RAPI_VERSION, node)), query, None) def ModifyNodeStorageUnits(self, node, storage_type, name, allocatable=None): """Modifies parameters of storage units on the node. @type node: str @param node: node whose storage units to modify @type storage_type: str @param storage_type: storage type whose units to modify @type name: str @param name: name of the storage unit @type allocatable: bool or None @param allocatable: Whether to set the "allocatable" flag on the storage unit (None=no modification, True=set, False=unset) @rtype: string @return: job id """ query = [ ("storage_type", storage_type), ("name", name), ] _AppendIf(query, allocatable is not None, ("allocatable", allocatable)) return self._SendRequest(HTTP_PUT, ("/%s/nodes/%s/storage/modify" % (GANETI_RAPI_VERSION, node)), query, None) def RepairNodeStorageUnits(self, node, storage_type, name): """Repairs a storage unit on the node. @type node: str @param node: node whose storage units to repair @type storage_type: str @param storage_type: storage type to repair @type name: str @param name: name of the storage unit to repair @rtype: string @return: job id """ query = [ ("storage_type", storage_type), ("name", name), ] return self._SendRequest(HTTP_PUT, ("/%s/nodes/%s/storage/repair" % (GANETI_RAPI_VERSION, node)), query, None) def GetNodeTags(self, node): """Gets the tags for a node. 
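A round-trip sketch of the tag calls defined below (values are illustrative; both write calls return job ids)::

  client.AddNodeTags("node1.example.com", ["env:prod"])
  tags = client.GetNodeTags("node1.example.com")
  client.DeleteNodeTags("node1.example.com", ["env:prod"])
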
@type node: str @param node: node whose tags to return @rtype: list of str @return: tags for the node """ return self._SendRequest(HTTP_GET, ("/%s/nodes/%s/tags" % (GANETI_RAPI_VERSION, node)), None, None) def AddNodeTags(self, node, tags, dry_run=False): """Adds tags to a node. @type node: str @param node: node to add tags to @type tags: list of str @param tags: tags to add to the node @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/nodes/%s/tags" % (GANETI_RAPI_VERSION, node)), query, tags) def DeleteNodeTags(self, node, tags, dry_run=False): """Deletes tags from a node. @type node: str @param node: node to remove tags from @type tags: list of str @param tags: tags to remove from the node @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/nodes/%s/tags" % (GANETI_RAPI_VERSION, node)), query, None) def GetNetworks(self, bulk=False): """Gets all networks in the cluster. @type bulk: bool @param bulk: whether to return all information about the networks @rtype: list of dict or str @return: if bulk is true, a list of dictionaries with info about all networks in the cluster, else a list of names of those networks """ query = [] _AppendIf(query, bulk, ("bulk", 1)) networks = self._SendRequest(HTTP_GET, "/%s/networks" % GANETI_RAPI_VERSION, query, None) if bulk: return networks else: return [n["name"] for n in networks] def GetNetwork(self, network): """Gets information about a network. @type network: str @param network: name of the network whose info to return @rtype: dict @return: info about the network """ return self._SendRequest(HTTP_GET, "/%s/networks/%s" % (GANETI_RAPI_VERSION, network), None, None) def CreateNetwork(self, network_name, network, gateway=None, network6=None, gateway6=None, mac_prefix=None, add_reserved_ips=None, tags=None, dry_run=False): """Creates a new network. @type network_name: str @param network_name: the name of the network to create @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) if add_reserved_ips: add_reserved_ips = add_reserved_ips.split(",") if tags: tags = tags.split(",") body = { "network_name": network_name, "gateway": gateway, "network": network, "gateway6": gateway6, "network6": network6, "mac_prefix": mac_prefix, "add_reserved_ips": add_reserved_ips, "tags": tags, } return self._SendRequest(HTTP_POST, "/%s/networks" % GANETI_RAPI_VERSION, query, body) def ConnectNetwork(self, network_name, group_name, mode, link, dry_run=False): """Connects a Network to a NodeGroup with the given netparams """ body = { "group_name": group_name, "network_mode": mode, "network_link": link, } query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/networks/%s/connect" % (GANETI_RAPI_VERSION, network_name)), query, body) def DisconnectNetwork(self, network_name, group_name, dry_run=False): """Disconnects a Network from a NodeGroup """ body = { "group_name": group_name, } query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/networks/%s/disconnect" % (GANETI_RAPI_VERSION, network_name)), query, body) def ModifyNetwork(self, network, **kwargs): """Modifies a network. 
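Keyword arguments are passed straight through as the request body; for instance (a sketch -- C{gateway} is an assumed parameter name, following the fields used for network creation above)::

  client.ModifyNetwork("net1", gateway="192.0.2.254")
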
More details for parameters can be found in the RAPI documentation. @type network: string @param network: Network name @rtype: string @return: job id """ return self._SendRequest(HTTP_PUT, ("/%s/networks/%s/modify" % (GANETI_RAPI_VERSION, network)), None, kwargs) def DeleteNetwork(self, network, dry_run=False): """Deletes a network. @type network: str @param network: the network to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/networks/%s" % (GANETI_RAPI_VERSION, network)), query, None) def GetNetworkTags(self, network): """Gets tags for a network. @type network: string @param network: network whose tags to return @rtype: list of strings @return: tags for the network """ return self._SendRequest(HTTP_GET, ("/%s/networks/%s/tags" % (GANETI_RAPI_VERSION, network)), None, None) def AddNetworkTags(self, network, tags, dry_run=False): """Adds tags to a network. @type network: str @param network: network to add tags to @type tags: list of string @param tags: tags to add to the network @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/networks/%s/tags" % (GANETI_RAPI_VERSION, network)), query, None) def DeleteNetworkTags(self, network, tags, dry_run=False): """Deletes tags from a network. @type network: str @param network: network to delete tags from @type tags: list of string @param tags: tags to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/networks/%s/tags" % (GANETI_RAPI_VERSION, network)), query, None) def GetGroups(self, bulk=False): """Gets all node groups in the cluster. @type bulk: bool @param bulk: whether to return all information about the groups @rtype: list of dict or str @return: if bulk is true, a list of dictionaries with info about all node groups in the cluster, else a list of names of those node groups """ query = [] _AppendIf(query, bulk, ("bulk", 1)) groups = self._SendRequest(HTTP_GET, "/%s/groups" % GANETI_RAPI_VERSION, query, None) if bulk: return groups else: return [g["name"] for g in groups] def GetGroup(self, group): """Gets information about a node group. @type group: str @param group: name of the node group whose info to return @rtype: dict @return: info about the node group """ return self._SendRequest(HTTP_GET, "/%s/groups/%s" % (GANETI_RAPI_VERSION, group), None, None) def CreateGroup(self, name, alloc_policy=None, dry_run=False): """Creates a new node group. @type name: str @param name: the name of the node group to create @type alloc_policy: str @param alloc_policy: the desired allocation policy for the group, if any @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) body = { "name": name, "alloc_policy": alloc_policy, } return self._SendRequest(HTTP_POST, "/%s/groups" % GANETI_RAPI_VERSION, query, body) def ModifyGroup(self, group, **kwargs): """Modifies a node group. More details for parameters can be found in the RAPI documentation. 
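For example (a sketch -- C{alloc_policy} is one of the documented group parameters, with "preferred" as an assumed value)::

  client.ModifyGroup("group1", alloc_policy="preferred")
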
@type group: string @param group: Node group name @rtype: string @return: job id """ return self._SendRequest(HTTP_PUT, ("/%s/groups/%s/modify" % (GANETI_RAPI_VERSION, group)), None, kwargs) def DeleteGroup(self, group, dry_run=False): """Deletes a node group. @type group: str @param group: the node group to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/groups/%s" % (GANETI_RAPI_VERSION, group)), query, None) def RenameGroup(self, group, new_name): """Changes the name of a node group. @type group: string @param group: Node group name @type new_name: string @param new_name: New node group name @rtype: string @return: job id """ body = { "new_name": new_name, } return self._SendRequest(HTTP_PUT, ("/%s/groups/%s/rename" % (GANETI_RAPI_VERSION, group)), None, body) def AssignGroupNodes(self, group, nodes, force=False, dry_run=False): """Assigns nodes to a group. @type group: string @param group: Node group name @type nodes: list of strings @param nodes: List of nodes to assign to the group @type force: bool @param force: whether to force the assignment @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [] _AppendForceIf(query, force) _AppendDryRunIf(query, dry_run) body = { "nodes": nodes, } return self._SendRequest(HTTP_PUT, ("/%s/groups/%s/assign-nodes" % (GANETI_RAPI_VERSION, group)), query, body) def GetGroupTags(self, group): """Gets tags for a node group. @type group: string @param group: Node group whose tags to return @rtype: list of strings @return: tags for the group """ return self._SendRequest(HTTP_GET, ("/%s/groups/%s/tags" % (GANETI_RAPI_VERSION, group)), None, None) def AddGroupTags(self, group, tags, dry_run=False): """Adds tags to a node group. @type group: str @param group: group to add tags to @type tags: list of string @param tags: tags to add to the group @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_PUT, ("/%s/groups/%s/tags" % (GANETI_RAPI_VERSION, group)), query, None) def DeleteGroupTags(self, group, tags, dry_run=False): """Deletes tags from a node group. @type group: str @param group: group to delete tags from @type tags: list of string @param tags: tags to delete @type dry_run: bool @param dry_run: whether to perform a dry run @rtype: string @return: job id """ query = [("tag", t) for t in tags] _AppendDryRunIf(query, dry_run) return self._SendRequest(HTTP_DELETE, ("/%s/groups/%s/tags" % (GANETI_RAPI_VERSION, group)), query, None) def Query(self, what, fields, qfilter=None): """Retrieves information about resources. @type what: string @param what: Resource name, one of L{constants.QR_VIA_RAPI} @type fields: list of string @param fields: Requested fields @type qfilter: None or list @param qfilter: Query filter @rtype: dict @return: the serialized query result """ body = { "fields": fields, } _SetItemIf(body, qfilter is not None, "qfilter", qfilter) # TODO: remove "filter" after 2.7 _SetItemIf(body, qfilter is not None, "filter", qfilter) return self._SendRequest(HTTP_PUT, ("/%s/query/%s" % (GANETI_RAPI_VERSION, what)), None, body) def QueryFields(self, what, fields=None): """Retrieves available fields for a resource. 
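A short sketch (assuming C{client} as in the examples above; "node" is one of the query resources)::

  result = client.QueryFields("node", fields=["name", "role"])
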
@type what: string @param what: Resource name, one of L{constants.QR_VIA_RAPI} @type fields: list of string @param fields: Requested fields @rtype: dict @return: serialized description of the requested fields """ query = [] if fields is not None: _AppendIf(query, True, ("fields", ",".join(fields))) return self._SendRequest(HTTP_GET, ("/%s/query/%s/fields" % (GANETI_RAPI_VERSION, what)), query, None) ganeti-2.9.3/lib/rapi/testutils.py0000644000000000000000000002363312271422343017114 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Remote API test utilities. """ import logging import re import base64 import pycurl from cStringIO import StringIO from ganeti import errors from ganeti import opcodes from ganeti import http from ganeti import server from ganeti import utils from ganeti import compat from ganeti import luxi from ganeti import rapi import ganeti.http.server # pylint: disable=W0611 import ganeti.server.rapi import ganeti.rapi.client _URI_RE = re.compile(r"https://(?P<host>.*):(?P<port>\d+)(?P<path>/.*)") class VerificationError(Exception): """Dedicated error class for test utilities. This class is used to hide all of Ganeti's internal exceptions, so that external users of these utilities don't have to integrate Ganeti's exception hierarchy. """ def _GetOpById(op_id): """Tries to get an opcode class based on its C{OP_ID}. """ try: return opcodes.OP_MAPPING[op_id] except KeyError: raise VerificationError("Unknown opcode ID '%s'" % op_id) def _HideInternalErrors(fn): """Hides Ganeti-internal exceptions, see L{VerificationError}. """ def wrapper(*args, **kwargs): try: return fn(*args, **kwargs) except (errors.GenericError, rapi.client.GanetiApiError), err: raise VerificationError("Unhandled Ganeti error: %s" % err) return wrapper @_HideInternalErrors def VerifyOpInput(op_id, data): """Verifies opcode parameters according to their definition. @type op_id: string @param op_id: Opcode ID (C{OP_ID} attribute), e.g. C{OP_CLUSTER_VERIFY} @type data: dict @param data: Opcode parameter values @raise VerificationError: Parameter verification failed """ op_cls = _GetOpById(op_id) try: op = op_cls(**data) # pylint: disable=W0142 except TypeError, err: raise VerificationError("Unable to create opcode instance: %s" % err) try: op.Validate(False) except errors.OpPrereqError, err: raise VerificationError("Parameter validation for opcode '%s' failed: %s" % (op_id, err)) @_HideInternalErrors def VerifyOpResult(op_id, result): """Verifies opcode results used in tests (e.g. in a mock). @type op_id: string @param op_id: Opcode ID (C{OP_ID} attribute), e.g. 
C{OP_CLUSTER_VERIFY} @param result: Mocked opcode result @raise VerificationError: Return value verification failed """ resultcheck_fn = _GetOpById(op_id).OP_RESULT if not resultcheck_fn: logging.warning("Opcode '%s' has no result type definition", op_id) elif not resultcheck_fn(result): raise VerificationError("Given result does not match result description" " for opcode '%s': %s" % (op_id, resultcheck_fn)) def _GetPathFromUri(uri): """Gets the path and query from a URI. """ match = _URI_RE.match(uri) if match: return match.groupdict()["path"] else: return None def _FormatHeaders(headers): """Formats HTTP headers. @type headers: sequence of strings @rtype: string """ assert compat.all(": " in header for header in headers) return "\n".join(headers) class FakeCurl: """Fake cURL object. """ def __init__(self, handler): """Initialize this class. @param handler: Request handler instance """ self._handler = handler self._opts = {} self._info = {} def setopt(self, opt, value): self._opts[opt] = value def getopt(self, opt): return self._opts.get(opt) def unsetopt(self, opt): self._opts.pop(opt, None) def getinfo(self, info): return self._info[info] def perform(self): method = self._opts[pycurl.CUSTOMREQUEST] url = self._opts[pycurl.URL] request_body = self._opts[pycurl.POSTFIELDS] writefn = self._opts[pycurl.WRITEFUNCTION] if pycurl.HTTPHEADER in self._opts: baseheaders = _FormatHeaders(self._opts[pycurl.HTTPHEADER]) else: baseheaders = "" headers = http.ParseHeaders(StringIO(baseheaders)) if request_body: headers[http.HTTP_CONTENT_LENGTH] = str(len(request_body)) if self._opts.get(pycurl.HTTPAUTH, 0) & pycurl.HTTPAUTH_BASIC: try: userpwd = self._opts[pycurl.USERPWD] except KeyError: raise errors.ProgrammerError("Basic authentication requires username" " and password") headers[http.HTTP_AUTHORIZATION] = \ "%s %s" % (http.auth.HTTP_BASIC_AUTH, base64.b64encode(userpwd)) path = _GetPathFromUri(url) (code, _, resp_body) = \ self._handler.FetchResponse(path, method, headers, request_body) self._info[pycurl.RESPONSE_CODE] = code if resp_body is not None: writefn(resp_body) class _RapiMock: """Mocking out the RAPI server parts. """ def __init__(self, user_fn, luxi_client, reqauth=False): """Initialize this class. @type user_fn: callable @param user_fn: Function to authenticate the username @param luxi_client: A LUXI client implementation """ self.handler = \ server.rapi.RemoteApiHandler(user_fn, reqauth, _client_cls=luxi_client) def FetchResponse(self, path, method, headers, request_body): """This is a callback method used to fetch a response. This method is called by the L{FakeCurl.perform} method. @type path: string @param path: Requested path @type method: string @param method: HTTP method @type request_body: string @param request_body: Request body @type headers: mimetools.Message @param headers: Request headers @return: Tuple containing status code, response headers and response body """ req_msg = http.HttpMessage() req_msg.start_line = \ http.HttpClientToServerStartLine(method, path, http.HTTP_1_0) req_msg.headers = headers req_msg.body = request_body (_, _, _, resp_msg) = \ http.server.HttpResponder(self.handler)(lambda: (req_msg, None)) return (resp_msg.start_line.code, resp_msg.headers, resp_msg.body) class _TestLuxiTransport: """Mocked LUXI transport. Raises L{errors.RapiTestResult} for all method calls, no matter the arguments. """ def __init__(self, record_fn, address, timeouts=None): # pylint: disable=W0613 """Initializes this class. 
""" self._record_fn = record_fn def Close(self): pass def Call(self, data): """Calls LUXI method. In this test class the method is not actually called, but added to a list of called methods and then an exception (L{errors.RapiTestResult}) is raised. There is no return value. """ (method, _, _) = luxi.ParseRequest(data) # Take a note of called method self._record_fn(method) # Everything went fine until here, so let's abort the test raise errors.RapiTestResult class _LuxiCallRecorder: """Records all called LUXI client methods. """ def __init__(self): """Initializes this class. """ self._called = set() def Record(self, name): """Records a called function name. """ self._called.add(name) def CalledNames(self): """Returns a list of called LUXI methods. """ return self._called def __call__(self, address=None): """Creates an instrumented LUXI client. The LUXI client will record all method calls (use L{CalledNames} to retrieve them). """ return luxi.Client(transport=compat.partial(_TestLuxiTransport, self.Record), address=address) def _TestWrapper(fn, *args, **kwargs): """Wrapper for ignoring L{errors.RapiTestResult}. """ try: return fn(*args, **kwargs) except errors.RapiTestResult: # Everything was fine up to the point of sending a LUXI request return NotImplemented class InputTestClient: """Test version of RAPI client. Instances of this class can be used to test input arguments for RAPI client calls. See L{rapi.client.GanetiRapiClient} for available methods and their arguments. Functions can return C{NotImplemented} if all arguments are acceptable, but a LUXI request would be necessary to provide an actual return value. In case of an error, L{VerificationError} is raised. @see: An example on how to use this class can be found in C{doc/examples/rapi_testutils.py} """ def __init__(self): """Initializes this class. """ username = utils.GenerateSecret() password = utils.GenerateSecret() def user_fn(wanted): """Called to verify user credentials given in HTTP request. """ assert username == wanted return http.auth.PasswordFileUser(username, password, [rapi.RAPI_ACCESS_WRITE]) self._lcr = _LuxiCallRecorder() # Create a mock RAPI server handler = _RapiMock(user_fn, self._lcr) self._client = \ rapi.client.GanetiRapiClient("master.example.com", username=username, password=password, curl_factory=lambda: FakeCurl(handler)) def _GetLuxiCalls(self): """Returns the names of all called LUXI client functions. """ return self._lcr.CalledNames() def __getattr__(self, name): """Finds method by name. The method is wrapped using L{_TestWrapper} to produce the actual test result. """ return _HideInternalErrors(compat.partial(_TestWrapper, getattr(self._client, name))) ganeti-2.9.3/lib/rapi/rlib2.py0000644000000000000000000011764612271422343016076 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Remote API resource implementations. PUT or POST? ============ According to RFC2616 the main difference between PUT and POST is that POST can create new resources but PUT can only create the resource the URI was pointing to on the PUT request. In the context of this module POST on ``/2/instances`` to change an existing entity is legitimate, while PUT would not be. PUT creates a new entity (e.g. a new instance) with a name specified in the request. Quoting from RFC2616, section 9.6:: The fundamental difference between the POST and PUT requests is reflected in the different meaning of the Request-URI. The URI in a POST request identifies the resource that will handle the enclosed entity. That resource might be a data-accepting process, a gateway to some other protocol, or a separate entity that accepts annotations. In contrast, the URI in a PUT request identifies the entity enclosed with the request -- the user agent knows what URI is intended and the server MUST NOT attempt to apply the request to some other resource. If the server desires that the request be applied to a different URI, it MUST send a 301 (Moved Permanently) response; the user agent MAY then make its own decision regarding whether or not to redirect the request. So when adding new methods, if they are operating on the URI entity itself, PUT should be preferred over POST. """ # pylint: disable=C0103 # C0103: Invalid name, since the R_* names are not conforming from ganeti import opcodes from ganeti import objects from ganeti import http from ganeti import constants from ganeti import cli from ganeti import rapi from ganeti import ht from ganeti import compat from ganeti import ssconf from ganeti.rapi import baserlib _COMMON_FIELDS = ["ctime", "mtime", "uuid", "serial_no", "tags"] I_FIELDS = ["name", "admin_state", "os", "pnode", "snodes", "disk_template", "nic.ips", "nic.macs", "nic.modes", "nic.uuids", "nic.names", "nic.links", "nic.networks", "nic.networks.names", "nic.bridges", "network_port", "disk.sizes", "disk.spindles", "disk_usage", "disk.uuids", "disk.names", "beparams", "hvparams", "oper_state", "oper_ram", "oper_vcpus", "status", "custom_hvparams", "custom_beparams", "custom_nicparams", ] + _COMMON_FIELDS N_FIELDS = ["name", "offline", "master_candidate", "drained", "dtotal", "dfree", "sptotal", "spfree", "mtotal", "mnode", "mfree", "pinst_cnt", "sinst_cnt", "ctotal", "cnos", "cnodes", "csockets", "pip", "sip", "role", "pinst_list", "sinst_list", "master_capable", "vm_capable", "ndparams", "group.uuid", ] + _COMMON_FIELDS NET_FIELDS = ["name", "network", "gateway", "network6", "gateway6", "mac_prefix", "free_count", "reserved_count", "map", "group_list", "inst_list", "external_reservations", ] + _COMMON_FIELDS G_FIELDS = [ "alloc_policy", "name", "node_cnt", "node_list", "ipolicy", "custom_ipolicy", "diskparams", "custom_diskparams", "ndparams", "custom_ndparams", ] + _COMMON_FIELDS J_FIELDS_BULK = [ "id", "ops", "status", "summary", "opstatus", "received_ts", "start_ts", "end_ts", ] J_FIELDS = J_FIELDS_BULK + [ "oplog", "opresult", ] _NR_DRAINED = "drained" _NR_MASTER_CANDIDATE = "master-candidate" _NR_MASTER = "master" _NR_OFFLINE = "offline" _NR_REGULAR = "regular" _NR_MAP = { constants.NR_MASTER: _NR_MASTER, constants.NR_MCANDIDATE: _NR_MASTER_CANDIDATE, constants.NR_DRAINED: _NR_DRAINED, 
constants.NR_OFFLINE: _NR_OFFLINE, constants.NR_REGULAR: _NR_REGULAR, } assert frozenset(_NR_MAP.keys()) == constants.NR_ALL # Request data version field _REQ_DATA_VERSION = "__version__" # Feature string for instance creation request data version 1 _INST_CREATE_REQV1 = "instance-create-reqv1" # Feature string for instance reinstall request version 1 _INST_REINSTALL_REQV1 = "instance-reinstall-reqv1" # Feature string for node migration version 1 _NODE_MIGRATE_REQV1 = "node-migrate-reqv1" # Feature string for node evacuation with LU-generated jobs _NODE_EVAC_RES1 = "node-evac-res1" ALL_FEATURES = compat.UniqueFrozenset([ _INST_CREATE_REQV1, _INST_REINSTALL_REQV1, _NODE_MIGRATE_REQV1, _NODE_EVAC_RES1, ]) # Timeout for /2/jobs/[job_id]/wait. Gives job up to 10 seconds to change. _WFJC_TIMEOUT = 10 # FIXME: For compatibility we update the beparams/memory field. Needs to be # removed in Ganeti 2.8 def _UpdateBeparams(inst): """Updates the beparams dict of inst to support the memory field. @param inst: Inst dict @return: Updated inst dict """ beparams = inst["beparams"] beparams[constants.BE_MEMORY] = beparams[constants.BE_MAXMEM] return inst class R_root(baserlib.ResourceBase): """/ resource. """ @staticmethod def GET(): """Supported for legacy reasons. """ return None class R_2(R_root): """/2 resource. """ class R_version(baserlib.ResourceBase): """/version resource. This resource should be used to determine the remote API version and to adapt clients accordingly. """ @staticmethod def GET(): """Returns the remote API version. """ return constants.RAPI_VERSION class R_2_info(baserlib.OpcodeResource): """/2/info resource. """ GET_OPCODE = opcodes.OpClusterQuery def GET(self): """Returns cluster information. """ client = self.GetClient(query=True) return client.QueryClusterInfo() class R_2_features(baserlib.ResourceBase): """/2/features resource. """ @staticmethod def GET(): """Returns list of optional RAPI features implemented. """ return list(ALL_FEATURES) class R_2_os(baserlib.OpcodeResource): """/2/os resource. """ GET_OPCODE = opcodes.OpOsDiagnose def GET(self): """Return a list of all OSes. Can return error 500 in case of a problem. Example: ["debian-etch"] """ cl = self.GetClient() op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[]) job_id = self.SubmitJob([op], cl=cl) # we use custom feedback function, instead of print we log the status result = cli.PollJob(job_id, cl, feedback_fn=baserlib.FeedbackFn) diagnose_data = result[0] if not isinstance(diagnose_data, list): raise http.HttpBadGateway(message="Can't get OS list") os_names = [] for (name, variants) in diagnose_data: os_names.extend(cli.CalculateOSNames(name, variants)) return os_names class R_2_redist_config(baserlib.OpcodeResource): """/2/redistribute-config resource. """ PUT_OPCODE = opcodes.OpClusterRedistConf class R_2_cluster_modify(baserlib.OpcodeResource): """/2/modify resource. """ PUT_OPCODE = opcodes.OpClusterSetParams class R_2_jobs(baserlib.ResourceBase): """/2/jobs resource. """ def GET(self): """Returns a dictionary of jobs. @return: a dictionary with jobs id and uri. """ client = self.GetClient(query=True) if self.useBulk(): bulkdata = client.QueryJobs(None, J_FIELDS_BULK) return baserlib.MapBulkFields(bulkdata, J_FIELDS_BULK) else: jobdata = map(compat.fst, client.QueryJobs(None, ["id"])) return baserlib.BuildUriList(jobdata, "/2/jobs/%s", uri_fields=("id", "uri")) class R_2_jobs_id(baserlib.ResourceBase): """/2/jobs/[job_id] resource. """ def GET(self): """Returns a job status. 
@return: a dictionary with job parameters. The result includes: - id: job ID as a number - status: current job status as a string - ops: involved OpCodes as a list of dictionaries for each opcode in the job - opstatus: OpCodes status as a list - opresult: OpCodes results as a list of lists """ job_id = self.items[0] result = self.GetClient(query=True).QueryJobs([job_id, ], J_FIELDS)[0] if result is None: raise http.HttpNotFound() return baserlib.MapFields(J_FIELDS, result) def DELETE(self): """Cancels a not-yet-started job. """ job_id = self.items[0] result = self.GetClient().CancelJob(job_id) return result class R_2_jobs_id_wait(baserlib.ResourceBase): """/2/jobs/[job_id]/wait resource. """ # WaitForJobChange provides access to sensitive information and blocks # machine resources (it's a blocking RAPI call), hence restricting access. GET_ACCESS = [rapi.RAPI_ACCESS_WRITE] def GET(self): """Waits for job changes. """ job_id = self.items[0] fields = self.getBodyParameter("fields") prev_job_info = self.getBodyParameter("previous_job_info", None) prev_log_serial = self.getBodyParameter("previous_log_serial", None) if not isinstance(fields, list): raise http.HttpBadRequest("The 'fields' parameter should be a list") if not (prev_job_info is None or isinstance(prev_job_info, list)): raise http.HttpBadRequest("The 'previous_job_info' parameter should" " be a list") if not (prev_log_serial is None or isinstance(prev_log_serial, (int, long))): raise http.HttpBadRequest("The 'previous_log_serial' parameter should" " be a number") client = self.GetClient() result = client.WaitForJobChangeOnce(job_id, fields, prev_job_info, prev_log_serial, timeout=_WFJC_TIMEOUT) if not result: raise http.HttpNotFound() if result == constants.JOB_NOTCHANGED: # No changes return None (job_info, log_entries) = result return { "job_info": job_info, "log_entries": log_entries, } class R_2_nodes(baserlib.OpcodeResource): """/2/nodes resource. """ GET_OPCODE = opcodes.OpNodeQuery def GET(self): """Returns a list of all nodes. """ client = self.GetClient(query=True) if self.useBulk(): bulkdata = client.QueryNodes([], N_FIELDS, False) return baserlib.MapBulkFields(bulkdata, N_FIELDS) else: nodesdata = client.QueryNodes([], ["name"], False) nodeslist = [row[0] for row in nodesdata] return baserlib.BuildUriList(nodeslist, "/2/nodes/%s", uri_fields=("id", "uri")) class R_2_nodes_name(baserlib.OpcodeResource): """/2/nodes/[node_name] resource. """ GET_OPCODE = opcodes.OpNodeQuery def GET(self): """Send information about a node. """ node_name = self.items[0] client = self.GetClient(query=True) result = baserlib.HandleItemQueryErrors(client.QueryNodes, names=[node_name], fields=N_FIELDS, use_locking=self.useLocking()) return baserlib.MapFields(N_FIELDS, result[0]) class R_2_nodes_name_powercycle(baserlib.OpcodeResource): """/2/nodes/[node_name]/powercycle resource. """ POST_OPCODE = opcodes.OpNodePowercycle def GetPostOpInput(self): """Tries to powercycle a node. """ return (self.request_body, { "node_name": self.items[0], "force": self.useForce(), }) class R_2_nodes_name_role(baserlib.OpcodeResource): """/2/nodes/[node_name]/role resource. """ PUT_OPCODE = opcodes.OpNodeSetParams def GET(self): """Returns the current node role. @return: Node role """ node_name = self.items[0] client = self.GetClient(query=True) result = client.QueryNodes(names=[node_name], fields=["role"], use_locking=self.useLocking()) return _NR_MAP[result[0][0]] def GetPutOpInput(self): """Sets the node role. 
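The request body must be a bare JSON string carrying one of the C{_NR_*} role names defined above; the C{auto-promote} query argument (0/1) is optional. An illustrative request::

  PUT /2/nodes/node1.example.com/role?auto-promote=1
  "master-candidate"
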
""" baserlib.CheckType(self.request_body, basestring, "Body contents") role = self.request_body if role == _NR_REGULAR: candidate = False offline = False drained = False elif role == _NR_MASTER_CANDIDATE: candidate = True offline = drained = None elif role == _NR_DRAINED: drained = True candidate = offline = None elif role == _NR_OFFLINE: offline = True candidate = drained = None else: raise http.HttpBadRequest("Can't set '%s' role" % role) assert len(self.items) == 1 return ({}, { "node_name": self.items[0], "master_candidate": candidate, "offline": offline, "drained": drained, "force": self.useForce(), "auto_promote": bool(self._checkIntVariable("auto-promote", default=0)), }) class R_2_nodes_name_evacuate(baserlib.OpcodeResource): """/2/nodes/[node_name]/evacuate resource. """ POST_OPCODE = opcodes.OpNodeEvacuate def GetPostOpInput(self): """Evacuate all instances off a node. """ return (self.request_body, { "node_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_nodes_name_migrate(baserlib.OpcodeResource): """/2/nodes/[node_name]/migrate resource. """ POST_OPCODE = opcodes.OpNodeMigrate def GetPostOpInput(self): """Migrate all primary instances from a node. """ if self.queryargs: # Support old-style requests if "live" in self.queryargs and "mode" in self.queryargs: raise http.HttpBadRequest("Only one of 'live' and 'mode' should" " be passed") if "live" in self.queryargs: if self._checkIntVariable("live", default=1): mode = constants.HT_MIGRATION_LIVE else: mode = constants.HT_MIGRATION_NONLIVE else: mode = self._checkStringVariable("mode", default=None) data = { "mode": mode, } else: data = self.request_body return (data, { "node_name": self.items[0], }) class R_2_nodes_name_modify(baserlib.OpcodeResource): """/2/nodes/[node_name]/modify resource. """ POST_OPCODE = opcodes.OpNodeSetParams def GetPostOpInput(self): """Changes parameters of a node. """ assert len(self.items) == 1 return (self.request_body, { "node_name": self.items[0], }) class R_2_nodes_name_storage(baserlib.OpcodeResource): """/2/nodes/[node_name]/storage resource. """ # LUNodeQueryStorage acquires locks, hence restricting access to GET GET_ACCESS = [rapi.RAPI_ACCESS_WRITE] GET_OPCODE = opcodes.OpNodeQueryStorage def GetGetOpInput(self): """List storage available on a node. """ storage_type = self._checkStringVariable("storage_type", None) output_fields = self._checkStringVariable("output_fields", None) if not output_fields: raise http.HttpBadRequest("Missing the required 'output_fields'" " parameter") return ({}, { "nodes": [self.items[0]], "storage_type": storage_type, "output_fields": output_fields.split(","), }) class R_2_nodes_name_storage_modify(baserlib.OpcodeResource): """/2/nodes/[node_name]/storage/modify resource. """ PUT_OPCODE = opcodes.OpNodeModifyStorage def GetPutOpInput(self): """Modifies a storage volume on a node. """ storage_type = self._checkStringVariable("storage_type", None) name = self._checkStringVariable("name", None) if not name: raise http.HttpBadRequest("Missing the required 'name'" " parameter") changes = {} if "allocatable" in self.queryargs: changes[constants.SF_ALLOCATABLE] = \ bool(self._checkIntVariable("allocatable", default=1)) return ({}, { "node_name": self.items[0], "storage_type": storage_type, "name": name, "changes": changes, }) class R_2_nodes_name_storage_repair(baserlib.OpcodeResource): """/2/nodes/[node_name]/storage/repair resource. """ PUT_OPCODE = opcodes.OpRepairNodeStorage def GetPutOpInput(self): """Repairs a storage volume on a node. 
""" storage_type = self._checkStringVariable("storage_type", None) name = self._checkStringVariable("name", None) if not name: raise http.HttpBadRequest("Missing the required 'name'" " parameter") return ({}, { "node_name": self.items[0], "storage_type": storage_type, "name": name, }) class R_2_networks(baserlib.OpcodeResource): """/2/networks resource. """ GET_OPCODE = opcodes.OpNetworkQuery POST_OPCODE = opcodes.OpNetworkAdd POST_RENAME = { "name": "network_name", } def GetPostOpInput(self): """Create a network. """ assert not self.items return (self.request_body, { "dry_run": self.dryRun(), }) def GET(self): """Returns a list of all networks. """ client = self.GetClient(query=True) if self.useBulk(): bulkdata = client.QueryNetworks([], NET_FIELDS, False) return baserlib.MapBulkFields(bulkdata, NET_FIELDS) else: data = client.QueryNetworks([], ["name"], False) networknames = [row[0] for row in data] return baserlib.BuildUriList(networknames, "/2/networks/%s", uri_fields=("name", "uri")) class R_2_networks_name(baserlib.OpcodeResource): """/2/networks/[network_name] resource. """ DELETE_OPCODE = opcodes.OpNetworkRemove def GET(self): """Send information about a network. """ network_name = self.items[0] client = self.GetClient(query=True) result = baserlib.HandleItemQueryErrors(client.QueryNetworks, names=[network_name], fields=NET_FIELDS, use_locking=self.useLocking()) return baserlib.MapFields(NET_FIELDS, result[0]) def GetDeleteOpInput(self): """Delete a network. """ assert len(self.items) == 1 return (self.request_body, { "network_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_networks_name_connect(baserlib.OpcodeResource): """/2/networks/[network_name]/connect resource. """ PUT_OPCODE = opcodes.OpNetworkConnect def GetPutOpInput(self): """Changes some parameters of node group. """ assert self.items return (self.request_body, { "network_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_networks_name_disconnect(baserlib.OpcodeResource): """/2/networks/[network_name]/disconnect resource. """ PUT_OPCODE = opcodes.OpNetworkDisconnect def GetPutOpInput(self): """Changes some parameters of node group. """ assert self.items return (self.request_body, { "network_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_networks_name_modify(baserlib.OpcodeResource): """/2/networks/[network_name]/modify resource. """ PUT_OPCODE = opcodes.OpNetworkSetParams def GetPutOpInput(self): """Changes some parameters of network. """ assert self.items return (self.request_body, { "network_name": self.items[0], }) class R_2_groups(baserlib.OpcodeResource): """/2/groups resource. """ GET_OPCODE = opcodes.OpGroupQuery POST_OPCODE = opcodes.OpGroupAdd POST_RENAME = { "name": "group_name", } def GetPostOpInput(self): """Create a node group. """ assert not self.items return (self.request_body, { "dry_run": self.dryRun(), }) def GET(self): """Returns a list of all node groups. """ client = self.GetClient(query=True) if self.useBulk(): bulkdata = client.QueryGroups([], G_FIELDS, False) return baserlib.MapBulkFields(bulkdata, G_FIELDS) else: data = client.QueryGroups([], ["name"], False) groupnames = [row[0] for row in data] return baserlib.BuildUriList(groupnames, "/2/groups/%s", uri_fields=("name", "uri")) class R_2_groups_name(baserlib.OpcodeResource): """/2/groups/[group_name] resource. """ DELETE_OPCODE = opcodes.OpGroupRemove def GET(self): """Send information about a node group. 
""" group_name = self.items[0] client = self.GetClient(query=True) result = baserlib.HandleItemQueryErrors(client.QueryGroups, names=[group_name], fields=G_FIELDS, use_locking=self.useLocking()) return baserlib.MapFields(G_FIELDS, result[0]) def GetDeleteOpInput(self): """Delete a node group. """ assert len(self.items) == 1 return ({}, { "group_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_groups_name_modify(baserlib.OpcodeResource): """/2/groups/[group_name]/modify resource. """ PUT_OPCODE = opcodes.OpGroupSetParams def GetPutOpInput(self): """Changes some parameters of node group. """ assert self.items return (self.request_body, { "group_name": self.items[0], }) class R_2_groups_name_rename(baserlib.OpcodeResource): """/2/groups/[group_name]/rename resource. """ PUT_OPCODE = opcodes.OpGroupRename def GetPutOpInput(self): """Changes the name of a node group. """ assert len(self.items) == 1 return (self.request_body, { "group_name": self.items[0], "dry_run": self.dryRun(), }) class R_2_groups_name_assign_nodes(baserlib.OpcodeResource): """/2/groups/[group_name]/assign-nodes resource. """ PUT_OPCODE = opcodes.OpGroupAssignNodes def GetPutOpInput(self): """Assigns nodes to a group. """ assert len(self.items) == 1 return (self.request_body, { "group_name": self.items[0], "dry_run": self.dryRun(), "force": self.useForce(), }) def _ConvertUsbDevices(data): """Convert in place the usb_devices string to the proper format. In Ganeti 2.8.4 the separator for the usb_devices hvparam was changed from comma to space because commas cannot be accepted on the command line (they already act as the separator between different hvparams). RAPI should be able to accept commas for backwards compatibility, but we want it to also accept the new space separator. Therefore, we convert spaces into commas here and keep the old parsing logic elsewhere. """ try: hvparams = data["hvparams"] usb_devices = hvparams[constants.HV_USB_DEVICES] hvparams[constants.HV_USB_DEVICES] = usb_devices.replace(" ", ",") data["hvparams"] = hvparams except KeyError: #No usb_devices, no modification required pass class R_2_instances(baserlib.OpcodeResource): """/2/instances resource. """ GET_OPCODE = opcodes.OpInstanceQuery POST_OPCODE = opcodes.OpInstanceCreate POST_RENAME = { "os": "os_type", "name": "instance_name", } def GET(self): """Returns a list of all available instances. """ client = self.GetClient() use_locking = self.useLocking() if self.useBulk(): bulkdata = client.QueryInstances([], I_FIELDS, use_locking) return map(_UpdateBeparams, baserlib.MapBulkFields(bulkdata, I_FIELDS)) else: instancesdata = client.QueryInstances([], ["name"], use_locking) instanceslist = [row[0] for row in instancesdata] return baserlib.BuildUriList(instanceslist, "/2/instances/%s", uri_fields=("id", "uri")) def GetPostOpInput(self): """Create an instance. @return: a job id """ baserlib.CheckType(self.request_body, dict, "Body contents") # Default to request data version 0 data_version = self.getBodyParameter(_REQ_DATA_VERSION, 0) if data_version == 0: raise http.HttpBadRequest("Instance creation request version 0 is no" " longer supported") elif data_version != 1: raise http.HttpBadRequest("Unsupported request data version %s" % data_version) data = self.request_body.copy() # Remove "__version__" data.pop(_REQ_DATA_VERSION, None) _ConvertUsbDevices(data) return (data, { "dry_run": self.dryRun(), }) class R_2_instances_multi_alloc(baserlib.OpcodeResource): """/2/instances-multi-alloc resource. 
""" POST_OPCODE = opcodes.OpInstanceMultiAlloc def GetPostOpInput(self): """Try to allocate multiple instances. @return: A dict with submitted jobs, allocatable instances and failed allocations """ if "instances" not in self.request_body: raise http.HttpBadRequest("Request is missing required 'instances' field" " in body") op_id = { "OP_ID": self.POST_OPCODE.OP_ID, # pylint: disable=E1101 } body = objects.FillDict(self.request_body, { "instances": [objects.FillDict(inst, op_id) for inst in self.request_body["instances"]], }) return (body, { "dry_run": self.dryRun(), }) class R_2_instances_name(baserlib.OpcodeResource): """/2/instances/[instance_name] resource. """ GET_OPCODE = opcodes.OpInstanceQuery DELETE_OPCODE = opcodes.OpInstanceRemove def GET(self): """Send information about an instance. """ client = self.GetClient() instance_name = self.items[0] result = baserlib.HandleItemQueryErrors(client.QueryInstances, names=[instance_name], fields=I_FIELDS, use_locking=self.useLocking()) return _UpdateBeparams(baserlib.MapFields(I_FIELDS, result[0])) def GetDeleteOpInput(self): """Delete an instance. """ assert len(self.items) == 1 return ({}, { "instance_name": self.items[0], "ignore_failures": False, "dry_run": self.dryRun(), }) class R_2_instances_name_info(baserlib.OpcodeResource): """/2/instances/[instance_name]/info resource. """ GET_OPCODE = opcodes.OpInstanceQueryData def GetGetOpInput(self): """Request detailed instance information. """ assert len(self.items) == 1 return ({}, { "instances": [self.items[0]], "static": bool(self._checkIntVariable("static", default=0)), }) class R_2_instances_name_reboot(baserlib.OpcodeResource): """/2/instances/[instance_name]/reboot resource. Implements an instance reboot. """ POST_OPCODE = opcodes.OpInstanceReboot def GetPostOpInput(self): """Reboot an instance. The URI takes type=[hard|soft|full] and ignore_secondaries=[False|True] parameters. """ return ({}, { "instance_name": self.items[0], "reboot_type": self.queryargs.get("type", [constants.INSTANCE_REBOOT_HARD])[0], "ignore_secondaries": bool(self._checkIntVariable("ignore_secondaries")), "dry_run": self.dryRun(), }) class R_2_instances_name_startup(baserlib.OpcodeResource): """/2/instances/[instance_name]/startup resource. Implements an instance startup. """ PUT_OPCODE = opcodes.OpInstanceStartup def GetPutOpInput(self): """Startup an instance. The URI takes force=[False|True] parameter to start the instance if even if secondary disks are failing. """ return ({}, { "instance_name": self.items[0], "force": self.useForce(), "dry_run": self.dryRun(), "no_remember": bool(self._checkIntVariable("no_remember")), }) class R_2_instances_name_shutdown(baserlib.OpcodeResource): """/2/instances/[instance_name]/shutdown resource. Implements an instance shutdown. """ PUT_OPCODE = opcodes.OpInstanceShutdown def GetPutOpInput(self): """Shutdown an instance. """ return (self.request_body, { "instance_name": self.items[0], "no_remember": bool(self._checkIntVariable("no_remember")), "dry_run": self.dryRun(), }) def _ParseInstanceReinstallRequest(name, data): """Parses a request for reinstalling an instance. 
""" if not isinstance(data, dict): raise http.HttpBadRequest("Invalid body contents, not a dictionary") ostype = baserlib.CheckParameter(data, "os", default=None) start = baserlib.CheckParameter(data, "start", exptype=bool, default=True) osparams = baserlib.CheckParameter(data, "osparams", default=None) ops = [ opcodes.OpInstanceShutdown(instance_name=name), opcodes.OpInstanceReinstall(instance_name=name, os_type=ostype, osparams=osparams), ] if start: ops.append(opcodes.OpInstanceStartup(instance_name=name, force=False)) return ops class R_2_instances_name_reinstall(baserlib.OpcodeResource): """/2/instances/[instance_name]/reinstall resource. Implements an instance reinstall. """ POST_OPCODE = opcodes.OpInstanceReinstall def POST(self): """Reinstall an instance. The URI takes os=name and nostartup=[0|1] optional parameters. By default, the instance will be started automatically. """ if self.request_body: if self.queryargs: raise http.HttpBadRequest("Can't combine query and body parameters") body = self.request_body elif self.queryargs: # Legacy interface, do not modify/extend body = { "os": self._checkStringVariable("os"), "start": not self._checkIntVariable("nostartup"), } else: body = {} ops = _ParseInstanceReinstallRequest(self.items[0], body) return self.SubmitJob(ops) class R_2_instances_name_replace_disks(baserlib.OpcodeResource): """/2/instances/[instance_name]/replace-disks resource. """ POST_OPCODE = opcodes.OpInstanceReplaceDisks def GetPostOpInput(self): """Replaces disks on an instance. """ static = { "instance_name": self.items[0], } if self.request_body: data = self.request_body elif self.queryargs: # Legacy interface, do not modify/extend data = { "remote_node": self._checkStringVariable("remote_node", default=None), "mode": self._checkStringVariable("mode", default=None), "disks": self._checkStringVariable("disks", default=None), "iallocator": self._checkStringVariable("iallocator", default=None), } else: data = {} # Parse disks try: raw_disks = data.pop("disks") except KeyError: pass else: if raw_disks: if ht.TListOf(ht.TInt)(raw_disks): # pylint: disable=E1102 data["disks"] = raw_disks else: # Backwards compatibility for strings of the format "1, 2, 3" try: data["disks"] = [int(part) for part in raw_disks.split(",")] except (TypeError, ValueError), err: raise http.HttpBadRequest("Invalid disk index passed: %s" % err) return (data, static) class R_2_instances_name_activate_disks(baserlib.OpcodeResource): """/2/instances/[instance_name]/activate-disks resource. """ PUT_OPCODE = opcodes.OpInstanceActivateDisks def GetPutOpInput(self): """Activate disks for an instance. The URI might contain ignore_size to ignore current recorded size. """ return ({}, { "instance_name": self.items[0], "ignore_size": bool(self._checkIntVariable("ignore_size")), }) class R_2_instances_name_deactivate_disks(baserlib.OpcodeResource): """/2/instances/[instance_name]/deactivate-disks resource. """ PUT_OPCODE = opcodes.OpInstanceDeactivateDisks def GetPutOpInput(self): """Deactivate disks for an instance. """ return ({}, { "instance_name": self.items[0], }) class R_2_instances_name_recreate_disks(baserlib.OpcodeResource): """/2/instances/[instance_name]/recreate-disks resource. """ POST_OPCODE = opcodes.OpInstanceRecreateDisks def GetPostOpInput(self): """Recreate disks for an instance. """ return ({}, { "instance_name": self.items[0], }) class R_2_instances_name_prepare_export(baserlib.OpcodeResource): """/2/instances/[instance_name]/prepare-export resource. 
""" PUT_OPCODE = opcodes.OpBackupPrepare def GetPutOpInput(self): """Prepares an export for an instance. """ return ({}, { "instance_name": self.items[0], "mode": self._checkStringVariable("mode"), }) class R_2_instances_name_export(baserlib.OpcodeResource): """/2/instances/[instance_name]/export resource. """ PUT_OPCODE = opcodes.OpBackupExport PUT_RENAME = { "destination": "target_node", } def GetPutOpInput(self): """Exports an instance. """ return (self.request_body, { "instance_name": self.items[0], }) class R_2_instances_name_migrate(baserlib.OpcodeResource): """/2/instances/[instance_name]/migrate resource. """ PUT_OPCODE = opcodes.OpInstanceMigrate def GetPutOpInput(self): """Migrates an instance. """ return (self.request_body, { "instance_name": self.items[0], }) class R_2_instances_name_failover(baserlib.OpcodeResource): """/2/instances/[instance_name]/failover resource. """ PUT_OPCODE = opcodes.OpInstanceFailover def GetPutOpInput(self): """Does a failover of an instance. """ return (self.request_body, { "instance_name": self.items[0], }) class R_2_instances_name_rename(baserlib.OpcodeResource): """/2/instances/[instance_name]/rename resource. """ PUT_OPCODE = opcodes.OpInstanceRename def GetPutOpInput(self): """Changes the name of an instance. """ return (self.request_body, { "instance_name": self.items[0], }) class R_2_instances_name_modify(baserlib.OpcodeResource): """/2/instances/[instance_name]/modify resource. """ PUT_OPCODE = opcodes.OpInstanceSetParams def GetPutOpInput(self): """Changes parameters of an instance. """ data = self.request_body.copy() _ConvertUsbDevices(data) return (data, { "instance_name": self.items[0], }) class R_2_instances_name_disk_grow(baserlib.OpcodeResource): """/2/instances/[instance_name]/disk/[disk_index]/grow resource. """ POST_OPCODE = opcodes.OpInstanceGrowDisk def GetPostOpInput(self): """Increases the size of an instance disk. """ return (self.request_body, { "instance_name": self.items[0], "disk": int(self.items[1]), }) class R_2_instances_name_console(baserlib.ResourceBase): """/2/instances/[instance_name]/console resource. """ GET_ACCESS = [rapi.RAPI_ACCESS_WRITE, rapi.RAPI_ACCESS_READ] GET_OPCODE = opcodes.OpInstanceConsole def GET(self): """Request information for connecting to instance's console. @return: Serialized instance console description, see L{objects.InstanceConsole} """ client = self.GetClient() ((console, ), ) = client.QueryInstances([self.items[0]], ["console"], False) if console is None: raise http.HttpServiceUnavailable("Instance console unavailable") assert isinstance(console, dict) return console def _GetQueryFields(args): """Tries to extract C{fields} query parameter. @type args: dictionary @rtype: list of string @raise http.HttpBadRequest: When parameter can't be found """ try: fields = args["fields"] except KeyError: raise http.HttpBadRequest("Missing 'fields' query argument") return _SplitQueryFields(fields[0]) def _SplitQueryFields(fields): """Splits fields as given for a query request. @type fields: string @rtype: list of string """ return [i.strip() for i in fields.split(",")] class R_2_query(baserlib.ResourceBase): """/2/query/[resource] resource. """ # Results might contain sensitive information GET_ACCESS = [rapi.RAPI_ACCESS_WRITE, rapi.RAPI_ACCESS_READ] PUT_ACCESS = GET_ACCESS GET_OPCODE = opcodes.OpQuery PUT_OPCODE = opcodes.OpQuery def _Query(self, fields, qfilter): return self.GetClient().Query(self.items[0], fields, qfilter).ToDict() def GET(self): """Returns resource information. 
@return: Query result, see L{objects.QueryResponse} """ return self._Query(_GetQueryFields(self.queryargs), None) def PUT(self): """Submits job querying for resources. @return: Query result, see L{objects.QueryResponse} """ body = self.request_body baserlib.CheckType(body, dict, "Body contents") try: fields = body["fields"] except KeyError: fields = _GetQueryFields(self.queryargs) qfilter = body.get("qfilter", None) # TODO: remove this after 2.7 if qfilter is None: qfilter = body.get("filter", None) return self._Query(fields, qfilter) class R_2_query_fields(baserlib.ResourceBase): """/2/query/[resource]/fields resource. """ GET_OPCODE = opcodes.OpQueryFields def GET(self): """Retrieves list of available fields for a resource. @return: List of serialized L{objects.QueryFieldDefinition} """ try: raw_fields = self.queryargs["fields"] except KeyError: fields = None else: fields = _SplitQueryFields(raw_fields[0]) return self.GetClient().QueryFields(self.items[0], fields).ToDict() class _R_Tags(baserlib.OpcodeResource): """Quasiclass for tagging resources. Manages tags. When inheriting this class you must define the TAG_LEVEL for it. """ TAG_LEVEL = None GET_OPCODE = opcodes.OpTagsGet PUT_OPCODE = opcodes.OpTagsSet DELETE_OPCODE = opcodes.OpTagsDel def __init__(self, items, queryargs, req, **kwargs): """A tag resource constructor. We have to override the default to sort out cluster naming case. """ baserlib.OpcodeResource.__init__(self, items, queryargs, req, **kwargs) if self.TAG_LEVEL == constants.TAG_CLUSTER: self.name = None else: self.name = items[0] def GET(self): """Returns a list of tags. Example: ["tag1", "tag2", "tag3"] """ kind = self.TAG_LEVEL if kind in (constants.TAG_INSTANCE, constants.TAG_NODEGROUP, constants.TAG_NODE, constants.TAG_NETWORK): if not self.name: raise http.HttpBadRequest("Missing name on tag request") cl = self.GetClient(query=True) tags = list(cl.QueryTags(kind, self.name)) elif kind == constants.TAG_CLUSTER: assert not self.name # TODO: Use query API? ssc = ssconf.SimpleStore() tags = ssc.GetClusterTags() else: raise http.HttpBadRequest("Unhandled tag type!") return list(tags) def GetPutOpInput(self): """Add a set of tags. The request as a list of strings should be PUT to this URI. And you'll have back a job id. """ return ({}, { "kind": self.TAG_LEVEL, "name": self.name, "tags": self.queryargs.get("tag", []), "dry_run": self.dryRun(), }) def GetDeleteOpInput(self): """Delete a tag. In order to delete a set of tags, the DELETE request should be addressed to URI like: /tags?tag=[tag]&tag=[tag] """ # Re-use code return self.GetPutOpInput() class R_2_instances_name_tags(_R_Tags): """ /2/instances/[instance_name]/tags resource. Manages per-instance tags. """ TAG_LEVEL = constants.TAG_INSTANCE class R_2_nodes_name_tags(_R_Tags): """ /2/nodes/[node_name]/tags resource. Manages per-node tags. """ TAG_LEVEL = constants.TAG_NODE class R_2_groups_name_tags(_R_Tags): """ /2/groups/[group_name]/tags resource. Manages per-nodegroup tags. """ TAG_LEVEL = constants.TAG_NODEGROUP class R_2_networks_name_tags(_R_Tags): """ /2/networks/[network_name]/tags resource. Manages per-network tags. """ TAG_LEVEL = constants.TAG_NETWORK class R_2_tags(_R_Tags): """ /2/tags resource. Manages cluster tags. """ TAG_LEVEL = constants.TAG_CLUSTER ganeti-2.9.3/lib/rapi/__init__.py0000644000000000000000000000167612244641676016632 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2012 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti RAPI module""" from ganeti import compat RAPI_ACCESS_WRITE = "write" RAPI_ACCESS_READ = "read" RAPI_ACCESS_ALL = compat.UniqueFrozenset([ RAPI_ACCESS_WRITE, RAPI_ACCESS_READ, ]) ganeti-2.9.3/lib/rapi/baserlib.py0000644000000000000000000003707612271422343016645 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Remote API base resources library. """ # pylint: disable=C0103 # C0103: Invalid name, since the R_* names are not conforming import logging from ganeti import luxi from ganeti import rapi from ganeti import http from ganeti import errors from ganeti import compat from ganeti import constants from ganeti import pathutils from ganeti import utils # Dummy value to detect unchanged parameters _DEFAULT = object() #: Supported HTTP methods _SUPPORTED_METHODS = compat.UniqueFrozenset([ http.HTTP_DELETE, http.HTTP_GET, http.HTTP_POST, http.HTTP_PUT, ]) def _BuildOpcodeAttributes(): """Builds list of attributes used for per-handler opcodes. """ return [(method, "%s_OPCODE" % method, "%s_RENAME" % method, "Get%sOpInput" % method.capitalize()) for method in _SUPPORTED_METHODS] OPCODE_ATTRS = _BuildOpcodeAttributes() def BuildUriList(ids, uri_format, uri_fields=("name", "uri")): """Builds a URI list as used by index resources. @param ids: list of ids as strings @param uri_format: format to be applied for URI @param uri_fields: optional parameter for field IDs """ (field_id, field_uri) = uri_fields def _MapId(m_id): return { field_id: m_id, field_uri: uri_format % m_id, } # Make sure the result is sorted, makes it nicer to look at and simplifies # unittests. ids.sort() return map(_MapId, ids) def MapFields(names, data): """Maps two lists into one dictionary. Example:: >>> MapFields(["a", "b"], ["foo", 123]) {'a': 'foo', 'b': 123} @param names: field names (list of strings) @param data: field data (list) """ if len(names) != len(data): raise AttributeError("Names and data must have the same length") return dict(zip(names, data)) def MapBulkFields(itemslist, fields): """Map value to field name in to one dictionary. 
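Example (illustrative values; dictionary key order may differ)::

  >>> MapBulkFields([["inst1", 128], ["inst2", 256]], ["name", "memory"])
  [{'name': 'inst1', 'memory': 128}, {'name': 'inst2', 'memory': 256}]
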
@param itemslist: a list of items values @param fields: a list of items names @return: a list of mapped dictionaries """ items_details = [] for item in itemslist: mapped = MapFields(fields, item) items_details.append(mapped) return items_details def FillOpcode(opcls, body, static, rename=None): """Fills an opcode with body parameters. Parameter types are checked. @type opcls: L{opcodes.OpCode} @param opcls: Opcode class @type body: dict @param body: Body parameters as received from client @type static: dict @param static: Static parameters which can't be modified by client @type rename: dict @param rename: Renamed parameters, key as old name, value as new name @return: Opcode object """ if body is None: params = {} else: CheckType(body, dict, "Body contents") # Make copy to be modified params = body.copy() if rename: for old, new in rename.items(): if new in params and old in params: raise http.HttpBadRequest("Parameter '%s' was renamed to '%s', but" " both are specified" % (old, new)) if old in params: assert new not in params params[new] = params.pop(old) if static: overwritten = set(params.keys()) & set(static.keys()) if overwritten: raise http.HttpBadRequest("Can't overwrite static parameters %r" % overwritten) params.update(static) # Convert keys to strings (simplejson decodes them as unicode) params = dict((str(key), value) for (key, value) in params.items()) try: op = opcls(**params) # pylint: disable=W0142 op.Validate(False) except (errors.OpPrereqError, TypeError), err: raise http.HttpBadRequest("Invalid body parameters: %s" % err) return op def HandleItemQueryErrors(fn, *args, **kwargs): """Converts errors when querying a single item. """ try: return fn(*args, **kwargs) except errors.OpPrereqError, err: if len(err.args) == 2 and err.args[1] == errors.ECODE_NOENT: raise http.HttpNotFound() raise def FeedbackFn(msg): """Feedback logging function for jobs. We don't have a stdout for printing log messages, so log them to the http log at least. @param msg: the message """ (_, log_type, log_msg) = msg logging.info("%s: %s", log_type, log_msg) def CheckType(value, exptype, descr): """Abort request if value type doesn't match expected type. @param value: Value @type exptype: type @param exptype: Expected type @type descr: string @param descr: Description of value @return: Value (allows inline usage) """ if not isinstance(value, exptype): raise http.HttpBadRequest("%s: Type is '%s', but '%s' is expected" % (descr, type(value).__name__, exptype.__name__)) return value def CheckParameter(data, name, default=_DEFAULT, exptype=_DEFAULT): """Check and return the value for a given parameter. If no default value was given and the parameter doesn't exist in the input data, an error is raise. @type data: dict @param data: Dictionary containing input data @type name: string @param name: Parameter name @param default: Default value (can be None) @param exptype: Expected type (can be None) """ try: value = data[name] except KeyError: if default is not _DEFAULT: return default raise http.HttpBadRequest("Required parameter '%s' is missing" % name) if exptype is _DEFAULT: return value return CheckType(value, exptype, "'%s' parameter" % name) class ResourceBase(object): """Generic class for resources. """ # Default permission requirements GET_ACCESS = [] PUT_ACCESS = [rapi.RAPI_ACCESS_WRITE] POST_ACCESS = [rapi.RAPI_ACCESS_WRITE] DELETE_ACCESS = [rapi.RAPI_ACCESS_WRITE] def __init__(self, items, queryargs, req, _client_cls=None): """Generic resource constructor. 
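For illustration, a request for C{/2/instances/inst1?static=1} arrives
here with C{items} set to C{["inst1"]} and C{queryargs} set to
C{{"static": ["1"]}} (query argument values arrive as lists).
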
@param items: a list with variables encoded in the URL @param queryargs: a dictionary with additional options from URL @param req: Request context @param _client_cls: L{luxi} client class (unittests only) """ assert isinstance(queryargs, dict) self.items = items self.queryargs = queryargs self._req = req if _client_cls is None: _client_cls = luxi.Client self._client_cls = _client_cls def _GetRequestBody(self): """Returns the body data. """ return self._req.private.body_data request_body = property(fget=_GetRequestBody) def _checkIntVariable(self, name, default=0): """Return the parsed value of an int argument. """ val = self.queryargs.get(name, default) if isinstance(val, list): if val: val = val[0] else: val = default try: val = int(val) except (ValueError, TypeError): raise http.HttpBadRequest("Invalid value for the" " '%s' parameter" % (name,)) return val def _checkStringVariable(self, name, default=None): """Return the parsed value of a string argument. """ val = self.queryargs.get(name, default) if isinstance(val, list): if val: val = val[0] else: val = default return val def getBodyParameter(self, name, *args): """Check and return the value for a given parameter. If a second parameter is not given, an error will be returned, otherwise this parameter specifies the default value. @param name: the required parameter """ if args: return CheckParameter(self.request_body, name, default=args[0]) return CheckParameter(self.request_body, name) def useLocking(self): """Check if the request specifies locking. """ return bool(self._checkIntVariable("lock")) def useBulk(self): """Check if the request specifies bulk querying. """ return bool(self._checkIntVariable("bulk")) def useForce(self): """Check if the request specifies a forced operation. """ return bool(self._checkIntVariable("force")) def dryRun(self): """Check if the request specifies dry-run mode. """ return bool(self._checkIntVariable("dry-run")) def GetClient(self, query=False): """Wrapper for L{luxi.Client} with HTTP-specific error handling. @param query: this signifies that the client will only be used for queries; if the build-time parameter enable-split-queries is enabled, then the client will be connected to the query socket instead of the masterd socket """ if query and constants.ENABLE_SPLIT_QUERY: address = pathutils.QUERY_SOCKET else: address = None # Could be a function, pylint: disable=R0201 try: return self._client_cls(address=address) except luxi.NoMasterError, err: raise http.HttpBadGateway("Can't connect to master daemon: %s" % err) except luxi.PermissionError: raise http.HttpInternalServerError("Internal error: no permission to" " connect to the master daemon") def SubmitJob(self, op, cl=None): """Generic wrapper for submit job, for better http compatibility. 
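LUXI-level failures are translated to HTTP errors: a full or drained
job queue becomes 503, an unreachable master 502 and a luxi timeout
504. Illustrative call from a handler (the opcode is an example)::

  job_id = self.SubmitJob([opcodes.OpClusterQuery()])
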
@type op: list @param op: the list of opcodes for the job @type cl: None or luxi.Client @param cl: optional luxi client to use @rtype: string @return: the job ID """ if cl is None: cl = self.GetClient() try: return cl.SubmitJob(op) except errors.JobQueueFull: raise http.HttpServiceUnavailable("Job queue is full, needs archiving") except errors.JobQueueDrainError: raise http.HttpServiceUnavailable("Job queue is drained, cannot submit") except luxi.NoMasterError, err: raise http.HttpBadGateway("Master seems to be unreachable: %s" % err) except luxi.PermissionError: raise http.HttpInternalServerError("Internal error: no permission to" " connect to the master daemon") except luxi.TimeoutError, err: raise http.HttpGatewayTimeout("Timeout while talking to the master" " daemon: %s" % err) def GetResourceOpcodes(cls): """Returns all opcodes used by a resource. """ return frozenset(filter(None, (getattr(cls, op_attr, None) for (_, op_attr, _, _) in OPCODE_ATTRS))) def GetHandlerAccess(handler, method): """Returns the access rights for a method on a handler. @type handler: L{ResourceBase} @type method: string @rtype: string or None """ return getattr(handler, "%s_ACCESS" % method, None) class _MetaOpcodeResource(type): """Meta class for RAPI resources. """ def __call__(mcs, *args, **kwargs): """Instantiates class and patches it for use by the RAPI daemon. """ # Access to private attributes of a client class, pylint: disable=W0212 obj = type.__call__(mcs, *args, **kwargs) for (method, op_attr, rename_attr, fn_attr) in OPCODE_ATTRS: if hasattr(obj, method): # If the method handler is already defined, "*_RENAME" or "Get*OpInput" # shouldn't be (they're only used by the automatically generated # handler) assert not hasattr(obj, rename_attr) assert not hasattr(obj, fn_attr) else: # Try to generate handler method on handler instance try: opcode = getattr(obj, op_attr) except AttributeError: pass else: setattr(obj, method, compat.partial(obj._GenericHandler, opcode, getattr(obj, rename_attr, None), getattr(obj, fn_attr, obj._GetDefaultData))) return obj class OpcodeResource(ResourceBase): """Base class for opcode-based RAPI resources. Instances of this class automatically gain handler functions through L{_MetaOpcodeResource} for any method for which a C{$METHOD$_OPCODE} variable is defined at class level. Subclasses can define a C{Get$Method$OpInput} method to do their own opcode input processing (e.g. for static values). The C{$METHOD$_RENAME} variable defines which values are renamed (see L{baserlib.FillOpcode}). 
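A minimal sketch of a derived resource (class name and static value
are made up)::

  class R_example(OpcodeResource):
    POST_OPCODE = opcodes.OpTestDelay

    def GetPostOpInput(self):
      # client-supplied body parameters, plus static overrides
      return (self.request_body, {"duration": 1.0})
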
@cvar GET_OPCODE: Set this to a class derived from L{opcodes.OpCode} to automatically generate a GET handler submitting the opcode @cvar GET_RENAME: Set this to rename parameters in the GET handler (see L{baserlib.FillOpcode}) @ivar GetGetOpInput: Define this to override the default method for getting opcode parameters (see L{baserlib.OpcodeResource._GetDefaultData}) @cvar PUT_OPCODE: Set this to a class derived from L{opcodes.OpCode} to automatically generate a PUT handler submitting the opcode @cvar PUT_RENAME: Set this to rename parameters in the PUT handler (see L{baserlib.FillOpcode}) @ivar GetPutOpInput: Define this to override the default method for getting opcode parameters (see L{baserlib.OpcodeResource._GetDefaultData}) @cvar POST_OPCODE: Set this to a class derived from L{opcodes.OpCode} to automatically generate a POST handler submitting the opcode @cvar POST_RENAME: Set this to rename parameters in the POST handler (see L{baserlib.FillOpcode}) @ivar GetPostOpInput: Define this to override the default method for getting opcode parameters (see L{baserlib.OpcodeResource._GetDefaultData}) @cvar DELETE_OPCODE: Set this to a class derived from L{opcodes.OpCode} to automatically generate a DELETE handler submitting the opcode @cvar DELETE_RENAME: Set this to rename parameters in the DELETE handler (see L{baserlib.FillOpcode}) @ivar GetDeleteOpInput: Define this to override the default method for getting opcode parameters (see L{baserlib.OpcodeResource._GetDefaultData}) """ __metaclass__ = _MetaOpcodeResource def _GetDefaultData(self): return (self.request_body, None) def _GetRapiOpName(self): """Extracts the name of the RAPI operation from the class name """ if self.__class__.__name__.startswith("R_2_"): return self.__class__.__name__[4:] return self.__class__.__name__ def _GetCommonStatic(self): """Return the static parameters common to all the RAPI calls The reason is a parameter present in all the RAPI calls, and the reason trail has to be build for all of them, so the parameter is read here and used to build the reason trail, that is the actual parameter passed forward. """ trail = [] usr_reason = self._checkStringVariable("reason", default=None) if usr_reason: trail.append((constants.OPCODE_REASON_SRC_USER, usr_reason, utils.EpochNano())) reason_src = "%s:%s" % (constants.OPCODE_REASON_SRC_RLIB2, self._GetRapiOpName()) trail.append((reason_src, "", utils.EpochNano())) common_static = { "reason": trail, } return common_static def _GenericHandler(self, opcode, rename, fn): (body, specific_static) = fn() static = self._GetCommonStatic() if specific_static: static.update(specific_static) op = FillOpcode(opcode, body, static, rename=rename) return self.SubmitJob([op]) ganeti-2.9.3/lib/rapi/client_utils.py0000644000000000000000000000477412230001635017547 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """RAPI client utilities. """ from ganeti import constants from ganeti import cli from ganeti.rapi import client # Local constant to avoid importing ganeti.http HTTP_NOT_FOUND = 404 class RapiJobPollCb(cli.JobPollCbBase): def __init__(self, cl): """Initializes this class. @param cl: RAPI client instance """ cli.JobPollCbBase.__init__(self) self.cl = cl def WaitForJobChangeOnce(self, job_id, fields, prev_job_info, prev_log_serial): """Waits for changes on a job. """ try: result = self.cl.WaitForJobChange(job_id, fields, prev_job_info, prev_log_serial) except client.GanetiApiError, err: if err.code == HTTP_NOT_FOUND: return None raise if result is None: return constants.JOB_NOTCHANGED return (result["job_info"], result["log_entries"]) def QueryJobs(self, job_ids, fields): """Returns the given fields for the selected job IDs. @type job_ids: list of numbers @param job_ids: Job IDs @type fields: list of strings @param fields: Fields """ if len(job_ids) != 1: raise NotImplementedError("Only one job supported at this time") try: result = self.cl.GetJobStatus(job_ids[0]) except client.GanetiApiError, err: if err.code == HTTP_NOT_FOUND: return [None] raise return [[result[name] for name in fields], ] def PollJob(rapi_client, job_id, reporter): """Function to poll for the result of a job. @param rapi_client: RAPI client instance @type job_id: number @param job_id: Job ID @type reporter: L{cli.JobPollReportCbBase} @param reporter: PollJob reporter instance """ return cli.GenericPollJob(job_id, RapiJobPollCb(rapi_client), reporter) ganeti-2.9.3/lib/server/0000755000000000000000000000000012271445544015056 5ustar00rootroot00000000000000ganeti-2.9.3/lib/server/noded.py0000644000000000000000000010073612271422343016520 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
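# Illustrative use of the RAPI polling helpers defined above in
# client_utils.py; the host and instance names are made up:
#
#   from ganeti import cli
#   from ganeti.rapi import client, client_utils
#
#   rapi = client.GanetiRapiClient("master.example.com")
#   job_id = rapi.RebootInstance("inst1.example.com")
#   client_utils.PollJob(rapi, job_id, cli.StdioJobPollReportCb())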
"""Ganeti node daemon""" # pylint: disable=C0103,W0142 # C0103: Functions in this module need to have a given name structure, # and the name of the daemon doesn't match # W0142: Used * or ** magic, since we do use it extensively in this # module import os import sys import logging import signal import codecs from optparse import OptionParser from ganeti import backend from ganeti import constants from ganeti import objects from ganeti import errors from ganeti import jstore from ganeti import daemon from ganeti import http from ganeti import utils from ganeti.storage import container from ganeti import serializer from ganeti import netutils from ganeti import pathutils from ganeti import ssconf import ganeti.http.server # pylint: disable=W0611 queue_lock = None def _extendReasonTrail(trail, source, reason=""): """Extend the reason trail with noded information The trail is extended by appending the name of the noded functionality """ assert trail is not None trail_source = "%s:%s" % (constants.OPCODE_REASON_SRC_NODED, source) trail.append((trail_source, reason, utils.EpochNano())) def _PrepareQueueLock(): """Try to prepare the queue lock. @return: None for success, otherwise an exception object """ global queue_lock # pylint: disable=W0603 if queue_lock is not None: return None # Prepare job queue try: queue_lock = jstore.InitAndVerifyQueue(must_lock=False) return None except EnvironmentError, err: return err def _RequireJobQueueLock(fn): """Decorator for job queue manipulating functions. """ QUEUE_LOCK_TIMEOUT = 10 def wrapper(*args, **kwargs): # Locking in exclusive, blocking mode because there could be several # children running at the same time. Waiting up to 10 seconds. if _PrepareQueueLock() is not None: raise errors.JobQueueError("Job queue failed initialization," " cannot update jobs") queue_lock.Exclusive(blocking=True, timeout=QUEUE_LOCK_TIMEOUT) try: return fn(*args, **kwargs) finally: queue_lock.Unlock() return wrapper def _DecodeImportExportIO(ieio, ieioargs): """Decodes import/export I/O information. """ if ieio == constants.IEIO_RAW_DISK: assert len(ieioargs) == 1 return (objects.Disk.FromDict(ieioargs[0]), ) if ieio == constants.IEIO_SCRIPT: assert len(ieioargs) == 2 return (objects.Disk.FromDict(ieioargs[0]), ieioargs[1]) return ieioargs def _DefaultAlternative(value, default): """Returns value or, if evaluating to False, a default value. Returns the given value, unless it evaluates to False. In the latter case the default value is returned. @param value: Value to return if it doesn't evaluate to False @param default: Default value @return: Given value or the default """ if value: return value return default class MlockallRequestExecutor(http.server.HttpServerRequestExecutor): """Subclass ensuring request handlers are locked in RAM. """ def __init__(self, *args, **kwargs): utils.Mlockall() http.server.HttpServerRequestExecutor.__init__(self, *args, **kwargs) class NodeRequestHandler(http.server.HttpServerHandler): """The server implementation. This class holds all methods exposed over the RPC interface. """ # too many public methods, and unused args - all methods get params # due to the API # pylint: disable=R0904,W0613 def __init__(self): http.server.HttpServerHandler.__init__(self) self.noded_pid = os.getpid() def HandleRequest(self, req): """Handle a request. 
""" if req.request_method.upper() != http.HTTP_POST: raise http.HttpBadRequest("Only the POST method is supported") path = req.request_path if path.startswith("/"): path = path[1:] method = getattr(self, "perspective_%s" % path, None) if method is None: raise http.HttpNotFound() try: result = (True, method(serializer.LoadJson(req.request_body))) except backend.RPCFail, err: # our custom failure exception; str(err) works fine if the # exception was constructed with a single argument, and in # this case, err.message == err.args[0] == str(err) result = (False, str(err)) except errors.QuitGanetiException, err: # Tell parent to quit logging.info("Shutting down the node daemon, arguments: %s", str(err.args)) os.kill(self.noded_pid, signal.SIGTERM) # And return the error's arguments, which must be already in # correct tuple format result = err.args except Exception, err: logging.exception("Error in RPC call") result = (False, "Error while executing backend function: %s" % str(err)) return serializer.DumpJson(result) # the new block devices -------------------------- @staticmethod def perspective_blockdev_create(params): """Create a block device. """ (bdev_s, size, owner, on_primary, info, excl_stor) = params bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") return backend.BlockdevCreate(bdev, size, owner, on_primary, info, excl_stor) @staticmethod def perspective_blockdev_pause_resume_sync(params): """Pause/resume sync of a block device. """ disks_s, pause = params disks = [objects.Disk.FromDict(bdev_s) for bdev_s in disks_s] return backend.BlockdevPauseResumeSync(disks, pause) @staticmethod def perspective_blockdev_wipe(params): """Wipe a block device. """ bdev_s, offset, size = params bdev = objects.Disk.FromDict(bdev_s) return backend.BlockdevWipe(bdev, offset, size) @staticmethod def perspective_blockdev_remove(params): """Remove a block device. """ bdev_s = params[0] bdev = objects.Disk.FromDict(bdev_s) return backend.BlockdevRemove(bdev) @staticmethod def perspective_blockdev_rename(params): """Remove a block device. """ devlist = [(objects.Disk.FromDict(ds), uid) for ds, uid in params[0]] return backend.BlockdevRename(devlist) @staticmethod def perspective_blockdev_assemble(params): """Assemble a block device. """ bdev_s, owner, on_primary, idx = params bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") return backend.BlockdevAssemble(bdev, owner, on_primary, idx) @staticmethod def perspective_blockdev_shutdown(params): """Shutdown a block device. """ bdev_s = params[0] bdev = objects.Disk.FromDict(bdev_s) if bdev is None: raise ValueError("can't unserialize data!") return backend.BlockdevShutdown(bdev) @staticmethod def perspective_blockdev_addchildren(params): """Add a child to a mirror device. Note: this is only valid for mirror devices. It's the caller's duty to send a correct disk, otherwise we raise an error. """ bdev_s, ndev_s = params bdev = objects.Disk.FromDict(bdev_s) ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s] if bdev is None or ndevs.count(None) > 0: raise ValueError("can't unserialize data!") return backend.BlockdevAddchildren(bdev, ndevs) @staticmethod def perspective_blockdev_removechildren(params): """Remove a child from a mirror device. This is only valid for mirror devices, of course. It's the callers duty to send a correct disk, otherwise we raise an error. 
""" bdev_s, ndev_s = params bdev = objects.Disk.FromDict(bdev_s) ndevs = [objects.Disk.FromDict(disk_s) for disk_s in ndev_s] if bdev is None or ndevs.count(None) > 0: raise ValueError("can't unserialize data!") return backend.BlockdevRemovechildren(bdev, ndevs) @staticmethod def perspective_blockdev_getmirrorstatus(params): """Return the mirror status for a list of disks. """ disks = [objects.Disk.FromDict(dsk_s) for dsk_s in params[0]] return [status.ToDict() for status in backend.BlockdevGetmirrorstatus(disks)] @staticmethod def perspective_blockdev_getmirrorstatus_multi(params): """Return the mirror status for a list of disks. """ (node_disks, ) = params disks = [objects.Disk.FromDict(dsk_s) for dsk_s in node_disks] result = [] for (success, status) in backend.BlockdevGetmirrorstatusMulti(disks): if success: result.append((success, status.ToDict())) else: result.append((success, status)) return result @staticmethod def perspective_blockdev_find(params): """Expose the FindBlockDevice functionality for a disk. This will try to find but not activate a disk. """ disk = objects.Disk.FromDict(params[0]) result = backend.BlockdevFind(disk) if result is None: return None return result.ToDict() @staticmethod def perspective_blockdev_snapshot(params): """Create a snapshot device. Note that this is only valid for LVM disks, if we get passed something else we raise an exception. The snapshot device can be remove by calling the generic block device remove call. """ cfbd = objects.Disk.FromDict(params[0]) return backend.BlockdevSnapshot(cfbd) @staticmethod def perspective_blockdev_grow(params): """Grow a stack of devices. """ if len(params) < 5: raise ValueError("Received only %s parameters in blockdev_grow," " old master?" % len(params)) cfbd = objects.Disk.FromDict(params[0]) amount = params[1] dryrun = params[2] backingstore = params[3] excl_stor = params[4] return backend.BlockdevGrow(cfbd, amount, dryrun, backingstore, excl_stor) @staticmethod def perspective_blockdev_close(params): """Closes the given block devices. """ disks = [objects.Disk.FromDict(cf) for cf in params[1]] return backend.BlockdevClose(params[0], disks) @staticmethod def perspective_blockdev_getdimensions(params): """Compute the sizes of the given block devices. """ disks = [objects.Disk.FromDict(cf) for cf in params[0]] return backend.BlockdevGetdimensions(disks) @staticmethod def perspective_blockdev_export(params): """Compute the sizes of the given block devices. """ disk = objects.Disk.FromDict(params[0]) dest_node_ip, dest_path, cluster_name = params[1:] return backend.BlockdevExport(disk, dest_node_ip, dest_path, cluster_name) @staticmethod def perspective_blockdev_setinfo(params): """Sets metadata information on the given block device. """ (disk, info) = params disk = objects.Disk.FromDict(disk) return backend.BlockdevSetInfo(disk, info) # blockdev/drbd specific methods ---------- @staticmethod def perspective_drbd_disconnect_net(params): """Disconnects the network connection of drbd disks. Note that this is only valid for drbd disks, so the members of the disk list must all be drbd devices. """ nodes_ip, disks, target_node_uuid = params disks = [objects.Disk.FromDict(cf) for cf in disks] return backend.DrbdDisconnectNet(target_node_uuid, nodes_ip, disks) @staticmethod def perspective_drbd_attach_net(params): """Attaches the network connection of drbd disks. Note that this is only valid for drbd disks, so the members of the disk list must all be drbd devices. 
""" nodes_ip, disks, instance_name, multimaster, target_node_uuid = params disks = [objects.Disk.FromDict(cf) for cf in disks] return backend.DrbdAttachNet(target_node_uuid, nodes_ip, disks, instance_name, multimaster) @staticmethod def perspective_drbd_wait_sync(params): """Wait until DRBD disks are synched. Note that this is only valid for drbd disks, so the members of the disk list must all be drbd devices. """ nodes_ip, disks, target_node_uuid = params disks = [objects.Disk.FromDict(cf) for cf in disks] return backend.DrbdWaitSync(target_node_uuid, nodes_ip, disks) @staticmethod def perspective_drbd_needs_activation(params): """Checks if the drbd devices need activation Note that this is only valid for drbd disks, so the members of the disk list must all be drbd devices. """ nodes_ip, disks, target_node_uuid = params disks = [objects.Disk.FromDict(cf) for cf in disks] return backend.DrbdNeedsActivation(target_node_uuid, nodes_ip, disks) @staticmethod def perspective_drbd_helper(params): """Query drbd helper. """ return backend.GetDrbdUsermodeHelper() # export/import -------------------------- @staticmethod def perspective_finalize_export(params): """Expose the finalize export functionality. """ instance = objects.Instance.FromDict(params[0]) snap_disks = [] for disk in params[1]: if isinstance(disk, bool): snap_disks.append(disk) else: snap_disks.append(objects.Disk.FromDict(disk)) return backend.FinalizeExport(instance, snap_disks) @staticmethod def perspective_export_info(params): """Query information about an existing export on this node. The given path may not contain an export, in which case we return None. """ path = params[0] return backend.ExportInfo(path) @staticmethod def perspective_export_list(params): """List the available exports on this node. Note that as opposed to export_info, which may query data about an export in any path, this only queries the standard Ganeti path (pathutils.EXPORT_DIR). """ return backend.ListExports() @staticmethod def perspective_export_remove(params): """Remove an export. """ export = params[0] return backend.RemoveExport(export) # block device --------------------- @staticmethod def perspective_bdev_sizes(params): """Query the list of block devices """ devices = params[0] return backend.GetBlockDevSizes(devices) # volume -------------------------- @staticmethod def perspective_lv_list(params): """Query the list of logical volumes in a given volume group. """ vgname = params[0] return backend.GetVolumeList(vgname) @staticmethod def perspective_vg_list(params): """Query the list of volume groups. """ return backend.ListVolumeGroups() # Storage -------------------------- @staticmethod def perspective_storage_list(params): """Get list of storage units. """ (su_name, su_args, name, fields) = params return container.GetStorage(su_name, *su_args).List(name, fields) @staticmethod def perspective_storage_modify(params): """Modify a storage unit. """ (su_name, su_args, name, changes) = params return container.GetStorage(su_name, *su_args).Modify(name, changes) @staticmethod def perspective_storage_execute(params): """Execute an operation on a storage unit. """ (su_name, su_args, name, op) = params return container.GetStorage(su_name, *su_args).Execute(name, op) # bridge -------------------------- @staticmethod def perspective_bridges_exist(params): """Check if all bridges given exist on this node. 
""" bridges_list = params[0] return backend.BridgesExist(bridges_list) # instance -------------------------- @staticmethod def perspective_instance_os_add(params): """Install an OS on a given instance. """ inst_s = params[0] inst = objects.Instance.FromDict(inst_s) reinstall = params[1] debug = params[2] return backend.InstanceOsAdd(inst, reinstall, debug) @staticmethod def perspective_instance_run_rename(params): """Runs the OS rename script for an instance. """ inst_s, old_name, debug = params inst = objects.Instance.FromDict(inst_s) return backend.RunRenameInstance(inst, old_name, debug) @staticmethod def perspective_instance_shutdown(params): """Shutdown an instance. """ instance = objects.Instance.FromDict(params[0]) timeout = params[1] trail = params[2] _extendReasonTrail(trail, "shutdown") return backend.InstanceShutdown(instance, timeout, trail) @staticmethod def perspective_instance_start(params): """Start an instance. """ (instance_name, startup_paused, trail) = params instance = objects.Instance.FromDict(instance_name) _extendReasonTrail(trail, "start") return backend.StartInstance(instance, startup_paused, trail) @staticmethod def perspective_migration_info(params): """Gather information about an instance to be migrated. """ instance = objects.Instance.FromDict(params[0]) return backend.MigrationInfo(instance) @staticmethod def perspective_accept_instance(params): """Prepare the node to accept an instance. """ instance, info, target = params instance = objects.Instance.FromDict(instance) return backend.AcceptInstance(instance, info, target) @staticmethod def perspective_instance_finalize_migration_dst(params): """Finalize the instance migration on the destination node. """ instance, info, success = params instance = objects.Instance.FromDict(instance) return backend.FinalizeMigrationDst(instance, info, success) @staticmethod def perspective_instance_migrate(params): """Migrates an instance. """ cluster_name, instance, target, live = params instance = objects.Instance.FromDict(instance) return backend.MigrateInstance(cluster_name, instance, target, live) @staticmethod def perspective_instance_finalize_migration_src(params): """Finalize the instance migration on the source node. """ instance, success, live = params instance = objects.Instance.FromDict(instance) return backend.FinalizeMigrationSource(instance, success, live) @staticmethod def perspective_instance_get_migration_status(params): """Reports migration status. """ instance = objects.Instance.FromDict(params[0]) return backend.GetMigrationStatus(instance).ToDict() @staticmethod def perspective_instance_reboot(params): """Reboot an instance. """ instance = objects.Instance.FromDict(params[0]) reboot_type = params[1] shutdown_timeout = params[2] trail = params[3] _extendReasonTrail(trail, "reboot") return backend.InstanceReboot(instance, reboot_type, shutdown_timeout, trail) @staticmethod def perspective_instance_balloon_memory(params): """Modify instance runtime memory. """ instance_dict, memory = params instance = objects.Instance.FromDict(instance_dict) return backend.InstanceBalloonMemory(instance, memory) @staticmethod def perspective_instance_info(params): """Query instance information. """ (instance_name, hypervisor_name, hvparams) = params return backend.GetInstanceInfo(instance_name, hypervisor_name, hvparams) @staticmethod def perspective_instance_migratable(params): """Query whether the specified instance can be migrated. 
""" instance = objects.Instance.FromDict(params[0]) return backend.GetInstanceMigratable(instance) @staticmethod def perspective_all_instances_info(params): """Query information about all instances. """ (hypervisor_list, all_hvparams) = params return backend.GetAllInstancesInfo(hypervisor_list, all_hvparams) @staticmethod def perspective_instance_list(params): """Query the list of running instances. """ (hypervisor_list, hvparams) = params return backend.GetInstanceList(hypervisor_list, hvparams) # node -------------------------- @staticmethod def perspective_node_has_ip_address(params): """Checks if a node has the given ip address. """ return netutils.IPAddress.Own(params[0]) @staticmethod def perspective_node_info(params): """Query node information. """ (storage_units, hv_specs) = params return backend.GetNodeInfo(storage_units, hv_specs) @staticmethod def perspective_etc_hosts_modify(params): """Modify a node entry in /etc/hosts. """ backend.EtcHostsModify(params[0], params[1], params[2]) return True @staticmethod def perspective_node_verify(params): """Run a verify sequence on this node. """ (what, cluster_name, hvparams) = params return backend.VerifyNode(what, cluster_name, hvparams) @classmethod def perspective_node_verify_light(cls, params): """Run a light verify sequence on this node. """ # So far it's the same as the normal node_verify return cls.perspective_node_verify(params) @staticmethod def perspective_node_start_master_daemons(params): """Start the master daemons on this node. """ return backend.StartMasterDaemons(params[0]) @staticmethod def perspective_node_activate_master_ip(params): """Activate the master IP on this node. """ master_params = objects.MasterNetworkParameters.FromDict(params[0]) return backend.ActivateMasterIp(master_params, params[1]) @staticmethod def perspective_node_deactivate_master_ip(params): """Deactivate the master IP on this node. """ master_params = objects.MasterNetworkParameters.FromDict(params[0]) return backend.DeactivateMasterIp(master_params, params[1]) @staticmethod def perspective_node_stop_master(params): """Stops master daemons on this node. """ return backend.StopMasterDaemons() @staticmethod def perspective_node_change_master_netmask(params): """Change the master IP netmask. """ return backend.ChangeMasterNetmask(params[0], params[1], params[2], params[3]) @staticmethod def perspective_node_leave_cluster(params): """Cleanup after leaving a cluster. """ return backend.LeaveCluster(params[0]) @staticmethod def perspective_node_volumes(params): """Query the list of all logical volume groups. """ return backend.NodeVolumes() @staticmethod def perspective_node_demote_from_mc(params): """Demote a node from the master candidate role. """ return backend.DemoteFromMC() @staticmethod def perspective_node_powercycle(params): """Tries to powercycle the nod. """ (hypervisor_type, hvparams) = params return backend.PowercycleNode(hypervisor_type, hvparams) # cluster -------------------------- @staticmethod def perspective_version(params): """Query version information. """ return constants.PROTOCOL_VERSION @staticmethod def perspective_upload_file(params): """Upload a file. Note that the backend implementation imposes strict rules on which files are accepted. """ return backend.UploadFile(*(params[0])) @staticmethod def perspective_master_info(params): """Query master information. """ return backend.GetMasterInfo() @staticmethod def perspective_run_oob(params): """Runs oob on node. 
""" output = backend.RunOob(params[0], params[1], params[2], params[3]) if output: result = serializer.LoadJson(output) else: result = None return result @staticmethod def perspective_restricted_command(params): """Runs a restricted command. """ (cmd, ) = params return backend.RunRestrictedCmd(cmd) @staticmethod def perspective_write_ssconf_files(params): """Write ssconf files. """ (values,) = params return ssconf.WriteSsconfFiles(values) @staticmethod def perspective_get_watcher_pause(params): """Get watcher pause end. """ return utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE) @staticmethod def perspective_set_watcher_pause(params): """Set watcher pause. """ (until, ) = params return backend.SetWatcherPause(until) # os ----------------------- @staticmethod def perspective_os_diagnose(params): """Query detailed information about existing OSes. """ return backend.DiagnoseOS() @staticmethod def perspective_os_get(params): """Query information about a given OS. """ name = params[0] os_obj = backend.OSFromDisk(name) return os_obj.ToDict() @staticmethod def perspective_os_validate(params): """Run a given OS' validation routine. """ required, name, checks, params = params return backend.ValidateOS(required, name, checks, params) # extstorage ----------------------- @staticmethod def perspective_extstorage_diagnose(params): """Query detailed information about existing extstorage providers. """ return backend.DiagnoseExtStorage() # hooks ----------------------- @staticmethod def perspective_hooks_runner(params): """Run hook scripts. """ hpath, phase, env = params hr = backend.HooksRunner() return hr.RunHooks(hpath, phase, env) # iallocator ----------------- @staticmethod def perspective_iallocator_runner(params): """Run an iallocator script. """ name, idata = params iar = backend.IAllocatorRunner() return iar.Run(name, idata) # test ----------------------- @staticmethod def perspective_test_delay(params): """Run test delay. """ duration = params[0] status, rval = utils.TestDelay(duration) if not status: raise backend.RPCFail(rval) return rval # file storage --------------- @staticmethod def perspective_file_storage_dir_create(params): """Create the file storage directory. """ file_storage_dir = params[0] return backend.CreateFileStorageDir(file_storage_dir) @staticmethod def perspective_file_storage_dir_remove(params): """Remove the file storage directory. """ file_storage_dir = params[0] return backend.RemoveFileStorageDir(file_storage_dir) @staticmethod def perspective_file_storage_dir_rename(params): """Rename the file storage directory. """ old_file_storage_dir = params[0] new_file_storage_dir = params[1] return backend.RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir) # jobs ------------------------ @staticmethod @_RequireJobQueueLock def perspective_jobqueue_update(params): """Update job queue. """ (file_name, content) = params return backend.JobQueueUpdate(file_name, content) @staticmethod @_RequireJobQueueLock def perspective_jobqueue_purge(params): """Purge job queue. """ return backend.JobQueuePurge() @staticmethod @_RequireJobQueueLock def perspective_jobqueue_rename(params): """Rename a job queue file. """ # TODO: What if a file fails to rename? return [backend.JobQueueRename(old, new) for old, new in params[0]] @staticmethod @_RequireJobQueueLock def perspective_jobqueue_set_drain_flag(params): """Set job queue's drain flag. 
""" (flag, ) = params return jstore.SetDrainFlag(flag) # hypervisor --------------- @staticmethod def perspective_hypervisor_validate_params(params): """Validate the hypervisor parameters. """ (hvname, hvparams) = params return backend.ValidateHVParams(hvname, hvparams) # Crypto @staticmethod def perspective_x509_cert_create(params): """Creates a new X509 certificate for SSL/TLS. """ (validity, ) = params return backend.CreateX509Certificate(validity) @staticmethod def perspective_x509_cert_remove(params): """Removes a X509 certificate. """ (name, ) = params return backend.RemoveX509Certificate(name) # Import and export @staticmethod def perspective_import_start(params): """Starts an import daemon. """ (opts_s, instance, component, (dest, dest_args)) = params opts = objects.ImportExportOptions.FromDict(opts_s) return backend.StartImportExportDaemon(constants.IEM_IMPORT, opts, None, None, objects.Instance.FromDict(instance), component, dest, _DecodeImportExportIO(dest, dest_args)) @staticmethod def perspective_export_start(params): """Starts an export daemon. """ (opts_s, host, port, instance, component, (source, source_args)) = params opts = objects.ImportExportOptions.FromDict(opts_s) return backend.StartImportExportDaemon(constants.IEM_EXPORT, opts, host, port, objects.Instance.FromDict(instance), component, source, _DecodeImportExportIO(source, source_args)) @staticmethod def perspective_impexp_status(params): """Retrieves the status of an import or export daemon. """ return backend.GetImportExportStatus(params[0]) @staticmethod def perspective_impexp_abort(params): """Aborts an import or export. """ return backend.AbortImportExport(params[0]) @staticmethod def perspective_impexp_cleanup(params): """Cleans up after an import or export. """ return backend.CleanupImportExport(params[0]) def CheckNoded(_, args): """Initial checks whether to run or exit with a failure. """ if args: # noded doesn't take any arguments print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" % sys.argv[0]) sys.exit(constants.EXIT_FAILURE) try: codecs.lookup("string-escape") except LookupError: print >> sys.stderr, ("Can't load the string-escape code which is part" " of the Python installation. Is your installation" " complete/correct? Aborting.") sys.exit(constants.EXIT_FAILURE) def PrepNoded(options, _): """Preparation node daemon function, executed with the PID file held. 
""" if options.mlock: request_executor_class = MlockallRequestExecutor try: utils.Mlockall() except errors.NoCtypesError: logging.warning("Cannot set memory lock, ctypes module not found") request_executor_class = http.server.HttpServerRequestExecutor else: request_executor_class = http.server.HttpServerRequestExecutor # Read SSL certificate if options.ssl: ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key, ssl_cert_path=options.ssl_cert) else: ssl_params = None err = _PrepareQueueLock() if err is not None: # this might be some kind of file-system/permission error; while # this breaks the job queue functionality, we shouldn't prevent # startup of the whole node daemon because of this logging.critical("Can't init/verify the queue, proceeding anyway: %s", err) handler = NodeRequestHandler() mainloop = daemon.Mainloop() server = \ http.server.HttpServer(mainloop, options.bind_address, options.port, handler, ssl_params=ssl_params, ssl_verify_peer=True, request_executor_class=request_executor_class) server.Start() return (mainloop, server) def ExecNoded(options, args, prep_data): # pylint: disable=W0613 """Main node daemon function, executed with the PID file held. """ (mainloop, server) = prep_data try: mainloop.Run() finally: server.Stop() def Main(): """Main function for the node daemon. """ parser = OptionParser(description="Ganeti node daemon", usage=("%prog [-f] [-d] [-p port] [-b ADDRESS]" " [-i INTERFACE]"), version="%%prog (ganeti) %s" % constants.RELEASE_VERSION) parser.add_option("--no-mlock", dest="mlock", help="Do not mlock the node memory in ram", default=True, action="store_false") daemon.GenericMain(constants.NODED, parser, CheckNoded, PrepNoded, ExecNoded, default_ssl_cert=pathutils.NODED_CERT_FILE, default_ssl_key=pathutils.NODED_CERT_FILE, console_logging=True) ganeti-2.9.3/lib/server/masterd.py0000644000000000000000000006302612271422343017066 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Master daemon program. Some classes deviates from the standard style guide since the inheritance from parent classes requires it. 
""" # pylint: disable=C0103 # C0103: Invalid name ganeti-masterd import grp import os import pwd import sys import socket import time import tempfile import logging from optparse import OptionParser from ganeti import config from ganeti import constants from ganeti import daemon from ganeti import mcpu from ganeti import opcodes from ganeti import jqueue from ganeti import locking from ganeti import luxi from ganeti import utils from ganeti import errors from ganeti import ssconf from ganeti import workerpool from ganeti import rpc from ganeti import bootstrap from ganeti import netutils from ganeti import objects from ganeti import query from ganeti import runtime from ganeti import pathutils from ganeti import ht CLIENT_REQUEST_WORKERS = 16 EXIT_NOTMASTER = constants.EXIT_NOTMASTER EXIT_NODESETUP_ERROR = constants.EXIT_NODESETUP_ERROR def _LogNewJob(status, info, ops): """Log information about a recently submitted job. """ op_summary = utils.CommaJoin(op.Summary() for op in ops) if status: logging.info("New job with id %s, summary: %s", info, op_summary) else: logging.info("Failed to submit job, reason: '%s', summary: %s", info, op_summary) class ClientRequestWorker(workerpool.BaseWorker): # pylint: disable=W0221 def RunTask(self, server, message, client): """Process the request. """ client_ops = ClientOps(server) try: (method, args, version) = luxi.ParseRequest(message) except luxi.ProtocolError, err: logging.error("Protocol Error: %s", err) client.close_log() return success = False try: # Verify client's version if there was one in the request if version is not None and version != constants.LUXI_VERSION: raise errors.LuxiError("LUXI version mismatch, server %s, request %s" % (constants.LUXI_VERSION, version)) result = client_ops.handle_request(method, args) success = True except errors.GenericError, err: logging.exception("Unexpected exception") success = False result = errors.EncodeException(err) except: logging.exception("Unexpected exception") err = sys.exc_info() result = "Caught exception: %s" % str(err[1]) try: reply = luxi.FormatResponse(success, result) client.send_message(reply) # awake the main thread so that it can write out the data. server.awaker.signal() except: # pylint: disable=W0702 logging.exception("Send error") client.close_log() class MasterClientHandler(daemon.AsyncTerminatedMessageStream): """Handler for master peers. """ _MAX_UNHANDLED = 1 def __init__(self, server, connected_socket, client_address, family): daemon.AsyncTerminatedMessageStream.__init__(self, connected_socket, client_address, constants.LUXI_EOM, family, self._MAX_UNHANDLED) self.server = server def handle_message(self, message, _): self.server.request_workers.AddTask((self.server, message, self)) class _MasterShutdownCheck: """Logic for master daemon shutdown. """ #: How long to wait between checks _CHECK_INTERVAL = 5.0 #: How long to wait after all jobs are done (e.g. to give clients time to #: retrieve the job status) _SHUTDOWN_LINGER = 5.0 def __init__(self): """Initializes this class. """ self._had_active_jobs = None self._linger_timeout = None def __call__(self, jq_prepare_result): """Determines if master daemon is ready for shutdown. 
@param jq_prepare_result: Result of L{jqueue.JobQueue.PrepareShutdown} @rtype: None or number @return: None if master daemon is ready, timeout if the check must be repeated """ if jq_prepare_result: # Check again shortly logging.info("Job queue has been notified for shutdown but is still" " busy; next check in %s seconds", self._CHECK_INTERVAL) self._had_active_jobs = True return self._CHECK_INTERVAL if not self._had_active_jobs: # Can shut down as there were no active jobs on the first check return None # No jobs are running anymore, but maybe some clients want to collect some # information. Give them a short amount of time. if self._linger_timeout is None: self._linger_timeout = utils.RunningTimeout(self._SHUTDOWN_LINGER, True) remaining = self._linger_timeout.Remaining() logging.info("Job queue no longer busy; shutting down master daemon" " in %s seconds", remaining) # TODO: Should the master daemon socket be closed at this point? Doing so # wouldn't affect existing connections. if remaining < 0: return None else: return remaining class MasterServer(daemon.AsyncStreamServer): """Master Server. This is the main asynchronous master server. It handles connections to the master socket. """ family = socket.AF_UNIX def __init__(self, address, uid, gid): """MasterServer constructor @param address: the unix socket address to bind the MasterServer to @param uid: The uid of the owner of the socket @param gid: The gid of the owner of the socket """ temp_name = tempfile.mktemp(dir=os.path.dirname(address)) daemon.AsyncStreamServer.__init__(self, self.family, temp_name) os.chmod(temp_name, 0770) os.chown(temp_name, uid, gid) os.rename(temp_name, address) self.awaker = daemon.AsyncAwaker() # We'll only start threads once we've forked. self.context = None self.request_workers = None self._shutdown_check = None def handle_connection(self, connected_socket, client_address): # TODO: add connection count and limit the number of open connections to a # maximum number to avoid breaking for lack of file descriptors or memory. MasterClientHandler(self, connected_socket, client_address, self.family) def setup_queue(self): self.context = GanetiContext() self.request_workers = workerpool.WorkerPool("ClientReq", CLIENT_REQUEST_WORKERS, ClientRequestWorker) def WaitForShutdown(self): """Prepares server for shutdown. """ if self._shutdown_check is None: self._shutdown_check = _MasterShutdownCheck() return self._shutdown_check(self.context.jobqueue.PrepareShutdown()) def server_cleanup(self): """Cleanup the server. This involves shutting down the processor threads and the master socket. 
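The order is significant: the listening socket is closed first so that
no new connections are accepted, then the request workers are
terminated and finally the job queue is shut down.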
""" try: self.close() finally: if self.request_workers: self.request_workers.TerminateWorkers() if self.context: self.context.jobqueue.Shutdown() class ClientOps: """Class holding high-level client operations.""" def __init__(self, server): self.server = server def handle_request(self, method, args): # pylint: disable=R0911 context = self.server.context queue = context.jobqueue # TODO: Parameter validation if not isinstance(args, (tuple, list)): logging.info("Received invalid arguments of type '%s'", type(args)) raise ValueError("Invalid arguments type '%s'" % type(args)) if method not in luxi.REQ_ALL: logging.info("Received invalid request '%s'", method) raise ValueError("Invalid operation '%s'" % method) # TODO: Rewrite to not exit in each 'if/elif' branch if method == luxi.REQ_SUBMIT_JOB: logging.info("Receiving new job") (job_def, ) = args ops = [opcodes.OpCode.LoadOpCode(state) for state in job_def] job_id = queue.SubmitJob(ops) _LogNewJob(True, job_id, ops) return job_id elif method == luxi.REQ_SUBMIT_MANY_JOBS: logging.info("Receiving multiple jobs") (job_defs, ) = args jobs = [] for ops in job_defs: jobs.append([opcodes.OpCode.LoadOpCode(state) for state in ops]) job_ids = queue.SubmitManyJobs(jobs) for ((status, job_id), ops) in zip(job_ids, jobs): _LogNewJob(status, job_id, ops) return job_ids elif method == luxi.REQ_CANCEL_JOB: (job_id, ) = args logging.info("Received job cancel request for %s", job_id) return queue.CancelJob(job_id) elif method == luxi.REQ_CHANGE_JOB_PRIORITY: (job_id, priority) = args logging.info("Received request to change priority for job %s to %s", job_id, priority) return queue.ChangeJobPriority(job_id, priority) elif method == luxi.REQ_ARCHIVE_JOB: (job_id, ) = args logging.info("Received job archive request for %s", job_id) return queue.ArchiveJob(job_id) elif method == luxi.REQ_AUTO_ARCHIVE_JOBS: (age, timeout) = args logging.info("Received job autoarchive request for age %s, timeout %s", age, timeout) return queue.AutoArchiveJobs(age, timeout) elif method == luxi.REQ_WAIT_FOR_JOB_CHANGE: (job_id, fields, prev_job_info, prev_log_serial, timeout) = args logging.info("Received job poll request for %s", job_id) return queue.WaitForJobChanges(job_id, fields, prev_job_info, prev_log_serial, timeout) elif method == luxi.REQ_QUERY: (what, fields, qfilter) = args if what in constants.QR_VIA_OP: result = self._Query(opcodes.OpQuery(what=what, fields=fields, qfilter=qfilter)) elif what == constants.QR_LOCK: if qfilter is not None: raise errors.OpPrereqError("Lock queries can't be filtered", errors.ECODE_INVAL) return context.glm.QueryLocks(fields) elif what == constants.QR_JOB: return queue.QueryJobs(fields, qfilter) elif what in constants.QR_VIA_LUXI: raise NotImplementedError else: raise errors.OpPrereqError("Resource type '%s' unknown" % what, errors.ECODE_INVAL) return result elif method == luxi.REQ_QUERY_FIELDS: (what, fields) = args req = objects.QueryFieldsRequest(what=what, fields=fields) try: fielddefs = query.ALL_FIELDS[req.what] except KeyError: raise errors.OpPrereqError("Resource type '%s' unknown" % req.what, errors.ECODE_INVAL) return query.QueryFields(fielddefs, req.fields) elif method == luxi.REQ_QUERY_JOBS: (job_ids, fields) = args if isinstance(job_ids, (tuple, list)) and job_ids: msg = utils.CommaJoin(job_ids) else: msg = str(job_ids) logging.info("Received job query request for %s", msg) return queue.OldStyleQueryJobs(job_ids, fields) elif method == luxi.REQ_QUERY_INSTANCES: (names, fields, use_locking) = args logging.info("Received instance 
query request for %s", names) if use_locking: raise errors.OpPrereqError("Sync queries are not allowed", errors.ECODE_INVAL) op = opcodes.OpInstanceQuery(names=names, output_fields=fields, use_locking=use_locking) return self._Query(op) elif method == luxi.REQ_QUERY_NODES: (names, fields, use_locking) = args logging.info("Received node query request for %s", names) if use_locking: raise errors.OpPrereqError("Sync queries are not allowed", errors.ECODE_INVAL) op = opcodes.OpNodeQuery(names=names, output_fields=fields, use_locking=use_locking) return self._Query(op) elif method == luxi.REQ_QUERY_GROUPS: (names, fields, use_locking) = args logging.info("Received group query request for %s", names) if use_locking: raise errors.OpPrereqError("Sync queries are not allowed", errors.ECODE_INVAL) op = opcodes.OpGroupQuery(names=names, output_fields=fields) return self._Query(op) elif method == luxi.REQ_QUERY_NETWORKS: (names, fields, use_locking) = args logging.info("Received network query request for %s", names) if use_locking: raise errors.OpPrereqError("Sync queries are not allowed", errors.ECODE_INVAL) op = opcodes.OpNetworkQuery(names=names, output_fields=fields) return self._Query(op) elif method == luxi.REQ_QUERY_EXPORTS: (nodes, use_locking) = args if use_locking: raise errors.OpPrereqError("Sync queries are not allowed", errors.ECODE_INVAL) logging.info("Received exports query request") op = opcodes.OpBackupQuery(nodes=nodes, use_locking=use_locking) return self._Query(op) elif method == luxi.REQ_QUERY_CONFIG_VALUES: (fields, ) = args logging.info("Received config values query request for %s", fields) op = opcodes.OpClusterConfigQuery(output_fields=fields) return self._Query(op) elif method == luxi.REQ_QUERY_CLUSTER_INFO: logging.info("Received cluster info query request") op = opcodes.OpClusterQuery() return self._Query(op) elif method == luxi.REQ_QUERY_TAGS: (kind, name) = args logging.info("Received tags query request") op = opcodes.OpTagsGet(kind=kind, name=name, use_locking=False) return self._Query(op) elif method == luxi.REQ_SET_DRAIN_FLAG: (drain_flag, ) = args logging.info("Received queue drain flag change request to %s", drain_flag) return queue.SetDrainFlag(drain_flag) elif method == luxi.REQ_SET_WATCHER_PAUSE: (until, ) = args return _SetWatcherPause(context, until) else: logging.critical("Request '%s' in luxi.REQ_ALL, but not known", method) raise errors.ProgrammerError("Operation '%s' in luxi.REQ_ALL," " but not implemented" % method) def _Query(self, op): """Runs the specified opcode and returns the result. """ # Queries don't have a job id proc = mcpu.Processor(self.server.context, None, enable_locks=False) # TODO: Executing an opcode using locks will acquire them in blocking mode. # Consider using a timeout for retries. return proc.ExecOpCode(op, None) class GanetiContext(object): """Context common to all ganeti threads. This class creates and holds common objects shared by all threads. """ # pylint: disable=W0212 # we do want to ensure a singleton here _instance = None def __init__(self): """Constructs a new GanetiContext object. There should be only a GanetiContext object at any time, so this function raises an error if this is not the case. 
""" assert self.__class__._instance is None, "double GanetiContext instance" # Create global configuration object self.cfg = config.ConfigWriter() # Locking manager self.glm = locking.GanetiLockManager( self.cfg.GetNodeList(), self.cfg.GetNodeGroupList(), [inst.name for inst in self.cfg.GetAllInstancesInfo().values()], self.cfg.GetNetworkList()) self.cfg.SetContext(self) # RPC runner self.rpc = rpc.RpcRunner(self.cfg, self.glm.AddToLockMonitor) # Job queue self.jobqueue = jqueue.JobQueue(self) # setting this also locks the class against attribute modifications self.__class__._instance = self def __setattr__(self, name, value): """Setting GanetiContext attributes is forbidden after initialization. """ assert self.__class__._instance is None, "Attempt to modify Ganeti Context" object.__setattr__(self, name, value) def AddNode(self, node, ec_id): """Adds a node to the configuration and lock manager. """ # Add it to the configuration self.cfg.AddNode(node, ec_id) # If preseeding fails it'll not be added self.jobqueue.AddNode(node) # Add the new node to the Ganeti Lock Manager self.glm.add(locking.LEVEL_NODE, node.uuid) self.glm.add(locking.LEVEL_NODE_RES, node.uuid) def ReaddNode(self, node): """Updates a node that's already in the configuration """ # Synchronize the queue again self.jobqueue.AddNode(node) def RemoveNode(self, node): """Removes a node from the configuration and lock manager. """ # Remove node from configuration self.cfg.RemoveNode(node.uuid) # Notify job queue self.jobqueue.RemoveNode(node.name) # Remove the node from the Ganeti Lock Manager self.glm.remove(locking.LEVEL_NODE, node.uuid) self.glm.remove(locking.LEVEL_NODE_RES, node.uuid) def _SetWatcherPause(context, until): """Creates or removes the watcher pause file. @type context: L{GanetiContext} @param context: Global Ganeti context @type until: None or int @param until: Unix timestamp saying until when the watcher shouldn't run """ node_names = context.cfg.GetNodeList() if until is None: logging.info("Received request to no longer pause watcher") else: if not ht.TNumber(until): raise TypeError("Duration must be numeric") if until < time.time(): raise errors.GenericError("Unable to set pause end time in the past") logging.info("Received request to pause watcher until %s", until) result = context.rpc.call_set_watcher_pause(node_names, until) errmsg = utils.CommaJoin("%s (%s)" % (node_name, nres.fail_msg) for (node_name, nres) in result.items() if nres.fail_msg and not nres.offline) if errmsg: raise errors.OpExecError("Watcher pause was set where possible, but failed" " on the following node(s): %s" % errmsg) return until @rpc.RunWithRPC def CheckAgreement(): """Check the agreement on who is the master. The function uses a very simple algorithm: we must get more positive than negative answers. Since in most of the cases we are the master, we'll use our own config file for getting the node list. In the future we could collect the current node list from our (possibly obsolete) known nodes. In order to account for cold-start of all nodes, we retry for up to a minute until we get a real answer as the top-voted one. If the nodes are more out-of-sync, for now manual startup of the master should be attempted. Note that for a even number of nodes cluster, we need at least half of the nodes (beside ourselves) to vote for us. This creates a problem on two-node clusters, since in this case we require the other node to be up too to confirm our status. 
""" myself = netutils.Hostname.GetSysName() #temp instantiation of a config writer, used only to get the node list cfg = config.ConfigWriter() node_names = cfg.GetNodeNames(cfg.GetNodeList()) del cfg retries = 6 while retries > 0: votes = bootstrap.GatherMasterVotes(node_names) if not votes: # empty node list, this is a one node cluster return True if votes[0][0] is None: retries -= 1 time.sleep(10) continue break if retries == 0: logging.critical("Cluster inconsistent, most of the nodes didn't answer" " after multiple retries. Aborting startup") logging.critical("Use the --no-voting option if you understand what" " effects it has on the cluster state") return False # here a real node is at the top of the list all_votes = sum(item[1] for item in votes) top_node, top_votes = votes[0] result = False if top_node != myself: logging.critical("It seems we are not the master (top-voted node" " is %s with %d out of %d votes)", top_node, top_votes, all_votes) elif top_votes < all_votes - top_votes: logging.critical("It seems we are not the master (%d votes for," " %d votes against)", top_votes, all_votes - top_votes) else: result = True return result @rpc.RunWithRPC def ActivateMasterIP(): # activate ip cfg = config.ConfigWriter() master_params = cfg.GetMasterNetworkParameters() ems = cfg.GetUseExternalMipScript() runner = rpc.BootstrapRunner() # we use the node name, as the configuration is only available here yet result = runner.call_node_activate_master_ip( cfg.GetNodeName(master_params.uuid), master_params, ems) msg = result.fail_msg if msg: logging.error("Can't activate master IP address: %s", msg) def CheckMasterd(options, args): """Initial checks whether to run or exit with a failure. """ if args: # masterd doesn't take any arguments print >> sys.stderr, ("Usage: %s [-f] [-d]" % sys.argv[0]) sys.exit(constants.EXIT_FAILURE) ssconf.CheckMaster(options.debug) try: options.uid = pwd.getpwnam(constants.MASTERD_USER).pw_uid options.gid = grp.getgrnam(constants.DAEMONS_GROUP).gr_gid except KeyError: print >> sys.stderr, ("User or group not existing on system: %s:%s" % (constants.MASTERD_USER, constants.DAEMONS_GROUP)) sys.exit(constants.EXIT_FAILURE) # Determine static runtime architecture information runtime.InitArchInfo() # Check the configuration is sane before anything else try: config.ConfigWriter() except errors.ConfigVersionMismatch, err: v1 = "%s.%s.%s" % constants.SplitVersion(err.args[0]) v2 = "%s.%s.%s" % constants.SplitVersion(err.args[1]) print >> sys.stderr, \ ("Configuration version mismatch. The current Ganeti software" " expects version %s, but the on-disk configuration file has" " version %s. This is likely the result of upgrading the" " software without running the upgrade procedure. Please contact" " your cluster administrator or complete the upgrade using the" " cfgupgrade utility, after reading the upgrade notes." % (v1, v2)) sys.exit(constants.EXIT_FAILURE) except errors.ConfigurationError, err: print >> sys.stderr, \ ("Configuration error while opening the configuration file: %s\n" "This might be caused by an incomplete software upgrade or" " by a corrupted configuration file. Until the problem is fixed" " the master daemon cannot start." % str(err)) sys.exit(constants.EXIT_FAILURE) # If CheckMaster didn't fail we believe we are the master, but we have to # confirm with the other nodes. 
if options.no_voting: if not options.yes_do_it: sys.stdout.write("The 'no voting' option has been selected.\n") sys.stdout.write("This is dangerous, please confirm by" " typing uppercase 'yes': ") sys.stdout.flush() confirmation = sys.stdin.readline().strip() if confirmation != "YES": print >> sys.stderr, "Aborting." sys.exit(constants.EXIT_FAILURE) else: # CheckAgreement uses RPC and threads, hence it needs to be run in # a separate process before we call utils.Daemonize in the current # process. if not utils.RunInSeparateProcess(CheckAgreement): sys.exit(constants.EXIT_FAILURE) # ActivateMasterIP also uses RPC/threads, so we run it again via a # separate process. # TODO: decide whether failure to activate the master IP is a fatal error utils.RunInSeparateProcess(ActivateMasterIP) def PrepMasterd(options, _): """Prep master daemon function, executed with the PID file held. """ # This is safe to do as the pid file guarantees against # concurrent execution. utils.RemoveFile(pathutils.MASTER_SOCKET) mainloop = daemon.Mainloop() master = MasterServer(pathutils.MASTER_SOCKET, options.uid, options.gid) return (mainloop, master) def ExecMasterd(options, args, prep_data): # pylint: disable=W0613 """Main master daemon function, executed with the PID file held. """ (mainloop, master) = prep_data try: rpc.Init() try: master.setup_queue() try: mainloop.Run(shutdown_wait_fn=master.WaitForShutdown) finally: master.server_cleanup() finally: rpc.Shutdown() finally: utils.RemoveFile(pathutils.MASTER_SOCKET) logging.info("Clean master daemon shutdown") def Main(): """Main function""" parser = OptionParser(description="Ganeti master daemon", usage="%prog [-f] [-d]", version="%%prog (ganeti) %s" % constants.RELEASE_VERSION) parser.add_option("--no-voting", dest="no_voting", help="Do not check that the nodes agree on this node" " being the master and start the daemon unconditionally", default=False, action="store_true") parser.add_option("--yes-do-it", dest="yes_do_it", help="Override interactive check for --no-voting", default=False, action="store_true") daemon.GenericMain(constants.MASTERD, parser, CheckMasterd, PrepMasterd, ExecMasterd, multithreaded=True) ganeti-2.9.3/lib/server/rapi.py0000644000000000000000000002526112271422343016361 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti Remote API master script. 
""" # pylint: disable=C0103,W0142 # C0103: Invalid name ganeti-watcher import logging import optparse import sys import os import os.path import errno try: from pyinotify import pyinotify # pylint: disable=E0611 except ImportError: import pyinotify from ganeti import asyncnotifier from ganeti import constants from ganeti import http from ganeti import daemon from ganeti import ssconf from ganeti import luxi from ganeti import serializer from ganeti import compat from ganeti import utils from ganeti import pathutils from ganeti.rapi import connector from ganeti.rapi import baserlib import ganeti.http.auth # pylint: disable=W0611 import ganeti.http.server class RemoteApiRequestContext(object): """Data structure for Remote API requests. """ def __init__(self): self.handler = None self.handler_fn = None self.handler_access = None self.body_data = None class RemoteApiHandler(http.auth.HttpServerRequestAuthentication, http.server.HttpServerHandler): """REST Request Handler Class. """ AUTH_REALM = "Ganeti Remote API" def __init__(self, user_fn, reqauth, _client_cls=None): """Initializes this class. @type user_fn: callable @param user_fn: Function receiving username as string and returning L{http.auth.PasswordFileUser} or C{None} if user is not found @type reqauth: bool @param reqauth: Whether to require authentication """ # pylint: disable=W0233 # it seems pylint doesn't see the second parent class there http.server.HttpServerHandler.__init__(self) http.auth.HttpServerRequestAuthentication.__init__(self) self._client_cls = _client_cls self._resmap = connector.Mapper() self._user_fn = user_fn self._reqauth = reqauth @staticmethod def FormatErrorMessage(values): """Formats the body of an error message. @type values: dict @param values: dictionary with keys C{code}, C{message} and C{explain}. @rtype: tuple; (string, string) @return: Content-type and response body """ return (http.HTTP_APP_JSON, serializer.DumpJson(values)) def _GetRequestContext(self, req): """Returns the context for a request. The context is cached in the req.private variable. """ if req.private is None: (HandlerClass, items, args) = \ self._resmap.getController(req.request_path) ctx = RemoteApiRequestContext() ctx.handler = HandlerClass(items, args, req, _client_cls=self._client_cls) method = req.request_method.upper() try: ctx.handler_fn = getattr(ctx.handler, method) except AttributeError: raise http.HttpNotImplemented("Method %s is unsupported for path %s" % (method, req.request_path)) ctx.handler_access = baserlib.GetHandlerAccess(ctx.handler, method) # Require permissions definition (usually in the base class) if ctx.handler_access is None: raise AssertionError("Permissions definition missing") # This is only made available in HandleRequest ctx.body_data = None req.private = ctx # Check for expected attributes assert req.private.handler assert req.private.handler_fn assert req.private.handler_access is not None return req.private def AuthenticationRequired(self, req): """Determine whether authentication is required. """ return self._reqauth or bool(self._GetRequestContext(req).handler_access) def Authenticate(self, req, username, password): """Checks whether a user can access a resource. 
""" ctx = self._GetRequestContext(req) user = self._user_fn(username) if not (user and self.VerifyBasicAuthPassword(req, username, password, user.password)): # Unknown user or password wrong return False if (not ctx.handler_access or set(user.options).intersection(ctx.handler_access)): # Allow access return True # Access forbidden raise http.HttpForbidden() def HandleRequest(self, req): """Handles a request. """ ctx = self._GetRequestContext(req) # Deserialize request parameters if req.request_body: # RFC2616, 7.2.1: Any HTTP/1.1 message containing an entity-body SHOULD # include a Content-Type header field defining the media type of that # body. [...] If the media type remains unknown, the recipient SHOULD # treat it as type "application/octet-stream". req_content_type = req.request_headers.get(http.HTTP_CONTENT_TYPE, http.HTTP_APP_OCTET_STREAM) if req_content_type.lower() != http.HTTP_APP_JSON.lower(): raise http.HttpUnsupportedMediaType() try: ctx.body_data = serializer.LoadJson(req.request_body) except Exception: raise http.HttpBadRequest(message="Unable to parse JSON data") else: ctx.body_data = None try: result = ctx.handler_fn() except luxi.TimeoutError: raise http.HttpGatewayTimeout() except luxi.ProtocolError, err: raise http.HttpBadGateway(str(err)) req.resp_headers[http.HTTP_CONTENT_TYPE] = http.HTTP_APP_JSON return serializer.DumpJson(result) class RapiUsers: def __init__(self): """Initializes this class. """ self._users = None def Get(self, username): """Checks whether a user exists. """ if self._users: return self._users.get(username, None) else: return None def Load(self, filename): """Loads a file containing users and passwords. @type filename: string @param filename: Path to file """ logging.info("Reading users file at %s", filename) try: try: contents = utils.ReadFile(filename) except EnvironmentError, err: self._users = None if err.errno == errno.ENOENT: logging.warning("No users file at %s", filename) else: logging.warning("Error while reading %s: %s", filename, err) return False users = http.auth.ParsePasswordFile(contents) except Exception, err: # pylint: disable=W0703 # We don't care about the type of exception logging.error("Error while parsing %s: %s", filename, err) return False self._users = users return True class FileEventHandler(asyncnotifier.FileEventHandlerBase): def __init__(self, wm, path, cb): """Initializes this class. @param wm: Inotify watch manager @type path: string @param path: File path @type cb: callable @param cb: Function called on file change """ asyncnotifier.FileEventHandlerBase.__init__(self, wm) self._cb = cb self._filename = os.path.basename(path) # Different Pyinotify versions have the flag constants at different places, # hence not accessing them directly mask = (pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"] | pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"] | pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_FROM"] | pyinotify.EventsCodes.ALL_FLAGS["IN_MOVED_TO"]) self._handle = self.AddWatch(os.path.dirname(path), mask) def process_default(self, event): """Called upon inotify event. """ if event.name == self._filename: logging.debug("Received inotify event %s", event) self._cb() def SetupFileWatcher(filename, cb): """Configures an inotify watcher for a file. 
@type filename: string @param filename: File to watch @type cb: callable @param cb: Function called on file change """ wm = pyinotify.WatchManager() handler = FileEventHandler(wm, filename, cb) asyncnotifier.AsyncNotifier(wm, default_proc_fun=handler) def CheckRapi(options, args): """Initial checks whether to run or exit with a failure. """ if args: # rapi doesn't take any arguments print >> sys.stderr, ("Usage: %s [-f] [-d] [-p port] [-b ADDRESS]" % sys.argv[0]) sys.exit(constants.EXIT_FAILURE) ssconf.CheckMaster(options.debug) # Read SSL certificate (this is a little hackish to read the cert as root) if options.ssl: options.ssl_params = http.HttpSslParams(ssl_key_path=options.ssl_key, ssl_cert_path=options.ssl_cert) else: options.ssl_params = None def PrepRapi(options, _): """Prep remote API function, executed with the PID file held. """ mainloop = daemon.Mainloop() users = RapiUsers() handler = RemoteApiHandler(users.Get, options.reqauth) # Setup file watcher (it'll be driven by asyncore) SetupFileWatcher(pathutils.RAPI_USERS_FILE, compat.partial(users.Load, pathutils.RAPI_USERS_FILE)) users.Load(pathutils.RAPI_USERS_FILE) server = \ http.server.HttpServer(mainloop, options.bind_address, options.port, handler, ssl_params=options.ssl_params, ssl_verify_peer=False) server.Start() return (mainloop, server) def ExecRapi(options, args, prep_data): # pylint: disable=W0613 """Main remote API function, executed with the PID file held. """ (mainloop, server) = prep_data try: mainloop.Run() finally: server.Stop() def Main(): """Main function. """ parser = optparse.OptionParser(description="Ganeti Remote API", usage=("%prog [-f] [-d] [-p port] [-b ADDRESS]" " [-i INTERFACE]"), version="%%prog (ganeti) %s" % constants.RELEASE_VERSION) parser.add_option("--require-authentication", dest="reqauth", default=False, action="store_true", help=("Disable anonymous HTTP requests and require" " authentication")) daemon.GenericMain(constants.RAPI, parser, CheckRapi, PrepRapi, ExecRapi, default_ssl_cert=pathutils.RAPI_CERT_FILE, default_ssl_key=pathutils.RAPI_CERT_FILE) ganeti-2.9.3/lib/server/__init__.py0000644000000000000000000000143012230001635017145 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Empty file for package definition. """ ganeti-2.9.3/lib/storage/0000755000000000000000000000000012271445544015214 5ustar00rootroot00000000000000ganeti-2.9.3/lib/storage/base.py0000644000000000000000000003060012271422343016467 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Block device abstraction - base class and utility functions""" import logging from ganeti import objects from ganeti import constants from ganeti import utils from ganeti import errors class BlockDev(object): """Block device abstract class. A block device can be in the following states: - not existing on the system, and by `Create()` it goes into: - existing but not setup/not active, and by `Assemble()` goes into: - active read-write and by `Open()` it goes into - online (=used, or ready for use) A device can also be online but read-only, however we are not using the readonly state (LV has it, if needed in the future) and we are usually looking at this like at a stack, so it's easier to conceptualise the transition from not-existing to online and back like a linear one. The many different states of the device are due to the fact that we need to cover many device types: - logical volumes are created, lvchange -a y $lv, and used - drbd devices are attached to a local disk/remote peer and made primary A block device is identified by three items: - the /dev path of the device (dynamic) - a unique ID of the device (static) - it's major/minor pair (dynamic) Not all devices implement both the first two as distinct items. LVM logical volumes have their unique ID (the pair volume group, logical volume name) in a 1-to-1 relation to the dev path. For DRBD devices, the /dev path is again dynamic and the unique id is the pair (host1, dev1), (host2, dev2). You can get to a device in two ways: - creating the (real) device, which returns you an attached instance (lvcreate) - attaching of a python instance to an existing (real) device The second point, the attachment to a device, is different depending on whether the device is assembled or not. At init() time, we search for a device with the same unique_id as us. If found, good. It also means that the device is already assembled. If not, after assembly we'll have our correct major/minor. """ def __init__(self, unique_id, children, size, params): self._children = children self.dev_path = None self.unique_id = unique_id self.major = None self.minor = None self.attached = False self.size = size self.params = params def Assemble(self): """Assemble the device from its components. Implementations of this method by child classes must ensure that: - after the device has been assembled, it knows its major/minor numbers; this allows other devices (usually parents) to probe correctly for their children - calling this method on an existing, in-use device is safe - if the device is already configured (and in an OK state), this method is idempotent """ pass def Attach(self): """Find a device which matches our config and attach to it. """ raise NotImplementedError def Close(self): """Notifies that the device will no longer be used for I/O. """ raise NotImplementedError @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create the device. If the device cannot be created, it will return None instead. Error messages go to the logging system. 
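Putting the state transitions described above together, a caller drives a concrete subclass through its life cycle roughly as follows (sketch; SomeBlockDev is a hypothetical subclass, and the LV-style unique_id mirrors the (volume group, logical volume) pair described above):

dev = SomeBlockDev.Create(("xenvg", "inst1-disk0"), [], 1024, None, {}, False)
dev.Assemble()   # activate; the device learns its major/minor numbers
dev.Open()       # ready for I/O at dev.dev_path
dev.Close()      # no more I/O
dev.Shutdown()   # undoes Assemble(); children stay assembled
dev.Remove()     # destroys the device (LV/file storage only)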
Note that for some devices, the unique_id is used, and for other, the children. The idea is that these two, taken together, are enough for both creation and assembly (later). @type unique_id: 2-element tuple or list @param unique_id: unique identifier; the details depend on the actual device type @type children: list of L{BlockDev} @param children: for hierarchical devices, the child devices @type size: float @param size: size in MiB @type spindles: int @param spindles: number of physical disk to dedicate to the device @type params: dict @param params: device-specific options/parameters @type excl_stor: bool @param excl_stor: whether exclusive_storage is active @rtype: L{BlockDev} @return: the created device, or C{None} in case of an error """ raise NotImplementedError def Remove(self): """Remove this device. This makes sense only for some of the device types: LV and file storage. Also note that if the device can't attach, the removal can't be completed. """ raise NotImplementedError def Rename(self, new_id): """Rename this device. This may or may not make sense for a given device type. """ raise NotImplementedError def Open(self, force=False): """Make the device ready for use. This makes the device ready for I/O. For now, just the DRBD devices need this. The force parameter signifies that if the device has any kind of --force thing, it should be used, we know what we are doing. @type force: boolean """ raise NotImplementedError def Shutdown(self): """Shut down the device, freeing its children. This undoes the `Assemble()` work, except for the child assembling; as such, the children on the device are still assembled after this call. """ raise NotImplementedError def SetSyncParams(self, params): """Adjust the synchronization parameters of the mirror. In case this is not a mirroring device, this is no-op. @param params: dictionary of LD level disk parameters related to the synchronization. @rtype: list @return: a list of error messages, emitted both by the current node and by children. An empty list means no errors. """ result = [] if self._children: for child in self._children: result.extend(child.SetSyncParams(params)) return result def PauseResumeSync(self, pause): """Pause/Resume the sync of the mirror. In case this is not a mirroring device, this is no-op. @type pause: boolean @param pause: Whether to pause or resume """ result = True if self._children: for child in self._children: result = result and child.PauseResumeSync(pause) return result def GetSyncStatus(self): """Returns the sync status of the device. If this device is a mirroring device, this function returns the status of the mirror. If sync_percent is None, it means the device is not syncing. If estimated_time is None, it means we can't estimate the time needed, otherwise it's the time left in seconds. If is_degraded is True, it means the device is missing redundancy. This is usually a sign that something went wrong in the device setup, if sync_percent is None. The ldisk parameter represents the degradation of the local data. This is only valid for some devices, the rest will always return False (not degraded). @rtype: objects.BlockDevStatus """ return objects.BlockDevStatus(dev_path=self.dev_path, major=self.major, minor=self.minor, sync_percent=None, estimated_time=None, is_degraded=False, ldisk_status=constants.LDS_OKAY) def CombinedSyncStatus(self): """Calculate the mirror status recursively for our children. 
The return value is the same as for `GetSyncStatus()` except the minimum percent and maximum time are calculated across our children. @rtype: objects.BlockDevStatus """ status = self.GetSyncStatus() min_percent = status.sync_percent max_time = status.estimated_time is_degraded = status.is_degraded ldisk_status = status.ldisk_status if self._children: for child in self._children: child_status = child.GetSyncStatus() if min_percent is None: min_percent = child_status.sync_percent elif child_status.sync_percent is not None: min_percent = min(min_percent, child_status.sync_percent) if max_time is None: max_time = child_status.estimated_time elif child_status.estimated_time is not None: max_time = max(max_time, child_status.estimated_time) is_degraded = is_degraded or child_status.is_degraded if ldisk_status is None: ldisk_status = child_status.ldisk_status elif child_status.ldisk_status is not None: ldisk_status = max(ldisk_status, child_status.ldisk_status) return objects.BlockDevStatus(dev_path=self.dev_path, major=self.major, minor=self.minor, sync_percent=min_percent, estimated_time=max_time, is_degraded=is_degraded, ldisk_status=ldisk_status) def SetInfo(self, text): """Update metadata with info text. Only supported for some device types. """ for child in self._children: child.SetInfo(text) def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the block device. @type amount: integer @param amount: the amount (in mebibytes) to grow with @type dryrun: boolean @param dryrun: whether to execute the operation in simulation mode only, without actually increasing the size @param backingstore: whether to execute the operation on backing storage only, or on "logical" storage only; e.g. DRBD is logical storage, whereas LVM, file, RBD are backing storage @type excl_stor: boolean @param excl_stor: Whether exclusive_storage is active """ raise NotImplementedError def GetActualSize(self): """Return the actual disk size. @note: the device needs to be active when this is called """ assert self.attached, "BlockDevice not attached in GetActualSize()" result = utils.RunCmd(["blockdev", "--getsize64", self.dev_path]) if result.failed: ThrowError("blockdev failed (%s): %s", result.fail_reason, result.output) try: sz = int(result.output.strip()) except (ValueError, TypeError), err: ThrowError("Failed to parse blockdev output: %s", str(err)) return sz def GetActualSpindles(self): """Return the actual number of spindles used. This is not supported by all devices; if not supported, C{None} is returned. @note: the device needs to be active when this is called """ assert self.attached, "BlockDevice not attached in GetActualSpindles()" return None def GetActualDimensions(self): """Return the actual disk size and number of spindles used. @rtype: tuple @return: (size, spindles); spindles is C{None} when they are not supported @note: the device needs to be active when this is called """ return (self.GetActualSize(), self.GetActualSpindles()) def __repr__(self): return ("<%s: unique_id: %s, children: %s, %s:%s, %s>" % (self.__class__, self.unique_id, self._children, self.major, self.minor, self.dev_path)) def ThrowError(msg, *args): """Log an error to the node daemon and the raise an exception. @type msg: string @param msg: the text of the exception @raise errors.BlockDeviceError """ if args: msg = msg % args logging.error(msg) raise errors.BlockDeviceError(msg) def IgnoreError(fn, *args, **kwargs): """Executes the given function, ignoring BlockDeviceErrors. 
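Typical use (sketch; dev stands for any assembled BlockDev): error paths can attempt a rollback without raising a second exception that would mask the original failure:

if not IgnoreError(dev.Shutdown):
  logging.warning("rollback of %s failed, continuing anyway", dev)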
  This is used in order to simplify the execution of cleanup or rollback
  functions.

  @rtype: boolean
  @return: True when fn didn't raise an exception, False otherwise

  """
  try:
    fn(*args, **kwargs)
    return True
  except errors.BlockDeviceError, err:
    logging.warning("Caught BlockDeviceError but ignoring: %s", str(err))
    return False
ganeti-2.9.3/lib/storage/drbd.py0000644000000000000000000010523712271422343016501 0ustar00rootroot00000000000000#
#

# Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""DRBD block device related functionality"""

import errno
import logging
import time

from ganeti import constants
from ganeti import utils
from ganeti import errors
from ganeti import netutils
from ganeti import objects

from ganeti.storage import base
from ganeti.storage.drbd_info import DRBD8Info
from ganeti.storage import drbd_info
from ganeti.storage import drbd_cmdgen


# Size of reads in _CanReadDevice
_DEVICE_READ_SIZE = 128 * 1024


class DRBD8(object):
  """Various methods to deal with the DRBD system as a whole.

  This class provides a set of methods to deal with the DRBD installation on
  the node or with uninitialized devices as opposed to a DRBD device.

  """
  _USERMODE_HELPER_FILE = "/sys/module/drbd/parameters/usermode_helper"

  _MAX_MINORS = 255

  @staticmethod
  def GetUsermodeHelper(filename=_USERMODE_HELPER_FILE):
    """Returns DRBD usermode_helper currently set.

    @type filename: string
    @param filename: the filename to read the usermode helper from
    @rtype: string
    @return: the currently configured DRBD usermode helper

    """
    try:
      helper = utils.ReadFile(filename).splitlines()[0]
    except EnvironmentError, err:
      if err.errno == errno.ENOENT:
        base.ThrowError("The file %s cannot be opened, check if the module"
                        " is loaded (%s)", filename, str(err))
      else:
        base.ThrowError("Can't read DRBD helper file %s: %s",
                        filename, str(err))
    if not helper:
      base.ThrowError("Can't read any data from %s", filename)
    return helper

  @staticmethod
  def GetProcInfo():
    """Reads and parses information from /proc/drbd.

    @rtype: DRBD8Info
    @return: a L{DRBD8Info} instance containing the current /proc/drbd info

    """
    return DRBD8Info.CreateFromFile()

  @staticmethod
  def GetUsedDevs():
    """Compute the list of used DRBD minors.

    @rtype: list of ints

    """
    info = DRBD8.GetProcInfo()
    return filter(lambda m: not info.GetMinorStatus(m).is_unconfigured,
                  info.GetMinors())

  @staticmethod
  def FindUnusedMinor():
    """Find an unused DRBD device.

    This is specific to 8.x as the minors are allocated dynamically,
    so non-existing numbers up to a max minor count are actually free.
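The /proc/drbd scan underneath this is roughly the following (illustrative only; the real parsing lives in drbd_info.DRBD8Info, and the "cs:" line layout is an assumption about the 8.x format):

import re

_MINOR_RE = re.compile(r"^\s*(\d+):\s+cs:(\S+)")
states = {}
for line in open("/proc/drbd"):
  match = _MINOR_RE.match(line)
  if match:
    states[int(match.group(1))] = match.group(2)  # minor -> connection state
# minors reported as "Unconfigured" count as free for FindUnusedMinor()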
@rtype: int """ highest = None info = DRBD8.GetProcInfo() for minor in info.GetMinors(): status = info.GetMinorStatus(minor) if not status.is_in_use: return minor highest = max(highest, minor) if highest is None: # there are no minors in use at all return 0 if highest >= DRBD8._MAX_MINORS: logging.error("Error: no free drbd minors!") raise errors.BlockDeviceError("Can't find a free DRBD minor") return highest + 1 @staticmethod def GetCmdGenerator(info): """Creates a suitable L{BaseDRBDCmdGenerator} based on the given info. @type info: DRBD8Info @rtype: BaseDRBDCmdGenerator """ version = info.GetVersion() if version["k_minor"] <= 3: return drbd_cmdgen.DRBD83CmdGenerator(version) else: return drbd_cmdgen.DRBD84CmdGenerator(version) @staticmethod def ShutdownAll(minor): """Deactivate the device. This will, of course, fail if the device is in use. @type minor: int @param minor: the minor to shut down """ info = DRBD8.GetProcInfo() cmd_gen = DRBD8.GetCmdGenerator(info) cmd = cmd_gen.GenDownCmd(minor) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't shutdown drbd device: %s", minor, result.output) class DRBD8Dev(base.BlockDev): """DRBD v8.x block device. This implements the local host part of the DRBD device, i.e. it doesn't do anything to the supposed peer. If you need a fully connected DRBD pair, you need to use this class on both hosts. The unique_id for the drbd device is a (local_ip, local_port, remote_ip, remote_port, local_minor, secret) tuple, and it must have two children: the data device and the meta_device. The meta device is checked for valid size and is zeroed on create. """ _DRBD_MAJOR = 147 # timeout constants _NET_RECONFIG_TIMEOUT = 60 def __init__(self, unique_id, children, size, params): if children and children.count(None) > 0: children = [] if len(children) not in (0, 2): raise ValueError("Invalid configuration data %s" % str(children)) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 6: raise ValueError("Invalid configuration data %s" % str(unique_id)) (self._lhost, self._lport, self._rhost, self._rport, self._aminor, self._secret) = unique_id if children: if not _CanReadDevice(children[1].dev_path): logging.info("drbd%s: Ignoring unreadable meta device", self._aminor) children = [] super(DRBD8Dev, self).__init__(unique_id, children, size, params) self.major = self._DRBD_MAJOR info = DRBD8.GetProcInfo() version = info.GetVersion() if version["k_major"] != 8: base.ThrowError("Mismatch in DRBD kernel version and requested ganeti" " usage: kernel is %s.%s, ganeti wants 8.x", version["k_major"], version["k_minor"]) if version["k_minor"] <= 3: self._show_info_cls = drbd_info.DRBD83ShowInfo else: self._show_info_cls = drbd_info.DRBD84ShowInfo self._cmd_gen = DRBD8.GetCmdGenerator(info) if (self._lhost is not None and self._lhost == self._rhost and self._lport == self._rport): raise ValueError("Invalid configuration data, same local/remote %s" % (unique_id,)) self.Attach() @staticmethod def _DevPath(minor): """Return the path to a drbd device for a given minor. @type minor: int @rtype: string """ return "/dev/drbd%d" % minor def _SetFromMinor(self, minor): """Set our parameters based on the given minor. This sets our minor variable and our dev_path. @type minor: int """ if minor is None: self.minor = self.dev_path = None self.attached = False else: self.minor = minor self.dev_path = self._DevPath(minor) self.attached = True @staticmethod def _CheckMetaSize(meta_device): """Check if the given meta device looks like a valid one. 
This currently only checks the size, which must be around 128MiB. @type meta_device: string @param meta_device: the path to the device to check """ result = utils.RunCmd(["blockdev", "--getsize", meta_device]) if result.failed: base.ThrowError("Failed to get device size: %s - %s", result.fail_reason, result.output) try: sectors = int(result.stdout) except (TypeError, ValueError): base.ThrowError("Invalid output from blockdev: '%s'", result.stdout) num_bytes = sectors * 512 if num_bytes < 128 * 1024 * 1024: # less than 128MiB base.ThrowError("Meta device too small (%.2fMib)", (num_bytes / 1024 / 1024)) # the maximum *valid* size of the meta device when living on top # of LVM is hard to compute: it depends on the number of stripes # and the PE size; e.g. a 2-stripe, 64MB PE will result in a 128MB # (normal size), but an eight-stripe 128MB PE will result in a 1GB # size meta device; as such, we restrict it to 1GB (a little bit # too generous, but making assumptions about PE size is hard) if num_bytes > 1024 * 1024 * 1024: base.ThrowError("Meta device too big (%.2fMiB)", (num_bytes / 1024 / 1024)) def _GetShowData(self, minor): """Return the `drbdsetup show` data. @type minor: int @param minor: the minor to collect show output for @rtype: string """ result = utils.RunCmd(self._cmd_gen.GenShowCmd(minor)) if result.failed: logging.error("Can't display the drbd config: %s - %s", result.fail_reason, result.output) return None return result.stdout def _GetShowInfo(self, minor): """Return parsed information from `drbdsetup show`. @type minor: int @param minor: the minor to return information for @rtype: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo} """ return self._show_info_cls.GetDevInfo(self._GetShowData(minor)) def _MatchesLocal(self, info): """Test if our local config matches with an existing device. The parameter should be as returned from `_GetShowInfo()`. This method tests if our local backing device is the same as the one in the info parameter, in effect testing if we look like the given device. @type info: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo} @rtype: boolean """ if self._children: backend, meta = self._children else: backend = meta = None if backend is not None: retval = ("local_dev" in info and info["local_dev"] == backend.dev_path) else: retval = ("local_dev" not in info) if meta is not None: retval = retval and ("meta_dev" in info and info["meta_dev"] == meta.dev_path) if "meta_index" in info: retval = retval and info["meta_index"] == 0 else: retval = retval and ("meta_dev" not in info and "meta_index" not in info) return retval def _MatchesNet(self, info): """Test if our network config matches with an existing device. The parameter should be as returned from `_GetShowInfo()`. This method tests if our network configuration is the same as the one in the info parameter, in effect testing if we look like the given device. @type info: dict as described in L{drbd_info.BaseShowInfo.GetDevInfo} @rtype: boolean """ if (((self._lhost is None and not ("local_addr" in info)) and (self._rhost is None and not ("remote_addr" in info)))): return True if self._lhost is None: return False if not ("local_addr" in info and "remote_addr" in info): return False retval = (info["local_addr"] == (self._lhost, self._lport)) retval = (retval and info["remote_addr"] == (self._rhost, self._rport)) return retval def _AssembleLocal(self, minor, backend, meta, size): """Configure the local part of a DRBD device. 
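Before the local-attach parameters below, a worked example of the _CheckMetaSize() bounds just defined ("blockdev --getsize" reports 512-byte sectors):

sectors = 262144                 # as reported by "blockdev --getsize"
num_bytes = sectors * 512        # 134217728, i.e. exactly 128 MiB
ok = (128 * 1024 * 1024 <= num_bytes <= 1024 * 1024 * 1024)  # True

Anything from 128 MiB up to the deliberately generous 1 GiB cap passes the check.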
@type minor: int @param minor: the minor to assemble locally @type backend: string @param backend: path to the data device to use @type meta: string @param meta: path to the meta device to use @type size: int @param size: size in MiB """ cmds = self._cmd_gen.GenLocalInitCmds(minor, backend, meta, size, self.params) for cmd in cmds: result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't attach local disk: %s", minor, result.output) def _AssembleNet(self, minor, net_info, dual_pri=False, hmac=None, secret=None): """Configure the network part of the device. @type minor: int @param minor: the minor to assemble the network for @type net_info: (string, int, string, int) @param net_info: tuple containing the local address, local port, remote address and remote port @type dual_pri: boolean @param dual_pri: whether two primaries should be allowed or not @type hmac: string @param hmac: the HMAC algorithm to use @type secret: string @param secret: the shared secret to use """ lhost, lport, rhost, rport = net_info if None in net_info: # we don't want network connection and actually want to make # sure its shutdown self._ShutdownNet(minor) return if dual_pri: protocol = constants.DRBD_MIGRATION_NET_PROTOCOL else: protocol = self.params[constants.LDP_PROTOCOL] # Workaround for a race condition. When DRBD is doing its dance to # establish a connection with its peer, it also sends the # synchronization speed over the wire. In some cases setting the # sync speed only after setting up both sides can race with DRBD # connecting, hence we set it here before telling DRBD anything # about its peer. sync_errors = self._SetMinorSyncParams(minor, self.params) if sync_errors: base.ThrowError("drbd%d: can't set the synchronization parameters: %s" % (minor, utils.CommaJoin(sync_errors))) family = self._GetNetFamily(minor, lhost, rhost) cmd = self._cmd_gen.GenNetInitCmd(minor, family, lhost, lport, rhost, rport, protocol, dual_pri, hmac, secret, self.params) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't setup network: %s - %s", minor, result.fail_reason, result.output) def _CheckNetworkConfig(): info = self._GetShowInfo(minor) if not "local_addr" in info or not "remote_addr" in info: raise utils.RetryAgain() if (info["local_addr"] != (lhost, lport) or info["remote_addr"] != (rhost, rport)): raise utils.RetryAgain() try: utils.Retry(_CheckNetworkConfig, 1.0, 10.0) except utils.RetryTimeout: base.ThrowError("drbd%d: timeout while configuring network", minor) @staticmethod def _GetNetFamily(minor, lhost, rhost): if netutils.IP6Address.IsValid(lhost): if not netutils.IP6Address.IsValid(rhost): base.ThrowError("drbd%d: can't connect ip %s to ip %s" % (minor, lhost, rhost)) return "ipv6" elif netutils.IP4Address.IsValid(lhost): if not netutils.IP4Address.IsValid(rhost): base.ThrowError("drbd%d: can't connect ip %s to ip %s" % (minor, lhost, rhost)) return "ipv4" else: base.ThrowError("drbd%d: Invalid ip %s" % (minor, lhost)) def AddChildren(self, devices): """Add a disk to the DRBD device. 
    @type devices: list of L{BlockDev}
    @param devices: a list of exactly two L{BlockDev} objects; the first
        denotes the data device, the second the meta device for this DRBD
        device

    """
    if self.minor is None:
      base.ThrowError("drbd%d: can't attach to drbd8 during AddChildren",
                      self._aminor)
    if len(devices) != 2:
      base.ThrowError("drbd%d: need two devices for AddChildren", self.minor)
    info = self._GetShowInfo(self.minor)
    if "local_dev" in info:
      base.ThrowError("drbd%d: already attached to a local disk", self.minor)
    backend, meta = devices
    if backend.dev_path is None or meta.dev_path is None:
      base.ThrowError("drbd%d: children not ready during AddChildren",
                      self.minor)
    backend.Open()
    meta.Open()
    self._CheckMetaSize(meta.dev_path)
    self._InitMeta(DRBD8.FindUnusedMinor(), meta.dev_path)

    self._AssembleLocal(self.minor, backend.dev_path, meta.dev_path, self.size)
    self._children = devices

  def RemoveChildren(self, devices):
    """Detach the drbd device from local storage.

    @type devices: list of L{BlockDev}
    @param devices: a list of exactly two L{BlockDev} objects; the first
        denotes the data device, the second the meta device for this DRBD
        device

    """
    if self.minor is None:
      base.ThrowError("drbd%d: can't attach to drbd8 during RemoveChildren",
                      self._aminor)
    # early return if we don't actually have backing storage
    info = self._GetShowInfo(self.minor)
    if "local_dev" not in info:
      return
    if len(self._children) != 2:
      base.ThrowError("drbd%d: we don't have two children: %s", self.minor,
                      self._children)
    if self._children.count(None) == 2:  # we don't actually have children :)
      logging.warning("drbd%d: requested detach while detached", self.minor)
      return
    if len(devices) != 2:
      base.ThrowError("drbd%d: we need two children in RemoveChildren",
                      self.minor)
    for child, dev in zip(self._children, devices):
      if dev != child.dev_path:
        base.ThrowError("drbd%d: mismatch in local storage (%s != %s) in"
                        " RemoveChildren", self.minor, dev, child.dev_path)

    self._ShutdownLocal(self.minor)
    self._children = []

  def _SetMinorSyncParams(self, minor, params):
    """Set the parameters of the DRBD syncer.

    This is the low-level implementation.

    @type minor: int
    @param minor: the drbd minor whose settings we change
    @type params: dict
    @param params: LD level disk parameters related to the synchronization
    @rtype: list
    @return: a list of error messages

    """
    cmd = self._cmd_gen.GenSyncParamsCmd(minor, params)
    result = utils.RunCmd(cmd)
    if result.failed:
      msg = ("Can't change syncer rate: %s - %s" %
             (result.fail_reason, result.output))
      logging.error(msg)
      return [msg]
    return []

  def SetSyncParams(self, params):
    """Set the synchronization parameters of the DRBD syncer.

    See L{BlockDev.SetSyncParams} for parameter description.

    """
    if self.minor is None:
      err = "Not attached during SetSyncParams"
      logging.info(err)
      return [err]

    children_result = super(DRBD8Dev, self).SetSyncParams(params)
    children_result.extend(self._SetMinorSyncParams(self.minor, params))
    return children_result

  def PauseResumeSync(self, pause):
    """Pauses or resumes the sync of a DRBD device.

    See L{BlockDev.PauseResumeSync} for parameter description.
""" if self.minor is None: logging.info("Not attached during PauseSync") return False children_result = super(DRBD8Dev, self).PauseResumeSync(pause) if pause: cmd = self._cmd_gen.GenPauseSyncCmd(self.minor) else: cmd = self._cmd_gen.GenResumeSyncCmd(self.minor) result = utils.RunCmd(cmd) if result.failed: logging.error("Can't %s: %s - %s", cmd, result.fail_reason, result.output) return not result.failed and children_result def GetProcStatus(self): """Return the current status data from /proc/drbd for this device. @rtype: DRBD8Status """ if self.minor is None: base.ThrowError("drbd%d: GetStats() called while not attached", self._aminor) info = DRBD8.GetProcInfo() if not info.HasMinorStatus(self.minor): base.ThrowError("drbd%d: can't find myself in /proc", self.minor) return info.GetMinorStatus(self.minor) def GetSyncStatus(self): """Returns the sync status of the device. If sync_percent is None, it means all is ok If estimated_time is None, it means we can't estimate the time needed, otherwise it's the time left in seconds. We set the is_degraded parameter to True on two conditions: network not connected or local disk missing. We compute the ldisk parameter based on whether we have a local disk or not. @rtype: objects.BlockDevStatus """ if self.minor is None and not self.Attach(): base.ThrowError("drbd%d: can't Attach() in GetSyncStatus", self._aminor) stats = self.GetProcStatus() is_degraded = not stats.is_connected or not stats.is_disk_uptodate if stats.is_disk_uptodate: ldisk_status = constants.LDS_OKAY elif stats.is_diskless: ldisk_status = constants.LDS_FAULTY else: ldisk_status = constants.LDS_UNKNOWN return objects.BlockDevStatus(dev_path=self.dev_path, major=self.major, minor=self.minor, sync_percent=stats.sync_percent, estimated_time=stats.est_time, is_degraded=is_degraded, ldisk_status=ldisk_status) def Open(self, force=False): """Make the local state primary. If the 'force' parameter is given, DRBD is instructed to switch the device into primary mode. Since this is a potentially dangerous operation, the force flag should be only given after creation, when it actually is mandatory. """ if self.minor is None and not self.Attach(): logging.error("DRBD cannot attach to a device during open") return False cmd = self._cmd_gen.GenPrimaryCmd(self.minor, force) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't make drbd device primary: %s", self.minor, result.output) def Close(self): """Make the local state secondary. This will, of course, fail if the device is in use. """ if self.minor is None and not self.Attach(): base.ThrowError("drbd%d: can't Attach() in Close()", self._aminor) cmd = self._cmd_gen.GenSecondaryCmd(self.minor) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't switch drbd device to secondary: %s", self.minor, result.output) def DisconnectNet(self): """Removes network configuration. This method shutdowns the network side of the device. The method will wait up to a hardcoded timeout for the device to go into standalone after the 'disconnect' command before re-configuring it, as sometimes it takes a while for the disconnect to actually propagate and thus we might issue a 'net' command while the device is still connected. If the device will still be attached to the network and we time out, we raise an exception. 
""" if self.minor is None: base.ThrowError("drbd%d: disk not attached in re-attach net", self._aminor) if None in (self._lhost, self._lport, self._rhost, self._rport): base.ThrowError("drbd%d: DRBD disk missing network info in" " DisconnectNet()", self.minor) class _DisconnectStatus: def __init__(self, ever_disconnected): self.ever_disconnected = ever_disconnected dstatus = _DisconnectStatus(base.IgnoreError(self._ShutdownNet, self.minor)) def _WaitForDisconnect(): if self.GetProcStatus().is_standalone: return # retry the disconnect, it seems possible that due to a well-time # disconnect on the peer, my disconnect command might be ignored and # forgotten dstatus.ever_disconnected = \ base.IgnoreError(self._ShutdownNet, self.minor) or \ dstatus.ever_disconnected raise utils.RetryAgain() # Keep start time start_time = time.time() try: # Start delay at 100 milliseconds and grow up to 2 seconds utils.Retry(_WaitForDisconnect, (0.1, 1.5, 2.0), self._NET_RECONFIG_TIMEOUT) except utils.RetryTimeout: if dstatus.ever_disconnected: msg = ("drbd%d: device did not react to the" " 'disconnect' command in a timely manner") else: msg = "drbd%d: can't shutdown network, even after multiple retries" base.ThrowError(msg, self.minor) reconfig_time = time.time() - start_time if reconfig_time > (self._NET_RECONFIG_TIMEOUT * 0.25): logging.info("drbd%d: DisconnectNet: detach took %.3f seconds", self.minor, reconfig_time) def AttachNet(self, multimaster): """Reconnects the network. This method connects the network side of the device with a specified multi-master flag. The device needs to be 'Standalone' but have valid network configuration data. @type multimaster: boolean @param multimaster: init the network in dual-primary mode """ if self.minor is None: base.ThrowError("drbd%d: device not attached in AttachNet", self._aminor) if None in (self._lhost, self._lport, self._rhost, self._rport): base.ThrowError("drbd%d: missing network info in AttachNet()", self.minor) status = self.GetProcStatus() if not status.is_standalone: base.ThrowError("drbd%d: device is not standalone in AttachNet", self.minor) self._AssembleNet(self.minor, (self._lhost, self._lport, self._rhost, self._rport), dual_pri=multimaster, hmac=constants.DRBD_HMAC_ALG, secret=self._secret) def Attach(self): """Check if our minor is configured. This doesn't do any device configurations - it only checks if the minor is in a state different from Unconfigured. Note that this function will not change the state of the system in any way (except in case of side-effects caused by reading from /proc). """ used_devs = DRBD8.GetUsedDevs() if self._aminor in used_devs: minor = self._aminor else: minor = None self._SetFromMinor(minor) return minor is not None def Assemble(self): """Assemble the drbd. Method: - if we have a configured device, we try to ensure that it matches our config - if not, we create it from zero - anyway, set the device parameters """ super(DRBD8Dev, self).Assemble() self.Attach() if self.minor is None: # local device completely unconfigured self._FastAssemble() else: # we have to recheck the local and network status and try to fix # the device self._SlowAssemble() sync_errors = self.SetSyncParams(self.params) if sync_errors: base.ThrowError("drbd%d: can't set the synchronization parameters: %s" % (self.minor, utils.CommaJoin(sync_errors))) def _SlowAssemble(self): """Assembles the DRBD device from a (partially) configured device. In case of partially attached (local device matches but no network setup), we perform the network attach. 
If successful, we re-test the attach if can return success. """ # TODO: Rewrite to not use a for loop just because there is 'break' # pylint: disable=W0631 net_data = (self._lhost, self._lport, self._rhost, self._rport) for minor in (self._aminor,): info = self._GetShowInfo(minor) match_l = self._MatchesLocal(info) match_r = self._MatchesNet(info) if match_l and match_r: # everything matches break if match_l and not match_r and "local_addr" not in info: # disk matches, but not attached to network, attach and recheck self._AssembleNet(minor, net_data, hmac=constants.DRBD_HMAC_ALG, secret=self._secret) if self._MatchesNet(self._GetShowInfo(minor)): break else: base.ThrowError("drbd%d: network attach successful, but 'drbdsetup" " show' disagrees", minor) if match_r and "local_dev" not in info: # no local disk, but network attached and it matches self._AssembleLocal(minor, self._children[0].dev_path, self._children[1].dev_path, self.size) if self._MatchesLocal(self._GetShowInfo(minor)): break else: base.ThrowError("drbd%d: disk attach successful, but 'drbdsetup" " show' disagrees", minor) # this case must be considered only if we actually have local # storage, i.e. not in diskless mode, because all diskless # devices are equal from the point of view of local # configuration if (match_l and "local_dev" in info and not match_r and "local_addr" in info): # strange case - the device network part points to somewhere # else, even though its local storage is ours; as we own the # drbd space, we try to disconnect from the remote peer and # reconnect to our correct one try: self._ShutdownNet(minor) except errors.BlockDeviceError, err: base.ThrowError("drbd%d: device has correct local storage, wrong" " remote peer and is unable to disconnect in order" " to attach to the correct peer: %s", minor, str(err)) # note: _AssembleNet also handles the case when we don't want # local storage (i.e. one or more of the _[lr](host|port) is # None) self._AssembleNet(minor, net_data, hmac=constants.DRBD_HMAC_ALG, secret=self._secret) if self._MatchesNet(self._GetShowInfo(minor)): break else: base.ThrowError("drbd%d: network attach successful, but 'drbdsetup" " show' disagrees", minor) else: minor = None self._SetFromMinor(minor) if minor is None: base.ThrowError("drbd%d: cannot activate, unknown or unhandled reason", self._aminor) def _FastAssemble(self): """Assemble the drbd device from zero. This is run when in Assemble we detect our minor is unused. """ minor = self._aminor if self._children and self._children[0] and self._children[1]: self._AssembleLocal(minor, self._children[0].dev_path, self._children[1].dev_path, self.size) if self._lhost and self._lport and self._rhost and self._rport: self._AssembleNet(minor, (self._lhost, self._lport, self._rhost, self._rport), hmac=constants.DRBD_HMAC_ALG, secret=self._secret) self._SetFromMinor(minor) def _ShutdownLocal(self, minor): """Detach from the local device. I/Os will continue to be served from the remote device. If we don't have a remote device, this operation will fail. @type minor: int @param minor: the device to detach from the local device """ cmd = self._cmd_gen.GenDetachCmd(minor) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't detach local disk: %s", minor, result.output) def _ShutdownNet(self, minor): """Disconnect from the remote peer. This fails if we don't have a local device. 
@type minor: boolean @param minor: the device to disconnect from the remote peer """ family = self._GetNetFamily(minor, self._lhost, self._rhost) cmd = self._cmd_gen.GenDisconnectCmd(minor, family, self._lhost, self._lport, self._rhost, self._rport) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: can't shutdown network: %s", minor, result.output) def Shutdown(self): """Shutdown the DRBD device. """ if self.minor is None and not self.Attach(): logging.info("drbd%d: not attached during Shutdown()", self._aminor) return try: DRBD8.ShutdownAll(self.minor) finally: self.minor = None self.dev_path = None def Remove(self): """Stub remove for DRBD devices. """ self.Shutdown() def Rename(self, new_id): """Rename a device. This is not supported for drbd devices. """ raise errors.ProgrammerError("Can't rename a drbd device") def Grow(self, amount, dryrun, backingstore, excl_stor): """Resize the DRBD device and its backing storage. See L{BlockDev.Grow} for parameter description. """ if self.minor is None: base.ThrowError("drbd%d: Grow called while not attached", self._aminor) if len(self._children) != 2 or None in self._children: base.ThrowError("drbd%d: cannot grow diskless device", self.minor) self._children[0].Grow(amount, dryrun, backingstore, excl_stor) if dryrun or backingstore: # DRBD does not support dry-run mode and is not backing storage, # so we'll return here return cmd = self._cmd_gen.GenResizeCmd(self.minor, self.size + amount) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("drbd%d: resize failed: %s", self.minor, result.output) @classmethod def _InitMeta(cls, minor, dev_path): """Initialize a meta device. This will not work if the given minor is in use. @type minor: int @param minor: the DRBD minor whose (future) meta device should be initialized @type dev_path: string @param dev_path: path to the meta device to initialize """ # Zero the metadata first, in order to make sure drbdmeta doesn't # try to auto-detect existing filesystems or similar (see # http://code.google.com/p/ganeti/issues/detail?id=182); we only # care about the first 128MB of data in the device, even though it # can be bigger result = utils.RunCmd([constants.DD_CMD, "if=/dev/zero", "of=%s" % dev_path, "bs=1048576", "count=128", "oflag=direct"]) if result.failed: base.ThrowError("Can't wipe the meta device: %s", result.output) info = DRBD8.GetProcInfo() cmd_gen = DRBD8.GetCmdGenerator(info) cmd = cmd_gen.GenInitMetaCmd(minor, dev_path) result = utils.RunCmd(cmd) if result.failed: base.ThrowError("Can't initialize meta device: %s", result.output) @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new DRBD8 device. Since DRBD devices are not created per se, just assembled, this function only initializes the metadata. 
""" if len(children) != 2: raise errors.ProgrammerError("Invalid setup for the drbd device") if excl_stor: raise errors.ProgrammerError("DRBD device requested with" " exclusive_storage") # check that the minor is unused aminor = unique_id[4] info = DRBD8.GetProcInfo() if info.HasMinorStatus(aminor): status = info.GetMinorStatus(aminor) in_use = status.is_in_use else: in_use = False if in_use: base.ThrowError("drbd%d: minor is already in use at Create() time", aminor) meta = children[1] meta.Assemble() if not meta.Attach(): base.ThrowError("drbd%d: can't attach to meta device '%s'", aminor, meta) cls._CheckMetaSize(meta.dev_path) cls._InitMeta(aminor, meta.dev_path) return cls(unique_id, children, size, params) def _CanReadDevice(path): """Check if we can read from the given device. This tries to read the first 128k of the device. @type path: string """ try: utils.ReadFile(path, size=_DEVICE_READ_SIZE) return True except EnvironmentError: logging.warning("Can't read from device %s", path, exc_info=True) return False ganeti-2.9.3/lib/storage/bdev.py0000644000000000000000000016070712271422343016511 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Block device abstraction""" import re import errno import stat import os import logging import math from ganeti import utils from ganeti import errors from ganeti import constants from ganeti import objects from ganeti import compat from ganeti import pathutils from ganeti import serializer from ganeti.storage import base from ganeti.storage import drbd from ganeti.storage import filestorage class RbdShowmappedJsonError(Exception): """`rbd showmmapped' JSON formatting error Exception class. """ pass def _CheckResult(result): """Throws an error if the given result is a failed one. @param result: result from RunCmd """ if result.failed: base.ThrowError("Command: %s error: %s - %s", result.cmd, result.fail_reason, result.output) class LogicalVolume(base.BlockDev): """Logical Volume block device. """ _VALID_NAME_RE = re.compile("^[a-zA-Z0-9+_.-]*$") _PARSE_PV_DEV_RE = re.compile(r"^([^ ()]+)\([0-9]+\)$") _INVALID_NAMES = compat.UniqueFrozenset([".", "..", "snapshot", "pvmove"]) _INVALID_SUBSTRINGS = compat.UniqueFrozenset(["_mlog", "_mimage"]) def __init__(self, unique_id, children, size, params): """Attaches to a LV device. 
The unique_id is a tuple (vg_name, lv_name) """ super(LogicalVolume, self).__init__(unique_id, children, size, params) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) self._vg_name, self._lv_name = unique_id self._ValidateName(self._vg_name) self._ValidateName(self._lv_name) self.dev_path = utils.PathJoin("/dev", self._vg_name, self._lv_name) self._degraded = True self.major = self.minor = self.pe_size = self.stripe_count = None self.pv_names = None self.Attach() @staticmethod def _GetStdPvSize(pvs_info): """Return the the standard PV size (used with exclusive storage). @param pvs_info: list of objects.LvmPvInfo, cannot be empty @rtype: float @return: size in MiB """ assert len(pvs_info) > 0 smallest = min([pv.size for pv in pvs_info]) return smallest / (1 + constants.PART_MARGIN + constants.PART_RESERVED) @staticmethod def _ComputeNumPvs(size, pvs_info): """Compute the number of PVs needed for an LV (with exclusive storage). @type size: float @param size: LV size in MiB @param pvs_info: list of objects.LvmPvInfo, cannot be empty @rtype: integer @return: number of PVs needed """ assert len(pvs_info) > 0 pv_size = float(LogicalVolume._GetStdPvSize(pvs_info)) return int(math.ceil(float(size) / pv_size)) @staticmethod def _GetEmptyPvNames(pvs_info, max_pvs=None): """Return a list of empty PVs, by name. """ empty_pvs = filter(objects.LvmPvInfo.IsEmpty, pvs_info) if max_pvs is not None: empty_pvs = empty_pvs[:max_pvs] return map((lambda pv: pv.name), empty_pvs) @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new logical volume. """ if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise errors.ProgrammerError("Invalid configuration data %s" % str(unique_id)) vg_name, lv_name = unique_id cls._ValidateName(vg_name) cls._ValidateName(lv_name) pvs_info = cls.GetPVInfo([vg_name]) if not pvs_info: if excl_stor: msg = "No (empty) PVs found" else: msg = "Can't compute PV info for vg %s" % vg_name base.ThrowError(msg) pvs_info.sort(key=(lambda pv: pv.free), reverse=True) pvlist = [pv.name for pv in pvs_info] if compat.any(":" in v for v in pvlist): base.ThrowError("Some of your PVs have the invalid character ':' in their" " name, this is not supported - please filter them out" " in lvm.conf using either 'filter' or 'preferred_names'") current_pvs = len(pvlist) desired_stripes = params[constants.LDP_STRIPES] stripes = min(current_pvs, desired_stripes) if excl_stor: if spindles is None: base.ThrowError("Unspecified number of spindles: this is required" "when exclusive storage is enabled, try running" " gnt-cluster repair-disk-sizes") (err_msgs, _) = utils.LvmExclusiveCheckNodePvs(pvs_info) if err_msgs: for m in err_msgs: logging.warning(m) req_pvs = cls._ComputeNumPvs(size, pvs_info) if spindles < req_pvs: base.ThrowError("Requested number of spindles (%s) is not enough for" " a disk of %d MB (at least %d spindles needed)", spindles, size, req_pvs) else: req_pvs = spindles pvlist = cls._GetEmptyPvNames(pvs_info, req_pvs) current_pvs = len(pvlist) if current_pvs < req_pvs: base.ThrowError("Not enough empty PVs (spindles) to create a disk of %d" " MB: %d available, %d needed", size, current_pvs, req_pvs) assert current_pvs == len(pvlist) # We must update stripes to be sure to use all the desired spindles stripes = current_pvs if stripes > desired_stripes: # Don't warn when lowering stripes, as it's no surprise logging.warning("Using %s stripes instead of 
%s, to be able to use" " %s spindles", stripes, desired_stripes, current_pvs) else: if stripes < desired_stripes: logging.warning("Could not use %d stripes for VG %s, as only %d PVs are" " available.", desired_stripes, vg_name, current_pvs) free_size = sum([pv.free for pv in pvs_info]) # The size constraint should have been checked from the master before # calling the create function. if free_size < size: base.ThrowError("Not enough free space: required %s," " available %s", size, free_size) # If the free space is not well distributed, we won't be able to # create an optimally-striped volume; in that case, we want to try # with N, N-1, ..., 2, and finally 1 (non-stripped) number of # stripes cmd = ["lvcreate", "-L%dm" % size, "-n%s" % lv_name] for stripes_arg in range(stripes, 0, -1): result = utils.RunCmd(cmd + ["-i%d" % stripes_arg] + [vg_name] + pvlist) if not result.failed: break if result.failed: base.ThrowError("LV create failed (%s): %s", result.fail_reason, result.output) return LogicalVolume(unique_id, children, size, params) @staticmethod def _GetVolumeInfo(lvm_cmd, fields): """Returns LVM Volume infos using lvm_cmd @param lvm_cmd: Should be one of "pvs", "vgs" or "lvs" @param fields: Fields to return @return: A list of dicts each with the parsed fields """ if not fields: raise errors.ProgrammerError("No fields specified") sep = "|" cmd = [lvm_cmd, "--noheadings", "--nosuffix", "--units=m", "--unbuffered", "--separator=%s" % sep, "-o%s" % ",".join(fields)] result = utils.RunCmd(cmd) if result.failed: raise errors.CommandError("Can't get the volume information: %s - %s" % (result.fail_reason, result.output)) data = [] for line in result.stdout.splitlines(): splitted_fields = line.strip().split(sep) if len(fields) != len(splitted_fields): raise errors.CommandError("Can't parse %s output: line '%s'" % (lvm_cmd, line)) data.append(splitted_fields) return data @classmethod def GetPVInfo(cls, vg_names, filter_allocatable=True, include_lvs=False): """Get the free space info for PVs in a volume group. @param vg_names: list of volume group names, if empty all will be returned @param filter_allocatable: whether to skip over unallocatable PVs @param include_lvs: whether to include a list of LVs hosted on each PV @rtype: list @return: list of objects.LvmPvInfo objects """ # We request "lv_name" field only if we care about LVs, so we don't get # a long list of entries with many duplicates unless we really have to. # The duplicate "pv_name" field will be ignored. if include_lvs: lvfield = "lv_name" else: lvfield = "pv_name" try: info = cls._GetVolumeInfo("pvs", ["pv_name", "vg_name", "pv_free", "pv_attr", "pv_size", lvfield]) except errors.GenericError, err: logging.error("Can't get PV information: %s", err) return None # When asked for LVs, "pvs" may return multiple entries for the same PV-LV # pair. We sort entries by PV name and then LV name, so it's easy to weed # out duplicates. 
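# Illustrative sketch (editor's addition; the sample values below are
# hypothetical, not taken from the upstream code): with include_lvs=True,
# "pvs" can emit one row per PV/LV pair, e.g.
#   /dev/sda2|xenvg|2048.00|a-|4096.00|vm1-disk0
#   /dev/sda2|xenvg|2048.00|a-|4096.00|vm1-disk1
# After the sort below, consecutive rows sharing a PV name are folded into a
# single objects.LvmPvInfo whose lv_list becomes ["vm1-disk0", "vm1-disk1"].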
if include_lvs: info.sort(key=(lambda i: (i[0], i[5]))) data = [] lastpvi = None for (pv_name, vg_name, pv_free, pv_attr, pv_size, lv_name) in info: # (possibly) skip over pvs which are not allocatable if filter_allocatable and pv_attr[0] != "a": continue # (possibly) skip over pvs which are not in the right volume group(s) if vg_names and vg_name not in vg_names: continue # Beware of duplicates (check before inserting) if lastpvi and lastpvi.name == pv_name: if include_lvs and lv_name: if not lastpvi.lv_list or lastpvi.lv_list[-1] != lv_name: lastpvi.lv_list.append(lv_name) else: if include_lvs and lv_name: lvl = [lv_name] else: lvl = [] lastpvi = objects.LvmPvInfo(name=pv_name, vg_name=vg_name, size=float(pv_size), free=float(pv_free), attributes=pv_attr, lv_list=lvl) data.append(lastpvi) return data @classmethod def _GetRawFreePvInfo(cls, vg_name): """Return info (size/free) about PVs. @type vg_name: string @param vg_name: VG name @rtype: tuple @return: (standard_pv_size_in_MiB, number_of_free_pvs, total_number_of_pvs) """ pvs_info = cls.GetPVInfo([vg_name]) if not pvs_info: pv_size = 0.0 free_pvs = 0 num_pvs = 0 else: pv_size = cls._GetStdPvSize(pvs_info) free_pvs = len(cls._GetEmptyPvNames(pvs_info)) num_pvs = len(pvs_info) return (pv_size, free_pvs, num_pvs) @classmethod def _GetExclusiveStorageVgFree(cls, vg_name): """Return the free disk space in the given VG, in exclusive storage mode. @type vg_name: string @param vg_name: VG name @rtype: float @return: free space in MiB """ (pv_size, free_pvs, _) = cls._GetRawFreePvInfo(vg_name) return pv_size * free_pvs @classmethod def GetVgSpindlesInfo(cls, vg_name): """Get the spindle (PV) usage info for a specific VG. @param vg_name: volume group name @rtype: tuple @return: (free_spindles, total_spindles) """ (_, free_pvs, num_pvs) = cls._GetRawFreePvInfo(vg_name) return (free_pvs, num_pvs) @classmethod def GetVGInfo(cls, vg_names, excl_stor, filter_readonly=True): """Get the free space info for specific VGs. @param vg_names: list of volume group names, if empty all will be returned @param excl_stor: whether exclusive_storage is enabled @param filter_readonly: whether to skip over readonly VGs @rtype: list @return: list of tuples (free_space, total_size, name) with free_space in MiB """ try: info = cls._GetVolumeInfo("vgs", ["vg_name", "vg_free", "vg_attr", "vg_size"]) except errors.GenericError, err: logging.error("Can't get VG information: %s", err) return None data = [] for vg_name, vg_free, vg_attr, vg_size in info: # (possibly) skip over vgs which are not writable if filter_readonly and vg_attr[0] == "r": continue # (possibly) skip over vgs which were not requested if vg_names and vg_name not in vg_names: continue # Exclusive storage needs a different concept of free space if excl_stor: es_free = cls._GetExclusiveStorageVgFree(vg_name) assert es_free <= float(vg_free) vg_free = es_free data.append((float(vg_free), float(vg_size), vg_name)) return data @classmethod def _ValidateName(cls, name): """Validates that a given name is valid as VG or LV name. The list of valid characters and restricted names is taken out of the lvm(8) manpage, with the simplification that we enforce both VG and LV restrictions on the names. """ if (not cls._VALID_NAME_RE.match(name) or name in cls._INVALID_NAMES or compat.any(substring in name for substring in cls._INVALID_SUBSTRINGS)): base.ThrowError("Invalid LVM name '%s'", name) def Remove(self): """Remove this logical volume.
""" if not self.minor and not self.Attach(): # the LV does not exist return result = utils.RunCmd(["lvremove", "-f", "%s/%s" % (self._vg_name, self._lv_name)]) if result.failed: base.ThrowError("Can't lvremove: %s - %s", result.fail_reason, result.output) def Rename(self, new_id): """Rename this logical volume. """ if not isinstance(new_id, (tuple, list)) or len(new_id) != 2: raise errors.ProgrammerError("Invalid new logical id '%s'" % new_id) new_vg, new_name = new_id if new_vg != self._vg_name: raise errors.ProgrammerError("Can't move a logical volume across" " volume groups (from %s to %s)" % (self._vg_name, new_vg)) result = utils.RunCmd(["lvrename", new_vg, self._lv_name, new_name]) if result.failed: base.ThrowError("Failed to rename the logical volume: %s", result.output) self._lv_name = new_name self.dev_path = utils.PathJoin("/dev", self._vg_name, self._lv_name) @classmethod def _ParseLvInfoLine(cls, line, sep): """Parse one line of the lvs output used in L{_GetLvInfo}. """ elems = line.strip().rstrip(sep).split(sep) if len(elems) != 6: base.ThrowError("Can't parse LVS output, len(%s) != 6", str(elems)) (status, major, minor, pe_size, stripes, pvs) = elems if len(status) < 6: base.ThrowError("lvs lv_attr is not at least 6 characters (%s)", status) try: major = int(major) minor = int(minor) except (TypeError, ValueError), err: base.ThrowError("lvs major/minor cannot be parsed: %s", str(err)) try: pe_size = int(float(pe_size)) except (TypeError, ValueError), err: base.ThrowError("Can't parse vg extent size: %s", err) try: stripes = int(stripes) except (TypeError, ValueError), err: base.ThrowError("Can't parse the number of stripes: %s", err) pv_names = [] for pv in pvs.split(","): m = re.match(cls._PARSE_PV_DEV_RE, pv) if not m: base.ThrowError("Can't parse this device list: %s", pvs) pv_names.append(m.group(1)) assert len(pv_names) > 0 return (status, major, minor, pe_size, stripes, pv_names) @classmethod def _GetLvInfo(cls, dev_path, _run_cmd=utils.RunCmd): """Get info about the given existing LV to be used. """ sep = "|" result = _run_cmd(["lvs", "--noheadings", "--separator=%s" % sep, "--units=k", "--nosuffix", "-olv_attr,lv_kernel_major,lv_kernel_minor," "vg_extent_size,stripes,devices", dev_path]) if result.failed: base.ThrowError("Can't find LV %s: %s, %s", dev_path, result.fail_reason, result.output) # the output can (and will) have multiple lines for multi-segment # LVs, as the 'stripes' parameter is a segment one, so we take # only the last entry, which is the one we're interested in; note # that with LVM2 anyway the 'stripes' value must be constant # across segments, so this is a no-op actually out = result.stdout.splitlines() if not out: # totally empty result? splitlines() returns at least # one line for any non-empty string base.ThrowError("Can't parse LVS output, no lines? Got '%s'", str(out)) pv_names = set() for line in out: (status, major, minor, pe_size, stripes, more_pvs) = \ cls._ParseLvInfoLine(line, sep) pv_names.update(more_pvs) return (status, major, minor, pe_size, stripes, pv_names) def Attach(self): """Attach to an existing LV. This method will try to see whether an existing and active LV matching our name exists. If so, its major/minor will be recorded.
""" self.attached = False try: (status, major, minor, pe_size, stripes, pv_names) = \ self._GetLvInfo(self.dev_path) except errors.BlockDeviceError: return False self.major = major self.minor = minor self.pe_size = pe_size self.stripe_count = stripes self._degraded = status[0] == "v" # virtual volume, i.e. it has no backing # storage self.pv_names = pv_names self.attached = True return True def Assemble(self): """Assemble the device. We always run `lvchange -ay` on the LV to ensure it's active before use, as there were cases when xenvg was not active after boot (also possibly after disk issues). """ result = utils.RunCmd(["lvchange", "-ay", self.dev_path]) if result.failed: base.ThrowError("Can't activate lv %s: %s", self.dev_path, result.output) def Shutdown(self): """Shutdown the device. This is a no-op for the LV device type, as we don't deactivate the volumes on shutdown. """ pass def GetSyncStatus(self): """Returns the sync status of the device. If this device is a mirroring device, this function returns the status of the mirror. For logical volumes, sync_percent and estimated_time are always None (no recovery in progress, as we don't handle the mirrored LV case). The is_degraded parameter is the inverse of the ldisk parameter. For the ldisk parameter, we check if the logical volume has the 'virtual' type, which means it's not backed by existing storage anymore (reads from it return I/O errors). This happens after a physical disk failure and subsequent 'vgreduce --removemissing' on the volume group. The status was already read in Attach, so we just return it. @rtype: objects.BlockDevStatus """ if self._degraded: ldisk_status = constants.LDS_FAULTY else: ldisk_status = constants.LDS_OKAY return objects.BlockDevStatus(dev_path=self.dev_path, major=self.major, minor=self.minor, sync_percent=None, estimated_time=None, is_degraded=self._degraded, ldisk_status=ldisk_status) def Open(self, force=False): """Make the device ready for I/O. This is a no-op for the LV device type. """ pass def Close(self): """Notifies that the device will no longer be used for I/O. This is a no-op for the LV device type. """ pass def Snapshot(self, size): """Create a snapshot copy of an lvm block device. @returns: tuple (vg, lv) """ snap_name = self._lv_name + ".snap" # remove existing snapshot if found snap = LogicalVolume((self._vg_name, snap_name), None, size, self.params) base.IgnoreError(snap.Remove) vg_info = self.GetVGInfo([self._vg_name], False) if not vg_info: base.ThrowError("Can't compute VG info for vg %s", self._vg_name) free_size, _, _ = vg_info[0] if free_size < size: base.ThrowError("Not enough free space: required %s," " available %s", size, free_size) _CheckResult(utils.RunCmd(["lvcreate", "-L%dm" % size, "-s", "-n%s" % snap_name, self.dev_path])) return (self._vg_name, snap_name) def _RemoveOldInfo(self): """Try to remove old tags from the lv. """ result = utils.RunCmd(["lvs", "-o", "tags", "--noheadings", "--nosuffix", self.dev_path]) _CheckResult(result) raw_tags = result.stdout.strip() if raw_tags: for tag in raw_tags.split(","): _CheckResult(utils.RunCmd(["lvchange", "--deltag", tag.strip(), self.dev_path])) def SetInfo(self, text): """Update metadata with info text.
""" base.BlockDev.SetInfo(self, text) self._RemoveOldInfo() # Replace invalid characters text = re.sub("^[^A-Za-z0-9_+.]", "_", text) text = re.sub("[^-A-Za-z0-9_+.]", "_", text) # Only up to 128 characters are allowed text = text[:128] _CheckResult(utils.RunCmd(["lvchange", "--addtag", text, self.dev_path])) def _GetGrowthAvailabilityExclStor(self): """Return how much the disk can grow with exclusive storage. @rtype: float @return: available space in MiB """ pvs_info = self.GetPVInfo([self._vg_name]) if not pvs_info: base.ThrowError("Cannot get information about PVs for %s", self.dev_path) std_pv_size = self._GetStdPvSize(pvs_info) free_space = sum(pvi.free - (pvi.size - std_pv_size) for pvi in pvs_info if pvi.name in self.pv_names) return free_space def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the logical volume. """ if not backingstore: return if self.pe_size is None or self.stripe_count is None: if not self.Attach(): base.ThrowError("Can't attach to LV during Grow()") full_stripe_size = self.pe_size * self.stripe_count # pe_size is in KB amount *= 1024 rest = amount % full_stripe_size if rest != 0: amount += full_stripe_size - rest cmd = ["lvextend", "-L", "+%dk" % amount] if dryrun: cmd.append("--test") if excl_stor: free_space = self._GetGrowthAvailabilityExclStor() # amount is in KiB, free_space in MiB if amount > free_space * 1024: base.ThrowError("Not enough free space to grow %s: %d MiB required," " %d available", self.dev_path, amount / 1024, free_space) # Disk growth doesn't grow the number of spindles, so we must stay within # our assigned volumes pvlist = list(self.pv_names) else: pvlist = [] # we try multiple algorithms since the 'best' ones might not have # space available in the right place, but later ones might (since # they have fewer constraints); also note that only recent LVM # supports 'cling' for alloc_policy in "contiguous", "cling", "normal": result = utils.RunCmd(cmd + ["--alloc", alloc_policy, self.dev_path] + pvlist) if not result.failed: return base.ThrowError("Can't grow LV %s: %s", self.dev_path, result.output) def GetActualSpindles(self): """Return the number of spindles used. """ assert self.attached, "BlockDevice not attached in GetActualSpindles()" return len(self.pv_names) class FileStorage(base.BlockDev): """File device. This class represents a file storage backend device. The unique_id for the file device is a (file_driver, file_path) tuple. """ def __init__(self, unique_id, children, size, params): """Initializes a file device backend. """ if children: raise errors.BlockDeviceError("Invalid setup for file device") super(FileStorage, self).__init__(unique_id, children, size, params) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) self.driver = unique_id[0] self.dev_path = unique_id[1] filestorage.CheckFileStoragePathAcceptance(self.dev_path) self.Attach() def Assemble(self): """Assemble the device. Checks whether the file device exists; raises BlockDeviceError otherwise. """ if not os.path.exists(self.dev_path): base.ThrowError("File device '%s' does not exist" % self.dev_path) def Shutdown(self): """Shutdown the device. This is a no-op for the file type, as we don't deactivate the file on shutdown. """ pass def Open(self, force=False): """Make the device ready for I/O. This is a no-op for the file type. """ pass def Close(self): """Notifies that the device will no longer be used for I/O. This is a no-op for the file type.
""" pass def Remove(self): """Remove the file backing the block device. @rtype: boolean @return: True if the removal was successful """ try: os.remove(self.dev_path) except OSError, err: if err.errno != errno.ENOENT: base.ThrowError("Can't remove file '%s': %s", self.dev_path, err) def Rename(self, new_id): """Renames the file. """ # TODO: implement rename for file-based storage base.ThrowError("Rename is not supported for file-based storage") def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the file. @param amount: the amount (in mebibytes) to grow with """ if not backingstore: return # Check that the file exists self.Assemble() current_size = self.GetActualSize() new_size = current_size + amount * 1024 * 1024 assert new_size > current_size, "Cannot Grow with a negative amount" # We can't really simulate the growth if dryrun: return try: f = open(self.dev_path, "a+") f.truncate(new_size) f.close() except EnvironmentError, err: base.ThrowError("Error in file growth: %s", str(err)) def Attach(self): """Attach to an existing file. Check if this file already exists. @rtype: boolean @return: True if file exists """ self.attached = os.path.exists(self.dev_path) return self.attached def GetActualSize(self): """Return the actual disk size. @note: the device needs to be active when this is called """ assert self.attached, "BlockDevice not attached in GetActualSize()" try: st = os.stat(self.dev_path) return st.st_size except OSError, err: base.ThrowError("Can't stat %s: %s", self.dev_path, err) @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new file. @param size: the size of file in MiB @rtype: L{bdev.FileStorage} @return: an instance of FileStorage """ if excl_stor: raise errors.ProgrammerError("FileStorage device requested with" " exclusive_storage") if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) dev_path = unique_id[1] filestorage.CheckFileStoragePathAcceptance(dev_path) try: fd = os.open(dev_path, os.O_RDWR | os.O_CREAT | os.O_EXCL) f = os.fdopen(fd, "w") f.truncate(size * 1024 * 1024) f.close() except EnvironmentError, err: if err.errno == errno.EEXIST: base.ThrowError("File already exists: %s", dev_path) base.ThrowError("Error in file creation: %s", str(err)) return FileStorage(unique_id, children, size, params) class PersistentBlockDevice(base.BlockDev): """A block device with a persistent node. May be either directly attached, or exposed through DM (e.g. dm-multipath). udev helpers are probably required to give persistent, human-friendly names. For the time being, pathnames are required to lie under /dev. """ def __init__(self, unique_id, children, size, params): """Attaches to a static block device. The unique_id is a path under /dev. """ super(PersistentBlockDevice, self).__init__(unique_id, children, size, params) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) self.dev_path = unique_id[1] if not os.path.realpath(self.dev_path).startswith("/dev/"): raise ValueError("Full path '%s' lies outside /dev" % os.path.realpath(self.dev_path)) # TODO: this is just a safety guard checking that we only deal with devices # we know how to handle. In the future this will be integrated with # external storage backends and possible values will probably be collected # from the cluster configuration.
if unique_id[0] != constants.BLOCKDEV_DRIVER_MANUAL: raise ValueError("Got persistent block device of invalid type: %s" % unique_id[0]) self.major = self.minor = None self.Attach() @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new device This is a noop, we only return a PersistentBlockDevice instance """ if excl_stor: raise errors.ProgrammerError("Persistent block device requested with" " exclusive_storage") return PersistentBlockDevice(unique_id, children, 0, params) def Remove(self): """Remove a device This is a noop """ pass def Rename(self, new_id): """Rename this device. """ base.ThrowError("Rename is not supported for PersistentBlockDev storage") def Attach(self): """Attach to an existing block device. """ self.attached = False try: st = os.stat(self.dev_path) except OSError, err: logging.error("Error stat()'ing %s: %s", self.dev_path, str(err)) return False if not stat.S_ISBLK(st.st_mode): logging.error("%s is not a block device", self.dev_path) return False self.major = os.major(st.st_rdev) self.minor = os.minor(st.st_rdev) self.attached = True return True def Assemble(self): """Assemble the device. """ pass def Shutdown(self): """Shutdown the device. """ pass def Open(self, force=False): """Make the device ready for I/O. """ pass def Close(self): """Notifies that the device will no longer be used for I/O. """ pass def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the logical volume. """ base.ThrowError("Grow is not supported for PersistentBlockDev storage") class RADOSBlockDevice(base.BlockDev): """A RADOS Block Device (rbd). This class implements the RADOS Block Device for the backend. You need the rbd kernel driver, the RADOS Tools and a working RADOS cluster for this to be functional. """ def __init__(self, unique_id, children, size, params): """Attaches to an rbd device. """ super(RADOSBlockDevice, self).__init__(unique_id, children, size, params) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) self.driver, self.rbd_name = unique_id self.major = self.minor = None self.Attach() @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new rbd device. Provision a new rbd volume inside a RADOS pool. """ if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise errors.ProgrammerError("Invalid configuration data %s" % str(unique_id)) if excl_stor: raise errors.ProgrammerError("RBD device requested with" " exclusive_storage") rbd_pool = params[constants.LDP_POOL] rbd_name = unique_id[1] # Provision a new rbd volume (Image) inside the RADOS cluster. cmd = [constants.RBD_CMD, "create", "-p", rbd_pool, rbd_name, "--size", "%s" % size] result = utils.RunCmd(cmd) if result.failed: base.ThrowError("rbd creation failed (%s): %s", result.fail_reason, result.output) return RADOSBlockDevice(unique_id, children, size, params) def Remove(self): """Remove the rbd device. """ rbd_pool = self.params[constants.LDP_POOL] rbd_name = self.unique_id[1] if not self.minor and not self.Attach(): # The rbd device doesn't exist. return # First shutdown the device (remove mappings). self.Shutdown() # Remove the actual Volume (Image) from the RADOS cluster. 
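# As a concrete example (pool/volume names hypothetical, editor's sketch):
# for rbd_pool "rbd" and rbd_name "disk0", the command built below is
# equivalent to running "rbd rm -p rbd disk0" on the node, after Shutdown()
# has already unmapped the block device.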
cmd = [constants.RBD_CMD, "rm", "-p", rbd_pool, rbd_name] result = utils.RunCmd(cmd) if result.failed: base.ThrowError("Can't remove Volume from cluster with rbd rm: %s - %s", result.fail_reason, result.output) def Rename(self, new_id): """Rename this device. """ pass def Attach(self): """Attach to an existing rbd device. This method maps the rbd volume that matches our name with an rbd device and then attaches to this device. """ self.attached = False # Map the rbd volume to a block device under /dev self.dev_path = self._MapVolumeToBlockdev(self.unique_id) try: st = os.stat(self.dev_path) except OSError, err: logging.error("Error stat()'ing %s: %s", self.dev_path, str(err)) return False if not stat.S_ISBLK(st.st_mode): logging.error("%s is not a block device", self.dev_path) return False self.major = os.major(st.st_rdev) self.minor = os.minor(st.st_rdev) self.attached = True return True def _MapVolumeToBlockdev(self, unique_id): """Maps existing rbd volumes to block devices. This method should be idempotent if the mapping already exists. @rtype: string @return: the block device path that corresponds to the volume """ pool = self.params[constants.LDP_POOL] name = unique_id[1] # Check if the mapping already exists. rbd_dev = self._VolumeToBlockdev(pool, name) if rbd_dev: # The mapping exists. Return it. return rbd_dev # The mapping doesn't exist. Create it. map_cmd = [constants.RBD_CMD, "map", "-p", pool, name] result = utils.RunCmd(map_cmd) if result.failed: base.ThrowError("rbd map failed (%s): %s", result.fail_reason, result.output) # Find the corresponding rbd device. rbd_dev = self._VolumeToBlockdev(pool, name) if not rbd_dev: base.ThrowError("rbd map succeeded, but could not find the rbd block" " device in output of showmapped, for volume: %s", name) # The device was successfully mapped. Return it. return rbd_dev @classmethod def _VolumeToBlockdev(cls, pool, volume_name): """Do the 'volume name'-to-'rbd block device' resolving. @type pool: string @param pool: RADOS pool to use @type volume_name: string @param volume_name: the name of the volume whose device we search for @rtype: string or None @return: block device path if the volume is mapped, else None """ try: # Newer versions of the rbd tool support json output formatting. Use it # if available. showmap_cmd = [ constants.RBD_CMD, "showmapped", "-p", pool, "--format", "json" ] result = utils.RunCmd(showmap_cmd) if result.failed: logging.error("rbd JSON output formatting returned error (%s): %s," "falling back to plain output parsing", result.fail_reason, result.output) raise RbdShowmappedJsonError return cls._ParseRbdShowmappedJson(result.output, volume_name) except RbdShowmappedJsonError: # For older versions of rbd, we have to parse the plain / text output # manually. showmap_cmd = [constants.RBD_CMD, "showmapped", "-p", pool] result = utils.RunCmd(showmap_cmd) if result.failed: base.ThrowError("rbd showmapped failed (%s): %s", result.fail_reason, result.output) return cls._ParseRbdShowmappedPlain(result.output, volume_name) @staticmethod def _ParseRbdShowmappedJson(output, volume_name): """Parse the json output of `rbd showmapped'. This method parses the json output of `rbd showmapped' and returns the rbd block device path (e.g. /dev/rbd0) that matches the given rbd volume. 
@type output: string @param output: the json output of `rbd showmapped' @type volume_name: string @param volume_name: the name of the volume whose device we search for @rtype: string or None @return: block device path if the volume is mapped, else None """ try: devices = serializer.LoadJson(output) except ValueError, err: base.ThrowError("Unable to parse JSON data: %s" % err) rbd_dev = None for d in devices.values(): # pylint: disable=E1103 try: name = d["name"] except KeyError: base.ThrowError("'name' key missing from json object %s", devices) if name == volume_name: if rbd_dev is not None: base.ThrowError("rbd volume %s is mapped more than once", volume_name) rbd_dev = d["device"] return rbd_dev @staticmethod def _ParseRbdShowmappedPlain(output, volume_name): """Parse the (plain / text) output of `rbd showmapped'. This method parses the output of `rbd showmapped' and returns the rbd block device path (e.g. /dev/rbd0) that matches the given rbd volume. @type output: string @param output: the plain text output of `rbd showmapped' @type volume_name: string @param volume_name: the name of the volume whose device we search for @rtype: string or None @return: block device path if the volume is mapped, else None """ allfields = 5 volumefield = 2 devicefield = 4 lines = output.splitlines() # Try parsing the new output format (ceph >= 0.55). splitted_lines = map(lambda l: l.split(), lines) # Check for empty output. if not splitted_lines: return None # Check showmapped output, to determine number of fields. field_cnt = len(splitted_lines[0]) if field_cnt != allfields: # Parsing the new format failed. Fallback to parsing the old output # format (< 0.55). splitted_lines = map(lambda l: l.split("\t"), lines) if field_cnt != allfields: base.ThrowError("Cannot parse rbd showmapped output expected %s fields," " found %s", allfields, field_cnt) matched_lines = \ filter(lambda l: len(l) == allfields and l[volumefield] == volume_name, splitted_lines) if len(matched_lines) > 1: base.ThrowError("rbd volume %s mapped more than once", volume_name) if matched_lines: # rbd block device found. Return it. rbd_dev = matched_lines[0][devicefield] return rbd_dev # The given volume is not mapped. return None def Assemble(self): """Assemble the device. """ pass def Shutdown(self): """Shutdown the device. """ if not self.minor and not self.Attach(): # The rbd device doesn't exist. return # Unmap the block device from the Volume. self._UnmapVolumeFromBlockdev(self.unique_id) self.minor = None self.dev_path = None def _UnmapVolumeFromBlockdev(self, unique_id): """Unmaps the rbd device from the Volume it is mapped. Unmaps the rbd device from the Volume it was previously mapped to. This method should be idempotent if the Volume isn't mapped. """ pool = self.params[constants.LDP_POOL] name = unique_id[1] # Check if the mapping already exists. rbd_dev = self._VolumeToBlockdev(pool, name) if rbd_dev: # The mapping exists. Unmap the rbd device. unmap_cmd = [constants.RBD_CMD, "unmap", "%s" % rbd_dev] result = utils.RunCmd(unmap_cmd) if result.failed: base.ThrowError("rbd unmap failed (%s): %s", result.fail_reason, result.output) def Open(self, force=False): """Make the device ready for I/O. """ pass def Close(self): """Notifies that the device will no longer be used for I/O. """ pass def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the Volume. 
@type amount: integer @param amount: the amount (in mebibytes) to grow with @type dryrun: boolean @param dryrun: whether to execute the operation in simulation mode only, without actually increasing the size """ if not backingstore: return if not self.Attach(): base.ThrowError("Can't attach to rbd device during Grow()") if dryrun: # the rbd tool does not support dry runs of resize operations. # Since rbd volumes are thinly provisioned, we assume # there is always enough free space for the operation. return rbd_pool = self.params[constants.LDP_POOL] rbd_name = self.unique_id[1] new_size = self.size + amount # Resize the rbd volume (Image) inside the RADOS cluster. cmd = [constants.RBD_CMD, "resize", "-p", rbd_pool, rbd_name, "--size", "%s" % new_size] result = utils.RunCmd(cmd) if result.failed: base.ThrowError("rbd resize failed (%s): %s", result.fail_reason, result.output) class ExtStorageDevice(base.BlockDev): """A block device provided by an ExtStorage Provider. This class implements the External Storage Interface, which means handling of the externally provided block devices. """ def __init__(self, unique_id, children, size, params): """Attaches to an extstorage block device. """ super(ExtStorageDevice, self).__init__(unique_id, children, size, params) if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise ValueError("Invalid configuration data %s" % str(unique_id)) self.driver, self.vol_name = unique_id self.ext_params = params self.major = self.minor = None self.Attach() @classmethod def Create(cls, unique_id, children, size, spindles, params, excl_stor): """Create a new extstorage device. Provision a new volume using an extstorage provider, which will then be mapped to a block device. """ if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2: raise errors.ProgrammerError("Invalid configuration data %s" % str(unique_id)) if excl_stor: raise errors.ProgrammerError("extstorage device requested with" " exclusive_storage") # Call the External Storage's create script, # to provision a new Volume inside the External Storage _ExtStorageAction(constants.ES_ACTION_CREATE, unique_id, params, str(size)) return ExtStorageDevice(unique_id, children, size, params) def Remove(self): """Remove the extstorage device. """ if not self.minor and not self.Attach(): # The extstorage device doesn't exist. return # First shutdown the device (remove mappings). self.Shutdown() # Call the External Storage's remove script, # to remove the Volume from the External Storage _ExtStorageAction(constants.ES_ACTION_REMOVE, self.unique_id, self.ext_params) def Rename(self, new_id): """Rename this device. """ pass def Attach(self): """Attach to an existing extstorage device. This method maps the extstorage volume that matches our name with a corresponding block device and then attaches to this device. """ self.attached = False # Call the External Storage's attach script, # to attach an existing Volume to a block device under /dev self.dev_path = _ExtStorageAction(constants.ES_ACTION_ATTACH, self.unique_id, self.ext_params) try: st = os.stat(self.dev_path) except OSError, err: logging.error("Error stat()'ing %s: %s", self.dev_path, str(err)) return False if not stat.S_ISBLK(st.st_mode): logging.error("%s is not a block device", self.dev_path) return False self.major = os.major(st.st_rdev) self.minor = os.minor(st.st_rdev) self.attached = True return True def Assemble(self): """Assemble the device. """ pass def Shutdown(self): """Shutdown the device. 
""" if not self.minor and not self.Attach(): # The extstorage device doesn't exist. return # Call the External Storage's detach script, # to detach an existing Volume from it's block device under /dev _ExtStorageAction(constants.ES_ACTION_DETACH, self.unique_id, self.ext_params) self.minor = None self.dev_path = None def Open(self, force=False): """Make the device ready for I/O. """ pass def Close(self): """Notifies that the device will no longer be used for I/O. """ pass def Grow(self, amount, dryrun, backingstore, excl_stor): """Grow the Volume. @type amount: integer @param amount: the amount (in mebibytes) to grow with @type dryrun: boolean @param dryrun: whether to execute the operation in simulation mode only, without actually increasing the size """ if not backingstore: return if not self.Attach(): base.ThrowError("Can't attach to extstorage device during Grow()") if dryrun: # we do not support dry runs of resize operations for now. return new_size = self.size + amount # Call the External Storage's grow script, # to grow an existing Volume inside the External Storage _ExtStorageAction(constants.ES_ACTION_GROW, self.unique_id, self.ext_params, str(self.size), grow=str(new_size)) def SetInfo(self, text): """Update metadata with info text. """ # Replace invalid characters text = re.sub("^[^A-Za-z0-9_+.]", "_", text) text = re.sub("[^-A-Za-z0-9_+.]", "_", text) # Only up to 128 characters are allowed text = text[:128] # Call the External Storage's setinfo script, # to set metadata for an existing Volume inside the External Storage _ExtStorageAction(constants.ES_ACTION_SETINFO, self.unique_id, self.ext_params, metadata=text) def _ExtStorageAction(action, unique_id, ext_params, size=None, grow=None, metadata=None): """Take an External Storage action. Take an External Storage action concerning or affecting a specific Volume inside the External Storage. @type action: string @param action: which action to perform. 
One of: create / remove / grow / attach / detach @type unique_id: tuple (driver, vol_name) @param unique_id: a tuple containing the type of ExtStorage (driver) and the Volume name @type ext_params: dict @param ext_params: ExtStorage parameters @type size: integer @param size: the size of the Volume in mebibytes @type grow: integer @param grow: the new size in mebibytes (after grow) @type metadata: string @param metadata: metadata info of the Volume, for use by the provider @rtype: None or a block device path (during attach) """ driver, vol_name = unique_id # Create an External Storage instance of type `driver' status, inst_es = ExtStorageFromDisk(driver) if not status: base.ThrowError("%s" % inst_es) # Create the basic environment for the driver's scripts create_env = _ExtStorageEnvironment(unique_id, ext_params, size, grow, metadata) # Do not use log file for action `attach' as we need # to get the output from RunResult # TODO: find a way to have a log file for attach too logfile = None if action is not constants.ES_ACTION_ATTACH: logfile = _VolumeLogName(action, driver, vol_name) # Make sure the given action results in a valid script if action not in constants.ES_SCRIPTS: base.ThrowError("Action '%s' doesn't result in a valid ExtStorage script" % action) # Find out which external script to run according the given action script_name = action + "_script" script = getattr(inst_es, script_name) # Run the external script result = utils.RunCmd([script], env=create_env, cwd=inst_es.path, output=logfile,) if result.failed: logging.error("External storage's %s command '%s' returned" " error: %s, logfile: %s, output: %s", action, result.cmd, result.fail_reason, logfile, result.output) # If logfile is 'None' (during attach), it breaks TailFile # TODO: have a log file for attach too if action is not constants.ES_ACTION_ATTACH: lines = [utils.SafeEncode(val) for val in utils.TailFile(logfile, lines=20)] else: lines = result.output[-20:] base.ThrowError("External storage's %s script failed (%s), last" " lines of output:\n%s", action, result.fail_reason, "\n".join(lines)) if action == constants.ES_ACTION_ATTACH: return result.stdout def ExtStorageFromDisk(name, base_dir=None): """Create an ExtStorage instance from disk. This function will return an ExtStorage instance if the given name is a valid ExtStorage name. @type base_dir: string @keyword base_dir: Base directory containing ExtStorage installations. Defaults to a search in all the ES_SEARCH_PATH dirs. 
@rtype: tuple @return: True and the ExtStorage instance if we find a valid one, or False and the diagnose message on error """ if base_dir is None: es_base_dir = pathutils.ES_SEARCH_PATH else: es_base_dir = [base_dir] es_dir = utils.FindFile(name, es_base_dir, os.path.isdir) if es_dir is None: return False, ("Directory for External Storage Provider %s not" " found in search path" % name) # ES Files dictionary, we will populate it with the absolute path # names; if the value is True, then it is a required file, otherwise # an optional one es_files = dict.fromkeys(constants.ES_SCRIPTS, True) es_files[constants.ES_PARAMETERS_FILE] = True for (filename, _) in es_files.items(): es_files[filename] = utils.PathJoin(es_dir, filename) try: st = os.stat(es_files[filename]) except EnvironmentError, err: return False, ("File '%s' under path '%s' is missing (%s)" % (filename, es_dir, utils.ErrnoOrStr(err))) if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): return False, ("File '%s' under path '%s' is not a regular file" % (filename, es_dir)) if filename in constants.ES_SCRIPTS: if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: return False, ("File '%s' under path '%s' is not executable" % (filename, es_dir)) parameters = [] if constants.ES_PARAMETERS_FILE in es_files: parameters_file = es_files[constants.ES_PARAMETERS_FILE] try: parameters = utils.ReadFile(parameters_file).splitlines() except EnvironmentError, err: return False, ("Error while reading the EXT parameters file at %s: %s" % (parameters_file, utils.ErrnoOrStr(err))) parameters = [v.split(None, 1) for v in parameters] es_obj = \ objects.ExtStorage(name=name, path=es_dir, create_script=es_files[constants.ES_SCRIPT_CREATE], remove_script=es_files[constants.ES_SCRIPT_REMOVE], grow_script=es_files[constants.ES_SCRIPT_GROW], attach_script=es_files[constants.ES_SCRIPT_ATTACH], detach_script=es_files[constants.ES_SCRIPT_DETACH], setinfo_script=es_files[constants.ES_SCRIPT_SETINFO], verify_script=es_files[constants.ES_SCRIPT_VERIFY], supported_parameters=parameters) return True, es_obj def _ExtStorageEnvironment(unique_id, ext_params, size=None, grow=None, metadata=None): """Calculate the environment for an External Storage script. @type unique_id: tuple (driver, vol_name) @param unique_id: ExtStorage pool and name of the Volume @type ext_params: dict @param ext_params: the EXT parameters @type size: string @param size: size of the Volume (in mebibytes) @type grow: string @param grow: new size of Volume after grow (in mebibytes) @type metadata: string @param metadata: metadata info of the Volume @rtype: dict @return: dict of environment variables """ vol_name = unique_id[1] result = {} result["VOL_NAME"] = vol_name # EXT params for pname, pvalue in ext_params.items(): result["EXTP_%s" % pname.upper()] = str(pvalue) if size is not None: result["VOL_SIZE"] = size if grow is not None: result["VOL_NEW_SIZE"] = grow if metadata is not None: result["VOL_METADATA"] = metadata return result def _VolumeLogName(kind, es_name, volume): """Compute the ExtStorage log filename for a given Volume and operation. @type kind: string @param kind: the operation type (e.g. create, remove etc.) 
@type es_name: string @param es_name: the ExtStorage name @type volume: string @param volume: the name of the Volume inside the External Storage """ # Check if the extstorage log dir is a valid dir if not os.path.isdir(pathutils.LOG_ES_DIR): base.ThrowError("Cannot find log directory: %s", pathutils.LOG_ES_DIR) # TODO: Use tempfile.mkstemp to create unique filename basename = ("%s-%s-%s-%s.log" % (kind, es_name, volume, utils.TimestampForFilename())) return utils.PathJoin(pathutils.LOG_ES_DIR, basename) DEV_MAP = { constants.DT_PLAIN: LogicalVolume, constants.DT_DRBD8: drbd.DRBD8Dev, constants.DT_BLOCK: PersistentBlockDevice, constants.DT_RBD: RADOSBlockDevice, constants.DT_EXT: ExtStorageDevice, constants.DT_FILE: FileStorage, constants.DT_SHARED_FILE: FileStorage, } def _VerifyDiskType(dev_type): if dev_type not in DEV_MAP: raise errors.ProgrammerError("Invalid block device type '%s'" % dev_type) def _VerifyDiskParams(disk): """Verifies if all disk parameters are set. """ missing = set(constants.DISK_LD_DEFAULTS[disk.dev_type]) - set(disk.params) if missing: raise errors.ProgrammerError("Block device is missing disk parameters: %s" % missing) def FindDevice(disk, children): """Search for an existing, assembled device. This will succeed only if the device exists and is assembled, but it does not do any actions in order to activate the device. @type disk: L{objects.Disk} @param disk: the disk object to find @type children: list of L{bdev.BlockDev} @param children: the list of block devices that are children of the device represented by the disk parameter """ _VerifyDiskType(disk.dev_type) device = DEV_MAP[disk.dev_type](disk.physical_id, children, disk.size, disk.params) if not device.attached: return None return device def Assemble(disk, children): """Try to attach or assemble an existing device. This will attach or assemble the device, as needed, to bring it fully up. It must be safe to run on already-assembled devices. @type disk: L{objects.Disk} @param disk: the disk object to assemble @type children: list of L{bdev.BlockDev} @param children: the list of block devices that are children of the device represented by the disk parameter """ _VerifyDiskType(disk.dev_type) _VerifyDiskParams(disk) device = DEV_MAP[disk.dev_type](disk.physical_id, children, disk.size, disk.params) device.Assemble() return device def Create(disk, children, excl_stor): """Create a device. @type disk: L{objects.Disk} @param disk: the disk object to create @type children: list of L{bdev.BlockDev} @param children: the list of block devices that are children of the device represented by the disk parameter @type excl_stor: boolean @param excl_stor: Whether exclusive_storage is active @rtype: L{bdev.BlockDev} @return: the created device, or C{None} in case of an error """ _VerifyDiskType(disk.dev_type) _VerifyDiskParams(disk) device = DEV_MAP[disk.dev_type].Create(disk.physical_id, children, disk.size, disk.spindles, disk.params, excl_stor) return device ganeti-2.9.3/lib/storage/drbd_info.py0000644000000000000000000003444012271422343017511 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version.
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """DRBD information parsing utilities""" import errno import pyparsing as pyp import re from ganeti import constants from ganeti import utils from ganeti import errors from ganeti import compat from ganeti.storage import base class DRBD8Status(object): # pylint: disable=R0902 """A DRBD status representation class. Note that this class is meant to be used to parse one of the entries returned from L{DRBD8Info._JoinLinesPerMinor}. """ UNCONF_RE = re.compile(r"\s*[0-9]+:\s*cs:Unconfigured$") LINE_RE = re.compile(r"\s*[0-9]+:\s*cs:(\S+)\s+(?:st|ro):([^/]+)/(\S+)" r"\s+ds:([^/]+)/(\S+)\s+.*$") SYNC_RE = re.compile(r"^.*\ssync'ed:\s*([0-9.]+)%.*" # Due to a bug in drbd in the kernel, introduced in # commit 4b0715f096 (still unfixed as of 2011-08-22) r"(?:\s|M)" r"finish: ([0-9]+):([0-9]+):([0-9]+)\s.*$") CS_UNCONFIGURED = "Unconfigured" CS_STANDALONE = "StandAlone" CS_WFCONNECTION = "WFConnection" CS_WFREPORTPARAMS = "WFReportParams" CS_CONNECTED = "Connected" CS_STARTINGSYNCS = "StartingSyncS" CS_STARTINGSYNCT = "StartingSyncT" CS_WFBITMAPS = "WFBitMapS" CS_WFBITMAPT = "WFBitMapT" CS_WFSYNCUUID = "WFSyncUUID" CS_SYNCSOURCE = "SyncSource" CS_SYNCTARGET = "SyncTarget" CS_PAUSEDSYNCS = "PausedSyncS" CS_PAUSEDSYNCT = "PausedSyncT" CSET_SYNC = compat.UniqueFrozenset([ CS_WFREPORTPARAMS, CS_STARTINGSYNCS, CS_STARTINGSYNCT, CS_WFBITMAPS, CS_WFBITMAPT, CS_WFSYNCUUID, CS_SYNCSOURCE, CS_SYNCTARGET, CS_PAUSEDSYNCS, CS_PAUSEDSYNCT, ]) DS_DISKLESS = "Diskless" DS_ATTACHING = "Attaching" # transient state DS_FAILED = "Failed" # transient state, next: diskless DS_NEGOTIATING = "Negotiating" # transient state DS_INCONSISTENT = "Inconsistent" # while syncing or after creation DS_OUTDATED = "Outdated" DS_DUNKNOWN = "DUnknown" # shown for peer disk when not connected DS_CONSISTENT = "Consistent" DS_UPTODATE = "UpToDate" # normal state RO_PRIMARY = "Primary" RO_SECONDARY = "Secondary" RO_UNKNOWN = "Unknown" def __init__(self, procline): u = self.UNCONF_RE.match(procline) if u: self.cstatus = self.CS_UNCONFIGURED self.lrole = self.rrole = self.ldisk = self.rdisk = None else: m = self.LINE_RE.match(procline) if not m: raise errors.BlockDeviceError("Can't parse input data '%s'" % procline) self.cstatus = m.group(1) self.lrole = m.group(2) self.rrole = m.group(3) self.ldisk = m.group(4) self.rdisk = m.group(5) # end reading of data from the LINE_RE or UNCONF_RE self.is_standalone = self.cstatus == self.CS_STANDALONE self.is_wfconn = self.cstatus == self.CS_WFCONNECTION self.is_connected = self.cstatus == self.CS_CONNECTED self.is_unconfigured = self.cstatus == self.CS_UNCONFIGURED self.is_primary = self.lrole == self.RO_PRIMARY self.is_secondary = self.lrole == self.RO_SECONDARY self.peer_primary = self.rrole == self.RO_PRIMARY self.peer_secondary = self.rrole == self.RO_SECONDARY self.both_primary = self.is_primary and self.peer_primary self.both_secondary = self.is_secondary and self.peer_secondary self.is_diskless = self.ldisk == self.DS_DISKLESS self.is_disk_uptodate = self.ldisk == self.DS_UPTODATE self.peer_disk_uptodate = self.rdisk == self.DS_UPTODATE 
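# Worked example (added comment; hypothetical /proc/drbd entry): the line
#   " 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----"
# matches LINE_RE with cstatus="Connected", lrole="Primary",
# rrole="Secondary", ldisk="UpToDate" and rdisk="UpToDate"; given that,
# the flags above make is_connected, is_primary, peer_secondary,
# is_disk_uptodate and peer_disk_uptodate all True.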
self.is_in_resync = self.cstatus in self.CSET_SYNC self.is_in_use = self.cstatus != self.CS_UNCONFIGURED m = self.SYNC_RE.match(procline) if m: self.sync_percent = float(m.group(1)) hours = int(m.group(2)) minutes = int(m.group(3)) seconds = int(m.group(4)) self.est_time = hours * 3600 + minutes * 60 + seconds else: # we have (in this if branch) no percent information, but if # we're resyncing we need to 'fake' a sync percent information, # as this is how cmdlib determines if it makes sense to wait for # resyncing or not if self.is_in_resync: self.sync_percent = 0 else: self.sync_percent = None self.est_time = None class DRBD8Info(object): """Represents information DRBD exports (usually via /proc/drbd). An instance of this class is created by one of the CreateFrom... methods. """ _VERSION_RE = re.compile(r"^version: (\d+)\.(\d+)\.(\d+)(?:\.(\d+))?" r" \(api:(\d+)/proto:(\d+)(?:-(\d+))?\)") _VALID_LINE_RE = re.compile("^ *([0-9]+): cs:([^ ]+).*$") def __init__(self, lines): self._version = self._ParseVersion(lines) self._minors, self._line_per_minor = self._JoinLinesPerMinor(lines) def GetVersion(self): """Return the DRBD version. This will return a dict with keys: - k_major - k_minor - k_point - k_fix (only on some drbd versions) - api - proto - proto2 (only on drbd > 8.2.X) """ return self._version def GetVersionString(self): """Return the DRBD version as a single string. """ version = self.GetVersion() retval = "%d.%d.%d" % \ (version["k_major"], version["k_minor"], version["k_point"]) if "k_fix" in version: retval += ".%s" % version["k_fix"] retval += " (api:%d/proto:%d" % (version["api"], version["proto"]) if "proto2" in version: retval += "-%s" % version["proto2"] retval += ")" return retval def GetMinors(self): """Return a list of minor for which information is available. This list is ordered in exactly the order which was found in the underlying data. """ return self._minors def HasMinorStatus(self, minor): return minor in self._line_per_minor def GetMinorStatus(self, minor): return DRBD8Status(self._line_per_minor[minor]) def _ParseVersion(self, lines): first_line = lines[0].strip() version = self._VERSION_RE.match(first_line) if not version: raise errors.BlockDeviceError("Can't parse DRBD version from '%s'" % first_line) values = version.groups() retval = { "k_major": int(values[0]), "k_minor": int(values[1]), "k_point": int(values[2]), "api": int(values[4]), "proto": int(values[5]), } if values[3] is not None: retval["k_fix"] = values[3] if values[6] is not None: retval["proto2"] = values[6] return retval def _JoinLinesPerMinor(self, lines): """Transform the raw lines into a dictionary based on the minor. 
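For example (a sketch with a hypothetical /proc/drbd excerpt; the indented continuation line is merged into its minor's entry):

  >>> lines = ["version: 8.3.11 (api:88/proto:86-96)",
  ...          " 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C",
  ...          "    ns:1 nr:0 dw:1 dr:2"]
  >>> DRBD8Info.CreateFromLines(lines).GetMinors()
  [0]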
@return: a dictionary of minor: joined lines from /proc/drbd for that minor """ minors = [] results = {} old_minor = old_line = None for line in lines: if not line: # completely empty lines, as can be returned by drbd8.0+ continue lresult = self._VALID_LINE_RE.match(line) if lresult is not None: if old_minor is not None: minors.append(old_minor) results[old_minor] = old_line old_minor = int(lresult.group(1)) old_line = line else: if old_minor is not None: old_line += " " + line.strip() # add last line if old_minor is not None: minors.append(old_minor) results[old_minor] = old_line return minors, results @staticmethod def CreateFromLines(lines): return DRBD8Info(lines) @staticmethod def CreateFromFile(filename=constants.DRBD_STATUS_FILE): try: lines = utils.ReadFile(filename).splitlines() except EnvironmentError, err: if err.errno == errno.ENOENT: base.ThrowError("The file %s cannot be opened, check if the module" " is loaded (%s)", filename, str(err)) else: base.ThrowError("Can't read the DRBD proc file %s: %s", filename, str(err)) if not lines: base.ThrowError("Can't read any data from %s", filename) return DRBD8Info.CreateFromLines(lines) class BaseShowInfo(object): """Base class for parsing the `drbdsetup show` output. Holds various common pyparsing expressions which are used by subclasses. Also provides caching of the constructed parser. """ _PARSE_SHOW = None # pyparsing setup _lbrace = pyp.Literal("{").suppress() _rbrace = pyp.Literal("}").suppress() _lbracket = pyp.Literal("[").suppress() _rbracket = pyp.Literal("]").suppress() _semi = pyp.Literal(";").suppress() _colon = pyp.Literal(":").suppress() # this also converts the value to an int _number = pyp.Word(pyp.nums).setParseAction(lambda s, l, t: int(t[0])) _comment = pyp.Literal("#") + pyp.Optional(pyp.restOfLine) _defa = pyp.Literal("_is_default").suppress() _dbl_quote = pyp.Literal('"').suppress() _keyword = pyp.Word(pyp.alphanums + "-") # value types _value = pyp.Word(pyp.alphanums + "_-/.:") _quoted = _dbl_quote + pyp.CharsNotIn('"') + _dbl_quote _ipv4_addr = (pyp.Optional(pyp.Literal("ipv4")).suppress() + pyp.Word(pyp.nums + ".") + _colon + _number) _ipv6_addr = (pyp.Optional(pyp.Literal("ipv6")).suppress() + pyp.Optional(_lbracket) + pyp.Word(pyp.hexnums + ":") + pyp.Optional(_rbracket) + _colon + _number) # meta device, extended syntax _meta_value = ((_value ^ _quoted) + _lbracket + _number + _rbracket) # device name, extended syntax _device_value = pyp.Literal("minor").suppress() + _number # a statement _stmt = (~_rbrace + _keyword + ~_lbrace + pyp.Optional(_ipv4_addr ^ _ipv6_addr ^ _value ^ _quoted ^ _meta_value ^ _device_value) + pyp.Optional(_defa) + _semi + pyp.Optional(pyp.restOfLine).suppress()) @classmethod def GetDevInfo(cls, show_data): """Parse details about a given DRBD minor. This returns, if available, the local backing device (as a path) and the local and remote (ip, port) information from a string containing the output of the `drbdsetup show` command as returned by DRBD8Dev._GetShowData. This will return a dict with keys: - local_dev - meta_dev - meta_index - local_addr - remote_addr """ if not show_data: return {} try: # run pyparse results = (cls._GetShowParser()).parseString(show_data) except pyp.ParseException, err: base.ThrowError("Can't parse drbdsetup show output: %s", str(err)) return cls._TransformParseResult(results) @classmethod def _TransformParseResult(cls, parse_result): raise NotImplementedError @classmethod def _GetShowParser(cls): """Return a parser for `drbd show` output. 
This will either create or return an already-created parser for the output of the command `drbd show`. """ if cls._PARSE_SHOW is None: cls._PARSE_SHOW = cls._ConstructShowParser() return cls._PARSE_SHOW @classmethod def _ConstructShowParser(cls): raise NotImplementedError class DRBD83ShowInfo(BaseShowInfo): @classmethod def _ConstructShowParser(cls): # an entire section section_name = pyp.Word(pyp.alphas + "_") section = section_name + \ cls._lbrace + \ pyp.ZeroOrMore(pyp.Group(cls._stmt)) + \ cls._rbrace bnf = pyp.ZeroOrMore(pyp.Group(section ^ cls._stmt)) bnf.ignore(cls._comment) return bnf @classmethod def _TransformParseResult(cls, parse_result): retval = {} for section in parse_result: sname = section[0] if sname == "_this_host": for lst in section[1:]: if lst[0] == "disk": retval["local_dev"] = lst[1] elif lst[0] == "meta-disk": retval["meta_dev"] = lst[1] retval["meta_index"] = lst[2] elif lst[0] == "address": retval["local_addr"] = tuple(lst[1:]) elif sname == "_remote_host": for lst in section[1:]: if lst[0] == "address": retval["remote_addr"] = tuple(lst[1:]) return retval class DRBD84ShowInfo(BaseShowInfo): @classmethod def _ConstructShowParser(cls): # an entire section (sections can be nested in DRBD 8.4, and there exist # sections like "volume 0") section_name = pyp.Word(pyp.alphas + "_") + \ pyp.Optional(pyp.Word(pyp.nums)).suppress() # skip volume idx section = pyp.Forward() # pylint: disable=W0106 section << (section_name + cls._lbrace + pyp.ZeroOrMore(pyp.Group(cls._stmt ^ section)) + cls._rbrace) resource_name = pyp.Word(pyp.alphanums + "_-.") resource = (pyp.Literal("resource") + resource_name).suppress() + \ cls._lbrace + \ pyp.ZeroOrMore(pyp.Group(section)) + \ cls._rbrace resource.ignore(cls._comment) return resource @classmethod def _TransformVolumeSection(cls, vol_content, retval): for entry in vol_content: if entry[0] == "disk" and len(entry) == 2 and \ isinstance(entry[1], basestring): retval["local_dev"] = entry[1] elif entry[0] == "meta-disk": if len(entry) > 1: retval["meta_dev"] = entry[1] if len(entry) > 2: retval["meta_index"] = entry[2] @classmethod def _TransformParseResult(cls, parse_result): retval = {} for section in parse_result: sname = section[0] if sname == "_this_host": for lst in section[1:]: if lst[0] == "address": retval["local_addr"] = tuple(lst[1:]) elif lst[0] == "volume": cls._TransformVolumeSection(lst[1:], retval) elif sname == "_remote_host": for lst in section[1:]: if lst[0] == "address": retval["remote_addr"] = tuple(lst[1:]) return retval ganeti-2.9.3/lib/storage/__init__.py0000644000000000000000000000143212267470014017320 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Block device abstraction """ ganeti-2.9.3/lib/storage/container.py0000644000000000000000000003237412271422343017551 0ustar00rootroot00000000000000# # # Copyright (C) 2009, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Storage container abstraction. """ # pylint: disable=W0232,R0201 # W0232, since we use these as singletons rather than object holding # data # R0201, for the same reason # TODO: FileStorage initialised with paths whereas the others not import logging from ganeti import errors from ganeti import constants from ganeti import utils def _ParseSize(value): return int(round(float(value), 0)) class _Base: """Base class for storage abstraction. """ def List(self, name, fields): """Returns a list of all entities within the storage unit. @type name: string or None @param name: Entity name or None for all @type fields: list @param fields: List with all requested result fields (order is preserved) """ raise NotImplementedError() def Modify(self, name, changes): # pylint: disable=W0613 """Modifies an entity within the storage unit. @type name: string @param name: Entity name @type changes: dict @param changes: New field values """ # Don't raise an error if no changes are requested if changes: raise errors.ProgrammerError("Unable to modify the following" "fields: %r" % (changes.keys(), )) def Execute(self, name, op): """Executes an operation on an entity within the storage unit. @type name: string @param name: Entity name @type op: string @param op: Operation name """ raise NotImplementedError() class FileStorage(_Base): # pylint: disable=W0223 """File storage unit. """ def __init__(self, paths): """Initializes this class. @type paths: list @param paths: List of file storage paths """ self._paths = paths def List(self, name, fields): """Returns a list of all entities within the storage unit. See L{_Base.List}. """ rows = [] if name is None: paths = self._paths else: paths = [name] for path in paths: rows.append(self._ListInner(path, fields)) return rows @staticmethod def _ListInner(path, fields): """Gathers requested information from directory. @type path: string @param path: Path to directory @type fields: list @param fields: Requested fields """ values = [] # Pre-calculate information in case it's requested more than once if constants.SF_USED in fields: dirsize = utils.CalculateDirectorySize(path) else: dirsize = None if constants.SF_FREE in fields or constants.SF_SIZE in fields: fsstats = utils.GetFilesystemStats(path) else: fsstats = None # Make sure to update constants.VALID_STORAGE_FIELDS when changing fields. 
for field_name in fields: if field_name == constants.SF_NAME: values.append(path) elif field_name == constants.SF_USED: values.append(dirsize) elif field_name == constants.SF_FREE: values.append(fsstats[1]) elif field_name == constants.SF_SIZE: values.append(fsstats[0]) elif field_name == constants.SF_ALLOCATABLE: values.append(True) else: raise errors.StorageError("Unknown field: %r" % field_name) return values class _LvmBase(_Base): # pylint: disable=W0223 """Base class for LVM storage containers. @cvar LIST_FIELDS: list of tuples consisting of three elements: SF_* constants, lvm command output fields (list), and conversion function or static value (for static value, the lvm output field can be an empty list) """ LIST_SEP = "|" LIST_COMMAND = None LIST_FIELDS = None def List(self, name, wanted_field_names): """Returns a list of all entities within the storage unit. See L{_Base.List}. """ # Get needed LVM fields lvm_fields = self._GetLvmFields(self.LIST_FIELDS, wanted_field_names) # Build LVM command cmd_args = self._BuildListCommand(self.LIST_COMMAND, self.LIST_SEP, lvm_fields, name) # Run LVM command cmd_result = self._RunListCommand(cmd_args) # Split and rearrange LVM command output return self._BuildList(self._SplitList(cmd_result, self.LIST_SEP, len(lvm_fields)), self.LIST_FIELDS, wanted_field_names, lvm_fields) @staticmethod def _GetLvmFields(fields_def, wanted_field_names): """Returns unique list of fields wanted from LVM command. @type fields_def: list @param fields_def: Field definitions @type wanted_field_names: list @param wanted_field_names: List of requested fields """ field_to_idx = dict([(field_name, idx) for (idx, (field_name, _, _)) in enumerate(fields_def)]) lvm_fields = [] for field_name in wanted_field_names: try: idx = field_to_idx[field_name] except KeyError: raise errors.StorageError("Unknown field: %r" % field_name) (_, lvm_names, _) = fields_def[idx] lvm_fields.extend(lvm_names) return utils.UniqueSequence(lvm_fields) @classmethod def _BuildList(cls, cmd_result, fields_def, wanted_field_names, lvm_fields): """Builds the final result list. @type cmd_result: iterable @param cmd_result: Iterable of LVM command output (iterable of lists) @type fields_def: list @param fields_def: Field definitions @type wanted_field_names: list @param wanted_field_names: List of requested fields @type lvm_fields: list @param lvm_fields: LVM fields """ lvm_name_to_idx = dict([(lvm_name, idx) for (idx, lvm_name) in enumerate(lvm_fields)]) field_to_idx = dict([(field_name, idx) for (idx, (field_name, _, _)) in enumerate(fields_def)]) data = [] for raw_data in cmd_result: row = [] for field_name in wanted_field_names: (_, lvm_names, mapper) = fields_def[field_to_idx[field_name]] values = [raw_data[lvm_name_to_idx[i]] for i in lvm_names] if callable(mapper): # we got a function, call it with all the declared fields val = mapper(*values) # pylint: disable=W0142 elif len(values) == 1: assert mapper is None, ("Invalid mapper value (neither callable" " nor None) for one-element fields") # we don't have a function, but we had a single field # declared, pass it unchanged val = values[0] else: # let's make sure there are no fields declared (cannot map > # 1 field without a function) assert not values, "LVM storage has multi-fields without a function" val = mapper row.append(val) data.append(row) return data @staticmethod def _BuildListCommand(cmd, sep, options, name): """Builds LVM command line.
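Example (illustrative arguments only):

  >>> _LvmBase._BuildListCommand("pvs", "|", ["pv_name", "pv_free"], None)
  ['pvs', '--noheadings', '--units=m', '--nosuffix', '--separator', '|', '--options', 'pv_name,pv_free']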
@type cmd: string @param cmd: Command name @type sep: string @param sep: Field separator character @type options: list of strings @param options: Wanted LVM fields @type name: name or None @param name: Name of requested entity """ args = [cmd, "--noheadings", "--units=m", "--nosuffix", "--separator", sep, "--options", ",".join(options)] if name is not None: args.append(name) return args @staticmethod def _RunListCommand(args): """Run LVM command. """ result = utils.RunCmd(args) if result.failed: raise errors.StorageError("Failed to run %r, command output: %s" % (args[0], result.output)) return result.stdout @staticmethod def _SplitList(data, sep, fieldcount): """Splits LVM command output into rows and fields. @type data: string @param data: LVM command output @type sep: string @param sep: Field separator character @type fieldcount: int @param fieldcount: Expected number of fields """ for line in data.splitlines(): fields = line.strip().split(sep) if len(fields) != fieldcount: logging.warning("Invalid line returned from lvm command: %s", line) continue yield fields def _LvmPvGetAllocatable(attr): """Determines whether LVM PV is allocatable. @rtype: bool """ if attr: return (attr[0] == "a") else: logging.warning("Invalid PV attribute: %r", attr) return False class LvmPvStorage(_LvmBase): # pylint: disable=W0223 """LVM Physical Volume storage unit. """ LIST_COMMAND = "pvs" # Make sure to update constants.VALID_STORAGE_FIELDS when changing field # definitions. LIST_FIELDS = [ (constants.SF_NAME, ["pv_name"], None), (constants.SF_SIZE, ["pv_size"], _ParseSize), (constants.SF_USED, ["pv_used"], _ParseSize), (constants.SF_FREE, ["pv_free"], _ParseSize), (constants.SF_ALLOCATABLE, ["pv_attr"], _LvmPvGetAllocatable), ] def _SetAllocatable(self, name, allocatable): """Sets the "allocatable" flag on a physical volume. @type name: string @param name: Physical volume name @type allocatable: bool @param allocatable: Whether to set the "allocatable" flag """ args = ["pvchange", "--allocatable"] if allocatable: args.append("y") else: args.append("n") args.append(name) result = utils.RunCmd(args) if result.failed: raise errors.StorageError("Failed to modify physical volume," " pvchange output: %s" % result.output) def Modify(self, name, changes): """Modifies flags on a physical volume. See L{_Base.Modify}. """ if constants.SF_ALLOCATABLE in changes: self._SetAllocatable(name, changes[constants.SF_ALLOCATABLE]) del changes[constants.SF_ALLOCATABLE] # Other changes will be handled (and maybe refused) by the base class. return _LvmBase.Modify(self, name, changes) class LvmVgStorage(_LvmBase): """LVM Volume Group storage unit. """ LIST_COMMAND = "vgs" VGREDUCE_COMMAND = "vgreduce" # Make sure to update constants.VALID_STORAGE_FIELDS when changing field # definitions. LIST_FIELDS = [ (constants.SF_NAME, ["vg_name"], None), (constants.SF_SIZE, ["vg_size"], _ParseSize), (constants.SF_FREE, ["vg_free"], _ParseSize), (constants.SF_USED, ["vg_size", "vg_free"], lambda x, y: _ParseSize(x) - _ParseSize(y)), (constants.SF_ALLOCATABLE, [], True), ] def _RemoveMissing(self, name, _runcmd_fn=utils.RunCmd): """Runs "vgreduce --removemissing" on a volume group. @type name: string @param name: Volume group name """ # Ignoring vgreduce exit code. Older versions exit with an error even tough # the VG is already consistent. This was fixed in later versions, but we # cannot depend on it. 
result = _runcmd_fn([self.VGREDUCE_COMMAND, "--removemissing", name]) # Keep output in case something went wrong vgreduce_output = result.output # work around newer LVM version if ("Wrote out consistent volume group" not in vgreduce_output or "vgreduce --removemissing --force" in vgreduce_output): # we need to re-run with --force result = _runcmd_fn([self.VGREDUCE_COMMAND, "--removemissing", "--force", name]) vgreduce_output += "\n" + result.output result = _runcmd_fn([self.LIST_COMMAND, "--noheadings", "--nosuffix", name]) # we also need to check the output if result.failed or "Couldn't find device with uuid" in result.output: raise errors.StorageError(("Volume group '%s' still not consistent," " 'vgreduce' output: %r," " 'vgs' output: %r") % (name, vgreduce_output, result.output)) def Execute(self, name, op): """Executes an operation on a virtual volume. See L{_Base.Execute}. """ if op == constants.SO_FIX_CONSISTENCY: return self._RemoveMissing(name) return _LvmBase.Execute(self, name, op) # Lookup table for storage types _STORAGE_TYPES = { constants.ST_FILE: FileStorage, constants.ST_LVM_PV: LvmPvStorage, constants.ST_LVM_VG: LvmVgStorage, } def GetStorageClass(name): """Returns the class for a storage type. @type name: string @param name: Storage type """ try: return _STORAGE_TYPES[name] except KeyError: raise errors.StorageError("Unknown storage type: %r" % name) def GetStorage(name, *args): """Factory function for storage methods. @type name: string @param name: Storage type """ return GetStorageClass(name)(*args) ganeti-2.9.3/lib/storage/drbd_cmdgen.py0000644000000000000000000003501112267470014020011 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """DRBD command generating classes""" import logging import shlex from ganeti import constants from ganeti import errors class BaseDRBDCmdGenerator(object): """Base class for DRBD command generators. This class defines the interface for the command generators and holds shared code. 
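For instance (a sketch with a hypothetical version dict and minor), the 8.3 generator addresses devices by their path:

  >>> gen = DRBD83CmdGenerator({"k_major": 8, "k_minor": 3, "k_point": 11})
  >>> gen.GenShowCmd(0)
  ['drbdsetup', '/dev/drbd0', 'show']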
""" def __init__(self, version): self._version = version def GenShowCmd(self, minor): raise NotImplementedError def GenInitMetaCmd(self, minor, meta_dev): raise NotImplementedError def GenLocalInitCmds(self, minor, data_dev, meta_dev, size_mb, params): raise NotImplementedError def GenNetInitCmd(self, minor, family, lhost, lport, rhost, rport, protocol, dual_pri, hmac, secret, params): raise NotImplementedError def GenSyncParamsCmd(self, minor, params): raise NotImplementedError def GenPauseSyncCmd(self, minor): raise NotImplementedError def GenResumeSyncCmd(self, minor): raise NotImplementedError def GenPrimaryCmd(self, minor, force): raise NotImplementedError def GenSecondaryCmd(self, minor): raise NotImplementedError def GenDetachCmd(self, minor): raise NotImplementedError def GenDisconnectCmd(self, minor, family, lhost, lport, rhost, rport): raise NotImplementedError def GenDownCmd(self, minor): raise NotImplementedError def GenResizeCmd(self, minor, size_mb): raise NotImplementedError @staticmethod def _DevPath(minor): """Return the path to a drbd device for a given minor. """ return "/dev/drbd%d" % minor class DRBD83CmdGenerator(BaseDRBDCmdGenerator): """Generates drbdsetup commands suited for the DRBD <= 8.3 syntax. """ # command line options for barriers _DISABLE_DISK_OPTION = "--no-disk-barrier" # -a _DISABLE_DRAIN_OPTION = "--no-disk-drain" # -D _DISABLE_FLUSH_OPTION = "--no-disk-flushes" # -i _DISABLE_META_FLUSH_OPTION = "--no-md-flushes" # -m def __init__(self, version): super(DRBD83CmdGenerator, self).__init__(version) def GenShowCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "show"] def GenInitMetaCmd(self, minor, meta_dev): return ["drbdmeta", "--force", self._DevPath(minor), "v08", meta_dev, "0", "create-md"] def GenLocalInitCmds(self, minor, data_dev, meta_dev, size_mb, params): args = ["drbdsetup", self._DevPath(minor), "disk", data_dev, meta_dev, "0", "-e", "detach", "--create-device"] if size_mb: args.extend(["-d", "%sm" % size_mb]) vmaj = self._version["k_major"] vmin = self._version["k_minor"] vrel = self._version["k_point"] barrier_args = \ self._ComputeDiskBarrierArgs(vmaj, vmin, vrel, params[constants.LDP_BARRIERS], params[constants.LDP_NO_META_FLUSH]) args.extend(barrier_args) if params[constants.LDP_DISK_CUSTOM]: args.extend(shlex.split(params[constants.LDP_DISK_CUSTOM])) return [args] def GenNetInitCmd(self, minor, family, lhost, lport, rhost, rport, protocol, dual_pri, hmac, secret, params): args = ["drbdsetup", self._DevPath(minor), "net", "%s:%s:%s" % (family, lhost, lport), "%s:%s:%s" % (family, rhost, rport), protocol, "-A", "discard-zero-changes", "-B", "consensus", "--create-device", ] if dual_pri: args.append("-m") if hmac and secret: args.extend(["-a", hmac, "-x", secret]) if params[constants.LDP_NET_CUSTOM]: args.extend(shlex.split(params[constants.LDP_NET_CUSTOM])) return args def GenSyncParamsCmd(self, minor, params): args = ["drbdsetup", self._DevPath(minor), "syncer"] if params[constants.LDP_DYNAMIC_RESYNC]: vmin = self._version["k_minor"] vrel = self._version["k_point"] # By definition we are using 8.x, so just check the rest of the version # number if vmin != 3 or vrel < 9: msg = ("The current DRBD version (8.%d.%d) does not support the " "dynamic resync speed controller" % (vmin, vrel)) logging.error(msg) return [msg] if params[constants.LDP_PLAN_AHEAD] == 0: msg = ("A value of 0 for c-plan-ahead disables the dynamic sync speed" " controller at DRBD level. 
If you want to disable it, please" " set the dynamic-resync disk parameter to False.") logging.error(msg) return [msg] # add the c-* parameters to args args.extend(["--c-plan-ahead", params[constants.LDP_PLAN_AHEAD], "--c-fill-target", params[constants.LDP_FILL_TARGET], "--c-delay-target", params[constants.LDP_DELAY_TARGET], "--c-max-rate", params[constants.LDP_MAX_RATE], "--c-min-rate", params[constants.LDP_MIN_RATE], ]) else: args.extend(["-r", "%d" % params[constants.LDP_RESYNC_RATE]]) args.append("--create-device") return args def GenPauseSyncCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "pause-sync"] def GenResumeSyncCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "resume-sync"] def GenPrimaryCmd(self, minor, force): cmd = ["drbdsetup", self._DevPath(minor), "primary"] if force: cmd.append("-o") return cmd def GenSecondaryCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "secondary"] def GenDetachCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "detach"] def GenDisconnectCmd(self, minor, family, lhost, lport, rhost, rport): return ["drbdsetup", self._DevPath(minor), "disconnect"] def GenDownCmd(self, minor): return ["drbdsetup", self._DevPath(minor), "down"] def GenResizeCmd(self, minor, size_mb): return ["drbdsetup", self._DevPath(minor), "resize", "-s", "%dm" % size_mb] @classmethod def _ComputeDiskBarrierArgs(cls, vmaj, vmin, vrel, disabled_barriers, disable_meta_flush): """Compute the DRBD command line parameters for disk barriers Returns a list of the disk barrier parameters as requested via the disabled_barriers and disable_meta_flush arguments, and according to the supported ones in the DRBD version vmaj.vmin.vrel If the desired option is unsupported, raises errors.BlockDeviceError. """ disabled_barriers_set = frozenset(disabled_barriers) if not disabled_barriers_set in constants.DRBD_VALID_BARRIER_OPT: raise errors.BlockDeviceError("%s is not a valid option set for DRBD" " barriers" % disabled_barriers) args = [] # The following code assumes DRBD 8.x, with x < 4 and x != 1 (DRBD 8.1.x # does not exist) if not (vmaj == 8 and vmin in (0, 2, 3)): raise errors.BlockDeviceError("Unsupported DRBD version: %d.%d.%d" % (vmaj, vmin, vrel)) def _AppendOrRaise(option, min_version): """Helper for DRBD options""" if min_version is not None and vrel >= min_version: args.append(option) else: raise errors.BlockDeviceError("Could not use the option %s as the" " DRBD version %d.%d.%d does not support" " it." % (option, vmaj, vmin, vrel)) # the minimum version for each feature is encoded via pairs of (minor # version -> x) where x is version in which support for the option was # introduced. meta_flush_supported = disk_flush_supported = { 0: 12, 2: 7, 3: 0, } disk_drain_supported = { 2: 7, 3: 0, } disk_barriers_supported = { 3: 0, } # meta flushes if disable_meta_flush: _AppendOrRaise(cls._DISABLE_META_FLUSH_OPTION, meta_flush_supported.get(vmin, None)) # disk flushes if constants.DRBD_B_DISK_FLUSH in disabled_barriers_set: _AppendOrRaise(cls._DISABLE_FLUSH_OPTION, disk_flush_supported.get(vmin, None)) # disk drain if constants.DRBD_B_DISK_DRAIN in disabled_barriers_set: _AppendOrRaise(cls._DISABLE_DRAIN_OPTION, disk_drain_supported.get(vmin, None)) # disk barriers if constants.DRBD_B_DISK_BARRIERS in disabled_barriers_set: _AppendOrRaise(cls._DISABLE_DISK_OPTION, disk_barriers_supported.get(vmin, None)) return args class DRBD84CmdGenerator(BaseDRBDCmdGenerator): """Generates drbdsetup commands suited for the DRBD >= 8.4 syntax.
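For the same logical request the 8.4 syntax addresses the minor (or its resource) directly, e.g. (hypothetical minor):

  >>> gen = DRBD84CmdGenerator({"k_major": 8, "k_minor": 4, "k_point": 2})
  >>> gen.GenShowCmd(0)
  ['drbdsetup', 'show', 0]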
""" # command line options for barriers _DISABLE_DISK_OPTION = "--disk-barrier=no" _DISABLE_DRAIN_OPTION = "--disk-drain=no" _DISABLE_FLUSH_OPTION = "--disk-flushes=no" _DISABLE_META_FLUSH_OPTION = "--md-flushes=no" def __init__(self, version): super(DRBD84CmdGenerator, self).__init__(version) def GenShowCmd(self, minor): return ["drbdsetup", "show", minor] def GenInitMetaCmd(self, minor, meta_dev): return ["drbdmeta", "--force", self._DevPath(minor), "v08", meta_dev, "flex-external", "create-md"] def GenLocalInitCmds(self, minor, data_dev, meta_dev, size_mb, params): cmds = [] cmds.append(["drbdsetup", "new-resource", self._GetResource(minor)]) cmds.append(["drbdsetup", "new-minor", self._GetResource(minor), str(minor), "0"]) # We need to apply the activity log before attaching the disk else drbdsetup # will fail. cmds.append(["drbdmeta", self._DevPath(minor), "v08", meta_dev, "flex-external", "apply-al"]) attach_cmd = ["drbdsetup", "attach", minor, data_dev, meta_dev, "flexible", "--on-io-error=detach"] if size_mb: attach_cmd.extend(["--size", "%sm" % size_mb]) barrier_args = \ self._ComputeDiskBarrierArgs(params[constants.LDP_BARRIERS], params[constants.LDP_NO_META_FLUSH]) attach_cmd.extend(barrier_args) if params[constants.LDP_DISK_CUSTOM]: attach_cmd.extend(shlex.split(params[constants.LDP_DISK_CUSTOM])) cmds.append(attach_cmd) return cmds def GenNetInitCmd(self, minor, family, lhost, lport, rhost, rport, protocol, dual_pri, hmac, secret, params): args = ["drbdsetup", "connect", self._GetResource(minor), "%s:%s:%s" % (family, lhost, lport), "%s:%s:%s" % (family, rhost, rport), "--protocol", protocol, "--after-sb-0pri", "discard-zero-changes", "--after-sb-1pri", "consensus" ] if dual_pri: args.append("--allow-two-primaries") if hmac and secret: args.extend(["--cram-hmac-alg", hmac, "--shared-secret", secret]) if params[constants.LDP_NET_CUSTOM]: args.extend(shlex.split(params[constants.LDP_NET_CUSTOM])) return args def GenSyncParamsCmd(self, minor, params): args = ["drbdsetup", "disk-options", minor] if params[constants.LDP_DYNAMIC_RESYNC]: if params[constants.LDP_PLAN_AHEAD] == 0: msg = ("A value of 0 for c-plan-ahead disables the dynamic sync speed" " controller at DRBD level. 
If you want to disable it, please" " set the dynamic-resync disk parameter to False.") logging.error(msg) return [msg] # add the c-* parameters to args args.extend(["--c-plan-ahead", params[constants.LDP_PLAN_AHEAD], "--c-fill-target", params[constants.LDP_FILL_TARGET], "--c-delay-target", params[constants.LDP_DELAY_TARGET], "--c-max-rate", params[constants.LDP_MAX_RATE], "--c-min-rate", params[constants.LDP_MIN_RATE], ]) else: args.extend(["--resync-rate", "%d" % params[constants.LDP_RESYNC_RATE]]) return args def GenPauseSyncCmd(self, minor): return ["drbdsetup", "pause-sync", minor] def GenResumeSyncCmd(self, minor): return ["drbdsetup", "resume-sync", minor] def GenPrimaryCmd(self, minor, force): cmd = ["drbdsetup", "primary", minor] if force: cmd.append("--force") return cmd def GenSecondaryCmd(self, minor): return ["drbdsetup", "secondary", minor] def GenDetachCmd(self, minor): return ["drbdsetup", "detach", minor] def GenDisconnectCmd(self, minor, family, lhost, lport, rhost, rport): return ["drbdsetup", "disconnect", "%s:%s:%s" % (family, lhost, lport), "%s:%s:%s" % (family, rhost, rport)] def GenDownCmd(self, minor): return ["drbdsetup", "down", self._GetResource(minor)] def GenResizeCmd(self, minor, size_mb): return ["drbdsetup", "resize", minor, "--size", "%dm" % size_mb] @staticmethod def _GetResource(minor): """Return the resource name for a given minor. Currently we don't support DRBD volumes which share a resource, so we generate the resource name based on the minor the resulting volumes is assigned to. """ return "resource%d" % minor @classmethod def _ComputeDiskBarrierArgs(cls, disabled_barriers, disable_meta_flush): """Compute the DRBD command line parameters for disk barriers """ disabled_barriers_set = frozenset(disabled_barriers) if not disabled_barriers_set in constants.DRBD_VALID_BARRIER_OPT: raise errors.BlockDeviceError("%s is not a valid option set for DRBD" " barriers" % disabled_barriers) args = [] # meta flushes if disable_meta_flush: args.append(cls._DISABLE_META_FLUSH_OPTION) # disk flushes if constants.DRBD_B_DISK_FLUSH in disabled_barriers_set: args.append(cls._DISABLE_FLUSH_OPTION) # disk drain if constants.DRBD_B_DISK_DRAIN in disabled_barriers_set: args.append(cls._DISABLE_DRAIN_OPTION) # disk barriers if constants.DRBD_B_DISK_BARRIERS in disabled_barriers_set: args.append(cls._DISABLE_DISK_OPTION) return args ganeti-2.9.3/lib/storage/filestorage.py0000644000000000000000000001460412271422343020067 0ustar00rootroot00000000000000# # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """File storage functions. 
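For example, L{GetFileStorageSpaceInfo} reports the size and free space (in MiB) of the filesystem holding a path; a sketch with a hypothetical path and numbers:

  >>> GetFileStorageSpaceInfo("/srv/ganeti/file-storage")  # doctest: +SKIP
  {'type': 'file', 'name': '/srv/ganeti/file-storage', 'storage_size': 20480, 'storage_free': 10240}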
""" import logging import os from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import pathutils from ganeti import utils def GetFileStorageSpaceInfo(path): """Retrieves the free and total space of the device where the file is located. @type path: string @param path: Path of the file whose embracing device's capacity is reported. @return: a dictionary containing 'vg_size' and 'vg_free' given in MebiBytes """ try: result = os.statvfs(path) free = (result.f_frsize * result.f_bavail) / (1024 * 1024) size = (result.f_frsize * result.f_blocks) / (1024 * 1024) return {"type": constants.ST_FILE, "name": path, "storage_size": size, "storage_free": free} except OSError, e: raise errors.CommandError("Failed to retrieve file system information about" " path: %s - %s" % (path, e.strerror)) def _GetForbiddenFileStoragePaths(): """Builds a list of path prefixes which shouldn't be used for file storage. @rtype: frozenset """ paths = set([ "/boot", "/dev", "/etc", "/home", "/proc", "/root", "/sys", ]) for prefix in ["", "/usr", "/usr/local"]: paths.update(map(lambda s: "%s/%s" % (prefix, s), ["bin", "lib", "lib32", "lib64", "sbin"])) return compat.UniqueFrozenset(map(os.path.normpath, paths)) def _ComputeWrongFileStoragePaths(paths, _forbidden=_GetForbiddenFileStoragePaths()): """Cross-checks a list of paths for prefixes considered bad. Some paths, e.g. "/bin", should not be used for file storage. @type paths: list @param paths: List of paths to be checked @rtype: list @return: Sorted list of paths for which the user should be warned """ def _Check(path): return (not os.path.isabs(path) or path in _forbidden or filter(lambda p: utils.IsBelowDir(p, path), _forbidden)) return utils.NiceSort(filter(_Check, map(os.path.normpath, paths))) def ComputeWrongFileStoragePaths(_filename=pathutils.FILE_STORAGE_PATHS_FILE): """Returns a list of file storage paths whose prefix is considered bad. See L{_ComputeWrongFileStoragePaths}. """ return _ComputeWrongFileStoragePaths(_LoadAllowedFileStoragePaths(_filename)) def _CheckFileStoragePath(path, allowed, exact_match_ok=False): """Checks if a path is in a list of allowed paths for file storage. @type path: string @param path: Path to check @type allowed: list @param allowed: List of allowed paths @type exact_match_ok: bool @param exact_match_ok: whether or not it is okay when the path is exactly equal to an allowed path and not a subdir of it @raise errors.FileStoragePathError: If the path is not allowed """ if not os.path.isabs(path): raise errors.FileStoragePathError("File storage path must be absolute," " got '%s'" % path) for i in allowed: if not os.path.isabs(i): logging.info("Ignoring relative path '%s' for file storage", i) continue if exact_match_ok: if os.path.normpath(i) == os.path.normpath(path): break if utils.IsBelowDir(i, path): break else: raise errors.FileStoragePathError("Path '%s' is not acceptable for file" " storage" % path) def _LoadAllowedFileStoragePaths(filename): """Loads file containing allowed file storage paths. @rtype: list @return: List of allowed paths (can be an empty list) """ try: contents = utils.ReadFile(filename) except EnvironmentError: return [] else: return utils.FilterEmptyLinesAndComments(contents) def CheckFileStoragePathAcceptance( path, _filename=pathutils.FILE_STORAGE_PATHS_FILE, exact_match_ok=False): """Checks if a path is allowed for file storage. 
@type path: string @param path: Path to check @raise errors.FileStoragePathError: If the path is not allowed """ allowed = _LoadAllowedFileStoragePaths(_filename) if not allowed: raise errors.FileStoragePathError("No paths are valid or path file '%s'" " was not accessible." % _filename) if _ComputeWrongFileStoragePaths([path]): raise errors.FileStoragePathError("Path '%s' uses a forbidden prefix" % path) _CheckFileStoragePath(path, allowed, exact_match_ok=exact_match_ok) def _CheckFileStoragePathExistance(path): """Checks whether the given path is usable on the file system. This checks whether the path exists and is a writable directory. @type path: string @param path: path to check """ if not os.path.isdir(path): raise errors.FileStoragePathError("Path '%s' does not exist or is not a" " directory." % path) if not os.access(path, os.W_OK): raise errors.FileStoragePathError("Path '%s' is not writable" % path) def CheckFileStoragePath( path, _allowed_paths_file=pathutils.FILE_STORAGE_PATHS_FILE): """Checks whether the path exists and is acceptable to use. Can be used for any file-based storage, for example shared-file storage. @type path: string @param path: path to check @rtype: string @return: error message if the path is not ready to use """ try: CheckFileStoragePathAcceptance(path, _filename=_allowed_paths_file, exact_match_ok=True) except errors.FileStoragePathError as e: return str(e) if not os.path.isdir(path): return "Path '%s' does not exist or is not a directory." % path if not os.access(path, os.W_OK): return "Path '%s' is not writable" % path ganeti-2.9.3/lib/pathutils.py0000644000000000000000000001334112271422343016131 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module containing constants and functions for filesystem paths.
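Paths are assembled from build-time configuration (L{_autoconf}) and, where needed, prefixed for virtual clusters via L{vcluster.AddNodePrefix}. A small, environment-independent example using the helper defined below:

  >>> GetLogFilename("noded") == LOG_DIR + "/noded.log"
  True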
""" from ganeti import _autoconf from ganeti import compat from ganeti import vcluster # Build-time constants DEFAULT_FILE_STORAGE_DIR = "/srv/ganeti/file-storage" DEFAULT_FILE_STORAGE_DIR = vcluster.AddNodePrefix(DEFAULT_FILE_STORAGE_DIR) DEFAULT_SHARED_FILE_STORAGE_DIR = "/srv/ganeti/shared-file-storage" DEFAULT_SHARED_FILE_STORAGE_DIR = \ vcluster.AddNodePrefix(DEFAULT_SHARED_FILE_STORAGE_DIR) EXPORT_DIR = vcluster.AddNodePrefix(_autoconf.EXPORT_DIR) OS_SEARCH_PATH = _autoconf.OS_SEARCH_PATH ES_SEARCH_PATH = _autoconf.ES_SEARCH_PATH SSH_CONFIG_DIR = _autoconf.SSH_CONFIG_DIR XEN_CONFIG_DIR = vcluster.AddNodePrefix(_autoconf.XEN_CONFIG_DIR) SYSCONFDIR = vcluster.AddNodePrefix(_autoconf.SYSCONFDIR) TOOLSDIR = _autoconf.TOOLSDIR LOCALSTATEDIR = vcluster.AddNodePrefix(_autoconf.LOCALSTATEDIR) # Paths which don't change for a virtual cluster DAEMON_UTIL = _autoconf.PKGLIBDIR + "/daemon-util" IMPORT_EXPORT_DAEMON = _autoconf.PKGLIBDIR + "/import-export" KVM_CONSOLE_WRAPPER = _autoconf.PKGLIBDIR + "/tools/kvm-console-wrapper" KVM_IFUP = _autoconf.PKGLIBDIR + "/kvm-ifup" PREPARE_NODE_JOIN = _autoconf.PKGLIBDIR + "/prepare-node-join" NODE_DAEMON_SETUP = _autoconf.PKGLIBDIR + "/node-daemon-setup" XEN_CONSOLE_WRAPPER = _autoconf.PKGLIBDIR + "/tools/xen-console-wrapper" ETC_HOSTS = vcluster.ETC_HOSTS # Top-level paths DATA_DIR = LOCALSTATEDIR + "/lib/ganeti" LOCK_DIR = LOCALSTATEDIR + "/lock" LOG_DIR = LOCALSTATEDIR + "/log/ganeti" RUN_DIR = LOCALSTATEDIR + "/run/ganeti" #: Script to configure master IP address DEFAULT_MASTER_SETUP_SCRIPT = TOOLSDIR + "/master-ip-setup" SSH_HOST_DSA_PRIV = SSH_CONFIG_DIR + "/ssh_host_dsa_key" SSH_HOST_DSA_PUB = SSH_HOST_DSA_PRIV + ".pub" SSH_HOST_RSA_PRIV = SSH_CONFIG_DIR + "/ssh_host_rsa_key" SSH_HOST_RSA_PUB = SSH_HOST_RSA_PRIV + ".pub" BDEV_CACHE_DIR = RUN_DIR + "/bdev-cache" DISK_LINKS_DIR = RUN_DIR + "/instance-disks" SOCKET_DIR = RUN_DIR + "/socket" CRYPTO_KEYS_DIR = RUN_DIR + "/crypto" IMPORT_EXPORT_DIR = RUN_DIR + "/import-export" INSTANCE_STATUS_FILE = RUN_DIR + "/instance-status" INSTANCE_REASON_DIR = RUN_DIR + "/instance-reason" #: User-id pool lock directory (used user IDs have a corresponding lock file in #: this directory) UIDPOOL_LOCKDIR = RUN_DIR + "/uid-pool" SSCONF_LOCK_FILE = LOCK_DIR + "/ganeti-ssconf.lock" CLUSTER_CONF_FILE = DATA_DIR + "/config.data" RAPI_CERT_FILE = DATA_DIR + "/rapi.pem" CONFD_HMAC_KEY = DATA_DIR + "/hmac.key" SPICE_CERT_FILE = DATA_DIR + "/spice.pem" SPICE_CACERT_FILE = DATA_DIR + "/spice-ca.pem" CLUSTER_DOMAIN_SECRET_FILE = DATA_DIR + "/cluster-domain-secret" SSH_KNOWN_HOSTS_FILE = DATA_DIR + "/known_hosts" RAPI_USERS_FILE = DATA_DIR + "/rapi/users" QUEUE_DIR = DATA_DIR + "/queue" CONF_DIR = SYSCONFDIR + "/ganeti" USER_SCRIPTS_DIR = CONF_DIR + "/scripts" VNC_PASSWORD_FILE = CONF_DIR + "/vnc-cluster-password" HOOKS_BASE_DIR = CONF_DIR + "/hooks" FILE_STORAGE_PATHS_FILE = CONF_DIR + "/file-storage-paths" RESTRICTED_COMMANDS_DIR = CONF_DIR + "/restricted-commands" #: Node daemon certificate path NODED_CERT_FILE = DATA_DIR + "/server.pem" #: Node daemon certificate file permissions NODED_CERT_MODE = 0440 #: Locked in exclusive mode while noded verifies a remote command RESTRICTED_COMMANDS_LOCK_FILE = LOCK_DIR + "/ganeti-restricted-commands.lock" #: Lock file for watcher, locked in shared mode by watcher; lock in exclusive # mode to block watcher (see L{cli._RunWhileClusterStoppedHelper.Call} WATCHER_LOCK_FILE = LOCK_DIR + "/ganeti-watcher.lock" #: Status file for per-group watcher, locked in exclusive mode by watcher 
WATCHER_GROUP_STATE_FILE = DATA_DIR + "/watcher.%s.data" #: File for per-group instance status, merged into L{INSTANCE_STATUS_FILE} by #: per-group processes WATCHER_GROUP_INSTANCE_STATUS_FILE = DATA_DIR + "/watcher.%s.instance-status" #: File containing Unix timestamp until which watcher should be paused WATCHER_PAUSEFILE = DATA_DIR + "/watcher.pause" #: User-provided master IP setup script EXTERNAL_MASTER_SETUP_SCRIPT = USER_SCRIPTS_DIR + "/master-ip-setup" #: LUXI socket used for job execution MASTER_SOCKET = SOCKET_DIR + "/ganeti-master" #: LUXI socket used for queries only QUERY_SOCKET = SOCKET_DIR + "/ganeti-query" LOG_OS_DIR = LOG_DIR + "/os" LOG_ES_DIR = LOG_DIR + "/extstorage" #: Directory for storing Xen config files after failed instance starts LOG_XEN_DIR = LOG_DIR + "/xen" # Job queue paths JOB_QUEUE_LOCK_FILE = QUEUE_DIR + "/lock" JOB_QUEUE_VERSION_FILE = QUEUE_DIR + "/version" JOB_QUEUE_SERIAL_FILE = QUEUE_DIR + "/serial" JOB_QUEUE_ARCHIVE_DIR = QUEUE_DIR + "/archive" JOB_QUEUE_DRAIN_FILE = QUEUE_DIR + "/drain" ALL_CERT_FILES = compat.UniqueFrozenset([ NODED_CERT_FILE, RAPI_CERT_FILE, SPICE_CERT_FILE, SPICE_CACERT_FILE, ]) def GetLogFilename(daemon_name): """Returns the full path for a daemon's log file. """ return "%s/%s.log" % (LOG_DIR, daemon_name) LOG_WATCHER = GetLogFilename("watcher") LOG_COMMANDS = GetLogFilename("commands") LOG_BURNIN = GetLogFilename("burnin") ganeti-2.9.3/lib/locking.py0000644000000000000000000017163612271422343015556 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the Ganeti locking code.""" # pylint: disable=W0212 # W0212 since e.g. LockSet methods use (a lot) the internals of # SharedLock import os import select import threading import errno import weakref import logging import heapq import itertools import time from ganeti import errors from ganeti import utils from ganeti import compat from ganeti import query _EXCLUSIVE_TEXT = "exclusive" _SHARED_TEXT = "shared" _DELETED_TEXT = "deleted" _DEFAULT_PRIORITY = 0 #: Minimum timeout required to consider scheduling a pending acquisition #: (seconds) _LOCK_ACQUIRE_MIN_TIMEOUT = (1.0 / 1000) # Internal lock acquisition modes for L{LockSet} (_LS_ACQUIRE_EXACT, _LS_ACQUIRE_ALL, _LS_ACQUIRE_OPPORTUNISTIC) = range(1, 4) _LS_ACQUIRE_MODES = compat.UniqueFrozenset([ _LS_ACQUIRE_EXACT, _LS_ACQUIRE_ALL, _LS_ACQUIRE_OPPORTUNISTIC, ]) def ssynchronized(mylock, shared=0): """Shared Synchronization decorator. Calls the function holding the given lock, either in exclusive or shared mode. It requires the passed lock to be a SharedLock (or support its semantics). 
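Example (a sketch; C{_example_lock} is a hypothetical module-level lock):

  >>> _example_lock = SharedLock("example")
  >>> @ssynchronized(_example_lock, shared=1)
  ... def ReadSomething():
  ...   return 42
  >>> ReadSomething()  # acquires _example_lock shared, releases on return
  42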
@type mylock: lockable object or string @param mylock: lock to acquire or class member name of the lock to acquire """ def wrap(fn): def sync_function(*args, **kwargs): if isinstance(mylock, basestring): assert args, "cannot ssynchronize on non-class method: self not found" # args[0] is "self" lock = getattr(args[0], mylock) else: lock = mylock lock.acquire(shared=shared) try: return fn(*args, **kwargs) finally: lock.release() return sync_function return wrap class _SingleNotifyPipeConditionWaiter(object): """Helper class for SingleNotifyPipeCondition """ __slots__ = [ "_fd", ] def __init__(self, fd): """Constructor for _SingleNotifyPipeConditionWaiter @type fd: int @param fd: File descriptor to wait for """ object.__init__(self) self._fd = fd def __call__(self, timeout): """Wait for something to happen on the pipe. @type timeout: float or None @param timeout: Timeout for waiting (can be None) """ running_timeout = utils.RunningTimeout(timeout, True) poller = select.poll() poller.register(self._fd, select.POLLHUP) while True: remaining_time = running_timeout.Remaining() if remaining_time is not None: if remaining_time < 0.0: break # Our calculation uses seconds, poll() wants milliseconds remaining_time *= 1000 try: result = poller.poll(remaining_time) except EnvironmentError, err: if err.errno != errno.EINTR: raise result = None # Check whether we were notified if result and result[0][0] == self._fd: break class _BaseCondition(object): """Base class containing common code for conditions. Some of this code is taken from python's threading module. """ __slots__ = [ "_lock", "acquire", "release", "_is_owned", "_acquire_restore", "_release_save", ] def __init__(self, lock): """Constructor for _BaseCondition. @type lock: threading.Lock @param lock: condition base lock """ object.__init__(self) try: self._release_save = lock._release_save except AttributeError: self._release_save = self._base_release_save try: self._acquire_restore = lock._acquire_restore except AttributeError: self._acquire_restore = self._base_acquire_restore try: self._is_owned = lock.is_owned except AttributeError: self._is_owned = self._base_is_owned self._lock = lock # Export the lock's acquire() and release() methods self.acquire = lock.acquire self.release = lock.release def _base_is_owned(self): """Check whether lock is owned by current thread. """ if self._lock.acquire(0): self._lock.release() return False return True def _base_release_save(self): self._lock.release() def _base_acquire_restore(self, _): self._lock.acquire() def _check_owned(self): """Raise an exception if the current thread doesn't own the lock. """ if not self._is_owned(): raise RuntimeError("cannot work with un-acquired lock") class SingleNotifyPipeCondition(_BaseCondition): """Condition which can only be notified once. This condition class uses pipes and poll, internally, to be able to wait for notification with a timeout, without resorting to polling. It is almost compatible with Python's threading.Condition, with the following differences: - notifyAll can only be called once, and no wait can happen after that - notify is not supported, only notifyAll """ __slots__ = [ "_read_fd", "_write_fd", "_nwaiters", "_notified", ] _waiter_class = _SingleNotifyPipeConditionWaiter def __init__(self, lock): """Constructor for SingleNotifyPipeCondition """ _BaseCondition.__init__(self, lock) self._nwaiters = 0 self._notified = False self._read_fd = None self._write_fd = None def _check_unnotified(self): """Throws an exception if already notified. """ if self._notified: raise RuntimeError("cannot use already notified condition") def _Cleanup(self): """Cleanup open file descriptors, if any. """ if self._read_fd is not None: os.close(self._read_fd) self._read_fd = None if self._write_fd is not None: os.close(self._write_fd) self._write_fd = None def wait(self, timeout): """Wait for a notification. @type timeout: float or None @param timeout: Waiting timeout (can be None) """ self._check_owned() self._check_unnotified() self._nwaiters += 1 try: if self._read_fd is None: (self._read_fd, self._write_fd) = os.pipe() wait_fn = self._waiter_class(self._read_fd) state = self._release_save() try: # Wait for notification wait_fn(timeout) finally: # Re-acquire lock self._acquire_restore(state) finally: self._nwaiters -= 1 if self._nwaiters == 0: self._Cleanup() def notifyAll(self): # pylint: disable=C0103 """Close the writing side of the pipe to notify all waiters. """ self._check_owned() self._check_unnotified() self._notified = True if self._write_fd is not None: os.close(self._write_fd) self._write_fd = None class PipeCondition(_BaseCondition): """Group-only non-polling condition with counters. This condition class uses pipes and poll, internally, to be able to wait for notification with a timeout, without resorting to polling. It is almost compatible with Python's threading.Condition, but only supports notifyAll and non-recursive locks. As an additional feature it's able to report whether there are any waiting threads. """ __slots__ = [ "_waiters", "_single_condition", ] _single_condition_class = SingleNotifyPipeCondition def __init__(self, lock): """Initializes this class. """ _BaseCondition.__init__(self, lock) self._waiters = set() self._single_condition = self._single_condition_class(self._lock) def wait(self, timeout): """Wait for a notification. @type timeout: float or None @param timeout: Waiting timeout (can be None) """ self._check_owned() # Keep local reference to the pipe. It could be replaced by another thread # notifying while we're waiting. cond = self._single_condition self._waiters.add(threading.currentThread()) try: cond.wait(timeout) finally: self._check_owned() self._waiters.remove(threading.currentThread()) def notifyAll(self): # pylint: disable=C0103 """Notify all currently waiting threads. """ self._check_owned() self._single_condition.notifyAll() self._single_condition = self._single_condition_class(self._lock) def get_waiting(self): """Returns the set of all waiting threads. """ self._check_owned() return self._waiters def has_waiting(self): """Returns whether there are active waiters. """ self._check_owned() return bool(self._waiters) def __repr__(self): return ("<%s.%s waiters=%s at %#x>" % (self.__class__.__module__, self.__class__.__name__, self._waiters, id(self))) class _PipeConditionWithMode(PipeCondition): __slots__ = [ "shared", ] def __init__(self, lock, shared): """Initializes this class. """ self.shared = shared PipeCondition.__init__(self, lock) class SharedLock(object): """Implements a shared lock. Multiple threads can acquire the lock in a shared way by calling C{acquire(shared=1)}. In order to acquire the lock in an exclusive way threads can call C{acquire(shared=0)}. Notes on data structures: C{__pending} contains a priority queue (heapq) of all pending acquires: C{[(priority1, prioqueue1), (priority2, prioqueue2), ...]}. Each per-priority queue contains a normal in-order list of conditions to be notified when the lock can be acquired. (For illustration: with pending acquires at priorities 0 and 1, C{__pending} would look like C{[(0, prioqueue0), (1, prioqueue1)]}, with heapq keeping the numerically lowest, i.e. most urgent, priority first.)
Shared locks are grouped together by priority and the condition for them is stored in C{__pending_shared} if it already exists. C{__pending_by_prio} keeps references for the per-priority queues indexed by priority for faster access. @type name: string @ivar name: the name of the lock """ __slots__ = [ "__weakref__", "__deleted", "__exc", "__lock", "__pending", "__pending_by_prio", "__pending_shared", "__shr", "__time_fn", "name", ] __condition_class = _PipeConditionWithMode def __init__(self, name, monitor=None, _time_fn=time.time): """Construct a new SharedLock. @param name: the name of the lock @type monitor: L{LockMonitor} @param monitor: Lock monitor with which to register """ object.__init__(self) self.name = name # Used for unittesting self.__time_fn = _time_fn # Internal lock self.__lock = threading.Lock() # Queue containing waiting acquires self.__pending = [] self.__pending_by_prio = {} self.__pending_shared = {} # Current lock holders self.__shr = set() self.__exc = None # is this lock in the deleted state? self.__deleted = False # Register with lock monitor if monitor: logging.debug("Adding lock %s to monitor", name) monitor.RegisterLock(self) def __repr__(self): return ("<%s.%s name=%s at %#x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))) def GetLockInfo(self, requested): """Retrieves information for querying locks. @type requested: set @param requested: Requested information, see C{query.LQ_*} """ self.__lock.acquire() try: # Note: to avoid unintentional race conditions, no references to # modifiable objects should be returned unless they were created in this # function. mode = None owner_names = None if query.LQ_MODE in requested: if self.__deleted: mode = _DELETED_TEXT assert not (self.__exc or self.__shr) elif self.__exc: mode = _EXCLUSIVE_TEXT elif self.__shr: mode = _SHARED_TEXT # Current owner(s) are wanted if query.LQ_OWNER in requested: if self.__exc: owner = [self.__exc] else: owner = self.__shr if owner: assert not self.__deleted owner_names = [i.getName() for i in owner] # Pending acquires are wanted if query.LQ_PENDING in requested: pending = [] # Sorting instead of copying and using heapq functions for simplicity for (_, prioqueue) in sorted(self.__pending): for cond in prioqueue: if cond.shared: pendmode = _SHARED_TEXT else: pendmode = _EXCLUSIVE_TEXT # List of names will be sorted in L{query._GetLockPending} pending.append((pendmode, [i.getName() for i in cond.get_waiting()])) else: pending = None return [(self.name, mode, owner_names, pending)] finally: self.__lock.release() def __check_deleted(self): """Raises an exception if the lock has been deleted. """ if self.__deleted: raise errors.LockError("Deleted lock %s" % self.name) def __is_sharer(self): """Is the current thread sharing the lock at this time? """ return threading.currentThread() in self.__shr def __is_exclusive(self): """Is the current thread holding the lock exclusively at this time? """ return threading.currentThread() == self.__exc def __is_owned(self, shared=-1): """Is the current thread somehow owning the lock at this time? This is a private version of the function, which presumes you're holding the internal lock. """ if shared < 0: return self.__is_sharer() or self.__is_exclusive() elif shared: return self.__is_sharer() else: return self.__is_exclusive() def is_owned(self, shared=-1): """Is the current thread somehow owning the lock at this time? 
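Sketch of typical usage (illustrative only):

      lock.acquire(shared=1)
      try:
        assert lock.is_owned(shared=1)
        assert not lock.is_owned(shared=0)
      finally:
        lock.release()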
@param shared: - < 0: check for any type of ownership (default) - 0: check for exclusive ownership - > 0: check for shared ownership """ self.__lock.acquire() try: return self.__is_owned(shared=shared) finally: self.__lock.release() #: Necessary to remain compatible with threading.Condition, which tries to #: retrieve a lock's "_is_owned" attribute _is_owned = is_owned def _count_pending(self): """Returns the number of pending acquires. @rtype: int """ self.__lock.acquire() try: return sum(len(prioqueue) for (_, prioqueue) in self.__pending) finally: self.__lock.release() def _check_empty(self): """Checks whether there are any pending acquires. @rtype: bool """ self.__lock.acquire() try: # Order is important: __find_first_pending_queue modifies __pending (_, prioqueue) = self.__find_first_pending_queue() return not (prioqueue or self.__pending or self.__pending_by_prio or self.__pending_shared) finally: self.__lock.release() def __do_acquire(self, shared): """Actually acquire the lock. """ if shared: self.__shr.add(threading.currentThread()) else: self.__exc = threading.currentThread() def __can_acquire(self, shared): """Determine whether lock can be acquired. """ if shared: return self.__exc is None else: return len(self.__shr) == 0 and self.__exc is None def __find_first_pending_queue(self): """Tries to find the topmost queued entry with pending acquires. Removes empty entries while going through the list. """ while self.__pending: (priority, prioqueue) = self.__pending[0] if prioqueue: return (priority, prioqueue) # Remove empty queue heapq.heappop(self.__pending) del self.__pending_by_prio[priority] assert priority not in self.__pending_shared return (None, None) def __is_on_top(self, cond): """Checks whether the passed condition is on top of the queue. The caller must make sure the queue isn't empty. """ (_, prioqueue) = self.__find_first_pending_queue() return cond == prioqueue[0] def __acquire_unlocked(self, shared, timeout, priority): """Acquire a shared lock. @param shared: whether to acquire in shared mode; by default an exclusive lock will be acquired @param timeout: maximum waiting time before giving up @type priority: integer @param priority: Priority for acquiring lock """ self.__check_deleted() # We cannot acquire the lock if we already have it assert not self.__is_owned(), ("double acquire() on a non-recursive lock" " %s" % self.name) # Remove empty entries from queue self.__find_first_pending_queue() # Check whether someone else holds the lock or there are pending acquires. if not self.__pending and self.__can_acquire(shared): # Apparently not, can acquire lock directly. 
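# (This fast path cannot starve queued waiters: __pending was just checked # to be empty while holding the internal lock.)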
self.__do_acquire(shared) return True # The lock couldn't be acquired right away, so if a timeout is given and is # considered too short, return right away as scheduling a pending # acquisition is quite expensive if timeout is not None and timeout < _LOCK_ACQUIRE_MIN_TIMEOUT: return False prioqueue = self.__pending_by_prio.get(priority, None) if shared: # Try to re-use condition for shared acquire wait_condition = self.__pending_shared.get(priority, None) assert (wait_condition is None or (wait_condition.shared and wait_condition in prioqueue)) else: wait_condition = None if wait_condition is None: if prioqueue is None: assert priority not in self.__pending_by_prio prioqueue = [] heapq.heappush(self.__pending, (priority, prioqueue)) self.__pending_by_prio[priority] = prioqueue wait_condition = self.__condition_class(self.__lock, shared) prioqueue.append(wait_condition) if shared: # Keep reference for further shared acquires on same priority. This is # better than trying to find it in the list of pending acquires. assert priority not in self.__pending_shared self.__pending_shared[priority] = wait_condition wait_start = self.__time_fn() acquired = False try: # Wait until we become the topmost acquire in the queue or the timeout # expires. while True: if self.__is_on_top(wait_condition) and self.__can_acquire(shared): self.__do_acquire(shared) acquired = True break # A lot of code assumes blocking acquires always succeed, therefore we # can never return False for a blocking acquire if (timeout is not None and utils.TimeoutExpired(wait_start, timeout, _time_fn=self.__time_fn)): break # Wait for notification wait_condition.wait(timeout) self.__check_deleted() finally: # Remove condition from queue if there are no more waiters if not wait_condition.has_waiting(): prioqueue.remove(wait_condition) if wait_condition.shared: # Remove from list of shared acquires if it wasn't while releasing # (e.g. on lock deletion) self.__pending_shared.pop(priority, None) return acquired def acquire(self, shared=0, timeout=None, priority=None, test_notify=None): """Acquire a shared lock. @type shared: integer (0/1) used as a boolean @param shared: whether to acquire in shared mode; by default an exclusive lock will be acquired @type timeout: float @param timeout: maximum waiting time before giving up @type priority: integer @param priority: Priority for acquiring lock @type test_notify: callable or None @param test_notify: Special callback function for unittesting """ if priority is None: priority = _DEFAULT_PRIORITY self.__lock.acquire() try: # We already got the lock, notify now if __debug__ and callable(test_notify): test_notify() return self.__acquire_unlocked(shared, timeout, priority) finally: self.__lock.release() def downgrade(self): """Changes the lock mode from exclusive to shared. Pending acquires in shared mode on the same priority will go ahead. """ self.__lock.acquire() try: assert self.__is_owned(), "Lock must be owned" if self.__is_exclusive(): # Do nothing if the lock is already acquired in shared mode self.__exc = None self.__do_acquire(1) # Important: pending shared acquires should only jump ahead if there # was a transition from exclusive to shared, otherwise an owner of a # shared lock can keep calling this function to push incoming shared # acquires (priority, prioqueue) = self.__find_first_pending_queue() if prioqueue: # Is there a pending shared acquire on this priority? 
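# (If there is one, it is moved to the front of its priority queue and # notified below; this is what lets already-waiting shared acquires proceed # right after the exclusive-to-shared transition.)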
cond = self.__pending_shared.pop(priority, None) if cond: assert cond.shared assert cond in prioqueue # Ensure shared acquire is on top of queue if len(prioqueue) > 1: prioqueue.remove(cond) prioqueue.insert(0, cond) # Notify cond.notifyAll() assert not self.__is_exclusive() assert self.__is_sharer() return True finally: self.__lock.release() def release(self): """Release a Shared Lock. You must have acquired the lock, either in shared or in exclusive mode, before calling this function. """ self.__lock.acquire() try: assert self.__is_exclusive() or self.__is_sharer(), \ "Cannot release non-owned lock" # Autodetect release type if self.__is_exclusive(): self.__exc = None notify = True else: self.__shr.remove(threading.currentThread()) notify = not self.__shr # Notify topmost condition in queue if there are no owners left (for # shared locks) if notify: self.__notify_topmost() finally: self.__lock.release() def __notify_topmost(self): """Notifies topmost condition in queue of pending acquires. """ (priority, prioqueue) = self.__find_first_pending_queue() if prioqueue: cond = prioqueue[0] cond.notifyAll() if cond.shared: # Prevent further shared acquires from sneaking in while waiters are # notified self.__pending_shared.pop(priority, None) def _notify_topmost(self): """Exported version of L{__notify_topmost}. """ self.__lock.acquire() try: return self.__notify_topmost() finally: self.__lock.release() def delete(self, timeout=None, priority=None): """Delete a Shared Lock. This operation will declare the lock for removal. First the lock will be acquired in exclusive mode if you don't already own it, then the lock will be put in a state where any future and pending acquire() fail. @type timeout: float @param timeout: maximum waiting time before giving up @type priority: integer @param priority: Priority for acquiring lock """ if priority is None: priority = _DEFAULT_PRIORITY self.__lock.acquire() try: assert not self.__is_sharer(), "Cannot delete() a lock while sharing it" self.__check_deleted() # The caller is allowed to hold the lock exclusively already. acquired = self.__is_exclusive() if not acquired: acquired = self.__acquire_unlocked(0, timeout, priority) if acquired: assert self.__is_exclusive() and not self.__is_sharer(), \ "Lock wasn't acquired in exclusive mode" self.__deleted = True self.__exc = None assert not (self.__exc or self.__shr), "Found owner during deletion" # Notify all acquires. They'll throw an error. for (_, prioqueue) in self.__pending: for cond in prioqueue: cond.notifyAll() assert self.__deleted return acquired finally: self.__lock.release() def _release_save(self): shared = self.__is_sharer() self.release() return shared def _acquire_restore(self, shared): self.acquire(shared=shared) # Whenever we want to acquire a full LockSet we pass None as the value # to acquire. Hide this behind this nicely named constant. ALL_SET = None def _TimeoutZero(): """Returns the number zero. """ return 0 def _GetLsAcquireModeAndTimeouts(want_all, timeout, opportunistic): """Determines modes and timeouts for L{LockSet.acquire}. 
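For example (sketch), C{_GetLsAcquireModeAndTimeouts(False, None, True)} short-circuits to C{(_LS_ACQUIRE_OPPORTUNISTIC, None, _TimeoutZero)}: an opportunistic acquisition of specific names never blocks on individual locks.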
@type want_all: boolean @param want_all: Whether all locks in set should be acquired @param timeout: Timeout in seconds or C{None} @param opportunistic: Whether locks should be acquired opportunistically @rtype: tuple @return: Tuple containing mode to be passed to L{LockSet.__acquire_inner} (one of L{_LS_ACQUIRE_MODES}), a function to calculate timeout for acquiring the lockset-internal lock (might be C{None}) and a function to calculate the timeout for acquiring individual locks """ # Short circuit when no running timeout is needed if opportunistic and not want_all: assert timeout is None, "Got timeout for an opportunistic acquisition" return (_LS_ACQUIRE_OPPORTUNISTIC, None, _TimeoutZero) # We need to keep track of how long we spent waiting for a lock. The # timeout passed to this function is over all lock acquisitions. running_timeout = utils.RunningTimeout(timeout, False) if want_all: mode = _LS_ACQUIRE_ALL ls_timeout_fn = running_timeout.Remaining else: mode = _LS_ACQUIRE_EXACT ls_timeout_fn = None if opportunistic: mode = _LS_ACQUIRE_OPPORTUNISTIC timeout_fn = _TimeoutZero else: timeout_fn = running_timeout.Remaining return (mode, ls_timeout_fn, timeout_fn) class _AcquireTimeout(Exception): """Internal exception to abort an acquire on a timeout. """ class LockSet: """Implements a set of locks. This abstraction implements a set of shared locks for the same resource type, distinguished by name. The user can lock a subset of the resources and the LockSet will take care of acquiring the locks always in the same order, thus preventing deadlock. All the locks needed in the same set must be acquired together, though. @type name: string @ivar name: the name of the lockset """ def __init__(self, members, name, monitor=None): """Constructs a new LockSet. @type members: list of strings @param members: initial members of the set @type monitor: L{LockMonitor} @param monitor: Lock monitor with which to register member locks """ assert members is not None, "members parameter is not a list" self.name = name # Lock monitor self.__monitor = monitor # Used internally to guarantee coherency self.__lock = SharedLock(self._GetLockName("[lockset]"), monitor=monitor) # The lockdict indexes the relationship name -> lock # The order-of-locking is implied by the alphabetical order of names self.__lockdict = {} for mname in members: self.__lockdict[mname] = SharedLock(self._GetLockName(mname), monitor=monitor) # The owner dict contains the set of locks each thread owns. For # performance each thread can access its own key without a global lock on # this structure. It is paramount though that *no* other type of access is # done to this structure (eg. no looping over its keys). *_owner helper # functions are defined to guarantee access is correct, but in general never # do anything different than __owners[threading.currentThread()], or there # will be trouble. self.__owners = {} def _GetLockName(self, mname): """Returns the name for a member lock. """ return "%s/%s" % (self.name, mname) def _get_lock(self): """Returns the lockset-internal lock. """ return self.__lock def _get_lockdict(self): """Returns the lockset-internal lock dictionary. Accessing this structure is only safe in single-thread usage or when the lockset-internal lock is held. """ return self.__lockdict def is_owned(self): """Is the current thread a current level owner? 
@note: Use L{check_owned} to check if a specific lock is held """ return threading.currentThread() in self.__owners def check_owned(self, names, shared=-1): """Check if locks are owned in a specific mode. @type names: sequence or string @param names: Lock names (or a single lock name) @param shared: See L{SharedLock.is_owned} @rtype: bool @note: Use L{is_owned} to check if the current thread holds I{any} lock and L{list_owned} to get the names of all owned locks """ if isinstance(names, basestring): names = [names] # Avoid check if no locks are owned anyway if names and self.is_owned(): candidates = [] # Gather references to all locks (in case they're deleted in the meantime) for lname in names: try: lock = self.__lockdict[lname] except KeyError: raise errors.LockError("Non-existing lock '%s' in set '%s' (it may" " have been removed)" % (lname, self.name)) else: candidates.append(lock) return compat.all(lock.is_owned(shared=shared) for lock in candidates) else: return False def owning_all(self): """Checks whether current thread owns internal lock. Holding the internal lock is equivalent to holding all locks in the set (the opposite does not necessarily hold as it cannot be easily determined). L{add} and L{remove} require the internal lock. @rtype: boolean """ return self.__lock.is_owned() def _add_owned(self, name=None): """Note the current thread owns the given lock""" if name is None: if not self.is_owned(): self.__owners[threading.currentThread()] = set() else: if self.is_owned(): self.__owners[threading.currentThread()].add(name) else: self.__owners[threading.currentThread()] = set([name]) def _del_owned(self, name=None): """Note the current thread no longer owns the given lock""" assert not (name is None and self.__lock.is_owned()), \ "Cannot hold internal lock when deleting owner status" if name is not None: self.__owners[threading.currentThread()].remove(name) # Only remove the key if we don't hold the set-lock as well if not (self.__lock.is_owned() or self.__owners[threading.currentThread()]): del self.__owners[threading.currentThread()] def list_owned(self): """Get the set of resource names owned by the current thread""" if self.is_owned(): return self.__owners[threading.currentThread()].copy() else: return set() def _release_and_delete_owned(self): """Release and delete all resources owned by the current thread""" for lname in self.list_owned(): lock = self.__lockdict[lname] if lock.is_owned(): lock.release() self._del_owned(name=lname) def __names(self): """Return the current set of names. Only call this function while holding __lock and don't iterate on the result after releasing the lock. """ return self.__lockdict.keys() def _names(self): """Return a copy of the current set of elements. Used only for debugging purposes. """ # If we don't already own the set-level lock, we'll acquire it and note # we need to release it later. release_lock = False if not self.__lock.is_owned(): release_lock = True self.__lock.acquire(shared=1) try: result = self.__names() finally: if release_lock: self.__lock.release() return set(result) def acquire(self, names, timeout=None, shared=0, priority=None, opportunistic=False, test_notify=None): """Acquire a set of resource locks. @note: When acquiring locks opportunistically, any number of locks might actually be acquired, even zero. 
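Sketch of a plain (non-opportunistic) acquisition; the member name is invented for this example:

      got = lockset.acquire(["inst1.example.com"], shared=1, timeout=10.0)
      if got is None:
        pass  # timed out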
@type names: list of strings (or string) @param names: the names of the locks which shall be acquired (special lock names, or instance/node names) @type shared: integer (0/1) used as a boolean @param shared: whether to acquire in shared mode; by default an exclusive lock will be acquired @type timeout: float or None @param timeout: Maximum time to acquire all locks; for opportunistic acquisitions, a timeout can only be given when C{names} is C{None}, in which case it is exclusively used for acquiring the L{LockSet}-internal lock; opportunistic acquisitions don't use a timeout for acquiring individual locks @type priority: integer @param priority: Priority for acquiring locks @type opportunistic: boolean @param opportunistic: Acquire locks opportunistically; use the return value to determine which locks were actually acquired @type test_notify: callable or None @param test_notify: Special callback function for unittesting @return: Set of all locks successfully acquired or None in case of timeout @raise errors.LockError: when any lock we try to acquire has been deleted before we succeed. In this case none of the locks requested will be acquired. """ assert timeout is None or timeout >= 0.0 # Check we don't already own locks at this level assert not self.is_owned(), ("Cannot acquire locks in the same set twice" " (lockset %s)" % self.name) if priority is None: priority = _DEFAULT_PRIORITY try: if names is not None: assert timeout is None or not opportunistic, \ ("Opportunistic acquisitions can only use a timeout if no" " names are given; see docstring for details") # Support passing in a single resource to acquire rather than many if isinstance(names, basestring): names = [names] (mode, _, timeout_fn) = \ _GetLsAcquireModeAndTimeouts(False, timeout, opportunistic) return self.__acquire_inner(names, mode, shared, priority, timeout_fn, test_notify) else: (mode, ls_timeout_fn, timeout_fn) = \ _GetLsAcquireModeAndTimeouts(True, timeout, opportunistic) # If no names are given acquire the whole set by not letting new names # being added before we release, and getting the current list of names. # Some of them may then be deleted later, but we'll cope with this. # # We'd like to acquire this lock in a shared way, as it's nice if # everybody else can use the instances at the same time. If we are # acquiring them exclusively though they won't be able to do this # anyway, though, so we'll get the list lock exclusively as well in # order to be able to do add() on the set while owning it. if not self.__lock.acquire(shared=shared, priority=priority, timeout=ls_timeout_fn()): raise _AcquireTimeout() try: # note we own the set-lock self._add_owned() return self.__acquire_inner(self.__names(), mode, shared, priority, timeout_fn, test_notify) except: # We shouldn't have problems adding the lock to the owners list, but # if we did we'll try to release this lock and re-raise exception. # Of course something is going to be really wrong, after this. self.__lock.release() self._del_owned() raise except _AcquireTimeout: return None def __acquire_inner(self, names, mode, shared, priority, timeout_fn, test_notify): """Inner logic for acquiring a number of locks. 
Acquisition modes: - C{_LS_ACQUIRE_ALL}: C{names} contains names of all locks in set, but deleted locks can be ignored as the whole set is being acquired with its internal lock held - C{_LS_ACQUIRE_EXACT}: The names listed in C{names} must be acquired; timeouts and deleted locks are fatal - C{_LS_ACQUIRE_OPPORTUNISTIC}: C{names} lists names of locks (potentially all within the set) which should be acquired opportunistically, that is failures are ignored @param names: Names of the locks to be acquired @param mode: Lock acquisition mode (one of L{_LS_ACQUIRE_MODES}) @param shared: Whether to acquire in shared mode @param timeout_fn: Function returning remaining timeout (C{None} for opportunistic acquisitions) @param priority: Priority for acquiring locks @param test_notify: Special callback function for unittesting """ assert mode in _LS_ACQUIRE_MODES acquire_list = [] # First we look the locks up on __lockdict. We have no way of being sure # they will still be there after, but this makes it a lot faster should # just one of them be the already wrong. Using a sorted sequence to prevent # deadlocks. for lname in sorted(frozenset(names)): try: lock = self.__lockdict[lname] # raises KeyError if lock is not there except KeyError: # We are acquiring the whole set, it doesn't matter if this particular # element is not there anymore. If, however, only certain names should # be acquired, not finding a lock is an error. if mode == _LS_ACQUIRE_EXACT: raise errors.LockError("Lock '%s' not found in set '%s' (it may have" " been removed)" % (lname, self.name)) else: acquire_list.append((lname, lock)) # This will hold the locknames we effectively acquired. acquired = set() try: # Now acquire_list contains a sorted list of resources and locks we # want. In order to get them we loop on this (private) list and # acquire() them. We gave no real guarantee they will still exist till # this is done but .acquire() itself is safe and will alert us if the # lock gets deleted. for (lname, lock) in acquire_list: if __debug__ and callable(test_notify): test_notify_fn = lambda: test_notify(lname) else: test_notify_fn = None timeout = timeout_fn() try: # raises LockError if the lock was deleted acq_success = lock.acquire(shared=shared, timeout=timeout, priority=priority, test_notify=test_notify_fn) except errors.LockError: if mode in (_LS_ACQUIRE_ALL, _LS_ACQUIRE_OPPORTUNISTIC): # We are acquiring the whole set, it doesn't matter if this # particular element is not there anymore. continue raise errors.LockError("Lock '%s' not found in set '%s' (it may have" " been removed)" % (lname, self.name)) if not acq_success: # Couldn't get lock or timeout occurred if mode == _LS_ACQUIRE_OPPORTUNISTIC: # Ignore timeouts on opportunistic acquisitions continue if timeout is None: # This shouldn't happen as SharedLock.acquire(timeout=None) is # blocking. raise errors.LockError("Failed to get lock %s (set %s)" % (lname, self.name)) raise _AcquireTimeout() try: # now the lock cannot be deleted, we have it! self._add_owned(name=lname) acquired.add(lname) except: # We shouldn't have problems adding the lock to the owners list, but # if we did we'll try to release this lock and re-raise exception. # Of course something is going to be really wrong after this. if lock.is_owned(): lock.release() raise except: # Release all owned locks self._release_and_delete_owned() raise return acquired def downgrade(self, names=None): """Downgrade a set of resource locks from exclusive to shared mode. The locks must have been acquired in exclusive mode. 
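Sketch (member names invented):

      ls.acquire(["a", "b"], shared=0)
      ls.downgrade(names=["a"])  # "a" becomes shared, "b" stays exclusive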
""" assert self.is_owned(), ("downgrade on lockset %s while not owning any" " lock" % self.name) # Support passing in a single resource to downgrade rather than many if isinstance(names, basestring): names = [names] owned = self.list_owned() if names is None: names = owned else: names = set(names) assert owned.issuperset(names), \ ("downgrade() on unheld resources %s (set %s)" % (names.difference(owned), self.name)) for lockname in names: self.__lockdict[lockname].downgrade() # Do we own the lockset in exclusive mode? if self.__lock.is_owned(shared=0): # Have all locks been downgraded? if not compat.any(lock.is_owned(shared=0) for lock in self.__lockdict.values()): self.__lock.downgrade() assert self.__lock.is_owned(shared=1) return True def release(self, names=None): """Release a set of resource locks, at the same level. You must have acquired the locks, either in shared or in exclusive mode, before releasing them. @type names: list of strings, or None @param names: the names of the locks which shall be released (defaults to all the locks acquired at that level). """ assert self.is_owned(), ("release() on lock set %s while not owner" % self.name) # Support passing in a single resource to release rather than many if isinstance(names, basestring): names = [names] if names is None: names = self.list_owned() else: names = set(names) assert self.list_owned().issuperset(names), ( "release() on unheld resources %s (set %s)" % (names.difference(self.list_owned()), self.name)) # First of all let's release the "all elements" lock, if set. # After this 'add' can work again if self.__lock.is_owned(): self.__lock.release() self._del_owned() for lockname in names: # If we are sure the lock doesn't leave __lockdict without being # exclusively held we can do this... self.__lockdict[lockname].release() self._del_owned(name=lockname) def add(self, names, acquired=0, shared=0): """Add a new set of elements to the set @type names: list of strings @param names: names of the new elements to add @type acquired: integer (0/1) used as a boolean @param acquired: pre-acquire the new resource? @type shared: integer (0/1) used as a boolean @param shared: is the pre-acquisition shared? """ # Check we don't already own locks at this level assert not self.is_owned() or self.__lock.is_owned(shared=0), \ ("Cannot add locks if the set %s is only partially owned, or shared" % self.name) # Support passing in a single resource to add rather than many if isinstance(names, basestring): names = [names] # If we don't already own the set-level lock acquired in an exclusive way # we'll get it and note we need to release it later. release_lock = False if not self.__lock.is_owned(): release_lock = True self.__lock.acquire() try: invalid_names = set(self.__names()).intersection(names) if invalid_names: # This must be an explicit raise, not an assert, because assert is # turned off when using optimization, and this can happen because of # concurrency even if the user doesn't want it. raise errors.LockError("duplicate add(%s) on lockset %s" % (invalid_names, self.name)) for lockname in names: lock = SharedLock(self._GetLockName(lockname), monitor=self.__monitor) if acquired: # No need for priority or timeout here as this lock has just been # created lock.acquire(shared=shared) # now the lock cannot be deleted, we have it! try: self._add_owned(name=lockname) except: # We shouldn't have problems adding the lock to the owners list, # but if we did we'll try to release this lock and re-raise # exception. 
Of course something is going to be really wrong, # after this. On the other hand the lock hasn't been added to the # __lockdict yet so no other threads should be pending on it. This # release is just a safety measure. lock.release() raise self.__lockdict[lockname] = lock finally: # Only release __lock if we were not holding it previously. if release_lock: self.__lock.release() return True def remove(self, names): """Remove elements from the lock set. You can either not hold anything in the lockset or already hold a superset of the elements you want to delete, exclusively. @type names: list of strings @param names: names of the resource to remove. @return: a list of locks which we removed; the list is always equal to the names list if we were holding all the locks exclusively """ # Support passing in a single resource to remove rather than many if isinstance(names, basestring): names = [names] # If we own any subset of this lock it must be a superset of what we want # to delete. The ownership must also be exclusive, but that will be checked # by the lock itself. assert not self.is_owned() or self.list_owned().issuperset(names), ( "remove() on acquired lockset %s while not owning all elements" % self.name) removed = [] for lname in names: # Calling delete() acquires the lock exclusively if we don't already own # it, and causes all pending and subsequent lock acquires to fail. It's # fine to call it out of order because delete() also implies release(), # and the assertion above guarantees that if we either already hold # everything we want to delete, or we hold none. try: self.__lockdict[lname].delete() removed.append(lname) except (KeyError, errors.LockError): # This cannot happen if we were already holding it, verify: assert not self.is_owned(), ("remove failed while holding lockset %s" % self.name) else: # If no LockError was raised we are the ones who deleted the lock. # This means we can safely remove it from lockdict, as any further or # pending delete() or acquire() will fail (and nobody can have the lock # since before our call to delete()). # # This is done in an else clause because if the exception was thrown # it's the job of the one who actually deleted it. del self.__lockdict[lname] # And let's remove it from our private list if we owned it. if self.is_owned(): self._del_owned(name=lname) return removed # Locking levels, must be acquired in increasing order. Current rules are: # - At level LEVEL_CLUSTER resides the Big Ganeti Lock (BGL) which must be # acquired before performing any operation, either in shared or exclusive # mode. Acquiring the BGL in exclusive mode is discouraged and should be # avoided.. # - At levels LEVEL_NODE and LEVEL_INSTANCE reside node and instance locks. If # you need more than one node, or more than one instance, acquire them at the # same time. # - LEVEL_NODE_RES is for node resources and should be used by operations with # possibly high impact on the node's disks. # - LEVEL_NODE_ALLOC blocks instance allocations for the whole cluster # ("NAL" is the only lock at this level). It should be acquired in shared # mode when an opcode blocks all or a significant amount of a cluster's # locks. Opcodes doing instance allocations should acquire in exclusive mode. # Once the set of acquired locks for an opcode has been reduced to the working # set, the NAL should be released as well to allow allocations to proceed. 
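# As an illustrative sketch (names invented), a conforming opcode acquires
# strictly downwards through the levels defined below, e.g.:
#   lm.acquire(LEVEL_CLUSTER, [BGL], shared=1)
#   lm.acquire(LEVEL_INSTANCE, ["inst1"], shared=1)
#   lm.acquire(LEVEL_NODE, ["node1-uuid"], shared=1)
# and never in the opposite direction.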
(LEVEL_CLUSTER, LEVEL_INSTANCE, LEVEL_NODE_ALLOC, LEVEL_NODEGROUP, LEVEL_NODE, LEVEL_NODE_RES, LEVEL_NETWORK) = range(0, 7) LEVELS = [ LEVEL_CLUSTER, LEVEL_INSTANCE, LEVEL_NODE_ALLOC, LEVEL_NODEGROUP, LEVEL_NODE, LEVEL_NODE_RES, LEVEL_NETWORK, ] # Lock levels which are modifiable LEVELS_MOD = compat.UniqueFrozenset([ LEVEL_NODE_RES, LEVEL_NODE, LEVEL_NODEGROUP, LEVEL_INSTANCE, LEVEL_NETWORK, ]) #: Lock level names (make sure to use singular form) LEVEL_NAMES = { LEVEL_CLUSTER: "cluster", LEVEL_INSTANCE: "instance", LEVEL_NODE_ALLOC: "node-alloc", LEVEL_NODEGROUP: "nodegroup", LEVEL_NODE: "node", LEVEL_NODE_RES: "node-res", LEVEL_NETWORK: "network", } # Constant for the big ganeti lock BGL = "BGL" #: Node allocation lock NAL = "NAL" class GanetiLockManager: """The Ganeti Locking Library The purpose of this small library is to manage locking for ganeti clusters in a central place, while at the same time doing dynamic checks against possible deadlocks. It will also make it easier to transition to a different lock type should we migrate away from python threads. """ _instance = None def __init__(self, node_uuids, nodegroups, instance_names, networks): """Constructs a new GanetiLockManager object. There should be only a GanetiLockManager object at any time, so this function raises an error if this is not the case. @param node_uuids: list of node UUIDs @param nodegroups: list of nodegroup uuids @param instance_names: list of instance names """ assert self.__class__._instance is None, \ "double GanetiLockManager instance" self.__class__._instance = self self._monitor = LockMonitor() # The keyring contains all the locks, at their level and in the correct # locking order. self.__keyring = { LEVEL_CLUSTER: LockSet([BGL], "cluster", monitor=self._monitor), LEVEL_NODE: LockSet(node_uuids, "node", monitor=self._monitor), LEVEL_NODE_RES: LockSet(node_uuids, "node-res", monitor=self._monitor), LEVEL_NODEGROUP: LockSet(nodegroups, "nodegroup", monitor=self._monitor), LEVEL_INSTANCE: LockSet(instance_names, "instance", monitor=self._monitor), LEVEL_NETWORK: LockSet(networks, "network", monitor=self._monitor), LEVEL_NODE_ALLOC: LockSet([NAL], "node-alloc", monitor=self._monitor), } assert compat.all(ls.name == LEVEL_NAMES[level] for (level, ls) in self.__keyring.items()), \ "Keyring name mismatch" def AddToLockMonitor(self, provider): """Registers a new lock with the monitor. See L{LockMonitor.RegisterLock}. """ return self._monitor.RegisterLock(provider) def QueryLocks(self, fields): """Queries information from all locks. See L{LockMonitor.QueryLocks}. """ return self._monitor.QueryLocks(fields) def _names(self, level): """List the lock names at the given level. This can be used for debugging/testing purposes. @param level: the level whose list of locks to get """ assert level in LEVELS, "Invalid locking level %s" % level return self.__keyring[level]._names() def is_owned(self, level): """Check whether we are owning locks at the given level """ return self.__keyring[level].is_owned() def list_owned(self, level): """Get the set of owned locks at the given level """ return self.__keyring[level].list_owned() def check_owned(self, level, names, shared=-1): """Check if locks at a certain level are owned in a specific mode. @see: L{LockSet.check_owned} """ return self.__keyring[level].check_owned(names, shared=shared) def owning_all(self, level): """Checks whether current thread owns all locks at a certain level. 
@see: L{LockSet.owning_all} """ return self.__keyring[level].owning_all() def _upper_owned(self, level): """Check that we don't own any lock at a level greater than the given one. """ # This way of checking only works if LEVELS[i] = i, which we check for in # the test cases. return compat.any((self.is_owned(l) for l in LEVELS[level + 1:])) def _BGL_owned(self): # pylint: disable=C0103 """Check if the current thread owns the BGL. Both an exclusive or a shared acquisition work. """ return BGL in self.__keyring[LEVEL_CLUSTER].list_owned() @staticmethod def _contains_BGL(level, names): # pylint: disable=C0103 """Check if the level contains the BGL. Check if acting on the given level and set of names will change the status of the Big Ganeti Lock. """ return level == LEVEL_CLUSTER and (names is None or BGL in names) def acquire(self, level, names, timeout=None, shared=0, priority=None, opportunistic=False): """Acquire a set of resource locks, at the same level. @type level: member of locking.LEVELS @param level: the level at which the locks shall be acquired @type names: list of strings (or string) @param names: the names of the locks which shall be acquired (special lock names, or instance/node names) @type shared: integer (0/1) used as a boolean @param shared: whether to acquire in shared mode; by default an exclusive lock will be acquired @type timeout: float @param timeout: Maximum time to acquire all locks @type priority: integer @param priority: Priority for acquiring lock @type opportunistic: boolean @param opportunistic: Acquire locks opportunistically; use the return value to determine which locks were actually acquired """ assert level in LEVELS, "Invalid locking level %s" % level # Check that we are either acquiring the Big Ganeti Lock or we already own # it. Some "legacy" opcodes need to be sure they are run non-concurrently # so even if we've migrated we need to at least share the BGL to be # compatible with them. Of course if we own the BGL exclusively there's no # point in acquiring any other lock, unless perhaps we are half way through # the migration of the current opcode. assert (self._contains_BGL(level, names) or self._BGL_owned()), ( "You must own the Big Ganeti Lock before acquiring any other") # Check we don't own locks at the same or upper levels. assert not self._upper_owned(level), ("Cannot acquire locks at a level" " while owning some at a greater one") # Acquire the locks in the set. return self.__keyring[level].acquire(names, shared=shared, timeout=timeout, priority=priority, opportunistic=opportunistic) def downgrade(self, level, names=None): """Downgrade a set of resource locks from exclusive to shared mode. You must have acquired the locks in exclusive mode. @type level: member of locking.LEVELS @param level: the level at which the locks shall be downgraded @type names: list of strings, or None @param names: the names of the locks which shall be downgraded (defaults to all the locks acquired at the level) """ assert level in LEVELS, "Invalid locking level %s" % level return self.__keyring[level].downgrade(names=names) def release(self, level, names=None): """Release a set of resource locks, at the same level. You must have acquired the locks, either in shared or in exclusive mode, before releasing them. 
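Sketch of a conforming release order (illustrative; the BGL goes last):

      lm.release(LEVEL_NODE)
      lm.release(LEVEL_CLUSTER, [BGL])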
@type level: member of locking.LEVELS @param level: the level at which the locks shall be released @type names: list of strings, or None @param names: the names of the locks which shall be released (defaults to all the locks acquired at that level) """ assert level in LEVELS, "Invalid locking level %s" % level assert (not self._contains_BGL(level, names) or not self._upper_owned(LEVEL_CLUSTER)), ( "Cannot release the Big Ganeti Lock while holding something" " at upper levels (%r)" % (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[i], self.list_owned(i)) for i in self.__keyring.keys()]), )) # Release will complain if we don't own the locks already return self.__keyring[level].release(names) def add(self, level, names, acquired=0, shared=0): """Add locks at the specified level. @type level: member of locking.LEVELS_MOD @param level: the level at which the locks shall be added @type names: list of strings @param names: names of the locks to acquire @type acquired: integer (0/1) used as a boolean @param acquired: whether to acquire the newly added locks @type shared: integer (0/1) used as a boolean @param shared: whether the acquisition will be shared """ assert level in LEVELS_MOD, "Invalid or immutable level %s" % level assert self._BGL_owned(), ("You must own the BGL before performing other" " operations") assert not self._upper_owned(level), ("Cannot add locks at a level" " while owning some at a greater one") return self.__keyring[level].add(names, acquired=acquired, shared=shared) def remove(self, level, names): """Remove locks from the specified level. You must either already own the locks you are trying to remove exclusively or not own any lock at an upper level. @type level: member of locking.LEVELS_MOD @param level: the level at which the locks shall be removed @type names: list of strings @param names: the names of the locks which shall be removed (special lock names, or instance/node names) """ assert level in LEVELS_MOD, "Invalid or immutable level %s" % level assert self._BGL_owned(), ("You must own the BGL before performing other" " operations") # Check we either own the level or don't own anything from here # up. LockSet.remove() will check the case in which we don't own # all the needed resources, or we have a shared ownership. assert self.is_owned(level) or not self._upper_owned(level), ( "Cannot remove locks at a level while not owning it or" " owning some at a greater one") return self.__keyring[level].remove(names) def _MonitorSortKey((item, idx, num)): """Sorting key function. Sort by name, registration order and then order of information. This provides a stable sort order over different providers, even if they return the same name. """ (name, _, _, _) = item return (utils.NiceSortKey(name), num, idx) class LockMonitor(object): _LOCK_ATTR = "_lock" def __init__(self): """Initializes this class. """ self._lock = SharedLock("LockMonitor") # Counter for stable sorting self._counter = itertools.count(0) # Tracked locks. Weak references are used to avoid issues with circular # references and deletion. self._locks = weakref.WeakKeyDictionary() @ssynchronized(_LOCK_ATTR) def RegisterLock(self, provider): """Registers a new lock. @param provider: Object with a callable method named C{GetLockInfo}, taking a single C{set} containing the requested information items @note: It would be nicer to only receive the function generating the requested information but, as it turns out, weak references to bound methods (e.g. 
C{self.GetLockInfo}) are tricky; there are several workarounds, but none of the ones I found works properly in combination with a standard C{WeakKeyDictionary} """ assert provider not in self._locks, "Duplicate registration" # There used to be a check for duplicate names here. As it turned out, when # a lock is re-created with the same name in a very short timeframe, the # previous instance might not yet be removed from the weakref dictionary. # By keeping track of the order of incoming registrations, a stable sort # ordering can still be guaranteed. self._locks[provider] = self._counter.next() def _GetLockInfo(self, requested): """Get information from all locks. """ # Must hold lock while getting consistent list of tracked items self._lock.acquire(shared=1) try: items = self._locks.items() finally: self._lock.release() return [(info, idx, num) for (provider, num) in items for (idx, info) in enumerate(provider.GetLockInfo(requested))] def _Query(self, fields): """Queries information from all locks. @type fields: list of strings @param fields: List of fields to return """ qobj = query.Query(query.LOCK_FIELDS, fields) # Get all data with internal lock held and then sort by name and incoming # order lockinfo = sorted(self._GetLockInfo(qobj.RequestedData()), key=_MonitorSortKey) # Extract lock information and build query data return (qobj, query.LockQueryData(map(compat.fst, lockinfo))) def QueryLocks(self, fields): """Queries information from all locks. @type fields: list of strings @param fields: List of fields to return """ (qobj, ctx) = self._Query(fields) # Prepare query response return query.GetQueryResponse(qobj, ctx) ganeti-2.9.3/lib/rpc_defs.py0000644000000000000000000006217012271422343015705 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """RPC definitions for communication between master and node daemons. RPC definition fields: - Name as string - L{SINGLE} for single-node calls, L{MULTI} for multi-node - Name resolver option(s), can be callable receiving all arguments in a tuple - Timeout (e.g. L{constants.RPC_TMO_NORMAL}), or callback receiving all arguments in a tuple to calculate timeout - List of arguments as tuples - Name as string - Argument kind used for encoding/decoding - Description for docstring (can be C{None}) - Custom body encoder (e.g. for preparing per-node bodies) - Return value wrapper (e.g. for deserializing into L{objects}-based objects) - Short call description for docstring """ from ganeti import constants from ganeti import utils from ganeti import objects # Guidelines for choosing timeouts: # - call used during watcher: timeout of 1min, constants.RPC_TMO_URGENT # - trivial (but be sure it is trivial) # (e.g. 
reading a file): 5min, constants.RPC_TMO_FAST # - other calls: 15 min, constants.RPC_TMO_NORMAL # - special calls (instance add, etc.): # either constants.RPC_TMO_SLOW (1h) or huge timeouts SINGLE = "single-node" MULTI = "multi-node" ACCEPT_OFFLINE_NODE = object() # Constants for encoding/decoding (ED_OBJECT_DICT, ED_OBJECT_DICT_LIST, ED_INST_DICT, ED_INST_DICT_HVP_BEP_DP, ED_NODE_TO_DISK_DICT, ED_INST_DICT_OSP_DP, ED_IMPEXP_IO, ED_FILE_DETAILS, ED_FINALIZE_EXPORT_DISKS, ED_COMPRESS, ED_BLOCKDEV_RENAME, ED_DISKS_DICT_DP, ED_MULTI_DISKS_DICT_DP, ED_SINGLE_DISK_DICT_DP, ED_NIC_DICT) = range(1, 16) def _Prepare(calls): """Converts list of calls to dictionary. """ return utils.SequenceToDict(calls) def _MigrationStatusPostProc(result): """Post-processor for L{rpc.RpcRunner.call_instance_get_migration_status}. """ if not result.fail_msg and result.payload is not None: result.payload = objects.MigrationStatus.FromDict(result.payload) return result def _BlockdevFindPostProc(result): """Post-processor for L{rpc.RpcRunner.call_blockdev_find}. """ if not result.fail_msg and result.payload is not None: result.payload = objects.BlockDevStatus.FromDict(result.payload) return result def _BlockdevGetMirrorStatusPostProc(result): """Post-processor for L{rpc.RpcRunner.call_blockdev_getmirrorstatus}. """ if not result.fail_msg: result.payload = map(objects.BlockDevStatus.FromDict, result.payload) return result def _BlockdevGetMirrorStatusMultiPreProc(node, args): """Prepares the appropriate node values for blockdev_getmirrorstatus_multi. """ # there should be only one argument to this RPC, already holding a # node->disks dictionary, we just need to extract the value for the # current node assert len(args) == 1 return [args[0][node]] def _BlockdevGetMirrorStatusMultiPostProc(result): """Post-processor for L{rpc.RpcRunner.call_blockdev_getmirrorstatus_multi}. """ if not result.fail_msg: for idx, (success, status) in enumerate(result.payload): if success: result.payload[idx] = (success, objects.BlockDevStatus.FromDict(status)) return result def _NodeInfoPreProc(node, args): """Prepare the storage_units argument for node_info calls.""" assert len(args) == 2 # The storage_units argument is either a dictionary with one value for each # node, or a fixed value to be used for all the nodes if type(args[0]) is dict: return [args[0][node], args[1]] else: return args def _DrbdCallsPreProc(node, args): """Add the target node UUID as additional field for DRBD related calls.""" return args + [node] def _OsGetPostProc(result): """Post-processor for L{rpc.RpcRunner.call_os_get}. """ if not result.fail_msg and isinstance(result.payload, dict): result.payload = objects.OS.FromDict(result.payload) return result def _ImpExpStatusPostProc(result): """Post-processor for import/export status. @rtype: Payload containing list of L{objects.ImportExportStatus} instances @return: Returns a list of the state of each named import/export or None if a status couldn't be retrieved """ if not result.fail_msg: decoded = [] for i in result.payload: if i is None: decoded.append(None) continue decoded.append(objects.ImportExportStatus.FromDict(i)) result.payload = decoded return result def _TestDelayTimeout((duration, )): """Calculate timeout for "test_delay" RPC. 
""" return int(duration + 5) _FILE_STORAGE_CALLS = [ ("file_storage_dir_create", SINGLE, None, constants.RPC_TMO_FAST, [ ("file_storage_dir", None, "File storage directory"), ], None, None, "Create the given file storage directory"), ("file_storage_dir_remove", SINGLE, None, constants.RPC_TMO_FAST, [ ("file_storage_dir", None, "File storage directory"), ], None, None, "Remove the given file storage directory"), ("file_storage_dir_rename", SINGLE, None, constants.RPC_TMO_FAST, [ ("old_file_storage_dir", None, "Old name"), ("new_file_storage_dir", None, "New name"), ], None, None, "Rename file storage directory"), ] _STORAGE_CALLS = [ ("storage_list", MULTI, None, constants.RPC_TMO_NORMAL, [ ("su_name", None, None), ("su_args", None, None), ("name", None, None), ("fields", None, None), ], None, None, "Get list of storage units"), ("storage_modify", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("su_name", None, None), ("su_args", None, None), ("name", None, None), ("changes", None, None), ], None, None, "Modify a storage unit"), ("storage_execute", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("su_name", None, None), ("su_args", None, None), ("name", None, None), ("op", None, None), ], None, None, "Executes an operation on a storage unit"), ] _INSTANCE_CALLS = [ ("instance_info", SINGLE, None, constants.RPC_TMO_URGENT, [ ("instance", None, "Instance name"), ("hname", None, "Hypervisor type"), ("hvparams", None, "Hypervisor parameters"), ], None, None, "Returns information about a single instance"), ("all_instances_info", MULTI, None, constants.RPC_TMO_URGENT, [ ("hypervisor_list", None, "Hypervisors to query for instances"), ("all_hvparams", None, "Dictionary mapping hypervisor names to hvparams"), ], None, None, "Returns information about all instances on the given nodes"), ("instance_list", MULTI, None, constants.RPC_TMO_URGENT, [ ("hypervisor_list", None, "Hypervisors to query for instances"), ("hvparams", None, "Hvparams of all hypervisors"), ], None, None, "Returns the list of running instances on the given nodes"), ("instance_reboot", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("inst", ED_INST_DICT, "Instance object"), ("reboot_type", None, None), ("shutdown_timeout", None, None), ("reason", None, "The reason for the reboot"), ], None, None, "Returns the list of running instances on the given nodes"), ("instance_shutdown", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ("timeout", None, None), ("reason", None, "The reason for the shutdown"), ], None, None, "Stops an instance"), ("instance_balloon_memory", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ("memory", None, None), ], None, None, "Modify the amount of an instance's runtime memory"), ("instance_run_rename", SINGLE, None, constants.RPC_TMO_SLOW, [ ("instance", ED_INST_DICT, "Instance object"), ("old_name", None, None), ("debug", None, None), ], None, None, "Run the OS rename script for an instance"), ("instance_migratable", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ], None, None, "Checks whether the given instance can be migrated"), ("migration_info", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ], None, None, "Gather the information necessary to prepare an instance migration"), ("accept_instance", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ("info", None, "Result for the call_migration_info call"), ("target", None, 
"Target hostname (usually an IP address)"), ], None, None, "Prepare a node to accept an instance"), ("instance_finalize_migration_dst", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, "Instance object"), ("info", None, "Result for the call_migration_info call"), ("success", None, "Whether the migration was a success or failure"), ], None, None, "Finalize any target-node migration specific operation"), ("instance_migrate", SINGLE, None, constants.RPC_TMO_SLOW, [ ("cluster_name", None, "Cluster name"), ("instance", ED_INST_DICT, "Instance object"), ("target", None, "Target node name"), ("live", None, "Whether the migration should be done live or not"), ], None, None, "Migrate an instance"), ("instance_finalize_migration_src", SINGLE, None, constants.RPC_TMO_SLOW, [ ("instance", ED_INST_DICT, "Instance object"), ("success", None, "Whether the migration succeeded or not"), ("live", None, "Whether the user requested a live migration or not"), ], None, None, "Finalize the instance migration on the source node"), ("instance_get_migration_status", SINGLE, None, constants.RPC_TMO_SLOW, [ ("instance", ED_INST_DICT, "Instance object"), ], None, _MigrationStatusPostProc, "Report migration status"), ("instance_start", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance_hvp_bep", ED_INST_DICT_HVP_BEP_DP, None), ("startup_paused", None, None), ("reason", None, "The reason for the startup"), ], None, None, "Starts an instance"), ("instance_os_add", SINGLE, None, constants.RPC_TMO_1DAY, [ ("instance_osp", ED_INST_DICT_OSP_DP, None), ("reinstall", None, None), ("debug", None, None), ], None, None, "Starts an instance"), ] _IMPEXP_CALLS = [ ("import_start", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("opts", ED_OBJECT_DICT, None), ("instance", ED_INST_DICT, None), ("component", None, None), ("dest", ED_IMPEXP_IO, "Import destination"), ], None, None, "Starts an import daemon"), ("export_start", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("opts", ED_OBJECT_DICT, None), ("host", None, None), ("port", None, None), ("instance", ED_INST_DICT, None), ("component", None, None), ("source", ED_IMPEXP_IO, "Export source"), ], None, None, "Starts an export daemon"), ("impexp_status", SINGLE, None, constants.RPC_TMO_FAST, [ ("names", None, "Import/export names"), ], None, _ImpExpStatusPostProc, "Gets the status of an import or export"), ("impexp_abort", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("name", None, "Import/export name"), ], None, None, "Aborts an import or export"), ("impexp_cleanup", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("name", None, "Import/export name"), ], None, None, "Cleans up after an import or export"), ("export_info", SINGLE, None, constants.RPC_TMO_FAST, [ ("path", None, None), ], None, None, "Queries the export information in a given path"), ("finalize_export", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance", ED_INST_DICT, None), ("snap_disks", ED_FINALIZE_EXPORT_DISKS, None), ], None, None, "Request the completion of an export operation"), ("export_list", MULTI, None, constants.RPC_TMO_FAST, [], None, None, "Gets the stored exports list"), ("export_remove", SINGLE, None, constants.RPC_TMO_FAST, [ ("export", None, None), ], None, None, "Requests removal of a given export"), ] _X509_CALLS = [ ("x509_cert_create", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("validity", None, "Validity in seconds"), ], None, None, "Creates a new X509 certificate for SSL/TLS"), ("x509_cert_remove", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("name", None, "Certificate name"), ], None, 
None, "Removes an X509 certificate"), ] _BLOCKDEV_CALLS = [ ("bdev_sizes", MULTI, None, constants.RPC_TMO_URGENT, [ ("devices", None, None), ], None, None, "Gets the sizes of requested block devices present on a node"), ("blockdev_create", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("bdev", ED_OBJECT_DICT, None), ("size", None, None), ("owner", None, None), ("on_primary", None, None), ("info", None, None), ("exclusive_storage", None, None), ], None, None, "Request creation of a given block device"), ("blockdev_wipe", SINGLE, None, constants.RPC_TMO_SLOW, [ ("bdev", ED_SINGLE_DISK_DICT_DP, None), ("offset", None, None), ("size", None, None), ], None, None, "Request a wipe of the given block device at the given offset and size"), ("blockdev_remove", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("bdev", ED_OBJECT_DICT, None), ], None, None, "Request removal of a given block device"), ("blockdev_pause_resume_sync", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disks", ED_DISKS_DICT_DP, None), ("pause", None, None), ], None, None, "Request a pause/resume of given block device"), ("blockdev_assemble", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disk", ED_SINGLE_DISK_DICT_DP, None), ("owner", None, None), ("on_primary", None, None), ("idx", None, None), ], None, None, "Request assembling of a given block device"), ("blockdev_shutdown", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disk", ED_SINGLE_DISK_DICT_DP, None), ], None, None, "Request shutdown of a given block device"), ("blockdev_addchildren", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("bdev", ED_SINGLE_DISK_DICT_DP, None), ("ndevs", ED_OBJECT_DICT_LIST, None), ], None, None, "Request adding a list of children to a (mirroring) device"), ("blockdev_removechildren", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("bdev", ED_OBJECT_DICT, None), ("ndevs", ED_OBJECT_DICT_LIST, None), ], None, None, "Request removing a list of children from a (mirroring) device"), ("blockdev_close", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("instance_name", None, None), ("disks", ED_OBJECT_DICT_LIST, None), ], None, None, "Closes the given block devices"), ("blockdev_getdimensions", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disks", ED_OBJECT_DICT_LIST, None), ], None, None, "Returns size and spindles of the given disks"), ("drbd_disconnect_net", MULTI, None, constants.RPC_TMO_NORMAL, [ ("nodes_ip", None, None), ("disks", ED_OBJECT_DICT_LIST, None), ], _DrbdCallsPreProc, None, "Disconnects the network of the given drbd devices"), ("drbd_attach_net", MULTI, None, constants.RPC_TMO_NORMAL, [ ("nodes_ip", None, None), ("disks", ED_DISKS_DICT_DP, None), ("instance_name", None, None), ("multimaster", None, None), ], _DrbdCallsPreProc, None, "Connects the given DRBD devices"), ("drbd_wait_sync", MULTI, None, constants.RPC_TMO_SLOW, [ ("nodes_ip", None, None), ("disks", ED_DISKS_DICT_DP, None), ], _DrbdCallsPreProc, None, "Waits until the synchronization of drbd devices is complete"), ("drbd_needs_activation", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("nodes_ip", None, None), ("disks", ED_MULTI_DISKS_DICT_DP, None), ], _DrbdCallsPreProc, None, "Returns the drbd disks which need activation"), ("blockdev_grow", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("cf_bdev", ED_SINGLE_DISK_DICT_DP, None), ("amount", None, None), ("dryrun", None, None), ("backingstore", None, None), ("es_flag", None, None), ], None, None, "Request growing of the given block device by a" " given amount"), ("blockdev_export", SINGLE, None, constants.RPC_TMO_1DAY, [ ("cf_bdev", ED_SINGLE_DISK_DICT_DP, None),
("dest_node_ip", None, None), ("dest_path", None, None), ("cluster_name", None, None), ], None, None, "Export a given disk to another node"), ("blockdev_snapshot", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("cf_bdev", ED_SINGLE_DISK_DICT_DP, None), ], None, None, "Creates a snapshot of the given block device"), ("blockdev_rename", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("devlist", ED_BLOCKDEV_RENAME, None), ], None, None, "Request rename of the given block devices"), ("blockdev_find", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disk", ED_OBJECT_DICT, None), ], None, _BlockdevFindPostProc, "Request identification of a given block device"), ("blockdev_getmirrorstatus", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disks", ED_DISKS_DICT_DP, None), ], None, _BlockdevGetMirrorStatusPostProc, "Request status of a (mirroring) device"), ("blockdev_getmirrorstatus_multi", MULTI, None, constants.RPC_TMO_NORMAL, [ ("node_disks", ED_NODE_TO_DISK_DICT, None), ], _BlockdevGetMirrorStatusMultiPreProc, _BlockdevGetMirrorStatusMultiPostProc, "Request status of (mirroring) devices from multiple nodes"), ("blockdev_setinfo", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("disk", ED_OBJECT_DICT, None), ("info", None, None), ], None, None, "Sets metadata information on a given block device"), ] _OS_CALLS = [ ("os_diagnose", MULTI, None, constants.RPC_TMO_FAST, [], None, None, "Request a diagnose of OS definitions"), ("os_validate", MULTI, None, constants.RPC_TMO_FAST, [ ("required", None, None), ("name", None, None), ("checks", None, None), ("params", None, None), ], None, None, "Run a validation routine for a given OS"), ("os_get", SINGLE, None, constants.RPC_TMO_FAST, [ ("name", None, None), ], None, _OsGetPostProc, "Returns an OS definition"), ] _EXTSTORAGE_CALLS = [ ("extstorage_diagnose", MULTI, None, constants.RPC_TMO_FAST, [], None, None, "Request a diagnose of ExtStorage Providers"), ] _NODE_CALLS = [ ("node_has_ip_address", SINGLE, None, constants.RPC_TMO_FAST, [ ("address", None, "IP address"), ], None, None, "Checks if a node has the given IP address"), ("node_info", MULTI, None, constants.RPC_TMO_URGENT, [ ("storage_units", None, "List of tuples '<storage_type>,<key>,[<param>]' to ask for disk space" " information; the parameter list varies depending on the storage_type"), ("hv_specs", None, "List of hypervisor specifications (name, hvparams) to ask for node " "information"), ], _NodeInfoPreProc, None, "Return node information"), ("node_verify", MULTI, None, constants.RPC_TMO_NORMAL, [ ("checkdict", None, "What to verify"), ("cluster_name", None, "Cluster name"), ("all_hvparams", None, "Dictionary mapping hypervisor names to hvparams"), ], None, None, "Request verification of given parameters"), ("node_volumes", MULTI, None, constants.RPC_TMO_FAST, [], None, None, "Gets all volumes on node(s)"), ("node_demote_from_mc", SINGLE, None, constants.RPC_TMO_FAST, [], None, None, "Demote a node from the master candidate role"), ("node_powercycle", SINGLE, ACCEPT_OFFLINE_NODE, constants.RPC_TMO_NORMAL, [ ("hypervisor", None, "Hypervisor type"), ("hvparams", None, "Hypervisor parameters"), ], None, None, "Tries to powercycle a node"), ] _MISC_CALLS = [ ("lv_list", MULTI, None, constants.RPC_TMO_URGENT, [ ("vg_name", None, None), ], None, None, "Gets the logical volumes present in a given volume group"), ("vg_list", MULTI, None, constants.RPC_TMO_URGENT, [], None, None, "Gets the volume group list"), ("bridges_exist", SINGLE, None, constants.RPC_TMO_URGENT, [ ("bridges_list", None, "Bridges which must be present on remote node"), ], None, None,
"Checks if a node has all the bridges given"), ("etc_hosts_modify", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("mode", None, "Mode to operate; currently L{constants.ETC_HOSTS_ADD} or" " L{constants.ETC_HOSTS_REMOVE}"), ("name", None, "Hostname to be modified"), ("ip", None, "IP address (L{constants.ETC_HOSTS_ADD} only)"), ], None, None, "Modify hosts file with name"), ("drbd_helper", MULTI, None, constants.RPC_TMO_URGENT, [], None, None, "Gets DRBD helper"), ("restricted_command", MULTI, None, constants.RPC_TMO_SLOW, [ ("cmd", None, "Command name"), ], None, None, "Runs restricted command"), ("run_oob", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("oob_program", None, None), ("command", None, None), ("remote_node", None, None), ("timeout", None, None), ], None, None, "Runs out-of-band command"), ("hooks_runner", MULTI, None, constants.RPC_TMO_NORMAL, [ ("hpath", None, None), ("phase", None, None), ("env", None, None), ], None, None, "Call the hooks runner"), ("iallocator_runner", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("name", None, "Iallocator name"), ("idata", None, "JSON-encoded input string"), ], None, None, "Call an iallocator on a remote node"), ("test_delay", MULTI, None, _TestDelayTimeout, [ ("duration", None, None), ], None, None, "Sleep for a fixed time on given node(s)"), ("hypervisor_validate_params", MULTI, None, constants.RPC_TMO_NORMAL, [ ("hvname", None, "Hypervisor name"), ("hvfull", None, "Parameters to be validated"), ], None, None, "Validate hypervisor params"), ("get_watcher_pause", SINGLE, None, constants.RPC_TMO_URGENT, [], None, None, "Get watcher pause end"), ("set_watcher_pause", MULTI, None, constants.RPC_TMO_URGENT, [ ("until", None, None), ], None, None, "Set watcher pause end"), ] CALLS = { "RpcClientDefault": _Prepare(_IMPEXP_CALLS + _X509_CALLS + _OS_CALLS + _NODE_CALLS + _FILE_STORAGE_CALLS + _MISC_CALLS + _INSTANCE_CALLS + _BLOCKDEV_CALLS + _STORAGE_CALLS + _EXTSTORAGE_CALLS), "RpcClientJobQueue": _Prepare([ ("jobqueue_update", MULTI, None, constants.RPC_TMO_URGENT, [ ("file_name", None, None), ("content", ED_COMPRESS, None), ], None, None, "Update job queue file"), ("jobqueue_purge", SINGLE, None, constants.RPC_TMO_NORMAL, [], None, None, "Purge job queue"), ("jobqueue_rename", MULTI, None, constants.RPC_TMO_URGENT, [ ("rename", None, None), ], None, None, "Rename job queue file"), ("jobqueue_set_drain_flag", MULTI, None, constants.RPC_TMO_URGENT, [ ("flag", None, None), ], None, None, "Set job queue drain flag"), ]), "RpcClientBootstrap": _Prepare([ ("node_start_master_daemons", SINGLE, None, constants.RPC_TMO_FAST, [ ("no_voting", None, None), ], None, None, "Starts master daemons on a node"), ("node_activate_master_ip", SINGLE, None, constants.RPC_TMO_FAST, [ ("master_params", ED_OBJECT_DICT, "Network parameters of the master"), ("use_external_mip_script", None, "Whether to use the user-provided master IP address setup script"), ], None, None, "Activates master IP on a node"), ("node_stop_master", SINGLE, None, constants.RPC_TMO_FAST, [], None, None, "Deactivates master IP and stops master daemons on a node"), ("node_deactivate_master_ip", SINGLE, None, constants.RPC_TMO_FAST, [ ("master_params", ED_OBJECT_DICT, "Network parameters of the master"), ("use_external_mip_script", None, "Whether to use the user-provided master IP address setup script"), ], None, None, "Deactivates master IP on a node"), ("node_change_master_netmask", SINGLE, None, constants.RPC_TMO_FAST, [ ("old_netmask", None, "The old value of the netmask"), ("netmask", None, "The new 
value of the netmask"), ("master_ip", None, "The master IP"), ("master_netdev", None, "The master network device"), ], None, None, "Change master IP netmask"), ("node_leave_cluster", SINGLE, None, constants.RPC_TMO_NORMAL, [ ("modify_ssh_setup", None, None), ], None, None, "Requests a node to clean the cluster information it has"), ("master_info", MULTI, None, constants.RPC_TMO_URGENT, [], None, None, "Query master info"), ]), "RpcClientDnsOnly": _Prepare([ ("version", MULTI, ACCEPT_OFFLINE_NODE, constants.RPC_TMO_URGENT, [], None, None, "Query node version"), ("node_verify_light", MULTI, None, constants.RPC_TMO_NORMAL, [ ("checkdict", None, "What to verify"), ("cluster_name", None, "Cluster name"), ("hvparams", None, "Dictionary mapping hypervisor names to hvparams"), ], None, None, "Request verification of given parameters"), ]), "RpcClientConfig": _Prepare([ ("upload_file", MULTI, None, constants.RPC_TMO_NORMAL, [ ("file_name", ED_FILE_DETAILS, None), ], None, None, "Upload a file"), ("write_ssconf_files", MULTI, None, constants.RPC_TMO_NORMAL, [ ("values", None, None), ], None, None, "Write ssconf files"), ]), } ganeti-2.9.3/lib/http/0000755000000000000000000000000012271445544014527 5ustar00rootroot00000000000000ganeti-2.9.3/lib/http/client.py0000644000000000000000000002554212230001635016347 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """HTTP client module. """ import logging import pycurl import threading from cStringIO import StringIO from ganeti import http from ganeti import compat from ganeti import netutils from ganeti import locking class HttpClientRequest(object): def __init__(self, host, port, method, path, headers=None, post_data=None, read_timeout=None, curl_config_fn=None, nicename=None, completion_cb=None): """Describes an HTTP request. 
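A minimal construction sketch; host, port and path are made-up example values:

  req = HttpClientRequest("node1.example.com", 5900, http.HTTP_GET,
                          "/version", read_timeout=30)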
@type host: string @param host: Hostname @type port: int @param port: Port @type method: string @param method: Method name @type path: string @param path: Request path @type headers: list or None @param headers: Additional headers to send, list of strings @type post_data: string or None @param post_data: Additional data to send @type read_timeout: int @param read_timeout: if passed, it will be used as the read timeout while reading the response from the server @type curl_config_fn: callable @param curl_config_fn: Function to configure cURL object before request @type nicename: string @param nicename: Name, presentable to a user, to describe this request (no whitespace) @type completion_cb: callable accepting this request object as a single parameter @param completion_cb: Callback for request completion """ assert path.startswith("/"), "Path must start with slash (/)" assert curl_config_fn is None or callable(curl_config_fn) assert completion_cb is None or callable(completion_cb) # Request attributes self.host = host self.port = port self.method = method self.path = path self.read_timeout = read_timeout self.curl_config_fn = curl_config_fn self.nicename = nicename self.completion_cb = completion_cb if post_data is None: self.post_data = "" else: self.post_data = post_data if headers is None: self.headers = [] elif isinstance(headers, dict): # Support for old interface self.headers = ["%s: %s" % (name, value) for name, value in headers.items()] else: self.headers = headers # Response status self.success = None self.error = None # Response attributes self.resp_status_code = None self.resp_body = None def __repr__(self): status = ["%s.%s" % (self.__class__.__module__, self.__class__.__name__), "%s:%s" % (self.host, self.port), self.method, self.path] return "<%s at %#x>" % (" ".join(status), id(self)) @property def url(self): """Returns the full URL for this request. """ if netutils.IPAddress.IsValid(self.host): address = netutils.FormatAddress((self.host, self.port)) else: address = "%s:%s" % (self.host, self.port) # TODO: Support for non-SSL requests return "https://%s%s" % (address, self.path) def _StartRequest(curl, req): """Starts a request on a cURL object.
@type curl: pycurl.Curl @param curl: cURL object @type req: L{HttpClientRequest} @param req: HTTP request """ logging.debug("Starting request %r", req) url = req.url method = req.method post_data = req.post_data headers = req.headers # PycURL requires strings to be non-unicode assert isinstance(method, str) assert isinstance(url, str) assert isinstance(post_data, str) assert compat.all(isinstance(i, str) for i in headers) # Buffer for response resp_buffer = StringIO() # Configure client for request curl.setopt(pycurl.VERBOSE, False) curl.setopt(pycurl.NOSIGNAL, True) curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION) curl.setopt(pycurl.PROXY, "") curl.setopt(pycurl.CUSTOMREQUEST, str(method)) curl.setopt(pycurl.URL, url) curl.setopt(pycurl.POSTFIELDS, post_data) curl.setopt(pycurl.HTTPHEADER, headers) if req.read_timeout is None: curl.setopt(pycurl.TIMEOUT, 0) else: curl.setopt(pycurl.TIMEOUT, int(req.read_timeout)) # Disable SSL session ID caching (pycurl >= 7.16.0) if hasattr(pycurl, "SSL_SESSIONID_CACHE"): curl.setopt(pycurl.SSL_SESSIONID_CACHE, False) curl.setopt(pycurl.WRITEFUNCTION, resp_buffer.write) # Pass cURL object to external config function if req.curl_config_fn: req.curl_config_fn(curl) return _PendingRequest(curl, req, resp_buffer.getvalue) class _PendingRequest: def __init__(self, curl, req, resp_buffer_read): """Initializes this class. @type curl: pycurl.Curl @param curl: cURL object @type req: L{HttpClientRequest} @param req: HTTP request @type resp_buffer_read: callable @param resp_buffer_read: Function to read response body """ assert req.success is None self._curl = curl self._req = req self._resp_buffer_read = resp_buffer_read def GetCurlHandle(self): """Returns the cURL object. """ return self._curl def GetCurrentRequest(self): """Returns the current request. """ return self._req def Done(self, errmsg): """Finishes a request. @type errmsg: string or None @param errmsg: Error message if request failed """ curl = self._curl req = self._req assert req.success is None, "Request has already been finalized" logging.debug("Request %s finished, errmsg=%s", req, errmsg) req.success = not bool(errmsg) req.error = errmsg # Get HTTP response code req.resp_status_code = curl.getinfo(pycurl.RESPONSE_CODE) req.resp_body = self._resp_buffer_read() # Ensure no potentially large variables are referenced curl.setopt(pycurl.POSTFIELDS, "") curl.setopt(pycurl.WRITEFUNCTION, lambda _: None) if req.completion_cb: req.completion_cb(req) class _NoOpRequestMonitor: # pylint: disable=W0232 """No-op request monitor. """ @staticmethod def acquire(*args, **kwargs): pass release = acquire Disable = acquire class _PendingRequestMonitor: _LOCK = "_lock" def __init__(self, owner, pending_fn): """Initializes this class. """ self._owner = owner self._pending_fn = pending_fn # The lock monitor runs in another thread, hence locking is necessary self._lock = locking.SharedLock("PendingHttpRequests") self.acquire = self._lock.acquire self.release = self._lock.release @locking.ssynchronized(_LOCK) def Disable(self): """Disable monitor. """ self._pending_fn = None @locking.ssynchronized(_LOCK, shared=1) def GetLockInfo(self, requested): # pylint: disable=W0613 """Retrieves information about pending requests. @type requested: set @param requested: Requested information, see C{query.LQ_*} """ # No need to sort here, that's being done by the lock manager and query # library. There are no priorities for requests, hence all show up as # one item under "pending". 
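# Each entry appended below is a (name, mode, owners, pending) 4-tuple,
# e.g. ("rpc/node1.example.com/version", None, ["Thread-1"], None) for a
# made-up in-flight "version" call.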
result = [] if self._pending_fn: owner_name = self._owner.getName() for client in self._pending_fn(): req = client.GetCurrentRequest() if req: if req.nicename is None: name = "%s%s" % (req.host, req.path) else: name = req.nicename result.append(("rpc/%s" % name, None, [owner_name], None)) return result def _ProcessCurlRequests(multi, requests): """cURL request processor. This generator yields a tuple once for every completed request, successful or not. The first value in the tuple is the handle, the second an error message or C{None} for successful requests. @type multi: C{pycurl.CurlMulti} @param multi: cURL multi object @type requests: sequence @param requests: cURL request handles """ for curl in requests: multi.add_handle(curl) while True: (ret, active) = multi.perform() assert ret in (pycurl.E_MULTI_OK, pycurl.E_CALL_MULTI_PERFORM) if ret == pycurl.E_CALL_MULTI_PERFORM: # cURL wants to be called again continue while True: (remaining_messages, successful, failed) = multi.info_read() for curl in successful: multi.remove_handle(curl) yield (curl, None) for curl, errnum, errmsg in failed: multi.remove_handle(curl) yield (curl, "Error %s: %s" % (errnum, errmsg)) if remaining_messages == 0: break if active == 0: # No active handles anymore break # Wait for I/O. The I/O timeout shouldn't be too long so that HTTP # timeouts, which are only evaluated in multi.perform, aren't # unnecessarily delayed. multi.select(1.0) def ProcessRequests(requests, lock_monitor_cb=None, _curl=pycurl.Curl, _curl_multi=pycurl.CurlMulti, _curl_process=_ProcessCurlRequests): """Processes any number of HTTP client requests. @type requests: list of L{HttpClientRequest} @param requests: List of all requests @param lock_monitor_cb: Callable for registering with lock monitor """ assert compat.all((req.error is None and req.success is None and req.resp_status_code is None and req.resp_body is None) for req in requests) # Prepare all requests curl_to_client = \ dict((client.GetCurlHandle(), client) for client in map(lambda req: _StartRequest(_curl(), req), requests)) assert len(curl_to_client) == len(requests) if lock_monitor_cb: monitor = _PendingRequestMonitor(threading.currentThread(), curl_to_client.values) lock_monitor_cb(monitor) else: monitor = _NoOpRequestMonitor # Process all requests and act based on the returned values for (curl, msg) in _curl_process(_curl_multi(), curl_to_client.keys()): monitor.acquire(shared=0) try: curl_to_client.pop(curl).Done(msg) finally: monitor.release() assert not curl_to_client, "Not all requests were processed" # Don't try to read information anymore as all requests have been processed monitor.Disable() assert compat.all(req.error is not None or (req.success and req.resp_status_code is not None and req.resp_body is not None) for req in requests) ganeti-2.9.3/lib/http/server.py0000644000000000000000000004467512271422343016417 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """HTTP server module. """ import BaseHTTPServer import cgi import logging import os import socket import time import signal import asyncore from ganeti import http from ganeti import utils from ganeti import netutils from ganeti import compat from ganeti import errors WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] MONTHNAME = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] # Default error message DEFAULT_ERROR_CONTENT_TYPE = "text/html" DEFAULT_ERROR_MESSAGE = """\
<html>
<head>
<title>Error response</title>

</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
</html>
""" def _DateTimeHeader(gmnow=None): """Return the current date and time formatted for a message header. The time MUST be in the GMT timezone. """ if gmnow is None: gmnow = time.gmtime() (year, month, day, hh, mm, ss, wd, _, _) = gmnow return ("%s, %02d %3s %4d %02d:%02d:%02d GMT" % (WEEKDAYNAME[wd], day, MONTHNAME[month], year, hh, mm, ss)) class _HttpServerRequest(object): """Data structure for HTTP request on server side. """ def __init__(self, method, path, headers, body): # Request attributes self.request_method = method self.request_path = path self.request_headers = headers self.request_body = body # Response attributes self.resp_headers = {} # Private data for request handler (useful in combination with # authentication) self.private = None def __repr__(self): status = ["%s.%s" % (self.__class__.__module__, self.__class__.__name__), self.request_method, self.request_path, "headers=%r" % str(self.request_headers), "body=%r" % (self.request_body, )] return "<%s at %#x>" % (" ".join(status), id(self)) class _HttpServerToClientMessageWriter(http.HttpMessageWriter): """Writes an HTTP response to client. """ def __init__(self, sock, request_msg, response_msg, write_timeout): """Writes the response to the client. @type sock: socket @param sock: Target socket @type request_msg: http.HttpMessage @param request_msg: Request message, required to determine whether response may have a message body @type response_msg: http.HttpMessage @param response_msg: Response message @type write_timeout: float @param write_timeout: Write timeout for socket """ self._request_msg = request_msg self._response_msg = response_msg http.HttpMessageWriter.__init__(self, sock, response_msg, write_timeout) def HasMessageBody(self): """Logic to detect whether response should contain a message body. """ if self._request_msg.start_line: request_method = self._request_msg.start_line.method else: request_method = None response_code = self._response_msg.start_line.code # RFC2616, section 4.3: "A message-body MUST NOT be included in a request # if the specification of the request method (section 5.1.1) does not allow # sending an entity-body in requests" # # RFC2616, section 9.4: "The HEAD method is identical to GET except that # the server MUST NOT return a message-body in the response." # # RFC2616, section 10.2.5: "The 204 response MUST NOT include a # message-body [...]" # # RFC2616, section 10.3.5: "The 304 response MUST NOT contain a # message-body, [...]" return (http.HttpMessageWriter.HasMessageBody(self) and (request_method is not None and request_method != http.HTTP_HEAD) and response_code >= http.HTTP_OK and response_code not in (http.HTTP_NO_CONTENT, http.HTTP_NOT_MODIFIED)) class _HttpClientToServerMessageReader(http.HttpMessageReader): """Reads an HTTP request sent by client. """ # Length limits START_LINE_LENGTH_MAX = 8192 HEADER_LENGTH_MAX = 4096 def ParseStartLine(self, start_line): """Parses the start line sent by client. Example: "GET /index.html HTTP/1.1" @type start_line: string @param start_line: Start line """ # Empty lines are skipped when reading assert start_line logging.debug("HTTP request: %s", start_line) words = start_line.split() if len(words) == 3: [method, path, version] = words if version[:5] != "HTTP/": raise http.HttpBadRequest("Bad request version (%r)" % version) try: base_version_number = version.split("/", 1)[1] version_number = base_version_number.split(".") # RFC 2145 section 3.1 says there can be only one "."
and # - major and minor numbers MUST be treated as # separate integers; # - HTTP/2.4 is a lower version than HTTP/2.13, which in # turn is lower than HTTP/12.3; # - Leading zeros MUST be ignored by recipients. if len(version_number) != 2: raise http.HttpBadRequest("Bad request version (%r)" % version) version_number = (int(version_number[0]), int(version_number[1])) except (ValueError, IndexError): raise http.HttpBadRequest("Bad request version (%r)" % version) if version_number >= (2, 0): raise http.HttpVersionNotSupported("Invalid HTTP Version (%s)" % base_version_number) elif len(words) == 2: version = http.HTTP_0_9 [method, path] = words if method != http.HTTP_GET: raise http.HttpBadRequest("Bad HTTP/0.9 request type (%r)" % method) else: raise http.HttpBadRequest("Bad request syntax (%r)" % start_line) return http.HttpClientToServerStartLine(method, path, version) def _HandleServerRequestInner(handler, req_msg): """Calls the handler function for the current request. """ handler_context = _HttpServerRequest(req_msg.start_line.method, req_msg.start_line.path, req_msg.headers, req_msg.body) logging.debug("Handling request %r", handler_context) try: try: # Authentication, etc. handler.PreHandleRequest(handler_context) # Call actual request handler result = handler.HandleRequest(handler_context) except (http.HttpException, errors.RapiTestResult, KeyboardInterrupt, SystemExit): raise except Exception, err: logging.exception("Caught exception") raise http.HttpInternalServerError(message=str(err)) except: logging.exception("Unknown exception") raise http.HttpInternalServerError(message="Unknown error") if not isinstance(result, basestring): raise http.HttpError("Handler function didn't return string type") return (http.HTTP_OK, handler_context.resp_headers, result) finally: # No reason to keep this any longer, even for exceptions handler_context.private = None class HttpResponder(object): # The default request version. This only affects responses up until # the point where the request line is parsed, so it mainly decides what # the client gets back when sending a malformed request line. # Most web servers default to HTTP 0.9, i.e. don't send a status line. default_request_version = http.HTTP_0_9 responses = BaseHTTPServer.BaseHTTPRequestHandler.responses def __init__(self, handler): """Initializes this class. """ self._handler = handler def __call__(self, fn): """Handles a request. @type fn: callable @param fn: Callback for retrieving HTTP request, must return a tuple containing request message (L{http.HttpMessage}) and C{None} or the message reader (L{_HttpClientToServerMessageReader}) """ response_msg = http.HttpMessage() response_msg.start_line = \ http.HttpServerToClientStartLine(version=self.default_request_version, code=None, reason=None) force_close = True try: (request_msg, req_msg_reader) = fn() response_msg.start_line.version = request_msg.start_line.version # RFC2616, 14.23: All Internet-based HTTP/1.1 servers MUST respond # with a 400 (Bad Request) status code to any HTTP/1.1 request # message which lacks a Host header field. 
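# For example, a bare "GET / HTTP/1.1" request without any header at all
# is answered with 400 here, while the same request as HTTP/1.0 is served
# normally.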
if (request_msg.start_line.version == http.HTTP_1_1 and not (request_msg.headers and http.HTTP_HOST in request_msg.headers)): raise http.HttpBadRequest(message="Missing Host header") (response_msg.start_line.code, response_msg.headers, response_msg.body) = \ _HandleServerRequestInner(self._handler, request_msg) except http.HttpException, err: self._SetError(self.responses, self._handler, response_msg, err) else: # Only wait for client to close if we didn't have any exception. force_close = False return (request_msg, req_msg_reader, force_close, self._Finalize(self.responses, response_msg)) @staticmethod def _SetError(responses, handler, response_msg, err): """Sets the response code and body from a HttpException. @type err: HttpException @param err: Exception instance """ try: (shortmsg, longmsg) = responses[err.code] except KeyError: shortmsg = longmsg = "Unknown" if err.message: message = err.message else: message = shortmsg values = { "code": err.code, "message": cgi.escape(message), "explain": longmsg, } (content_type, body) = handler.FormatErrorMessage(values) headers = { http.HTTP_CONTENT_TYPE: content_type, } if err.headers: headers.update(err.headers) response_msg.start_line.code = err.code response_msg.headers = headers response_msg.body = body @staticmethod def _Finalize(responses, msg): assert msg.start_line.reason is None if not msg.headers: msg.headers = {} msg.headers.update({ # TODO: Keep-alive is not supported http.HTTP_CONNECTION: "close", http.HTTP_DATE: _DateTimeHeader(), http.HTTP_SERVER: http.HTTP_GANETI_VERSION, }) # Get response reason based on code try: code_desc = responses[msg.start_line.code] except KeyError: reason = "" else: (reason, _) = code_desc msg.start_line.reason = reason return msg class HttpServerRequestExecutor(object): """Implements server side of HTTP. This class implements the server side of HTTP. It's based on code of Python's BaseHTTPServer, from both version 2.4 and 3k. It does not support non-ASCII character encodings. Keep-alive connections are not supported. """ # Timeouts in seconds for socket layer WRITE_TIMEOUT = 10 READ_TIMEOUT = 10 CLOSE_TIMEOUT = 1 def __init__(self, server, handler, sock, client_addr): """Initializes this class. """ responder = HttpResponder(handler) # Disable Python's timeout sock.settimeout(None) # Operate in non-blocking mode sock.setblocking(0) request_msg_reader = None force_close = True logging.debug("Connection from %s:%s", client_addr[0], client_addr[1]) try: # Block for closing connection try: # Do the secret SSL handshake if server.using_ssl: sock.set_accept_state() try: http.Handshake(sock, self.WRITE_TIMEOUT) except http.HttpSessionHandshakeUnexpectedEOF: # Ignore rest return (request_msg, request_msg_reader, force_close, response_msg) = \ responder(compat.partial(self._ReadRequest, sock, self.READ_TIMEOUT)) if response_msg: # HttpMessage.start_line can be of different types # Instance of 'HttpClientToServerStartLine' has no 'code' member # pylint: disable=E1103,E1101 logging.info("%s:%s %s %s", client_addr[0], client_addr[1], request_msg.start_line, response_msg.start_line.code) self._SendResponse(sock, request_msg, response_msg, self.WRITE_TIMEOUT) finally: http.ShutdownConnection(sock, self.CLOSE_TIMEOUT, self.WRITE_TIMEOUT, request_msg_reader, force_close) sock.close() finally: logging.debug("Disconnected %s:%s", client_addr[0], client_addr[1]) @staticmethod def _ReadRequest(sock, timeout): """Reads a request sent by client. 
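Returns a (message, reader) tuple; the reader is kept and later handed to L{http.ShutdownConnection}, which uses it to decide whether the peer can be expected to close the connection first.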
""" msg = http.HttpMessage() try: reader = _HttpClientToServerMessageReader(sock, msg, timeout) except http.HttpSocketTimeout: raise http.HttpError("Timeout while reading request") except socket.error, err: raise http.HttpError("Error reading request: %s" % err) return (msg, reader) @staticmethod def _SendResponse(sock, req_msg, msg, timeout): """Sends the response to the client. """ try: _HttpServerToClientMessageWriter(sock, req_msg, msg, timeout) except http.HttpSocketTimeout: raise http.HttpError("Timeout while sending response") except socket.error, err: raise http.HttpError("Error sending response: %s" % err) class HttpServer(http.HttpBase, asyncore.dispatcher): """Generic HTTP server class """ MAX_CHILDREN = 20 def __init__(self, mainloop, local_address, port, handler, ssl_params=None, ssl_verify_peer=False, request_executor_class=None): """Initializes the HTTP server @type mainloop: ganeti.daemon.Mainloop @param mainloop: Mainloop used to poll for I/O events @type local_address: string @param local_address: Local IP address to bind to @type port: int @param port: TCP port to listen on @type ssl_params: HttpSslParams @param ssl_params: SSL key and certificate @type ssl_verify_peer: bool @param ssl_verify_peer: Whether to require client certificate and compare it with our certificate @type request_executor_class: class @param request_executor_class: an class derived from the HttpServerRequestExecutor class """ http.HttpBase.__init__(self) asyncore.dispatcher.__init__(self) if request_executor_class is None: self.request_executor = HttpServerRequestExecutor else: self.request_executor = request_executor_class self.mainloop = mainloop self.local_address = local_address self.port = port self.handler = handler family = netutils.IPAddress.GetAddressFamily(local_address) self.socket = self._CreateSocket(ssl_params, ssl_verify_peer, family) # Allow port to be reused self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self._children = [] self.set_socket(self.socket) self.accepting = True mainloop.RegisterSignal(self) def Start(self): self.socket.bind((self.local_address, self.port)) self.socket.listen(1024) def Stop(self): self.socket.close() def handle_accept(self): self._IncomingConnection() def OnSignal(self, signum): if signum == signal.SIGCHLD: self._CollectChildren(True) def _CollectChildren(self, quick): """Checks whether any child processes are done @type quick: bool @param quick: Whether to only use non-blocking functions """ if not quick: # Don't wait for other processes if it should be a quick check while len(self._children) > self.MAX_CHILDREN: try: # Waiting without a timeout brings us into a potential DoS situation. # As soon as too many children run, we'll not respond to new # requests. The real solution would be to add a timeout for children # and killing them after some time. pid, _ = os.waitpid(0, 0) except os.error: pid = None if pid and pid in self._children: self._children.remove(pid) for child in self._children: try: pid, _ = os.waitpid(child, os.WNOHANG) except os.error: pid = None if pid and pid in self._children: self._children.remove(pid) def _IncomingConnection(self): """Called for each incoming connection """ # pylint: disable=W0212 (connection, client_addr) = self.socket.accept() self._CollectChildren(False) pid = os.fork() if pid == 0: # Child process try: # The client shouldn't keep the listening socket open. 
If the parent # process is restarted, it would fail when there's already something # listening (in this case its own child from a previous run) on the # same port. try: self.socket.close() except socket.error: pass self.socket = None # In case the handler code uses temporary files utils.ResetTempfileModule() self.request_executor(self, self.handler, connection, client_addr) except Exception: # pylint: disable=W0703 logging.exception("Error while handling request from %s:%s", client_addr[0], client_addr[1]) os._exit(1) os._exit(0) else: self._children.append(pid) class HttpServerHandler(object): """Base class for handling HTTP server requests. Users of this class must subclass it and override the L{HandleRequest} function. """ def PreHandleRequest(self, req): """Called before handling a request. Can be overridden by a subclass. """ def HandleRequest(self, req): """Handles a request. Must be overridden by subclass. """ raise NotImplementedError() @staticmethod def FormatErrorMessage(values): """Formats the body of an error message. @type values: dict @param values: dictionary with keys C{code}, C{message} and C{explain}. @rtype: tuple; (string, string) @return: Content-type and response body """ return (DEFAULT_ERROR_CONTENT_TYPE, DEFAULT_ERROR_MESSAGE % values) ganeti-2.9.3/lib/http/auth.py0000644000000000000000000002263512244641676016056 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """HTTP authentication module. """ import logging import re import base64 import binascii from ganeti import compat from ganeti import http from ganeti import utils from cStringIO import StringIO # Digest types from RFC2617 HTTP_BASIC_AUTH = "Basic" HTTP_DIGEST_AUTH = "Digest" # Not exactly as described in RFC2616, section 2.2, but good enough _NOQUOTE = re.compile(r"^[-_a-z0-9]+$", re.I) def _FormatAuthHeader(scheme, params): """Formats WWW-Authentication header value as per RFC2617, section 1.2 @type scheme: str @param scheme: Authentication scheme @type params: dict @param params: Additional parameters @rtype: str @return: Formatted header value """ buf = StringIO() buf.write(scheme) for name, value in params.iteritems(): buf.write(" ") buf.write(name) buf.write("=") if _NOQUOTE.match(value): buf.write(value) else: buf.write("\"") # TODO: Better quoting buf.write(value.replace("\"", "\\\"")) buf.write("\"") return buf.getvalue() class HttpServerRequestAuthentication(object): # Default authentication realm AUTH_REALM = "Unspecified" # Schemes for passwords _CLEARTEXT_SCHEME = "{CLEARTEXT}" _HA1_SCHEME = "{HA1}" def GetAuthRealm(self, req): """Returns the authentication realm for a request. May be overridden by a subclass, which then can return different realms for different paths. 
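A subclass sketch returning a separate realm for one path prefix; the "/2/" prefix and realm name are made-up examples:

  def GetAuthRealm(self, req):
    if req.request_path.startswith("/2/"):
      return "Ganeti API"
    return self.AUTH_REALM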
@type req: L{http.server._HttpServerRequest} @param req: HTTP request context @rtype: string @return: Authentication realm """ # today we don't have per-request filtering, but we might want to # add it in the future # pylint: disable=W0613 return self.AUTH_REALM def AuthenticationRequired(self, req): """Determines whether authentication is required for a request. To enable authentication, override this function in a subclass and return C{True}. L{AUTH_REALM} must be set. @type req: L{http.server._HttpServerRequest} @param req: HTTP request context """ # Unused argument, method could be a function # pylint: disable=W0613,R0201 return False def PreHandleRequest(self, req): """Called before a request is handled. @type req: L{http.server._HttpServerRequest} @param req: HTTP request context """ # Authentication not required, and no credentials given? if not (self.AuthenticationRequired(req) or (req.request_headers and http.HTTP_AUTHORIZATION in req.request_headers)): return realm = self.GetAuthRealm(req) if not realm: raise AssertionError("No authentication realm") # Check "Authorization" header if self._CheckAuthorization(req): # User successfully authenticated return # Send 401 Unauthorized response params = { "realm": realm, } # TODO: Support for Digest authentication (RFC2617, section 3). # TODO: Support for more than one WWW-Authenticate header with the same # response (RFC2617, section 4.6). headers = { http.HTTP_WWW_AUTHENTICATE: _FormatAuthHeader(HTTP_BASIC_AUTH, params), } raise http.HttpUnauthorized(headers=headers) def _CheckAuthorization(self, req): """Checks 'Authorization' header sent by client. @type req: L{http.server._HttpServerRequest} @param req: HTTP request context @rtype: bool @return: Whether user is allowed to execute request """ credentials = req.request_headers.get(http.HTTP_AUTHORIZATION, None) if not credentials: return False # Extract scheme parts = credentials.strip().split(None, 2) if len(parts) < 1: # Missing scheme return False # RFC2617, section 1.2: "[...] It uses an extensible, case-insensitive # token to identify the authentication scheme [...]" scheme = parts[0].lower() if scheme == HTTP_BASIC_AUTH.lower(): # Do basic authentication if len(parts) < 2: raise http.HttpBadRequest(message=("Basic authentication requires" " credentials")) return self._CheckBasicAuthorization(req, parts[1]) elif scheme == HTTP_DIGEST_AUTH.lower(): # TODO: Implement digest authentication # RFC2617, section 3.3: "Note that the HTTP server does not actually need # to know the user's cleartext password. As long as H(A1) is available to # the server, the validity of an Authorization header may be verified." pass # Unsupported authentication scheme return False def _CheckBasicAuthorization(self, req, in_data): """Checks credentials sent for basic authentication. @type req: L{http.server._HttpServerRequest} @param req: HTTP request context @type in_data: str @param in_data: Username and password encoded as Base64 @rtype: bool @return: Whether user is allowed to execute request """ try: creds = base64.b64decode(in_data.encode("ascii")).decode("ascii") except (TypeError, binascii.Error, UnicodeError): logging.exception("Error when decoding Basic authentication credentials") return False if ":" not in creds: return False (user, password) = creds.split(":", 1) return self.Authenticate(req, user, password) def Authenticate(self, req, user, password): """Checks the password for a user. This function MUST be overridden by a subclass. 
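A subclass sketch checking credentials against users loaded with L{ParsePasswordFile}; the "users" dictionary is a made-up example:

  def Authenticate(self, req, user, password):
    entry = users.get(user)
    return (entry is not None and
            self.VerifyBasicAuthPassword(req, user, password,
                                         entry.password))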
""" raise NotImplementedError() def VerifyBasicAuthPassword(self, req, username, password, expected): """Checks the password for basic authentication. As long as they don't start with an opening brace ("E{lb}"), old passwords are supported. A new scheme uses H(A1) from RFC2617, where H is MD5 and A1 consists of the username, the authentication realm and the actual password. @type req: L{http.server._HttpServerRequest} @param req: HTTP request context @type username: string @param username: Username from HTTP headers @type password: string @param password: Password from HTTP headers @type expected: string @param expected: Expected password with optional scheme prefix (e.g. from users file) """ # Backwards compatibility for old-style passwords without a scheme if not expected.startswith("{"): expected = self._CLEARTEXT_SCHEME + expected # Check again, just to be sure if not expected.startswith("{"): raise AssertionError("Invalid scheme") scheme_end_idx = expected.find("}", 1) # Ensure scheme has a length of at least one character if scheme_end_idx <= 1: logging.warning("Invalid scheme in password for user '%s'", username) return False scheme = expected[:scheme_end_idx + 1].upper() expected_password = expected[scheme_end_idx + 1:] # Good old plain text password if scheme == self._CLEARTEXT_SCHEME: return password == expected_password # H(A1) as described in RFC2617 if scheme == self._HA1_SCHEME: realm = self.GetAuthRealm(req) if not realm: # There can not be a valid password for this case raise AssertionError("No authentication realm") expha1 = compat.md5_hash() expha1.update("%s:%s:%s" % (username, realm, password)) return (expected_password.lower() == expha1.hexdigest().lower()) logging.warning("Unknown scheme '%s' in password for user '%s'", scheme, username) return False class PasswordFileUser(object): """Data structure for users from password file. """ def __init__(self, name, password, options): self.name = name self.password = password self.options = options def ParsePasswordFile(contents): """Parses the contents of a password file. Lines in the password file are of the following format:: [options] Fields are separated by whitespace. Username and password are mandatory, options are optional and separated by comma (','). Empty lines and comments ('#') are ignored. @type contents: str @param contents: Contents of password file @rtype: dict @return: Dictionary containing L{PasswordFileUser} instances """ users = {} for line in utils.FilterEmptyLinesAndComments(contents): parts = line.split(None, 2) if len(parts) < 2: # Invalid line # TODO: Return line number from FilterEmptyLinesAndComments logging.warning("Ignoring non-comment line with less than two fields") continue name = parts[0] password = parts[1] # Extract options options = [] if len(parts) >= 3: for part in parts[2].split(","): options.append(part.strip()) else: logging.warning("Ignoring values for user '%s': %s", name, parts[3:]) users[name] = PasswordFileUser(name, password, options) return users ganeti-2.9.3/lib/http/__init__.py0000644000000000000000000006720512271422343016642 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008, 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """HTTP module. """ import logging import mimetools import OpenSSL import select import socket import errno from cStringIO import StringIO from ganeti import constants from ganeti import utils HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION HTTP_OK = 200 HTTP_NO_CONTENT = 204 HTTP_NOT_MODIFIED = 304 HTTP_0_9 = "HTTP/0.9" HTTP_1_0 = "HTTP/1.0" HTTP_1_1 = "HTTP/1.1" HTTP_GET = "GET" HTTP_HEAD = "HEAD" HTTP_POST = "POST" HTTP_PUT = "PUT" HTTP_DELETE = "DELETE" HTTP_ETAG = "ETag" HTTP_HOST = "Host" HTTP_SERVER = "Server" HTTP_DATE = "Date" HTTP_USER_AGENT = "User-Agent" HTTP_CONTENT_TYPE = "Content-Type" HTTP_CONTENT_LENGTH = "Content-Length" HTTP_CONNECTION = "Connection" HTTP_KEEP_ALIVE = "Keep-Alive" HTTP_WWW_AUTHENTICATE = "WWW-Authenticate" HTTP_AUTHORIZATION = "Authorization" HTTP_AUTHENTICATION_INFO = "Authentication-Info" HTTP_ALLOW = "Allow" HTTP_APP_OCTET_STREAM = "application/octet-stream" HTTP_APP_JSON = "application/json" _SSL_UNEXPECTED_EOF = "Unexpected EOF" # Socket operations (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE) = range(4) # send/receive quantum SOCK_BUF_SIZE = 32768 class HttpError(Exception): """Internal exception for HTTP errors. This should only be used for internal error reporting. """ class HttpConnectionClosed(Exception): """Internal exception for a closed connection. This should only be used for internal error reporting. Only use it if there's no other way to report this condition. """ class HttpSessionHandshakeUnexpectedEOF(HttpError): """Internal exception for errors during SSL handshake. This should only be used for internal error reporting. """ class HttpSocketTimeout(Exception): """Internal exception for socket timeouts. This should only be used for internal error reporting. """ class HttpException(Exception): code = None message = None def __init__(self, message=None, headers=None): Exception.__init__(self) self.message = message self.headers = headers class HttpBadRequest(HttpException): """400 Bad Request RFC2616, 10.4.1: The request could not be understood by the server due to malformed syntax. The client SHOULD NOT repeat the request without modifications. """ code = 400 class HttpUnauthorized(HttpException): """401 Unauthorized RFC2616, section 10.4.2: The request requires user authentication. The response MUST include a WWW-Authenticate header field (section 14.47) containing a challenge applicable to the requested resource. """ code = 401 class HttpForbidden(HttpException): """403 Forbidden RFC2616, 10.4.4: The server understood the request, but is refusing to fulfill it. Authorization will not help and the request SHOULD NOT be repeated. """ code = 403 class HttpNotFound(HttpException): """404 Not Found RFC2616, 10.4.5: The server has not found anything matching the Request-URI. No indication is given of whether the condition is temporary or permanent. """ code = 404 class HttpMethodNotAllowed(HttpException): """405 Method Not Allowed RFC2616, 10.4.6: The method specified in the Request-Line is not allowed for the resource identified by the Request-URI. 
The response MUST include an Allow header containing a list of valid methods for the requested resource. """ code = 405 class HttpNotAcceptable(HttpException): """406 Not Acceptable RFC2616, 10.4.7: The resource identified by the request is only capable of generating response entities which have content characteristics not acceptable according to the accept headers sent in the request. """ code = 406 class HttpRequestTimeout(HttpException): """408 Request Timeout RFC2616, 10.4.9: The client did not produce a request within the time that the server was prepared to wait. The client MAY repeat the request without modifications at any later time. """ code = 408 class HttpConflict(HttpException): """409 Conflict RFC2616, 10.4.10: The request could not be completed due to a conflict with the current state of the resource. This code is only allowed in situations where it is expected that the user might be able to resolve the conflict and resubmit the request. """ code = 409 class HttpGone(HttpException): """410 Gone RFC2616, 10.4.11: The requested resource is no longer available at the server and no forwarding address is known. This condition is expected to be considered permanent. """ code = 410 class HttpLengthRequired(HttpException): """411 Length Required RFC2616, 10.4.12: The server refuses to accept the request without a defined Content-Length. The client MAY repeat the request if it adds a valid Content-Length header field containing the length of the message-body in the request message. """ code = 411 class HttpPreconditionFailed(HttpException): """412 Precondition Failed RFC2616, 10.4.13: The precondition given in one or more of the request-header fields evaluated to false when it was tested on the server. """ code = 412 class HttpUnsupportedMediaType(HttpException): """415 Unsupported Media Type RFC2616, 10.4.16: The server is refusing to service the request because the entity of the request is in a format not supported by the requested resource for the requested method. """ code = 415 class HttpInternalServerError(HttpException): """500 Internal Server Error RFC2616, 10.5.1: The server encountered an unexpected condition which prevented it from fulfilling the request. """ code = 500 class HttpNotImplemented(HttpException): """501 Not Implemented RFC2616, 10.5.2: The server does not support the functionality required to fulfill the request. """ code = 501 class HttpBadGateway(HttpException): """502 Bad Gateway RFC2616, 10.5.3: The server, while acting as a gateway or proxy, received an invalid response from the upstream server it accessed in attempting to fulfill the request. """ code = 502 class HttpServiceUnavailable(HttpException): """503 Service Unavailable RFC2616, 10.5.4: The server is currently unable to handle the request due to a temporary overloading or maintenance of the server. """ code = 503 class HttpGatewayTimeout(HttpException): """504 Gateway Timeout RFC2616, 10.5.5: The server, while acting as a gateway or proxy, did not receive a timely response from the upstream server specified by the URI (e.g. HTTP, FTP, LDAP) or some other auxiliary server (e.g. DNS) it needed to access in attempting to complete the request. """ code = 504 class HttpVersionNotSupported(HttpException): """505 HTTP Version Not Supported RFC2616, 10.5.6: The server does not support, or refuses to support, the HTTP protocol version that was used in the request message. """ code = 505 def ParseHeaders(buf): """Parses HTTP headers. 
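Illustrative use with a made-up header block:

  msg = ParseHeaders(StringIO("Content-Type: text/plain\r\n\r\n"))
  msg.get("Content-Type")  # -> "text/plain"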
@note: This is just a trivial wrapper around C{mimetools.Message} """ return mimetools.Message(buf, 0) def SocketOperation(sock, op, arg1, timeout): """Wrapper around socket functions. This function abstracts error handling for socket operations, especially for the complicated interaction with OpenSSL. @type sock: socket @param sock: Socket for the operation @type op: int @param op: Operation to execute (SOCKOP_* constants) @type arg1: any @param arg1: Parameter for function (if needed) @type timeout: None or float @param timeout: Timeout in seconds or None @return: Return value of socket function """ # TODO: event_poll/event_check/override if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE): event_poll = select.POLLOUT elif op == SOCKOP_RECV: event_poll = select.POLLIN elif op == SOCKOP_SHUTDOWN: event_poll = None # The timeout is only used when OpenSSL requests polling for a condition. # It is not advisable to have no timeout for shutdown. assert timeout else: raise AssertionError("Invalid socket operation") # Handshake is only supported by SSL sockets if (op == SOCKOP_HANDSHAKE and not isinstance(sock, OpenSSL.SSL.ConnectionType)): return # No override by default event_override = 0 while True: # Poll only for certain operations and when asked for by an override if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE): if event_override: wait_for_event = event_override else: wait_for_event = event_poll event = utils.WaitForFdCondition(sock, wait_for_event, timeout) if event is None: raise HttpSocketTimeout() if event & (select.POLLNVAL | select.POLLHUP | select.POLLERR): # Let the socket functions handle these break if not event & wait_for_event: continue # Reset override event_override = 0 try: try: if op == SOCKOP_SEND: return sock.send(arg1) elif op == SOCKOP_RECV: return sock.recv(arg1) elif op == SOCKOP_SHUTDOWN: if isinstance(sock, OpenSSL.SSL.ConnectionType): # PyOpenSSL's shutdown() doesn't take arguments return sock.shutdown() else: return sock.shutdown(arg1) elif op == SOCKOP_HANDSHAKE: return sock.do_handshake() except OpenSSL.SSL.WantWriteError: # OpenSSL wants to write, poll for POLLOUT event_override = select.POLLOUT continue except OpenSSL.SSL.WantReadError: # OpenSSL wants to read, poll for POLLIN event_override = select.POLLIN | select.POLLPRI continue except OpenSSL.SSL.WantX509LookupError: continue except OpenSSL.SSL.ZeroReturnError, err: # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only # occurs if a closure alert has occurred in the protocol, i.e. the # connection has been closed cleanly. Note that this does not # necessarily mean that the transport layer (e.g. a socket) has been # closed. if op == SOCKOP_SEND: # Can happen during a renegotiation raise HttpConnectionClosed(err.args) elif op == SOCKOP_RECV: return "" # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN raise socket.error(err.args) except OpenSSL.SSL.SysCallError, err: if op == SOCKOP_SEND: # arg1 is the data when writing if err.args and err.args[0] == -1 and arg1 == "": # errors when writing empty strings are expected # and can be ignored return 0 if err.args == (-1, _SSL_UNEXPECTED_EOF): if op == SOCKOP_RECV: return "" elif op == SOCKOP_HANDSHAKE: # Can happen if peer disconnects directly after the connection is # opened. 
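# A condensed, self-contained sketch (not from the original source) of the
# pyOpenSSL retry pattern handled in SocketOperation above:
# WantReadError/WantWriteError simply mean "wait until the socket is ready,
# then call the same function again". This variant uses select.select instead
# of utils.WaitForFdCondition and assumes an already-connected
# OpenSSL.SSL.Connection object.
def _SslRecvRetry(conn, nbytes, timeout):
  """Receives from an SSL connection, retrying on WANT_READ/WANT_WRITE."""
  while True:
    try:
      return conn.recv(nbytes)
    except OpenSSL.SSL.WantReadError:
      # OpenSSL needs more ciphertext from the wire before returning plaintext
      if not select.select([conn], [], [], timeout)[0]:
        raise HttpSocketTimeout()
    except OpenSSL.SSL.WantWriteError:
      # E.g. during renegotiation, a read may first require a write
      if not select.select([], [conn], [], timeout)[1]:
        raise HttpSocketTimeout()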
raise HttpSessionHandshakeUnexpectedEOF(err.args) raise socket.error(err.args) except OpenSSL.SSL.Error, err: raise socket.error(err.args) except socket.error, err: if err.args and err.args[0] == errno.EAGAIN: # Ignore EAGAIN continue raise def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force): """Closes the connection. @type sock: socket @param sock: Socket to be shut down @type close_timeout: float @param close_timeout: How long to wait for the peer to close the connection @type write_timeout: float @param write_timeout: Write timeout for shutdown @type msgreader: http.HttpMessageReader @param msgreader: Request message reader, used to determine whether peer should close connection @type force: bool @param force: Whether to forcibly close the connection without waiting for peer """ if msgreader and msgreader.peer_will_close and not force: # Wait for peer to close try: # Check whether it's actually closed if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout): return except (socket.error, HttpError, HttpSocketTimeout): # Ignore errors at this stage pass # Close the connection from our side try: # We don't care about the return value, see NOTES in SSL_shutdown(3). SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR, write_timeout) except HttpSocketTimeout: raise HttpError("Timeout while shutting down connection") except socket.error, err: # Ignore ENOTCONN if not (err.args and err.args[0] == errno.ENOTCONN): raise HttpError("Error while shutting down connection: %s" % err) def Handshake(sock, write_timeout): """Performs an SSL handshake with the peer. @type sock: socket @param sock: Socket on which to perform the handshake @type write_timeout: float @param write_timeout: Write timeout for handshake """ try: return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout) except HttpSocketTimeout: raise HttpError("Timeout during SSL handshake") except socket.error, err: raise HttpError("Error in SSL handshake: %s" % err) class HttpSslParams(object): """Data class for SSL key and certificate. """ def __init__(self, ssl_key_path, ssl_cert_path): """Initializes this class. @type ssl_key_path: string @param ssl_key_path: Path to file containing SSL key in PEM format @type ssl_cert_path: string @param ssl_cert_path: Path to file containing SSL certificate in PEM format """ self.ssl_key_pem = utils.ReadFile(ssl_key_path) self.ssl_cert_pem = utils.ReadFile(ssl_cert_path) self.ssl_cert_path = ssl_cert_path def GetKey(self): return OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, self.ssl_key_pem) def GetCertificate(self): return OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, self.ssl_cert_pem) class HttpBase(object): """Base class for HTTP server and client. """ def __init__(self): self.using_ssl = None self._ssl_params = None self._ssl_key = None self._ssl_cert = None def _CreateSocket(self, ssl_params, ssl_verify_peer, family): """Creates a TCP socket and initializes SSL if needed. @type ssl_params: HttpSslParams @param ssl_params: SSL key and certificate @type ssl_verify_peer: bool @param ssl_verify_peer: Whether to require client certificate and compare it with our certificate @type family: int @param family: socket.AF_INET | socket.AF_INET6 """ assert family in (socket.AF_INET, socket.AF_INET6) self._ssl_params = ssl_params sock = socket.socket(family, socket.SOCK_STREAM) # Should we enable SSL?
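# Illustrative helper, not part of the original module: HttpSslParams merely
# loads the PEM files, so a mismatched key/certificate pair is only caught
# later by ctx.check_privatekey(). The same pyOpenSSL calls can validate a
# pair up front (the function name is hypothetical):
def _CheckSslPair(ssl_key_path, ssl_cert_path):
  """Returns True if the PEM key and certificate belong together."""
  params = HttpSslParams(ssl_key_path, ssl_cert_path)
  ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
  ctx.use_privatekey(params.GetKey())
  ctx.use_certificate(params.GetCertificate())
  try:
    ctx.check_privatekey()  # raises OpenSSL.SSL.Error on mismatch
  except OpenSSL.SSL.Error:
    return False
  return True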
self.using_ssl = ssl_params is not None if not self.using_ssl: return sock self._ssl_key = ssl_params.GetKey() self._ssl_cert = ssl_params.GetCertificate() ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD) ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2) ciphers = self.GetSslCiphers() logging.debug("Setting SSL cipher string %s", ciphers) ctx.set_cipher_list(ciphers) ctx.use_privatekey(self._ssl_key) ctx.use_certificate(self._ssl_cert) ctx.check_privatekey() if ssl_verify_peer: ctx.set_verify(OpenSSL.SSL.VERIFY_PEER | OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, self._SSLVerifyCallback) # Also add our certificate as a trusted CA to be sent to the client. # This is required at least for GnuTLS clients to work. try: # This will fail for PyOpenssl versions before 0.10 ctx.add_client_ca(self._ssl_cert) except AttributeError: # Fall back to letting OpenSSL read the certificate file directly. ctx.load_client_ca(ssl_params.ssl_cert_path) return OpenSSL.SSL.Connection(ctx, sock) def GetSslCiphers(self): # pylint: disable=R0201 """Returns the ciphers string for SSL. """ return constants.OPENSSL_CIPHERS def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok): """Verify the certificate provided by the peer We only compare fingerprints. The client must use the same certificate as we do on our side. """ # some parameters are unused, but this is the API # pylint: disable=W0613 assert self._ssl_params, "SSL not initialized" return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and self._ssl_cert.digest("md5") == cert.digest("md5")) class HttpMessage(object): """Data structure for HTTP message. """ def __init__(self): self.start_line = None self.headers = None self.body = None class HttpClientToServerStartLine(object): """Data structure for HTTP request start line. """ def __init__(self, method, path, version): self.method = method self.path = path self.version = version def __str__(self): return "%s %s %s" % (self.method, self.path, self.version) class HttpServerToClientStartLine(object): """Data structure for HTTP response start line. """ def __init__(self, version, code, reason): self.version = version self.code = code self.reason = reason def __str__(self): return "%s %s %s" % (self.version, self.code, self.reason) class HttpMessageWriter(object): """Writes an HTTP message to a socket. """ def __init__(self, sock, msg, write_timeout): """Initializes this class and writes an HTTP message to a socket. @type sock: socket @param sock: Socket to be written to @type msg: http.HttpMessage @param msg: HTTP message to be written @type write_timeout: float @param write_timeout: Write timeout for socket """ self._msg = msg self._PrepareMessage() buf = self._FormatMessage() pos = 0 end = len(buf) while pos < end: # Send only SOCK_BUF_SIZE bytes at a time data = buf[pos:(pos + SOCK_BUF_SIZE)] sent = SocketOperation(sock, SOCKOP_SEND, data, write_timeout) # Remove sent bytes pos += sent assert pos == end, "Message wasn't sent completely" def _PrepareMessage(self): """Prepares the HTTP message by setting mandatory headers. """ # RFC2616, section 4.3: "The presence of a message-body in a request is # signaled by the inclusion of a Content-Length or Transfer-Encoding header # field in the request's message-headers." if self._msg.body: self._msg.headers[HTTP_CONTENT_LENGTH] = len(self._msg.body) def _FormatMessage(self): """Serializes the HTTP message into a string. 
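# Usage sketch (not in the original source) for the start-line classes above;
# str() produces exactly the first line HttpMessageWriter puts on the wire.
# The request path is an arbitrary example:
req_line = HttpClientToServerStartLine(HTTP_GET, "/2/info", HTTP_1_1)
assert str(req_line) == "GET /2/info HTTP/1.1"
resp_line = HttpServerToClientStartLine(HTTP_1_1, HTTP_OK, "OK")
assert str(resp_line) == "HTTP/1.1 200 OK"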
""" buf = StringIO() # Add start line buf.write(str(self._msg.start_line)) buf.write("\r\n") # Add headers if self._msg.start_line.version != HTTP_0_9: for name, value in self._msg.headers.iteritems(): buf.write("%s: %s\r\n" % (name, value)) buf.write("\r\n") # Add message body if needed if self.HasMessageBody(): buf.write(self._msg.body) elif self._msg.body: logging.warning("Ignoring message body") return buf.getvalue() def HasMessageBody(self): """Checks whether the HTTP message contains a body. Can be overridden by subclasses. """ return bool(self._msg.body) class HttpMessageReader(object): """Reads HTTP message from socket. """ # Length limits START_LINE_LENGTH_MAX = None HEADER_LENGTH_MAX = None # Parser state machine PS_START_LINE = "start-line" PS_HEADERS = "headers" PS_BODY = "entity-body" PS_COMPLETE = "complete" def __init__(self, sock, msg, read_timeout): """Reads an HTTP message from a socket. @type sock: socket @param sock: Socket to be read from @type msg: http.HttpMessage @param msg: Object for the read message @type read_timeout: float @param read_timeout: Read timeout for socket """ self.sock = sock self.msg = msg self.start_line_buffer = None self.header_buffer = StringIO() self.body_buffer = StringIO() self.parser_status = self.PS_START_LINE self.content_length = None self.peer_will_close = None buf = "" eof = False while self.parser_status != self.PS_COMPLETE: # TODO: Don't read more than necessary (Content-Length), otherwise # data might be lost and/or an error could occur data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout) if data: buf += data else: eof = True # Do some parsing and error checking while more data arrives buf = self._ContinueParsing(buf, eof) # Must be done only after the buffer has been evaluated # TODO: Content-Length < len(data read) and connection closed if (eof and self.parser_status in (self.PS_START_LINE, self.PS_HEADERS)): raise HttpError("Connection closed prematurely") # Parse rest buf = self._ContinueParsing(buf, True) assert self.parser_status == self.PS_COMPLETE assert not buf, "Parser didn't read full response" # Body is complete msg.body = self.body_buffer.getvalue() def _ContinueParsing(self, buf, eof): """Main function for HTTP message state machine. @type buf: string @param buf: Receive buffer @type eof: bool @param eof: Whether we've reached EOF on the socket @rtype: string @return: Updated receive buffer """ # TODO: Use offset instead of slicing when possible if self.parser_status == self.PS_START_LINE: # Expect start line while True: idx = buf.find("\r\n") # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD # ignore any empty line(s) received where a Request-Line is expected. # In other words, if the server is reading the protocol stream at the # beginning of a message and receives a CRLF first, it should ignore # the CRLF." if idx == 0: # TODO: Limit number of CRLFs/empty lines for safety? buf = buf[2:] continue if idx > 0: self.start_line_buffer = buf[:idx] self._CheckStartLineLength(len(self.start_line_buffer)) # Remove status line, including CRLF buf = buf[idx + 2:] self.msg.start_line = self.ParseStartLine(self.start_line_buffer) self.parser_status = self.PS_HEADERS else: # Check whether incoming data is getting too large, otherwise we just # fill our read buffer. 
self._CheckStartLineLength(len(buf)) break # TODO: Handle messages without headers if self.parser_status == self.PS_HEADERS: # Wait for header end idx = buf.find("\r\n\r\n") if idx >= 0: self.header_buffer.write(buf[:idx + 2]) self._CheckHeaderLength(self.header_buffer.tell()) # Remove headers, including CRLF buf = buf[idx + 4:] self._ParseHeaders() self.parser_status = self.PS_BODY else: # Check whether incoming data is getting too large, otherwise we just # fill our read buffer. self._CheckHeaderLength(len(buf)) if self.parser_status == self.PS_BODY: # TODO: Implement max size for body_buffer self.body_buffer.write(buf) buf = "" # Check whether we've read everything # # RFC2616, section 4.4: "When a message-body is included with a message, # the transfer-length of that body is determined by one of the following # [...] 5. By the server closing the connection. (Closing the connection # cannot be used to indicate the end of a request body, since that would # leave no possibility for the server to send back a response.)" # # TODO: Error when buffer length > Content-Length header if (eof or self.content_length is None or (self.content_length is not None and self.body_buffer.tell() >= self.content_length)): self.parser_status = self.PS_COMPLETE return buf def _CheckStartLineLength(self, length): """Limits the start line buffer size. @type length: int @param length: Buffer size """ if (self.START_LINE_LENGTH_MAX is not None and length > self.START_LINE_LENGTH_MAX): raise HttpError("Start line longer than %d chars" % self.START_LINE_LENGTH_MAX) def _CheckHeaderLength(self, length): """Limits the header buffer size. @type length: int @param length: Buffer size """ if (self.HEADER_LENGTH_MAX is not None and length > self.HEADER_LENGTH_MAX): raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX) def ParseStartLine(self, start_line): """Parses the start line of a message. Must be overridden by subclass. @type start_line: string @param start_line: Start line string """ raise NotImplementedError() def _WillPeerCloseConnection(self): """Evaluate whether peer will close the connection. @rtype: bool @return: Whether peer will close the connection """ # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option # for the sender to signal that the connection will be closed after # completion of the response. For example, # # Connection: close # # in either the request or the response header fields indicates that the # connection SHOULD NOT be considered `persistent' (section 8.1) after the # current request/response is complete." hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None) if hdr_connection: hdr_connection = hdr_connection.lower() # An HTTP/1.1 server is assumed to stay open unless explicitly closed. if self.msg.start_line.version == HTTP_1_1: return (hdr_connection and "close" in hdr_connection) # Some HTTP/1.0 implementations have support for persistent connections, # using rules different than HTTP/1.1. # For older HTTP, Keep-Alive indicates persistent connection. if self.msg.headers.get(HTTP_KEEP_ALIVE): return False # At least Akamai returns a "Connection: Keep-Alive" header, which was # supposed to be sent by the client. if hdr_connection and "keep-alive" in hdr_connection: return False return True def _ParseHeaders(self): """Parses the headers. This function also adjusts internal variables based on header values. 
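# The keep-alive rules above, restated as a self-contained function over
# plain dict headers (hypothetical, for illustration; the real code reads a
# mimetools.Message and treats a missing Connection header as None):
def _PeerWillClose(version, headers):
  """Returns whether the peer is expected to close the connection."""
  connection = (headers.get(HTTP_CONNECTION) or "").lower()
  if version == HTTP_1_1:
    # HTTP/1.1 connections are persistent unless "Connection: close" is sent
    return "close" in connection
  # HTTP/1.0 and older: persistent only if Keep-Alive was negotiated
  return not (headers.get(HTTP_KEEP_ALIVE) or "keep-alive" in connection)

assert _PeerWillClose(HTTP_1_1, {}) is False
assert _PeerWillClose(HTTP_1_1, {HTTP_CONNECTION: "close"}) is True
assert _PeerWillClose(HTTP_1_0, {}) is True
assert _PeerWillClose(HTTP_1_0, {HTTP_CONNECTION: "Keep-Alive"}) is False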
RFC2616, section 4.3: The presence of a message-body in a request is signaled by the inclusion of a Content-Length or Transfer-Encoding header field in the request's message-headers. """ # Parse headers self.header_buffer.seek(0, 0) self.msg.headers = ParseHeaders(self.header_buffer) self.peer_will_close = self._WillPeerCloseConnection() # Do we have a Content-Length header? hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None) if hdr_content_length: try: self.content_length = int(hdr_content_length) except (TypeError, ValueError): self.content_length = None if self.content_length is not None and self.content_length < 0: self.content_length = None # if the connection remains open and a content-length was not provided, # then assume that the connection WILL close. if self.content_length is None: self.peer_will_close = True ganeti-2.9.3/lib/impexpd/0000755000000000000000000000000012271445544015216 5ustar00rootroot00000000000000ganeti-2.9.3/lib/impexpd/__init__.py0000644000000000000000000004040012244641676017331 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Classes and functions for import/export daemon. """ import os import re import socket import logging import signal import errno import time from cStringIO import StringIO from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import netutils from ganeti import compat #: Used to recognize point at which socat(1) starts to listen on its socket. #: The local address is required for the remote peer to connect (in particular #: the port number). LISTENING_RE = re.compile(r"^listening on\s+" r"AF=(?P<family>\d+)\s+" r"(?P<address>.+):(?P<port>\d+)$", re.I)
#: Used to recognize point at which socat(1) is sending data over the wire TRANSFER_LOOP_RE = re.compile(r"^starting data transfer loop with FDs\s+.*$", re.I) SOCAT_LOG_DEBUG = "D" SOCAT_LOG_INFO = "I" SOCAT_LOG_NOTICE = "N" SOCAT_LOG_WARNING = "W" SOCAT_LOG_ERROR = "E" SOCAT_LOG_FATAL = "F" SOCAT_LOG_IGNORE = compat.UniqueFrozenset([ SOCAT_LOG_DEBUG, SOCAT_LOG_INFO, SOCAT_LOG_NOTICE, ]) #: Used to parse GNU dd(1) statistics DD_INFO_RE = re.compile(r"^(?P<bytes>\d+)\s*byte(?:|s)\s.*\scopied,\s*" r"(?P<seconds>[\d.]+)\s*s(?:|econds),.*$", re.I) #: Used to ignore "N+N records in/out" on dd(1)'s stderr DD_STDERR_IGNORE = re.compile(r"^\d+\+\d+\s*records\s+(?:in|out)$", re.I) #: Signal upon which dd(1) will print statistics (on some platforms, SIGINFO is #: unavailable and SIGUSR1 is used instead) DD_INFO_SIGNAL = getattr(signal, "SIGINFO", signal.SIGUSR1) #: Buffer size: at most this many bytes are transferred at once BUFSIZE = 1024 * 1024 # Common options for socat SOCAT_TCP_OPTS = ["keepalive", "keepidle=60", "keepintvl=10", "keepcnt=5"] SOCAT_OPENSSL_OPTS = ["verify=1", "method=TLSv1", "cipher=%s" % constants.OPENSSL_CIPHERS] if constants.SOCAT_USE_COMPRESS: # Disables all compression done by OpenSSL. Only supported in patched # versions of socat (as of November 2010). See INSTALL for more information. SOCAT_OPENSSL_OPTS.append("compress=none") SOCAT_OPTION_MAXLEN = 400 (PROG_OTHER, PROG_SOCAT, PROG_DD, PROG_DD_PID, PROG_EXP_SIZE) = range(1, 6) PROG_ALL = compat.UniqueFrozenset([ PROG_OTHER, PROG_SOCAT, PROG_DD, PROG_DD_PID, PROG_EXP_SIZE, ]) class CommandBuilder(object): def __init__(self, mode, opts, socat_stderr_fd, dd_stderr_fd, dd_pid_fd): """Initializes this class. @param mode: Daemon mode (import or export) @param opts: Options object @type socat_stderr_fd: int @param socat_stderr_fd: File descriptor socat should write its stderr to @type dd_stderr_fd: int @param dd_stderr_fd: File descriptor dd should write its stderr to @type dd_pid_fd: int @param dd_pid_fd: File descriptor the child should write dd's PID to """ self._opts = opts self._mode = mode self._socat_stderr_fd = socat_stderr_fd self._dd_stderr_fd = dd_stderr_fd self._dd_pid_fd = dd_pid_fd assert (self._opts.magic is None or constants.IE_MAGIC_RE.match(self._opts.magic)) @staticmethod def GetBashCommand(cmd): """Prepares a command to be run in Bash. """ return ["bash", "-o", "errexit", "-o", "pipefail", "-c", cmd] def _GetSocatCommand(self): """Returns the socat command. """ common_addr_opts = SOCAT_TCP_OPTS + SOCAT_OPENSSL_OPTS + [ "key=%s" % self._opts.key, "cert=%s" % self._opts.cert, "cafile=%s" % self._opts.ca, ] if self._opts.bind is not None: common_addr_opts.append("bind=%s" % self._opts.bind) assert not (self._opts.ipv4 and self._opts.ipv6) if self._opts.ipv4: common_addr_opts.append("pf=ipv4") elif self._opts.ipv6: common_addr_opts.append("pf=ipv6") if self._mode == constants.IEM_IMPORT: if self._opts.port is None: port = 0 else: port = self._opts.port addr1 = [ "OPENSSL-LISTEN:%s" % port, "reuseaddr", # Retry to listen if connection wasn't established successfully, up to # 100 times a second. Note that this still leaves room for DoS attacks.
"forever", "intervall=0.01", ] + common_addr_opts addr2 = ["stdout"] elif self._mode == constants.IEM_EXPORT: if self._opts.host and netutils.IP6Address.IsValid(self._opts.host): host = "[%s]" % self._opts.host else: host = self._opts.host addr1 = ["stdin"] addr2 = [ "OPENSSL:%s:%s" % (host, self._opts.port), # How long to wait per connection attempt "connect-timeout=%s" % self._opts.connect_timeout, # Retry a few times before giving up to connect (once per second) "retry=%s" % self._opts.connect_retries, "intervall=1", ] + common_addr_opts else: raise errors.GenericError("Invalid mode '%s'" % self._mode) for i in [addr1, addr2]: for value in i: if len(value) > SOCAT_OPTION_MAXLEN: raise errors.GenericError("Socat option longer than %s" " characters: %r" % (SOCAT_OPTION_MAXLEN, value)) if "," in value: raise errors.GenericError("Comma not allowed in socat option" " value: %r" % value) return [ constants.SOCAT_PATH, # Log to stderr "-ls", # Log level "-d", "-d", # Buffer size "-b%s" % BUFSIZE, # Unidirectional mode, the first address is only used for reading, and the # second address is only used for writing "-u", ",".join(addr1), ",".join(addr2), ] def _GetMagicCommand(self): """Returns the command to read/write the magic value. """ if not self._opts.magic: return None # Prefix to ensure magic isn't interpreted as option to "echo" magic = "M=%s" % self._opts.magic cmd = StringIO() if self._mode == constants.IEM_IMPORT: cmd.write("{ ") cmd.write(utils.ShellQuoteArgs(["read", "-n", str(len(magic)), "magic"])) cmd.write(" && ") cmd.write("if test \"$magic\" != %s; then" % utils.ShellQuote(magic)) cmd.write(" echo %s >&2;" % utils.ShellQuote("Magic value mismatch")) cmd.write(" exit 1;") cmd.write("fi;") cmd.write(" }") elif self._mode == constants.IEM_EXPORT: cmd.write(utils.ShellQuoteArgs(["echo", "-E", "-n", magic])) else: raise errors.GenericError("Invalid mode '%s'" % self._mode) return cmd.getvalue() def _GetDdCommand(self): """Returns the command for measuring throughput. """ dd_cmd = StringIO() magic_cmd = self._GetMagicCommand() if magic_cmd: dd_cmd.write("{ ") dd_cmd.write(magic_cmd) dd_cmd.write(" && ") dd_cmd.write("{ ") # Setting LC_ALL since we want to parse the output and explicitly # redirecting stdin, as the background process (dd) would have # /dev/null as stdin otherwise dd_cmd.write("LC_ALL=C dd bs=%s <&0 2>&%d & pid=${!};" % (BUFSIZE, self._dd_stderr_fd)) # Send PID to daemon dd_cmd.write(" echo $pid >&%d;" % self._dd_pid_fd) # And wait for dd dd_cmd.write(" wait $pid;") dd_cmd.write(" }") if magic_cmd: dd_cmd.write(" }") return dd_cmd.getvalue() def _GetTransportCommand(self): """Returns the command for the transport part of the daemon. """ socat_cmd = ("%s 2>&%d" % (utils.ShellQuoteArgs(self._GetSocatCommand()), self._socat_stderr_fd)) dd_cmd = self._GetDdCommand() compr = self._opts.compress assert compr in constants.IEC_ALL parts = [] if self._mode == constants.IEM_IMPORT: parts.append(socat_cmd) if compr == constants.IEC_GZIP: parts.append("gunzip -c") parts.append(dd_cmd) elif self._mode == constants.IEM_EXPORT: parts.append(dd_cmd) if compr == constants.IEC_GZIP: parts.append("gzip -c") parts.append(socat_cmd) else: raise errors.GenericError("Invalid mode '%s'" % self._mode) # TODO: Run transport as separate user # The transport uses its own shell to simplify running it as a separate user # in the future. return self.GetBashCommand(" | ".join(parts)) def GetCommand(self): """Returns the complete child process command. 
""" transport_cmd = self._GetTransportCommand() buf = StringIO() if self._opts.cmd_prefix: buf.write(self._opts.cmd_prefix) buf.write(" ") buf.write(utils.ShellQuoteArgs(transport_cmd)) if self._opts.cmd_suffix: buf.write(" ") buf.write(self._opts.cmd_suffix) return self.GetBashCommand(buf.getvalue()) def _VerifyListening(family, address, port): """Verify address given as listening address by socat. """ if family not in (socket.AF_INET, socket.AF_INET6): raise errors.GenericError("Address family %r not supported" % family) if (family == socket.AF_INET6 and address.startswith("[") and address.endswith("]")): address = address.lstrip("[").rstrip("]") try: packed_address = socket.inet_pton(family, address) except socket.error: raise errors.GenericError("Invalid address %r for family %s" % (address, family)) return (socket.inet_ntop(family, packed_address), port) class ChildIOProcessor(object): def __init__(self, debug, status_file, logger, throughput_samples, exp_size): """Initializes this class. """ self._debug = debug self._status_file = status_file self._logger = logger self._splitter = dict([(prog, utils.LineSplitter(self._ProcessOutput, prog)) for prog in PROG_ALL]) self._dd_pid = None self._dd_ready = False self._dd_tp_samples = throughput_samples self._dd_progress = [] # Expected size of transferred data self._exp_size = exp_size def GetLineSplitter(self, prog): """Returns the line splitter for a program. """ return self._splitter[prog] def FlushAll(self): """Flushes all line splitters. """ for ls in self._splitter.itervalues(): ls.flush() def CloseAll(self): """Closes all line splitters. """ for ls in self._splitter.itervalues(): ls.close() self._splitter.clear() def NotifyDd(self): """Tells dd(1) to write statistics. """ if self._dd_pid is None: # Can't notify return False if not self._dd_ready: # There's a race condition between starting the program and sending # signals. The signal handler is only registered after some time, so we # have to check whether the program is ready. If it isn't, sending a # signal will invoke the default handler (and usually abort the program). if not utils.IsProcessHandlingSignal(self._dd_pid, DD_INFO_SIGNAL): logging.debug("dd is not yet ready for signal %s", DD_INFO_SIGNAL) return False logging.debug("dd is now handling signal %s", DD_INFO_SIGNAL) self._dd_ready = True logging.debug("Sending signal %s to PID %s", DD_INFO_SIGNAL, self._dd_pid) try: os.kill(self._dd_pid, DD_INFO_SIGNAL) except EnvironmentError, err: if err.errno != errno.ESRCH: raise # Process no longer exists logging.debug("dd exited") self._dd_pid = None return True def _ProcessOutput(self, line, prog): """Takes care of child process output. 
@type line: string @param line: Child output line @type prog: number @param prog: Program from which the line originates """ force_update = False forward_line = line if prog == PROG_SOCAT: level = None parts = line.split(None, 4) if len(parts) == 5: (_, _, _, level, msg) = parts force_update = self._ProcessSocatOutput(self._status_file, level, msg) if self._debug or (level and level not in SOCAT_LOG_IGNORE): forward_line = "socat: %s %s" % (level, msg) else: forward_line = None else: forward_line = "socat: %s" % line elif prog == PROG_DD: (should_forward, force_update) = self._ProcessDdOutput(line) if should_forward or self._debug: forward_line = "dd: %s" % line else: forward_line = None elif prog == PROG_DD_PID: if self._dd_pid: raise RuntimeError("dd PID reported more than once") logging.debug("Received dd PID %r", line) self._dd_pid = int(line) forward_line = None elif prog == PROG_EXP_SIZE: logging.debug("Received predicted size %r", line) forward_line = None if line: try: exp_size = utils.BytesToMebibyte(int(line)) except (ValueError, TypeError), err: logging.error("Failed to convert predicted size %r to number: %s", line, err) exp_size = None else: exp_size = None self._exp_size = exp_size if forward_line: self._logger.info(forward_line) self._status_file.AddRecentOutput(forward_line) self._status_file.Update(force_update) @staticmethod def _ProcessSocatOutput(status_file, level, msg): """Interprets socat log output. """ if level == SOCAT_LOG_NOTICE: if status_file.GetListenPort() is None: # TODO: Maybe implement timeout to not listen forever m = LISTENING_RE.match(msg) if m: (_, port) = _VerifyListening(int(m.group("family")), m.group("address"), int(m.group("port"))) status_file.SetListenPort(port) return True if not status_file.GetConnected(): m = TRANSFER_LOOP_RE.match(msg) if m: logging.debug("Connection established") status_file.SetConnected() return True return False def _ProcessDdOutput(self, line): """Interprets a line of dd(1)'s output. """ m = DD_INFO_RE.match(line) if m: seconds = float(m.group("seconds")) mbytes = utils.BytesToMebibyte(int(m.group("bytes"))) self._UpdateDdProgress(seconds, mbytes) return (False, True) m = DD_STDERR_IGNORE.match(line) if m: # Ignore return (False, False) # Forward line return (True, False) def _UpdateDdProgress(self, seconds, mbytes): """Updates the internal status variables for dd(1) progress. @type seconds: float @param seconds: Timestamp of this update @type mbytes: float @param mbytes: Total number of MiB transferred so far """ # Add latest sample self._dd_progress.append((seconds, mbytes)) # Remove old samples del self._dd_progress[:-self._dd_tp_samples] # Calculate throughput throughput = _CalcThroughput(self._dd_progress) # Calculate percent and ETA percent = None eta = None if self._exp_size is not None: if self._exp_size != 0: percent = max(0, min(100, (100.0 * mbytes) / self._exp_size)) if throughput: eta = max(0, float(self._exp_size - mbytes) / throughput) self._status_file.SetProgress(mbytes, throughput, percent, eta) def _CalcThroughput(samples): """Calculates the throughput in MiB/second. 
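# A worked example with synthetic numbers for the progress math above: two
# retained samples 10 seconds and 100 MiB apart give 10 MiB/s; with an
# expected size of 1000 MiB and 400 MiB transferred, that is 40% done and an
# ETA of (1000 - 400) / 10 = 60 seconds:
(start_time, start_mbytes) = (0.0, 0.0)
(end_time, end_mbytes) = (10.0, 100.0)
throughput = (end_mbytes - start_mbytes) / (end_time - start_time)
(exp_size, mbytes) = (1000.0, 400.0)
percent = max(0, min(100, (100.0 * mbytes) / exp_size))
eta = max(0, (exp_size - mbytes) / throughput)
assert (throughput, percent, eta) == (10.0, 40.0, 60.0)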
@type samples: sequence @param samples: List of samples, each consisting of a (timestamp, mbytes) tuple @rtype: float or None @return: Throughput in MiB/second """ if len(samples) < 2: # Can't calculate throughput return None (start_time, start_mbytes) = samples[0] (end_time, end_mbytes) = samples[-1] return (float(end_mbytes) - start_mbytes) / (float(end_time) - start_time) ganeti-2.9.3/lib/luxi.py0000644000000000000000000004172312271422343015102 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2011, 2012, 2014 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module for the unix socket protocol This module implements the local unix socket protocol. You only need this module and the opcodes module in the client program in order to communicate with the master. The module is also used by the master daemon. """ import socket import collections import time import errno import logging from ganeti import compat from ganeti import serializer from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import objects from ganeti import pathutils KEY_METHOD = "method" KEY_ARGS = "args" KEY_SUCCESS = "success" KEY_RESULT = "result" KEY_VERSION = "version" REQ_SUBMIT_JOB = "SubmitJob" REQ_SUBMIT_MANY_JOBS = "SubmitManyJobs" REQ_WAIT_FOR_JOB_CHANGE = "WaitForJobChange" REQ_CANCEL_JOB = "CancelJob" REQ_ARCHIVE_JOB = "ArchiveJob" REQ_CHANGE_JOB_PRIORITY = "ChangeJobPriority" REQ_AUTO_ARCHIVE_JOBS = "AutoArchiveJobs" REQ_QUERY = "Query" REQ_QUERY_FIELDS = "QueryFields" REQ_QUERY_JOBS = "QueryJobs" REQ_QUERY_INSTANCES = "QueryInstances" REQ_QUERY_NODES = "QueryNodes" REQ_QUERY_GROUPS = "QueryGroups" REQ_QUERY_NETWORKS = "QueryNetworks" REQ_QUERY_EXPORTS = "QueryExports" REQ_QUERY_CONFIG_VALUES = "QueryConfigValues" REQ_QUERY_CLUSTER_INFO = "QueryClusterInfo" REQ_QUERY_TAGS = "QueryTags" REQ_SET_DRAIN_FLAG = "SetDrainFlag" REQ_SET_WATCHER_PAUSE = "SetWatcherPause" #: List of all LUXI requests REQ_ALL = compat.UniqueFrozenset([ REQ_ARCHIVE_JOB, REQ_AUTO_ARCHIVE_JOBS, REQ_CANCEL_JOB, REQ_CHANGE_JOB_PRIORITY, REQ_QUERY, REQ_QUERY_CLUSTER_INFO, REQ_QUERY_CONFIG_VALUES, REQ_QUERY_EXPORTS, REQ_QUERY_FIELDS, REQ_QUERY_GROUPS, REQ_QUERY_INSTANCES, REQ_QUERY_JOBS, REQ_QUERY_NODES, REQ_QUERY_NETWORKS, REQ_QUERY_TAGS, REQ_SET_DRAIN_FLAG, REQ_SET_WATCHER_PAUSE, REQ_SUBMIT_JOB, REQ_SUBMIT_MANY_JOBS, REQ_WAIT_FOR_JOB_CHANGE, ]) DEF_CTMO = 10 DEF_RWTO = 60 # WaitForJobChange timeout WFJC_TIMEOUT = (DEF_RWTO - 1) / 2 class ProtocolError(errors.LuxiError): """Denotes an error in the LUXI protocol.""" class ConnectionClosedError(ProtocolError): """Connection closed error.""" class TimeoutError(ProtocolError): """Operation timeout error.""" class RequestError(ProtocolError): """Error on request. This signifies an error in the request format or request handling, but not (e.g.) an error in starting up an instance. 
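# The timeout arithmetic above in concrete numbers (Python 2 integer
# division): with DEF_RWTO = 60, WFJC_TIMEOUT = (60 - 1) / 2 = 29 seconds.
# Keeping the server-side wait well under the client's read/write timeout
# ensures a blocking WaitForJobChange answers before the transport gives up:
assert WFJC_TIMEOUT == 29 and WFJC_TIMEOUT < DEF_RWTO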
Some common conditions that can trigger this exception: - job submission failed because the job data was wrong - query failed because required fields were missing """ class NoMasterError(ProtocolError): """The master cannot be reached. This means that the master daemon is not running or the socket has been removed. """ class PermissionError(ProtocolError): """Permission denied while connecting to the master socket. This means the user doesn't have the proper rights. """ class Transport: """Low-level transport class. This is used on the client side. This could be replaced by any other class that provides the same semantics to the Client. This means: - can send messages and receive messages - safe for multithreading """ def __init__(self, address, timeouts=None): """Constructor for the Transport class. Arguments: - address: a valid address for the used transport class - timeout: a list of timeouts, to be used on connect and read/write There are two timeouts used since we might want to wait for a long time for a response, but the connect timeout should be lower. If not passed, we use a default of 10 and 60 seconds, respectively. Note that on reading data, since the timeout applies to an individual receive, it might be that the total duration is longer than the timeout value passed (we make a hard limit at twice the read timeout). """ self.address = address if timeouts is None: self._ctimeout, self._rwtimeout = DEF_CTMO, DEF_RWTO else: self._ctimeout, self._rwtimeout = timeouts self.socket = None self._buffer = "" self._msgs = collections.deque() try: self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) # Try to connect try: utils.Retry(self._Connect, 1.0, self._ctimeout, args=(self.socket, address, self._ctimeout)) except utils.RetryTimeout: raise TimeoutError("Connect timed out") self.socket.settimeout(self._rwtimeout) except (socket.error, NoMasterError): if self.socket is not None: self.socket.close() self.socket = None raise @staticmethod def _Connect(sock, address, timeout): sock.settimeout(timeout) try: sock.connect(address) except socket.timeout, err: raise TimeoutError("Connect timed out: %s" % str(err)) except socket.error, err: error_code = err.args[0] if error_code in (errno.ENOENT, errno.ECONNREFUSED): raise NoMasterError(address) elif error_code in (errno.EPERM, errno.EACCES): raise PermissionError(address) elif error_code == errno.EAGAIN: # Server's socket backlog is full at the moment raise utils.RetryAgain() raise def _CheckSocket(self): """Make sure we are connected. """ if self.socket is None: raise ProtocolError("Connection is closed") def Send(self, msg): """Send a message. This just sends a message and doesn't wait for the response. """ if constants.LUXI_EOM in msg: raise ProtocolError("Message terminator found in payload") self._CheckSocket() try: # TODO: sendall is not guaranteed to send everything self.socket.sendall(msg + constants.LUXI_EOM) except socket.timeout, err: raise TimeoutError("Sending timeout: %s" % str(err)) def Recv(self): """Try to receive a message from the socket. In case we already have messages queued, we just return from the queue. Otherwise, we try to read data with a _rwtimeout network timeout, while making sure we don't go over twice the _rwtimeout as a global limit.
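# The errno-to-exception mapping used by _Connect above, restated as a table
# for reference (illustrative only; _CONNECT_ERRORS is not a real module
# attribute):
_CONNECT_ERRORS = {
  errno.ENOENT: NoMasterError,        # socket file missing: no master daemon
  errno.ECONNREFUSED: NoMasterError,  # socket exists but nothing listens
  errno.EPERM: PermissionError,       # insufficient rights on the socket
  errno.EACCES: PermissionError,
}
# errno.EAGAIN is special-cased: the backlog is full, so the connect is
# retried via utils.RetryAgain instead of failing outright.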
""" self._CheckSocket() etime = time.time() + self._rwtimeout while not self._msgs: if time.time() > etime: raise TimeoutError("Extended receive timeout") while True: try: data = self.socket.recv(4096) except socket.timeout, err: raise TimeoutError("Receive timeout: %s" % str(err)) except socket.error, err: if err.args and err.args[0] == errno.EAGAIN: continue raise break if not data: raise ConnectionClosedError("Connection closed while reading") new_msgs = (self._buffer + data).split(constants.LUXI_EOM) self._buffer = new_msgs.pop() self._msgs.extend(new_msgs) return self._msgs.popleft() def Call(self, msg): """Send a message and wait for the response. This is just a wrapper over Send and Recv. """ self.Send(msg) return self.Recv() def Close(self): """Close the socket""" if self.socket is not None: self.socket.close() self.socket = None def ParseRequest(msg): """Parses a LUXI request message. """ try: request = serializer.LoadJson(msg) except ValueError, err: raise ProtocolError("Invalid LUXI request (parsing error): %s" % err) logging.debug("LUXI request: %s", request) if not isinstance(request, dict): logging.error("LUXI request not a dict: %r", msg) raise ProtocolError("Invalid LUXI request (not a dict)") method = request.get(KEY_METHOD, None) # pylint: disable=E1103 args = request.get(KEY_ARGS, None) # pylint: disable=E1103 version = request.get(KEY_VERSION, None) # pylint: disable=E1103 if method is None or args is None: logging.error("LUXI request missing method or arguments: %r", msg) raise ProtocolError(("Invalid LUXI request (no method or arguments" " in request): %r") % msg) return (method, args, version) def ParseResponse(msg): """Parses a LUXI response message. """ # Parse the result try: data = serializer.LoadJson(msg) except KeyboardInterrupt: raise except Exception, err: raise ProtocolError("Error while deserializing response: %s" % str(err)) # Validate response if not (isinstance(data, dict) and KEY_SUCCESS in data and KEY_RESULT in data): raise ProtocolError("Invalid response from server: %r" % data) return (data[KEY_SUCCESS], data[KEY_RESULT], data.get(KEY_VERSION, None)) # pylint: disable=E1103 def FormatResponse(success, result, version=None): """Formats a LUXI response message. """ response = { KEY_SUCCESS: success, KEY_RESULT: result, } if version is not None: response[KEY_VERSION] = version logging.debug("LUXI response: %s", response) return serializer.DumpJson(response) def FormatRequest(method, args, version=None): """Formats a LUXI request message. """ # Build request request = { KEY_METHOD: method, KEY_ARGS: args, } if version is not None: request[KEY_VERSION] = version # Serialize the request return serializer.DumpJson(request) def CallLuxiMethod(transport_cb, method, args, version=None): """Send a LUXI request via a transport and return the response. """ assert callable(transport_cb) request_msg = FormatRequest(method, args, version=version) # Send request and wait for response response_msg = transport_cb(request_msg) (success, result, resp_version) = ParseResponse(response_msg) # Verify version if there was one in the response if resp_version is not None and resp_version != version: raise errors.LuxiError("LUXI version mismatch, client %s, response %s" % (version, resp_version)) if success: return result errors.MaybeRaise(result) raise RequestError(result) class Client(object): """High-level client implementation. This uses a backing Transport-like class on top of which it implements data serialization/deserialization. 
""" def __init__(self, address=None, timeouts=None, transport=Transport): """Constructor for the Client class. Arguments: - address: a valid address the the used transport class - timeout: a list of timeouts, to be used on connect and read/write - transport: a Transport-like class If timeout is not passed, the default timeouts of the transport class are used. """ if address is None: address = pathutils.MASTER_SOCKET self.address = address self.timeouts = timeouts self.transport_class = transport self.transport = None self._InitTransport() def _InitTransport(self): """(Re)initialize the transport if needed. """ if self.transport is None: self.transport = self.transport_class(self.address, timeouts=self.timeouts) def _CloseTransport(self): """Close the transport, ignoring errors. """ if self.transport is None: return try: old_transp = self.transport self.transport = None old_transp.Close() except Exception: # pylint: disable=W0703 pass def _SendMethodCall(self, data): # Send request and wait for response try: self._InitTransport() return self.transport.Call(data) except Exception: self._CloseTransport() raise def Close(self): """Close the underlying connection. """ self._CloseTransport() def CallMethod(self, method, args): """Send a generic request and return the response. """ if not isinstance(args, (list, tuple)): raise errors.ProgrammerError("Invalid parameter passed to CallMethod:" " expected list, got %s" % type(args)) return CallLuxiMethod(self._SendMethodCall, method, args, version=constants.LUXI_VERSION) def SetQueueDrainFlag(self, drain_flag): return self.CallMethod(REQ_SET_DRAIN_FLAG, (drain_flag, )) def SetWatcherPause(self, until): return self.CallMethod(REQ_SET_WATCHER_PAUSE, (until, )) def SubmitJob(self, ops): ops_state = map(lambda op: op.__getstate__(), ops) return self.CallMethod(REQ_SUBMIT_JOB, (ops_state, )) def SubmitManyJobs(self, jobs): jobs_state = [] for ops in jobs: jobs_state.append([op.__getstate__() for op in ops]) return self.CallMethod(REQ_SUBMIT_MANY_JOBS, (jobs_state, )) @staticmethod def _PrepareJobId(request_name, job_id): try: return int(job_id) except ValueError: raise RequestError("Invalid parameter passed to %s as job id: " " expected integer, got value %s" % (request_name, job_id)) def CancelJob(self, job_id): job_id = Client._PrepareJobId(REQ_CANCEL_JOB, job_id) return self.CallMethod(REQ_CANCEL_JOB, (job_id, )) def ArchiveJob(self, job_id): job_id = Client._PrepareJobId(REQ_ARCHIVE_JOB, job_id) return self.CallMethod(REQ_ARCHIVE_JOB, (job_id, )) def ChangeJobPriority(self, job_id, priority): job_id = Client._PrepareJobId(REQ_CHANGE_JOB_PRIORITY, job_id) return self.CallMethod(REQ_CHANGE_JOB_PRIORITY, (job_id, priority)) def AutoArchiveJobs(self, age): timeout = (DEF_RWTO - 1) / 2 return self.CallMethod(REQ_AUTO_ARCHIVE_JOBS, (age, timeout)) def WaitForJobChangeOnce(self, job_id, fields, prev_job_info, prev_log_serial, timeout=WFJC_TIMEOUT): """Waits for changes on a job. 
@param job_id: Job ID @type fields: list @param fields: List of field names to be observed @type prev_job_info: None or list @param prev_job_info: Previously received job information @type prev_log_serial: None or int/long @param prev_log_serial: Highest log serial number previously received @type timeout: int/float @param timeout: Timeout in seconds (values larger than L{WFJC_TIMEOUT} will be capped to that value) """ assert timeout >= 0, "Timeout can not be negative" return self.CallMethod(REQ_WAIT_FOR_JOB_CHANGE, (job_id, fields, prev_job_info, prev_log_serial, min(WFJC_TIMEOUT, timeout))) def WaitForJobChange(self, job_id, fields, prev_job_info, prev_log_serial): job_id = Client._PrepareJobId(REQ_WAIT_FOR_JOB_CHANGE, job_id) while True: result = self.WaitForJobChangeOnce(job_id, fields, prev_job_info, prev_log_serial) if result != constants.JOB_NOTCHANGED: break return result def Query(self, what, fields, qfilter): """Query for resources/items. @param what: One of L{constants.QR_VIA_LUXI} @type fields: List of strings @param fields: List of requested fields @type qfilter: None or list @param qfilter: Query filter @rtype: L{objects.QueryResponse} """ result = self.CallMethod(REQ_QUERY, (what, fields, qfilter)) return objects.QueryResponse.FromDict(result) def QueryFields(self, what, fields): """Query for available fields. @param what: One of L{constants.QR_VIA_LUXI} @type fields: None or list of strings @param fields: List of requested fields @rtype: L{objects.QueryFieldsResponse} """ result = self.CallMethod(REQ_QUERY_FIELDS, (what, fields)) return objects.QueryFieldsResponse.FromDict(result) def QueryJobs(self, job_ids, fields): return self.CallMethod(REQ_QUERY_JOBS, (job_ids, fields)) def QueryInstances(self, names, fields, use_locking): return self.CallMethod(REQ_QUERY_INSTANCES, (names, fields, use_locking)) def QueryNodes(self, names, fields, use_locking): return self.CallMethod(REQ_QUERY_NODES, (names, fields, use_locking)) def QueryGroups(self, names, fields, use_locking): return self.CallMethod(REQ_QUERY_GROUPS, (names, fields, use_locking)) def QueryNetworks(self, names, fields, use_locking): return self.CallMethod(REQ_QUERY_NETWORKS, (names, fields, use_locking)) def QueryExports(self, nodes, use_locking): return self.CallMethod(REQ_QUERY_EXPORTS, (nodes, use_locking)) def QueryClusterInfo(self): return self.CallMethod(REQ_QUERY_CLUSTER_INFO, ()) def QueryConfigValues(self, fields): return self.CallMethod(REQ_QUERY_CONFIG_VALUES, (fields, )) def QueryTags(self, kind, name): return self.CallMethod(REQ_QUERY_TAGS, (kind, name)) ganeti-2.9.3/lib/config.py0000644000000000000000000030074212271422343015365 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Configuration management for Ganeti This module provides the interface to the Ganeti cluster configuration. The configuration data is stored on every node but is updated on the master only. After each update, the master distributes the data to the other nodes. Currently, the data storage format is JSON. YAML was slow and consuming too much memory. """ # pylint: disable=R0904 # R0904: Too many public methods import copy import os import random import logging import time import itertools from ganeti import errors from ganeti import locking from ganeti import utils from ganeti import constants from ganeti import rpc from ganeti import objects from ganeti import serializer from ganeti import uidpool from ganeti import netutils from ganeti import runtime from ganeti import pathutils from ganeti import network _config_lock = locking.SharedLock("ConfigWriter") # job id used for resource management at config upgrade time _UPGRADE_CONFIG_JID = "jid-cfg-upgrade" def _ValidateConfig(data): """Verifies that a configuration objects looks valid. This only verifies the version of the configuration. @raise errors.ConfigurationError: if the version differs from what we expect """ if data.version != constants.CONFIG_VERSION: raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version) class TemporaryReservationManager: """A temporary resource reservation manager. This is used to reserve resources in a job, before using them, making sure other jobs cannot get them in the meantime. """ def __init__(self): self._ec_reserved = {} def Reserved(self, resource): for holder_reserved in self._ec_reserved.values(): if resource in holder_reserved: return True return False def Reserve(self, ec_id, resource): if self.Reserved(resource): raise errors.ReservationError("Duplicate reservation for resource '%s'" % str(resource)) if ec_id not in self._ec_reserved: self._ec_reserved[ec_id] = set([resource]) else: self._ec_reserved[ec_id].add(resource) def DropECReservations(self, ec_id): if ec_id in self._ec_reserved: del self._ec_reserved[ec_id] def GetReserved(self): all_reserved = set() for holder_reserved in self._ec_reserved.values(): all_reserved.update(holder_reserved) return all_reserved def GetECReserved(self, ec_id): """ Used when you want to retrieve all reservations for a specific execution context. E.g when commiting reserved IPs for a specific network. """ ec_reserved = set() if ec_id in self._ec_reserved: ec_reserved.update(self._ec_reserved[ec_id]) return ec_reserved def Generate(self, existing, generate_one_fn, ec_id): """Generate a new resource of this type """ assert callable(generate_one_fn) all_elems = self.GetReserved() all_elems.update(existing) retries = 64 while retries > 0: new_resource = generate_one_fn() if new_resource is not None and new_resource not in all_elems: break else: raise errors.ConfigurationError("Not able generate new resource" " (last tried: %s)" % new_resource) self.Reserve(ec_id, new_resource) return new_resource def _MatchNameComponentIgnoreCase(short_name, names): """Wrapper around L{utils.text.MatchNameComponent}. """ return utils.MatchNameComponent(short_name, names, case_sensitive=False) def _CheckInstanceDiskIvNames(disks): """Checks if instance's disks' C{iv_name} attributes are in order. 
@type disks: list of L{objects.Disk} @param disks: List of disks @rtype: list of tuples; (int, string, string) @return: List of wrongly named disks, each tuple contains disk index, expected and actual name """ result = [] for (idx, disk) in enumerate(disks): exp_iv_name = "disk/%s" % idx if disk.iv_name != exp_iv_name: result.append((idx, exp_iv_name, disk.iv_name)) return result class ConfigWriter: """The interface to the cluster configuration. @ivar _temporary_lvs: reservation manager for temporary LVs @ivar _all_rms: a list of all temporary reservation managers """ def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts, accept_foreign=False): self.write_count = 0 self._lock = _config_lock self._config_data = None self._offline = offline if cfg_file is None: self._cfg_file = pathutils.CLUSTER_CONF_FILE else: self._cfg_file = cfg_file self._getents = _getents self._temporary_ids = TemporaryReservationManager() self._temporary_drbds = {} self._temporary_macs = TemporaryReservationManager() self._temporary_secrets = TemporaryReservationManager() self._temporary_lvs = TemporaryReservationManager() self._temporary_ips = TemporaryReservationManager() self._all_rms = [self._temporary_ids, self._temporary_macs, self._temporary_secrets, self._temporary_lvs, self._temporary_ips] # Note: in order to prevent errors when resolving our name in # _DistributeConfig, we compute it here once and reuse it; it's # better to raise an error before starting to modify the config # file than after it was modified self._my_hostname = netutils.Hostname.GetSysName() self._last_cluster_serial = -1 self._cfg_id = None self._context = None self._OpenConfig(accept_foreign) def _GetRpc(self, address_list): """Returns RPC runner for configuration. """ return rpc.ConfigRunner(self._context, address_list) def SetContext(self, context): """Sets Ganeti context. """ self._context = context # this method needs to be static, so that we can call it on the class @staticmethod def IsCluster(): """Check if the cluster is configured. """ return os.path.exists(pathutils.CLUSTER_CONF_FILE) @locking.ssynchronized(_config_lock, shared=1) def GetNdParams(self, node): """Get the node params populated with cluster defaults. @type node: L{objects.Node} @param node: The node we want to know the params for @return: A dict with the filled in node params """ nodegroup = self._UnlockedGetNodeGroup(node.group) return self._config_data.cluster.FillND(node, nodegroup) @locking.ssynchronized(_config_lock, shared=1) def GetInstanceDiskParams(self, instance): """Get the disk params populated with inherit chain. @type instance: L{objects.Instance} @param instance: The instance we want to know the params for @return: A dict with the filled in disk params """ node = self._UnlockedGetNodeInfo(instance.primary_node) nodegroup = self._UnlockedGetNodeGroup(node.group) return self._UnlockedGetGroupDiskParams(nodegroup) @locking.ssynchronized(_config_lock, shared=1) def GetGroupDiskParams(self, group): """Get the disk params populated with inherit chain. @type group: L{objects.NodeGroup} @param group: The group we want to know the params for @return: A dict with the filled in disk params """ return self._UnlockedGetGroupDiskParams(group) def _UnlockedGetGroupDiskParams(self, group): """Get the disk params populated with inherit chain down to node-group. 
@type group: L{objects.NodeGroup} @param group: The group we want to know the params for @return: A dict with the filled in disk params """ return self._config_data.cluster.SimpleFillDP(group.diskparams) def _UnlockedGetNetworkMACPrefix(self, net_uuid): """Return the network MAC prefix if it exists or the cluster-level default. """ prefix = None if net_uuid: nobj = self._UnlockedGetNetwork(net_uuid) if nobj.mac_prefix: prefix = nobj.mac_prefix return prefix def _GenerateOneMAC(self, prefix=None): """Return a function that randomly generates a MAC suffix and appends it to the given prefix. If prefix is not given, use the cluster-level default. """ if not prefix: prefix = self._config_data.cluster.mac_prefix def GenMac(): byte1 = random.randrange(0, 256) byte2 = random.randrange(0, 256) byte3 = random.randrange(0, 256) mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3) return mac return GenMac @locking.ssynchronized(_config_lock, shared=1) def GenerateMAC(self, net_uuid, ec_id): """Generate a MAC for an instance. This should check the current instances for duplicates. """ existing = self._AllMACs() prefix = self._UnlockedGetNetworkMACPrefix(net_uuid) gen_mac = self._GenerateOneMAC(prefix) return self._temporary_ids.Generate(existing, gen_mac, ec_id) @locking.ssynchronized(_config_lock, shared=1) def ReserveMAC(self, mac, ec_id): """Reserve a MAC for an instance. This only checks instances managed by this cluster, it does not check for potential collisions elsewhere. """ all_macs = self._AllMACs() if mac in all_macs: raise errors.ReservationError("mac already in use") else: self._temporary_macs.Reserve(ec_id, mac) def _UnlockedCommitTemporaryIps(self, ec_id): """Commit all reserved IP addresses to their respective pools. """ for action, address, net_uuid in self._temporary_ips.GetECReserved(ec_id): self._UnlockedCommitIp(action, net_uuid, address) def _UnlockedCommitIp(self, action, net_uuid, address): """Commit a reserved IP address to an IP pool. Depending on the action, the IP address is either marked as reserved in or released from the network's IP pool. """ nobj = self._UnlockedGetNetwork(net_uuid) pool = network.AddressPool(nobj) if action == constants.RESERVE_ACTION: pool.Reserve(address) elif action == constants.RELEASE_ACTION: pool.Release(address) def _UnlockedReleaseIp(self, net_uuid, address, ec_id): """Give a specific IP address back to an IP pool. The release is recorded as a temporary reservation and only committed to the pool designated by net_uuid by L{_UnlockedCommitTemporaryIps}. """ self._temporary_ips.Reserve(ec_id, (constants.RELEASE_ACTION, address, net_uuid)) @locking.ssynchronized(_config_lock, shared=1) def ReleaseIp(self, net_uuid, address, ec_id): """Give a specified IP address back to an IP pool. This is just a wrapper around _UnlockedReleaseIp. """ if net_uuid: self._UnlockedReleaseIp(net_uuid, address, ec_id) @locking.ssynchronized(_config_lock, shared=1) def GenerateIp(self, net_uuid, ec_id): """Find a free IPv4 address for an instance. """ nobj = self._UnlockedGetNetwork(net_uuid) pool = network.AddressPool(nobj) def gen_one(): try: ip = pool.GenerateFree() except errors.AddressPoolError: raise errors.ReservationError("Cannot generate IP. Network is full") return (constants.RESERVE_ACTION, ip, net_uuid) _, address, _ = self._temporary_ips.Generate([], gen_one, ec_id) return address def _UnlockedReserveIp(self, net_uuid, address, ec_id): """Reserve a given IPv4 address for use by an instance.
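# Illustrative only (config.py does not import re; the prefix is an example):
# the GenMac closure returned above produces the given prefix plus three
# random octets, e.g. "aa:00:00:3f:91:c4".
import re as _re

def _DemoGenMac(prefix):
  """Mirrors the format string used by GenMac above."""
  return "%s:%02x:%02x:%02x" % (prefix,
                                random.randrange(0, 256),
                                random.randrange(0, 256),
                                random.randrange(0, 256))

assert _re.match(r"^aa:00:00(:[0-9a-f]{2}){3}$", _DemoGenMac("aa:00:00"))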
""" nobj = self._UnlockedGetNetwork(net_uuid) pool = network.AddressPool(nobj) try: isreserved = pool.IsReserved(address) except errors.AddressPoolError: raise errors.ReservationError("IP address not in network") if isreserved: raise errors.ReservationError("IP address already in use") return self._temporary_ips.Reserve(ec_id, (constants.RESERVE_ACTION, address, net_uuid)) @locking.ssynchronized(_config_lock, shared=1) def ReserveIp(self, net_uuid, address, ec_id): """Reserve a given IPv4 address for use by an instance. """ if net_uuid: return self._UnlockedReserveIp(net_uuid, address, ec_id) @locking.ssynchronized(_config_lock, shared=1) def ReserveLV(self, lv_name, ec_id): """Reserve an VG/LV pair for an instance. @type lv_name: string @param lv_name: the logical volume name to reserve """ all_lvs = self._AllLVs() if lv_name in all_lvs: raise errors.ReservationError("LV already in use") else: self._temporary_lvs.Reserve(ec_id, lv_name) @locking.ssynchronized(_config_lock, shared=1) def GenerateDRBDSecret(self, ec_id): """Generate a DRBD secret. This checks the current disks for duplicates. """ return self._temporary_secrets.Generate(self._AllDRBDSecrets(), utils.GenerateSecret, ec_id) def _AllLVs(self): """Compute the list of all LVs. """ lvnames = set() for instance in self._config_data.instances.values(): node_data = instance.MapLVsByNode() for lv_list in node_data.values(): lvnames.update(lv_list) return lvnames def _AllDisks(self): """Compute the list of all Disks (recursively, including children). """ def DiskAndAllChildren(disk): """Returns a list containing the given disk and all of his children. """ disks = [disk] if disk.children: for child_disk in disk.children: disks.extend(DiskAndAllChildren(child_disk)) return disks disks = [] for instance in self._config_data.instances.values(): for disk in instance.disks: disks.extend(DiskAndAllChildren(disk)) return disks def _AllNICs(self): """Compute the list of all NICs. """ nics = [] for instance in self._config_data.instances.values(): nics.extend(instance.nics) return nics def _AllIDs(self, include_temporary): """Compute the list of all UUIDs and names we have. @type include_temporary: boolean @param include_temporary: whether to include the _temporary_ids set @rtype: set @return: a set of IDs """ existing = set() if include_temporary: existing.update(self._temporary_ids.GetReserved()) existing.update(self._AllLVs()) existing.update(self._config_data.instances.keys()) existing.update(self._config_data.nodes.keys()) existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid]) return existing def _GenerateUniqueID(self, ec_id): """Generate an unique UUID. This checks the current node, instances and disk names for duplicates. @rtype: string @return: the unique id """ existing = self._AllIDs(include_temporary=False) return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id) @locking.ssynchronized(_config_lock, shared=1) def GenerateUniqueID(self, ec_id): """Generate an unique ID. This is just a wrapper over the unlocked version. @type ec_id: string @param ec_id: unique id for the job to reserve the id to """ return self._GenerateUniqueID(ec_id) def _AllMACs(self): """Return all MACs present in the config. @rtype: list @return: the list of all MACs """ result = [] for instance in self._config_data.instances.values(): for nic in instance.nics: result.append(nic.mac) return result def _AllDRBDSecrets(self): """Return all DRBD secrets present in the config. 
@rtype: list @return: the list of all DRBD secrets """ def helper(disk, result): """Recursively gather secrets from this disk.""" if disk.dev_type == constants.DT_DRBD8: result.append(disk.logical_id[5]) if disk.children: for child in disk.children: helper(child, result) result = [] for instance in self._config_data.instances.values(): for disk in instance.disks: helper(disk, result) return result def _CheckDiskIDs(self, disk, l_ids, p_ids): """Compute duplicate disk IDs @type disk: L{objects.Disk} @param disk: the disk at which to start searching @type l_ids: list @param l_ids: list of current logical ids @type p_ids: list @param p_ids: list of current physical ids @rtype: list @return: a list of error messages """ result = [] if disk.logical_id is not None: if disk.logical_id in l_ids: result.append("duplicate logical id %s" % str(disk.logical_id)) else: l_ids.append(disk.logical_id) if disk.physical_id is not None: if disk.physical_id in p_ids: result.append("duplicate physical id %s" % str(disk.physical_id)) else: p_ids.append(disk.physical_id) if disk.children: for child in disk.children: result.extend(self._CheckDiskIDs(child, l_ids, p_ids)) return result def _UnlockedVerifyConfig(self): """Verify function. @rtype: list @return: a list of error messages; a non-empty list signifies configuration errors """ # pylint: disable=R0914 result = [] seen_macs = [] ports = {} data = self._config_data cluster = data.cluster seen_lids = [] seen_pids = [] # global cluster checks if not cluster.enabled_hypervisors: result.append("enabled hypervisors list doesn't have any entries") invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES if invalid_hvs: result.append("enabled hypervisors contains invalid entries: %s" % utils.CommaJoin(invalid_hvs)) missing_hvp = (set(cluster.enabled_hypervisors) - set(cluster.hvparams.keys())) if missing_hvp: result.append("hypervisor parameters missing for the enabled" " hypervisor(s) %s" % utils.CommaJoin(missing_hvp)) if not cluster.enabled_disk_templates: result.append("enabled disk templates list doesn't have any entries") invalid_disk_templates = set(cluster.enabled_disk_templates) \ - constants.DISK_TEMPLATES if invalid_disk_templates: result.append("enabled disk templates list contains invalid entries:" " %s" % utils.CommaJoin(invalid_disk_templates)) if cluster.master_node not in data.nodes: result.append("cluster has invalid primary node '%s'" % cluster.master_node) def _helper(owner, attr, value, template): try: utils.ForceDictType(value, template) except errors.GenericError, err: result.append("%s has invalid %s: %s" % (owner, attr, err)) def _helper_nic(owner, params): try: objects.NIC.CheckParameterSyntax(params) except errors.ConfigurationError, err: result.append("%s has invalid nicparams: %s" % (owner, err)) def _helper_ipolicy(owner, ipolicy, iscluster): try: objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster) except errors.ConfigurationError, err: result.append("%s has invalid instance policy: %s" % (owner, err)) for key, value in ipolicy.items(): if key == constants.ISPECS_MINMAX: for k in range(len(value)): _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k]) elif key == constants.ISPECS_STD: _helper(owner, "ipolicy/" + key, value, constants.ISPECS_PARAMETER_TYPES) else: # FIXME: assuming list type if key in constants.IPOLICY_PARAMETERS: exp_type = float else: exp_type = list if not isinstance(value, exp_type): result.append("%s has invalid instance policy: for %s," " expecting %s, got %s" % (owner, key, 
exp_type.__name__, type(value))) def _helper_ispecs(owner, parentkey, params): for (key, value) in params.items(): fullkey = "/".join([parentkey, key]) _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES) # check cluster parameters _helper("cluster", "beparams", cluster.SimpleFillBE({}), constants.BES_PARAMETER_TYPES) _helper("cluster", "nicparams", cluster.SimpleFillNIC({}), constants.NICS_PARAMETER_TYPES) _helper_nic("cluster", cluster.SimpleFillNIC({})) _helper("cluster", "ndparams", cluster.SimpleFillND({}), constants.NDS_PARAMETER_TYPES) _helper_ipolicy("cluster", cluster.ipolicy, True) # per-instance checks for instance_uuid in data.instances: instance = data.instances[instance_uuid] if instance.uuid != instance_uuid: result.append("instance '%s' is indexed by wrong UUID '%s'" % (instance.name, instance_uuid)) if instance.primary_node not in data.nodes: result.append("instance '%s' has invalid primary node '%s'" % (instance.name, instance.primary_node)) for snode in instance.secondary_nodes: if snode not in data.nodes: result.append("instance '%s' has invalid secondary node '%s'" % (instance.name, snode)) for idx, nic in enumerate(instance.nics): if nic.mac in seen_macs: result.append("instance '%s' has NIC %d mac %s duplicate" % (instance.name, idx, nic.mac)) else: seen_macs.append(nic.mac) if nic.nicparams: filled = cluster.SimpleFillNIC(nic.nicparams) owner = "instance %s nic %d" % (instance.name, idx) _helper(owner, "nicparams", filled, constants.NICS_PARAMETER_TYPES) _helper_nic(owner, filled) # disk template checks if not instance.disk_template in data.cluster.enabled_disk_templates: result.append("instance '%s' uses the disabled disk template '%s'." % (instance.name, instance.disk_template)) # parameter checks if instance.beparams: _helper("instance %s" % instance.name, "beparams", cluster.FillBE(instance), constants.BES_PARAMETER_TYPES) # gather the drbd ports for duplicate checks for (idx, dsk) in enumerate(instance.disks): if dsk.dev_type in constants.DTS_DRBD: tcp_port = dsk.logical_id[2] if tcp_port not in ports: ports[tcp_port] = [] ports[tcp_port].append((instance.name, "drbd disk %s" % idx)) # gather network port reservation net_port = getattr(instance, "network_port", None) if net_port is not None: if net_port not in ports: ports[net_port] = [] ports[net_port].append((instance.name, "network port")) # instance disk verify for idx, disk in enumerate(instance.disks): result.extend(["instance '%s' disk %d error: %s" % (instance.name, idx, msg) for msg in disk.Verify()]) result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids)) wrong_names = _CheckInstanceDiskIvNames(instance.disks) if wrong_names: tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" % (idx, exp_name, actual_name)) for (idx, exp_name, actual_name) in wrong_names) result.append("Instance '%s' has wrongly named disks: %s" % (instance.name, tmp)) # cluster-wide pool of free ports for free_port in cluster.tcpudp_port_pool: if free_port not in ports: ports[free_port] = [] ports[free_port].append(("cluster", "port marked as free")) # compute tcp/udp duplicate ports keys = ports.keys() keys.sort() for pnum in keys: pdata = ports[pnum] if len(pdata) > 1: txt = utils.CommaJoin(["%s/%s" % val for val in pdata]) result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt)) # highest used tcp port check if keys: if keys[-1] > cluster.highest_used_port: result.append("Highest used port mismatch, saved %s, computed %s" % (cluster.highest_used_port, keys[-1])) if not 
data.nodes[cluster.master_node].master_candidate: result.append("Master node is not a master candidate") # master candidate checks mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats() if mc_now < mc_max: result.append("Not enough master candidates: actual %d, target %d" % (mc_now, mc_max)) # node checks for node_uuid, node in data.nodes.items(): if node.uuid != node_uuid: result.append("Node '%s' is indexed by wrong UUID '%s'" % (node.name, node_uuid)) if [node.master_candidate, node.drained, node.offline].count(True) > 1: result.append("Node %s state is invalid: master_candidate=%s," " drain=%s, offline=%s" % (node.name, node.master_candidate, node.drained, node.offline)) if node.group not in data.nodegroups: result.append("Node '%s' has invalid group '%s'" % (node.name, node.group)) else: _helper("node %s" % node.name, "ndparams", cluster.FillND(node, data.nodegroups[node.group]), constants.NDS_PARAMETER_TYPES) used_globals = constants.NDC_GLOBALS.intersection(node.ndparams) if used_globals: result.append("Node '%s' has some global parameters set: %s" % (node.name, utils.CommaJoin(used_globals))) # nodegroups checks nodegroups_names = set() for nodegroup_uuid in data.nodegroups: nodegroup = data.nodegroups[nodegroup_uuid] if nodegroup.uuid != nodegroup_uuid: result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'" % (nodegroup.name, nodegroup.uuid, nodegroup_uuid)) if utils.UUID_RE.match(nodegroup.name.lower()): result.append("node group '%s' (uuid: '%s') has uuid-like name" % (nodegroup.name, nodegroup.uuid)) if nodegroup.name in nodegroups_names: result.append("duplicate node group name '%s'" % nodegroup.name) else: nodegroups_names.add(nodegroup.name) group_name = "group %s" % nodegroup.name _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy), False) if nodegroup.ndparams: _helper(group_name, "ndparams", cluster.SimpleFillND(nodegroup.ndparams), constants.NDS_PARAMETER_TYPES) # drbd minors check _, duplicates = self._UnlockedComputeDRBDMap() for node, minor, instance_a, instance_b in duplicates: result.append("DRBD minor %d on node %s is assigned twice to instances" " %s and %s" % (minor, node, instance_a, instance_b)) # IP checks default_nicparams = cluster.nicparams[constants.PP_DEFAULT] ips = {} def _AddIpAddress(ip, name): ips.setdefault(ip, []).append(name) _AddIpAddress(cluster.master_ip, "cluster_ip") for node in data.nodes.values(): _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name) if node.secondary_ip != node.primary_ip: _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name) for instance in data.instances.values(): for idx, nic in enumerate(instance.nics): if nic.ip is None: continue nicparams = objects.FillDict(default_nicparams, nic.nicparams) nic_mode = nicparams[constants.NIC_MODE] nic_link = nicparams[constants.NIC_LINK] if nic_mode == constants.NIC_MODE_BRIDGED: link = "bridge:%s" % nic_link elif nic_mode == constants.NIC_MODE_ROUTED: link = "route:%s" % nic_link else: raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode) _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network), "instance:%s/nic:%d" % (instance.name, idx)) for ip, owners in ips.items(): if len(owners) > 1: result.append("IP address %s is used by multiple owners: %s" % (ip, utils.CommaJoin(owners))) return result @locking.ssynchronized(_config_lock, shared=1) def VerifyConfig(self): """Verify function. This is just a wrapper over L{_UnlockedVerifyConfig}. 
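The IP and port checks above all use the same grouping idiom: collect owners per resource, then flag any resource with more than one owner. A minimal stand-alone sketch (data values hypothetical):

  def find_duplicates(pairs):
    # Group owners by resource, keep resources with more than one owner.
    owners = {}
    for resource, owner in pairs:
      owners.setdefault(resource, []).append(owner)
    return dict((r, o) for (r, o) in owners.items() if len(o) > 1)

  assert find_duplicates([("192.0.2.1", "cluster_ip"),
                          ("192.0.2.1", "node:n1/primary"),
                          ("192.0.2.2", "node:n2/primary")]) == \
      {"192.0.2.1": ["cluster_ip", "node:n1/primary"]}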
    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()

  def _UnlockedSetDiskID(self, disk, node_uuid):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    This function is for internal use, when the config lock is already
    held.

    """
    if disk.children:
      for child in disk.children:
        self._UnlockedSetDiskID(child, node_uuid)

    if disk.logical_id is None and disk.physical_id is not None:
      return
    if disk.dev_type == constants.DT_DRBD8:
      pnode, snode, port, pminor, sminor, secret = disk.logical_id
      if node_uuid not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device does not know node %s" %
                                        node_uuid)
      pnode_info = self._UnlockedGetNodeInfo(pnode)
      snode_info = self._UnlockedGetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      p_data = (pnode_info.secondary_ip, port)
      s_data = (snode_info.secondary_ip, port)
      if pnode == node_uuid:
        disk.physical_id = p_data + s_data + (pminor, secret)
      else: # it must be secondary, we tested above
        disk.physical_id = s_data + p_data + (sminor, secret)
    else:
      disk.physical_id = disk.logical_id
    return

  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_uuid):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    """
    return self._UnlockedSetDiskID(disk, node_uuid)

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    @warning: this method does not "flush" the configuration (via
        L{_WriteConfig}); callers should do that themselves once the
        configuration is stable

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._config_data.cluster.tcpudp_port_pool.add(port)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    return self._config_data.cluster.tcpudp_port_pool.copy()

  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    """
    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.
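As a side note on AllocatePort above, its strategy reduces to "pop from the free pool if possible, else extend the highest used port". A minimal stand-alone sketch, with a hypothetical hard-coded bound where the real code uses constants.LAST_DRBD_PORT:

  def allocate_port(free_pool, highest_used, last_port=15000):
    # Prefer a previously returned port; otherwise grow the range.
    if free_pool:
      return free_pool.pop(), highest_used
    port = highest_used + 1
    if port >= last_port:
      raise ValueError("port space exhausted")
    return port, port

  assert allocate_port(set([11005]), 11020) == (11005, 11020)
  assert allocate_port(set(), 11020) == (11021, 11021)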
@rtype: (dict, list) @return: dictionary of node_uuid: dict of minor: instance_uuid; the returned dict will have all the nodes in it (even if with an empty list), and a list of duplicates; if the duplicates list is not empty, the configuration is corrupted and its caller should raise an exception """ def _AppendUsedMinors(get_node_name_fn, instance, disk, used): duplicates = [] if disk.dev_type == constants.DT_DRBD8 and len(disk.logical_id) >= 5: node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5] for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)): assert node_uuid in used, \ ("Node '%s' of instance '%s' not found in node list" % (get_node_name_fn(node_uuid), instance.name)) if minor in used[node_uuid]: duplicates.append((node_uuid, minor, instance.uuid, used[node_uuid][minor])) else: used[node_uuid][minor] = instance.uuid if disk.children: for child in disk.children: duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child, used)) return duplicates duplicates = [] my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes) for instance in self._config_data.instances.itervalues(): for disk in instance.disks: duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName, instance, disk, my_dict)) for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems(): if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid: duplicates.append((node_uuid, minor, inst_uuid, my_dict[node_uuid][minor])) else: my_dict[node_uuid][minor] = inst_uuid return my_dict, duplicates @locking.ssynchronized(_config_lock) def ComputeDRBDMap(self): """Compute the used DRBD minor/nodes. This is just a wrapper over L{_UnlockedComputeDRBDMap}. @return: dictionary of node_uuid: dict of minor: instance_uuid; the returned dict will have all the nodes in it (even if with an empty list). """ d_map, duplicates = self._UnlockedComputeDRBDMap() if duplicates: raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" % str(duplicates)) return d_map @locking.ssynchronized(_config_lock) def AllocateDRBDMinor(self, node_uuids, inst_uuid): """Allocate a drbd minor. The free minor will be automatically computed from the existing devices. A node can be given multiple times in order to allocate multiple minors. The result is the list of minors, in the same order as the passed nodes. 
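The minor chosen below is the first gap in the sorted list of used minors, or one past the highest used minor when there is no gap. A minimal stand-alone sketch of that rule (hypothetical helper; the real code uses utils.FirstFree):

  def first_free(used):
    # Return the first non-negative integer missing from 'used'.
    for idx, minor in enumerate(sorted(used)):
      if minor != idx:
        return idx
    return len(used)

  assert first_free([]) == 0
  assert first_free([0, 1, 3]) == 2
  assert first_free([0, 1, 2]) == 3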
@type inst_uuid: string @param inst_uuid: the instance for which we allocate minors """ assert isinstance(inst_uuid, basestring), \ "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid d_map, duplicates = self._UnlockedComputeDRBDMap() if duplicates: raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" % str(duplicates)) result = [] for nuuid in node_uuids: ndata = d_map[nuuid] if not ndata: # no minors used, we can start at 0 result.append(0) ndata[0] = inst_uuid self._temporary_drbds[(nuuid, 0)] = inst_uuid continue keys = ndata.keys() keys.sort() ffree = utils.FirstFree(keys) if ffree is None: # return the next minor # TODO: implement high-limit check minor = keys[-1] + 1 else: minor = ffree # double-check minor against current instances assert minor not in d_map[nuuid], \ ("Attempt to reuse allocated DRBD minor %d on node %s," " already allocated to instance %s" % (minor, nuuid, d_map[nuuid][minor])) ndata[minor] = inst_uuid # double-check minor against reservation r_key = (nuuid, minor) assert r_key not in self._temporary_drbds, \ ("Attempt to reuse reserved DRBD minor %d on node %s," " reserved for instance %s" % (minor, nuuid, self._temporary_drbds[r_key])) self._temporary_drbds[r_key] = inst_uuid result.append(minor) logging.debug("Request to allocate drbd minors, input: %s, returning %s", node_uuids, result) return result def _UnlockedReleaseDRBDMinors(self, inst_uuid): """Release temporary drbd minors allocated for a given instance. @type inst_uuid: string @param inst_uuid: the instance for which temporary minors should be released """ assert isinstance(inst_uuid, basestring), \ "Invalid argument passed to ReleaseDRBDMinors" for key, uuid in self._temporary_drbds.items(): if uuid == inst_uuid: del self._temporary_drbds[key] @locking.ssynchronized(_config_lock) def ReleaseDRBDMinors(self, inst_uuid): """Release temporary drbd minors allocated for a given instance. This should be called on the error paths, on the success paths it's automatically called by the ConfigWriter add and update functions. This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}. @type inst_uuid: string @param inst_uuid: the instance for which temporary minors should be released """ self._UnlockedReleaseDRBDMinors(inst_uuid) @locking.ssynchronized(_config_lock, shared=1) def GetConfigVersion(self): """Get the configuration version. @return: Config version """ return self._config_data.version @locking.ssynchronized(_config_lock, shared=1) def GetClusterName(self): """Get cluster name. @return: Cluster name """ return self._config_data.cluster.cluster_name @locking.ssynchronized(_config_lock, shared=1) def GetMasterNode(self): """Get the UUID of the master node for this cluster. @return: Master node UUID """ return self._config_data.cluster.master_node @locking.ssynchronized(_config_lock, shared=1) def GetMasterNodeName(self): """Get the hostname of the master node for this cluster. @return: Master node hostname """ return self._UnlockedGetNodeName(self._config_data.cluster.master_node) @locking.ssynchronized(_config_lock, shared=1) def GetMasterIP(self): """Get the IP of the master node for this cluster. @return: Master IP """ return self._config_data.cluster.master_ip @locking.ssynchronized(_config_lock, shared=1) def GetMasterNetdev(self): """Get the master network device for this cluster. """ return self._config_data.cluster.master_netdev @locking.ssynchronized(_config_lock, shared=1) def GetMasterNetmask(self): """Get the netmask of the master node for this cluster. 
""" return self._config_data.cluster.master_netmask @locking.ssynchronized(_config_lock, shared=1) def GetUseExternalMipScript(self): """Get flag representing whether to use the external master IP setup script. """ return self._config_data.cluster.use_external_mip_script @locking.ssynchronized(_config_lock, shared=1) def GetFileStorageDir(self): """Get the file storage dir for this cluster. """ return self._config_data.cluster.file_storage_dir @locking.ssynchronized(_config_lock, shared=1) def GetSharedFileStorageDir(self): """Get the shared file storage dir for this cluster. """ return self._config_data.cluster.shared_file_storage_dir @locking.ssynchronized(_config_lock, shared=1) def GetHypervisorType(self): """Get the hypervisor type for this cluster. """ return self._config_data.cluster.enabled_hypervisors[0] @locking.ssynchronized(_config_lock, shared=1) def GetRsaHostKey(self): """Return the rsa hostkey from the config. @rtype: string @return: the rsa hostkey """ return self._config_data.cluster.rsahostkeypub @locking.ssynchronized(_config_lock, shared=1) def GetDsaHostKey(self): """Return the dsa hostkey from the config. @rtype: string @return: the dsa hostkey """ return self._config_data.cluster.dsahostkeypub @locking.ssynchronized(_config_lock, shared=1) def GetDefaultIAllocator(self): """Get the default instance allocator for this cluster. """ return self._config_data.cluster.default_iallocator @locking.ssynchronized(_config_lock, shared=1) def GetPrimaryIPFamily(self): """Get cluster primary ip family. @return: primary ip family """ return self._config_data.cluster.primary_ip_family @locking.ssynchronized(_config_lock, shared=1) def GetMasterNetworkParameters(self): """Get network parameters of the master node. @rtype: L{object.MasterNetworkParameters} @return: network parameters of the master node """ cluster = self._config_data.cluster result = objects.MasterNetworkParameters( uuid=cluster.master_node, ip=cluster.master_ip, netmask=cluster.master_netmask, netdev=cluster.master_netdev, ip_family=cluster.primary_ip_family) return result @locking.ssynchronized(_config_lock) def AddNodeGroup(self, group, ec_id, check_uuid=True): """Add a node group to the configuration. This method calls group.UpgradeConfig() to fill any missing attributes according to their default values. @type group: L{objects.NodeGroup} @param group: the NodeGroup object to add @type ec_id: string @param ec_id: unique id for the job to use when creating a missing UUID @type check_uuid: bool @param check_uuid: add an UUID to the group if it doesn't have one or, if it does, ensure that it does not exist in the configuration already """ self._UnlockedAddNodeGroup(group, ec_id, check_uuid) self._WriteConfig() def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid): """Add a node group to the configuration. """ logging.info("Adding node group %s to configuration", group.name) # Some code might need to add a node group with a pre-populated UUID # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass # the "does this UUID" exist already check. 
if check_uuid: self._EnsureUUID(group, ec_id) try: existing_uuid = self._UnlockedLookupNodeGroup(group.name) except errors.OpPrereqError: pass else: raise errors.OpPrereqError("Desired group name '%s' already exists as a" " node group (UUID: %s)" % (group.name, existing_uuid), errors.ECODE_EXISTS) group.serial_no = 1 group.ctime = group.mtime = time.time() group.UpgradeConfig() self._config_data.nodegroups[group.uuid] = group self._config_data.cluster.serial_no += 1 @locking.ssynchronized(_config_lock) def RemoveNodeGroup(self, group_uuid): """Remove a node group from the configuration. @type group_uuid: string @param group_uuid: the UUID of the node group to remove """ logging.info("Removing node group %s from configuration", group_uuid) if group_uuid not in self._config_data.nodegroups: raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid) assert len(self._config_data.nodegroups) != 1, \ "Group '%s' is the only group, cannot be removed" % group_uuid del self._config_data.nodegroups[group_uuid] self._config_data.cluster.serial_no += 1 self._WriteConfig() def _UnlockedLookupNodeGroup(self, target): """Lookup a node group's UUID. @type target: string or None @param target: group name or UUID or None to look for the default @rtype: string @return: nodegroup UUID @raises errors.OpPrereqError: when the target group cannot be found """ if target is None: if len(self._config_data.nodegroups) != 1: raise errors.OpPrereqError("More than one node group exists. Target" " group must be specified explicitly.") else: return self._config_data.nodegroups.keys()[0] if target in self._config_data.nodegroups: return target for nodegroup in self._config_data.nodegroups.values(): if nodegroup.name == target: return nodegroup.uuid raise errors.OpPrereqError("Node group '%s' not found" % target, errors.ECODE_NOENT) @locking.ssynchronized(_config_lock, shared=1) def LookupNodeGroup(self, target): """Lookup a node group's UUID. This function is just a wrapper over L{_UnlockedLookupNodeGroup}. @type target: string or None @param target: group name or UUID or None to look for the default @rtype: string @return: nodegroup UUID """ return self._UnlockedLookupNodeGroup(target) def _UnlockedGetNodeGroup(self, uuid): """Lookup a node group. @type uuid: string @param uuid: group UUID @rtype: L{objects.NodeGroup} or None @return: nodegroup object, or None if not found """ if uuid not in self._config_data.nodegroups: return None return self._config_data.nodegroups[uuid] @locking.ssynchronized(_config_lock, shared=1) def GetNodeGroup(self, uuid): """Lookup a node group. @type uuid: string @param uuid: group UUID @rtype: L{objects.NodeGroup} or None @return: nodegroup object, or None if not found """ return self._UnlockedGetNodeGroup(uuid) @locking.ssynchronized(_config_lock, shared=1) def GetAllNodeGroupsInfo(self): """Get the configuration of all node groups. """ return dict(self._config_data.nodegroups) @locking.ssynchronized(_config_lock, shared=1) def GetNodeGroupList(self): """Get a list of node groups. """ return self._config_data.nodegroups.keys() @locking.ssynchronized(_config_lock, shared=1) def GetNodeGroupMembersByNodes(self, nodes): """Get nodes which are member in the same nodegroups as the given nodes. 
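Node group lookup above resolves its target in a fixed order: None selects the sole group, an exact UUID match wins, and only then the names are scanned. A minimal stand-alone sketch over a plain {uuid: name} dict (hypothetical data):

  def lookup_group(groups, target):
    if target is None:
      if len(groups) != 1:
        raise ValueError("target group must be specified explicitly")
      return list(groups)[0]
    if target in groups:          # exact UUID hit
      return target
    for uuid, name in groups.items():
      if name == target:          # fall back to a name scan
        return uuid
    raise KeyError(target)

  _groups = {"uuid-1": "default"}
  assert lookup_group(_groups, None) == "uuid-1"
  assert lookup_group(_groups, "default") == "uuid-1"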
""" ngfn = lambda node_uuid: self._UnlockedGetNodeInfo(node_uuid).group return frozenset(member_uuid for node_uuid in nodes for member_uuid in self._UnlockedGetNodeGroup(ngfn(node_uuid)).members) @locking.ssynchronized(_config_lock, shared=1) def GetMultiNodeGroupInfo(self, group_uuids): """Get the configuration of multiple node groups. @param group_uuids: List of node group UUIDs @rtype: list @return: List of tuples of (group_uuid, group_info) """ return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids] @locking.ssynchronized(_config_lock) def AddInstance(self, instance, ec_id): """Add an instance to the config. This should be used after creating a new instance. @type instance: L{objects.Instance} @param instance: the instance object """ if not isinstance(instance, objects.Instance): raise errors.ProgrammerError("Invalid type passed to AddInstance") if instance.disk_template != constants.DT_DISKLESS: all_lvs = instance.MapLVsByNode() logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs) all_macs = self._AllMACs() for nic in instance.nics: if nic.mac in all_macs: raise errors.ConfigurationError("Cannot add instance %s:" " MAC address '%s' already in use." % (instance.name, nic.mac)) self._CheckUniqueUUID(instance, include_temporary=False) instance.serial_no = 1 instance.ctime = instance.mtime = time.time() self._config_data.instances[instance.uuid] = instance self._config_data.cluster.serial_no += 1 self._UnlockedReleaseDRBDMinors(instance.uuid) self._UnlockedCommitTemporaryIps(ec_id) self._WriteConfig() def _EnsureUUID(self, item, ec_id): """Ensures a given object has a valid UUID. @param item: the instance or node to be checked @param ec_id: the execution context id for the uuid reservation """ if not item.uuid: item.uuid = self._GenerateUniqueID(ec_id) else: self._CheckUniqueUUID(item, include_temporary=True) def _CheckUniqueUUID(self, item, include_temporary): """Checks that the UUID of the given object is unique. @param item: the instance or node to be checked @param include_temporary: whether temporarily generated UUID's should be included in the check. If the UUID of the item to be checked is a temporarily generated one, this has to be C{False}. """ if not item.uuid: raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,)) if item.uuid in self._AllIDs(include_temporary=include_temporary): raise errors.ConfigurationError("Cannot add '%s': UUID %s already" " in use" % (item.name, item.uuid)) def _SetInstanceStatus(self, inst_uuid, status, disks_active): """Set the instance's status to a given value. """ if inst_uuid not in self._config_data.instances: raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid) instance = self._config_data.instances[inst_uuid] if status is None: status = instance.admin_state if disks_active is None: disks_active = instance.disks_active assert status in constants.ADMINST_ALL, \ "Invalid status '%s' passed to SetInstanceStatus" % (status,) if instance.admin_state != status or \ instance.disks_active != disks_active: instance.admin_state = status instance.disks_active = disks_active instance.serial_no += 1 instance.mtime = time.time() self._WriteConfig() @locking.ssynchronized(_config_lock) def MarkInstanceUp(self, inst_uuid): """Mark the instance status to up in the config. This also sets the instance disks active flag. 
""" self._SetInstanceStatus(inst_uuid, constants.ADMINST_UP, True) @locking.ssynchronized(_config_lock) def MarkInstanceOffline(self, inst_uuid): """Mark the instance status to down in the config. This also clears the instance disks active flag. """ self._SetInstanceStatus(inst_uuid, constants.ADMINST_OFFLINE, False) @locking.ssynchronized(_config_lock) def RemoveInstance(self, inst_uuid): """Remove the instance from the configuration. """ if inst_uuid not in self._config_data.instances: raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid) # If a network port has been allocated to the instance, # return it to the pool of free ports. inst = self._config_data.instances[inst_uuid] network_port = getattr(inst, "network_port", None) if network_port is not None: self._config_data.cluster.tcpudp_port_pool.add(network_port) instance = self._UnlockedGetInstanceInfo(inst_uuid) for nic in instance.nics: if nic.network and nic.ip: # Return all IP addresses to the respective address pools self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip) del self._config_data.instances[inst_uuid] self._config_data.cluster.serial_no += 1 self._WriteConfig() @locking.ssynchronized(_config_lock) def RenameInstance(self, inst_uuid, new_name): """Rename an instance. This needs to be done in ConfigWriter and not by RemoveInstance combined with AddInstance as only we can guarantee an atomic rename. """ if inst_uuid not in self._config_data.instances: raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid) inst = self._config_data.instances[inst_uuid] inst.name = new_name for (idx, disk) in enumerate(inst.disks): if disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]: # rename the file paths in logical and physical id file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1])) disk.logical_id = (disk.logical_id[0], utils.PathJoin(file_storage_dir, inst.name, "disk%s" % idx)) disk.physical_id = disk.logical_id # Force update of ssconf files self._config_data.cluster.serial_no += 1 self._WriteConfig() @locking.ssynchronized(_config_lock) def MarkInstanceDown(self, inst_uuid): """Mark the status of an instance to down in the configuration. This does not touch the instance disks active flag, as shut down instances can still have active disks. """ self._SetInstanceStatus(inst_uuid, constants.ADMINST_DOWN, None) @locking.ssynchronized(_config_lock) def MarkInstanceDisksActive(self, inst_uuid): """Mark the status of instance disks active. """ self._SetInstanceStatus(inst_uuid, None, True) @locking.ssynchronized(_config_lock) def MarkInstanceDisksInactive(self, inst_uuid): """Mark the status of instance disks inactive. """ self._SetInstanceStatus(inst_uuid, None, False) def _UnlockedGetInstanceList(self): """Get the list of instances. This function is for internal use, when the config lock is already held. """ return self._config_data.instances.keys() @locking.ssynchronized(_config_lock, shared=1) def GetInstanceList(self): """Get the list of instances. @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid'] """ return self._UnlockedGetInstanceList() def ExpandInstanceName(self, short_name): """Attempt to expand an incomplete instance name. 
""" # Locking is done in L{ConfigWriter.GetAllInstancesInfo} all_insts = self.GetAllInstancesInfo().values() expanded_name = _MatchNameComponentIgnoreCase( short_name, [inst.name for inst in all_insts]) if expanded_name is not None: # there has to be exactly one instance with that name inst = (filter(lambda n: n.name == expanded_name, all_insts)[0]) return (inst.uuid, inst.name) else: return (None, None) def _UnlockedGetInstanceInfo(self, inst_uuid): """Returns information about an instance. This function is for internal use, when the config lock is already held. """ if inst_uuid not in self._config_data.instances: return None return self._config_data.instances[inst_uuid] @locking.ssynchronized(_config_lock, shared=1) def GetInstanceInfo(self, inst_uuid): """Returns information about an instance. It takes the information from the configuration file. Other information of an instance are taken from the live systems. @param inst_uuid: UUID of the instance @rtype: L{objects.Instance} @return: the instance object """ return self._UnlockedGetInstanceInfo(inst_uuid) @locking.ssynchronized(_config_lock, shared=1) def GetInstanceNodeGroups(self, inst_uuid, primary_only=False): """Returns set of node group UUIDs for instance's nodes. @rtype: frozenset """ instance = self._UnlockedGetInstanceInfo(inst_uuid) if not instance: raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid) if primary_only: nodes = [instance.primary_node] else: nodes = instance.all_nodes return frozenset(self._UnlockedGetNodeInfo(node_uuid).group for node_uuid in nodes) @locking.ssynchronized(_config_lock, shared=1) def GetInstanceNetworks(self, inst_uuid): """Returns set of network UUIDs for instance's nics. @rtype: frozenset """ instance = self._UnlockedGetInstanceInfo(inst_uuid) if not instance: raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid) networks = set() for nic in instance.nics: if nic.network: networks.add(nic.network) return frozenset(networks) @locking.ssynchronized(_config_lock, shared=1) def GetMultiInstanceInfo(self, inst_uuids): """Get the configuration of multiple instances. @param inst_uuids: list of instance UUIDs @rtype: list @return: list of tuples (instance UUID, instance_info), where instance_info is what would GetInstanceInfo return for the node, while keeping the original order """ return [(uuid, self._UnlockedGetInstanceInfo(uuid)) for uuid in inst_uuids] @locking.ssynchronized(_config_lock, shared=1) def GetMultiInstanceInfoByName(self, inst_names): """Get the configuration of multiple instances. @param inst_names: list of instance names @rtype: list @return: list of tuples (instance, instance_info), where instance_info is what would GetInstanceInfo return for the node, while keeping the original order """ result = [] for name in inst_names: instance = self._UnlockedGetInstanceInfoByName(name) result.append((instance.uuid, instance)) return result @locking.ssynchronized(_config_lock, shared=1) def GetAllInstancesInfo(self): """Get the configuration of all instances. @rtype: dict @return: dict of (instance, instance_info), where instance_info is what would GetInstanceInfo return for the node """ return self._UnlockedGetAllInstancesInfo() def _UnlockedGetAllInstancesInfo(self): my_dict = dict([(inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid)) for inst_uuid in self._UnlockedGetInstanceList()]) return my_dict @locking.ssynchronized(_config_lock, shared=1) def GetInstancesInfoByFilter(self, filter_fn): """Get instance configuration with a filter. 
@type filter_fn: callable @param filter_fn: Filter function receiving instance object as parameter, returning boolean. Important: this function is called while the configuration locks is held. It must not do any complex work or call functions potentially leading to a deadlock. Ideally it doesn't call any other functions and just compares instance attributes. """ return dict((uuid, inst) for (uuid, inst) in self._config_data.instances.items() if filter_fn(inst)) @locking.ssynchronized(_config_lock, shared=1) def GetInstanceInfoByName(self, inst_name): """Get the L{objects.Instance} object for a named instance. @param inst_name: name of the instance to get information for @type inst_name: string @return: the corresponding L{objects.Instance} instance or None if no information is available """ return self._UnlockedGetInstanceInfoByName(inst_name) def _UnlockedGetInstanceInfoByName(self, inst_name): for inst in self._UnlockedGetAllInstancesInfo().values(): if inst.name == inst_name: return inst return None def _UnlockedGetInstanceName(self, inst_uuid): inst_info = self._UnlockedGetInstanceInfo(inst_uuid) if inst_info is None: raise errors.OpExecError("Unknown instance: %s" % inst_uuid) return inst_info.name @locking.ssynchronized(_config_lock, shared=1) def GetInstanceName(self, inst_uuid): """Gets the instance name for the passed instance. @param inst_uuid: instance UUID to get name for @type inst_uuid: string @rtype: string @return: instance name """ return self._UnlockedGetInstanceName(inst_uuid) @locking.ssynchronized(_config_lock, shared=1) def GetInstanceNames(self, inst_uuids): """Gets the instance names for the passed list of nodes. @param inst_uuids: list of instance UUIDs to get names for @type inst_uuids: list of strings @rtype: list of strings @return: list of instance names """ return self._UnlockedGetInstanceNames(inst_uuids) def _UnlockedGetInstanceNames(self, inst_uuids): return [self._UnlockedGetInstanceName(uuid) for uuid in inst_uuids] @locking.ssynchronized(_config_lock) def AddNode(self, node, ec_id): """Add a node to the configuration. @type node: L{objects.Node} @param node: a Node instance """ logging.info("Adding node %s to configuration", node.name) self._EnsureUUID(node, ec_id) node.serial_no = 1 node.ctime = node.mtime = time.time() self._UnlockedAddNodeToGroup(node.uuid, node.group) self._config_data.nodes[node.uuid] = node self._config_data.cluster.serial_no += 1 self._WriteConfig() @locking.ssynchronized(_config_lock) def RemoveNode(self, node_uuid): """Remove a node from the configuration. """ logging.info("Removing node %s from configuration", node_uuid) if node_uuid not in self._config_data.nodes: raise errors.ConfigurationError("Unknown node '%s'" % node_uuid) self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_uuid]) del self._config_data.nodes[node_uuid] self._config_data.cluster.serial_no += 1 self._WriteConfig() def ExpandNodeName(self, short_name): """Attempt to expand an incomplete node name into a node UUID. """ # Locking is done in L{ConfigWriter.GetAllNodesInfo} all_nodes = self.GetAllNodesInfo().values() expanded_name = _MatchNameComponentIgnoreCase( short_name, [node.name for node in all_nodes]) if expanded_name is not None: # there has to be exactly one node with that name node = (filter(lambda n: n.name == expanded_name, all_nodes)[0]) return (node.uuid, node.name) else: return (None, None) def _UnlockedGetNodeInfo(self, node_uuid): """Get the configuration of a node, as stored in the config. 
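ExpandNodeName above (like ExpandInstanceName) only expands a short name when it identifies exactly one known name. A minimal stand-alone sketch using a plain case-insensitive prefix match (the real matcher, _MatchNameComponentIgnoreCase, works on name components):

  def expand_name(short_name, all_names):
    short = short_name.lower()
    matches = [n for n in all_names if n.lower().startswith(short)]
    if len(matches) == 1:
      return matches[0]
    return None    # unknown or ambiguous

  _names = ["node1.example.com", "node2.example.com"]
  assert expand_name("node1", _names) == "node1.example.com"
  assert expand_name("node", _names) is None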
    This function is for internal use, when the config lock is already
    held.

    @param node_uuid: the node UUID
    @rtype: L{objects.Node}
    @return: the node object

    """
    if node_uuid not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_uuid]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_uuid):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_uuid: the node UUID
    @rtype: L{objects.Node}
    @return: the node object

    """
    return self._UnlockedGetNodeInfo(node_uuid)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_uuid):
    """Get the instances of a node, as stored in the config.

    @param node_uuid: the node UUID
    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary
        instances

    """
    pri = []
    sec = []
    for inst in self._config_data.instances.values():
      if inst.primary_node == node_uuid:
        pri.append(inst.uuid)
      if node_uuid in inst.secondary_nodes:
        sec.append(inst.uuid)
    return (pri, sec)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInstances(self, uuid, primary_only=False):
    """Get the instances of a node group.

    @param uuid: Node group UUID
    @param primary_only: Whether to only consider primary nodes
    @rtype: frozenset
    @return: List of instance UUIDs in node group

    """
    if primary_only:
      nodes_fn = lambda inst: [inst.primary_node]
    else:
      nodes_fn = lambda inst: inst.all_nodes

    return frozenset(inst.uuid
                     for inst in self._config_data.instances.values()
                     for node_uuid in nodes_fn(inst)
                     if self._UnlockedGetNodeInfo(node_uuid).group == uuid)

  def _UnlockedGetHvparamsString(self, hvname):
    """Return the string representation of the list of hypervisor
    parameters of the given hypervisor.

    @see: C{GetHvparams}

    """
    result = ""
    hvparams = self._config_data.cluster.hvparams[hvname]
    for key in hvparams:
      result += "%s=%s\n" % (key, hvparams[key])
    return result

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHvparamsString(self, hvname):
    """Return the hypervisor parameters of the given hypervisor.

    @type hvname: string
    @param hvname: name of a hypervisor
    @rtype: string
    @return: string containing key-value-pairs, one pair on each line;
        format: KEY=VALUE

    """
    return self._UnlockedGetHvparamsString(hvname)

  def _UnlockedGetNodeList(self):
    """Return the list of nodes which are in the configuration.

    This function is for internal use, when the config lock is already
    held.

    @rtype: list

    """
    return self._config_data.nodes.keys()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    return self._UnlockedGetNodeList()

  def _UnlockedGetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.uuid for node in all_nodes if not node.offline]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    return self._UnlockedGetOnlineNodeList()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetVmCapableNodeList(self):
    """Return the list of nodes which are vm capable.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.uuid for node in all_nodes if node.vm_capable]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNonVmCapableNodeList(self):
    """Return the list of nodes which are not vm capable.
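The KEY=VALUE rendering above simply serializes one hypervisor parameter per line. A minimal stand-alone sketch (parameter values hypothetical; keys are sorted here for a deterministic example, which the real code does not require):

  def render_params(params):
    return "".join("%s=%s\n" % (k, params[k]) for k in sorted(params))

  assert render_params({"kernel_path": "/boot/vmlinuz-3-kvmU",
                        "acpi": True}) == \
      "acpi=True\nkernel_path=/boot/vmlinuz-3-kvmU\n"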
""" all_nodes = [self._UnlockedGetNodeInfo(node) for node in self._UnlockedGetNodeList()] return [node.uuid for node in all_nodes if not node.vm_capable] @locking.ssynchronized(_config_lock, shared=1) def GetMultiNodeInfo(self, node_uuids): """Get the configuration of multiple nodes. @param node_uuids: list of node UUIDs @rtype: list @return: list of tuples of (node, node_info), where node_info is what would GetNodeInfo return for the node, in the original order """ return [(uuid, self._UnlockedGetNodeInfo(uuid)) for uuid in node_uuids] def _UnlockedGetAllNodesInfo(self): """Gets configuration of all nodes. @note: See L{GetAllNodesInfo} """ return dict([(node_uuid, self._UnlockedGetNodeInfo(node_uuid)) for node_uuid in self._UnlockedGetNodeList()]) @locking.ssynchronized(_config_lock, shared=1) def GetAllNodesInfo(self): """Get the configuration of all nodes. @rtype: dict @return: dict of (node, node_info), where node_info is what would GetNodeInfo return for the node """ return self._UnlockedGetAllNodesInfo() def _UnlockedGetNodeInfoByName(self, node_name): for node in self._UnlockedGetAllNodesInfo().values(): if node.name == node_name: return node return None @locking.ssynchronized(_config_lock, shared=1) def GetNodeInfoByName(self, node_name): """Get the L{objects.Node} object for a named node. @param node_name: name of the node to get information for @type node_name: string @return: the corresponding L{objects.Node} instance or None if no information is available """ return self._UnlockedGetNodeInfoByName(node_name) def _UnlockedGetNodeName(self, node_spec): if isinstance(node_spec, objects.Node): return node_spec.name elif isinstance(node_spec, basestring): node_info = self._UnlockedGetNodeInfo(node_spec) if node_info is None: raise errors.OpExecError("Unknown node: %s" % node_spec) return node_info.name else: raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec) @locking.ssynchronized(_config_lock, shared=1) def GetNodeName(self, node_spec): """Gets the node name for the passed node. @param node_spec: node to get names for @type node_spec: either node UUID or a L{objects.Node} object @rtype: string @return: node name """ return self._UnlockedGetNodeName(node_spec) def _UnlockedGetNodeNames(self, node_specs): return [self._UnlockedGetNodeName(node_spec) for node_spec in node_specs] @locking.ssynchronized(_config_lock, shared=1) def GetNodeNames(self, node_specs): """Gets the node names for the passed list of nodes. @param node_specs: list of nodes to get names for @type node_specs: list of either node UUIDs or L{objects.Node} objects @rtype: list of strings @return: list of node names """ return self._UnlockedGetNodeNames(node_specs) @locking.ssynchronized(_config_lock, shared=1) def GetNodeGroupsFromNodes(self, node_uuids): """Returns groups for a list of nodes. @type node_uuids: list of string @param node_uuids: List of node UUIDs @rtype: frozenset """ return frozenset(self._UnlockedGetNodeInfo(uuid).group for uuid in node_uuids) def _UnlockedGetMasterCandidateStats(self, exceptions=None): """Get the number of current and maximum desired and possible candidates. 
@type exceptions: list @param exceptions: if passed, list of nodes that should be ignored @rtype: tuple @return: tuple of (current, desired and possible, possible) """ mc_now = mc_should = mc_max = 0 for node in self._config_data.nodes.values(): if exceptions and node.uuid in exceptions: continue if not (node.offline or node.drained) and node.master_capable: mc_max += 1 if node.master_candidate: mc_now += 1 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size) return (mc_now, mc_should, mc_max) @locking.ssynchronized(_config_lock, shared=1) def GetMasterCandidateStats(self, exceptions=None): """Get the number of current and maximum possible candidates. This is just a wrapper over L{_UnlockedGetMasterCandidateStats}. @type exceptions: list @param exceptions: if passed, list of nodes that should be ignored @rtype: tuple @return: tuple of (current, max) """ return self._UnlockedGetMasterCandidateStats(exceptions) @locking.ssynchronized(_config_lock) def MaintainCandidatePool(self, exception_node_uuids): """Try to grow the candidate pool to the desired size. @type exception_node_uuids: list @param exception_node_uuids: if passed, list of nodes that should be ignored @rtype: list @return: list with the adjusted nodes (L{objects.Node} instances) """ mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats( exception_node_uuids) mod_list = [] if mc_now < mc_max: node_list = self._config_data.nodes.keys() random.shuffle(node_list) for uuid in node_list: if mc_now >= mc_max: break node = self._config_data.nodes[uuid] if (node.master_candidate or node.offline or node.drained or node.uuid in exception_node_uuids or not node.master_capable): continue mod_list.append(node) node.master_candidate = True node.serial_no += 1 mc_now += 1 if mc_now != mc_max: # this should not happen logging.warning("Warning: MaintainCandidatePool didn't manage to" " fill the candidate pool (%d/%d)", mc_now, mc_max) if mod_list: self._config_data.cluster.serial_no += 1 self._WriteConfig() return mod_list def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid): """Add a given node to the specified group. """ if nodegroup_uuid not in self._config_data.nodegroups: # This can happen if a node group gets deleted between its lookup and # when we're adding the first node to it, since we don't keep a lock in # the meantime. It's ok though, as we'll fail cleanly if the node group # is not found anymore. raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid) if node_uuid not in self._config_data.nodegroups[nodegroup_uuid].members: self._config_data.nodegroups[nodegroup_uuid].members.append(node_uuid) def _UnlockedRemoveNodeFromGroup(self, node): """Remove a given node from its group. """ nodegroup = node.group if nodegroup not in self._config_data.nodegroups: logging.warning("Warning: node '%s' has unknown node group '%s'" " (while being removed from it)", node.uuid, nodegroup) nodegroup_obj = self._config_data.nodegroups[nodegroup] if node.uuid not in nodegroup_obj.members: logging.warning("Warning: node '%s' not a member of its node group '%s'" " (while being removed from it)", node.uuid, nodegroup) else: nodegroup_obj.members.remove(node.uuid) @locking.ssynchronized(_config_lock) def AssignGroupNodes(self, mods): """Changes the group of a number of nodes. 
@type mods: list of tuples; (node name, new group UUID) @param mods: Node membership modifications """ groups = self._config_data.nodegroups nodes = self._config_data.nodes resmod = [] # Try to resolve UUIDs first for (node_uuid, new_group_uuid) in mods: try: node = nodes[node_uuid] except KeyError: raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid) if node.group == new_group_uuid: # Node is being assigned to its current group logging.debug("Node '%s' was assigned to its current group (%s)", node_uuid, node.group) continue # Try to find current group of node try: old_group = groups[node.group] except KeyError: raise errors.ConfigurationError("Unable to find old group '%s'" % node.group) # Try to find new group for node try: new_group = groups[new_group_uuid] except KeyError: raise errors.ConfigurationError("Unable to find new group '%s'" % new_group_uuid) assert node.uuid in old_group.members, \ ("Inconsistent configuration: node '%s' not listed in members for its" " old group '%s'" % (node.uuid, old_group.uuid)) assert node.uuid not in new_group.members, \ ("Inconsistent configuration: node '%s' already listed in members for" " its new group '%s'" % (node.uuid, new_group.uuid)) resmod.append((node, old_group, new_group)) # Apply changes for (node, old_group, new_group) in resmod: assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \ "Assigning to current group is not possible" node.group = new_group.uuid # Update members of involved groups if node.uuid in old_group.members: old_group.members.remove(node.uuid) if node.uuid not in new_group.members: new_group.members.append(node.uuid) # Update timestamps and serials (only once per node/group object) now = time.time() for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142 obj.serial_no += 1 obj.mtime = now # Force ssconf update self._config_data.cluster.serial_no += 1 self._WriteConfig() def _BumpSerialNo(self): """Bump up the serial number of the config. """ self._config_data.serial_no += 1 self._config_data.mtime = time.time() def _AllUUIDObjects(self): """Returns all objects with uuid attributes. """ return (self._config_data.instances.values() + self._config_data.nodes.values() + self._config_data.nodegroups.values() + self._config_data.networks.values() + self._AllDisks() + self._AllNICs() + [self._config_data.cluster]) def _OpenConfig(self, accept_foreign): """Read the config data from disk. 
""" raw_data = utils.ReadFile(self._cfg_file) try: data = objects.ConfigData.FromDict(serializer.Load(raw_data)) except Exception, err: raise errors.ConfigurationError(err) # Make sure the configuration has the right version _ValidateConfig(data) if (not hasattr(data, "cluster") or not hasattr(data.cluster, "rsahostkeypub")): raise errors.ConfigurationError("Incomplete configuration" " (missing cluster.rsahostkeypub)") if not data.cluster.master_node in data.nodes: msg = ("The configuration denotes node %s as master, but does not" " contain information about this node" % data.cluster.master_node) raise errors.ConfigurationError(msg) master_info = data.nodes[data.cluster.master_node] if master_info.name != self._my_hostname and not accept_foreign: msg = ("The configuration denotes node %s as master, while my" " hostname is %s; opening a foreign configuration is only" " possible in accept_foreign mode" % (master_info.name, self._my_hostname)) raise errors.ConfigurationError(msg) self._config_data = data # reset the last serial as -1 so that the next write will cause # ssconf update self._last_cluster_serial = -1 # Upgrade configuration if needed self._UpgradeConfig() self._cfg_id = utils.GetFileID(path=self._cfg_file) def _UpgradeConfig(self): """Run any upgrade steps. This method performs both in-object upgrades and also update some data elements that need uniqueness across the whole configuration or interact with other objects. @warning: this function will call L{_WriteConfig()}, but also L{DropECReservations} so it needs to be called only from a "safe" place (the constructor). If one wanted to call it with the lock held, a DropECReservationUnlocked would need to be created first, to avoid causing deadlock. """ # Keep a copy of the persistent part of _config_data to check for changes # Serialization doesn't guarantee order in dictionaries oldconf = copy.deepcopy(self._config_data.ToDict()) # In-object upgrades self._config_data.UpgradeConfig() for item in self._AllUUIDObjects(): if item.uuid is None: item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID) if not self._config_data.nodegroups: default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME default_nodegroup = objects.NodeGroup(name=default_nodegroup_name, members=[]) self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True) for node in self._config_data.nodes.values(): if not node.group: node.group = self.LookupNodeGroup(None) # This is technically *not* an upgrade, but needs to be done both when # nodegroups are being added, and upon normally loading the config, # because the members list of a node group is discarded upon # serializing/deserializing the object. self._UnlockedAddNodeToGroup(node.uuid, node.group) modified = (oldconf != self._config_data.ToDict()) if modified: self._WriteConfig() # This is ok even if it acquires the internal lock, as _UpgradeConfig is # only called at config init time, without the lock held self.DropECReservations(_UPGRADE_CONFIG_JID) else: config_errors = self._UnlockedVerifyConfig() if config_errors: errmsg = ("Loaded configuration data is not consistent: %s" % (utils.CommaJoin(config_errors))) logging.critical(errmsg) def _DistributeConfig(self, feedback_fn): """Distribute the configuration to the other nodes. Currently, this only copies the configuration file. In the future, it could be used to encapsulate the 2/3-phase update mechanism. 
""" if self._offline: return True bad = False node_list = [] addr_list = [] myhostname = self._my_hostname # we can skip checking whether _UnlockedGetNodeInfo returns None # since the node list comes from _UnlocketGetNodeList, and we are # called with the lock held, so no modifications should take place # in between for node_uuid in self._UnlockedGetNodeList(): node_info = self._UnlockedGetNodeInfo(node_uuid) if node_info.name == myhostname or not node_info.master_candidate: continue node_list.append(node_info.name) addr_list.append(node_info.primary_ip) # TODO: Use dedicated resolver talking to config writer for name resolution result = \ self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file) for to_node, to_result in result.items(): msg = to_result.fail_msg if msg: msg = ("Copy of file %s to node %s failed: %s" % (self._cfg_file, to_node, msg)) logging.error(msg) if feedback_fn: feedback_fn(msg) bad = True return not bad def _WriteConfig(self, destination=None, feedback_fn=None): """Write the configuration data to persistent storage. """ assert feedback_fn is None or callable(feedback_fn) # Warn on config errors, but don't abort the save - the # configuration has already been modified, and we can't revert; # the best we can do is to warn the user and save as is, leaving # recovery to the user config_errors = self._UnlockedVerifyConfig() if config_errors: errmsg = ("Configuration data is not consistent: %s" % (utils.CommaJoin(config_errors))) logging.critical(errmsg) if feedback_fn: feedback_fn(errmsg) if destination is None: destination = self._cfg_file self._BumpSerialNo() txt = serializer.Dump(self._config_data.ToDict()) getents = self._getents() try: fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt, close=False, gid=getents.confd_gid, mode=0640) except errors.LockError: raise errors.ConfigurationError("The configuration file has been" " modified since the last write, cannot" " update") try: self._cfg_id = utils.GetFileID(fd=fd) finally: os.close(fd) self.write_count += 1 # and redistribute the config file to master candidates self._DistributeConfig(feedback_fn) # Write ssconf files on all nodes (including locally) if self._last_cluster_serial < self._config_data.cluster.serial_no: if not self._offline: result = self._GetRpc(None).call_write_ssconf_files( self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()), self._UnlockedGetSsconfValues()) for nname, nresu in result.items(): msg = nresu.fail_msg if msg: errmsg = ("Error while uploading ssconf files to" " node %s: %s" % (nname, msg)) logging.warning(errmsg) if feedback_fn: feedback_fn(errmsg) self._last_cluster_serial = self._config_data.cluster.serial_no def _GetAllHvparamsStrings(self, hypervisors): """Get the hvparams of all given hypervisors from the config. @type hypervisors: list of string @param hypervisors: list of hypervisor names @rtype: dict of strings @returns: dictionary mapping the hypervisor name to a string representation of the hypervisor's hvparams """ hvparams = {} for hv in hypervisors: hvparams[hv] = self._UnlockedGetHvparamsString(hv) return hvparams @staticmethod def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams): """Extends the ssconf_values dictionary by hvparams. 
@type ssconf_values: dict of strings @param ssconf_values: dictionary mapping ssconf_keys to strings representing the content of ssconf files @type all_hvparams: dict of strings @param all_hvparams: dictionary mapping hypervisor names to a string representation of their hvparams @rtype: same as ssconf_values @returns: the ssconf_values dictionary extended by hvparams """ for hv in all_hvparams: ssconf_key = constants.SS_HVPARAMS_PREF + hv ssconf_values[ssconf_key] = all_hvparams[hv] return ssconf_values def _UnlockedGetSsconfValues(self): """Return the values needed by ssconf. @rtype: dict @return: a dictionary with keys the ssconf names and values their associated value """ fn = "\n".join instance_names = utils.NiceSort( [inst.name for inst in self._UnlockedGetAllInstancesInfo().values()]) node_infos = self._UnlockedGetAllNodesInfo().values() node_names = [node.name for node in node_infos] node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip) for ninfo in node_infos] node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip) for ninfo in node_infos] instance_data = fn(instance_names) off_data = fn(node.name for node in node_infos if node.offline) on_data = fn(node.name for node in node_infos if not node.offline) mc_data = fn(node.name for node in node_infos if node.master_candidate) mc_ips_data = fn(node.primary_ip for node in node_infos if node.master_candidate) node_data = fn(node_names) node_pri_ips_data = fn(node_pri_ips) node_snd_ips_data = fn(node_snd_ips) cluster = self._config_data.cluster cluster_tags = fn(cluster.GetTags()) hypervisor_list = fn(cluster.enabled_hypervisors) all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES) uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n") nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in self._config_data.nodegroups.values()] nodegroups_data = fn(utils.NiceSort(nodegroups)) networks = ["%s %s" % (net.uuid, net.name) for net in self._config_data.networks.values()] networks_data = fn(utils.NiceSort(networks)) ssconf_values = { constants.SS_CLUSTER_NAME: cluster.cluster_name, constants.SS_CLUSTER_TAGS: cluster_tags, constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir, constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir, constants.SS_MASTER_CANDIDATES: mc_data, constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data, constants.SS_MASTER_IP: cluster.master_ip, constants.SS_MASTER_NETDEV: cluster.master_netdev, constants.SS_MASTER_NETMASK: str(cluster.master_netmask), constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node), constants.SS_NODE_LIST: node_data, constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data, constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data, constants.SS_OFFLINE_NODES: off_data, constants.SS_ONLINE_NODES: on_data, constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family), constants.SS_INSTANCE_LIST: instance_data, constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION, constants.SS_HYPERVISOR_LIST: hypervisor_list, constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health), constants.SS_UID_POOL: uid_pool, constants.SS_NODEGROUPS: nodegroups_data, constants.SS_NETWORKS: networks_data, } ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values, all_hvparams) bad_values = [(k, v) for k, v in ssconf_values.items() if not isinstance(v, (str, basestring))] if bad_values: err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values) raise errors.ConfigurationError("Some ssconf key(s) have non-string" " values: %s" % err) return 
ssconf_values @locking.ssynchronized(_config_lock, shared=1) def GetSsconfValues(self): """Wrapper using lock around _UnlockedGetSsconfValues(). """ return self._UnlockedGetSsconfValues() @locking.ssynchronized(_config_lock, shared=1) def GetVGName(self): """Return the volume group name. """ return self._config_data.cluster.volume_group_name @locking.ssynchronized(_config_lock) def SetVGName(self, vg_name): """Set the volume group name. """ self._config_data.cluster.volume_group_name = vg_name self._config_data.cluster.serial_no += 1 self._WriteConfig() @locking.ssynchronized(_config_lock, shared=1) def GetDRBDHelper(self): """Return DRBD usermode helper. """ return self._config_data.cluster.drbd_usermode_helper @locking.ssynchronized(_config_lock) def SetDRBDHelper(self, drbd_helper): """Set DRBD usermode helper. """ self._config_data.cluster.drbd_usermode_helper = drbd_helper self._config_data.cluster.serial_no += 1 self._WriteConfig() @locking.ssynchronized(_config_lock, shared=1) def GetMACPrefix(self): """Return the mac prefix. """ return self._config_data.cluster.mac_prefix @locking.ssynchronized(_config_lock, shared=1) def GetClusterInfo(self): """Returns information about the cluster. @rtype: L{objects.Cluster} @return: the cluster object """ return self._config_data.cluster @locking.ssynchronized(_config_lock, shared=1) def HasAnyDiskOfType(self, dev_type): """Check if there is at least one disk of the given type in the configuration. """ return self._config_data.HasAnyDiskOfType(dev_type) @locking.ssynchronized(_config_lock) def Update(self, target, feedback_fn, ec_id=None): """Notify function to be called after updates. This function must be called when an object (as returned by GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and the caller wants the modifications saved to the backing store. Note that all modified objects will be saved, but the target argument is the one the caller wants to ensure is saved.
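A typical sequence is sketched below (illustrative only; the caller must already hold the relevant locks)::

  node = cfg.GetNodeInfo(node_uuid)
  node.offline = True
  # bumps serial_no and mtime on the object, then writes the config
  cfg.Update(node, feedback_fn)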
@param target: an instance of either L{objects.Cluster}, L{objects.Node} or L{objects.Instance} which is existing in the cluster @param feedback_fn: Callable feedback function """ if self._config_data is None: raise errors.ProgrammerError("Configuration file not read," " cannot save.") update_serial = False if isinstance(target, objects.Cluster): test = target == self._config_data.cluster elif isinstance(target, objects.Node): test = target in self._config_data.nodes.values() update_serial = True elif isinstance(target, objects.Instance): test = target in self._config_data.instances.values() elif isinstance(target, objects.NodeGroup): test = target in self._config_data.nodegroups.values() elif isinstance(target, objects.Network): test = target in self._config_data.networks.values() else: raise errors.ProgrammerError("Invalid object type (%s) passed to" " ConfigWriter.Update" % type(target)) if not test: raise errors.ConfigurationError("Configuration updated since object" " has been read or unknown object") target.serial_no += 1 target.mtime = now = time.time() if update_serial: # for node updates, we need to increase the cluster serial too self._config_data.cluster.serial_no += 1 self._config_data.cluster.mtime = now if isinstance(target, objects.Instance): self._UnlockedReleaseDRBDMinors(target.uuid) if ec_id is not None: # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams self._UnlockedCommitTemporaryIps(ec_id) self._WriteConfig(feedback_fn=feedback_fn) @locking.ssynchronized(_config_lock) def DropECReservations(self, ec_id): """Drop per-execution-context reservations. """ for rm in self._all_rms: rm.DropECReservations(ec_id) @locking.ssynchronized(_config_lock, shared=1) def GetAllNetworksInfo(self): """Get configuration info of all the networks. """ return dict(self._config_data.networks) def _UnlockedGetNetworkList(self): """Get the list of networks. This function is for internal use, when the config lock is already held. """ return self._config_data.networks.keys() @locking.ssynchronized(_config_lock, shared=1) def GetNetworkList(self): """Get the list of networks. @return: array of networks, ex. ["main", "vlan100", "200"] """ return self._UnlockedGetNetworkList() @locking.ssynchronized(_config_lock, shared=1) def GetNetworkNames(self): """Get a list of network names. """ names = [net.name for net in self._config_data.networks.values()] return names def _UnlockedGetNetwork(self, uuid): """Returns information about a network. This function is for internal use, when the config lock is already held. """ if uuid not in self._config_data.networks: return None return self._config_data.networks[uuid] @locking.ssynchronized(_config_lock, shared=1) def GetNetwork(self, uuid): """Returns information about a network. It takes the information from the configuration file. @param uuid: UUID of the network @rtype: L{objects.Network} @return: the network object """ return self._UnlockedGetNetwork(uuid) @locking.ssynchronized(_config_lock) def AddNetwork(self, net, ec_id, check_uuid=True): """Add a network to the configuration. @type net: L{objects.Network} @param net: the Network object to add @type ec_id: string @param ec_id: unique id for the job to use when creating a missing UUID """ self._UnlockedAddNetwork(net, ec_id, check_uuid) self._WriteConfig() def _UnlockedAddNetwork(self, net, ec_id, check_uuid): """Add a network to the configuration.
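Unlike L{AddNetwork}, this assumes the config lock is already held and does not write the configuration to disk. A caller would typically construct the object first (illustrative values)::

  net = objects.Network(name="net1", network="192.0.2.0/24",
                        gateway="192.0.2.1")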
""" logging.info("Adding network %s to configuration", net.name) if check_uuid: self._EnsureUUID(net, ec_id) net.serial_no = 1 net.ctime = net.mtime = time.time() self._config_data.networks[net.uuid] = net self._config_data.cluster.serial_no += 1 def _UnlockedLookupNetwork(self, target): """Lookup a network's UUID. @type target: string @param target: network name or UUID @rtype: string @return: network UUID @raises errors.OpPrereqError: when the target network cannot be found """ if target is None: return None if target in self._config_data.networks: return target for net in self._config_data.networks.values(): if net.name == target: return net.uuid raise errors.OpPrereqError("Network '%s' not found" % target, errors.ECODE_NOENT) @locking.ssynchronized(_config_lock, shared=1) def LookupNetwork(self, target): """Lookup a network's UUID. This function is just a wrapper over L{_UnlockedLookupNetwork}. @type target: string @param target: network name or UUID @rtype: string @return: network UUID """ return self._UnlockedLookupNetwork(target) @locking.ssynchronized(_config_lock) def RemoveNetwork(self, network_uuid): """Remove a network from the configuration. @type network_uuid: string @param network_uuid: the UUID of the network to remove """ logging.info("Removing network %s from configuration", network_uuid) if network_uuid not in self._config_data.networks: raise errors.ConfigurationError("Unknown network '%s'" % network_uuid) del self._config_data.networks[network_uuid] self._config_data.cluster.serial_no += 1 self._WriteConfig() def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid): """Get the netparams (mode, link) of a network. Get a network's netparams for a given node. @type net_uuid: string @param net_uuid: network uuid @type node_uuid: string @param node_uuid: node UUID @rtype: dict or None @return: netparams """ node_info = self._UnlockedGetNodeInfo(node_uuid) nodegroup_info = self._UnlockedGetNodeGroup(node_info.group) netparams = nodegroup_info.networks.get(net_uuid, None) return netparams @locking.ssynchronized(_config_lock, shared=1) def GetGroupNetParams(self, net_uuid, node_uuid): """Locking wrapper of _UnlockedGetGroupNetParams() """ return self._UnlockedGetGroupNetParams(net_uuid, node_uuid) @locking.ssynchronized(_config_lock, shared=1) def CheckIPInNodeGroup(self, ip, node_uuid): """Check IP uniqueness in nodegroup. Check networks that are connected in the node's node group if ip is contained in any of them. Used when creating/adding a NIC to ensure uniqueness among nodegroups. @type ip: string @param ip: ip address @type node_uuid: string @param node_uuid: node UUID @rtype: (string, dict) or (None, None) @return: (network name, netparams) """ if ip is None: return (None, None) node_info = self._UnlockedGetNodeInfo(node_uuid) nodegroup_info = self._UnlockedGetNodeGroup(node_info.group) for net_uuid in nodegroup_info.networks.keys(): net_info = self._UnlockedGetNetwork(net_uuid) pool = network.AddressPool(net_info) if pool.Contains(ip): return (net_info.name, nodegroup_info.networks[net_uuid]) return (None, None) ganeti-2.9.3/lib/client/0000755000000000000000000000000012271445544015026 5ustar00rootroot00000000000000ganeti-2.9.3/lib/client/gnt_node.py0000644000000000000000000011236512271422343017175 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Node related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-node import itertools import errno from ganeti.cli import * from ganeti import cli from ganeti import bootstrap from ganeti import opcodes from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import netutils from ganeti import pathutils from ganeti import ssh from ganeti import compat from ganeti import confd from ganeti.confd import client as confd_client #: default list of field for L{ListNodes} _LIST_DEF_FIELDS = [ "name", "dtotal", "dfree", "mtotal", "mnode", "mfree", "pinst_cnt", "sinst_cnt", ] #: Default field list for L{ListVolumes} _LIST_VOL_DEF_FIELDS = ["node", "phys", "vg", "name", "size", "instance"] #: default list of field for L{ListStorage} _LIST_STOR_DEF_FIELDS = [ constants.SF_NODE, constants.SF_TYPE, constants.SF_NAME, constants.SF_SIZE, constants.SF_USED, constants.SF_FREE, constants.SF_ALLOCATABLE, ] #: default list of power commands _LIST_POWER_COMMANDS = ["on", "off", "cycle", "status"] #: headers (and full field list) for L{ListStorage} _LIST_STOR_HEADERS = { constants.SF_NODE: "Node", constants.SF_TYPE: "Type", constants.SF_NAME: "Name", constants.SF_SIZE: "Size", constants.SF_USED: "Used", constants.SF_FREE: "Free", constants.SF_ALLOCATABLE: "Allocatable", } #: User-facing storage unit types _USER_STORAGE_TYPE = { constants.ST_FILE: "file", constants.ST_LVM_PV: "lvm-pv", constants.ST_LVM_VG: "lvm-vg", } _STORAGE_TYPE_OPT = \ cli_option("-t", "--storage-type", dest="user_storage_type", choices=_USER_STORAGE_TYPE.keys(), default=None, metavar="STORAGE_TYPE", help=("Storage type (%s)" % utils.CommaJoin(_USER_STORAGE_TYPE.keys()))) _REPAIRABLE_STORAGE_TYPES = \ [st for st, so in constants.VALID_STORAGE_OPERATIONS.iteritems() if constants.SO_FIX_CONSISTENCY in so] _MODIFIABLE_STORAGE_TYPES = constants.MODIFIABLE_STORAGE_FIELDS.keys() _OOB_COMMAND_ASK = compat.UniqueFrozenset([ constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE, ]) _ENV_OVERRIDE = compat.UniqueFrozenset(["list"]) NONODE_SETUP_OPT = cli_option("--no-node-setup", default=True, action="store_false", dest="node_setup", help=("Do not make initial SSH setup on remote" " node (needs to be done manually)")) IGNORE_STATUS_OPT = cli_option("--ignore-status", default=False, action="store_true", dest="ignore_status", help=("Ignore the Node(s) offline status" " (potentially DANGEROUS)")) def ConvertStorageType(user_storage_type): """Converts a user storage type to its internal name. 
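For example (illustrative; the storage type constants are the internal names)::

  ConvertStorageType("lvm-pv")  # -> constants.ST_LVM_PV
  ConvertStorageType("foo")     # raises errors.OpPrereqError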
""" try: return _USER_STORAGE_TYPE[user_storage_type] except KeyError: raise errors.OpPrereqError("Unknown storage type: %s" % user_storage_type, errors.ECODE_INVAL) def _TryReadFile(path): """Tries to read a file. If the file is not found, C{None} is returned. @type path: string @param path: Filename @rtype: None or string @todo: Consider adding a generic ENOENT wrapper """ try: return utils.ReadFile(path) except EnvironmentError, err: if err.errno == errno.ENOENT: return None else: raise def _ReadSshKeys(keyfiles, _tostderr_fn=ToStderr): """Reads SSH keys according to C{keyfiles}. @type keyfiles: dict @param keyfiles: Dictionary with keys of L{constants.SSHK_ALL} and two-values tuples (private and public key file) @rtype: list @return: List of three-values tuples (L{constants.SSHK_ALL}, private and public key as strings) """ result = [] for (kind, (private_file, public_file)) in keyfiles.items(): private_key = _TryReadFile(private_file) public_key = _TryReadFile(public_file) if public_key and private_key: result.append((kind, private_key, public_key)) elif public_key or private_key: _tostderr_fn("Couldn't find a complete set of keys for kind '%s'; files" " '%s' and '%s'", kind, private_file, public_file) return result def _SetupSSH(options, cluster_name, node): """Configures a destination node's SSH daemon. @param options: Command line options @type cluster_name @param cluster_name: Cluster name @type node: string @param node: Destination node name """ if options.force_join: ToStderr("The \"--force-join\" option is no longer supported and will be" " ignored.") host_keys = _ReadSshKeys(constants.SSH_DAEMON_KEYFILES) (_, root_keyfiles) = \ ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) root_keys = _ReadSshKeys(root_keyfiles) (_, cert_pem) = \ utils.ExtractX509Certificate(utils.ReadFile(pathutils.NODED_CERT_FILE)) data = { constants.SSHS_CLUSTER_NAME: cluster_name, constants.SSHS_NODE_DAEMON_CERTIFICATE: cert_pem, constants.SSHS_SSH_HOST_KEY: host_keys, constants.SSHS_SSH_ROOT_KEY: root_keys, } bootstrap.RunNodeSetupCmd(cluster_name, node, pathutils.PREPARE_NODE_JOIN, options.debug, options.verbose, False, options.ssh_key_check, options.ssh_key_check, data) @UsesRPC def AddNode(opts, args): """Add a node to the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the new node name @rtype: int @return: the desired exit code """ cl = GetClient() node = netutils.GetHostname(name=args[0]).name readd = opts.readd try: output = cl.QueryNodes(names=[node], fields=["name", "sip", "master"], use_locking=False) node_exists, sip, is_master = output[0] except (errors.OpPrereqError, errors.OpExecError): node_exists = "" sip = None if readd: if not node_exists: ToStderr("Node %s not in the cluster" " - please retry without '--readd'", node) return 1 if is_master: ToStderr("Node %s is the master, cannot readd", node) return 1 else: if node_exists: ToStderr("Node %s already in the cluster (as %s)" " - please retry with '--readd'", node, node_exists) return 1 sip = opts.secondary_ip # read the cluster name from the master (cluster_name, ) = cl.QueryConfigValues(["cluster_name"]) if not readd and opts.node_setup: ToStderr("-- WARNING -- \n" "Performing this operation is going to replace the ssh daemon" " keypair\n" "on the target machine (%s) with the ones of the" " current one\n" "and grant full intra-cluster ssh root access to/from it\n", node) if opts.node_setup: _SetupSSH(opts, cluster_name, node) bootstrap.SetupNodeDaemon(opts, cluster_name, node) if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) op = opcodes.OpNodeAdd(node_name=args[0], secondary_ip=sip, readd=opts.readd, group=opts.nodegroup, vm_capable=opts.vm_capable, ndparams=opts.ndparams, master_capable=opts.master_capable, disk_state=disk_state, hv_state=hv_state) SubmitOpCode(op, opts=opts) def ListNodes(opts, args): """List nodes and their properties. @param opts: the command line options selected by the user @type args: list @param args: nodes to list, or empty for all @rtype: int @return: the desired exit code """ selected_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) fmtoverride = dict.fromkeys(["pinst_list", "sinst_list", "tags"], (",".join, False)) cl = GetClient(query=True) return GenericList(constants.QR_NODE, selected_fields, args, opts.units, opts.separator, not opts.no_headers, format_override=fmtoverride, verbose=opts.verbose, force_filter=opts.force_filter, cl=cl) def ListNodeFields(opts, args): """List node fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ cl = GetClient(query=True) return GenericListFields(constants.QR_NODE, args, opts.separator, not opts.no_headers, cl=cl) def EvacuateNode(opts, args): """Relocate the primary and/or secondary instances from a node.
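The evacuation mode follows from the option flags (summary of the selection below): C{-p}/C{--primary-only} maps to L{constants.NODE_EVAC_PRI}, C{-s}/C{--secondary-only} to L{constants.NODE_EVAC_SEC}, and neither flag to L{constants.NODE_EVAC_ALL}; giving both flags is rejected.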
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the node name @rtype: int @return: the desired exit code """ if opts.dst_node is not None: ToStderr("New secondary node given (disabling iallocator), hence evacuating" " secondary instances only.") opts.secondary_only = True opts.primary_only = False if opts.secondary_only and opts.primary_only: raise errors.OpPrereqError("Only one of the --primary-only and" " --secondary-only options can be passed", errors.ECODE_INVAL) elif opts.primary_only: mode = constants.NODE_EVAC_PRI elif opts.secondary_only: mode = constants.NODE_EVAC_SEC else: mode = constants.NODE_EVAC_ALL # Determine affected instances fields = [] if not opts.secondary_only: fields.append("pinst_list") if not opts.primary_only: fields.append("sinst_list") cl = GetClient() qcl = GetClient(query=True) result = qcl.QueryNodes(names=args, fields=fields, use_locking=False) qcl.Close() instances = set(itertools.chain(*itertools.chain(*itertools.chain(result)))) if not instances: # No instances to evacuate ToStderr("No instances to evacuate on node(s) %s, exiting.", utils.CommaJoin(args)) return constants.EXIT_SUCCESS if not (opts.force or AskUser("Relocate instance(s) %s from node(s) %s?" % (utils.CommaJoin(utils.NiceSort(instances)), utils.CommaJoin(args)))): return constants.EXIT_CONFIRMATION # Evacuate node op = opcodes.OpNodeEvacuate(node_name=args[0], mode=mode, remote_node=opts.dst_node, iallocator=opts.iallocator, early_release=opts.early_release) result = SubmitOrSend(op, opts, cl=cl) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("All instances evacuated successfully.") rcode = constants.EXIT_SUCCESS else: ToStdout("There were %s errors during the evacuation.", bad_cnt) rcode = constants.EXIT_FAILURE return rcode def FailoverNode(opts, args): """Fail over all primary instances on a node. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the node name @rtype: int @return: the desired exit code """ cl = GetClient() force = opts.force selected_fields = ["name", "pinst_list"] # these fields are static data anyway, so it doesn't matter, but # locking=True should be safer qcl = GetClient(query=True) result = qcl.QueryNodes(names=args, fields=selected_fields, use_locking=False) qcl.Close() node, pinst = result[0] if not pinst: ToStderr("No primary instances on node %s, exiting.", node) return 0 pinst = utils.NiceSort(pinst) retcode = 0 if not force and not AskUser("Fail over instance(s) %s?" % (",".join("'%s'" % name for name in pinst))): return 2 jex = JobExecutor(cl=cl, opts=opts) for iname in pinst: op = opcodes.OpInstanceFailover(instance_name=iname, ignore_consistency=opts.ignore_consistency, iallocator=opts.iallocator) jex.QueueJob(iname, op) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("All %d instance(s) failed over successfully.", len(results)) else: ToStdout("There were errors during the failover:\n" "%d error(s) out of %d instance(s).", bad_cnt, len(results)) retcode = constants.EXIT_FAILURE # report failures via the exit code return retcode def MigrateNode(opts, args): """Migrate all primary instances on a node.
""" cl = GetClient() force = opts.force selected_fields = ["name", "pinst_list"] qcl = GetClient(query=True) result = cl.QueryNodes(names=args, fields=selected_fields, use_locking=False) qcl.Close() ((node, pinst), ) = result if not pinst: ToStdout("No primary instances on node %s, exiting." % node) return 0 pinst = utils.NiceSort(pinst) if not (force or AskUser("Migrate instance(s) %s?" % utils.CommaJoin(utils.NiceSort(pinst)))): return constants.EXIT_CONFIRMATION # this should be removed once --non-live is deprecated if not opts.live and opts.migration_mode is not None: raise errors.OpPrereqError("Only one of the --non-live and " "--migration-mode options can be passed", errors.ECODE_INVAL) if not opts.live: # --non-live passed mode = constants.HT_MIGRATION_NONLIVE else: mode = opts.migration_mode op = opcodes.OpNodeMigrate(node_name=args[0], mode=mode, iallocator=opts.iallocator, target_node=opts.dst_node, allow_runtime_changes=opts.allow_runtime_chgs, ignore_ipolicy=opts.ignore_ipolicy) result = SubmitOrSend(op, opts, cl=cl) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("All instances migrated successfully.") rcode = constants.EXIT_SUCCESS else: ToStdout("There were %s errors during the node migration.", bad_cnt) rcode = constants.EXIT_FAILURE return rcode def _FormatNodeInfo(node_info): """Format node information for L{cli.PrintGenericInfo()}. """ (name, primary_ip, secondary_ip, pinst, sinst, is_mc, drained, offline, master_capable, vm_capable, powered, ndparams, ndparams_custom) = node_info info = [ ("Node name", name), ("primary ip", primary_ip), ("secondary ip", secondary_ip), ("master candidate", is_mc), ("drained", drained), ("offline", offline), ] if powered is not None: info.append(("powered", powered)) info.extend([ ("master_capable", master_capable), ("vm_capable", vm_capable), ]) if vm_capable: info.extend([ ("primary for instances", [iname for iname in utils.NiceSort(pinst)]), ("secondary for instances", [iname for iname in utils.NiceSort(sinst)]), ]) info.append(("node parameters", FormatParamsDictInfo(ndparams_custom, ndparams))) return info def ShowNodeConfig(opts, args): """Show node information. @param opts: the command line options selected by the user @type args: list @param args: should either be an empty list, in which case we show information about all nodes, or should contain a list of nodes to be queried for information @rtype: int @return: the desired exit code """ cl = GetClient(query=True) result = cl.QueryNodes(fields=["name", "pip", "sip", "pinst_list", "sinst_list", "master_candidate", "drained", "offline", "master_capable", "vm_capable", "powered", "ndparams", "custom_ndparams"], names=args, use_locking=False) PrintGenericInfo([ _FormatNodeInfo(node_info) for node_info in result ]) return 0 def RemoveNode(opts, args): """Remove a node from the cluster. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the node to be removed @rtype: int @return: the desired exit code """ op = opcodes.OpNodeRemove(node_name=args[0]) SubmitOpCode(op, opts=opts) return 0 def PowercycleNode(opts, args): """Remove a node from the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the node to powercycle @rtype: int @return: the desired exit code """ node = args[0] if (not opts.confirm and not AskUser("Are you sure you want to hard powercycle node %s?" % node)): return 2 op = opcodes.OpNodePowercycle(node_name=node, force=opts.force) result = SubmitOrSend(op, opts) if result: ToStderr(result) return 0 def PowerNode(opts, args): """Change/ask power state of a node. @param opts: the command line options selected by the user @type args: list @param args: should contain the power subcommand and, depending on it, a list of node names @rtype: int @return: the desired exit code """ command = args.pop(0) if opts.no_headers: headers = None else: headers = {"node": "Node", "status": "Status"} if command not in _LIST_POWER_COMMANDS: ToStderr("power subcommand %s not supported." % command) return constants.EXIT_FAILURE oob_command = "power-%s" % command if oob_command in _OOB_COMMAND_ASK: if not args: ToStderr("Please provide at least one node for this command") return constants.EXIT_FAILURE elif not opts.force and not ConfirmOperation(args, "nodes", "power %s" % command): return constants.EXIT_FAILURE assert len(args) > 0 opcodelist = [] if not opts.ignore_status and oob_command == constants.OOB_POWER_OFF: # TODO: This is a little ugly as we can't catch and revert for node in args: opcodelist.append(opcodes.OpNodeSetParams(node_name=node, offline=True, auto_promote=opts.auto_promote)) opcodelist.append(opcodes.OpOobCommand(node_names=args, command=oob_command, ignore_status=opts.ignore_status, timeout=opts.oob_timeout, power_delay=opts.power_delay)) cli.SetGenericOpcodeOpts(opcodelist, opts) job_id = cli.SendJob(opcodelist) # We just want the OOB Opcode status # If it fails PollJob gives us the error message in it result = cli.PollJob(job_id)[-1] errs = 0 data = [] for node_result in result: (node_tuple, data_tuple) = node_result (_, node_name) = node_tuple (data_status, data_node) = data_tuple if data_status == constants.RS_NORMAL: if oob_command == constants.OOB_POWER_STATUS: if data_node[constants.OOB_POWER_STATUS_POWERED]: text = "powered" else: text = "unpowered" data.append([node_name, text]) else: # We don't expect data here, so we just say it was successfully invoked data.append([node_name, "invoked"]) else: errs += 1 data.append([node_name, cli.FormatResultError(data_status, True)]) data = GenerateTable(separator=opts.separator, headers=headers, fields=["node", "status"], data=data) for line in data: ToStdout(line) if errs: return constants.EXIT_FAILURE else: return constants.EXIT_SUCCESS def Health(opts, args): """Show health of a node using OOB.
@param opts: the command line options selected by the user @type args: list @param args: should contain the list of nodes to query, or be empty for all nodes @rtype: int @return: the desired exit code """ op = opcodes.OpOobCommand(node_names=args, command=constants.OOB_HEALTH, timeout=opts.oob_timeout) result = SubmitOpCode(op, opts=opts) if opts.no_headers: headers = None else: headers = {"node": "Node", "status": "Status"} errs = 0 data = [] for node_result in result: (node_tuple, data_tuple) = node_result (_, node_name) = node_tuple (data_status, data_node) = data_tuple if data_status == constants.RS_NORMAL: data.append([node_name, "%s=%s" % tuple(data_node[0])]) for item, status in data_node[1:]: data.append(["", "%s=%s" % (item, status)]) else: errs += 1 data.append([node_name, cli.FormatResultError(data_status, True)]) data = GenerateTable(separator=opts.separator, headers=headers, fields=["node", "status"], data=data) for line in data: ToStdout(line) if errs: return constants.EXIT_FAILURE else: return constants.EXIT_SUCCESS def ListVolumes(opts, args): """List logical volumes on node(s). @param opts: the command line options selected by the user @type args: list @param args: should either be an empty list, in which case we list data for all nodes, or contain a list of nodes to display data only for those @rtype: int @return: the desired exit code """ selected_fields = ParseFields(opts.output, _LIST_VOL_DEF_FIELDS) op = opcodes.OpNodeQueryvols(nodes=args, output_fields=selected_fields) output = SubmitOpCode(op, opts=opts) if not opts.no_headers: headers = {"node": "Node", "phys": "PhysDev", "vg": "VG", "name": "Name", "size": "Size", "instance": "Instance"} else: headers = None unitfields = ["size"] numfields = ["size"] data = GenerateTable(separator=opts.separator, headers=headers, fields=selected_fields, unitfields=unitfields, numfields=numfields, data=output, units=opts.units) for line in data: ToStdout(line) return 0 def ListStorage(opts, args): """List physical volumes on node(s).
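If no C{--storage-type} is given, LVM physical volumes (L{constants.ST_LVM_PV}) are assumed, as implemented by the default below.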
@param opts: the command line options selected by the user @type args: list @param args: should either be an empty list, in which case we list data for all nodes, or contain a list of nodes to display data only for those @rtype: int @return: the desired exit code """ # TODO: Default to ST_FILE if LVM is disabled on the cluster if opts.user_storage_type is None: opts.user_storage_type = constants.ST_LVM_PV storage_type = ConvertStorageType(opts.user_storage_type) selected_fields = ParseFields(opts.output, _LIST_STOR_DEF_FIELDS) op = opcodes.OpNodeQueryStorage(nodes=args, storage_type=storage_type, output_fields=selected_fields) output = SubmitOpCode(op, opts=opts) if not opts.no_headers: headers = { constants.SF_NODE: "Node", constants.SF_TYPE: "Type", constants.SF_NAME: "Name", constants.SF_SIZE: "Size", constants.SF_USED: "Used", constants.SF_FREE: "Free", constants.SF_ALLOCATABLE: "Allocatable", } else: headers = None unitfields = [constants.SF_SIZE, constants.SF_USED, constants.SF_FREE] numfields = [constants.SF_SIZE, constants.SF_USED, constants.SF_FREE] # change raw values to nicer strings for row in output: for idx, field in enumerate(selected_fields): val = row[idx] if field == constants.SF_ALLOCATABLE: if val: val = "Y" else: val = "N" row[idx] = str(val) data = GenerateTable(separator=opts.separator, headers=headers, fields=selected_fields, unitfields=unitfields, numfields=numfields, data=output, units=opts.units) for line in data: ToStdout(line) return 0 def ModifyStorage(opts, args): """Modify storage volume on a node. @param opts: the command line options selected by the user @type args: list @param args: should contain 3 items: node name, storage type and volume name @rtype: int @return: the desired exit code """ (node_name, user_storage_type, volume_name) = args storage_type = ConvertStorageType(user_storage_type) changes = {} if opts.allocatable is not None: changes[constants.SF_ALLOCATABLE] = opts.allocatable if changes: op = opcodes.OpNodeModifyStorage(node_name=node_name, storage_type=storage_type, name=volume_name, changes=changes) SubmitOrSend(op, opts) else: ToStderr("No changes to perform, exiting.") def RepairStorage(opts, args): """Repairs a storage volume on a node. @param opts: the command line options selected by the user @type args: list @param args: should contain 3 items: node name, storage type and volume name @rtype: int @return: the desired exit code """ (node_name, user_storage_type, volume_name) = args storage_type = ConvertStorageType(user_storage_type) op = opcodes.OpRepairNodeStorage(node_name=node_name, storage_type=storage_type, name=volume_name, ignore_consistency=opts.ignore_consistency) SubmitOrSend(op, opts) def SetNodeParams(opts, args): """Modifies a node. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the node name @rtype: int @return: the desired exit code """ all_changes = [opts.master_candidate, opts.drained, opts.offline, opts.master_capable, opts.vm_capable, opts.secondary_ip, opts.ndparams] if (all_changes.count(None) == len(all_changes) and not (opts.hv_state or opts.disk_state)): ToStderr("Please give at least one of the parameters.") return 1 if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) op = opcodes.OpNodeSetParams(node_name=args[0], master_candidate=opts.master_candidate, offline=opts.offline, drained=opts.drained, master_capable=opts.master_capable, vm_capable=opts.vm_capable, secondary_ip=opts.secondary_ip, force=opts.force, ndparams=opts.ndparams, auto_promote=opts.auto_promote, powered=opts.node_powered, hv_state=hv_state, disk_state=disk_state) # even though we process the result here, we still allow submit-only usage result = SubmitOrSend(op, opts) if result: ToStdout("Modified node %s", args[0]) for param, data in result: ToStdout(" - %-5s -> %s", param, data) return 0 def RestrictedCommand(opts, args): """Runs a remote command on node(s). @param opts: Command line options selected by user @type args: list @param args: Command line arguments @rtype: int @return: Exit code """ cl = GetClient() if len(args) > 1 or opts.nodegroup: # Expand node names nodes = GetOnlineNodes(nodes=args[1:], cl=cl, nodegroup=opts.nodegroup) else: raise errors.OpPrereqError("Node group or node names must be given", errors.ECODE_INVAL) op = opcodes.OpRestrictedCommand(command=args[0], nodes=nodes, use_locking=opts.do_locking) result = SubmitOrSend(op, opts, cl=cl) exit_code = constants.EXIT_SUCCESS for (node, (status, text)) in zip(nodes, result): ToStdout("------------------------------------------------") if status: if opts.show_machine_names: for line in text.splitlines(): ToStdout("%s: %s", node, line) else: ToStdout("Node: %s", node) ToStdout(text) else: exit_code = constants.EXIT_FAILURE ToStdout(text) return exit_code class ReplyStatus(object): """Class holding a reply status for synchronous confd clients. """ def __init__(self): self.failure = True self.answer = False def ListDrbd(opts, args): """List the DRBD minors in use on a node.
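The data is obtained from the confd daemon rather than through a job: a L{confd_client.ConfdClient} sends a C{CONFD_REQ_NODE_DRBD} request to localhost and the callback defined below collects the reply synchronously.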
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the node name @rtype: int @return: the desired exit code """ if len(args) != 1: ToStderr("Please give one (and only one) node.") return constants.EXIT_FAILURE if not constants.ENABLE_CONFD: ToStderr("Error: this command requires confd support, but it has not" " been enabled at build time.") return constants.EXIT_FAILURE status = ReplyStatus() def ListDrbdConfdCallback(reply): """Callback for confd queries""" if reply.type == confd_client.UPCALL_REPLY: answer = reply.server_reply.answer reqtype = reply.orig_request.type if reqtype == constants.CONFD_REQ_NODE_DRBD: if reply.server_reply.status != constants.CONFD_REPL_STATUS_OK: ToStderr("Query gave non-ok status '%s': %s" % (reply.server_reply.status, reply.server_reply.answer)) status.failure = True return if not confd.HTNodeDrbd(answer): ToStderr("Invalid response from server: expected %s, got %s", confd.HTNodeDrbd, answer) status.failure = True else: status.failure = False status.answer = answer else: ToStderr("Unexpected reply %s!?", reqtype) status.failure = True node = args[0] hmac = utils.ReadFile(pathutils.CONFD_HMAC_KEY) filter_callback = confd_client.ConfdFilterCallback(ListDrbdConfdCallback) counting_callback = confd_client.ConfdCountingCallback(filter_callback) cf_client = confd_client.ConfdClient(hmac, [constants.IP4_ADDRESS_LOCALHOST], counting_callback) req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_DRBD, query=node) def DoConfdRequestReply(req): counting_callback.RegisterQuery(req.rsalt) cf_client.SendRequest(req, async=False) while not counting_callback.AllAnswered(): if not cf_client.ReceiveReply(): ToStderr("Did not receive all expected confd replies") break DoConfdRequestReply(req) if status.failure: return constants.EXIT_FAILURE fields = ["node", "minor", "instance", "disk", "role", "peer"] if opts.no_headers: headers = None else: headers = {"node": "Node", "minor": "Minor", "instance": "Instance", "disk": "Disk", "role": "Role", "peer": "PeerNode"} data = GenerateTable(separator=opts.separator, headers=headers, fields=fields, data=sorted(status.answer), numfields=["minor"]) for line in data: ToStdout(line) return constants.EXIT_SUCCESS commands = { "add": ( AddNode, [ArgHost(min=1, max=1)], [SECONDARY_IP_OPT, READD_OPT, NOSSH_KEYCHECK_OPT, NODE_FORCE_JOIN_OPT, NONODE_SETUP_OPT, VERBOSE_OPT, NODEGROUP_OPT, PRIORITY_OPT, CAPAB_MASTER_OPT, CAPAB_VM_OPT, NODE_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT], "[-s ip] [--readd] [--no-ssh-key-check] [--force-join]" " [--no-node-setup] [--verbose]" " <node_name>", "Add a node to the cluster"), "evacuate": ( EvacuateNode, ARGS_ONE_NODE, [FORCE_OPT, IALLOCATOR_OPT, NEW_SECONDARY_OPT, EARLY_RELEASE_OPT, PRIORITY_OPT, PRIMARY_ONLY_OPT, SECONDARY_ONLY_OPT] + SUBMIT_OPTS, "[-f] {-I <iallocator> | -n <dst>} [-p | -s] [options...] <node_name>
", "Relocate the primary and/or secondary instances from a node"), "failover": ( FailoverNode, ARGS_ONE_NODE, [FORCE_OPT, IGNORE_CONSIST_OPT, IALLOCATOR_OPT, PRIORITY_OPT], "[-f] ", "Stops the primary instances on a node and start them on their" " secondary node (only for instances with drbd disk template)"), "migrate": ( MigrateNode, ARGS_ONE_NODE, [FORCE_OPT, NONLIVE_OPT, MIGRATION_MODE_OPT, DST_NODE_OPT, IALLOCATOR_OPT, PRIORITY_OPT, IGNORE_IPOLICY_OPT, NORUNTIME_CHGS_OPT] + SUBMIT_OPTS, "[-f] ", "Migrate all the primary instance on a node away from it" " (only for instances of type drbd)"), "info": ( ShowNodeConfig, ARGS_MANY_NODES, [], "[...]", "Show information about the node(s)"), "list": ( ListNodes, ARGS_MANY_NODES, [NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, VERBOSE_OPT, FORCE_FILTER_OPT], "[nodes...]", "Lists the nodes in the cluster. The available fields can be shown using" " the \"list-fields\" command (see the man page for details)." " The default field list is (in order): %s." % utils.CommaJoin(_LIST_DEF_FIELDS)), "list-fields": ( ListNodeFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for nodes"), "modify": ( SetNodeParams, ARGS_ONE_NODE, [FORCE_OPT] + SUBMIT_OPTS + [MC_OPT, DRAINED_OPT, OFFLINE_OPT, CAPAB_MASTER_OPT, CAPAB_VM_OPT, SECONDARY_IP_OPT, AUTO_PROMOTE_OPT, DRY_RUN_OPT, PRIORITY_OPT, NODE_PARAMS_OPT, NODE_POWERED_OPT, HV_STATE_OPT, DISK_STATE_OPT], "", "Alters the parameters of a node"), "powercycle": ( PowercycleNode, ARGS_ONE_NODE, [FORCE_OPT, CONFIRM_OPT, DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "", "Tries to forcefully powercycle a node"), "power": ( PowerNode, [ArgChoice(min=1, max=1, choices=_LIST_POWER_COMMANDS), ArgNode()], SUBMIT_OPTS + [AUTO_PROMOTE_OPT, PRIORITY_OPT, IGNORE_STATUS_OPT, FORCE_OPT, NOHDR_OPT, SEP_OPT, OOB_TIMEOUT_OPT, POWER_DELAY_OPT], "on|off|cycle|status [nodes...]", "Change power state of node by calling out-of-band helper."), "remove": ( RemoveNode, ARGS_ONE_NODE, [DRY_RUN_OPT, PRIORITY_OPT], "", "Removes a node from the cluster"), "volumes": ( ListVolumes, [ArgNode()], [NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, PRIORITY_OPT], "[...]", "List logical volumes on node(s)"), "list-storage": ( ListStorage, ARGS_MANY_NODES, [NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, _STORAGE_TYPE_OPT, PRIORITY_OPT], "[...]", "List physical volumes on node(s). The available" " fields are (see the man page for details): %s." 
% (utils.CommaJoin(_LIST_STOR_HEADERS))), "modify-storage": ( ModifyStorage, [ArgNode(min=1, max=1), ArgChoice(min=1, max=1, choices=_MODIFIABLE_STORAGE_TYPES), ArgFile(min=1, max=1)], [ALLOCATABLE_OPT, DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<node_name> <storage_type> <name>", "Modify storage volume on a node"), "repair-storage": ( RepairStorage, [ArgNode(min=1, max=1), ArgChoice(min=1, max=1, choices=_REPAIRABLE_STORAGE_TYPES), ArgFile(min=1, max=1)], [IGNORE_CONSIST_OPT, DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<node_name> <storage_type> <name>", "Repairs a storage volume on a node"), "list-tags": ( ListTags, ARGS_ONE_NODE, [], "<node_name>", "List the tags of the given node"), "add-tags": ( AddTags, [ArgNode(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<node_name> tag...", "Add tags to the given node"), "remove-tags": ( RemoveTags, [ArgNode(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<node_name> tag...", "Remove tags from the given node"), "health": ( Health, ARGS_MANY_NODES, [NOHDR_OPT, SEP_OPT, PRIORITY_OPT, OOB_TIMEOUT_OPT], "[<node_name>...]", "List health of node(s) using out-of-band"), "list-drbd": ( ListDrbd, ARGS_ONE_NODE, [NOHDR_OPT, SEP_OPT], "[<node_name>]", "Query the list of used DRBD minors on the given node"), "restricted-command": ( RestrictedCommand, [ArgUnknown(min=1, max=1)] + ARGS_MANY_NODES, [SYNC_OPT, PRIORITY_OPT] + SUBMIT_OPTS + [SHOW_MACHINE_OPT, NODEGROUP_OPT], "<command> [<node_name>...]", "Executes a restricted command on node(s)"), } #: dictionary with aliases for commands aliases = { "show": "info", } def Main(): return GenericMain(commands, aliases=aliases, override={"tag_type": constants.TAG_NODE}, env_override=_ENV_OVERRIDE) ganeti-2.9.3/lib/client/gnt_network.py0000644000000000000000000002761612271422343017745 0ustar00rootroot00000000000000# # # Copyright (C) 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """IP pool related commands""" # pylint: disable=W0401,W0614 # W0401: Wildcard import ganeti.cli # W0614: Unused import %s from wildcard import (since we need cli) import textwrap import itertools from ganeti.cli import * from ganeti import constants from ganeti import opcodes from ganeti import utils from ganeti import errors #: default list of fields for L{ListNetworks} _LIST_DEF_FIELDS = ["name", "network", "gateway", "mac_prefix", "group_list", "tags"] def _HandleReservedIPs(ips): if ips is None: return None elif not ips: return [] else: return utils.UnescapeAndSplit(ips, sep=",") def AddNetwork(opts, args): """Add a network to the cluster.
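Reserved IPs are passed as a single comma-separated string and split via L{utils.UnescapeAndSplit}; e.g. (hypothetical addresses) C{"192.0.2.10,192.0.2.11"} becomes C{["192.0.2.10", "192.0.2.11"]}.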
@param opts: the command line options selected by the user @type args: list @param args: a list of length 1 with the network name to create @rtype: int @return: the desired exit code """ (network_name, ) = args if opts.network is None: raise errors.OpPrereqError("The --network option must be given", errors.ECODE_INVAL) if opts.tags is not None: tags = opts.tags.split(",") else: tags = [] reserved_ips = _HandleReservedIPs(opts.add_reserved_ips) op = opcodes.OpNetworkAdd(network_name=network_name, gateway=opts.gateway, network=opts.network, gateway6=opts.gateway6, network6=opts.network6, mac_prefix=opts.mac_prefix, add_reserved_ips=reserved_ips, conflicts_check=opts.conflicts_check, tags=tags) SubmitOrSend(op, opts) def _GetDefaultGroups(cl, groups): """Gets list of groups to operate on. If C{groups} doesn't contain groups, a list of all groups in the cluster is returned. @type cl: L{luxi.Client} @type groups: list @rtype: list """ if groups: return groups return list(itertools.chain(*cl.QueryGroups([], ["uuid"], False))) def ConnectNetwork(opts, args): """Map a network to a node group. @param opts: the command line options selected by the user @type args: list @param args: Network, mode, physlink and node groups @rtype: int @return: the desired exit code """ cl = GetClient() (network, mode, link) = args[:3] groups = _GetDefaultGroups(cl, args[3:]) # TODO: Change logic to support "--submit" for group in groups: op = opcodes.OpNetworkConnect(group_name=group, network_name=network, network_mode=mode, network_link=link, conflicts_check=opts.conflicts_check) SubmitOpCode(op, opts=opts, cl=cl) def DisconnectNetwork(opts, args): """Unmap a network from a node group. @param opts: the command line options selected by the user @type args: list @param args: Network and node groups @rtype: int @return: the desired exit code """ cl = GetClient() (network, ) = args[:1] groups = _GetDefaultGroups(cl, args[1:]) # TODO: Change logic to support "--submit" for group in groups: op = opcodes.OpNetworkDisconnect(group_name=group, network_name=network) SubmitOpCode(op, opts=opts, cl=cl) def ListNetworks(opts, args): """List Ip pools and their properties. @param opts: the command line options selected by the user @type args: list @param args: networks to list, or empty for all @rtype: int @return: the desired exit code """ desired_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) fmtoverride = { "group_list": (lambda data: utils.CommaJoin("%s (%s, %s)" % (name, mode, link) for (name, mode, link) in data), False), "inst_list": (",".join, False), "tags": (",".join, False), } cl = GetClient(query=True) return GenericList(constants.QR_NETWORK, desired_fields, args, None, opts.separator, not opts.no_headers, verbose=opts.verbose, format_override=fmtoverride, cl=cl) def ListNetworkFields(opts, args): """List network fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ cl = GetClient(query=True) return GenericListFields(constants.QR_NETWORK, args, opts.separator, not opts.no_headers, cl=cl) def ShowNetworkConfig(_, args): """Show network information. 
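The usage map is printed 64 addresses per row, with C{X} marking a used and C{.} a free address, as noted in the legend below.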
@type args: list @param args: should either be an empty list, in which case we show information about all networks, or should contain a list of networks (names or UUIDs) to be queried for information @rtype: int @return: the desired exit code """ cl = GetClient() result = cl.QueryNetworks(fields=["name", "network", "gateway", "network6", "gateway6", "mac_prefix", "free_count", "reserved_count", "map", "group_list", "inst_list", "external_reservations", "serial_no", "uuid"], names=args, use_locking=False) for (name, network, gateway, network6, gateway6, mac_prefix, free_count, reserved_count, mapping, group_list, instances, ext_res, serial, uuid) in result: size = free_count + reserved_count ToStdout("Network name: %s", name) ToStdout("UUID: %s", uuid) ToStdout("Serial number: %d", serial) ToStdout(" Subnet: %s", network) ToStdout(" Gateway: %s", gateway) ToStdout(" IPv6 Subnet: %s", network6) ToStdout(" IPv6 Gateway: %s", gateway6) ToStdout(" Mac Prefix: %s", mac_prefix) ToStdout(" Size: %d", size) ToStdout(" Free: %d (%.2f%%)", free_count, 100 * float(free_count) / float(size)) ToStdout(" Usage map:") idx = 0 for line in textwrap.wrap(mapping, width=64): ToStdout(" %s %s %d", str(idx).rjust(3), line.ljust(64), idx + 63) idx += 64 ToStdout(" (X) used (.) free") if ext_res: ToStdout(" externally reserved IPs:") for line in textwrap.wrap(ext_res, width=64): ToStdout(" %s" % line) if group_list: ToStdout(" connected to node groups:") for group, nic_mode, nic_link in group_list: ToStdout(" %s (%s on %s)", group, nic_mode, nic_link) else: ToStdout(" not connected to any node group") if instances: idata = cl.QueryInstances([], ["uuid", "name"], False) uuid2name = dict(idata) ToStdout(" used by %d instances:", len(instances)) for inst in instances: name = uuid2name[inst] ((ips, networks), ) = cl.QueryInstances([name], ["nic.ips", "nic.networks"], use_locking=False) l = lambda value: ", ".join(str(idx) + ":" + str(ip) for idx, (ip, net) in enumerate(value) if net == uuid) ToStdout(" %s: %s", name, l(zip(ips, networks))) else: ToStdout(" not used by any instances") def SetNetworkParams(opts, args): """Modifies an IP address pool's parameters. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the network name @rtype: int @return: the desired exit code """ # TODO: add "network": opts.network, all_changes = { "gateway": opts.gateway, "add_reserved_ips": _HandleReservedIPs(opts.add_reserved_ips), "remove_reserved_ips": _HandleReservedIPs(opts.remove_reserved_ips), "mac_prefix": opts.mac_prefix, "gateway6": opts.gateway6, "network6": opts.network6, } if all_changes.values().count(None) == len(all_changes): ToStderr("Please give at least one of the parameters.") return 1 # pylint: disable=W0142 op = opcodes.OpNetworkSetParams(network_name=args[0], **all_changes) # TODO: add feedback to user, e.g. list the modifications SubmitOrSend(op, opts) def RemoveNetwork(opts, args): """Remove an IP address pool from the cluster.
@param opts: the command line options selected by the user @type args: list @param args: a list of length 1 with the id of the IP address pool to remove @rtype: int @return: the desired exit code """ (network_name,) = args op = opcodes.OpNetworkRemove(network_name=network_name, force=opts.force) SubmitOrSend(op, opts) commands = { "add": ( AddNetwork, ARGS_ONE_NETWORK, [DRY_RUN_OPT, NETWORK_OPT, GATEWAY_OPT, ADD_RESERVED_IPS_OPT, MAC_PREFIX_OPT, NETWORK6_OPT, GATEWAY6_OPT, NOCONFLICTSCHECK_OPT, TAG_ADD_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<network_name>", "Add a new IP network to the cluster"), "list": ( ListNetworks, ARGS_MANY_NETWORKS, [NOHDR_OPT, SEP_OPT, FIELDS_OPT, VERBOSE_OPT], "[<network_name>...]", "Lists the IP networks in the cluster. The available fields can be shown" " using the \"list-fields\" command (see the man page for details)." " The default list is (in order): %s." % utils.CommaJoin(_LIST_DEF_FIELDS)), "list-fields": ( ListNetworkFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for networks"), "info": ( ShowNetworkConfig, ARGS_MANY_NETWORKS, [], "[<network_name>...]", "Show information about the network(s)"), "modify": ( SetNetworkParams, ARGS_ONE_NETWORK, [DRY_RUN_OPT] + SUBMIT_OPTS + [ADD_RESERVED_IPS_OPT, REMOVE_RESERVED_IPS_OPT, GATEWAY_OPT, MAC_PREFIX_OPT, NETWORK6_OPT, GATEWAY6_OPT, PRIORITY_OPT], "<network_name>", "Alters the parameters of a network"), "connect": ( ConnectNetwork, [ArgNetwork(min=1, max=1), ArgChoice(min=1, max=1, choices=constants.NIC_VALID_MODES), ArgUnknown(min=1, max=1), ArgGroup()], [NOCONFLICTSCHECK_OPT, PRIORITY_OPT], "<network_name> <mode> <link> [<node_group>...]", "Map a given network to the specified node group" " with given mode and link (netparams)"), "disconnect": ( DisconnectNetwork, [ArgNetwork(min=1, max=1), ArgGroup()], [PRIORITY_OPT], "<network_name> [<node_group>...]", "Unmap a given network from a specified node group"), "remove": ( RemoveNetwork, ARGS_ONE_NETWORK, [FORCE_OPT, DRY_RUN_OPT] + SUBMIT_OPTS + [PRIORITY_OPT], "[--dry-run] <network_name>", "Remove an (empty) network from the cluster"), "list-tags": ( ListTags, ARGS_ONE_NETWORK, [], "<network_name>", "List the tags of the given network"), "add-tags": ( AddTags, [ArgNetwork(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<network_name> tag...", "Add tags to the given network"), "remove-tags": ( RemoveTags, [ArgNetwork(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "<network_name> tag...", "Remove tags from the given network"), } def Main(): return GenericMain(commands, override={"tag_type": constants.TAG_NETWORK}) ganeti-2.9.3/lib/client/gnt_os.py0000644000000000000000000002042712267470014016671 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA.
"""OS scripts related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-os from ganeti.cli import * from ganeti import constants from ganeti import opcodes from ganeti import utils def ListOS(opts, args): """List the valid OSes in the cluster. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[]) result = SubmitOpCode(op, opts=opts) if not opts.no_headers: headers = {"name": "Name"} else: headers = None os_names = [] for (name, variants) in result: os_names.extend([[n] for n in CalculateOSNames(name, variants)]) data = GenerateTable(separator=None, headers=headers, fields=["name"], data=os_names, units=None) for line in data: ToStdout(line) return 0 def ShowOSInfo(opts, args): """List detailed information about OSes in the cluster. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ op = opcodes.OpOsDiagnose(output_fields=["name", "valid", "variants", "parameters", "api_versions", "blacklisted", "hidden"], names=[]) result = SubmitOpCode(op, opts=opts) if not result: ToStderr("Can't get the OS list") return 1 do_filter = bool(args) for (name, valid, variants, parameters, api_versions, blk, hid) in result: if do_filter: if name not in args: continue else: args.remove(name) ToStdout("%s:", name) ToStdout(" - valid: %s", valid) ToStdout(" - hidden: %s", hid) ToStdout(" - blacklisted: %s", blk) if valid: ToStdout(" - API versions:") for version in sorted(api_versions): ToStdout(" - %s", version) ToStdout(" - variants:") for vname in variants: ToStdout(" - %s", vname) ToStdout(" - parameters:") for pname, pdesc in parameters: ToStdout(" - %s: %s", pname, pdesc) ToStdout("") if args: for name in args: ToStdout("%s: ", name) ToStdout("") return 0 def _OsStatus(status, diagnose): """Beautifier function for OS status. @type status: boolean @param status: is the OS valid @type diagnose: string @param diagnose: the error message for invalid OSes @rtype: string @return: a formatted status """ if status: return "valid" else: return "invalid - %s" % diagnose def DiagnoseOS(opts, args): """Analyse all OSes on this cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ op = opcodes.OpOsDiagnose(output_fields=["name", "valid", "variants", "node_status", "hidden", "blacklisted"], names=[]) result = SubmitOpCode(op, opts=opts) if not result: ToStderr("Can't get the OS list") return 1 has_bad = False for os_name, _, os_variants, node_data, hid, blk in result: nodes_valid = {} nodes_bad = {} nodes_hidden = {} for node_name, node_info in node_data.iteritems(): nodes_hidden[node_name] = [] if node_info: # at least one entry in the per-node list (fo_path, fo_status, fo_msg, fo_variants, fo_params, fo_api) = node_info.pop(0) fo_msg = "%s (path: %s)" % (_OsStatus(fo_status, fo_msg), fo_path) if fo_api: max_os_api = max(fo_api) fo_msg += " [API versions: %s]" % utils.CommaJoin(fo_api) else: max_os_api = 0 fo_msg += " [no API versions declared]" if max_os_api >= constants.OS_API_V15: if fo_variants: fo_msg += " [variants: %s]" % utils.CommaJoin(fo_variants) else: fo_msg += " [no variants]" if max_os_api >= constants.OS_API_V20: if fo_params: fo_msg += (" [parameters: %s]" % utils.CommaJoin([v[0] for v in fo_params])) else: fo_msg += " [no parameters]" if fo_status: nodes_valid[node_name] = fo_msg else: nodes_bad[node_name] = fo_msg for hpath, hstatus, hmsg, _, _, _ in node_info: nodes_hidden[node_name].append(" [hidden] path: %s, status: %s" % (hpath, _OsStatus(hstatus, hmsg))) else: nodes_bad[node_name] = "OS not found" # TODO: Shouldn't the global status be calculated by the LU? if nodes_valid and not nodes_bad: status = "valid" elif not nodes_valid and nodes_bad: status = "invalid" has_bad = True else: status = "partial valid" has_bad = True def _OutputPerNodeOSStatus(msg_map): map_k = utils.NiceSort(msg_map.keys()) for node_name in map_k: ToStdout(" Node: %s, status: %s", node_name, msg_map[node_name]) for msg in nodes_hidden[node_name]: ToStdout(msg) st_msg = "OS: %s [global status: %s]" % (os_name, status) if hid: st_msg += " [hidden]" if blk: st_msg += " [blacklisted]" ToStdout(st_msg) if os_variants: ToStdout(" Variants: [%s]" % utils.CommaJoin(os_variants)) _OutputPerNodeOSStatus(nodes_valid) _OutputPerNodeOSStatus(nodes_bad) ToStdout("") return int(has_bad) def ModifyOS(opts, args): """Modify OS parameters for one OS. 
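
  Hypervisor parameters, OS parameters and the hidden/blacklisted flags
  are all folded into a single L{opcodes.OpClusterSetParams} opcode. A
  hypothetical invocation setting one OS parameter (the option spelling
  is defined by OSPARAMS_OPT in ganeti.cli; values are illustrative)::

    gnt-os modify -O use_dhcp=yes debootstrap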
@param opts: the command line options selected by the user @type args: list @param args: should be a list with one entry @rtype: int @return: the desired exit code """ os = args[0] if opts.hvparams: os_hvp = {os: dict(opts.hvparams)} else: os_hvp = None if opts.osparams: osp = {os: opts.osparams} else: osp = None if opts.hidden is not None: if opts.hidden: ohid = [(constants.DDM_ADD, os)] else: ohid = [(constants.DDM_REMOVE, os)] else: ohid = None if opts.blacklisted is not None: if opts.blacklisted: oblk = [(constants.DDM_ADD, os)] else: oblk = [(constants.DDM_REMOVE, os)] else: oblk = None if not (os_hvp or osp or ohid or oblk): ToStderr("At least one of OS parameters or hypervisor parameters" " must be passed") return 1 op = opcodes.OpClusterSetParams(os_hvp=os_hvp, osparams=osp, hidden_os=ohid, blacklisted_os=oblk) SubmitOrSend(op, opts) return 0 commands = { "list": ( ListOS, ARGS_NONE, [NOHDR_OPT, PRIORITY_OPT], "", "Lists all valid operating systems on the cluster"), "diagnose": ( DiagnoseOS, ARGS_NONE, [PRIORITY_OPT], "", "Diagnose all operating systems"), "info": ( ShowOSInfo, [ArgOs()], [PRIORITY_OPT], "", "Show detailed information about " "operating systems"), "modify": ( ModifyOS, ARGS_ONE_OS, [HVLIST_OPT, OSPARAMS_OPT, DRY_RUN_OPT, PRIORITY_OPT, HID_OS_OPT, BLK_OS_OPT] + SUBMIT_OPTS, "", "Modify the OS parameters"), } #: dictionary with aliases for commands aliases = { "show": "info", } def Main(): return GenericMain(commands, aliases=aliases) ganeti-2.9.3/lib/client/gnt_debug.py0000644000000000000000000005567512271422343017350 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Debugging commands""" # pylint: disable=W0401,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-backup import simplejson import time import socket import logging from ganeti.cli import * from ganeti import cli from ganeti import constants from ganeti import opcodes from ganeti import utils from ganeti import errors from ganeti import compat from ganeti import ht #: Default fields for L{ListLocks} _LIST_LOCKS_DEF_FIELDS = [ "name", "mode", "owner", "pending", ] def Delay(opts, args): """Sleeps for a while @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the duration the sleep @rtype: int @return: the desired exit code """ delay = float(args[0]) op = opcodes.OpTestDelay(duration=delay, on_master=opts.on_master, on_nodes=opts.on_nodes, repeat=opts.repeat) SubmitOrSend(op, opts) return 0 def GenericOpCodes(opts, args): """Send any opcode to the master. 
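
  Each input file must contain a JSON list of serialised opcodes, i.e.
  objects with an C{OP_ID} field plus the opcode's parameters; a minimal
  example file (field values are illustrative) would be::

    [
      {"OP_ID": "OP_TEST_DELAY", "duration": 2.0}
    ]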
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the path of the file with the opcode definition @rtype: int @return: the desired exit code """ cl = cli.GetClient() jex = cli.JobExecutor(cl=cl, verbose=opts.verbose, opts=opts) job_cnt = 0 op_cnt = 0 if opts.timing_stats: ToStdout("Loading...") for job_idx in range(opts.rep_job): for fname in args: # pylint: disable=W0142 op_data = simplejson.loads(utils.ReadFile(fname)) op_list = [opcodes.OpCode.LoadOpCode(val) for val in op_data] op_list = op_list * opts.rep_op jex.QueueJob("file %s/%d" % (fname, job_idx), *op_list) op_cnt += len(op_list) job_cnt += 1 if opts.timing_stats: t1 = time.time() ToStdout("Submitting...") jex.SubmitPending(each=opts.each) if opts.timing_stats: t2 = time.time() ToStdout("Executing...") jex.GetResults() if opts.timing_stats: t3 = time.time() ToStdout("C:op %4d" % op_cnt) ToStdout("C:job %4d" % job_cnt) ToStdout("T:submit %4.4f" % (t2 - t1)) ToStdout("T:exec %4.4f" % (t3 - t2)) ToStdout("T:total %4.4f" % (t3 - t1)) return 0 def TestAllocator(opts, args): """Runs the test allocator opcode. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the iallocator name @rtype: int @return: the desired exit code """ try: disks = [{ constants.IDISK_SIZE: utils.ParseUnit(val), constants.IDISK_MODE: constants.DISK_RDWR, } for val in opts.disks.split(",")] except errors.UnitParseError, err: ToStderr("Invalid disks parameter '%s': %s", opts.disks, err) return 1 nics = [val.split("/") for val in opts.nics.split(",")] for row in nics: while len(row) < 3: row.append(None) for i in range(3): if row[i] == "": row[i] = None nic_dict = [{ constants.INIC_MAC: v[0], constants.INIC_IP: v[1], # The iallocator interface defines a "bridge" item "bridge": v[2], } for v in nics] if opts.tags is None: opts.tags = [] else: opts.tags = opts.tags.split(",") if opts.target_groups is None: target_groups = [] else: target_groups = opts.target_groups op = opcodes.OpTestAllocator(mode=opts.mode, name=args[0], instances=args, memory=opts.memory, disks=disks, disk_template=opts.disk_template, nics=nic_dict, os=opts.os, vcpus=opts.vcpus, tags=opts.tags, direction=opts.direction, iallocator=opts.iallocator, evac_mode=opts.evac_mode, target_groups=target_groups, spindle_use=opts.spindle_use, count=opts.count) result = SubmitOpCode(op, opts=opts) ToStdout("%s" % result) return 0 def _TestJobDependency(opts): """Tests job dependencies. 
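  Dependencies are given as (job ID, [status, ...]) pairs, where a
  negative job ID is relative and refers to a job submitted earlier in
  the same L{SubmitManyJobs} call (-1 being the immediately preceding
  job). Relative IDs are invalid in a plain single-job submission, which
  is the first case exercised below.
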
""" ToStdout("Testing job dependencies") cl = cli.GetClient() try: SubmitOpCode(opcodes.OpTestDelay(duration=0, depends=[(-1, None)]), cl=cl) except errors.GenericError, err: if opts.debug: ToStdout("Ignoring error for 'wrong dependencies' test: %s", err) else: raise errors.OpExecError("Submitting plain opcode with relative job ID" " did not fail as expected") # TODO: Test dependencies on errors jobs = [ [opcodes.OpTestDelay(duration=1)], [opcodes.OpTestDelay(duration=1, depends=[(-1, [])])], [opcodes.OpTestDelay(duration=1, depends=[(-2, [constants.JOB_STATUS_SUCCESS])])], [opcodes.OpTestDelay(duration=1, depends=[])], [opcodes.OpTestDelay(duration=1, depends=[(-2, [constants.JOB_STATUS_SUCCESS])])], ] # Function for checking result check_fn = ht.TListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ht.TBool, ht.TOr(ht.TNonEmptyString, ht.TJobId)]))) result = cl.SubmitManyJobs(jobs) if not check_fn(result): raise errors.OpExecError("Job submission doesn't match %s: %s" % (check_fn, result)) # Wait for jobs to finish jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result: jex.AddJobId(None, status, job_id) job_results = jex.GetResults() if not compat.all(row[0] for row in job_results): raise errors.OpExecError("At least one of the submitted jobs failed: %s" % job_results) # Get details about jobs data = cl.QueryJobs([job_id for (_, job_id) in result], ["id", "opexec", "ops"]) data_job_id = [job_id for (job_id, _, _) in data] data_opexec = [opexec for (_, opexec, _) in data] data_op = [[opcodes.OpCode.LoadOpCode(op) for op in ops] for (_, _, ops) in data] assert compat.all(not op.depends or len(op.depends) == 1 for ops in data_op for op in ops) # Check resolved job IDs in dependencies for (job_idx, res_jobdep) in [(1, data_job_id[0]), (2, data_job_id[0]), (4, data_job_id[2])]: if data_op[job_idx][0].depends[0][0] != res_jobdep: raise errors.OpExecError("Job %s's opcode doesn't depend on correct job" " ID (%s)" % (job_idx, res_jobdep)) # Check execution order if not (data_opexec[0] <= data_opexec[1] and data_opexec[0] <= data_opexec[2] and data_opexec[2] <= data_opexec[4]): raise errors.OpExecError("Jobs did not run in correct order: %s" % data) assert len(jobs) == 5 and compat.all(len(ops) == 1 for ops in jobs) ToStdout("Job dependency tests were successful") def _TestJobSubmission(opts): """Tests submitting jobs. 
""" ToStdout("Testing job submission") testdata = [ (0, 0, constants.OP_PRIO_LOWEST), (0, 0, constants.OP_PRIO_HIGHEST), ] for priority in (constants.OP_PRIO_SUBMIT_VALID | frozenset([constants.OP_PRIO_LOWEST, constants.OP_PRIO_HIGHEST])): for offset in [-1, +1]: testdata.extend([ (0, 0, priority + offset), (3, 0, priority + offset), (0, 3, priority + offset), (4, 2, priority + offset), ]) cl = cli.GetClient() for before, after, failpriority in testdata: ops = [] ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(before)]) ops.append(opcodes.OpTestDelay(duration=0, priority=failpriority)) ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(after)]) try: cl.SubmitJob(ops) except errors.GenericError, err: if opts.debug: ToStdout("Ignoring error for 'wrong priority' test: %s", err) else: raise errors.OpExecError("Submitting opcode with priority %s did not" " fail when it should (allowed are %s)" % (failpriority, constants.OP_PRIO_SUBMIT_VALID)) jobs = [ [opcodes.OpTestDelay(duration=0), opcodes.OpTestDelay(duration=0, dry_run=False), opcodes.OpTestDelay(duration=0, dry_run=True)], ops, ] result = cl.SubmitManyJobs(jobs) if not (len(result) == 2 and compat.all(len(i) == 2 for i in result) and isinstance(result[0][1], int) and isinstance(result[1][1], basestring) and result[0][0] and not result[1][0]): raise errors.OpExecError("Submitting multiple jobs did not work as" " expected, result %s" % result) assert len(result) == 2 ToStdout("Job submission tests were successful") class _JobQueueTestReporter(cli.StdioJobPollReportCb): def __init__(self): """Initializes this class. """ cli.StdioJobPollReportCb.__init__(self) self._expected_msgcount = 0 self._all_testmsgs = [] self._testmsgs = None self._job_id = None def GetTestMessages(self): """Returns all test log messages received so far. """ return self._all_testmsgs def GetJobId(self): """Returns the job ID. """ return self._job_id def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg): """Handles a log message. """ if self._job_id is None: self._job_id = job_id elif self._job_id != job_id: raise errors.ProgrammerError("The same reporter instance was used for" " more than one job") if log_type == constants.ELOG_JQUEUE_TEST: (sockname, test, arg) = log_msg return self._ProcessTestMessage(job_id, sockname, test, arg) elif (log_type == constants.ELOG_MESSAGE and log_msg.startswith(constants.JQT_MSGPREFIX)): if self._testmsgs is None: raise errors.OpExecError("Received test message without a preceding" " start message") testmsg = log_msg[len(constants.JQT_MSGPREFIX):] self._testmsgs.append(testmsg) self._all_testmsgs.append(testmsg) return return cli.StdioJobPollReportCb.ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg) def _ProcessTestMessage(self, job_id, sockname, test, arg): """Handles a job queue test message. 
""" if test not in constants.JQT_ALL: raise errors.OpExecError("Received invalid test message %s" % test) sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: sock.settimeout(30.0) logging.debug("Connecting to %s", sockname) sock.connect(sockname) logging.debug("Checking status") jobdetails = cli.GetClient().QueryJobs([job_id], ["status"])[0] if not jobdetails: raise errors.OpExecError("Can't find job %s" % job_id) status = jobdetails[0] logging.debug("Status of job %s is %s", job_id, status) if test == constants.JQT_EXPANDNAMES: if status != constants.JOB_STATUS_WAITING: raise errors.OpExecError("Job status while expanding names is '%s'," " not '%s' as expected" % (status, constants.JOB_STATUS_WAITING)) elif test in (constants.JQT_EXEC, constants.JQT_LOGMSG): if status != constants.JOB_STATUS_RUNNING: raise errors.OpExecError("Job status while executing opcode is '%s'," " not '%s' as expected" % (status, constants.JOB_STATUS_RUNNING)) if test == constants.JQT_STARTMSG: logging.debug("Expecting %s test messages", arg) self._testmsgs = [] elif test == constants.JQT_LOGMSG: if len(self._testmsgs) != arg: raise errors.OpExecError("Received %s test messages when %s are" " expected" % (len(self._testmsgs), arg)) finally: logging.debug("Closing socket") sock.close() def TestJobqueue(opts, _): """Runs a few tests on the job queue. """ _TestJobSubmission(opts) _TestJobDependency(opts) (TM_SUCCESS, TM_MULTISUCCESS, TM_FAIL, TM_PARTFAIL) = range(4) TM_ALL = compat.UniqueFrozenset([ TM_SUCCESS, TM_MULTISUCCESS, TM_FAIL, TM_PARTFAIL, ]) for mode in TM_ALL: test_messages = [ "Testing mode %s" % mode, "Hello World", "A", "", "B" "Foo|bar|baz", utils.TimestampForFilename(), ] fail = mode in (TM_FAIL, TM_PARTFAIL) if mode == TM_PARTFAIL: ToStdout("Testing partial job failure") ops = [ opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=False), opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=False), opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=True), opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=False), ] expect_messages = 3 * [test_messages] expect_opstatus = [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, constants.OP_STATUS_ERROR, constants.OP_STATUS_ERROR, ] expect_resultlen = 2 elif mode == TM_MULTISUCCESS: ToStdout("Testing multiple successful opcodes") ops = [ opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=False), opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=False), ] expect_messages = 2 * [test_messages] expect_opstatus = [ constants.OP_STATUS_SUCCESS, constants.OP_STATUS_SUCCESS, ] expect_resultlen = 2 else: if mode == TM_SUCCESS: ToStdout("Testing job success") expect_opstatus = [constants.OP_STATUS_SUCCESS] elif mode == TM_FAIL: ToStdout("Testing job failure") expect_opstatus = [constants.OP_STATUS_ERROR] else: raise errors.ProgrammerError("Unknown test mode %s" % mode) ops = [ opcodes.OpTestJqueue(notify_waitlock=True, notify_exec=True, log_messages=test_messages, fail=fail), ] expect_messages = [test_messages] expect_resultlen = 1 cl = cli.GetClient() cli.SetGenericOpcodeOpts(ops, opts) # Send job to master daemon job_id = cli.SendJob(ops, cl=cl) reporter = _JobQueueTestReporter() results = None try: results = cli.PollJob(job_id, cl=cl, reporter=reporter) except errors.OpExecError, err: if not 
fail:
        raise
      ToStdout("Ignoring error for 'job fail' test: %s", err)
    else:
      if fail:
        raise errors.OpExecError("Job didn't fail when it should")

    # Check length of result
    if fail:
      if results is not None:
        raise errors.OpExecError("Received result from failed job")
    elif len(results) != expect_resultlen:
      raise errors.OpExecError("Received %s results (%s), expected %s" %
                               (len(results), results, expect_resultlen))

    # Check received log messages
    all_messages = [i for j in expect_messages for i in j]
    if reporter.GetTestMessages() != all_messages:
      raise errors.OpExecError("Received test messages don't match input"
                               " (input %r, received %r)" %
                               (all_messages, reporter.GetTestMessages()))

    # Check final status
    reported_job_id = reporter.GetJobId()
    if reported_job_id != job_id:
      raise errors.OpExecError("Reported job ID %s doesn't match"
                               " submission job ID %s" %
                               (reported_job_id, job_id))

    jobdetails = cli.GetClient().QueryJobs([job_id], ["status", "opstatus"])[0]
    if not jobdetails:
      raise errors.OpExecError("Can't find job %s" % job_id)

    if fail:
      exp_status = constants.JOB_STATUS_ERROR
    else:
      exp_status = constants.JOB_STATUS_SUCCESS

    (final_status, final_opstatus) = jobdetails
    if final_status != exp_status:
      raise errors.OpExecError("Final job status is %s, not %s as expected" %
                               (final_status, exp_status))
    if len(final_opstatus) != len(ops):
      raise errors.OpExecError("Did not receive status for all opcodes (got %s,"
                               " expected %s)" %
                               (len(final_opstatus), len(ops)))
    if final_opstatus != expect_opstatus:
      raise errors.OpExecError("Opcode status is %s, expected %s" %
                               (final_opstatus, expect_opstatus))

  ToStdout("Job queue test successful")

  return 0


def ListLocks(opts, args):  # pylint: disable=W0613
  """List all locks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  selected_fields = ParseFields(opts.output, _LIST_LOCKS_DEF_FIELDS)

  def _DashIfNone(fn):
    def wrapper(value):
      if not value:
        return "-"
      return fn(value)
    return wrapper

  def _FormatPending(value):
    """Format pending acquires.

    """
    return utils.CommaJoin("%s:%s" % (mode, ",".join(threads))
                           for mode, threads in value)

  # Format raw values
  fmtoverride = {
    "mode": (_DashIfNone(str), False),
    "owner": (_DashIfNone(",".join), False),
    "pending": (_DashIfNone(_FormatPending), False),
    }

  while True:
    ret = GenericList(constants.QR_LOCK, selected_fields, None, None,
                      opts.separator, not opts.no_headers,
                      format_override=fmtoverride, verbose=opts.verbose)

    if ret != constants.EXIT_SUCCESS:
      return ret

    if not opts.interval:
      break

    ToStdout("")
    time.sleep(opts.interval)

  return 0


commands = {
  "delay": (
    Delay, [ArgUnknown(min=1, max=1)],
    [cli_option("--no-master", dest="on_master", default=True,
                action="store_false", help="Do not sleep in the master code"),
     cli_option("-n", dest="on_nodes", default=[],
                action="append", help="Select nodes to sleep on"),
     cli_option("-r", "--repeat", type="int", default="0", dest="repeat",
                help="Number of times to repeat the sleep"),
     DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "[opts...] <duration>
", "Executes a TestDelay OpCode"), "submit-job": ( GenericOpCodes, [ArgFile(min=1)], [VERBOSE_OPT, cli_option("--op-repeat", type="int", default="1", dest="rep_op", help="Repeat the opcode sequence this number of times"), cli_option("--job-repeat", type="int", default="1", dest="rep_job", help="Repeat the job this number of times"), cli_option("--timing-stats", default=False, action="store_true", help="Show timing stats"), cli_option("--each", default=False, action="store_true", help="Submit each job separately"), DRY_RUN_OPT, PRIORITY_OPT, ], "", "Submits jobs built from json files" " containing a list of serialized opcodes"), "iallocator": ( TestAllocator, [ArgUnknown(min=1)], [cli_option("--dir", dest="direction", default=constants.IALLOCATOR_DIR_IN, choices=list(constants.VALID_IALLOCATOR_DIRECTIONS), help="Show allocator input (in) or allocator" " results (out)"), IALLOCATOR_OPT, cli_option("-m", "--mode", default="relocate", choices=list(constants.VALID_IALLOCATOR_MODES), help=("Request mode (one of %s)" % utils.CommaJoin(constants.VALID_IALLOCATOR_MODES))), cli_option("--memory", default=128, type="unit", help="Memory size for the instance (MiB)"), cli_option("--disks", default="4096,4096", help="Comma separated list of disk sizes (MiB)"), DISK_TEMPLATE_OPT, cli_option("--nics", default="00:11:22:33:44:55", help="Comma separated list of nics, each nic" " definition is of form mac/ip/bridge, if" " missing values are replace by None"), OS_OPT, cli_option("-p", "--vcpus", default=1, type="int", help="Select number of VCPUs for the instance"), cli_option("--tags", default=None, help="Comma separated list of tags"), cli_option("--evac-mode", default=constants.IALLOCATOR_NEVAC_ALL, choices=list(constants.IALLOCATOR_NEVAC_MODES), help=("Node evacuation mode (one of %s)" % utils.CommaJoin(constants.IALLOCATOR_NEVAC_MODES))), cli_option("--target-groups", help="Target groups for relocation", default=[], action="append"), cli_option("--spindle-use", help="How many spindles to use", default=1, type="int"), cli_option("--count", help="How many instances to allocate", default=2, type="int"), DRY_RUN_OPT, PRIORITY_OPT, ], "{opts...} ", "Executes a TestAllocator OpCode"), "test-jobqueue": ( TestJobqueue, ARGS_NONE, [PRIORITY_OPT], "", "Test a few aspects of the job queue"), "locks": ( ListLocks, ARGS_NONE, [NOHDR_OPT, SEP_OPT, FIELDS_OPT, INTERVAL_OPT, VERBOSE_OPT], "[--interval N]", "Show a list of locks in the master daemon"), } #: dictionary with aliases for commands aliases = { "allocator": "iallocator", } def Main(): return GenericMain(commands, aliases=aliases) ganeti-2.9.3/lib/client/__init__.py0000644000000000000000000000142512230001635017121 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Common command line client code. 
""" ganeti-2.9.3/lib/client/gnt_backup.py0000644000000000000000000001161012271422343017504 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Backup related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-backup from ganeti.cli import * from ganeti import opcodes from ganeti import constants from ganeti import errors from ganeti import qlang _LIST_DEF_FIELDS = ["node", "export"] def PrintExportList(opts, args): """Prints a list of all the exported system images. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ selected_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) qfilter = qlang.MakeSimpleFilter("node", opts.nodes) cl = GetClient(query=True) return GenericList(constants.QR_EXPORT, selected_fields, None, opts.units, opts.separator, not opts.no_headers, verbose=opts.verbose, qfilter=qfilter, cl=cl) def ListExportFields(opts, args): """List export fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ cl = GetClient(query=True) return GenericListFields(constants.QR_EXPORT, args, opts.separator, not opts.no_headers, cl=cl) def ExportInstance(opts, args): """Export an instance to an image in the cluster. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the instance to be exported @rtype: int @return: the desired exit code """ ignore_remove_failures = opts.ignore_remove_failures if not opts.node: raise errors.OpPrereqError("Target node must be specified", errors.ECODE_INVAL) op = opcodes.OpBackupExport(instance_name=args[0], target_node=opts.node, shutdown=opts.shutdown, shutdown_timeout=opts.shutdown_timeout, remove_instance=opts.remove_instance, ignore_remove_failures=ignore_remove_failures) SubmitOrSend(op, opts) return 0 def ImportInstance(opts, args): """Add an instance to the cluster. This is just a wrapper over GenericInstanceCreate. """ return GenericInstanceCreate(constants.INSTANCE_IMPORT, opts, args) def RemoveExport(opts, args): """Remove an export from the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the instance whose backup should be removed @rtype: int @return: the desired exit code """ op = opcodes.OpBackupRemove(instance_name=args[0]) SubmitOrSend(op, opts) return 0 # this is defined separately due to readability only import_opts = [ IDENTIFY_DEFAULTS_OPT, SRC_DIR_OPT, SRC_NODE_OPT, IGNORE_IPOLICY_OPT, ] commands = { "list": ( PrintExportList, ARGS_NONE, [NODE_LIST_OPT, NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, VERBOSE_OPT], "", "Lists instance exports available in the ganeti cluster"), "list-fields": ( ListExportFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for exports"), "export": ( ExportInstance, ARGS_ONE_INSTANCE, [FORCE_OPT, SINGLE_NODE_OPT, NOSHUTDOWN_OPT, SHUTDOWN_TIMEOUT_OPT, REMOVE_INSTANCE_OPT, IGNORE_REMOVE_FAILURES_OPT, DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "-n [opts...] ", "Exports an instance to an image"), "import": ( ImportInstance, ARGS_ONE_INSTANCE, COMMON_CREATE_OPTS + import_opts, "[...] -t disk-type -n node[:secondary-node] ", "Imports an instance from an exported image"), "remove": ( RemoveExport, [ArgUnknown(min=1, max=1)], [DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "", "Remove exports of named instance from the filesystem."), } def Main(): return GenericMain(commands) ganeti-2.9.3/lib/client/gnt_instance.py0000644000000000000000000015776212271422343020066 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Instance related commands""" # pylint: disable=W0401,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-instance import copy import itertools import simplejson import logging from ganeti.cli import * from ganeti import opcodes from ganeti import constants from ganeti import compat from ganeti import utils from ganeti import errors from ganeti import netutils from ganeti import ssh from ganeti import objects from ganeti import ht _EXPAND_CLUSTER = "cluster" _EXPAND_NODES_BOTH = "nodes" _EXPAND_NODES_PRI = "nodes-pri" _EXPAND_NODES_SEC = "nodes-sec" _EXPAND_NODES_BOTH_BY_TAGS = "nodes-by-tags" _EXPAND_NODES_PRI_BY_TAGS = "nodes-pri-by-tags" _EXPAND_NODES_SEC_BY_TAGS = "nodes-sec-by-tags" _EXPAND_INSTANCES = "instances" _EXPAND_INSTANCES_BY_TAGS = "instances-by-tags" _EXPAND_NODES_TAGS_MODES = compat.UniqueFrozenset([ _EXPAND_NODES_BOTH_BY_TAGS, _EXPAND_NODES_PRI_BY_TAGS, _EXPAND_NODES_SEC_BY_TAGS, ]) #: default list of options for L{ListInstances} _LIST_DEF_FIELDS = [ "name", "hypervisor", "os", "pnode", "status", "oper_ram", ] _MISSING = object() _ENV_OVERRIDE = compat.UniqueFrozenset(["list"]) _INST_DATA_VAL = ht.TListOf(ht.TDict) def _ExpandMultiNames(mode, names, client=None): """Expand the given names using the passed mode. For _EXPAND_CLUSTER, all instances will be returned. For _EXPAND_NODES_PRI/SEC, all instances having those nodes as primary/secondary will be returned. For _EXPAND_NODES_BOTH, all instances having those nodes as either primary or secondary will be returned. For _EXPAND_INSTANCES, the given instances will be returned. @param mode: one of L{_EXPAND_CLUSTER}, L{_EXPAND_NODES_BOTH}, L{_EXPAND_NODES_PRI}, L{_EXPAND_NODES_SEC} or L{_EXPAND_INSTANCES} @param names: a list of names; for cluster, it must be empty, and for node and instance it must be a list of valid item names (short names are valid as usual, e.g. 
node1 instead of node1.example.com) @rtype: list @return: the list of names after the expansion @raise errors.ProgrammerError: for unknown selection type @raise errors.OpPrereqError: for invalid input parameters """ # pylint: disable=W0142 if client is None: client = GetClient() if mode == _EXPAND_CLUSTER: if names: raise errors.OpPrereqError("Cluster filter mode takes no arguments", errors.ECODE_INVAL) idata = client.QueryInstances([], ["name"], False) inames = [row[0] for row in idata] elif (mode in _EXPAND_NODES_TAGS_MODES or mode in (_EXPAND_NODES_BOTH, _EXPAND_NODES_PRI, _EXPAND_NODES_SEC)): if mode in _EXPAND_NODES_TAGS_MODES: if not names: raise errors.OpPrereqError("No node tags passed", errors.ECODE_INVAL) ndata = client.QueryNodes([], ["name", "pinst_list", "sinst_list", "tags"], False) ndata = [row for row in ndata if set(row[3]).intersection(names)] else: if not names: raise errors.OpPrereqError("No node names passed", errors.ECODE_INVAL) ndata = client.QueryNodes(names, ["name", "pinst_list", "sinst_list"], False) ipri = [row[1] for row in ndata] pri_names = list(itertools.chain(*ipri)) isec = [row[2] for row in ndata] sec_names = list(itertools.chain(*isec)) if mode in (_EXPAND_NODES_BOTH, _EXPAND_NODES_BOTH_BY_TAGS): inames = pri_names + sec_names elif mode in (_EXPAND_NODES_PRI, _EXPAND_NODES_PRI_BY_TAGS): inames = pri_names elif mode in (_EXPAND_NODES_SEC, _EXPAND_NODES_SEC_BY_TAGS): inames = sec_names else: raise errors.ProgrammerError("Unhandled shutdown type") elif mode == _EXPAND_INSTANCES: if not names: raise errors.OpPrereqError("No instance names passed", errors.ECODE_INVAL) idata = client.QueryInstances(names, ["name"], False) inames = [row[0] for row in idata] elif mode == _EXPAND_INSTANCES_BY_TAGS: if not names: raise errors.OpPrereqError("No instance tags passed", errors.ECODE_INVAL) idata = client.QueryInstances([], ["name", "tags"], False) inames = [row[0] for row in idata if set(row[1]).intersection(names)] else: raise errors.OpPrereqError("Unknown mode '%s'" % mode, errors.ECODE_INVAL) return inames def _EnsureInstancesExist(client, names): """Check for and ensure the given instance names exist. This function will raise an OpPrereqError in case they don't exist. Otherwise it will exit cleanly. @type client: L{ganeti.luxi.Client} @param client: the client to use for the query @type names: list @param names: the list of instance names to query @raise errors.OpPrereqError: in case any instance is missing """ # TODO: change LUInstanceQuery to that it actually returns None # instead of raising an exception, or devise a better mechanism result = client.QueryInstances(names, ["name"], False) for orig_name, row in zip(names, result): if row[0] is None: raise errors.OpPrereqError("Instance '%s' does not exist" % orig_name, errors.ECODE_NOENT) def GenericManyOps(operation, fn): """Generic multi-instance operations. The will return a wrapper that processes the options and arguments given, and uses the passed function to build the opcode needed for the specific operation. Thus all the generic loop/confirmation code is abstracted into this function. 
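
  For example, the shutdown command can be declared in the C{commands}
  table as C{GenericManyOps("shutdown", _ShutdownInstance)}, so that
  instance selection, confirmation and job submission are shared by all
  multi-instance commands.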
""" def realfn(opts, args): if opts.multi_mode is None: opts.multi_mode = _EXPAND_INSTANCES cl = GetClient() inames = _ExpandMultiNames(opts.multi_mode, args, client=cl) if not inames: if opts.multi_mode == _EXPAND_CLUSTER: ToStdout("Cluster is empty, no instances to shutdown") return 0 raise errors.OpPrereqError("Selection filter does not match" " any instances", errors.ECODE_INVAL) multi_on = opts.multi_mode != _EXPAND_INSTANCES or len(inames) > 1 if not (opts.force_multi or not multi_on or ConfirmOperation(inames, "instances", operation)): return 1 jex = JobExecutor(verbose=multi_on, cl=cl, opts=opts) for name in inames: op = fn(name, opts) jex.QueueJob(name, op) results = jex.WaitOrShow(not opts.submit_only) rcode = compat.all(row[0] for row in results) return int(not rcode) return realfn def ListInstances(opts, args): """List instances and their properties. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ selected_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) fmtoverride = dict.fromkeys(["tags", "disk.sizes", "nic.macs", "nic.ips", "nic.modes", "nic.links", "nic.bridges", "nic.networks", "snodes", "snodes.group", "snodes.group.uuid"], (lambda value: ",".join(str(item) for item in value), False)) return GenericList(constants.QR_INSTANCE, selected_fields, args, opts.units, opts.separator, not opts.no_headers, format_override=fmtoverride, verbose=opts.verbose, force_filter=opts.force_filter) def ListInstanceFields(opts, args): """List instance fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ return GenericListFields(constants.QR_INSTANCE, args, opts.separator, not opts.no_headers) def AddInstance(opts, args): """Add an instance to the cluster. This is just a wrapper over GenericInstanceCreate. """ return GenericInstanceCreate(constants.INSTANCE_CREATE, opts, args) def BatchCreate(opts, args): """Create instances using a definition file. This function reads a json file with L{opcodes.OpInstanceCreate} serialisations. 
@param opts: the command line options selected by the user @type args: list @param args: should contain one element, the json filename @rtype: int @return: the desired exit code """ (json_filename,) = args cl = GetClient() try: instance_data = simplejson.loads(utils.ReadFile(json_filename)) except Exception, err: # pylint: disable=W0703 ToStderr("Can't parse the instance definition file: %s" % str(err)) return 1 if not _INST_DATA_VAL(instance_data): ToStderr("The instance definition file is not %s" % _INST_DATA_VAL) return 1 instances = [] possible_params = set(opcodes.OpInstanceCreate.GetAllSlots()) for (idx, inst) in enumerate(instance_data): unknown = set(inst.keys()) - possible_params if unknown: # TODO: Suggest closest match for more user friendly experience raise errors.OpPrereqError("Unknown fields in definition %s: %s" % (idx, utils.CommaJoin(unknown)), errors.ECODE_INVAL) op = opcodes.OpInstanceCreate(**inst) # pylint: disable=W0142 op.Validate(False) instances.append(op) op = opcodes.OpInstanceMultiAlloc(iallocator=opts.iallocator, instances=instances) result = SubmitOrSend(op, opts, cl=cl) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("All instances created successfully.") rcode = constants.EXIT_SUCCESS else: ToStdout("There were %s errors during the creation.", bad_cnt) rcode = constants.EXIT_FAILURE return rcode def ReinstallInstance(opts, args): """Reinstall an instance. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the instance to be reinstalled @rtype: int @return: the desired exit code """ # first, compute the desired name list if opts.multi_mode is None: opts.multi_mode = _EXPAND_INSTANCES inames = _ExpandMultiNames(opts.multi_mode, args) if not inames: raise errors.OpPrereqError("Selection filter does not match any instances", errors.ECODE_INVAL) # second, if requested, ask for an OS if opts.select_os is True: op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[]) result = SubmitOpCode(op, opts=opts) if not result: ToStdout("Can't get the OS list") return 1 ToStdout("Available OS templates:") number = 0 choices = [] for (name, variants) in result: for entry in CalculateOSNames(name, variants): ToStdout("%3s: %s", number, entry) choices.append(("%s" % number, entry, entry)) number += 1 choices.append(("x", "exit", "Exit gnt-instance reinstall")) selected = AskUser("Enter OS template number (or x to abort):", choices) if selected == "exit": ToStderr("User aborted reinstall, exiting") return 1 os_name = selected os_msg = "change the OS to '%s'" % selected else: os_name = opts.os if opts.os is not None: os_msg = "change the OS to '%s'" % os_name else: os_msg = "keep the same OS" # third, get confirmation: multi-reinstall requires --force-multi, # single-reinstall either --force or --force-multi (--force-multi is # a stronger --force) multi_on = opts.multi_mode != _EXPAND_INSTANCES or len(inames) > 1 if multi_on: warn_msg = ("Note: this will remove *all* data for the" " below instances! It will %s.\n" % os_msg) if not (opts.force_multi or ConfirmOperation(inames, "instances", "reinstall", extra=warn_msg)): return 1 else: if not (opts.force or opts.force_multi): usertext = ("This will reinstall the instance '%s' (and %s) which" " removes all data. 
Continue?") % (inames[0], os_msg) if not AskUser(usertext): return 1 jex = JobExecutor(verbose=multi_on, opts=opts) for instance_name in inames: op = opcodes.OpInstanceReinstall(instance_name=instance_name, os_type=os_name, force_variant=opts.force_variant, osparams=opts.osparams) jex.QueueJob(instance_name, op) results = jex.WaitOrShow(not opts.submit_only) if compat.all(map(compat.fst, results)): return constants.EXIT_SUCCESS else: return constants.EXIT_FAILURE def RemoveInstance(opts, args): """Remove an instance. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the name of the instance to be removed @rtype: int @return: the desired exit code """ instance_name = args[0] force = opts.force cl = GetClient() if not force: _EnsureInstancesExist(cl, [instance_name]) usertext = ("This will remove the volumes of the instance %s" " (including mirrors), thus removing all the data" " of the instance. Continue?") % instance_name if not AskUser(usertext): return 1 op = opcodes.OpInstanceRemove(instance_name=instance_name, ignore_failures=opts.ignore_failures, shutdown_timeout=opts.shutdown_timeout) SubmitOrSend(op, opts, cl=cl) return 0 def RenameInstance(opts, args): """Rename an instance. @param opts: the command line options selected by the user @type args: list @param args: should contain two elements, the old and the new instance names @rtype: int @return: the desired exit code """ if not opts.name_check: if not AskUser("As you disabled the check of the DNS entry, please verify" " that '%s' is a FQDN. Continue?" % args[1]): return 1 op = opcodes.OpInstanceRename(instance_name=args[0], new_name=args[1], ip_check=opts.ip_check, name_check=opts.name_check) result = SubmitOrSend(op, opts) if result: ToStdout("Instance '%s' renamed to '%s'", args[0], result) return 0 def ActivateDisks(opts, args): """Activate an instance's disks. This serves two purposes: - it allows (as long as the instance is not running) mounting the disks and modifying them from the node - it repairs inactive secondary drbds @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ instance_name = args[0] op = opcodes.OpInstanceActivateDisks(instance_name=instance_name, ignore_size=opts.ignore_size, wait_for_sync=opts.wait_for_sync) disks_info = SubmitOrSend(op, opts) for host, iname, nname in disks_info: ToStdout("%s:%s:%s", host, iname, nname) return 0 def DeactivateDisks(opts, args): """Deactivate an instance's disks. This function takes the instance name, looks for its primary node and the tries to shutdown its block devices on that node. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ instance_name = args[0] op = opcodes.OpInstanceDeactivateDisks(instance_name=instance_name, force=opts.force) SubmitOrSend(op, opts) return 0 def RecreateDisks(opts, args): """Recreate an instance's disks. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ instance_name = args[0] disks = [] if opts.disks: for didx, ddict in opts.disks: didx = int(didx) if not ht.TDict(ddict): msg = "Invalid disk/%d value: expected dict, got %s" % (didx, ddict) raise errors.OpPrereqError(msg, errors.ECODE_INVAL) if constants.IDISK_SIZE in ddict: try: ddict[constants.IDISK_SIZE] = \ utils.ParseUnit(ddict[constants.IDISK_SIZE]) except ValueError, err: raise errors.OpPrereqError("Invalid disk size for disk %d: %s" % (didx, err), errors.ECODE_INVAL) disks.append((didx, ddict)) # TODO: Verify modifyable parameters (already done in # LUInstanceRecreateDisks, but it'd be nice to have in the client) if opts.node: if opts.iallocator: msg = "At most one of either --nodes or --iallocator can be passed" raise errors.OpPrereqError(msg, errors.ECODE_INVAL) pnode, snode = SplitNodeOption(opts.node) nodes = [pnode] if snode is not None: nodes.append(snode) else: nodes = [] op = opcodes.OpInstanceRecreateDisks(instance_name=instance_name, disks=disks, nodes=nodes, iallocator=opts.iallocator) SubmitOrSend(op, opts) return 0 def GrowDisk(opts, args): """Grow an instance's disks. @param opts: the command line options selected by the user @type args: list @param args: should contain three elements, the target instance name, the target disk id, and the target growth @rtype: int @return: the desired exit code """ instance = args[0] disk = args[1] try: disk = int(disk) except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid disk index: %s" % str(err), errors.ECODE_INVAL) try: amount = utils.ParseUnit(args[2]) except errors.UnitParseError: raise errors.OpPrereqError("Can't parse the given amount '%s'" % args[2], errors.ECODE_INVAL) op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=disk, amount=amount, wait_for_sync=opts.wait_for_sync, absolute=opts.absolute) SubmitOrSend(op, opts) return 0 def _StartupInstance(name, opts): """Startup instances. This returns the opcode to start an instance, and its decorator will wrap this into a loop starting all desired instances. @param name: the name of the instance to act on @param opts: the command line options selected by the user @return: the opcode needed for the operation """ op = opcodes.OpInstanceStartup(instance_name=name, force=opts.force, ignore_offline_nodes=opts.ignore_offline, no_remember=opts.no_remember, startup_paused=opts.startup_paused) # do not add these parameters to the opcode unless they're defined if opts.hvparams: op.hvparams = opts.hvparams if opts.beparams: op.beparams = opts.beparams return op def _RebootInstance(name, opts): """Reboot instance(s). This returns the opcode to reboot an instance, and its decorator will wrap this into a loop rebooting all desired instances. @param name: the name of the instance to act on @param opts: the command line options selected by the user @return: the opcode needed for the operation """ return opcodes.OpInstanceReboot(instance_name=name, reboot_type=opts.reboot_type, ignore_secondaries=opts.ignore_secondaries, shutdown_timeout=opts.shutdown_timeout) def _ShutdownInstance(name, opts): """Shutdown an instance. This returns the opcode to shutdown an instance, and its decorator will wrap this into a loop shutting down all desired instances. 
@param name: the name of the instance to act on @param opts: the command line options selected by the user @return: the opcode needed for the operation """ return opcodes.OpInstanceShutdown(instance_name=name, force=opts.force, timeout=opts.timeout, ignore_offline_nodes=opts.ignore_offline, no_remember=opts.no_remember) def ReplaceDisks(opts, args): """Replace the disks of an instance @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ new_2ndary = opts.dst_node iallocator = opts.iallocator if opts.disks is None: disks = [] else: try: disks = [int(i) for i in opts.disks.split(",")] except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid disk index passed: %s" % str(err), errors.ECODE_INVAL) cnt = [opts.on_primary, opts.on_secondary, opts.auto, new_2ndary is not None, iallocator is not None].count(True) if cnt != 1: raise errors.OpPrereqError("One and only one of the -p, -s, -a, -n and -I" " options must be passed", errors.ECODE_INVAL) elif opts.on_primary: mode = constants.REPLACE_DISK_PRI elif opts.on_secondary: mode = constants.REPLACE_DISK_SEC elif opts.auto: mode = constants.REPLACE_DISK_AUTO if disks: raise errors.OpPrereqError("Cannot specify disks when using automatic" " mode", errors.ECODE_INVAL) elif new_2ndary is not None or iallocator is not None: # replace secondary mode = constants.REPLACE_DISK_CHG op = opcodes.OpInstanceReplaceDisks(instance_name=args[0], disks=disks, remote_node=new_2ndary, mode=mode, iallocator=iallocator, early_release=opts.early_release, ignore_ipolicy=opts.ignore_ipolicy) SubmitOrSend(op, opts) return 0 def FailoverInstance(opts, args): """Failover an instance. The failover is done by shutting it down on its present node and starting it on the secondary. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ cl = GetClient() instance_name = args[0] force = opts.force iallocator = opts.iallocator target_node = opts.dst_node if iallocator and target_node: raise errors.OpPrereqError("Specify either an iallocator (-I), or a target" " node (-n) but not both", errors.ECODE_INVAL) if not force: _EnsureInstancesExist(cl, [instance_name]) usertext = ("Failover will happen to image %s." " This requires a shutdown of the instance. Continue?" % (instance_name,)) if not AskUser(usertext): return 1 op = opcodes.OpInstanceFailover(instance_name=instance_name, ignore_consistency=opts.ignore_consistency, shutdown_timeout=opts.shutdown_timeout, iallocator=iallocator, target_node=target_node, ignore_ipolicy=opts.ignore_ipolicy) SubmitOrSend(op, opts, cl=cl) return 0 def MigrateInstance(opts, args): """Migrate an instance. The migrate is done without shutdown. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ cl = GetClient() instance_name = args[0] force = opts.force iallocator = opts.iallocator target_node = opts.dst_node if iallocator and target_node: raise errors.OpPrereqError("Specify either an iallocator (-I), or a target" " node (-n) but not both", errors.ECODE_INVAL) if not force: _EnsureInstancesExist(cl, [instance_name]) if opts.cleanup: usertext = ("Instance %s will be recovered from a failed migration." 
" Note that the migration procedure (including cleanup)" % (instance_name,)) else: usertext = ("Instance %s will be migrated. Note that migration" % (instance_name,)) usertext += (" might impact the instance if anything goes wrong" " (e.g. due to bugs in the hypervisor). Continue?") if not AskUser(usertext): return 1 # this should be removed once --non-live is deprecated if not opts.live and opts.migration_mode is not None: raise errors.OpPrereqError("Only one of the --non-live and " "--migration-mode options can be passed", errors.ECODE_INVAL) if not opts.live: # --non-live passed mode = constants.HT_MIGRATION_NONLIVE else: mode = opts.migration_mode op = opcodes.OpInstanceMigrate(instance_name=instance_name, mode=mode, cleanup=opts.cleanup, iallocator=iallocator, target_node=target_node, allow_failover=opts.allow_failover, allow_runtime_changes=opts.allow_runtime_chgs, ignore_ipolicy=opts.ignore_ipolicy) SubmitOrSend(op, cl=cl, opts=opts) return 0 def MoveInstance(opts, args): """Move an instance. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ cl = GetClient() instance_name = args[0] force = opts.force if not force: usertext = ("Instance %s will be moved." " This requires a shutdown of the instance. Continue?" % (instance_name,)) if not AskUser(usertext): return 1 op = opcodes.OpInstanceMove(instance_name=instance_name, target_node=opts.node, shutdown_timeout=opts.shutdown_timeout, ignore_consistency=opts.ignore_consistency, ignore_ipolicy=opts.ignore_ipolicy) SubmitOrSend(op, opts, cl=cl) return 0 def ConnectToInstanceConsole(opts, args): """Connect to the console of an instance. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ instance_name = args[0] cl = GetClient() try: cluster_name = cl.QueryConfigValues(["cluster_name"])[0] ((console_data, oper_state), ) = \ cl.QueryInstances([instance_name], ["console", "oper_state"], False) finally: # Ensure client connection is closed while external commands are run cl.Close() del cl if not console_data: if oper_state: # Instance is running raise errors.OpExecError("Console information for instance %s is" " unavailable" % instance_name) else: raise errors.OpExecError("Instance %s is not running, can't get console" % instance_name) return _DoConsole(objects.InstanceConsole.FromDict(console_data), opts.show_command, cluster_name) def _DoConsole(console, show_command, cluster_name, feedback_fn=ToStdout, _runcmd_fn=utils.RunCmd): """Acts based on the result of L{opcodes.OpInstanceConsole}. 
  @type console: L{objects.InstanceConsole}
  @param console: Console object
  @type show_command: bool
  @param show_command: Whether to just display commands
  @type cluster_name: string
  @param cluster_name: Cluster name as retrieved from master daemon

  """
  assert console.Validate()

  if console.kind == constants.CONS_MESSAGE:
    feedback_fn(console.message)
  elif console.kind == constants.CONS_VNC:
    feedback_fn("Instance %s has VNC listening on %s:%s (display %s),"
                " URL <vnc://%s:%s/>",
                console.instance, console.host, console.port,
                console.display, console.host, console.port)
  elif console.kind == constants.CONS_SPICE:
    feedback_fn("Instance %s has SPICE listening on %s:%s", console.instance,
                console.host, console.port)
  elif console.kind == constants.CONS_SSH:
    # Convert to string if not already one
    if isinstance(console.command, basestring):
      cmd = console.command
    else:
      cmd = utils.ShellQuoteArgs(console.command)

    srun = ssh.SshRunner(cluster_name=cluster_name)
    ssh_cmd = srun.BuildCmd(console.host, console.user, cmd,
                            batch=True, quiet=False, tty=True)

    if show_command:
      feedback_fn(utils.ShellQuoteArgs(ssh_cmd))
    else:
      result = _runcmd_fn(ssh_cmd, interactive=True)

      if result.failed:
        logging.error("Console command \"%s\" failed with reason '%s' and"
                      " output %r", result.cmd, result.fail_reason,
                      result.output)
        raise errors.OpExecError("Connection to console of instance %s failed,"
                                 " please check cluster configuration" %
                                 console.instance)
  else:
    raise errors.GenericError("Unknown console type '%s'" % console.kind)

  return constants.EXIT_SUCCESS


def _FormatDiskDetails(dev_type, dev, roman):
  """Formats the logical_id of a disk.

  """
  if dev_type == constants.DT_DRBD8:
    drbd_info = dev["drbd_info"]
    data = [
      ("nodeA", "%s, minor=%s" %
       (drbd_info["primary_node"],
        compat.TryToRoman(drbd_info["primary_minor"], convert=roman))),
      ("nodeB", "%s, minor=%s" %
       (drbd_info["secondary_node"],
        compat.TryToRoman(drbd_info["secondary_minor"], convert=roman))),
      ("port", str(compat.TryToRoman(drbd_info["port"], convert=roman))),
      ("auth key", str(drbd_info["secret"])),
      ]
  elif dev_type == constants.DT_PLAIN:
    vg_name, lv_name = dev["logical_id"]
    data = ["%s/%s" % (vg_name, lv_name)]
  else:
    data = [str(dev["logical_id"])]

  return data


def _FormatListInfo(data):
  return list(str(i) for i in data)


def _FormatBlockDevInfo(idx, top_level, dev, roman):
  """Show block device information.

  This is only used by L{ShowInstanceConfig}, but it's too big to be
  left for an inline definition.

  @type idx: int
  @param idx: the index of the current disk
  @type top_level: boolean
  @param top_level: whether this is a top-level disk
  @type dev: dict
  @param dev: dictionary with disk information
  @type roman: boolean
  @param roman: whether to try to use roman integers
  @return: a list of either strings, tuples or lists
      (which should be formatted at a higher indent level)

  """
  def helper(dtype, status):
    """Format one line for physical device status.
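
    The status tuple is unpacked as C{(path, major, minor, sync_percent,
    estimated_time, is_degraded, ldisk_status)}; DRBD devices additionally
    get their synchronisation progress and degradation state rendered,
    while for plain LVs only a possible faulty-disk marker is appended.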
@type dtype: str @param dtype: a constant from the L{constants.DTS_BLOCK} set @type status: tuple @param status: a tuple as returned from L{backend.FindBlockDevice} @return: the string representing the status """ if not status: return "not active" txt = "" (path, major, minor, syncp, estt, degr, ldisk_status) = status if major is None: major_string = "N/A" else: major_string = str(compat.TryToRoman(major, convert=roman)) if minor is None: minor_string = "N/A" else: minor_string = str(compat.TryToRoman(minor, convert=roman)) txt += ("%s (%s:%s)" % (path, major_string, minor_string)) if dtype in (constants.DT_DRBD8, ): if syncp is not None: sync_text = "*RECOVERING* %5.2f%%," % syncp if estt: sync_text += " ETA %ss" % compat.TryToRoman(estt, convert=roman) else: sync_text += " ETA unknown" else: sync_text = "in sync" if degr: degr_text = "*DEGRADED*" else: degr_text = "ok" if ldisk_status == constants.LDS_FAULTY: ldisk_text = " *MISSING DISK*" elif ldisk_status == constants.LDS_UNKNOWN: ldisk_text = " *UNCERTAIN STATE*" else: ldisk_text = "" txt += (" %s, status %s%s" % (sync_text, degr_text, ldisk_text)) elif dtype == constants.DT_PLAIN: if ldisk_status == constants.LDS_FAULTY: ldisk_text = " *FAILED* (failed drive?)" else: ldisk_text = "" txt += ldisk_text return txt # the header if top_level: if dev["iv_name"] is not None: txt = dev["iv_name"] else: txt = "disk %s" % compat.TryToRoman(idx, convert=roman) else: txt = "child %s" % compat.TryToRoman(idx, convert=roman) if isinstance(dev["size"], int): nice_size = utils.FormatUnit(dev["size"], "h") else: nice_size = str(dev["size"]) data = [(txt, "%s, size %s" % (dev["dev_type"], nice_size))] if top_level: if dev["spindles"] is not None: data.append(("spindles", dev["spindles"])) data.append(("access mode", dev["mode"])) if dev["logical_id"] is not None: try: l_id = _FormatDiskDetails(dev["dev_type"], dev, roman) except ValueError: l_id = [str(dev["logical_id"])] if len(l_id) == 1: data.append(("logical_id", l_id[0])) else: data.extend(l_id) elif dev["physical_id"] is not None: data.append(("physical_id:", _FormatListInfo(dev["physical_id"]))) if dev["pstatus"]: data.append(("on primary", helper(dev["dev_type"], dev["pstatus"]))) if dev["sstatus"]: data.append(("on secondary", helper(dev["dev_type"], dev["sstatus"]))) data.append(("name", dev["name"])) data.append(("UUID", dev["uuid"])) if dev["children"]: data.append(("child devices", [ _FormatBlockDevInfo(c_idx, False, child, roman) for c_idx, child in enumerate(dev["children"]) ])) return data def _FormatInstanceNicInfo(idx, nic): """Helper function for L{_FormatInstanceInfo()}""" (name, uuid, ip, mac, mode, link, _, netinfo) = nic network_name = None if netinfo: network_name = netinfo["name"] return [ ("nic/%d" % idx, ""), ("MAC", str(mac)), ("IP", str(ip)), ("mode", str(mode)), ("link", str(link)), ("network", str(network_name)), ("UUID", str(uuid)), ("name", str(name)), ] def _FormatInstanceNodesInfo(instance): """Helper function for L{_FormatInstanceInfo()}""" pgroup = ("%s (UUID %s)" % (instance["pnode_group_name"], instance["pnode_group_uuid"])) secs = utils.CommaJoin(("%s (group %s, group UUID %s)" % (name, group_name, group_uuid)) for (name, group_name, group_uuid) in zip(instance["snodes"], instance["snodes_group_names"], instance["snodes_group_uuids"])) return [ [ ("primary", instance["pnode"]), ("group", pgroup), ], [("secondaries", secs)], ] def _GetVncConsoleInfo(instance): """Helper function for L{_FormatInstanceInfo()}""" vnc_bind_address = 
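# Standalone sketch of the DRBD sync-status fragment assembled by helper()
# above: a None sync percentage means "in sync", otherwise a recovery line
# with an optional ETA is built. Assumes the same field meanings as the
# status tuple; drbd_sync_text() is an illustrative name only.
def drbd_sync_text(syncp, estt):
    if syncp is None:
        return "in sync"
    txt = "*RECOVERING* %5.2f%%," % syncp
    if estt:
        txt += " ETA %ss" % estt
    else:
        txt += " ETA unknown"
    return txt

assert drbd_sync_text(None, None) == "in sync"
assert drbd_sync_text(42.5, 120) == "*RECOVERING* 42.50%, ETA 120s"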
instance["hv_actual"].get(constants.HV_VNC_BIND_ADDRESS, None) if vnc_bind_address: port = instance["network_port"] display = int(port) - constants.VNC_BASE_PORT if display > 0 and vnc_bind_address == constants.IP4_ADDRESS_ANY: vnc_console_port = "%s:%s (display %s)" % (instance["pnode"], port, display) elif display > 0 and netutils.IP4Address.IsValid(vnc_bind_address): vnc_console_port = ("%s:%s (node %s) (display %s)" % (vnc_bind_address, port, instance["pnode"], display)) else: # vnc bind address is a file vnc_console_port = "%s:%s" % (instance["pnode"], vnc_bind_address) ret = "vnc to %s" % vnc_console_port else: ret = None return ret def _FormatInstanceInfo(instance, roman_integers): """Format instance information for L{cli.PrintGenericInfo()}""" istate = "configured to be %s" % instance["config_state"] if instance["run_state"]: istate += ", actual state is %s" % instance["run_state"] info = [ ("Instance name", instance["name"]), ("UUID", instance["uuid"]), ("Serial number", str(compat.TryToRoman(instance["serial_no"], convert=roman_integers))), ("Creation time", utils.FormatTime(instance["ctime"])), ("Modification time", utils.FormatTime(instance["mtime"])), ("State", istate), ("Nodes", _FormatInstanceNodesInfo(instance)), ("Operating system", instance["os"]), ("Operating system parameters", FormatParamsDictInfo(instance["os_instance"], instance["os_actual"])), ] if "network_port" in instance: info.append(("Allocated network port", str(compat.TryToRoman(instance["network_port"], convert=roman_integers)))) info.append(("Hypervisor", instance["hypervisor"])) console = _GetVncConsoleInfo(instance) if console: info.append(("console connection", console)) # deprecated "memory" value, kept for one version for compatibility # TODO(ganeti 2.7) remove. be_actual = copy.deepcopy(instance["be_actual"]) be_actual["memory"] = be_actual[constants.BE_MAXMEM] info.extend([ ("Hypervisor parameters", FormatParamsDictInfo(instance["hv_instance"], instance["hv_actual"])), ("Back-end parameters", FormatParamsDictInfo(instance["be_instance"], be_actual)), ("NICs", [ _FormatInstanceNicInfo(idx, nic) for (idx, nic) in enumerate(instance["nics"]) ]), ("Disk template", instance["disk_template"]), ("Disks", [ _FormatBlockDevInfo(idx, True, device, roman_integers) for (idx, device) in enumerate(instance["disks"]) ]), ]) return info def ShowInstanceConfig(opts, args): """Compute instance run-time status. @param opts: the command line options selected by the user @type args: list @param args: either an empty list, and then we query all instances, or should contain a list of instance names @rtype: int @return: the desired exit code """ if not args and not opts.show_all: ToStderr("No instance selected." " Please pass in --all if you want to query all instances.\n" "Note that this can take a long time on a big cluster.") return 1 elif args and opts.show_all: ToStderr("Cannot use --all if you specify instance names.") return 1 retcode = 0 op = opcodes.OpInstanceQueryData(instances=args, static=opts.static, use_locking=not opts.static) result = SubmitOpCode(op, opts=opts) if not result: ToStdout("No instances.") return 1 PrintGenericInfo([ _FormatInstanceInfo(instance, opts.roman_integers) for instance in result.values() ]) return retcode def _ConvertNicDiskModifications(mods): """Converts NIC/disk modifications from CLI to opcode. When L{opcodes.OpInstanceSetParams} was changed to support adding/removing disks at arbitrary indices, its parameter format changed. This function converts legacy requests (e.g. 
"--net add" or "--disk add:size=4G") to the newer format and adds support for new-style requests (e.g. "--new 4:add"). @type mods: list of tuples @param mods: Modifications as given by command line parser @rtype: list of tuples @return: Modifications as understood by L{opcodes.OpInstanceSetParams} """ result = [] for (identifier, params) in mods: if identifier == constants.DDM_ADD: # Add item as last item (legacy interface) action = constants.DDM_ADD identifier = -1 elif identifier == constants.DDM_REMOVE: # Remove last item (legacy interface) action = constants.DDM_REMOVE identifier = -1 else: # Modifications and adding/removing at arbitrary indices add = params.pop(constants.DDM_ADD, _MISSING) remove = params.pop(constants.DDM_REMOVE, _MISSING) modify = params.pop(constants.DDM_MODIFY, _MISSING) if modify is _MISSING: if not (add is _MISSING or remove is _MISSING): raise errors.OpPrereqError("Cannot add and remove at the same time", errors.ECODE_INVAL) elif add is not _MISSING: action = constants.DDM_ADD elif remove is not _MISSING: action = constants.DDM_REMOVE else: action = constants.DDM_MODIFY elif add is _MISSING and remove is _MISSING: action = constants.DDM_MODIFY else: raise errors.OpPrereqError("Cannot modify and add/remove at the" " same time", errors.ECODE_INVAL) assert not (constants.DDMS_VALUES_WITH_MODIFY & set(params.keys())) if action == constants.DDM_REMOVE and params: raise errors.OpPrereqError("Not accepting parameters on removal", errors.ECODE_INVAL) result.append((action, identifier, params)) return result def _ParseDiskSizes(mods): """Parses disk sizes in parameters. """ for (action, _, params) in mods: if params and constants.IDISK_SIZE in params: params[constants.IDISK_SIZE] = \ utils.ParseUnit(params[constants.IDISK_SIZE]) elif action == constants.DDM_ADD: raise errors.OpPrereqError("Missing required parameter 'size'", errors.ECODE_INVAL) return mods def SetInstanceParams(opts, args): """Modifies an instance. All parameters take effect only at the next restart of the instance. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the instance name @rtype: int @return: the desired exit code """ if not (opts.nics or opts.disks or opts.disk_template or opts.hvparams or opts.beparams or opts.os or opts.osparams or opts.offline_inst or opts.online_inst or opts.runtime_mem or opts.new_primary_node): ToStderr("Please give at least one of the parameters.") return 1 for param in opts.beparams: if isinstance(opts.beparams[param], basestring): if opts.beparams[param].lower() == "default": opts.beparams[param] = constants.VALUE_DEFAULT utils.ForceDictType(opts.beparams, constants.BES_PARAMETER_COMPAT, allowed_values=[constants.VALUE_DEFAULT]) for param in opts.hvparams: if isinstance(opts.hvparams[param], basestring): if opts.hvparams[param].lower() == "default": opts.hvparams[param] = constants.VALUE_DEFAULT utils.ForceDictType(opts.hvparams, constants.HVS_PARAMETER_TYPES, allowed_values=[constants.VALUE_DEFAULT]) FixHvParams(opts.hvparams) nics = _ConvertNicDiskModifications(opts.nics) disks = _ParseDiskSizes(_ConvertNicDiskModifications(opts.disks)) if (opts.disk_template and opts.disk_template in constants.DTS_INT_MIRROR and not opts.node): ToStderr("Changing the disk template to a mirrored one requires" " specifying a secondary node") return 1 if opts.offline_inst: offline = True elif opts.online_inst: offline = False else: offline = None op = opcodes.OpInstanceSetParams(instance_name=args[0], nics=nics, disks=disks, disk_template=opts.disk_template, remote_node=opts.node, pnode=opts.new_primary_node, hvparams=opts.hvparams, beparams=opts.beparams, runtime_mem=opts.runtime_mem, os_name=opts.os, osparams=opts.osparams, force_variant=opts.force_variant, force=opts.force, wait_for_sync=opts.wait_for_sync, offline=offline, conflicts_check=opts.conflicts_check, ignore_ipolicy=opts.ignore_ipolicy) # even if here we process the result, we allow submit only result = SubmitOrSend(op, opts) if result: ToStdout("Modified instance %s", args[0]) for param, data in result: ToStdout(" - %-5s -> %s", param, data) ToStdout("Please don't forget that most parameters take effect" " only at the next (re)start of the instance initiated by" " ganeti; restarting from within the instance will" " not be enough.") return 0 def ChangeGroup(opts, args): """Moves an instance to another group. 
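# Sketch of the "default" normalisation applied to beparams/hvparams in
# SetInstanceParams() above: any string value equal to "default"
# (case-insensitive) is replaced by the VALUE_DEFAULT marker so the
# cluster-level default takes over. The marker value below mirrors
# constants.VALUE_DEFAULT and is an assumption for illustration.
VALUE_DEFAULT = "default"

def normalize_params(params):
    for key, val in params.items():
        if isinstance(val, str) and val.lower() == "default":
            params[key] = VALUE_DEFAULT
    return params

assert normalize_params({"memory": "DEFAULT"}) == {"memory": "default"}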
""" (instance_name, ) = args cl = GetClient() op = opcodes.OpInstanceChangeGroup(instance_name=instance_name, iallocator=opts.iallocator, target_groups=opts.to, early_release=opts.early_release) result = SubmitOrSend(op, opts, cl=cl) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("Instance '%s' changed group successfully.", instance_name) rcode = constants.EXIT_SUCCESS else: ToStdout("There were %s errors while changing group of instance '%s'.", bad_cnt, instance_name) rcode = constants.EXIT_FAILURE return rcode # multi-instance selection options m_force_multi = cli_option("--force-multiple", dest="force_multi", help="Do not ask for confirmation when more than" " one instance is affected", action="store_true", default=False) m_pri_node_opt = cli_option("--primary", dest="multi_mode", help="Filter by nodes (primary only)", const=_EXPAND_NODES_PRI, action="store_const") m_sec_node_opt = cli_option("--secondary", dest="multi_mode", help="Filter by nodes (secondary only)", const=_EXPAND_NODES_SEC, action="store_const") m_node_opt = cli_option("--node", dest="multi_mode", help="Filter by nodes (primary and secondary)", const=_EXPAND_NODES_BOTH, action="store_const") m_clust_opt = cli_option("--all", dest="multi_mode", help="Select all instances in the cluster", const=_EXPAND_CLUSTER, action="store_const") m_inst_opt = cli_option("--instance", dest="multi_mode", help="Filter by instance name [default]", const=_EXPAND_INSTANCES, action="store_const") m_node_tags_opt = cli_option("--node-tags", dest="multi_mode", help="Filter by node tag", const=_EXPAND_NODES_BOTH_BY_TAGS, action="store_const") m_pri_node_tags_opt = cli_option("--pri-node-tags", dest="multi_mode", help="Filter by primary node tag", const=_EXPAND_NODES_PRI_BY_TAGS, action="store_const") m_sec_node_tags_opt = cli_option("--sec-node-tags", dest="multi_mode", help="Filter by secondary node tag", const=_EXPAND_NODES_SEC_BY_TAGS, action="store_const") m_inst_tags_opt = cli_option("--tags", dest="multi_mode", help="Filter by instance tag", const=_EXPAND_INSTANCES_BY_TAGS, action="store_const") # this is defined separately due to readability only add_opts = [ NOSTART_OPT, OS_OPT, FORCE_VARIANT_OPT, NO_INSTALL_OPT, IGNORE_IPOLICY_OPT, ] commands = { "add": ( AddInstance, [ArgHost(min=1, max=1)], COMMON_CREATE_OPTS + add_opts, "[...] 
-t disk-type -n node[:secondary-node] -o os-type <name>",
    "Creates and adds a new instance to the cluster"),
  "batch-create": (
    BatchCreate, [ArgFile(min=1, max=1)],
    [DRY_RUN_OPT, PRIORITY_OPT, IALLOCATOR_OPT] + SUBMIT_OPTS,
    "<instances.json>",
    "Create a bunch of instances based on specs in the file."),
  "console": (
    ConnectToInstanceConsole, ARGS_ONE_INSTANCE,
    [SHOWCMD_OPT, PRIORITY_OPT],
    "[--show-cmd] <instance>", "Opens a console on the specified instance"),
  "failover": (
    FailoverInstance, ARGS_ONE_INSTANCE,
    [FORCE_OPT, IGNORE_CONSIST_OPT] + SUBMIT_OPTS +
    [SHUTDOWN_TIMEOUT_OPT, DRY_RUN_OPT, PRIORITY_OPT, DST_NODE_OPT,
     IALLOCATOR_OPT, IGNORE_IPOLICY_OPT, CLEANUP_OPT],
    "[-f] <instance>", "Stops the instance, changes its primary node and"
    " (if it was originally running) starts it on the new node"
    " (the secondary for mirrored instances or any node"
    " for shared storage)."),
  "migrate": (
    MigrateInstance, ARGS_ONE_INSTANCE,
    [FORCE_OPT, NONLIVE_OPT, MIGRATION_MODE_OPT, CLEANUP_OPT, DRY_RUN_OPT,
     PRIORITY_OPT, DST_NODE_OPT, IALLOCATOR_OPT, ALLOW_FAILOVER_OPT,
     IGNORE_IPOLICY_OPT, NORUNTIME_CHGS_OPT] + SUBMIT_OPTS,
    "[-f] <instance>", "Migrate instance to its secondary node"
    " (only for mirrored instances)"),
  "move": (
    MoveInstance, ARGS_ONE_INSTANCE,
    [FORCE_OPT] + SUBMIT_OPTS +
    [SINGLE_NODE_OPT, SHUTDOWN_TIMEOUT_OPT, DRY_RUN_OPT, PRIORITY_OPT,
     IGNORE_CONSIST_OPT, IGNORE_IPOLICY_OPT],
    "[-f] <instance>", "Move instance to an arbitrary node"
    " (only for instances of type file and lv)"),
  "info": (
    ShowInstanceConfig, ARGS_MANY_INSTANCES,
    [STATIC_OPT, ALL_OPT, ROMAN_OPT, PRIORITY_OPT],
    "[-s] {--all | <instance>...}",
    "Show information on the specified instance(s)"),
  "list": (
    ListInstances, ARGS_MANY_INSTANCES,
    [NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, VERBOSE_OPT,
     FORCE_FILTER_OPT],
    "[<instance>...]",
    "Lists the instances and their status. The available fields can be shown"
    " using the \"list-fields\" command (see the man page for details)."
    " The default field list is (in order): %s."
% utils.CommaJoin(_LIST_DEF_FIELDS), ), "list-fields": ( ListInstanceFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for instances"), "reinstall": ( ReinstallInstance, [ArgInstance()], [FORCE_OPT, OS_OPT, FORCE_VARIANT_OPT, m_force_multi, m_node_opt, m_pri_node_opt, m_sec_node_opt, m_clust_opt, m_inst_opt, m_node_tags_opt, m_pri_node_tags_opt, m_sec_node_tags_opt, m_inst_tags_opt, SELECT_OS_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT, OSPARAMS_OPT], "[-f] ", "Reinstall a stopped instance"), "remove": ( RemoveInstance, ARGS_ONE_INSTANCE, [FORCE_OPT, SHUTDOWN_TIMEOUT_OPT, IGNORE_FAILURES_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT], "[-f] ", "Shuts down the instance and removes it"), "rename": ( RenameInstance, [ArgInstance(min=1, max=1), ArgHost(min=1, max=1)], [NOIPCHECK_OPT, NONAMECHECK_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT], " ", "Rename the instance"), "replace-disks": ( ReplaceDisks, ARGS_ONE_INSTANCE, [AUTO_REPLACE_OPT, DISKIDX_OPT, IALLOCATOR_OPT, EARLY_RELEASE_OPT, NEW_SECONDARY_OPT, ON_PRIMARY_OPT, ON_SECONDARY_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT, IGNORE_IPOLICY_OPT], "[-s|-p|-a|-n NODE|-I NAME] ", "Replaces disks for the instance"), "modify": ( SetInstanceParams, ARGS_ONE_INSTANCE, [BACKEND_OPT, DISK_OPT, FORCE_OPT, HVOPTS_OPT, NET_OPT] + SUBMIT_OPTS + [DISK_TEMPLATE_OPT, SINGLE_NODE_OPT, OS_OPT, FORCE_VARIANT_OPT, OSPARAMS_OPT, DRY_RUN_OPT, PRIORITY_OPT, NWSYNC_OPT, OFFLINE_INST_OPT, ONLINE_INST_OPT, IGNORE_IPOLICY_OPT, RUNTIME_MEM_OPT, NOCONFLICTSCHECK_OPT, NEW_PRIMARY_OPT], "", "Alters the parameters of an instance"), "shutdown": ( GenericManyOps("shutdown", _ShutdownInstance), [ArgInstance()], [FORCE_OPT, m_node_opt, m_pri_node_opt, m_sec_node_opt, m_clust_opt, m_node_tags_opt, m_pri_node_tags_opt, m_sec_node_tags_opt, m_inst_tags_opt, m_inst_opt, m_force_multi, TIMEOUT_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT, IGNORE_OFFLINE_OPT, NO_REMEMBER_OPT], "", "Stops an instance"), "startup": ( GenericManyOps("startup", _StartupInstance), [ArgInstance()], [FORCE_OPT, m_force_multi, m_node_opt, m_pri_node_opt, m_sec_node_opt, m_node_tags_opt, m_pri_node_tags_opt, m_sec_node_tags_opt, m_inst_tags_opt, m_clust_opt, m_inst_opt] + SUBMIT_OPTS + [HVOPTS_OPT, BACKEND_OPT, DRY_RUN_OPT, PRIORITY_OPT, IGNORE_OFFLINE_OPT, NO_REMEMBER_OPT, STARTUP_PAUSED_OPT], "", "Starts an instance"), "reboot": ( GenericManyOps("reboot", _RebootInstance), [ArgInstance()], [m_force_multi, REBOOT_TYPE_OPT, IGNORE_SECONDARIES_OPT, m_node_opt, m_pri_node_opt, m_sec_node_opt, m_clust_opt, m_inst_opt] + SUBMIT_OPTS + [m_node_tags_opt, m_pri_node_tags_opt, m_sec_node_tags_opt, m_inst_tags_opt, SHUTDOWN_TIMEOUT_OPT, DRY_RUN_OPT, PRIORITY_OPT], "", "Reboots an instance"), "activate-disks": ( ActivateDisks, ARGS_ONE_INSTANCE, SUBMIT_OPTS + [IGNORE_SIZE_OPT, PRIORITY_OPT, WFSYNC_OPT], "", "Activate an instance's disks"), "deactivate-disks": ( DeactivateDisks, ARGS_ONE_INSTANCE, [FORCE_OPT] + SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT], "[-f] ", "Deactivate an instance's disks"), "recreate-disks": ( RecreateDisks, ARGS_ONE_INSTANCE, SUBMIT_OPTS + [DISK_OPT, NODE_PLACEMENT_OPT, DRY_RUN_OPT, PRIORITY_OPT, IALLOCATOR_OPT], "", "Recreate an instance's disks"), "grow-disk": ( GrowDisk, [ArgInstance(min=1, max=1), ArgUnknown(min=1, max=1), ArgUnknown(min=1, max=1)], SUBMIT_OPTS + [NWSYNC_OPT, DRY_RUN_OPT, PRIORITY_OPT, ABSOLUTE_OPT], " ", "Grow an instance's disk"), "change-group": ( ChangeGroup, ARGS_ONE_INSTANCE, [TO_GROUP_OPT, IALLOCATOR_OPT, 
EARLY_RELEASE_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "[-I ] [--to ]", "Change group of instance"), "list-tags": ( ListTags, ARGS_ONE_INSTANCE, [], "", "List the tags of the given instance"), "add-tags": ( AddTags, [ArgInstance(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, " tag...", "Add tags to the given instance"), "remove-tags": ( RemoveTags, [ArgInstance(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, " tag...", "Remove tags from given instance"), } #: dictionary with aliases for commands aliases = { "start": "startup", "stop": "shutdown", "show": "info", } def Main(): return GenericMain(commands, aliases=aliases, override={"tag_type": constants.TAG_INSTANCE}, env_override=_ENV_OVERRIDE) ganeti-2.9.3/lib/client/gnt_cluster.py0000644000000000000000000015261212271422343017730 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Cluster related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-cluster from cStringIO import StringIO import os.path import time import OpenSSL import itertools from ganeti.cli import * from ganeti import opcodes from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import bootstrap from ganeti import ssh from ganeti import objects from ganeti import uidpool from ganeti import compat from ganeti import netutils from ganeti import pathutils ON_OPT = cli_option("--on", default=False, action="store_true", dest="on", help="Recover from an EPO") GROUPS_OPT = cli_option("--groups", default=False, action="store_true", dest="groups", help="Arguments are node groups instead of nodes") FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it", help="Override interactive check for --no-voting", default=False, action="store_true") _EPO_PING_INTERVAL = 30 # 30 seconds between pings _EPO_PING_TIMEOUT = 1 # 1 second _EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes def _CheckNoLvmStorageOptDeprecated(opts): """Checks if the legacy option '--no-lvm-storage' is used. """ if not opts.lvm_storage: ToStderr("The option --no-lvm-storage is no longer supported. If you want" " to disable lvm-based storage cluster-wide, use the option" " --enabled-disk-templates to disable all of these lvm-base disk " " templates: %s" % utils.CommaJoin(utils.GetLvmDiskTemplates())) return 1 @UsesRPC def InitCluster(opts, args): """Initialize the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the desired cluster name @rtype: int @return: the desired exit code """ if _CheckNoLvmStorageOptDeprecated(opts): return 1 enabled_disk_templates = opts.enabled_disk_templates if enabled_disk_templates: enabled_disk_templates = enabled_disk_templates.split(",") else: enabled_disk_templates = constants.DEFAULT_ENABLED_DISK_TEMPLATES vg_name = None if opts.vg_name is not None: vg_name = opts.vg_name if vg_name: if not utils.IsLvmEnabled(enabled_disk_templates): ToStdout("You specified a volume group with --vg-name, but you did not" " enable any disk template that uses lvm.") else: if utils.IsLvmEnabled(enabled_disk_templates): ToStderr("LVM disk templates are enabled, but vg name not set.") return 1 else: if utils.IsLvmEnabled(enabled_disk_templates): vg_name = constants.DEFAULT_VG if not opts.drbd_storage and opts.drbd_helper: ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.") return 1 drbd_helper = opts.drbd_helper if opts.drbd_storage and not opts.drbd_helper: drbd_helper = constants.DEFAULT_DRBD_HELPER master_netdev = opts.master_netdev if master_netdev is None: master_netdev = constants.DEFAULT_BRIDGE hvlist = opts.enabled_hypervisors if hvlist is None: hvlist = constants.DEFAULT_ENABLED_HYPERVISOR hvlist = hvlist.split(",") hvparams = dict(opts.hvparams) beparams = opts.beparams nicparams = opts.nicparams diskparams = dict(opts.diskparams) # check the disk template types here, as we cannot rely on the type check done # by the opcode parameter types diskparams_keys = set(diskparams.keys()) if not (diskparams_keys <= constants.DISK_TEMPLATES): unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES) ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown)) return 1 # prepare beparams dict beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams) utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT) # prepare nicparams dict nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams) utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) # prepare ndparams dict if opts.ndparams is None: ndparams = dict(constants.NDC_DEFAULTS) else: ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams) utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES) # prepare hvparams dict for hv in constants.HYPER_TYPES: if hv not in hvparams: hvparams[hv] = {} hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv]) utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES) # prepare diskparams dict for templ in constants.DISK_TEMPLATES: if templ not in diskparams: diskparams[templ] = {} diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ], diskparams[templ]) utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES) # prepare ipolicy dict ipolicy = CreateIPolicyFromOpts( ispecs_mem_size=opts.ispecs_mem_size, ispecs_cpu_count=opts.ispecs_cpu_count, ispecs_disk_count=opts.ispecs_disk_count, ispecs_disk_size=opts.ispecs_disk_size, ispecs_nic_count=opts.ispecs_nic_count, minmax_ispecs=opts.ipolicy_bounds_specs, std_ispecs=opts.ipolicy_std_specs, ipolicy_disk_templates=opts.ipolicy_disk_templates, ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, fill_all=True) if opts.candidate_pool_size is None: opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT if opts.mac_prefix is None: opts.mac_prefix = 
constants.DEFAULT_MAC_PREFIX uid_pool = opts.uid_pool if uid_pool is not None: uid_pool = uidpool.ParseUidPool(uid_pool) if opts.prealloc_wipe_disks is None: opts.prealloc_wipe_disks = False external_ip_setup_script = opts.use_external_mip_script if external_ip_setup_script is None: external_ip_setup_script = False try: primary_ip_version = int(opts.primary_ip_version) except (ValueError, TypeError), err: ToStderr("Invalid primary ip version value: %s" % str(err)) return 1 master_netmask = opts.master_netmask try: if master_netmask is not None: master_netmask = int(master_netmask) except (ValueError, TypeError), err: ToStderr("Invalid master netmask value: %s" % str(err)) return 1 if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) bootstrap.InitCluster(cluster_name=args[0], secondary_ip=opts.secondary_ip, vg_name=vg_name, mac_prefix=opts.mac_prefix, master_netmask=master_netmask, master_netdev=master_netdev, file_storage_dir=opts.file_storage_dir, shared_file_storage_dir=opts.shared_file_storage_dir, enabled_hypervisors=hvlist, hvparams=hvparams, beparams=beparams, nicparams=nicparams, ndparams=ndparams, diskparams=diskparams, ipolicy=ipolicy, candidate_pool_size=opts.candidate_pool_size, modify_etc_hosts=opts.modify_etc_hosts, modify_ssh_setup=opts.modify_ssh_setup, maintain_node_health=opts.maintain_node_health, drbd_helper=drbd_helper, uid_pool=uid_pool, default_iallocator=opts.default_iallocator, primary_ip_version=primary_ip_version, prealloc_wipe_disks=opts.prealloc_wipe_disks, use_external_mip_script=external_ip_setup_script, hv_state=hv_state, disk_state=disk_state, enabled_disk_templates=enabled_disk_templates, ) op = opcodes.OpClusterPostInit() SubmitOpCode(op, opts=opts) return 0 @UsesRPC def DestroyCluster(opts, args): """Destroy the cluster. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ if not opts.yes_do_it: ToStderr("Destroying a cluster is irreversible. If you really want" " destroy this cluster, supply the --yes-do-it option.") return 1 op = opcodes.OpClusterDestroy() master_uuid = SubmitOpCode(op, opts=opts) # if we reached this, the opcode didn't fail; we can proceed to # shutdown all the daemons bootstrap.FinalizeClusterDestroy(master_uuid) return 0 def RenameCluster(opts, args): """Rename the cluster. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the new cluster name @rtype: int @return: the desired exit code """ cl = GetClient() (cluster_name, ) = cl.QueryConfigValues(["cluster_name"]) new_name = args[0] if not opts.force: usertext = ("This will rename the cluster from '%s' to '%s'. If you are" " connected over the network to the cluster name, the" " operation is very dangerous as the IP address will be" " removed from the node and the change may not go through." " Continue?") % (cluster_name, new_name) if not AskUser(usertext): return 1 op = opcodes.OpClusterRename(name=new_name) result = SubmitOpCode(op, opts=opts, cl=cl) if result: ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result) return 0 def ActivateMasterIp(opts, args): """Activates the master IP. """ op = opcodes.OpClusterActivateMasterIp() SubmitOpCode(op) return 0 def DeactivateMasterIp(opts, args): """Deactivates the master IP. """ if not opts.confirm: usertext = ("This will disable the master IP. 
All the open connections to" " the master IP will be closed. To reach the master you will" " need to use its node IP." " Continue?") if not AskUser(usertext): return 1 op = opcodes.OpClusterDeactivateMasterIp() SubmitOpCode(op) return 0 def RedistributeConfig(opts, args): """Forces push of the cluster configuration. @param opts: the command line options selected by the user @type args: list @param args: empty list @rtype: int @return: the desired exit code """ op = opcodes.OpClusterRedistConf() SubmitOrSend(op, opts) return 0 def ShowClusterVersion(opts, args): """Write version of ganeti software to the standard output. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ cl = GetClient(query=True) result = cl.QueryClusterInfo() ToStdout("Software version: %s", result["software_version"]) ToStdout("Internode protocol: %s", result["protocol_version"]) ToStdout("Configuration format: %s", result["config_version"]) ToStdout("OS api version: %s", result["os_api_version"]) ToStdout("Export interface: %s", result["export_version"]) ToStdout("VCS version: %s", result["vcs_version"]) return 0 def ShowClusterMaster(opts, args): """Write name of master node to the standard output. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ master = bootstrap.GetMaster() ToStdout(master) return 0 def _FormatGroupedParams(paramsdict, roman=False): """Format Grouped parameters (be, nic, disk) by group. @type paramsdict: dict of dicts @param paramsdict: {group: {param: value, ...}, ...} @rtype: dict of dicts @return: copy of the input dictionaries with strings as values """ ret = {} for (item, val) in paramsdict.items(): if isinstance(val, dict): ret[item] = _FormatGroupedParams(val, roman=roman) elif roman and isinstance(val, int): ret[item] = compat.TryToRoman(val) else: ret[item] = str(val) return ret def ShowClusterConfig(opts, args): """Shows cluster information. 
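# Usage sketch for _FormatGroupedParams() above: nested dictionaries are
# stringified recursively, and with roman=True integer leaves are rendered
# as roman numerals via compat.TryToRoman. A standalone approximation
# (without the roman path, which needs the compat module):
def format_grouped(paramsdict):
    ret = {}
    for item, val in paramsdict.items():
        if isinstance(val, dict):
            ret[item] = format_grouped(val)
        else:
            ret[item] = str(val)
    return ret

assert format_grouped({"xen": {"cpus": 2}}) == {"xen": {"cpus": "2"}}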
@param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ cl = GetClient(query=True) result = cl.QueryClusterInfo() if result["tags"]: tags = utils.CommaJoin(utils.NiceSort(result["tags"])) else: tags = "(none)" if result["reserved_lvs"]: reserved_lvs = utils.CommaJoin(result["reserved_lvs"]) else: reserved_lvs = "(none)" enabled_hv = result["enabled_hypervisors"] hvparams = dict((k, v) for k, v in result["hvparams"].iteritems() if k in enabled_hv) info = [ ("Cluster name", result["name"]), ("Cluster UUID", result["uuid"]), ("Creation time", utils.FormatTime(result["ctime"])), ("Modification time", utils.FormatTime(result["mtime"])), ("Master node", result["master"]), ("Architecture (this node)", "%s (%s)" % (result["architecture"][0], result["architecture"][1])), ("Tags", tags), ("Default hypervisor", result["default_hypervisor"]), ("Enabled hypervisors", utils.CommaJoin(enabled_hv)), ("Hypervisor parameters", _FormatGroupedParams(hvparams)), ("OS-specific hypervisor parameters", _FormatGroupedParams(result["os_hvp"])), ("OS parameters", _FormatGroupedParams(result["osparams"])), ("Hidden OSes", utils.CommaJoin(result["hidden_os"])), ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])), ("Cluster parameters", [ ("candidate pool size", compat.TryToRoman(result["candidate_pool_size"], convert=opts.roman_integers)), ("master netdev", result["master_netdev"]), ("master netmask", result["master_netmask"]), ("use external master IP address setup script", result["use_external_mip_script"]), ("lvm volume group", result["volume_group_name"]), ("lvm reserved volumes", reserved_lvs), ("drbd usermode helper", result["drbd_usermode_helper"]), ("file storage path", result["file_storage_dir"]), ("shared file storage path", result["shared_file_storage_dir"]), ("maintenance of node health", result["maintain_node_health"]), ("uid pool", uidpool.FormatUidPool(result["uid_pool"])), ("default instance allocator", result["default_iallocator"]), ("primary ip version", result["primary_ip_version"]), ("preallocation wipe disks", result["prealloc_wipe_disks"]), ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)), ("ExtStorage Providers search path", utils.CommaJoin(pathutils.ES_SEARCH_PATH)), ("enabled disk templates", utils.CommaJoin(result["enabled_disk_templates"])), ]), ("Default node parameters", _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)), ("Default instance parameters", _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)), ("Default nic parameters", _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)), ("Default disk parameters", _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)), ("Instance policy - limits for instances", FormatPolicyInfo(result["ipolicy"], None, True)), ] PrintGenericInfo(info) return 0 def ClusterCopyFile(opts, args): """Copy a file from master to some nodes. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the path of the file to be copied @rtype: int @return: the desired exit code """ filename = args[0] if not os.path.exists(filename): raise errors.OpPrereqError("No such filename '%s'" % filename, errors.ECODE_INVAL) cl = GetClient() cluster_name = cl.QueryConfigValues(["cluster_name"])[0] results = GetOnlineNodes(nodes=opts.nodes, cl=cl, filter_master=True, secondary_ips=opts.use_replication_network, nodegroup=opts.nodegroup) srun = ssh.SshRunner(cluster_name) for node in results: if not srun.CopyFileToNode(node, filename): ToStderr("Copy of file %s to node %s failed", filename, node) return 0 def RunClusterCommand(opts, args): """Run a command on some nodes. @param opts: the command line options selected by the user @type args: list @param args: should contain the command to be run and its arguments @rtype: int @return: the desired exit code """ cl = GetClient() command = " ".join(args) nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl, nodegroup=opts.nodegroup) cluster_name, master_node = cl.QueryConfigValues(["cluster_name", "master_node"]) srun = ssh.SshRunner(cluster_name=cluster_name) # Make sure master node is at list end if master_node in nodes: nodes.remove(master_node) nodes.append(master_node) for name in nodes: result = srun.Run(name, constants.SSH_LOGIN_USER, command) if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS: # Do not output anything for successful commands continue ToStdout("------------------------------------------------") if opts.show_machine_names: for line in result.output.splitlines(): ToStdout("%s: %s", name, line) else: ToStdout("node: %s", name) ToStdout("%s", result.output) ToStdout("return code = %s", result.exit_code) return 0 def VerifyCluster(opts, args): """Verify integrity of cluster, performing various test on nodes. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ skip_checks = [] if opts.skip_nplusone_mem: skip_checks.append(constants.VERIFY_NPLUSONE_MEM) cl = GetClient() op = opcodes.OpClusterVerify(verbose=opts.verbose, error_codes=opts.error_codes, debug_simulate_errors=opts.simulate_errors, skip_checks=skip_checks, ignore_errors=opts.ignore_errors, group_name=opts.nodegroup) result = SubmitOpCode(op, cl=cl, opts=opts) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() (bad_jobs, bad_results) = \ map(len, # Convert iterators to lists map(list, # Count errors map(compat.partial(itertools.ifilterfalse, bool), # Convert result to booleans in a tuple zip(*((job_success, len(op_results) == 1 and op_results[0]) for (job_success, op_results) in results))))) if bad_jobs == 0 and bad_results == 0: rcode = constants.EXIT_SUCCESS else: rcode = constants.EXIT_FAILURE if bad_jobs > 0: ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs) return rcode def VerifyDisks(opts, args): """Verify integrity of cluster disks. 
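# The master node is deliberately moved to the end of the node list in
# RunClusterCommand() above, so a command that restarts daemons (or the node
# itself) hits the master last and the client keeps a working endpoint for
# as long as possible. A standalone sketch of that reordering:
def master_last(nodes, master):
    nodes = list(nodes)
    if master in nodes:
        nodes.remove(master)
        nodes.append(master)
    return nodes

assert master_last(["n2", "master", "n1"], "master") == ["n2", "n1", "master"]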
@param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ cl = GetClient() op = opcodes.OpClusterVerifyDisks() result = SubmitOpCode(op, cl=cl, opts=opts) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) retcode = constants.EXIT_SUCCESS for (status, result) in jex.GetResults(): if not status: ToStdout("Job failed: %s", result) continue ((bad_nodes, instances, missing), ) = result for node, text in bad_nodes.items(): ToStdout("Error gathering data on node %s: %s", node, utils.SafeEncode(text[-400:])) retcode = constants.EXIT_FAILURE ToStdout("You need to fix these nodes first before fixing instances") for iname in instances: if iname in missing: continue op = opcodes.OpInstanceActivateDisks(instance_name=iname) try: ToStdout("Activating disks for instance '%s'", iname) SubmitOpCode(op, opts=opts, cl=cl) except errors.GenericError, err: nret, msg = FormatError(err) retcode |= nret ToStderr("Error activating disks for instance %s: %s", iname, msg) if missing: for iname, ival in missing.iteritems(): all_missing = compat.all(x[0] in bad_nodes for x in ival) if all_missing: ToStdout("Instance %s cannot be verified as it lives on" " broken nodes", iname) else: ToStdout("Instance %s has missing logical volumes:", iname) ival.sort() for node, vol in ival: if node in bad_nodes: ToStdout("\tbroken node %s /dev/%s", node, vol) else: ToStdout("\t%s /dev/%s", node, vol) ToStdout("You need to replace or recreate disks for all the above" " instances if this message persists after fixing broken nodes.") retcode = constants.EXIT_FAILURE elif not instances: ToStdout("No disks need to be activated.") return retcode def RepairDiskSizes(opts, args): """Verify sizes of cluster disks. @param opts: the command line options selected by the user @type args: list @param args: optional list of instances to restrict check to @rtype: int @return: the desired exit code """ op = opcodes.OpClusterRepairDiskSizes(instances=args) SubmitOpCode(op, opts=opts) @UsesRPC def MasterFailover(opts, args): """Failover the master node. This command, when run on a non-master node, will cause the current master to cease being master, and the non-master to become new master. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ if opts.no_voting and not opts.yes_do_it: usertext = ("This will perform the failover even if most other nodes" " are down, or if this node is outdated. This is dangerous" " as it can lead to a non-consistent cluster. Check the" " gnt-cluster(8) man page before proceeding. Continue?") if not AskUser(usertext): return 1 return bootstrap.MasterFailover(no_voting=opts.no_voting) def MasterPing(opts, args): """Checks if the master is alive. @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ try: cl = GetClient() cl.QueryClusterInfo() return 0 except Exception: # pylint: disable=W0703 return 1 def SearchTags(opts, args): """Searches the tags on all the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the tag pattern @rtype: int @return: the desired exit code """ op = opcodes.OpTagsSearch(pattern=args[0]) result = SubmitOpCode(op, opts=opts) if not result: return 1 result = list(result) result.sort() for path, tag in result: ToStdout("%s %s", path, tag) def _ReadAndVerifyCert(cert_filename, verify_private_key=False): """Reads and verifies an X509 certificate. @type cert_filename: string @param cert_filename: the path of the file containing the certificate to verify encoded in PEM format @type verify_private_key: bool @param verify_private_key: whether to verify the private key in addition to the public certificate @rtype: string @return: a string containing the PEM-encoded certificate. """ try: pem = utils.ReadFile(cert_filename) except IOError, err: raise errors.X509CertError(cert_filename, "Unable to read certificate: %s" % str(err)) try: OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem) except Exception, err: raise errors.X509CertError(cert_filename, "Unable to load certificate: %s" % str(err)) if verify_private_key: try: OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem) except Exception, err: raise errors.X509CertError(cert_filename, "Unable to load private key: %s" % str(err)) return pem def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911 rapi_cert_filename, new_spice_cert, spice_cert_filename, spice_cacert_filename, new_confd_hmac_key, new_cds, cds_filename, force): """Renews cluster certificates, keys and secrets. @type new_cluster_cert: bool @param new_cluster_cert: Whether to generate a new cluster certificate @type new_rapi_cert: bool @param new_rapi_cert: Whether to generate a new RAPI certificate @type rapi_cert_filename: string @param rapi_cert_filename: Path to file containing new RAPI certificate @type new_spice_cert: bool @param new_spice_cert: Whether to generate a new SPICE certificate @type spice_cert_filename: string @param spice_cert_filename: Path to file containing new SPICE certificate @type spice_cacert_filename: string @param spice_cacert_filename: Path to file containing the certificate of the CA that signed the SPICE certificate @type new_confd_hmac_key: bool @param new_confd_hmac_key: Whether to generate a new HMAC key @type new_cds: bool @param new_cds: Whether to generate a new cluster domain secret @type cds_filename: string @param cds_filename: Path to file containing new cluster domain secret @type force: bool @param force: Whether to ask user for confirmation """ if new_rapi_cert and rapi_cert_filename: ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate" " options can be specified at the same time.") return 1 if new_cds and cds_filename: ToStderr("Only one of the --new-cluster-domain-secret and" " --cluster-domain-secret options can be specified at" " the same time.") return 1 if new_spice_cert and (spice_cert_filename or spice_cacert_filename): ToStderr("When using --new-spice-certificate, the --spice-certificate" " and --spice-ca-certificate must not be used.") return 1 if bool(spice_cacert_filename) ^ bool(spice_cert_filename): ToStderr("Both --spice-certificate and --spice-ca-certificate must be" " specified.") return 1 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None) try: if rapi_cert_filename: rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True) if spice_cert_filename: spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, 
True) spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename) except errors.X509CertError, err: ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1]) return 1 if cds_filename: try: cds = utils.ReadFile(cds_filename) except Exception, err: # pylint: disable=W0703 ToStderr("Can't load new cluster domain secret from %s: %s" % (cds_filename, str(err))) return 1 else: cds = None if not force: usertext = ("This requires all daemons on all nodes to be restarted and" " may take some time. Continue?") if not AskUser(usertext): return 1 def _RenewCryptoInner(ctx): ctx.feedback_fn("Updating certificates and keys") bootstrap.GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert, new_confd_hmac_key, new_cds, rapi_cert_pem=rapi_cert_pem, spice_cert_pem=spice_cert_pem, spice_cacert_pem=spice_cacert_pem, cds=cds) files_to_copy = [] if new_cluster_cert: files_to_copy.append(pathutils.NODED_CERT_FILE) if new_rapi_cert or rapi_cert_pem: files_to_copy.append(pathutils.RAPI_CERT_FILE) if new_spice_cert or spice_cert_pem: files_to_copy.append(pathutils.SPICE_CERT_FILE) files_to_copy.append(pathutils.SPICE_CACERT_FILE) if new_confd_hmac_key: files_to_copy.append(pathutils.CONFD_HMAC_KEY) if new_cds or cds: files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE) if files_to_copy: for node_name in ctx.nonmaster_nodes: ctx.feedback_fn("Copying %s to %s" % (", ".join(files_to_copy), node_name)) for file_name in files_to_copy: ctx.ssh.CopyFileToNode(node_name, file_name) RunWhileClusterStopped(ToStdout, _RenewCryptoInner) ToStdout("All requested certificates and keys have been replaced." " Running \"gnt-cluster verify\" now is recommended.") return 0 def RenewCrypto(opts, args): """Renews cluster certificates, keys and secrets. """ return _RenewCrypto(opts.new_cluster_cert, opts.new_rapi_cert, opts.rapi_cert, opts.new_spice_cert, opts.spice_cert, opts.spice_cacert, opts.new_confd_hmac_key, opts.new_cluster_domain_secret, opts.cluster_domain_secret, opts.force) def SetClusterParams(opts, args): """Modify the cluster. 
@param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ if not (opts.vg_name is not None or opts.drbd_helper or opts.enabled_hypervisors or opts.hvparams or opts.beparams or opts.nicparams or opts.ndparams or opts.diskparams or opts.candidate_pool_size is not None or opts.uid_pool is not None or opts.maintain_node_health is not None or opts.add_uids is not None or opts.remove_uids is not None or opts.default_iallocator is not None or opts.reserved_lvs is not None or opts.master_netdev is not None or opts.master_netmask is not None or opts.use_external_mip_script is not None or opts.prealloc_wipe_disks is not None or opts.hv_state or opts.enabled_disk_templates or opts.disk_state or opts.ipolicy_bounds_specs is not None or opts.ipolicy_std_specs is not None or opts.ipolicy_disk_templates is not None or opts.ipolicy_vcpu_ratio is not None or opts.ipolicy_spindle_ratio is not None or opts.modify_etc_hosts is not None or opts.file_storage_dir is not None): ToStderr("Please give at least one of the parameters.") return 1 if _CheckNoLvmStorageOptDeprecated(opts): return 1 enabled_disk_templates = None if opts.enabled_disk_templates: enabled_disk_templates = opts.enabled_disk_templates.split(",") # consistency between vg name and enabled disk templates vg_name = None if opts.vg_name is not None: vg_name = opts.vg_name if enabled_disk_templates: if vg_name and not utils.IsLvmEnabled(enabled_disk_templates): ToStdout("You specified a volume group with --vg-name, but you did not" " enable any of the following lvm-based disk templates: %s" % utils.CommaJoin(utils.GetLvmDiskTemplates())) drbd_helper = opts.drbd_helper if not opts.drbd_storage and opts.drbd_helper: ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.") return 1 if not opts.drbd_storage: drbd_helper = "" hvlist = opts.enabled_hypervisors if hvlist is not None: hvlist = hvlist.split(",") # a list of (name, dict) we can pass directly to dict() (or []) hvparams = dict(opts.hvparams) for hv_params in hvparams.values(): utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) diskparams = dict(opts.diskparams) for dt_params in diskparams.values(): utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) beparams = opts.beparams utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT) nicparams = opts.nicparams utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) ndparams = opts.ndparams if ndparams is not None: utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES) ipolicy = CreateIPolicyFromOpts( minmax_ispecs=opts.ipolicy_bounds_specs, std_ispecs=opts.ipolicy_std_specs, ipolicy_disk_templates=opts.ipolicy_disk_templates, ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, ) mnh = opts.maintain_node_health uid_pool = opts.uid_pool if uid_pool is not None: uid_pool = uidpool.ParseUidPool(uid_pool) add_uids = opts.add_uids if add_uids is not None: add_uids = uidpool.ParseUidPool(add_uids) remove_uids = opts.remove_uids if remove_uids is not None: remove_uids = uidpool.ParseUidPool(remove_uids) if opts.reserved_lvs is not None: if opts.reserved_lvs == "": opts.reserved_lvs = [] else: opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",") if opts.master_netmask is not None: try: opts.master_netmask = int(opts.master_netmask) except ValueError: ToStderr("The --master-netmask option expects an int parameter.") return 1 ext_ip_script = 
opts.use_external_mip_script if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) op = opcodes.OpClusterSetParams( vg_name=vg_name, drbd_helper=drbd_helper, enabled_hypervisors=hvlist, hvparams=hvparams, os_hvp=None, beparams=beparams, nicparams=nicparams, ndparams=ndparams, diskparams=diskparams, ipolicy=ipolicy, candidate_pool_size=opts.candidate_pool_size, maintain_node_health=mnh, modify_etc_hosts=opts.modify_etc_hosts, uid_pool=uid_pool, add_uids=add_uids, remove_uids=remove_uids, default_iallocator=opts.default_iallocator, prealloc_wipe_disks=opts.prealloc_wipe_disks, master_netdev=opts.master_netdev, master_netmask=opts.master_netmask, reserved_lvs=opts.reserved_lvs, use_external_mip_script=ext_ip_script, hv_state=hv_state, disk_state=disk_state, enabled_disk_templates=enabled_disk_templates, force=opts.force, file_storage_dir=opts.file_storage_dir, ) SubmitOrSend(op, opts) return 0 def QueueOps(opts, args): """Queue operations. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the subcommand @rtype: int @return: the desired exit code """ command = args[0] client = GetClient() if command in ("drain", "undrain"): drain_flag = command == "drain" client.SetQueueDrainFlag(drain_flag) elif command == "info": result = client.QueryConfigValues(["drain_flag"]) if result[0]: val = "set" else: val = "unset" ToStdout("The drain flag is %s" % val) else: raise errors.OpPrereqError("Command '%s' is not valid." % command, errors.ECODE_INVAL) return 0 def _ShowWatcherPause(until): if until is None or until < time.time(): ToStdout("The watcher is not paused.") else: ToStdout("The watcher is paused until %s.", time.ctime(until)) def WatcherOps(opts, args): """Watcher operations. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the subcommand @rtype: int @return: the desired exit code """ command = args[0] client = GetClient() if command == "continue": client.SetWatcherPause(None) ToStdout("The watcher is no longer paused.") elif command == "pause": if len(args) < 2: raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL) result = client.SetWatcherPause(time.time() + ParseTimespec(args[1])) _ShowWatcherPause(result) elif command == "info": result = client.QueryConfigValues(["watcher_pause"]) _ShowWatcherPause(result[0]) else: raise errors.OpPrereqError("Command '%s' is not valid." % command, errors.ECODE_INVAL) return 0 def _OobPower(opts, node_list, power): """Puts the node in the list to desired power state. 
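# Behaviour sketch for _ShowWatcherPause() above: a pause timestamp that is
# None or already in the past means the watcher is not paused. is_paused()
# is an illustrative wrapper around the same time.time() comparison.
import time

def is_paused(until):
    return until is not None and until >= time.time()

assert not is_paused(None)
assert not is_paused(time.time() - 60)     # already expired
assert is_paused(time.time() + 3600)       # paused for another hour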
@param opts: The command line options selected by the user @param node_list: The list of nodes to operate on @param power: True if they should be powered on, False otherwise @return: The success of the operation (none failed) """ if power: command = constants.OOB_POWER_ON else: command = constants.OOB_POWER_OFF op = opcodes.OpOobCommand(node_names=node_list, command=command, ignore_status=True, timeout=opts.oob_timeout, power_delay=opts.power_delay) result = SubmitOpCode(op, opts=opts) errs = 0 for node_result in result: (node_tuple, data_tuple) = node_result (_, node_name) = node_tuple (data_status, _) = data_tuple if data_status != constants.RS_NORMAL: assert data_status != constants.RS_UNAVAIL errs += 1 ToStderr("There was a problem changing power for %s, please investigate", node_name) if errs > 0: return False return True def _InstanceStart(opts, inst_list, start, no_remember=False): """Puts the instances in the list to desired state. @param opts: The command line options selected by the user @param inst_list: The list of instances to operate on @param start: True if they should be started, False for shutdown @param no_remember: If the instance state should be remembered @return: The success of the operation (none failed) """ if start: opcls = opcodes.OpInstanceStartup text_submit, text_success, text_failed = ("startup", "started", "starting") else: opcls = compat.partial(opcodes.OpInstanceShutdown, timeout=opts.shutdown_timeout, no_remember=no_remember) text_submit, text_success, text_failed = ("shutdown", "stopped", "stopping") jex = JobExecutor(opts=opts) for inst in inst_list: ToStdout("Submit %s of instance %s", text_submit, inst) op = opcls(instance_name=inst) jex.QueueJob(inst, op) results = jex.GetResults() bad_cnt = len([1 for (success, _) in results if not success]) if bad_cnt == 0: ToStdout("All instances have been %s successfully", text_success) else: ToStderr("There were errors while %s instances:\n" "%d error(s) out of %d instance(s)", text_failed, bad_cnt, len(results)) return False return True class _RunWhenNodesReachableHelper: """Helper class to make shared internal state sharing easier. @ivar success: Indicates if all action_cb calls were successful """ def __init__(self, node_list, action_cb, node2ip, port, feedback_fn, _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep): """Init the object. @param node_list: The list of nodes to be reachable @param action_cb: Callback called when a new host is reachable @type node2ip: dict @param node2ip: Node to ip mapping @param port: The port to use for the TCP ping @param feedback_fn: The function used for feedback @param _ping_fn: Function to check reachabilty (for unittest use only) @param _sleep_fn: Function to sleep (for unittest use only) """ self.down = set(node_list) self.up = set() self.node2ip = node2ip self.success = True self.action_cb = action_cb self.port = port self.feedback_fn = feedback_fn self._ping_fn = _ping_fn self._sleep_fn = _sleep_fn def __call__(self): """When called we run action_cb. @raises utils.RetryAgain: When there are still down nodes """ if not self.action_cb(self.up): self.success = False if self.down: raise utils.RetryAgain() else: return self.success def Wait(self, secs): """Checks if a host is up or waits remaining seconds. 
@param secs: The secs remaining

    """
    start = time.time()
    for node in self.down:
      if self._ping_fn(self.node2ip[node], self.port,
                       timeout=_EPO_PING_TIMEOUT, live_port_needed=True):
        self.feedback_fn("Node %s became available" % node)
        self.up.add(node)
        self.down -= self.up
        # If we have a node available there is the possibility to run the
        # action callback successfully, therefore we don't wait and return
        return

    self._sleep_fn(max(0.0, start + secs - time.time()))


def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()
  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  port = netutils.GetDaemonPort(constants.NODED)
  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n - %s", " - ".join(helper.down))
    return False


def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start the instances conditional based on node_states.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: A list of nodes online
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  start_inst_list = []
  for (inst, nodes) in inst_map.items():
    if not (nodes - nodes_online):
      # All nodes the instance lives on are back online
      start_inst_list.append(inst)

  for inst in start_inst_list:
    del inst_map[inst]

  if start_inst_list:
    return _instance_start_fn(opts, start_inst_list, True)

  return True


def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not
                         supporting OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # Power the nodes back on (power=True); passing False here would issue
  # another power-off command instead
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS


def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.
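# Standalone sketch of the start condition used by _MaybeInstanceStartup()
# above: an instance becomes startable once every node it lives on is back
# online, i.e. the set difference is empty. Illustrative names only.
def startable(inst_map, nodes_online):
    return [inst for inst, nodes in inst_map.items()
            if not (nodes - nodes_online)]

inst_map = {"web": set(["n1"]), "db": set(["n1", "n2"])}
assert startable(inst_map, set(["n1"])) == ["web"]
assert sorted(startable(inst_map, set(["n1", "n2"]))) == ["db", "web"]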
@param opts: The command line options selected by the user @param node_list: The list of nodes to operate on (all need to support OOB) @param inst_map: A dict of inst -> nodes mapping @return: The desired exit status """ if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True): ToStderr("Please investigate and stop instances manually before continuing") return constants.EXIT_FAILURE if not node_list: return constants.EXIT_SUCCESS if _OobPower(opts, node_list, False): return constants.EXIT_SUCCESS else: return constants.EXIT_FAILURE def Epo(opts, args, cl=None, _on_fn=_EpoOn, _off_fn=_EpoOff, _confirm_fn=ConfirmOperation, _stdout_fn=ToStdout, _stderr_fn=ToStderr): """EPO operations. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the subcommand @rtype: int @return: the desired exit code """ if opts.groups and opts.show_all: _stderr_fn("Only one of --groups or --all are allowed") return constants.EXIT_FAILURE elif args and opts.show_all: _stderr_fn("Arguments in combination with --all are not allowed") return constants.EXIT_FAILURE if cl is None: cl = GetClient() if opts.groups: node_query_list = \ itertools.chain(*cl.QueryGroups(args, ["node_list"], False)) else: node_query_list = args result = cl.QueryNodes(node_query_list, ["name", "master", "pinst_list", "sinst_list", "powered", "offline"], False) all_nodes = map(compat.fst, result) node_list = [] inst_map = {} for (node, master, pinsts, sinsts, powered, offline) in result: if not offline: for inst in (pinsts + sinsts): if inst in inst_map: if not master: inst_map[inst].add(node) elif master: inst_map[inst] = set() else: inst_map[inst] = set([node]) if master and opts.on: # We ignore the master for turning on the machines, in fact we are # already operating on the master at this point :) continue elif master and not opts.show_all: _stderr_fn("%s is the master node, please do a master-failover to another" " node not affected by the EPO or use --all if you intend to" " shutdown the whole cluster", node) return constants.EXIT_FAILURE elif powered is None: _stdout_fn("Node %s does not support out-of-band handling, it can not be" " handled in a fully automated manner", node) elif powered == opts.on: _stdout_fn("Node %s is already in desired power state, skipping", node) elif not offline or (offline and powered): node_list.append(node) if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")): return constants.EXIT_FAILURE if opts.on: return _on_fn(opts, all_nodes, node_list, inst_map) else: return _off_fn(opts, node_list, inst_map) def _GetCreateCommand(info): buf = StringIO() buf.write("gnt-cluster init") PrintIPolicyCommand(buf, info["ipolicy"], False) buf.write(" ") buf.write(info["name"]) return buf.getvalue() def ShowCreateCommand(opts, args): """Shows the command that can be used to re-create the cluster. Currently it works only for ipolicy specs. 
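# Sketch of how Epo() above aggregates the QueryNodes rows into the
# instance-to-nodes map that is later handed to _EpoOn/_EpoOff. The rows
# below are invented sample data in the same (node, master, pinsts, sinsts,
# powered, offline) shape; only the mapping logic is reproduced, not the
# power-state handling.
def _BuildInstMap(rows):
  inst_map = {}
  for (node, master, pinsts, sinsts, _, offline) in rows:
    if offline:
      continue
    for inst in pinsts + sinsts:
      # The master node is deliberately kept out of every instance's node
      # set: the EPO is driven from it, so it is handled separately.
      nodes = inst_map.setdefault(inst, set())
      if not master:
        nodes.add(node)
  return inst_map

rows = [
  ("node1", True, ["inst1"], [], True, False),
  ("node2", False, ["inst2"], ["inst1"], True, False),
]
inst_map = _BuildInstMap(rows)
print(sorted(inst_map["inst1"]))  # ['node2'], the master node1 is excluded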
""" cl = GetClient(query=True) result = cl.QueryClusterInfo() ToStdout(_GetCreateCommand(result)) commands = { "init": ( InitCluster, [ArgHost(min=1, max=1)], [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT, HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT] + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, "[opts...] ", "Initialises a new cluster configuration"), "destroy": ( DestroyCluster, ARGS_NONE, [YES_DOIT_OPT], "", "Destroy cluster"), "rename": ( RenameCluster, [ArgHost(min=1, max=1)], [FORCE_OPT, DRY_RUN_OPT], "", "Renames the cluster"), "redist-conf": ( RedistributeConfig, ARGS_NONE, SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT], "", "Forces a push of the configuration file and ssconf files" " to the nodes in the cluster"), "verify": ( VerifyCluster, ARGS_NONE, [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT, DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT], "", "Does a check on the cluster configuration"), "verify-disks": ( VerifyDisks, ARGS_NONE, [PRIORITY_OPT], "", "Does a check on the cluster disk status"), "repair-disk-sizes": ( RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT], "[instance...]", "Updates mismatches in recorded disk sizes"), "master-failover": ( MasterFailover, ARGS_NONE, [NOVOTING_OPT, FORCE_FAILOVER], "", "Makes the current node the master"), "master-ping": ( MasterPing, ARGS_NONE, [], "", "Checks if the master is alive"), "version": ( ShowClusterVersion, ARGS_NONE, [], "", "Shows the cluster version"), "getmaster": ( ShowClusterMaster, ARGS_NONE, [], "", "Shows the cluster master"), "copyfile": ( ClusterCopyFile, [ArgFile(min=1, max=1)], [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT], "[-n node...] ", "Copies a file to all (or only some) nodes"), "command": ( RunClusterCommand, [ArgCommand(min=1)], [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT], "[-n node...] 
", "Runs a command on all (or only some) nodes"), "info": ( ShowClusterConfig, ARGS_NONE, [ROMAN_OPT], "[--roman]", "Show cluster configuration"), "list-tags": ( ListTags, ARGS_NONE, [], "", "List the tags of the cluster"), "add-tags": ( AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "tag...", "Add tags to the cluster"), "remove-tags": ( RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "tag...", "Remove tags from the cluster"), "search-tags": ( SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "", "Searches the tags on all objects on" " the cluster for a given pattern (regex)"), "queue": ( QueueOps, [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])], [], "drain|undrain|info", "Change queue properties"), "watcher": ( WatcherOps, [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]), ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])], [], "{pause |continue|info}", "Change watcher properties"), "modify": ( SetClusterParams, ARGS_NONE, [FORCE_OPT, BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT, REMOVE_UIDS_OPT, DRBD_HELPER_OPT, NODRBD_STORAGE_OPT, DEFAULT_IALLOCATOR_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT] + INSTANCE_POLICY_OPTS + [GLOBAL_FILEDIR_OPT], "[opts...]", "Alters the parameters of the cluster"), "renew-crypto": ( RenewCrypto, ARGS_NONE, [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT, NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT, NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT, NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT], "[opts...]", "Renews cluster certificates, keys and secrets"), "epo": ( Epo, [ArgUnknown()], [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT, SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT], "[opts...] [args]", "Performs an emergency power-off on given args"), "activate-master-ip": ( ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"), "deactivate-master-ip": ( DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "", "Deactivates the master IP"), "show-ispecs-cmd": ( ShowCreateCommand, ARGS_NONE, [], "", "Show the command line to re-create the cluster"), } #: dictionary with aliases for commands aliases = { "masterfailover": "master-failover", "show": "info", } def Main(): return GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}, aliases=aliases) ganeti-2.9.3/lib/client/gnt_storage.py0000644000000000000000000001305412244641676017723 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""External Storage related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-storage from ganeti.cli import * from ganeti import opcodes from ganeti import utils def ShowExtStorageInfo(opts, args): """List detailed information about ExtStorage providers. @param opts: the command line options selected by the user @type args: list @param args: empty list or list of ExtStorage providers' names @rtype: int @return: the desired exit code """ op = opcodes.OpExtStorageDiagnose(output_fields=["name", "nodegroup_status", "parameters"], names=[]) result = SubmitOpCode(op, opts=opts) if not result: ToStderr("Can't get the ExtStorage providers list") return 1 do_filter = bool(args) for (name, nodegroup_data, parameters) in result: if do_filter: if name not in args: continue else: args.remove(name) nodegroups_valid = [] for nodegroup_name, nodegroup_status in nodegroup_data.iteritems(): if nodegroup_status: nodegroups_valid.append(nodegroup_name) ToStdout("%s:", name) if nodegroups_valid != []: ToStdout(" - Valid for nodegroups:") for ndgrp in utils.NiceSort(nodegroups_valid): ToStdout(" %s", ndgrp) ToStdout(" - Supported parameters:") for pname, pdesc in parameters: ToStdout(" %s: %s", pname, pdesc) else: ToStdout(" - Invalid for all nodegroups") ToStdout("") if args: for name in args: ToStdout("%s: Not Found", name) ToStdout("") return 0 def _ExtStorageStatus(status, diagnose): """Beautifier function for ExtStorage status. @type status: boolean @param status: is the ExtStorage provider valid @type diagnose: string @param diagnose: the error message for invalid ExtStorages @rtype: string @return: a formatted status """ if status: return "valid" else: return "invalid - %s" % diagnose def DiagnoseExtStorage(opts, args): """Analyse all ExtStorage providers. 
@param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ op = opcodes.OpExtStorageDiagnose(output_fields=["name", "node_status", "nodegroup_status"], names=[]) result = SubmitOpCode(op, opts=opts) if not result: ToStderr("Can't get the list of ExtStorage providers") return 1 for provider_name, node_data, nodegroup_data in result: nodes_valid = {} nodes_bad = {} nodegroups_valid = {} nodegroups_bad = {} # Per node diagnose for node_name, node_info in node_data.iteritems(): if node_info: # at least one entry in the per-node list (fo_path, fo_status, fo_msg, fo_params) = node_info.pop(0) fo_msg = "%s (path: %s)" % (_ExtStorageStatus(fo_status, fo_msg), fo_path) if fo_params: fo_msg += (" [parameters: %s]" % utils.CommaJoin([v[0] for v in fo_params])) else: fo_msg += " [no parameters]" if fo_status: nodes_valid[node_name] = fo_msg else: nodes_bad[node_name] = fo_msg else: nodes_bad[node_name] = "ExtStorage provider not found" # Per nodegroup diagnose for nodegroup_name, nodegroup_status in nodegroup_data.iteritems(): status = nodegroup_status if status: nodegroups_valid[nodegroup_name] = "valid" else: nodegroups_bad[nodegroup_name] = "invalid" def _OutputPerNodegroupStatus(msg_map): map_k = utils.NiceSort(msg_map.keys()) for nodegroup in map_k: ToStdout(" For nodegroup: %s --> %s", nodegroup, msg_map[nodegroup]) def _OutputPerNodeStatus(msg_map): map_k = utils.NiceSort(msg_map.keys()) for node_name in map_k: ToStdout(" Node: %s, status: %s", node_name, msg_map[node_name]) # Print the output st_msg = "Provider: %s" % provider_name ToStdout(st_msg) ToStdout("---") _OutputPerNodeStatus(nodes_valid) _OutputPerNodeStatus(nodes_bad) ToStdout(" --") _OutputPerNodegroupStatus(nodegroups_valid) _OutputPerNodegroupStatus(nodegroups_bad) ToStdout("") return 0 commands = { "diagnose": ( DiagnoseExtStorage, ARGS_NONE, [PRIORITY_OPT], "", "Diagnose all ExtStorage providers"), "info": ( ShowExtStorageInfo, [ArgOs()], [PRIORITY_OPT], "", "Show info about ExtStorage providers"), } def Main(): return GenericMain(commands) ganeti-2.9.3/lib/client/gnt_job.py0000644000000000000000000004117012267470014017020 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
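# Both per-node and per-nodegroup output helpers above rely on
# utils.NiceSort so that "node2" sorts before "node10". A minimal
# natural-sort key in the same spirit (assumption: the real NiceSort
# handles more edge cases, such as a key= argument and mixed shapes):
import re

def _NiceSortKey(name):
  # Split into digit and non-digit runs; digit runs compare numerically.
  return [int(part) if part.isdigit() else part
          for part in re.split(r"(\d+)", name)]

names = ["node10", "node2", "node1"]
print(sorted(names, key=_NiceSortKey))  # ['node1', 'node2', 'node10']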
"""Job related commands""" # pylint: disable=W0401,W0613,W0614,C0103 # W0401: Wildcard import ganeti.cli # W0613: Unused argument, since all functions follow the same API # W0614: Unused import %s from wildcard import (since we need cli) # C0103: Invalid name gnt-job from ganeti.cli import * from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import cli from ganeti import qlang #: default list of fields for L{ListJobs} _LIST_DEF_FIELDS = ["id", "status", "summary"] #: map converting the job status contants to user-visible #: names _USER_JOB_STATUS = { constants.JOB_STATUS_QUEUED: "queued", constants.JOB_STATUS_WAITING: "waiting", constants.JOB_STATUS_CANCELING: "canceling", constants.JOB_STATUS_RUNNING: "running", constants.JOB_STATUS_CANCELED: "canceled", constants.JOB_STATUS_SUCCESS: "success", constants.JOB_STATUS_ERROR: "error", } def _FormatStatus(value): """Formats a job status. """ try: return _USER_JOB_STATUS[value] except KeyError: raise errors.ProgrammerError("Unknown job status code '%s'" % value) def _FormatSummary(value): """Formats a job's summary. Takes possible non-ascii encoding into account. """ return ','.encode('utf-8').join(item.encode('utf-8') for item in value) _JOB_LIST_FORMAT = { "status": (_FormatStatus, False), "summary": (_FormatSummary, False), } _JOB_LIST_FORMAT.update(dict.fromkeys(["opstart", "opexec", "opend"], (lambda value: map(FormatTimestamp, value), None))) def _ParseJobIds(args): """Parses a list of string job IDs into integers. @param args: list of strings @return: list of integers @raise OpPrereqError: in case of invalid values """ try: return [int(a) for a in args] except (ValueError, TypeError), err: raise errors.OpPrereqError("Invalid job ID passed: %s" % err, errors.ECODE_INVAL) def ListJobs(opts, args): """List the jobs @param opts: the command line options selected by the user @type args: list @param args: should be an empty list @rtype: int @return: the desired exit code """ selected_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) if opts.archived and "archived" not in selected_fields: selected_fields.append("archived") qfilter = qlang.MakeSimpleFilter("status", opts.status_filter) cl = GetClient(query=True) return GenericList(constants.QR_JOB, selected_fields, args, None, opts.separator, not opts.no_headers, format_override=_JOB_LIST_FORMAT, verbose=opts.verbose, force_filter=opts.force_filter, namefield="id", qfilter=qfilter, isnumeric=True, cl=cl) def ListJobFields(opts, args): """List job fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ cl = GetClient(query=True) return GenericListFields(constants.QR_JOB, args, opts.separator, not opts.no_headers, cl=cl) def ArchiveJobs(opts, args): """Archive jobs. @param opts: the command line options selected by the user @type args: list @param args: should contain the job IDs to be archived @rtype: int @return: the desired exit code """ client = GetClient() rcode = 0 for job_id in args: if not client.ArchiveJob(job_id): ToStderr("Failed to archive job with ID '%s'", job_id) rcode = 1 return rcode def AutoArchiveJobs(opts, args): """Archive jobs based on age. This will archive jobs based on their age, or all jobs if a 'all' is passed. 
@param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the age as a time spec that can be parsed by L{ganeti.cli.ParseTimespec} or the keyword I{all}, which will cause all jobs to be archived @rtype: int @return: the desired exit code """ client = GetClient() age = args[0] if age == "all": age = -1 else: age = ParseTimespec(age) (archived_count, jobs_left) = client.AutoArchiveJobs(age) ToStdout("Archived %s jobs, %s unchecked left", archived_count, jobs_left) return 0 def _MultiJobAction(opts, args, cl, stdout_fn, ask_fn, question, action_fn): """Applies a function to multipe jobs. @param opts: Command line options @type args: list @param args: Job IDs @rtype: int @return: Exit code """ if cl is None: cl = GetClient() if stdout_fn is None: stdout_fn = ToStdout if ask_fn is None: ask_fn = AskUser result = constants.EXIT_SUCCESS if bool(args) ^ (opts.status_filter is None): raise errors.OpPrereqError("Either a status filter or job ID(s) must be" " specified and never both", errors.ECODE_INVAL) if opts.status_filter is not None: response = cl.Query(constants.QR_JOB, ["id", "status", "summary"], qlang.MakeSimpleFilter("status", opts.status_filter)) jobs = [i for ((_, i), _, _) in response.data] if not jobs: raise errors.OpPrereqError("No jobs with the requested status have been" " found", errors.ECODE_STATE) if not opts.force: (_, table) = FormatQueryResult(response, header=True, format_override=_JOB_LIST_FORMAT) for line in table: stdout_fn(line) if not ask_fn(question): return constants.EXIT_CONFIRMATION else: jobs = args for job_id in jobs: (success, msg) = action_fn(cl, job_id) if not success: result = constants.EXIT_FAILURE stdout_fn(msg) return result def CancelJobs(opts, args, cl=None, _stdout_fn=ToStdout, _ask_fn=AskUser): """Cancel not-yet-started jobs. @param opts: the command line options selected by the user @type args: list @param args: should contain the job IDs to be cancelled @rtype: int @return: the desired exit code """ return _MultiJobAction(opts, args, cl, _stdout_fn, _ask_fn, "Cancel job(s) listed above?", lambda cl, job_id: cl.CancelJob(job_id)) def ChangePriority(opts, args): """Change priority of jobs. @param opts: Command line options @type args: list @param args: Job IDs @rtype: int @return: Exit code """ if opts.priority is None: ToStderr("--priority option must be given.") return constants.EXIT_FAILURE return _MultiJobAction(opts, args, None, None, None, "Change priority of job(s) listed above?", lambda cl, job_id: cl.ChangeJobPriority(job_id, opts.priority)) def ShowJobs(opts, args): """Show detailed information about jobs. 
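# The XOR guard in _MultiJobAction above encodes "exactly one of job IDs or
# a status filter": bool(args) ^ (status_filter is None) is True precisely
# when both or neither were given. The truth table, spelled out as a toy
# check with the same semantics:
def _ExactlyOne(args, status_filter):
  return not (bool(args) ^ (status_filter is None))

assert _ExactlyOne(["1"], None)                  # IDs only: valid
assert _ExactlyOne([], frozenset(["error"]))     # filter only: valid
assert not _ExactlyOne(["1"], frozenset(["error"]))  # both: rejected
assert not _ExactlyOne([], None)                 # neither: rejected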
@param opts: the command line options selected by the user @type args: list @param args: should contain the job IDs to be queried @rtype: int @return: the desired exit code """ def format_msg(level, text): """Display the text indented.""" ToStdout("%s%s", " " * level, text) def result_helper(value): """Format a result field in a nice way.""" if isinstance(value, (tuple, list)): return "[%s]" % utils.CommaJoin(value) else: return str(value) selected_fields = [ "id", "status", "ops", "opresult", "opstatus", "oplog", "opstart", "opexec", "opend", "received_ts", "start_ts", "end_ts", ] qfilter = qlang.MakeSimpleFilter("id", _ParseJobIds(args)) cl = GetClient(query=True) result = cl.Query(constants.QR_JOB, selected_fields, qfilter).data first = True for entry in result: if not first: format_msg(0, "") else: first = False ((_, job_id), (rs_status, status), (_, ops), (_, opresult), (_, opstatus), (_, oplog), (_, opstart), (_, opexec), (_, opend), (_, recv_ts), (_, start_ts), (_, end_ts)) = entry # Detect non-normal results if rs_status != constants.RS_NORMAL: format_msg(0, "Job ID %s not found" % job_id) continue format_msg(0, "Job ID: %s" % job_id) if status in _USER_JOB_STATUS: status = _USER_JOB_STATUS[status] else: raise errors.ProgrammerError("Unknown job status code '%s'" % status) format_msg(1, "Status: %s" % status) if recv_ts is not None: format_msg(1, "Received: %s" % FormatTimestamp(recv_ts)) else: format_msg(1, "Missing received timestamp (%s)" % str(recv_ts)) if start_ts is not None: if recv_ts is not None: d1 = start_ts[0] - recv_ts[0] + (start_ts[1] - recv_ts[1]) / 1000000.0 delta = " (delta %.6fs)" % d1 else: delta = "" format_msg(1, "Processing start: %s%s" % (FormatTimestamp(start_ts), delta)) else: format_msg(1, "Processing start: unknown (%s)" % str(start_ts)) if end_ts is not None: if start_ts is not None: d2 = end_ts[0] - start_ts[0] + (end_ts[1] - start_ts[1]) / 1000000.0 delta = " (delta %.6fs)" % d2 else: delta = "" format_msg(1, "Processing end: %s%s" % (FormatTimestamp(end_ts), delta)) else: format_msg(1, "Processing end: unknown (%s)" % str(end_ts)) if end_ts is not None and recv_ts is not None: d3 = end_ts[0] - recv_ts[0] + (end_ts[1] - recv_ts[1]) / 1000000.0 format_msg(1, "Total processing time: %.6f seconds" % d3) else: format_msg(1, "Total processing time: N/A") format_msg(1, "Opcodes:") for (opcode, result, status, log, s_ts, x_ts, e_ts) in \ zip(ops, opresult, opstatus, oplog, opstart, opexec, opend): format_msg(2, "%s" % opcode["OP_ID"]) format_msg(3, "Status: %s" % status) if isinstance(s_ts, (tuple, list)): format_msg(3, "Processing start: %s" % FormatTimestamp(s_ts)) else: format_msg(3, "No processing start time") if isinstance(x_ts, (tuple, list)): format_msg(3, "Execution start: %s" % FormatTimestamp(x_ts)) else: format_msg(3, "No execution start time") if isinstance(e_ts, (tuple, list)): format_msg(3, "Processing end: %s" % FormatTimestamp(e_ts)) else: format_msg(3, "No processing end time") format_msg(3, "Input fields:") for key in utils.NiceSort(opcode.keys()): if key == "OP_ID": continue val = opcode[key] if isinstance(val, (tuple, list)): val = ",".join([str(item) for item in val]) format_msg(4, "%s: %s" % (key, val)) if result is None: format_msg(3, "No output data") elif isinstance(result, (tuple, list)): if not result: format_msg(3, "Result: empty sequence") else: format_msg(3, "Result:") for elem in result: format_msg(4, result_helper(elem)) elif isinstance(result, dict): if not result: format_msg(3, "Result: empty dictionary") else: format_msg(3, 
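# The job timestamps unpacked above are (seconds, microseconds) pairs, so
# the deltas d1/d2/d3 are computed in two parts. The same arithmetic in
# isolation, with made-up timestamps:
def _TsDelta(ts1, ts2):
  """Return ts2 - ts1 in seconds for (sec, usec) tuples."""
  return ts2[0] - ts1[0] + (ts2[1] - ts1[1]) / 1000000.0

recv = (1390000000, 250000)
start = (1390000002, 750000)
print("%.6f" % _TsDelta(recv, start))  # 2.500000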
"Result:") for key, val in result.iteritems(): format_msg(4, "%s: %s" % (key, result_helper(val))) else: format_msg(3, "Result: %s" % result) format_msg(3, "Execution log:") for serial, log_ts, log_type, log_msg in log: time_txt = FormatTimestamp(log_ts) encoded = FormatLogMessage(log_type, log_msg) format_msg(4, "%s:%s:%s %s" % (serial, time_txt, log_type, encoded)) return 0 def WatchJob(opts, args): """Follow a job and print its output as it arrives. @param opts: the command line options selected by the user @type args: list @param args: Contains the job ID @rtype: int @return: the desired exit code """ job_id = args[0] msg = ("Output from job %s follows" % job_id) ToStdout(msg) ToStdout("-" * len(msg)) retcode = 0 try: cli.PollJob(job_id) except errors.GenericError, err: (retcode, job_result) = cli.FormatError(err) ToStderr("Job %s failed: %s", job_id, job_result) return retcode def WaitJob(opts, args): """Wait for a job to finish, not producing any output. @param opts: the command line options selected by the user @type args: list @param args: Contains the job ID @rtype: int @return: the desired exit code """ job_id = args[0] retcode = 0 try: cli.PollJob(job_id, feedback_fn=lambda _: None) except errors.GenericError, err: (retcode, job_result) = cli.FormatError(err) ToStderr("Job %s failed: %s", job_id, job_result) return retcode _PENDING_OPT = \ cli_option("--pending", default=None, action="store_const", dest="status_filter", const=constants.JOBS_PENDING, help="Select jobs pending execution or being cancelled") _RUNNING_OPT = \ cli_option("--running", default=None, action="store_const", dest="status_filter", const=frozenset([ constants.JOB_STATUS_RUNNING, ]), help="Show jobs currently running only") _ERROR_OPT = \ cli_option("--error", default=None, action="store_const", dest="status_filter", const=frozenset([ constants.JOB_STATUS_ERROR, ]), help="Show failed jobs only") _FINISHED_OPT = \ cli_option("--finished", default=None, action="store_const", dest="status_filter", const=constants.JOBS_FINALIZED, help="Show finished jobs only") _ARCHIVED_OPT = \ cli_option("--archived", default=False, action="store_true", dest="archived", help="Include archived jobs in list (slow and expensive)") _QUEUED_OPT = \ cli_option("--queued", default=None, action="store_const", dest="status_filter", const=frozenset([ constants.JOB_STATUS_QUEUED, ]), help="Select queued jobs only") _WAITING_OPT = \ cli_option("--waiting", default=None, action="store_const", dest="status_filter", const=frozenset([ constants.JOB_STATUS_WAITING, ]), help="Select waiting jobs only") commands = { "list": ( ListJobs, [ArgJobId()], [NOHDR_OPT, SEP_OPT, FIELDS_OPT, VERBOSE_OPT, FORCE_FILTER_OPT, _PENDING_OPT, _RUNNING_OPT, _ERROR_OPT, _FINISHED_OPT, _ARCHIVED_OPT], "[job_id ...]", "Lists the jobs and their status. The available fields can be shown" " using the \"list-fields\" command (see the man page for details)." " The default field list is (in order): %s." 
% utils.CommaJoin(_LIST_DEF_FIELDS)), "list-fields": ( ListJobFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for jobs"), "archive": ( ArchiveJobs, [ArgJobId(min=1)], [], " [ ...]", "Archive specified jobs"), "autoarchive": ( AutoArchiveJobs, [ArgSuggest(min=1, max=1, choices=["1d", "1w", "4w", "all"])], [], "", "Auto archive jobs older than the given age"), "cancel": ( CancelJobs, [ArgJobId()], [FORCE_OPT, _PENDING_OPT, _QUEUED_OPT, _WAITING_OPT], "{[--force] {--pending | --queued | --waiting} |" " [ ...]}", "Cancel jobs"), "info": ( ShowJobs, [ArgJobId(min=1)], [], " [ ...]", "Show detailed information about the specified jobs"), "wait": ( WaitJob, [ArgJobId(min=1, max=1)], [], "", "Wait for a job to finish"), "watch": ( WatchJob, [ArgJobId(min=1, max=1)], [], "", "Follows a job and prints its output as it arrives"), "change-priority": ( ChangePriority, [ArgJobId()], [PRIORITY_OPT, FORCE_OPT, _PENDING_OPT, _QUEUED_OPT, _WAITING_OPT], "--priority {[--force] {--pending | --queued | --waiting} |" " [ ...]}", "Change the priority of jobs"), } #: dictionary with aliases for commands aliases = { "show": "info", } def Main(): return GenericMain(commands, aliases=aliases) ganeti-2.9.3/lib/client/gnt_group.py0000644000000000000000000002721412267470014017405 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Node group related commands""" # pylint: disable=W0401,W0614 # W0401: Wildcard import ganeti.cli # W0614: Unused import %s from wildcard import (since we need cli) from cStringIO import StringIO from ganeti.cli import * from ganeti import constants from ganeti import opcodes from ganeti import utils from ganeti import compat #: default list of fields for L{ListGroups} _LIST_DEF_FIELDS = ["name", "node_cnt", "pinst_cnt", "alloc_policy", "ndparams"] _ENV_OVERRIDE = compat.UniqueFrozenset(["list"]) def AddGroup(opts, args): """Add a node group to the cluster. @param opts: the command line options selected by the user @type args: list @param args: a list of length 1 with the name of the group to create @rtype: int @return: the desired exit code """ ipolicy = CreateIPolicyFromOpts( minmax_ispecs=opts.ipolicy_bounds_specs, ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, ipolicy_disk_templates=opts.ipolicy_disk_templates, group_ipolicy=True) (group_name,) = args diskparams = dict(opts.diskparams) if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) op = opcodes.OpGroupAdd(group_name=group_name, ndparams=opts.ndparams, alloc_policy=opts.alloc_policy, diskparams=diskparams, ipolicy=ipolicy, hv_state=hv_state, disk_state=disk_state) SubmitOrSend(op, opts) def AssignNodes(opts, args): """Assign nodes to a group. 
@param opts: the command line options selected by the user @type args: list @param args: args[0]: group to assign nodes to; args[1:]: nodes to assign @rtype: int @return: the desired exit code """ group_name = args[0] node_names = args[1:] op = opcodes.OpGroupAssignNodes(group_name=group_name, nodes=node_names, force=opts.force) SubmitOrSend(op, opts) def _FmtDict(data): """Format dict data into command-line format. @param data: The input dict to be formatted @return: The formatted dict """ if not data: return "(empty)" return utils.CommaJoin(["%s=%s" % (key, value) for key, value in data.items()]) def ListGroups(opts, args): """List node groups and their properties. @param opts: the command line options selected by the user @type args: list @param args: groups to list, or empty for all @rtype: int @return: the desired exit code """ desired_fields = ParseFields(opts.output, _LIST_DEF_FIELDS) fmtoverride = { "node_list": (",".join, False), "pinst_list": (",".join, False), "ndparams": (_FmtDict, False), } cl = GetClient(query=True) return GenericList(constants.QR_GROUP, desired_fields, args, None, opts.separator, not opts.no_headers, format_override=fmtoverride, verbose=opts.verbose, force_filter=opts.force_filter, cl=cl) def ListGroupFields(opts, args): """List node fields. @param opts: the command line options selected by the user @type args: list @param args: fields to list, or empty for all @rtype: int @return: the desired exit code """ cl = GetClient(query=True) return GenericListFields(constants.QR_GROUP, args, opts.separator, not opts.no_headers, cl=cl) def SetGroupParams(opts, args): """Modifies a node group's parameters. @param opts: the command line options selected by the user @type args: list @param args: should contain only one element, the node group name @rtype: int @return: the desired exit code """ allmods = [opts.ndparams, opts.alloc_policy, opts.diskparams, opts.hv_state, opts.disk_state, opts.ipolicy_bounds_specs, opts.ipolicy_vcpu_ratio, opts.ipolicy_spindle_ratio, opts.diskparams, opts.ipolicy_disk_templates] if allmods.count(None) == len(allmods): ToStderr("Please give at least one of the parameters.") return 1 if opts.disk_state: disk_state = utils.FlatToDict(opts.disk_state) else: disk_state = {} hv_state = dict(opts.hv_state) diskparams = dict(opts.diskparams) # create ipolicy object ipolicy = CreateIPolicyFromOpts( minmax_ispecs=opts.ipolicy_bounds_specs, ipolicy_disk_templates=opts.ipolicy_disk_templates, ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, group_ipolicy=True, allowed_values=[constants.VALUE_DEFAULT]) op = opcodes.OpGroupSetParams(group_name=args[0], ndparams=opts.ndparams, alloc_policy=opts.alloc_policy, hv_state=hv_state, disk_state=disk_state, diskparams=diskparams, ipolicy=ipolicy) result = SubmitOrSend(op, opts) if result: ToStdout("Modified node group %s", args[0]) for param, data in result: ToStdout(" - %-5s -> %s", param, data) return 0 def RemoveGroup(opts, args): """Remove a node group from the cluster. @param opts: the command line options selected by the user @type args: list @param args: a list of length 1 with the name of the group to remove @rtype: int @return: the desired exit code """ (group_name,) = args op = opcodes.OpGroupRemove(group_name=group_name) SubmitOrSend(op, opts) def RenameGroup(opts, args): """Rename a node group. 
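# SetGroupParams above detects "no parameter was given at all" by counting
# None entries: allmods.count(None) == len(allmods). An equivalent, more
# direct spelling of the same test (sketch only; the original form is what
# the code above uses):
def _NothingGiven(mods):
  return all(m is None for m in mods)

print(_NothingGiven([None, None, None]))      # True  -> complain and exit
print(_NothingGiven([None, {"a": 1}, None]))  # False -> proceed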
@param opts: the command line options selected by the user @type args: list @param args: a list of length 2, [old_name, new_name] @rtype: int @return: the desired exit code """ group_name, new_name = args op = opcodes.OpGroupRename(group_name=group_name, new_name=new_name) SubmitOrSend(op, opts) def EvacuateGroup(opts, args): """Evacuate a node group. """ (group_name, ) = args cl = GetClient() op = opcodes.OpGroupEvacuate(group_name=group_name, iallocator=opts.iallocator, target_groups=opts.to, early_release=opts.early_release) result = SubmitOrSend(op, opts, cl=cl) # Keep track of submitted jobs jex = JobExecutor(cl=cl, opts=opts) for (status, job_id) in result[constants.JOB_IDS_KEY]: jex.AddJobId(None, status, job_id) results = jex.GetResults() bad_cnt = len([row for row in results if not row[0]]) if bad_cnt == 0: ToStdout("All instances evacuated successfully.") rcode = constants.EXIT_SUCCESS else: ToStdout("There were %s errors during the evacuation.", bad_cnt) rcode = constants.EXIT_FAILURE return rcode def _FormatGroupInfo(group): (name, ndparams, custom_ndparams, diskparams, custom_diskparams, ipolicy, custom_ipolicy) = group return [ ("Node group", name), ("Node parameters", FormatParamsDictInfo(custom_ndparams, ndparams)), ("Disk parameters", FormatParamsDictInfo(custom_diskparams, diskparams)), ("Instance policy", FormatPolicyInfo(custom_ipolicy, ipolicy, False)), ] def GroupInfo(_, args): """Shows info about node group. """ cl = GetClient(query=True) selected_fields = ["name", "ndparams", "custom_ndparams", "diskparams", "custom_diskparams", "ipolicy", "custom_ipolicy"] result = cl.QueryGroups(names=args, fields=selected_fields, use_locking=False) PrintGenericInfo([ _FormatGroupInfo(group) for group in result ]) def _GetCreateCommand(group): (name, ipolicy) = group buf = StringIO() buf.write("gnt-group add") PrintIPolicyCommand(buf, ipolicy, True) buf.write(" ") buf.write(name) return buf.getvalue() def ShowCreateCommand(opts, args): """Shows the command that can be used to re-create a node group. Currently it works only for ipolicy specs. """ cl = GetClient(query=True) selected_fields = ["name"] if opts.include_defaults: selected_fields += ["ipolicy"] else: selected_fields += ["custom_ipolicy"] result = cl.QueryGroups(names=args, fields=selected_fields, use_locking=False) for group in result: ToStdout(_GetCreateCommand(group)) commands = { "add": ( AddGroup, ARGS_ONE_GROUP, [DRY_RUN_OPT, ALLOC_POLICY_OPT, NODE_PARAMS_OPT, DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT, PRIORITY_OPT] + SUBMIT_OPTS + INSTANCE_POLICY_OPTS, "", "Add a new node group to the cluster"), "assign-nodes": ( AssignNodes, ARGS_ONE_GROUP + ARGS_MANY_NODES, [DRY_RUN_OPT, FORCE_OPT, PRIORITY_OPT] + SUBMIT_OPTS, " ...", "Assign nodes to a group"), "list": ( ListGroups, ARGS_MANY_GROUPS, [NOHDR_OPT, SEP_OPT, FIELDS_OPT, VERBOSE_OPT, FORCE_FILTER_OPT], "[...]", "Lists the node groups in the cluster. The available fields can be shown" " using the \"list-fields\" command (see the man page for details)." " The default list is (in order): %s." 
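# EvacuateGroup above counts failed jobs by testing the first element of
# each JobExecutor result row. The same tally over fabricated result rows:
results = [(True, "job ok"), (False, "node unreachable"), (True, "job ok")]
bad_cnt = len([row for row in results if not row[0]])
# a generator-based equivalent: sum(1 for (ok, _) in results if not ok)
print(bad_cnt)  # 1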
% utils.CommaJoin(_LIST_DEF_FIELDS)), "list-fields": ( ListGroupFields, [ArgUnknown()], [NOHDR_OPT, SEP_OPT], "[fields...]", "Lists all available fields for node groups"), "modify": ( SetGroupParams, ARGS_ONE_GROUP, [DRY_RUN_OPT] + SUBMIT_OPTS + [ALLOC_POLICY_OPT, NODE_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT, DISK_PARAMS_OPT, PRIORITY_OPT] + INSTANCE_POLICY_OPTS, "", "Alters the parameters of a node group"), "remove": ( RemoveGroup, ARGS_ONE_GROUP, [DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS, "[--dry-run] ", "Remove an (empty) node group from the cluster"), "rename": ( RenameGroup, [ArgGroup(min=2, max=2)], [DRY_RUN_OPT] + SUBMIT_OPTS + [PRIORITY_OPT], "[--dry-run] ", "Rename a node group"), "evacuate": ( EvacuateGroup, [ArgGroup(min=1, max=1)], [TO_GROUP_OPT, IALLOCATOR_OPT, EARLY_RELEASE_OPT] + SUBMIT_OPTS, "[-I ] [--to ]", "Evacuate all instances within a group"), "list-tags": ( ListTags, ARGS_ONE_GROUP, [], "", "List the tags of the given group"), "add-tags": ( AddTags, [ArgGroup(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, " tag...", "Add tags to the given group"), "remove-tags": ( RemoveTags, [ArgGroup(min=1, max=1), ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, " tag...", "Remove tags from the given group"), "info": ( GroupInfo, ARGS_MANY_GROUPS, [], "[...]", "Show group information"), "show-ispecs-cmd": ( ShowCreateCommand, ARGS_MANY_GROUPS, [INCLUDEDEFAULTS_OPT], "[--include-defaults] [...]", "Show the command line to re-create a group"), } def Main(): return GenericMain(commands, override={"tag_type": constants.TAG_NODEGROUP}, env_override=_ENV_OVERRIDE) ganeti-2.9.3/lib/cmdlib/0000755000000000000000000000000012271445544015002 5ustar00rootroot00000000000000ganeti-2.9.3/lib/cmdlib/base.py0000644000000000000000000004347112267470014016272 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Base classes and functions for cmdlib.""" import logging from ganeti import errors from ganeti import constants from ganeti import locking from ganeti import query from ganeti import utils from ganeti.cmdlib.common import ExpandInstanceUuidAndName class ResultWithJobs: """Data container for LU results with jobs. Instances of this class returned from L{LogicalUnit.Exec} will be recognized by L{mcpu._ProcessResult}. The latter will then submit the jobs contained in the C{jobs} attribute and include the job IDs in the opcode result. """ def __init__(self, jobs, **kwargs): """Initializes this class. Additional return values can be specified as keyword arguments. @type jobs: list of lists of L{opcode.OpCode} @param jobs: A list of lists of opcode objects """ self.jobs = jobs self.other = kwargs class LogicalUnit(object): """Logical Unit base class. 
Subclasses must follow these rules: - implement ExpandNames - implement CheckPrereq (except when tasklets are used) - implement Exec (except when tasklets are used) - implement BuildHooksEnv - implement BuildHooksNodes - redefine HPATH and HTYPE - optionally redefine their run requirements: REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively Note that all commands require root permissions. @ivar dry_run_result: the value (if any) that will be returned to the caller in dry-run mode (signalled by opcode dry_run parameter) """ HPATH = None HTYPE = None REQ_BGL = True def __init__(self, processor, op, context, rpc_runner): """Constructor for LogicalUnit. This needs to be overridden in derived classes in order to check op validity. """ self.proc = processor self.op = op self.cfg = context.cfg self.glm = context.glm # readability alias self.owned_locks = context.glm.list_owned self.context = context self.rpc = rpc_runner # Dictionaries used to declare locking needs to mcpu self.needed_locks = None self.share_locks = dict.fromkeys(locking.LEVELS, 0) self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False) self.add_locks = {} self.remove_locks = {} # Used to force good behavior when calling helper functions self.recalculate_locks = {} # logging self.Log = processor.Log # pylint: disable=C0103 self.LogWarning = processor.LogWarning # pylint: disable=C0103 self.LogInfo = processor.LogInfo # pylint: disable=C0103 self.LogStep = processor.LogStep # pylint: disable=C0103 # support for dry-run self.dry_run_result = None # support for generic debug attribute if (not hasattr(self.op, "debug_level") or not isinstance(self.op.debug_level, int)): self.op.debug_level = 0 # Tasklets self.tasklets = None # Validate opcode parameters and set defaults self.op.Validate(True) self.CheckArguments() def CheckArguments(self): """Check syntactic validity for the opcode arguments. This method is for doing a simple syntactic check and ensure validity of opcode parameters, without any cluster-related checks. While the same can be accomplished in ExpandNames and/or CheckPrereq, doing these separate is better because: - ExpandNames is left as as purely a lock-related function - CheckPrereq is run after we have acquired locks (and possible waited for them) The function is allowed to change the self.op attribute so that later methods can no longer worry about missing parameters. """ pass def ExpandNames(self): """Expand names for this LU. This method is called before starting to execute the opcode, and it should update all the parameters of the opcode to their canonical form (e.g. a short node name must be fully expanded after this method has successfully completed). This way locking, hooks, logging, etc. can work correctly. LUs which implement this method must also populate the self.needed_locks member, as a dict with lock levels as keys, and a list of needed lock names as values. 
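# The subclass contract described above, shown against a stub base class.
# StubLU stands in for ganeti.cmdlib.base.LogicalUnit; only the method
# names and the ExpandNames -> CheckPrereq -> Exec call order mirror the
# real thing, everything else is invented for illustration.
class StubLU(object):
  def ExpandNames(self):
    raise NotImplementedError

  def CheckPrereq(self):
    raise NotImplementedError

  def Exec(self, feedback_fn):
    raise NotImplementedError

class LUToyNoop(StubLU):
  """A do-nothing LU: no locks, no prerequisites."""
  def ExpandNames(self):
    self.needed_locks = {}  # explicitly: no locks needed

  def CheckPrereq(self):
    pass  # nothing to verify

  def Exec(self, feedback_fn):
    feedback_fn("toy LU executed")
    return True

_lu = LUToyNoop()
_lu.ExpandNames()
_lu.CheckPrereq()
print(_lu.Exec(lambda msg: None))  # True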
Rules: - use an empty dict if you don't need any lock - if you don't need any lock at a particular level omit that level (note that in this case C{DeclareLocks} won't be called at all for that level) - if you need locks at a level, but you can't calculate it in this function, initialise that level with an empty list and do further processing in L{LogicalUnit.DeclareLocks} (see that function's docstring) - don't put anything for the BGL level - if you want all locks at a level use L{locking.ALL_SET} as a value If you need to share locks (rather than acquire them exclusively) at one level you can modify self.share_locks, setting a true value (usually 1) for that level. By default locks are not shared. This function can also define a list of tasklets, which then will be executed in order instead of the usual LU-level CheckPrereq and Exec functions, if those are not defined by the LU. Examples:: # Acquire all nodes and one instance self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_INSTANCE: ['instance1.example.com'], } # Acquire just two nodes self.needed_locks = { locking.LEVEL_NODE: ['node1-uuid', 'node2-uuid'], } # Acquire no locks self.needed_locks = {} # No, you can't leave it to the default value None """ # The implementation of this method is mandatory only if the new LU is # concurrent, so that old LUs don't need to be changed all at the same # time. if self.REQ_BGL: self.needed_locks = {} # Exclusive LUs don't need locks. else: raise NotImplementedError def DeclareLocks(self, level): """Declare LU locking needs for a level While most LUs can just declare their locking needs at ExpandNames time, sometimes there's the need to calculate some locks after having acquired the ones before. This function is called just before acquiring locks at a particular level, but after acquiring the ones at lower levels, and permits such calculations. It can be used to modify self.needed_locks, and by default it does nothing. This function is only called if you have something already set in self.needed_locks for the level. @param level: Locking level which is going to be locked @type level: member of L{ganeti.locking.LEVELS} """ def CheckPrereq(self): """Check prerequisites for this LU. This method should check that the prerequisites for the execution of this LU are fulfilled. It can do internode communication, but it should be idempotent - no cluster or system changes are allowed. The method should raise errors.OpPrereqError in case something is not fulfilled. Its return value is ignored. This method should also update all the parameters of the opcode to their canonical form if it hasn't been done by ExpandNames before. """ if self.tasklets is not None: for (idx, tl) in enumerate(self.tasklets): logging.debug("Checking prerequisites for tasklet %s/%s", idx + 1, len(self.tasklets)) tl.CheckPrereq() else: pass def Exec(self, feedback_fn): """Execute the LU. This method should implement the actual work. It should raise errors.OpExecError for failures that are somewhat dealt with in code, or expected. """ if self.tasklets is not None: for (idx, tl) in enumerate(self.tasklets): logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) tl.Exec(feedback_fn) else: raise NotImplementedError def BuildHooksEnv(self): """Build hooks environment for this LU. @rtype: dict @return: Dictionary containing the environment that will be used for running the hooks for this LU. The keys of the dict must not be prefixed with "GANETI_"--that'll be added by the hooks runner. 
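# When an LU defines self.tasklets, the CheckPrereq and Exec methods above
# simply walk the list in order. The driver pattern on its own, with
# trivial illustrative tasklets (not ganeti's Tasklet class):
class _ToyTasklet(object):
  def __init__(self, name):
    self.name = name

  def CheckPrereq(self):
    print("checking %s" % self.name)

  def Exec(self, feedback_fn):
    feedback_fn("running %s" % self.name)

def _Feedback(msg):
  print(msg)

_tasklets = [_ToyTasklet("step1"), _ToyTasklet("step2")]
for _tl in _tasklets:
  _tl.CheckPrereq()      # all prerequisites first...
for _tl in _tasklets:
  _tl.Exec(_Feedback)    # ...then execution, in the same order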
The hooks runner will extend the environment with additional variables. If no environment should be defined, an empty dictionary should be returned (not C{None}). @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called. """ raise NotImplementedError def BuildHooksNodes(self): """Build list of nodes to run LU's hooks. @rtype: tuple; (list, list) or (list, list, list) @return: Tuple containing a list of node UUIDs on which the hook should run before the execution and a list of node UUIDs on which the hook should run after the execution. As it might be possible that the node UUID is not known at the time this method is invoked, an optional third list can be added which contains node names on which the hook should run after the execution (in case of node add, for instance). No nodes should be returned as an empty list (and not None). @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called. """ raise NotImplementedError def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result): """Notify the LU about the results of its hooks. This method is called every time a hooks phase is executed, and notifies the Logical Unit about the hooks' result. The LU can then use it to alter its result based on the hooks. By default the method does nothing and the previous result is passed back unchanged but any LU can define it if it wants to use the local cluster hook-scripts somehow. @param phase: one of L{constants.HOOKS_PHASE_POST} or L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase @param hook_results: the results of the multi-node hooks rpc call @param feedback_fn: function used send feedback back to the caller @param lu_result: the previous Exec result this LU had, or None in the PRE phase @return: the new Exec result, based on the previous result and hook results """ # API must be kept, thus we ignore the unused argument and could # be a function warnings # pylint: disable=W0613,R0201 return lu_result def _ExpandAndLockInstance(self): """Helper function to expand and lock an instance. Many LUs that work on an instance take its name in self.op.instance_name and need to expand it and then declare the expanded name for locking. This function does it, and then updates self.op.instance_name to the expanded name. It also initializes needed_locks as a dict, if this hasn't been done before. """ if self.needed_locks is None: self.needed_locks = {} else: assert locking.LEVEL_INSTANCE not in self.needed_locks, \ "_ExpandAndLockInstance called with instance-level locks set" (self.op.instance_uuid, self.op.instance_name) = \ ExpandInstanceUuidAndName(self.cfg, self.op.instance_uuid, self.op.instance_name) self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name def _LockInstancesNodes(self, primary_only=False, level=locking.LEVEL_NODE): """Helper function to declare instances' nodes for locking. This function should be called after locking one or more instances to lock their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] with all primary or secondary nodes for instances already locked and present in self.needed_locks[locking.LEVEL_INSTANCE]. It should be called from DeclareLocks, and for safety only works if self.recalculate_locks[locking.LEVEL_NODE] is set. In the future it may grow parameters to just lock some instance's nodes, or to just lock primaries or secondary nodes, if needed. 
If should be called in DeclareLocks in a way similar to:: if level == locking.LEVEL_NODE: self._LockInstancesNodes() @type primary_only: boolean @param primary_only: only lock primary nodes of locked instances @param level: Which lock level to use for locking nodes """ assert level in self.recalculate_locks, \ "_LockInstancesNodes helper function called with no nodes to recalculate" # TODO: check if we're really been called with the instance locks held # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the # future we might want to have different behaviors depending on the value # of self.recalculate_locks[locking.LEVEL_NODE] wanted_node_uuids = [] locked_i = self.owned_locks(locking.LEVEL_INSTANCE) for _, instance in self.cfg.GetMultiInstanceInfoByName(locked_i): wanted_node_uuids.append(instance.primary_node) if not primary_only: wanted_node_uuids.extend(instance.secondary_nodes) if self.recalculate_locks[level] == constants.LOCKS_REPLACE: self.needed_locks[level] = wanted_node_uuids elif self.recalculate_locks[level] == constants.LOCKS_APPEND: self.needed_locks[level].extend(wanted_node_uuids) else: raise errors.ProgrammerError("Unknown recalculation mode") del self.recalculate_locks[level] class NoHooksLU(LogicalUnit): # pylint: disable=W0223 """Simple LU which runs no hooks. This LU is intended as a parent for other LogicalUnits which will run no hooks, in order to reduce duplicate code. """ HPATH = None HTYPE = None def BuildHooksEnv(self): """Empty BuildHooksEnv for NoHooksLu. This just raises an error. """ raise AssertionError("BuildHooksEnv called for NoHooksLUs") def BuildHooksNodes(self): """Empty BuildHooksNodes for NoHooksLU. """ raise AssertionError("BuildHooksNodes called for NoHooksLU") class Tasklet: """Tasklet base class. Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or they can mix legacy code with tasklets. Locking needs to be done in the LU, tasklets know nothing about locks. Subclasses must follow these rules: - Implement CheckPrereq - Implement Exec """ def __init__(self, lu): self.lu = lu # Shortcuts self.cfg = lu.cfg self.rpc = lu.rpc def CheckPrereq(self): """Check prerequisites for this tasklets. This method should check whether the prerequisites for the execution of this tasklet are fulfilled. It can do internode communication, but it should be idempotent - no cluster or system changes are allowed. The method should raise errors.OpPrereqError in case something is not fulfilled. Its return value is ignored. This method should also update all parameters to their canonical form if it hasn't been done before. """ pass def Exec(self, feedback_fn): """Execute the tasklet. This method should implement the actual work. It should raise errors.OpExecError for failures that are somewhat dealt with in code, or expected. """ raise NotImplementedError class QueryBase: """Base for query utility classes. """ #: Attribute holding field definitions FIELDS = None #: Field to sort by SORT_FIELD = "name" def __init__(self, qfilter, fields, use_locking): """Initializes this class. """ self.use_locking = use_locking self.query = query.Query(self.FIELDS, fields, qfilter=qfilter, namefield=self.SORT_FIELD) self.requested_data = self.query.RequestedData() self.names = self.query.RequestedNames() # Sort only if no names were requested self.sort_by_name = not self.names self.do_locking = None self.wanted = None def _GetNames(self, lu, all_names, lock_level): """Helper function to determine names asked for in the query. 
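# _LockInstancesNodes above either overwrites or extends the node lock
# list depending on the recalculation mode. The two behaviours side by
# side; "replace" and "append" are plain-string stand-ins for
# constants.LOCKS_REPLACE and constants.LOCKS_APPEND:
def _Recalculate(current, wanted, mode):
  if mode == "replace":
    return list(wanted)
  elif mode == "append":
    return current + list(wanted)
  raise ValueError("Unknown recalculation mode")

print(_Recalculate(["node1"], ["node2", "node3"], "replace"))
# ['node2', 'node3']
print(_Recalculate(["node1"], ["node2", "node3"], "append"))
# ['node1', 'node2', 'node3']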
""" if self.do_locking: names = lu.owned_locks(lock_level) else: names = all_names if self.wanted == locking.ALL_SET: assert not self.names # caller didn't specify names, so ordering is not important return utils.NiceSort(names) # caller specified names and we must keep the same order assert self.names assert not self.do_locking or lu.glm.is_owned(lock_level) missing = set(self.wanted).difference(names) if missing: raise errors.OpExecError("Some items were removed before retrieving" " their data: %s" % missing) # Return expanded names return self.wanted def ExpandNames(self, lu): """Expand names for this query. See L{LogicalUnit.ExpandNames}. """ raise NotImplementedError() def DeclareLocks(self, lu, level): """Declare locks for this query. See L{LogicalUnit.DeclareLocks}. """ raise NotImplementedError() def _GetQueryData(self, lu): """Collects all data for this query. @return: Query data object """ raise NotImplementedError() def NewStyleQuery(self, lu): """Collect data and execute query. """ return query.GetQueryResponse(self.query, self._GetQueryData(lu), sort_by_name=self.sort_by_name) def OldStyleQuery(self, lu): """Collect data and execute query. """ return self.query.OldStyleQuery(self._GetQueryData(lu), sort_by_name=self.sort_by_name) ganeti-2.9.3/lib/cmdlib/instance_utils.py0000644000000000000000000004525012271422343020376 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility function mainly, but not only used by instance LU's.""" import logging import os from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import network from ganeti import objects from ganeti import pathutils from ganeti import utils from ganeti.cmdlib.common import AnnotateDiskParams, \ ComputeIPolicyInstanceViolation, CheckDiskTemplateEnabled def BuildInstanceHookEnv(name, primary_node_name, secondary_node_names, os_type, status, minmem, maxmem, vcpus, nics, disk_template, disks, bep, hvp, hypervisor_name, tags): """Builds instance related env variables for hooks This builds the hook environment from individual variables. 
@type name: string @param name: the name of the instance @type primary_node_name: string @param primary_node_name: the name of the instance's primary node @type secondary_node_names: list @param secondary_node_names: list of secondary nodes as strings @type os_type: string @param os_type: the name of the instance's OS @type status: string @param status: the desired status of the instance @type minmem: string @param minmem: the minimum memory size of the instance @type maxmem: string @param maxmem: the maximum memory size of the instance @type vcpus: string @param vcpus: the count of VCPUs the instance has @type nics: list @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo) representing the NICs the instance has @type disk_template: string @param disk_template: the disk template of the instance @type disks: list @param disks: list of tuples (name, uuid, size, mode) @type bep: dict @param bep: the backend parameters for the instance @type hvp: dict @param hvp: the hypervisor parameters for the instance @type hypervisor_name: string @param hypervisor_name: the hypervisor for the instance @type tags: list @param tags: list of instance tags as strings @rtype: dict @return: the hook environment for this instance """ env = { "OP_TARGET": name, "INSTANCE_NAME": name, "INSTANCE_PRIMARY": primary_node_name, "INSTANCE_SECONDARIES": " ".join(secondary_node_names), "INSTANCE_OS_TYPE": os_type, "INSTANCE_STATUS": status, "INSTANCE_MINMEM": minmem, "INSTANCE_MAXMEM": maxmem, # TODO(2.9) remove deprecated "memory" value "INSTANCE_MEMORY": maxmem, "INSTANCE_VCPUS": vcpus, "INSTANCE_DISK_TEMPLATE": disk_template, "INSTANCE_HYPERVISOR": hypervisor_name, } if nics: nic_count = len(nics) for idx, (name, uuid, ip, mac, mode, link, net, netinfo) in enumerate(nics): if ip is None: ip = "" if name: env["INSTANCE_NIC%d_NAME" % idx] = name env["INSTANCE_NIC%d_UUID" % idx] = uuid env["INSTANCE_NIC%d_IP" % idx] = ip env["INSTANCE_NIC%d_MAC" % idx] = mac env["INSTANCE_NIC%d_MODE" % idx] = mode env["INSTANCE_NIC%d_LINK" % idx] = link if netinfo: nobj = objects.Network.FromDict(netinfo) env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx)) elif network: # FIXME: broken network reference: the instance NIC specifies a # network, but the relevant network entry was not in the config. This # should be made impossible. env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net if mode == constants.NIC_MODE_BRIDGED: env["INSTANCE_NIC%d_BRIDGE" % idx] = link else: nic_count = 0 env["INSTANCE_NIC_COUNT"] = nic_count if disks: disk_count = len(disks) for idx, (name, uuid, size, mode) in enumerate(disks): if name: env["INSTANCE_DISK%d_NAME" % idx] = name env["INSTANCE_DISK%d_UUID" % idx] = uuid env["INSTANCE_DISK%d_SIZE" % idx] = size env["INSTANCE_DISK%d_MODE" % idx] = mode else: disk_count = 0 env["INSTANCE_DISK_COUNT"] = disk_count if not tags: tags = [] env["INSTANCE_TAGS"] = " ".join(tags) for source, kind in [(bep, "BE"), (hvp, "HV")]: for key, value in source.items(): env["INSTANCE_%s_%s" % (kind, key)] = value return env def BuildInstanceHookEnvByObject(lu, instance, override=None): """Builds instance related env variables for hooks from an object. 
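# The per-NIC loop above flattens each NIC tuple into numbered environment
# variables. The naming scheme in isolation, over one invented NIC; only a
# subset of the fields is shown, and the "bridged" literal stands in for
# constants.NIC_MODE_BRIDGED:
def _NicEnv(idx, ip, mac, mode, link):
  env = {}
  env["INSTANCE_NIC%d_IP" % idx] = ip or ""
  env["INSTANCE_NIC%d_MAC" % idx] = mac
  env["INSTANCE_NIC%d_MODE" % idx] = mode
  env["INSTANCE_NIC%d_LINK" % idx] = link
  if mode == "bridged":
    env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  return env

_env = _NicEnv(0, None, "aa:00:00:fa:3a:3c", "bridged", "xen-br0")
for _key in sorted(_env):
  print("%s=%s" % (_key, _env[_key]))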
@type lu: L{LogicalUnit} @param lu: the logical unit on whose behalf we execute @type instance: L{objects.Instance} @param instance: the instance for which we should build the environment @type override: dict @param override: dictionary with key/values that will override our values @rtype: dict @return: the hook environment dictionary """ cluster = lu.cfg.GetClusterInfo() bep = cluster.FillBE(instance) hvp = cluster.FillHV(instance) args = { "name": instance.name, "primary_node_name": lu.cfg.GetNodeName(instance.primary_node), "secondary_node_names": lu.cfg.GetNodeNames(instance.secondary_nodes), "os_type": instance.os, "status": instance.admin_state, "maxmem": bep[constants.BE_MAXMEM], "minmem": bep[constants.BE_MINMEM], "vcpus": bep[constants.BE_VCPUS], "nics": NICListToTuple(lu, instance.nics), "disk_template": instance.disk_template, "disks": [(disk.name, disk.uuid, disk.size, disk.mode) for disk in instance.disks], "bep": bep, "hvp": hvp, "hypervisor_name": instance.hypervisor, "tags": instance.tags, } if override: args.update(override) return BuildInstanceHookEnv(**args) # pylint: disable=W0142 def GetClusterDomainSecret(): """Reads the cluster domain secret. """ return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE, strict=True) def CheckNodeNotDrained(lu, node_uuid): """Ensure that a given node is not drained. @param lu: the LU on behalf of which we make the check @param node_uuid: the node to check @raise errors.OpPrereqError: if the node is drained """ node = lu.cfg.GetNodeInfo(node_uuid) if node.drained: raise errors.OpPrereqError("Can't use drained node %s" % node.name, errors.ECODE_STATE) def CheckNodeVmCapable(lu, node_uuid): """Ensure that a given node is vm capable. @param lu: the LU on behalf of which we make the check @param node_uuid: the node to check @raise errors.OpPrereqError: if the node is not vm capable """ if not lu.cfg.GetNodeInfo(node_uuid).vm_capable: raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node_uuid, errors.ECODE_STATE) def RemoveInstance(lu, feedback_fn, instance, ignore_failures): """Utility function to remove an instance. """ logging.info("Removing block devices for instance %s", instance.name) if not RemoveDisks(lu, instance, ignore_failures=ignore_failures): if not ignore_failures: raise errors.OpExecError("Can't remove instance's disks") feedback_fn("Warning: can't remove instance's disks") logging.info("Removing instance %s out of cluster config", instance.name) lu.cfg.RemoveInstance(instance.uuid) assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ "Instance lock removal conflict" # Remove lock for the instance lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name def RemoveDisks(lu, instance, target_node_uuid=None, ignore_failures=False): """Remove all disks for an instance. This abstracts away some work from `AddInstance()` and `RemoveInstance()`. Note that in case some of the devices couldn't be removed, the removal will continue with the other ones. 
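
# Illustrative sketch, not part of the original module: the 'override'
# argument of BuildInstanceHookEnvByObject above is applied with a plain
# dict.update(), so any caller-supplied key replaces the value derived from
# the instance object before BuildInstanceHookEnv is called. A standalone
# model of that merge:
def _ExampleEnvArgs(instance_args, override=None):
  """Mimics the override merging done by BuildInstanceHookEnvByObject."""
  args = dict(instance_args)  # values computed from the instance object
  if override:
    args.update(override)     # caller-supplied values win
  return args

assert (_ExampleEnvArgs({"status": "down", "vcpus": 1},
                        override={"status": "up"}) ==
        {"status": "up", "vcpus": 1})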
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node_uuid: string
  @param target_node_uuid: used to override the node on which to remove the
      disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node_uuid:
      edata = [(target_node_uuid, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node_uuid, disk in edata:
      lu.cfg.SetDiskID(disk, node_uuid)
      result = lu.rpc.call_blockdev_remove(node_uuid, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx,
                      lu.cfg.GetNodeName(node_uuid), result.fail_msg)
        if not (result.offline and node_uuid != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.DTS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  CheckDiskTemplateEnabled(lu.cfg.GetClusterInfo(), instance.disk_template)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node_uuid:
      tgt = target_node_uuid
    else:
      tgt = instance.primary_node

    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, lu.cfg.GetNodeName(tgt),
                    result.fail_msg)
      all_result = False

  return all_result


def NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network,
          netinfo)


def NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(NICToTuple(lu, nic))
  return hooks_nics


def CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.
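
# Illustrative sketch, not part of the original module: CopyLockList above
# must pass the locking.ALL_SET sentinel through unchanged (it is not a list
# and cannot be sliced), while returning an independent copy of real name
# lists so that later mutation of one lock list cannot leak into another
# lock level. A standalone model with a stand-in sentinel:
_EXAMPLE_ALL_SET = object()  # stand-in for the locking.ALL_SET sentinel

def _ExampleCopyLockList(names):
  if names == _EXAMPLE_ALL_SET:
    return _EXAMPLE_ALL_SET
  return names[:]

_example_nodes = ["node1", "node2"]
_example_copy = _ExampleCopyLockList(_example_nodes)
_example_copy.append("node3")
assert _example_nodes == ["node1", "node2"]  # the original is untouched
assert _ExampleCopyLockList(_EXAMPLE_ALL_SET) is _EXAMPLE_ALL_SET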
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the locks are released
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group, cfg,
                                 _compute_fn=ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{ganeti.cmdlib.common.ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance, cfg)


def CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
                           _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{ganeti.cmdlib.common.ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def CheckNodeFreeMemory(lu, node_uuid, reason, requested, hvname, hvparams):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
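
# Illustrative sketch, not part of the original module: ReleaseLocks above
# reduces the names/keep choice to a single predicate and then partitions
# the owned lock names with it. A standalone model of that partitioning,
# using plain lists instead of the lock manager:
def _ExamplePartitionLocks(owned, names=None, keep=None):
  """Returns (to_release, to_retain) the way ReleaseLocks decides them."""
  assert not (keep is not None and names is not None), \
    "Only one of 'names' and 'keep' can be given"
  if names is not None:
    should_release = lambda lock: lock in names
  elif keep:
    should_release = lambda lock: lock not in keep
  else:
    return (list(owned), [])  # neither given: release everything
  release = [lock for lock in owned if should_release(lock)]
  retain = [lock for lock in owned if not should_release(lock)]
  return (release, retain)

assert (_ExamplePartitionLocks(["a", "b", "c"], keep=["b"]) ==
        (["a", "c"], ["b"]))
assert (_ExamplePartitionLocks(["a", "b"], names=["a"]) ==
        (["a"], ["b"]))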
  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node_uuid: C{str}
  @param node_uuid: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hvname: string
  @param hvname: the hypervisor's name
  @type hvparams: dict of strings
  @param hvparams: the hypervisor's parameters
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_name = lu.cfg.GetNodeName(node_uuid)
  nodeinfo = lu.rpc.call_node_info([node_uuid], None, [(hvname, hvparams)])
  nodeinfo[node_uuid].Raise("Can't get data from node %s" % node_name,
                            prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node_uuid].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node_name, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node_name, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem


def CheckInstanceBridgesExist(lu, instance, node_uuid=None):
  """Check that the bridges needed by an instance exist.

  """
  if node_uuid is None:
    node_uuid = instance.primary_node
  CheckNicsBridgesExist(lu, instance.nics, node_uuid)


def CheckNicsBridgesExist(lu, nics, node_uuid):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(node_uuid, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 lu.cfg.GetNodeName(node_uuid), prereq=True,
                 ecode=errors.ECODE_ENVIRON)


def CheckNodeHasOS(lu, node_uuid, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node_uuid: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node_uuid, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, lu.cfg.GetNodeName(node_uuid)),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
ganeti-2.9.3/lib/cmdlib/instance.py0000644000000000000000000043671112271422343017164 0ustar00rootroot00000000000000#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Logical units dealing with instances.""" import OpenSSL import copy import logging import os from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import ht from ganeti import hypervisor from ganeti import locking from ganeti.masterd import iallocator from ganeti import masterd from ganeti import netutils from ganeti import objects from ganeti import opcodes from ganeti import pathutils from ganeti import rpc from ganeti import utils from ganeti.cmdlib.base import NoHooksLU, LogicalUnit, ResultWithJobs from ganeti.cmdlib.common import INSTANCE_DOWN, \ INSTANCE_NOT_RUNNING, CAN_CHANGE_INSTANCE_OFFLINE, CheckNodeOnline, \ ShareAll, GetDefaultIAllocator, CheckInstanceNodeGroups, \ LoadNodeEvacResult, CheckIAllocatorOrNode, CheckParamsNotGlobal, \ IsExclusiveStorageEnabledNode, CheckHVParams, CheckOSParams, \ AnnotateDiskParams, GetUpdatedParams, ExpandInstanceUuidAndName, \ ComputeIPolicySpecViolation, CheckInstanceState, ExpandNodeUuidAndName, \ CheckDiskTemplateEnabled from ganeti.cmdlib.instance_storage import CreateDisks, \ CheckNodesFreeDiskPerVG, WipeDisks, WipeOrCleanupDisks, WaitForSync, \ IsExclusiveStorageEnabledNodeUuid, CreateSingleBlockDev, ComputeDisks, \ CheckRADOSFreeSpace, ComputeDiskSizePerVG, GenerateDiskTemplate, \ StartInstanceDisks, ShutdownInstanceDisks, AssembleInstanceDisks, \ CheckSpindlesExclusiveStorage from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \ GetClusterDomainSecret, BuildInstanceHookEnv, NICListToTuple, \ NICToTuple, CheckNodeNotDrained, RemoveInstance, CopyLockList, \ ReleaseLocks, CheckNodeVmCapable, CheckTargetNodeIPolicy, \ GetInstanceInfoText, RemoveDisks, CheckNodeFreeMemory, \ CheckInstanceBridgesExist, CheckNicsBridgesExist, CheckNodeHasOS import ganeti.masterd.instance #: Type description for changes as returned by L{_ApplyContainerMods}'s #: callbacks _TApplyContModsCbChanges = \ ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.TNonEmptyString, ht.TAny, ]))) def _CheckHostnameSane(lu, name): """Ensures that a given hostname resolves to a 'sane' name. The given name is required to be a prefix of the resolved hostname, to prevent accidental mismatches. @param lu: the logical unit on behalf of which we're checking @param name: the name we should resolve and check @return: the resolved hostname object """ hostname = netutils.GetHostname(name=name) if hostname.name != name: lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) if not utils.MatchNameComponent(name, [hostname.name]): raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" " same as given hostname '%s'") % (hostname.name, name), errors.ECODE_INVAL) return hostname def _CheckOpportunisticLocking(op): """Generate error if opportunistic locking is not possible. 
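
# Illustrative sketch, not part of the original module: the rule enforced by
# _CheckHostnameSane above is that the user-supplied name must match the
# resolved hostname, either exactly or as a prefix on a dot boundary (a
# simplified reading of what utils.MatchNameComponent checks). A standalone
# version of the comparison, without the DNS resolution:
def _ExampleNameIsSane(given, resolved):
  """True if 'given' equals or is a dot-boundary prefix of 'resolved'."""
  return resolved == given or resolved.startswith(given + ".")

assert _ExampleNameIsSane("test1", "test1.example.com")
assert not _ExampleNameIsSane("test", "test1.example.com")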
""" if op.opportunistic_locking and not op.iallocator: raise errors.OpPrereqError("Opportunistic locking is only available in" " combination with an instance allocator", errors.ECODE_INVAL) def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_name_whitelist): """Wrapper around IAReqInstanceAlloc. @param op: The instance opcode @param disks: The computed disks @param nics: The computed nics @param beparams: The full filled beparams @param node_name_whitelist: List of nodes which should appear as online to the allocator (unless the node is already marked offline) @returns: A filled L{iallocator.IAReqInstanceAlloc} """ spindle_use = beparams[constants.BE_SPINDLE_USE] return iallocator.IAReqInstanceAlloc(name=op.instance_name, disk_template=op.disk_template, tags=op.tags, os=op.os_type, vcpus=beparams[constants.BE_VCPUS], memory=beparams[constants.BE_MAXMEM], spindle_use=spindle_use, disks=disks, nics=[n.ToDict() for n in nics], hypervisor=op.hypervisor, node_whitelist=node_name_whitelist) def _ComputeFullBeParams(op, cluster): """Computes the full beparams. @param op: The instance opcode @param cluster: The cluster config object @return: The fully filled beparams """ default_beparams = cluster.beparams[constants.PP_DEFAULT] for param, value in op.beparams.iteritems(): if value == constants.VALUE_AUTO: op.beparams[param] = default_beparams[param] objects.UpgradeBeParams(op.beparams) utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES) return cluster.SimpleFillBE(op.beparams) def _ComputeNics(op, cluster, default_ip, cfg, ec_id): """Computes the nics. @param op: The instance opcode @param cluster: Cluster configuration object @param default_ip: The default ip to assign @param cfg: An instance of the configuration object @param ec_id: Execution context ID @returns: The build up nics """ nics = [] for nic in op.nics: nic_mode_req = nic.get(constants.INIC_MODE, None) nic_mode = nic_mode_req if nic_mode is None or nic_mode == constants.VALUE_AUTO: nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] net = nic.get(constants.INIC_NETWORK, None) link = nic.get(constants.NIC_LINK, None) ip = nic.get(constants.INIC_IP, None) if net is None or net.lower() == constants.VALUE_NONE: net = None else: if nic_mode_req is not None or link is not None: raise errors.OpPrereqError("If network is given, no mode or link" " is allowed to be passed", errors.ECODE_INVAL) # ip validity checks if ip is None or ip.lower() == constants.VALUE_NONE: nic_ip = None elif ip.lower() == constants.VALUE_AUTO: if not op.name_check: raise errors.OpPrereqError("IP address set to auto but name checks" " have been skipped", errors.ECODE_INVAL) nic_ip = default_ip else: # We defer pool operations until later, so that the iallocator has # filled in the instance's node(s) dimara if ip.lower() == constants.NIC_IP_POOL: if net is None: raise errors.OpPrereqError("if ip=pool, parameter network" " must be passed too", errors.ECODE_INVAL) elif not netutils.IPAddress.IsValid(ip): raise errors.OpPrereqError("Invalid IP address '%s'" % ip, errors.ECODE_INVAL) nic_ip = ip # TODO: check the ip address for uniqueness if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: raise errors.OpPrereqError("Routed nic mode requires an ip address", errors.ECODE_INVAL) # MAC address verification mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO) if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): mac = utils.NormalizeAndValidateMac(mac) try: # TODO: We need to factor this out cfg.ReserveMAC(mac, ec_id) 
except errors.ReservationError: raise errors.OpPrereqError("MAC address %s already in use" " in cluster" % mac, errors.ECODE_NOTUNIQUE) # Build nic parameters nicparams = {} if nic_mode_req: nicparams[constants.NIC_MODE] = nic_mode if link: nicparams[constants.NIC_LINK] = link check_params = cluster.SimpleFillNIC(nicparams) objects.NIC.CheckParameterSyntax(check_params) net_uuid = cfg.LookupNetwork(net) name = nic.get(constants.INIC_NAME, None) if name is not None and name.lower() == constants.VALUE_NONE: name = None nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name, network=net_uuid, nicparams=nicparams) nic_obj.uuid = cfg.GenerateUniqueID(ec_id) nics.append(nic_obj) return nics def _CheckForConflictingIp(lu, ip, node_uuid): """In case of conflicting IP address raise error. @type ip: string @param ip: IP address @type node_uuid: string @param node_uuid: node UUID """ (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node_uuid) if conf_net is not None: raise errors.OpPrereqError(("The requested IP address (%s) belongs to" " network %s, but the target NIC does not." % (ip, conf_net)), errors.ECODE_STATE) return (None, None) def _ComputeIPolicyInstanceSpecViolation( ipolicy, instance_spec, disk_template, _compute_fn=ComputeIPolicySpecViolation): """Compute if instance specs meets the specs of ipolicy. @type ipolicy: dict @param ipolicy: The ipolicy to verify against @param instance_spec: dict @param instance_spec: The instance spec to verify @type disk_template: string @param disk_template: the disk template of the instance @param _compute_fn: The function to verify ipolicy (unittest only) @see: L{ComputeIPolicySpecViolation} """ mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None) cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None) disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0) disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, []) nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0) spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None) return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template) def _CheckOSVariant(os_obj, name): """Check whether an OS name conforms to the os variants specification. @type os_obj: L{objects.OS} @param os_obj: OS object to check @type name: string @param name: OS name passed by the user, to check for validity """ variant = objects.OS.GetVariant(name) if not os_obj.supported_variants: if variant: raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'" " passed)" % (os_obj.name, variant), errors.ECODE_INVAL) return if not variant: raise errors.OpPrereqError("OS name must include a variant", errors.ECODE_INVAL) if variant not in os_obj.supported_variants: raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL) class LUInstanceCreate(LogicalUnit): """Create an instance. """ HPATH = "instance-add" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def _CheckDiskTemplateValid(self): """Checks validity of disk template. """ cluster = self.cfg.GetClusterInfo() if self.op.disk_template is None: # FIXME: It would be better to take the default disk template from the # ipolicy, but for the ipolicy we need the primary node, which we get from # the iallocator, which wants the disk template as input. To solve this # chicken-and-egg problem, it should be possible to specify just a node # group from the iallocator and take the ipolicy from that. 
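
# Illustrative sketch, not part of the original module: Ganeti OS names may
# carry a variant after a '+' (e.g. "debootstrap+default"), and the
# _CheckOSVariant helper above validates that variant against the list the
# OS declares. A standalone model of the split and of the three failure
# modes:
def _ExampleCheckVariant(name, supported_variants):
  """Returns an error string, or None if the name/variant pair is valid."""
  variant = name.split("+", 1)[1] if "+" in name else None
  if not supported_variants:
    return "OS doesn't support variants" if variant else None
  if not variant:
    return "OS name must include a variant"
  if variant not in supported_variants:
    return "Unsupported OS variant"
  return None

assert _ExampleCheckVariant("debootstrap+default", ["default"]) is None
assert _ExampleCheckVariant("debootstrap", ["default"]) is not None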
self.op.disk_template = cluster.enabled_disk_templates[0] CheckDiskTemplateEnabled(cluster, self.op.disk_template) def _CheckDiskArguments(self): """Checks validity of disk-related arguments. """ # check that disk's names are unique and valid utils.ValidateDeviceNames("disk", self.op.disks) self._CheckDiskTemplateValid() # check disks. parameter names and consistent adopt/no-adopt strategy has_adopt = has_no_adopt = False for disk in self.op.disks: if self.op.disk_template != constants.DT_EXT: utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) if constants.IDISK_ADOPT in disk: has_adopt = True else: has_no_adopt = True if has_adopt and has_no_adopt: raise errors.OpPrereqError("Either all disks are adopted or none is", errors.ECODE_INVAL) if has_adopt: if self.op.disk_template not in constants.DTS_MAY_ADOPT: raise errors.OpPrereqError("Disk adoption is not supported for the" " '%s' disk template" % self.op.disk_template, errors.ECODE_INVAL) if self.op.iallocator is not None: raise errors.OpPrereqError("Disk adoption not allowed with an" " iallocator script", errors.ECODE_INVAL) if self.op.mode == constants.INSTANCE_IMPORT: raise errors.OpPrereqError("Disk adoption not allowed for" " instance import", errors.ECODE_INVAL) else: if self.op.disk_template in constants.DTS_MUST_ADOPT: raise errors.OpPrereqError("Disk template %s requires disk adoption," " but no 'adopt' parameter given" % self.op.disk_template, errors.ECODE_INVAL) self.adopt_disks = has_adopt def CheckArguments(self): """Check arguments. """ # do not require name_check to ease forward/backward compatibility # for tools if self.op.no_install and self.op.start: self.LogInfo("No-installation mode selected, disabling startup") self.op.start = False # validate/normalize the instance name self.op.instance_name = \ netutils.Hostname.GetNormalizedName(self.op.instance_name) if self.op.ip_check and not self.op.name_check: # TODO: make the ip check more flexible and not depend on the name check raise errors.OpPrereqError("Cannot do IP address check without a name" " check", errors.ECODE_INVAL) # check nics' parameter names for nic in self.op.nics: utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) # check that NIC's parameters names are unique and valid utils.ValidateDeviceNames("NIC", self.op.nics) self._CheckDiskArguments() # instance name verification if self.op.name_check: self.hostname = _CheckHostnameSane(self, self.op.instance_name) self.op.instance_name = self.hostname.name # used in CheckPrereq for ip ping check self.check_ip = self.hostname.ip else: self.check_ip = None # file storage checks if (self.op.file_driver and not self.op.file_driver in constants.FILE_DRIVER): raise errors.OpPrereqError("Invalid file driver name '%s'" % self.op.file_driver, errors.ECODE_INVAL) # set default file_driver if unset and required if (not self.op.file_driver and self.op.disk_template in [constants.DT_FILE, constants.DT_SHARED_FILE]): self.op.file_driver = constants.FD_DEFAULT ### Node/iallocator related checks CheckIAllocatorOrNode(self, "iallocator", "pnode") if self.op.pnode is not None: if self.op.disk_template in constants.DTS_INT_MIRROR: if self.op.snode is None: raise errors.OpPrereqError("The networked disk templates need" " a mirror node", errors.ECODE_INVAL) elif self.op.snode: self.LogWarning("Secondary node will be ignored on non-mirrored disk" " template") self.op.snode = None _CheckOpportunisticLocking(self.op) self._cds = GetClusterDomainSecret() if self.op.mode == constants.INSTANCE_IMPORT: # On import force_variant must 
be True, because if we forced it at # initial install, our only chance when importing it back is that it # works again! self.op.force_variant = True if self.op.no_install: self.LogInfo("No-installation mode has no effect during import") elif self.op.mode == constants.INSTANCE_CREATE: if self.op.os_type is None: raise errors.OpPrereqError("No guest OS specified", errors.ECODE_INVAL) if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: raise errors.OpPrereqError("Guest OS '%s' is not allowed for" " installation" % self.op.os_type, errors.ECODE_STATE) if self.op.disk_template is None: raise errors.OpPrereqError("No disk template specified", errors.ECODE_INVAL) elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: # Check handshake to ensure both clusters have the same domain secret src_handshake = self.op.source_handshake if not src_handshake: raise errors.OpPrereqError("Missing source handshake", errors.ECODE_INVAL) errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, src_handshake) if errmsg: raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, errors.ECODE_INVAL) # Load and check source CA self.source_x509_ca_pem = self.op.source_x509_ca if not self.source_x509_ca_pem: raise errors.OpPrereqError("Missing source X509 CA", errors.ECODE_INVAL) try: (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, self._cds) except OpenSSL.crypto.Error, err: raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % (err, ), errors.ECODE_INVAL) (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) if errcode is not None: raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), errors.ECODE_INVAL) self.source_x509_ca = cert src_instance_name = self.op.source_instance_name if not src_instance_name: raise errors.OpPrereqError("Missing source instance name", errors.ECODE_INVAL) self.source_instance_name = \ netutils.GetHostname(name=src_instance_name).name else: raise errors.OpPrereqError("Invalid instance creation mode %r" % self.op.mode, errors.ECODE_INVAL) def ExpandNames(self): """ExpandNames for CreateInstance. Figure out the right locks for instance creation. """ self.needed_locks = {} # this is just a preventive check, but someone might still add this # instance in the meantime, and creation will fail at lock-add time if self.op.instance_name in\ [inst.name for inst in self.cfg.GetAllInstancesInfo().values()]: raise errors.OpPrereqError("Instance '%s' is already in the cluster" % self.op.instance_name, errors.ECODE_EXISTS) self.add_locks[locking.LEVEL_INSTANCE] = self.op.instance_name if self.op.iallocator: # TODO: Find a solution to not lock all nodes in the cluster, e.g. 
by # specifying a group on instance creation and then selecting nodes from # that group self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET if self.op.opportunistic_locking: self.opportunistic_locks[locking.LEVEL_NODE] = True else: (self.op.pnode_uuid, self.op.pnode) = \ ExpandNodeUuidAndName(self.cfg, self.op.pnode_uuid, self.op.pnode) nodelist = [self.op.pnode_uuid] if self.op.snode is not None: (self.op.snode_uuid, self.op.snode) = \ ExpandNodeUuidAndName(self.cfg, self.op.snode_uuid, self.op.snode) nodelist.append(self.op.snode_uuid) self.needed_locks[locking.LEVEL_NODE] = nodelist # in case of import lock the source node too if self.op.mode == constants.INSTANCE_IMPORT: src_node = self.op.src_node src_path = self.op.src_path if src_path is None: self.op.src_path = src_path = self.op.instance_name if src_node is None: self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET self.op.src_node = None if os.path.isabs(src_path): raise errors.OpPrereqError("Importing an instance from a path" " requires a source node option", errors.ECODE_INVAL) else: (self.op.src_node_uuid, self.op.src_node) = (_, src_node) = \ ExpandNodeUuidAndName(self.cfg, self.op.src_node_uuid, src_node) if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: self.needed_locks[locking.LEVEL_NODE].append(self.op.src_node_uuid) if not os.path.isabs(src_path): self.op.src_path = \ utils.PathJoin(pathutils.EXPORT_DIR, src_path) self.needed_locks[locking.LEVEL_NODE_RES] = \ CopyLockList(self.needed_locks[locking.LEVEL_NODE]) # Optimistically acquire shared group locks (we're reading the # configuration). We can't just call GetInstanceNodeGroups, because the # instance doesn't exist yet. Therefore we lock all node groups of all # nodes we have. if self.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET: # In the case we lock all nodes for opportunistic allocation, we have no # choice than to lock all groups, because they're allocated before nodes. # This is sad, but true. At least we release all those we don't need in # CheckPrereq later. self.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET else: self.needed_locks[locking.LEVEL_NODEGROUP] = \ list(self.cfg.GetNodeGroupsFromNodes( self.needed_locks[locking.LEVEL_NODE])) self.share_locks[locking.LEVEL_NODEGROUP] = 1 def DeclareLocks(self, level): if level == locking.LEVEL_NODE_RES and \ self.opportunistic_locks[locking.LEVEL_NODE]: # Even when using opportunistic locking, we require the same set of # NODE_RES locks as we got NODE locks self.needed_locks[locking.LEVEL_NODE_RES] = \ self.owned_locks(locking.LEVEL_NODE) def _RunAllocator(self): """Run the allocator based on input opcode. 
""" if self.op.opportunistic_locking: # Only consider nodes for which a lock is held node_name_whitelist = self.cfg.GetNodeNames( self.owned_locks(locking.LEVEL_NODE)) else: node_name_whitelist = None #TODO Export network to iallocator so that it chooses a pnode # in a nodegroup that has the desired network connected to req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics, self.be_full, node_name_whitelist) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) if not ial.success: # When opportunistic locks are used only a temporary failure is generated if self.op.opportunistic_locking: ecode = errors.ECODE_TEMP_NORES else: ecode = errors.ECODE_NORES raise errors.OpPrereqError("Can't compute nodes using" " iallocator '%s': %s" % (self.op.iallocator, ial.info), ecode) (self.op.pnode_uuid, self.op.pnode) = \ ExpandNodeUuidAndName(self.cfg, None, ial.result[0]) self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", self.op.instance_name, self.op.iallocator, utils.CommaJoin(ial.result)) assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator" if req.RequiredNodes() == 2: (self.op.snode_uuid, self.op.snode) = \ ExpandNodeUuidAndName(self.cfg, None, ial.result[1]) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = { "ADD_MODE": self.op.mode, } if self.op.mode == constants.INSTANCE_IMPORT: env["SRC_NODE"] = self.op.src_node env["SRC_PATH"] = self.op.src_path env["SRC_IMAGES"] = self.src_images env.update(BuildInstanceHookEnv( name=self.op.instance_name, primary_node_name=self.op.pnode, secondary_node_names=self.cfg.GetNodeNames(self.secondaries), status=self.op.start, os_type=self.op.os_type, minmem=self.be_full[constants.BE_MINMEM], maxmem=self.be_full[constants.BE_MAXMEM], vcpus=self.be_full[constants.BE_VCPUS], nics=NICListToTuple(self, self.nics), disk_template=self.op.disk_template, disks=[(d[constants.IDISK_NAME], d.get("uuid", ""), d[constants.IDISK_SIZE], d[constants.IDISK_MODE]) for d in self.disks], bep=self.be_full, hvp=self.hv_full, hypervisor_name=self.op.hypervisor, tags=self.op.tags, )) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode(), self.op.pnode_uuid] + self.secondaries return nl, nl def _ReadExportInfo(self): """Reads the export information from disk. It will override the opcode source node and path with the actual information, if these two were not specified before. 
@return: the export information """ assert self.op.mode == constants.INSTANCE_IMPORT if self.op.src_node_uuid is None: locked_nodes = self.owned_locks(locking.LEVEL_NODE) exp_list = self.rpc.call_export_list(locked_nodes) found = False for node_uuid in exp_list: if exp_list[node_uuid].fail_msg: continue if self.op.src_path in exp_list[node_uuid].payload: found = True self.op.src_node = self.cfg.GetNodeInfo(node_uuid).name self.op.src_node_uuid = node_uuid self.op.src_path = utils.PathJoin(pathutils.EXPORT_DIR, self.op.src_path) break if not found: raise errors.OpPrereqError("No export found for relative path %s" % self.op.src_path, errors.ECODE_INVAL) CheckNodeOnline(self, self.op.src_node_uuid) result = self.rpc.call_export_info(self.op.src_node_uuid, self.op.src_path) result.Raise("No export or invalid export found in dir %s" % self.op.src_path) export_info = objects.SerializableConfigParser.Loads(str(result.payload)) if not export_info.has_section(constants.INISECT_EXP): raise errors.ProgrammerError("Corrupted export config", errors.ECODE_ENVIRON) ei_version = export_info.get(constants.INISECT_EXP, "version") if int(ei_version) != constants.EXPORT_VERSION: raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % (ei_version, constants.EXPORT_VERSION), errors.ECODE_ENVIRON) return export_info def _ReadExportParams(self, einfo): """Use export parameters as defaults. In case the opcode doesn't specify (as in override) some instance parameters, then try to use them from the export information, if that declares them. """ self.op.os_type = einfo.get(constants.INISECT_EXP, "os") if not self.op.disks: disks = [] # TODO: import the disk iv_name too for idx in range(constants.MAX_DISKS): if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx): disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) disks.append({constants.IDISK_SIZE: disk_sz}) self.op.disks = disks if not disks and self.op.disk_template != constants.DT_DISKLESS: raise errors.OpPrereqError("No disk info specified and the export" " is missing the disk information", errors.ECODE_INVAL) if not self.op.nics: nics = [] for idx in range(constants.MAX_NICS): if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx): ndict = {} for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: nic_param_name = "nic%d_%s" % (idx, name) if einfo.has_option(constants.INISECT_INS, nic_param_name): v = einfo.get(constants.INISECT_INS, nic_param_name) ndict[name] = v nics.append(ndict) else: break self.op.nics = nics if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"): self.op.tags = einfo.get(constants.INISECT_INS, "tags").split() if (self.op.hypervisor is None and einfo.has_option(constants.INISECT_INS, "hypervisor")): self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") if einfo.has_section(constants.INISECT_HYP): # use the export parameters but do not override the ones # specified by the user for name, value in einfo.items(constants.INISECT_HYP): if name not in self.op.hvparams: self.op.hvparams[name] = value if einfo.has_section(constants.INISECT_BEP): # use the parameters, without overriding for name, value in einfo.items(constants.INISECT_BEP): if name not in self.op.beparams: self.op.beparams[name] = value # Compatibility for the old "memory" be param if name == constants.BE_MEMORY: if constants.BE_MAXMEM not in self.op.beparams: self.op.beparams[constants.BE_MAXMEM] = value if constants.BE_MINMEM not in self.op.beparams: self.op.beparams[constants.BE_MINMEM] = value else: # 
try to read the parameters old style, from the main section for name in constants.BES_PARAMETERS: if (name not in self.op.beparams and einfo.has_option(constants.INISECT_INS, name)): self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) if einfo.has_section(constants.INISECT_OSP): # use the parameters, without overriding for name, value in einfo.items(constants.INISECT_OSP): if name not in self.op.osparams: self.op.osparams[name] = value def _RevertToDefaults(self, cluster): """Revert the instance parameters to the default values. """ # hvparams hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) for name in self.op.hvparams.keys(): if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: del self.op.hvparams[name] # beparams be_defs = cluster.SimpleFillBE({}) for name in self.op.beparams.keys(): if name in be_defs and be_defs[name] == self.op.beparams[name]: del self.op.beparams[name] # nic params nic_defs = cluster.SimpleFillNIC({}) for nic in self.op.nics: for name in constants.NICS_PARAMETERS: if name in nic and name in nic_defs and nic[name] == nic_defs[name]: del nic[name] # osparams os_defs = cluster.SimpleFillOS(self.op.os_type, {}) for name in self.op.osparams.keys(): if name in os_defs and os_defs[name] == self.op.osparams[name]: del self.op.osparams[name] def _CalculateFileStorageDir(self): """Calculate final instance file storage dir. """ # file storage dir calculation/check self.instance_file_storage_dir = None if self.op.disk_template in constants.DTS_FILEBASED: # build the full file storage dir path joinargs = [] if self.op.disk_template == constants.DT_SHARED_FILE: get_fsd_fn = self.cfg.GetSharedFileStorageDir else: get_fsd_fn = self.cfg.GetFileStorageDir cfg_storagedir = get_fsd_fn() if not cfg_storagedir: raise errors.OpPrereqError("Cluster file storage dir not defined", errors.ECODE_STATE) joinargs.append(cfg_storagedir) if self.op.file_storage_dir is not None: joinargs.append(self.op.file_storage_dir) joinargs.append(self.op.instance_name) # pylint: disable=W0142 self.instance_file_storage_dir = utils.PathJoin(*joinargs) def CheckPrereq(self): # pylint: disable=R0914 """Check prerequisites. 
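
# Illustrative sketch, not part of the original module: the directory built
# by _CalculateFileStorageDir above is
#   <cluster storage dir>[/<per-instance override>]/<instance name>.
# A standalone model using os.path.join instead of utils.PathJoin (the real
# helper additionally enforces that the result stays below the first
# component):
import os.path

def _ExampleFileStorageDir(cluster_dir, instance_name, override=None):
  parts = [cluster_dir]
  if override is not None:
    parts.append(override)
  parts.append(instance_name)
  return os.path.join(*parts)

assert (_ExampleFileStorageDir("/srv/ganeti/file-storage", "test1") ==
        "/srv/ganeti/file-storage/test1")
assert (_ExampleFileStorageDir("/srv/ganeti/file-storage", "test1", "ssd") ==
        "/srv/ganeti/file-storage/ssd/test1")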
""" # Check that the optimistically acquired groups are correct wrt the # acquired nodes owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) cur_groups = list(self.cfg.GetNodeGroupsFromNodes(owned_nodes)) if not owned_groups.issuperset(cur_groups): raise errors.OpPrereqError("New instance %s's node groups changed since" " locks were acquired, current groups are" " are '%s', owning groups '%s'; retry the" " operation" % (self.op.instance_name, utils.CommaJoin(cur_groups), utils.CommaJoin(owned_groups)), errors.ECODE_STATE) self._CalculateFileStorageDir() if self.op.mode == constants.INSTANCE_IMPORT: export_info = self._ReadExportInfo() self._ReadExportParams(export_info) self._old_instance_name = export_info.get(constants.INISECT_INS, "name") else: self._old_instance_name = None if (not self.cfg.GetVGName() and self.op.disk_template not in constants.DTS_NOT_LVM): raise errors.OpPrereqError("Cluster does not support lvm-based" " instances", errors.ECODE_STATE) if (self.op.hypervisor is None or self.op.hypervisor == constants.VALUE_AUTO): self.op.hypervisor = self.cfg.GetHypervisorType() cluster = self.cfg.GetClusterInfo() enabled_hvs = cluster.enabled_hypervisors if self.op.hypervisor not in enabled_hvs: raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" " cluster (%s)" % (self.op.hypervisor, ",".join(enabled_hvs)), errors.ECODE_STATE) # Check tag validity for tag in self.op.tags: objects.TaggableObject.ValidateTag(tag) # check hypervisor parameter syntax (locally) utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, self.op.hvparams) hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor) hv_type.CheckParameterSyntax(filled_hvp) self.hv_full = filled_hvp # check that we don't specify global parameters on an instance CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor", "instance", "cluster") # fill and remember the beparams dict self.be_full = _ComputeFullBeParams(self.op, cluster) # build os parameters self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) # now that hvp/bep are in final format, let's reset to defaults, # if told to do so if self.op.identify_defaults: self._RevertToDefaults(cluster) # NIC buildup self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg, self.proc.GetECId()) # disk checks/pre-build default_vg = self.cfg.GetVGName() self.disks = ComputeDisks(self.op, default_vg) if self.op.mode == constants.INSTANCE_IMPORT: disk_images = [] for idx in range(len(self.disks)): option = "disk%d_dump" % idx if export_info.has_option(constants.INISECT_INS, option): # FIXME: are the old os-es, disk sizes, etc. useful? 
export_name = export_info.get(constants.INISECT_INS, option) image = utils.PathJoin(self.op.src_path, export_name) disk_images.append(image) else: disk_images.append(False) self.src_images = disk_images if self.op.instance_name == self._old_instance_name: for idx, nic in enumerate(self.nics): if nic.mac == constants.VALUE_AUTO: nic_mac_ini = "nic%d_mac" % idx nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) # ENDIF: self.op.mode == constants.INSTANCE_IMPORT # ip ping checks (we use the same ip that was resolved in ExpandNames) if self.op.ip_check: if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("IP %s of instance %s already in use" % (self.check_ip, self.op.instance_name), errors.ECODE_NOTUNIQUE) #### mac address generation # By generating here the mac address both the allocator and the hooks get # the real final mac address rather than the 'auto' or 'generate' value. # There is a race condition between the generation and the instance object # creation, which means that we know the mac is valid now, but we're not # sure it will be when we actually add the instance. If things go bad # adding the instance will abort because of a duplicate mac, and the # creation job will fail. for nic in self.nics: if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId()) #### allocator run if self.op.iallocator is not None: self._RunAllocator() # Release all unneeded node locks keep_locks = filter(None, [self.op.pnode_uuid, self.op.snode_uuid, self.op.src_node_uuid]) ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks) ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks) ReleaseLocks(self, locking.LEVEL_NODE_ALLOC) # Release all unneeded group locks ReleaseLocks(self, locking.LEVEL_NODEGROUP, keep=self.cfg.GetNodeGroupsFromNodes(keep_locks)) assert (self.owned_locks(locking.LEVEL_NODE) == self.owned_locks(locking.LEVEL_NODE_RES)), \ "Node locks differ from node resource locks" #### node related checks # check primary node self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode_uuid) assert self.pnode is not None, \ "Cannot retrieve locked node %s" % self.op.pnode_uuid if pnode.offline: raise errors.OpPrereqError("Cannot use offline primary node '%s'" % pnode.name, errors.ECODE_STATE) if pnode.drained: raise errors.OpPrereqError("Cannot use drained primary node '%s'" % pnode.name, errors.ECODE_STATE) if not pnode.vm_capable: raise errors.OpPrereqError("Cannot use non-vm_capable primary node" " '%s'" % pnode.name, errors.ECODE_STATE) self.secondaries = [] # Fill in any IPs from IP pools. This must happen here, because we need to # know the nic's primary node, as specified by the iallocator for idx, nic in enumerate(self.nics): net_uuid = nic.network if net_uuid is not None: nobj = self.cfg.GetNetwork(net_uuid) netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.uuid) if netparams is None: raise errors.OpPrereqError("No netparams found for network" " %s. 
Probably not connected to"
                                     " node's %s nodegroup" %
                                     (nobj.name, self.pnode.name),
                                     errors.ECODE_INVAL)
        self.LogInfo("NIC/%d inherits netparams %s" %
                     (idx, netparams.values()))
        nic.nicparams = dict(netparams)
        if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            try:
              nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
                                         " from the address pool" % idx,
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
          else:
            try:
              self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP address %s already in use"
                                         " or does not belong to network %s" %
                                         (nic.ip, nobj.name),
                                         errors.ECODE_NOTUNIQUE)

      # net is None, ip None or given
      elif self.op.conflicts_check:
        _CheckForConflictingIp(self, nic.ip, self.pnode.uuid)

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode_uuid == pnode.uuid:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      CheckNodeOnline(self, self.op.snode_uuid)
      CheckNodeNotDrained(self, self.op.snode_uuid)
      CheckNodeVmCapable(self, self.op.snode_uuid)
      self.secondaries.append(self.op.snode_uuid)

      snode = self.cfg.GetNodeInfo(self.op.snode_uuid)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodes = [pnode]
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      nodes.append(snode)
    has_es = lambda n: IsExclusiveStorageEnabledNode(self.cfg, n)
    excl_stor = compat.any(map(has_es, nodes))
    if excl_stor and not self.op.disk_template in constants.DTS_EXCL_STORAGE:
      raise errors.OpPrereqError("Disk template %s not supported with"
                                 " exclusive storage" % self.op.disk_template,
                                 errors.ECODE_STATE)
    for disk in self.disks:
      CheckSpindlesExclusiveStorage(disk, excl_stor, True)

    node_uuids = [pnode.uuid] + self.secondaries

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
CheckRADOSFreeSpace() elif self.op.disk_template == constants.DT_EXT: # FIXME: Function that checks prereqs if needed pass elif self.op.disk_template in utils.GetLvmDiskTemplates(): # Check lv size requirements, if not adopting req_sizes = ComputeDiskSizePerVG(self.op.disk_template, self.disks) CheckNodesFreeDiskPerVG(self, node_uuids, req_sizes) else: # FIXME: add checks for other, non-adopting, non-lvm disk templates pass elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG], disk[constants.IDISK_ADOPT]) for disk in self.disks]) if len(all_lvs) != len(self.disks): raise errors.OpPrereqError("Duplicate volume names given for adoption", errors.ECODE_INVAL) for lv_name in all_lvs: try: # FIXME: lv_name here is "vg/lv" need to ensure that other calls # to ReserveLV uses the same syntax self.cfg.ReserveLV(lv_name, self.proc.GetECId()) except errors.ReservationError: raise errors.OpPrereqError("LV named %s used by another instance" % lv_name, errors.ECODE_NOTUNIQUE) vg_names = self.rpc.call_vg_list([pnode.uuid])[pnode.uuid] vg_names.Raise("Cannot get VG information from node %s" % pnode.name) node_lvs = self.rpc.call_lv_list([pnode.uuid], vg_names.payload.keys())[pnode.uuid] node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) node_lvs = node_lvs.payload delta = all_lvs.difference(node_lvs.keys()) if delta: raise errors.OpPrereqError("Missing logical volume(s): %s" % utils.CommaJoin(delta), errors.ECODE_INVAL) online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] if online_lvs: raise errors.OpPrereqError("Online logical volumes found, cannot" " adopt: %s" % utils.CommaJoin(online_lvs), errors.ECODE_STATE) # update the size of disk based on what is found for dsk in self.disks: dsk[constants.IDISK_SIZE] = \ int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG], dsk[constants.IDISK_ADOPT])][0])) elif self.op.disk_template == constants.DT_BLOCK: # Normalize and de-duplicate device paths all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT]) for disk in self.disks]) if len(all_disks) != len(self.disks): raise errors.OpPrereqError("Duplicate disk names given for adoption", errors.ECODE_INVAL) baddisks = [d for d in all_disks if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)] if baddisks: raise errors.OpPrereqError("Device node(s) %s lie outside %s and" " cannot be adopted" % (utils.CommaJoin(baddisks), constants.ADOPTABLE_BLOCKDEV_ROOT), errors.ECODE_INVAL) node_disks = self.rpc.call_bdev_sizes([pnode.uuid], list(all_disks))[pnode.uuid] node_disks.Raise("Cannot get block device information from node %s" % pnode.name) node_disks = node_disks.payload delta = all_disks.difference(node_disks.keys()) if delta: raise errors.OpPrereqError("Missing block device(s): %s" % utils.CommaJoin(delta), errors.ECODE_INVAL) for dsk in self.disks: dsk[constants.IDISK_SIZE] = \ int(float(node_disks[dsk[constants.IDISK_ADOPT]])) # Verify instance specs spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None) ispec = { constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None), constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None), constants.ISPEC_DISK_COUNT: len(self.disks), constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] for disk in self.disks], constants.ISPEC_NIC_COUNT: len(self.nics), constants.ISPEC_SPINDLE_USE: spindle_use, } group_info = self.cfg.GetNodeGroup(pnode.group) ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info) res = 
_ComputeIPolicyInstanceSpecViolation(ipolicy, ispec, self.op.disk_template)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    CheckHVParams(self, node_uuids, self.op.hypervisor, self.op.hvparams)

    CheckNodeHasOS(self, pnode.uuid, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    CheckOSParams(self, True, node_uuids, self.op.os_type, self.os_full)

    CheckNicsBridgesExist(self, self.nics, self.pnode.uuid)

    #TODO: _CheckExtParams (remotely)
    # Check parameters for extstorage

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      hvfull = objects.FillDict(cluster.hvparams.get(self.op.hypervisor, {}),
                                self.op.hvparams)
      CheckNodeFreeMemory(self, self.pnode.uuid,
                          "creating instance %s" % self.op.instance_name,
                          self.be_full[constants.BE_MAXMEM],
                          self.op.hypervisor, hvfull)

    self.dry_run_result = list(node_uuids)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    instance_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    nodegroup = self.cfg.GetNodeGroup(self.pnode.group)
    disks = GenerateDiskTemplate(self,
                                 self.op.disk_template,
                                 instance_uuid, self.pnode.uuid,
                                 self.secondaries,
                                 self.disks,
                                 self.instance_file_storage_dir,
                                 self.op.file_driver,
                                 0,
                                 feedback_fn,
                                 self.cfg.GetGroupDiskParams(nodegroup))

    iobj = objects.Instance(name=self.op.instance_name, uuid=instance_uuid,
                            os=self.op.os_type,
                            primary_node=self.pnode.uuid,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            disks_active=False,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0],
                              a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, self.pnode.uuid)
        result = self.rpc.call_blockdev_rename(self.pnode.uuid,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed")
        self.cfg.ReleaseDRBDMinors(self.op.instance_name)
        raise

    feedback_fn("adding instance %s to cluster config" %
                self.op.instance_name)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node_uuid]) else: # Release all nodes ReleaseLocks(self, locking.LEVEL_NODE) disk_abort = False if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks: feedback_fn("* wiping instance disks...") try: WipeDisks(self, iobj) except errors.OpExecError, err: logging.exception("Wiping disks failed") self.LogWarning("Wiping instance disks failed (%s)", err) disk_abort = True if disk_abort: # Something is already wrong with the disks, don't do anything else pass elif self.op.wait_for_sync: disk_abort = not WaitForSync(self, iobj) elif iobj.disk_template in constants.DTS_INT_MIRROR: # make sure the disks are not degraded (still sync-ing is ok) feedback_fn("* checking mirrors status") disk_abort = not WaitForSync(self, iobj, oneshot=True) else: disk_abort = False if disk_abort: RemoveDisks(self, iobj) self.cfg.RemoveInstance(iobj.uuid) # Make sure the instance lock gets removed self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name raise errors.OpExecError("There are some degraded disks for" " this instance") # instance disks are now active iobj.disks_active = True # Release all node resource locks ReleaseLocks(self, locking.LEVEL_NODE_RES) if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: # we need to set the disks ID to the primary node, since the # preceding code might or might have not done it, depending on # disk template and other options for disk in iobj.disks: self.cfg.SetDiskID(disk, self.pnode.uuid) if self.op.mode == constants.INSTANCE_CREATE: if not self.op.no_install: pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and not self.op.wait_for_sync) if pause_sync: feedback_fn("* pausing disk sync to install instance OS") result = self.rpc.call_blockdev_pause_resume_sync(self.pnode.uuid, (iobj.disks, iobj), True) for idx, success in enumerate(result.payload): if not success: logging.warn("pause-sync of instance %s for disk %d failed", self.op.instance_name, idx) feedback_fn("* running the instance OS create scripts...") # FIXME: pass debug option from opcode to backend os_add_result = \ self.rpc.call_instance_os_add(self.pnode.uuid, (iobj, None), False, self.op.debug_level) if pause_sync: feedback_fn("* resuming disk sync") result = self.rpc.call_blockdev_pause_resume_sync(self.pnode.uuid, (iobj.disks, iobj), False) for idx, success in enumerate(result.payload): if not success: logging.warn("resume-sync of instance %s for disk %d failed", self.op.instance_name, idx) os_add_result.Raise("Could not add os for instance %s" " on node %s" % (self.op.instance_name, self.pnode.name)) else: if self.op.mode == constants.INSTANCE_IMPORT: feedback_fn("* running the instance OS import scripts...") transfers = [] for idx, image in enumerate(self.src_images): if not image: continue # FIXME: pass debug option from opcode to backend dt = masterd.instance.DiskTransfer("disk/%s" % idx, constants.IEIO_FILE, (image, ), constants.IEIO_SCRIPT, (iobj.disks[idx], idx), None) transfers.append(dt) import_result = \ masterd.instance.TransferInstanceData(self, feedback_fn, self.op.src_node_uuid, self.pnode.uuid, self.pnode.secondary_ip, iobj, transfers) if not compat.all(import_result): self.LogWarning("Some disks for instance %s on node %s were not" " imported successfully" % (self.op.instance_name, self.pnode.name)) rename_from = self._old_instance_name elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: feedback_fn("* preparing remote import...") # The source cluster will stop the instance before 
        assert iobj.primary_node == self.pnode.uuid
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (self.op.instance_name,
                                                      self.pnode.name))

        rename_from = self.source_instance_name

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

      # Run rename script on newly imported instance
      assert iobj.name == self.op.instance_name
      feedback_fn("Running rename script for %s" % self.op.instance_name)
      result = self.rpc.call_instance_run_rename(self.pnode.uuid, iobj,
                                                 rename_from,
                                                 self.op.debug_level)
      result.Warn("Failed to run rename script for %s on node %s" %
                  (self.op.instance_name, self.pnode.name), self.LogWarning)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", self.op.instance_name,
                   self.pnode.name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(self.pnode.uuid,
                                            (iobj, None, None), False,
                                            self.op.reason)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
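
# Editorial usage sketch (not part of the original module): LUInstanceCreate
# backs "gnt-instance add", e.g. (names and sizes illustrative):
#
#   gnt-instance add -t drbd -o debootstrap+default -s 10G \
#     -n node1.example.com:node2.example.com instance1.example.com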


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    (self.op.instance_uuid, self.op.instance_name) = \
      ExpandInstanceUuidAndName(self.cfg, self.op.instance_uuid,
                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_uuid)
    assert instance is not None

    # It should actually not happen that an instance is running with a
    # disabled disk template, but in case it does, the renaming of file-based
    # instances will fail horribly. Thus, we test it before.
    if (instance.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != instance.name):
      CheckDiskTemplateEnabled(self.cfg.GetClusterInfo(),
                               instance.disk_template)

    CheckNodeOnline(self, instance.primary_node)
    CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                       msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_names = [inst.name for
                      inst in self.cfg.GetAllInstancesInfo().values()]
    if new_name in instance_names and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    old_name = self.instance.name

    rename_file_storage = False
    if (self.instance.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != self.instance.name):
      old_file_storage_dir = os.path.dirname(
                               self.instance.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(self.instance.uuid, self.op.new_name)

    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    renamed_inst = self.cfg.GetInstanceInfo(self.instance.uuid)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(
                               renamed_inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(renamed_inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (self.cfg.GetNodeName(renamed_inst.primary_node),
                    old_file_storage_dir, new_file_storage_dir))

    StartInstanceDisks(self, renamed_inst, None)

    # update info on disks
    info = GetInstanceInfoText(renamed_inst)
    for (idx, disk) in enumerate(renamed_inst.disks):
      for node_uuid in renamed_inst.all_nodes:
        self.cfg.SetDiskID(disk, node_uuid)
        result = self.rpc.call_blockdev_setinfo(node_uuid, disk, info)
        result.Warn("Error setting info on node %s for disk %s" %
                    (self.cfg.GetNodeName(node_uuid), idx), self.LogWarning)
    try:
      result = self.rpc.call_instance_run_rename(renamed_inst.primary_node,
                                                 renamed_inst, old_name,
                                                 self.op.debug_level)
      result.Warn("Could not run OS rename script for instance %s on node %s"
                  " (but the instance has been renamed in Ganeti)" %
                  (renamed_inst.name,
                   self.cfg.GetNodeName(renamed_inst.primary_node)),
                  self.LogWarning)
    finally:
      ShutdownInstanceDisks(self, renamed_inst)

    return renamed_inst.name
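
# Editorial usage sketch (not part of the original module): LUInstanceRename
# backs "gnt-instance rename". Per CheckArguments above, disabling only the
# name check while the IP check stays enabled is refused, so e.g.:
#
#   gnt-instance rename old.example.com new.example.com
#   gnt-instance rename --no-ip-check --no-name-check old new
#
# Hostnames are illustrative only.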
""" HPATH = "instance-remove" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] self.needed_locks[locking.LEVEL_NODE_RES] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes() elif level == locking.LEVEL_NODE_RES: # Copy node locks self.needed_locks[locking.LEVEL_NODE_RES] = \ CopyLockList(self.needed_locks[locking.LEVEL_NODE]) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = BuildInstanceHookEnvByObject(self, self.instance) env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] nl_post = list(self.instance.all_nodes) + nl return (nl, nl_post) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Remove the instance. """ logging.info("Shutting down instance %s on node %s", self.instance.name, self.cfg.GetNodeName(self.instance.primary_node)) result = self.rpc.call_instance_shutdown(self.instance.primary_node, self.instance, self.op.shutdown_timeout, self.op.reason) if self.op.ignore_failures: result.Warn("Warning: can't shutdown instance", feedback_fn) else: result.Raise("Could not shutdown instance %s on node %s" % (self.instance.name, self.cfg.GetNodeName(self.instance.primary_node))) assert (self.owned_locks(locking.LEVEL_NODE) == self.owned_locks(locking.LEVEL_NODE_RES)) assert not (set(self.instance.all_nodes) - self.owned_locks(locking.LEVEL_NODE)), \ "Not owning correct locks" RemoveInstance(self, feedback_fn, self.instance, self.op.ignore_failures) class LUInstanceMove(LogicalUnit): """Move an instance by data-copying. """ HPATH = "instance-move" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() (self.op.target_node_uuid, self.op.target_node) = \ ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid, self.op.target_node) self.needed_locks[locking.LEVEL_NODE] = [self.op.target_node_uuid] self.needed_locks[locking.LEVEL_NODE_RES] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes(primary_only=True) elif level == locking.LEVEL_NODE_RES: # Copy node locks self.needed_locks[locking.LEVEL_NODE_RES] = \ CopyLockList(self.needed_locks[locking.LEVEL_NODE]) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = { "TARGET_NODE": self.op.target_node, "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [ self.cfg.GetMasterNode(), self.instance.primary_node, self.op.target_node_uuid, ] return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. 
""" self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name if self.instance.disk_template not in constants.DTS_COPYABLE: raise errors.OpPrereqError("Disk template %s not suitable for copying" % self.instance.disk_template, errors.ECODE_STATE) target_node = self.cfg.GetNodeInfo(self.op.target_node_uuid) assert target_node is not None, \ "Cannot retrieve locked node %s" % self.op.target_node self.target_node_uuid = target_node.uuid if target_node.uuid == self.instance.primary_node: raise errors.OpPrereqError("Instance %s is already on the node %s" % (self.instance.name, target_node.name), errors.ECODE_STATE) bep = self.cfg.GetClusterInfo().FillBE(self.instance) for idx, dsk in enumerate(self.instance.disks): if dsk.dev_type not in (constants.DT_PLAIN, constants.DT_FILE, constants.DT_SHARED_FILE): raise errors.OpPrereqError("Instance disk %d has a complex layout," " cannot copy" % idx, errors.ECODE_STATE) CheckNodeOnline(self, target_node.uuid) CheckNodeNotDrained(self, target_node.uuid) CheckNodeVmCapable(self, target_node.uuid) cluster = self.cfg.GetClusterInfo() group_info = self.cfg.GetNodeGroup(target_node.group) ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info) CheckTargetNodeIPolicy(self, ipolicy, self.instance, target_node, self.cfg, ignore=self.op.ignore_ipolicy) if self.instance.admin_state == constants.ADMINST_UP: # check memory requirements on the secondary node CheckNodeFreeMemory( self, target_node.uuid, "failing over instance %s" % self.instance.name, bep[constants.BE_MAXMEM], self.instance.hypervisor, self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor]) else: self.LogInfo("Not checking memory on the secondary node as" " instance will not be started") # check bridge existance CheckInstanceBridgesExist(self, self.instance, node_uuid=target_node.uuid) def Exec(self, feedback_fn): """Move an instance. The move is done by shutting it down on its present node, copying the data over (slow) and starting it on the new node. """ source_node = self.cfg.GetNodeInfo(self.instance.primary_node) target_node = self.cfg.GetNodeInfo(self.target_node_uuid) self.LogInfo("Shutting down instance %s on source node %s", self.instance.name, source_node.name) assert (self.owned_locks(locking.LEVEL_NODE) == self.owned_locks(locking.LEVEL_NODE_RES)) result = self.rpc.call_instance_shutdown(source_node.uuid, self.instance, self.op.shutdown_timeout, self.op.reason) if self.op.ignore_consistency: result.Warn("Could not shutdown instance %s on node %s. Proceeding" " anyway. Please make sure node %s is down. 
Error details" % (self.instance.name, source_node.name, source_node.name), self.LogWarning) else: result.Raise("Could not shutdown instance %s on node %s" % (self.instance.name, source_node.name)) # create the target disks try: CreateDisks(self, self.instance, target_node_uuid=target_node.uuid) except errors.OpExecError: self.LogWarning("Device creation failed") self.cfg.ReleaseDRBDMinors(self.instance.uuid) raise cluster_name = self.cfg.GetClusterInfo().cluster_name errs = [] # activate, get path, copy the data over for idx, disk in enumerate(self.instance.disks): self.LogInfo("Copying data for disk %d", idx) result = self.rpc.call_blockdev_assemble( target_node.uuid, (disk, self.instance), self.instance.name, True, idx) if result.fail_msg: self.LogWarning("Can't assemble newly created disk %d: %s", idx, result.fail_msg) errs.append(result.fail_msg) break dev_path = result.payload result = self.rpc.call_blockdev_export(source_node.uuid, (disk, self.instance), target_node.secondary_ip, dev_path, cluster_name) if result.fail_msg: self.LogWarning("Can't copy data over for disk %d: %s", idx, result.fail_msg) errs.append(result.fail_msg) break if errs: self.LogWarning("Some disks failed to copy, aborting") try: RemoveDisks(self, self.instance, target_node_uuid=target_node.uuid) finally: self.cfg.ReleaseDRBDMinors(self.instance.uuid) raise errors.OpExecError("Errors during disk copy: %s" % (",".join(errs),)) self.instance.primary_node = target_node.uuid self.cfg.Update(self.instance, feedback_fn) self.LogInfo("Removing the disks on the original node") RemoveDisks(self, self.instance, target_node_uuid=source_node.uuid) # Only start the instance if it's marked as up if self.instance.admin_state == constants.ADMINST_UP: self.LogInfo("Starting instance %s on node %s", self.instance.name, target_node.name) disks_ok, _ = AssembleInstanceDisks(self, self.instance, ignore_secondaries=True) if not disks_ok: ShutdownInstanceDisks(self, self.instance) raise errors.OpExecError("Can't activate the instance's disks") result = self.rpc.call_instance_start(target_node.uuid, (self.instance, None, None), False, self.op.reason) msg = result.fail_msg if msg: ShutdownInstanceDisks(self, self.instance) raise errors.OpExecError("Could not start instance %s on node %s: %s" % (self.instance.name, target_node.name, msg)) class LUInstanceMultiAlloc(NoHooksLU): """Allocates multiple instances at the same time. """ REQ_BGL = False def CheckArguments(self): """Check arguments. 
""" nodes = [] for inst in self.op.instances: if inst.iallocator is not None: raise errors.OpPrereqError("iallocator are not allowed to be set on" " instance objects", errors.ECODE_INVAL) nodes.append(bool(inst.pnode)) if inst.disk_template in constants.DTS_INT_MIRROR: nodes.append(bool(inst.snode)) has_nodes = compat.any(nodes) if compat.all(nodes) ^ has_nodes: raise errors.OpPrereqError("There are instance objects providing" " pnode/snode while others do not", errors.ECODE_INVAL) if not has_nodes and self.op.iallocator is None: default_iallocator = self.cfg.GetDefaultIAllocator() if default_iallocator: self.op.iallocator = default_iallocator else: raise errors.OpPrereqError("No iallocator or nodes on the instances" " given and no cluster-wide default" " iallocator found; please specify either" " an iallocator or nodes on the instances" " or set a cluster-wide default iallocator", errors.ECODE_INVAL) _CheckOpportunisticLocking(self.op) dups = utils.FindDuplicates([op.instance_name for op in self.op.instances]) if dups: raise errors.OpPrereqError("There are duplicate instance names: %s" % utils.CommaJoin(dups), errors.ECODE_INVAL) def ExpandNames(self): """Calculate the locks. """ self.share_locks = ShareAll() self.needed_locks = { # iallocator will select nodes and even if no iallocator is used, # collisions with LUInstanceCreate should be avoided locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } if self.op.iallocator: self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET if self.op.opportunistic_locking: self.opportunistic_locks[locking.LEVEL_NODE] = True else: nodeslist = [] for inst in self.op.instances: (inst.pnode_uuid, inst.pnode) = \ ExpandNodeUuidAndName(self.cfg, inst.pnode_uuid, inst.pnode) nodeslist.append(inst.pnode_uuid) if inst.snode is not None: (inst.snode_uuid, inst.snode) = \ ExpandNodeUuidAndName(self.cfg, inst.snode_uuid, inst.snode) nodeslist.append(inst.snode_uuid) self.needed_locks[locking.LEVEL_NODE] = nodeslist # Lock resources of instance's primary and secondary nodes (copy to # prevent accidential modification) self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist) def DeclareLocks(self, level): if level == locking.LEVEL_NODE_RES and \ self.opportunistic_locks[locking.LEVEL_NODE]: # Even when using opportunistic locking, we require the same set of # NODE_RES locks as we got NODE locks self.needed_locks[locking.LEVEL_NODE_RES] = \ self.owned_locks(locking.LEVEL_NODE) def CheckPrereq(self): """Check prerequisite. 
""" if self.op.iallocator: cluster = self.cfg.GetClusterInfo() default_vg = self.cfg.GetVGName() ec_id = self.proc.GetECId() if self.op.opportunistic_locking: # Only consider nodes for which a lock is held node_whitelist = self.cfg.GetNodeNames( list(self.owned_locks(locking.LEVEL_NODE))) else: node_whitelist = None insts = [_CreateInstanceAllocRequest(op, ComputeDisks(op, default_vg), _ComputeNics(op, cluster, None, self.cfg, ec_id), _ComputeFullBeParams(op, cluster), node_whitelist) for op in self.op.instances] req = iallocator.IAReqMultiInstanceAlloc(instances=insts) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) if not ial.success: raise errors.OpPrereqError("Can't compute nodes using" " iallocator '%s': %s" % (self.op.iallocator, ial.info), errors.ECODE_NORES) self.ia_result = ial.result if self.op.dry_run: self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), { constants.JOB_IDS_KEY: [], }) def _ConstructPartialResult(self): """Contructs the partial result. """ if self.op.iallocator: (allocatable, failed_insts) = self.ia_result allocatable_insts = map(compat.fst, allocatable) else: allocatable_insts = [op.instance_name for op in self.op.instances] failed_insts = [] return { opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: allocatable_insts, opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed_insts, } def Exec(self, feedback_fn): """Executes the opcode. """ jobs = [] if self.op.iallocator: op2inst = dict((op.instance_name, op) for op in self.op.instances) (allocatable, failed) = self.ia_result for (name, node_names) in allocatable: op = op2inst.pop(name) (op.pnode_uuid, op.pnode) = \ ExpandNodeUuidAndName(self.cfg, None, node_names[0]) if len(node_names) > 1: (op.snode_uuid, op.snode) = \ ExpandNodeUuidAndName(self.cfg, None, node_names[1]) jobs.append([op]) missing = set(op2inst.keys()) - set(failed) assert not missing, \ "Iallocator did return incomplete result: %s" % \ utils.CommaJoin(missing) else: jobs.extend([op] for op in self.op.instances) return ResultWithJobs(jobs, **self._ConstructPartialResult()) class _InstNicModPrivate: """Data structure for network interface modifications. Used by L{LUInstanceSetParams}. """ def __init__(self): self.params = None self.filled = None def _PrepareContainerMods(mods, private_fn): """Prepares a list of container modifications by adding a private data field. @type mods: list of tuples; (operation, index, parameters) @param mods: List of modifications @type private_fn: callable or None @param private_fn: Callable for constructing a private data field for a modification @rtype: list """ if private_fn is None: fn = lambda: None else: fn = private_fn return [(op, idx, params, fn()) for (op, idx, params) in mods] def _CheckNodesPhysicalCPUs(lu, node_uuids, requested, hypervisor_specs): """Checks if nodes have enough physical CPUs This function checks if all given nodes have the needed number of physical CPUs. In case any node has less CPUs or we cannot get the information from the node, this function raises an OpPrereqError exception. 

def _CheckNodesPhysicalCPUs(lu, node_uuids, requested, hypervisor_specs):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node_uuids: C{list}
  @param node_uuids: the list of node UUIDs to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_specs: list of pairs (string, dict of strings)
  @param hypervisor_specs: list of hypervisor specifications in pairs
    (hypervisor_name, hvparams)
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
    or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(node_uuids, None, hypervisor_specs)
  for node_uuid in node_uuids:
    info = nodeinfo[node_uuid]
    node_name = lu.cfg.GetNodeName(node_uuid)
    info.Raise("Cannot get current information from node %s" % node_name,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node_name, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are"
                                 " required" %
                                 (node_name, num_cpus, requested),
                                 errors.ECODE_NORES)


def GetItemFromContainer(identifier, kind, container):
  """Return the item referred to by the identifier.

  @type identifier: string
  @param identifier: Item index or name or UUID
  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to get the item from

  """
  # Index
  try:
    idx = int(identifier)
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx
    return (absidx, container[idx])
  except ValueError:
    pass

  for idx, item in enumerate(container):
    if item.uuid == identifier or item.name == identifier:
      return (idx, item)

  raise errors.OpPrereqError("Cannot find %s with identifier %s" %
                             (kind, identifier), errors.ECODE_NOENT)
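
# Editorial sketch (not part of the original module): GetItemFromContainer
# accepts either a numeric index or a name/UUID; for a two-element list:
#
#   GetItemFromContainer("1", "disk", disks)   # -> (1, disks[1])
#   GetItemFromContainer("-1", "disk", disks)  # -> (1, disks[1]), the last
#   GetItemFromContainer("data", "disk", disks)  # matched via item.name/uuid
#
# Identifiers that do not parse as integers fall through to the name/UUID
# scan.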

def _ApplyContainerMods(kind, container, chgdesc, mods,
                        create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{_PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{_PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{_PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{_PrepareContainerMods}

  """
  for (op, identifier, params, private) in mods:
    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      # When adding an item, identifier can only be an index
      try:
        idx = int(identifier)
      except ValueError:
        raise errors.OpPrereqError("Only a positive integer or -1 is accepted"
                                   " as identifier for %s" % constants.DDM_ADD,
                                   errors.ECODE_INVAL)
      if idx == -1:
        addidx = len(container)
      else:
        if idx < 0:
          raise IndexError("Not accepting negative indices other than -1")
        elif idx > len(container):
          raise IndexError("Got %s index %s, but there are only %s" %
                           (kind, idx, len(container)))
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      (absidx, item) = GetItemFromContainer(identifier, kind, container)

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
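
# Editorial sketch (not part of the original module): a modify operation
# routed through _ApplyContainerMods, assuming a modify_fn returning a
# change list:
#
#   chgdesc = []
#   mods = _PrepareContainerMods([(constants.DDM_MODIFY, "0",
#                                  {"name": "root"})], None)
#   _ApplyContainerMods("disk", disks, chgdesc, mods, None, modify_fn, None)
#   # modify_fn is called as modify_fn(0, disks[0], {"name": "root"}, None)
#   # and its returned changes end up in chgdesc.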
""" for (op, _, params) in mods: assert ht.TDict(params) # If 'key_types' is an empty dict, we assume we have an # 'ext' template and thus do not ForceDictType if key_types: utils.ForceDictType(params, key_types) if op == constants.DDM_REMOVE: if params: raise errors.OpPrereqError("No settings should be passed when" " removing a %s" % kind, errors.ECODE_INVAL) elif op in (constants.DDM_ADD, constants.DDM_MODIFY): item_fn(op, params) else: raise errors.ProgrammerError("Unhandled operation '%s'" % op) def _VerifyDiskModification(self, op, params, excl_stor): """Verifies a disk modification. """ if op == constants.DDM_ADD: mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR) if mode not in constants.DISK_ACCESS_SET: raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, errors.ECODE_INVAL) size = params.get(constants.IDISK_SIZE, None) if size is None: raise errors.OpPrereqError("Required disk parameter '%s' missing" % constants.IDISK_SIZE, errors.ECODE_INVAL) try: size = int(size) except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid disk size parameter: %s" % err, errors.ECODE_INVAL) params[constants.IDISK_SIZE] = size name = params.get(constants.IDISK_NAME, None) if name is not None and name.lower() == constants.VALUE_NONE: params[constants.IDISK_NAME] = None CheckSpindlesExclusiveStorage(params, excl_stor, True) elif op == constants.DDM_MODIFY: if constants.IDISK_SIZE in params: raise errors.OpPrereqError("Disk size change not possible, use" " grow-disk", errors.ECODE_INVAL) # Disk modification supports changing only the disk name and mode. # Changing arbitrary parameters is allowed only for ext disk template", if self.instance.disk_template != constants.DT_EXT: utils.ForceDictType(params, constants.MODIFIABLE_IDISK_PARAMS_TYPES) name = params.get(constants.IDISK_NAME, None) if name is not None and name.lower() == constants.VALUE_NONE: params[constants.IDISK_NAME] = None @staticmethod def _VerifyNicModification(op, params): """Verifies a network interface modification. 
""" if op in (constants.DDM_ADD, constants.DDM_MODIFY): ip = params.get(constants.INIC_IP, None) name = params.get(constants.INIC_NAME, None) req_net = params.get(constants.INIC_NETWORK, None) link = params.get(constants.NIC_LINK, None) mode = params.get(constants.NIC_MODE, None) if name is not None and name.lower() == constants.VALUE_NONE: params[constants.INIC_NAME] = None if req_net is not None: if req_net.lower() == constants.VALUE_NONE: params[constants.INIC_NETWORK] = None req_net = None elif link is not None or mode is not None: raise errors.OpPrereqError("If network is given" " mode or link should not", errors.ECODE_INVAL) if op == constants.DDM_ADD: macaddr = params.get(constants.INIC_MAC, None) if macaddr is None: params[constants.INIC_MAC] = constants.VALUE_AUTO if ip is not None: if ip.lower() == constants.VALUE_NONE: params[constants.INIC_IP] = None else: if ip.lower() == constants.NIC_IP_POOL: if op == constants.DDM_ADD and req_net is None: raise errors.OpPrereqError("If ip=pool, parameter network" " cannot be none", errors.ECODE_INVAL) else: if not netutils.IPAddress.IsValid(ip): raise errors.OpPrereqError("Invalid IP address '%s'" % ip, errors.ECODE_INVAL) if constants.INIC_MAC in params: macaddr = params[constants.INIC_MAC] if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): macaddr = utils.NormalizeAndValidateMac(macaddr) if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO: raise errors.OpPrereqError("'auto' is not a valid MAC address when" " modifying an existing NIC", errors.ECODE_INVAL) def CheckArguments(self): if not (self.op.nics or self.op.disks or self.op.disk_template or self.op.hvparams or self.op.beparams or self.op.os_name or self.op.osparams or self.op.offline is not None or self.op.runtime_mem or self.op.pnode): raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) if self.op.hvparams: CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor", "instance", "cluster") self.op.disks = self._UpgradeDiskNicMods( "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications) self.op.nics = self._UpgradeDiskNicMods( "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications) if self.op.disks and self.op.disk_template is not None: raise errors.OpPrereqError("Disk template conversion and other disk" " changes not supported at the same time", errors.ECODE_INVAL) if (self.op.disk_template and self.op.disk_template in constants.DTS_INT_MIRROR and self.op.remote_node is None): raise errors.OpPrereqError("Changing the disk template to a mirrored" " one requires specifying a secondary node", errors.ECODE_INVAL) # Check NIC modifications self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES, self._VerifyNicModification) if self.op.pnode: (self.op.pnode_uuid, self.op.pnode) = \ ExpandNodeUuidAndName(self.cfg, self.op.pnode_uuid, self.op.pnode) def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODEGROUP] = [] # Can't even acquire node locks in shared mode as upcoming changes in # Ganeti 2.6 will start to modify the node object on disk conversion self.needed_locks[locking.LEVEL_NODE] = [] self.needed_locks[locking.LEVEL_NODE_RES] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE # Look node group to look up the ipolicy self.share_locks[locking.LEVEL_NODEGROUP] = 1 def DeclareLocks(self, level): if level == locking.LEVEL_NODEGROUP: assert not self.needed_locks[locking.LEVEL_NODEGROUP] # Acquire locks for the instance's 
  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.osparams or self.op.offline is not None or
            self.op.runtime_mem or self.op.pnode):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
                           "hypervisor", "instance", "cluster")

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

    if self.op.pnode:
      (self.op.pnode_uuid, self.op.pnode) = \
        ExpandNodeUuidAndName(self.cfg, self.op.pnode_uuid, self.op.pnode)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_uuid)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        (self.op.remote_node_uuid, self.op.remote_node) = \
          ExpandNodeUuidAndName(self.cfg, self.op.remote_node_uuid,
                                self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node_uuid)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(NICToTuple(self, n))

      args["nics"] = nics

    env = BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
for correctness") if constants.INIC_MAC in params: mac = params[constants.INIC_MAC] if mac is None: raise errors.OpPrereqError("Cannot unset the NIC MAC address", errors.ECODE_INVAL) elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): # otherwise generate the MAC address params[constants.INIC_MAC] = \ self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId()) else: # or validate/reserve the current one try: self.cfg.ReserveMAC(mac, self.proc.GetECId()) except errors.ReservationError: raise errors.OpPrereqError("MAC address '%s' already in use" " in cluster" % mac, errors.ECODE_NOTUNIQUE) elif new_net_uuid != old_net_uuid: def get_net_prefix(net_uuid): mac_prefix = None if net_uuid: nobj = self.cfg.GetNetwork(net_uuid) mac_prefix = nobj.mac_prefix return mac_prefix new_prefix = get_net_prefix(new_net_uuid) old_prefix = get_net_prefix(old_net_uuid) if old_prefix != new_prefix: params[constants.INIC_MAC] = \ self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId()) # if there is a change in (ip, network) tuple new_ip = params.get(constants.INIC_IP, old_ip) if (new_ip, new_net_uuid) != (old_ip, old_net_uuid): if new_ip: # if IP is pool then require a network and generate one IP if new_ip.lower() == constants.NIC_IP_POOL: if new_net_uuid: try: new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId()) except errors.ReservationError: raise errors.OpPrereqError("Unable to get a free IP" " from the address pool", errors.ECODE_STATE) self.LogInfo("Chose IP %s from network %s", new_ip, new_net_obj.name) params[constants.INIC_IP] = new_ip else: raise errors.OpPrereqError("ip=pool, but no network found", errors.ECODE_INVAL) # Reserve new IP if in the new network if any elif new_net_uuid: try: self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId()) self.LogInfo("Reserving IP %s in network %s", new_ip, new_net_obj.name) except errors.ReservationError: raise errors.OpPrereqError("IP %s not available in network %s" % (new_ip, new_net_obj.name), errors.ECODE_NOTUNIQUE) # new network is None so check if new IP is a conflicting IP elif self.op.conflicts_check: _CheckForConflictingIp(self, new_ip, pnode_uuid) # release old IP if old network is not None if old_ip and old_net_uuid: try: self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId()) except errors.AddressPoolError: logging.warning("Release IP %s not contained in network %s", old_ip, old_net_obj.name) # there are no changes in (ip, network) tuple and old network is not None elif (old_net_uuid is not None and (req_link is not None or req_mode is not None)): raise errors.OpPrereqError("Not allowed to change link or mode of" " a NIC that is connected to a network", errors.ECODE_INVAL) private.params = new_params private.filled = new_filled_params def _PreCheckDiskTemplate(self, pnode_info): """CheckPrereq checks related to a new disk template.""" # Arguments are passed to avoid configuration lookups pnode_uuid = self.instance.primary_node if self.instance.disk_template == self.op.disk_template: raise errors.OpPrereqError("Instance already has disk template %s" % self.instance.disk_template, errors.ECODE_INVAL) if not self.cluster.IsDiskTemplateEnabled(self.op.disk_template): raise errors.OpPrereqError("Disk template '%s' is not enabled for this" " cluster." 
  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    pnode_uuid = self.instance.primary_node
    if self.instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 self.instance.disk_template,
                                 errors.ECODE_INVAL)

    if not self.cluster.IsDiskTemplateEnabled(self.op.disk_template):
      raise errors.OpPrereqError("Disk template '%s' is not enabled for this"
                                 " cluster." % self.op.disk_template,
                                 errors.ECODE_INVAL)

    if (self.instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (self.instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    CheckInstanceState(self, self.instance, INSTANCE_DOWN,
                       msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node_uuid == pnode_uuid:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      CheckNodeOnline(self, self.op.remote_node_uuid)
      CheckNodeNotDrained(self, self.op.remote_node_uuid)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert self.instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in self.instance.disks]
      required = ComputeDiskSizePerVG(self.op.disk_template, disks)
      CheckNodesFreeDiskPerVG(self, [self.op.remote_node_uuid], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node_uuid)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(self.cluster,
                                                              snode_group)
      CheckTargetNodeIPolicy(self, ipolicy, self.instance, snode_info,
                             self.cfg, ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (self.instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
  def _PreCheckDisks(self, ispec):
    """CheckPrereq checks related to disk changes.

    @type ispec: dict
    @param ispec: instance specs to be updated with the new disks

    """
    self.diskparams = self.cfg.GetInstanceDiskParams(self.instance)

    excl_stor = compat.any(
      rpc.GetExclusiveStorageForNodes(self.cfg,
                                      self.instance.all_nodes).values()
      )

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    ver_fn = lambda op, par: self._VerifyDiskModification(op, par, excl_stor)
    if self.instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {}, ver_fn)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      ver_fn)

    self.diskmod = _PrepareContainerMods(self.op.disks, None)

    # Check the validity of the `provider' parameter
    if self.instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    if self.op.disks and self.instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareDiskMod(_, disk, params, __):
      disk.name = params.get(constants.IDISK_NAME, None)

    # Verify disk changes (operating on a copy)
    disks = copy.deepcopy(self.instance.disks)
    _ApplyContainerMods("disk", disks, None, self.diskmod, None,
                        _PrepareDiskMod, None)
    utils.ValidateDeviceNames("disk", disks)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in self.instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      CheckInstanceState(self, self.instance, CAN_CHANGE_INSTANCE_OFFLINE,
                         msg="can't change to offline")
""" assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE) self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) self.cluster = self.cfg.GetClusterInfo() assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name pnode_uuid = self.instance.primary_node self.warn = [] if (self.op.pnode_uuid is not None and self.op.pnode_uuid != pnode_uuid and not self.op.force): # verify that the instance is not up instance_info = self.rpc.call_instance_info( pnode_uuid, self.instance.name, self.instance.hypervisor, self.instance.hvparams) if instance_info.fail_msg: self.warn.append("Can't get instance runtime information: %s" % instance_info.fail_msg) elif instance_info.payload: raise errors.OpPrereqError("Instance is still running on %s" % self.cfg.GetNodeName(pnode_uuid), errors.ECODE_STATE) assert pnode_uuid in self.owned_locks(locking.LEVEL_NODE) node_uuids = list(self.instance.all_nodes) pnode_info = self.cfg.GetNodeInfo(pnode_uuid) #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups) assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP) group_info = self.cfg.GetNodeGroup(pnode_info.group) # dictionary with instance information after the modification ispec = {} # Prepare NIC modifications self.nicmod = _PrepareContainerMods(self.op.nics, _InstNicModPrivate) # OS change if self.op.os_name and not self.op.force: CheckNodeHasOS(self, self.instance.primary_node, self.op.os_name, self.op.force_variant) instance_os = self.op.os_name else: instance_os = self.instance.os assert not (self.op.disk_template and self.op.disks), \ "Can't modify disk template and apply disk changes at the same time" if self.op.disk_template: self._PreCheckDiskTemplate(pnode_info) self._PreCheckDisks(ispec) # hvparams processing if self.op.hvparams: hv_type = self.instance.hypervisor i_hvdict = GetUpdatedParams(self.instance.hvparams, self.op.hvparams) utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) hv_new = self.cluster.SimpleFillHV(hv_type, self.instance.os, i_hvdict) # local check hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new) CheckHVParams(self, node_uuids, self.instance.hypervisor, hv_new) self.hv_proposed = self.hv_new = hv_new # the new actual values self.hv_inst = i_hvdict # the new dict (without defaults) else: self.hv_proposed = self.cluster.SimpleFillHV(self.instance.hypervisor, self.instance.os, self.instance.hvparams) self.hv_new = self.hv_inst = {} # beparams processing if self.op.beparams: i_bedict = GetUpdatedParams(self.instance.beparams, self.op.beparams, use_none=True) objects.UpgradeBeParams(i_bedict) utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) be_new = self.cluster.SimpleFillBE(i_bedict) self.be_proposed = self.be_new = be_new # the new actual values self.be_inst = i_bedict # the new dict (without defaults) else: self.be_new = self.be_inst = {} self.be_proposed = self.cluster.SimpleFillBE(self.instance.beparams) be_old = self.cluster.FillBE(self.instance) # CPU param validation -- checking every time a parameter is # changed to cover all cases where either CPU mask or vcpus have # changed if (constants.BE_VCPUS in self.be_proposed and constants.HV_CPU_MASK in self.hv_proposed): cpu_list = \ utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK]) # Verify mask is consistent with number of vCPUs. Can skip this # test if only 1 entry in the CPU mask, which means same mask # is applied to all vCPUs. 
    # osparams processing
    if self.op.osparams:
      i_osdict = GetUpdatedParams(self.instance.osparams, self.op.osparams)
      CheckOSParams(self, True, node_uuids, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode_uuid]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(self.instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(
          pnode_uuid, self.instance.name, self.instance.hypervisor,
          self.instance.hvparams)
      hvspecs = [(self.instance.hypervisor,
                  self.cluster.hvparams[self.instance.hypervisor])]
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         hvspecs)
      pninfo = nodeinfo[pnode_uuid]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (self.cfg.GetNodeName(pnode_uuid), msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" %
                           self.cfg.GetNodeName(pnode_uuid))
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)
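      # Editorial arithmetic sketch (values illustrative, not from this
      # module): raising maxmem to 4096 MB while the instance currently uses
      # 1024 MB and the primary node reports 2048 MB free gives
      # miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.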
not enough memory" % self.cfg.GetNodeName(node_uuid), errors.ECODE_STATE) if self.op.runtime_mem: remote_info = self.rpc.call_instance_info( self.instance.primary_node, self.instance.name, self.instance.hypervisor, self.cluster.hvparams[self.instance.hypervisor]) remote_info.Raise("Error checking node %s" % self.cfg.GetNodeName(self.instance.primary_node)) if not remote_info.payload: # not running already raise errors.OpPrereqError("Instance %s is not running" % self.instance.name, errors.ECODE_STATE) current_memory = remote_info.payload["memory"] if (not self.op.force and (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])): raise errors.OpPrereqError("Instance %s must have memory between %d" " and %d MB of memory unless --force is" " given" % (self.instance.name, self.be_proposed[constants.BE_MINMEM], self.be_proposed[constants.BE_MAXMEM]), errors.ECODE_INVAL) delta = self.op.runtime_mem - current_memory if delta > 0: CheckNodeFreeMemory( self, self.instance.primary_node, "ballooning memory for instance %s" % self.instance.name, delta, self.instance.hypervisor, self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor]) # make self.cluster visible in the functions below cluster = self.cluster def _PrepareNicCreate(_, params, private): self._PrepareNicModification(params, private, None, None, {}, cluster, pnode_uuid) return (None, None) def _PrepareNicMod(_, nic, params, private): self._PrepareNicModification(params, private, nic.ip, nic.network, nic.nicparams, cluster, pnode_uuid) return None def _PrepareNicRemove(_, params, __): ip = params.ip net = params.network if net is not None and ip is not None: self.cfg.ReleaseIp(net, ip, self.proc.GetECId()) # Verify NIC changes (operating on copy) nics = self.instance.nics[:] _ApplyContainerMods("NIC", nics, None, self.nicmod, _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove) if len(nics) > constants.MAX_NICS: raise errors.OpPrereqError("Instance has too many network interfaces" " (%d), cannot add more" % constants.MAX_NICS, errors.ECODE_STATE) # Pre-compute NIC changes (necessary to use result in hooks) self._nic_chgdesc = [] if self.nicmod: # Operate on copies as this is still in prereq nics = [nic.Copy() for nic in self.instance.nics] _ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod, self._CreateNewNic, self._ApplyNicMods, None) # Verify that NIC names are unique and valid utils.ValidateDeviceNames("NIC", nics) self._new_nics = nics ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics) else: self._new_nics = None ispec[constants.ISPEC_NIC_COUNT] = len(self.instance.nics) if not self.op.ignore_ipolicy: ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(self.cluster, group_info) # Fill ispec with backend parameters ispec[constants.ISPEC_SPINDLE_USE] = \ self.be_new.get(constants.BE_SPINDLE_USE, None) ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS, None) # Copy ispec to verify parameters with min/max values separately if self.op.disk_template: new_disk_template = self.op.disk_template else: new_disk_template = self.instance.disk_template ispec_max = ispec.copy() ispec_max[constants.ISPEC_MEM_SIZE] = \ self.be_new.get(constants.BE_MAXMEM, None) res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max, new_disk_template) ispec_min = ispec.copy() ispec_min[constants.ISPEC_MEM_SIZE] = \ self.be_new.get(constants.BE_MINMEM, None) res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min, new_disk_template) if 
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    pnode_uuid = self.instance.primary_node
    snode_uuid = self.op.remote_node_uuid

    assert self.instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0],
                  constants.IDISK_NAME: d.name}
                 for d in self.instance.disks]
    new_disks = GenerateDiskTemplate(self, self.op.disk_template,
                                     self.instance.uuid, pnode_uuid,
                                     [snode_uuid], disk_info, None, None, 0,
                                     feedback_fn, self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = IsExclusiveStorageEnabledNodeUuid(self.cfg, pnode_uuid)
    s_excl_stor = IsExclusiveStorageEnabledNodeUuid(self.cfg, snode_uuid)
    info = GetInstanceInfoText(self.instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      CreateSingleBlockDev(self, pnode_uuid, self.instance, disk.children[1],
                           info, True, p_excl_stor)
      for child in disk.children:
        CreateSingleBlockDev(self, snode_uuid, self.instance, child, info,
                             True, s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(self.instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode_uuid, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    try:
      for disk in anno_disks:
        for (node_uuid, excl_stor) in [(pnode_uuid, p_excl_stor),
                                       (snode_uuid, s_excl_stor)]:
          f_create = node_uuid == pnode_uuid
          CreateSingleBlockDev(self, node_uuid, self.instance, disk, info,
                               f_create, excl_stor)
    except errors.GenericError, e:
      feedback_fn("Initializing of DRBD devices failed;"
                  " renaming back original volumes...")
      for disk in new_disks:
        self.cfg.SetDiskID(disk, pnode_uuid)
      rename_back_list = [(n.children[0], o.logical_id)
                          for (n, o) in zip(new_disks, self.instance.disks)]
      result = self.rpc.call_blockdev_rename(pnode_uuid, rename_back_list)
      result.Raise("Failed to rename LVs back after error %s" % str(e))
      raise

    # at this point, the instance has been modified
    self.instance.disk_template = constants.DT_DRBD8
    self.instance.disks = new_disks
    self.cfg.Update(self.instance, feedback_fn)

    # Release node locks while waiting for sync
    ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not WaitForSync(self, self.instance,
                                 oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")

    # Node resource locks will be released by caller
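  # Editorial sketch (not part of the original module): in the plain->drbd
  # conversion above, rename_list pairs each existing volume with the name
  # generated for the new DRBD data LV, e.g. (names illustrative):
  #
  #   [(("xenvg", "old-data-lv"), ("xenvg", "<uuid>.disk0_data"))]
  #
  # so the original LV is renamed in place and becomes the DRBD backing
  # device.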
""" assert len(self.instance.secondary_nodes) == 1 assert self.instance.disk_template == constants.DT_DRBD8 pnode_uuid = self.instance.primary_node snode_uuid = self.instance.secondary_nodes[0] feedback_fn("Converting template to plain") old_disks = AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) new_disks = [d.children[0] for d in self.instance.disks] # copy over size, mode and name for parent, child in zip(old_disks, new_disks): child.size = parent.size child.mode = parent.mode child.name = parent.name # this is a DRBD disk, return its port to the pool # NOTE: this must be done right before the call to cfg.Update! for disk in old_disks: tcp_port = disk.logical_id[2] self.cfg.AddTcpUdpPort(tcp_port) # update instance structure self.instance.disks = new_disks self.instance.disk_template = constants.DT_PLAIN _UpdateIvNames(0, self.instance.disks) self.cfg.Update(self.instance, feedback_fn) # Release locks in case removing disks takes a while ReleaseLocks(self, locking.LEVEL_NODE) feedback_fn("Removing volumes on the secondary node...") for disk in old_disks: self.cfg.SetDiskID(disk, snode_uuid) msg = self.rpc.call_blockdev_remove(snode_uuid, disk).fail_msg if msg: self.LogWarning("Could not remove block device %s on node %s," " continuing anyway: %s", disk.iv_name, self.cfg.GetNodeName(snode_uuid), msg) feedback_fn("Removing unneeded volumes on the primary node...") for idx, disk in enumerate(old_disks): meta = disk.children[1] self.cfg.SetDiskID(meta, pnode_uuid) msg = self.rpc.call_blockdev_remove(pnode_uuid, meta).fail_msg if msg: self.LogWarning("Could not remove metadata for disk %d on node %s," " continuing anyway: %s", idx, self.cfg.GetNodeName(pnode_uuid), msg) def _CreateNewDisk(self, idx, params, _): """Creates a new disk. """ # add a new disk if self.instance.disk_template in constants.DTS_FILEBASED: (file_driver, file_path) = self.instance.disks[0].logical_id file_path = os.path.dirname(file_path) else: file_driver = file_path = None disk = \ GenerateDiskTemplate(self, self.instance.disk_template, self.instance.uuid, self.instance.primary_node, self.instance.secondary_nodes, [params], file_path, file_driver, idx, self.Log, self.diskparams)[0] new_disks = CreateDisks(self, self.instance, disks=[disk]) if self.cluster.prealloc_wipe_disks: # Wipe new disk WipeOrCleanupDisks(self, self.instance, disks=[(idx, disk, 0)], cleanup=new_disks) return (disk, [ ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)), ]) def _ModifyDisk(self, idx, disk, params, _): """Modifies a disk. """ changes = [] if constants.IDISK_MODE in params: disk.mode = params.get(constants.IDISK_MODE) changes.append(("disk.mode/%d" % idx, disk.mode)) if constants.IDISK_NAME in params: disk.name = params.get(constants.IDISK_NAME) changes.append(("disk.name/%d" % idx, disk.name)) # Modify arbitrary params in case instance template is ext for key, value in params.iteritems(): if (key not in constants.MODIFIABLE_IDISK_PARAMS and self.instance.disk_template == constants.DT_EXT): # stolen from GetUpdatedParams: default means reset/delete if value.lower() == constants.VALUE_DEFAULT: try: del disk.params[key] except KeyError: pass else: disk.params[key] = value changes.append(("disk.params:%s/%d" % (key, idx), value)) return changes def _RemoveDisk(self, idx, root, _): """Removes a disk. 
""" (anno_disk,) = AnnotateDiskParams(self.instance, [root], self.cfg) for node_uuid, disk in anno_disk.ComputeNodeTree( self.instance.primary_node): self.cfg.SetDiskID(disk, node_uuid) msg = self.rpc.call_blockdev_remove(node_uuid, disk).fail_msg if msg: self.LogWarning("Could not remove disk/%d on node '%s': %s," " continuing anyway", idx, self.cfg.GetNodeName(node_uuid), msg) # if this is a DRBD disk, return its port to the pool if root.dev_type in constants.DTS_DRBD: self.cfg.AddTcpUdpPort(root.logical_id[2]) def _CreateNewNic(self, idx, params, private): """Creates data structure for a new network interface. """ mac = params[constants.INIC_MAC] ip = params.get(constants.INIC_IP, None) net = params.get(constants.INIC_NETWORK, None) name = params.get(constants.INIC_NAME, None) net_uuid = self.cfg.LookupNetwork(net) #TODO: not private.filled?? can a nic have no nicparams?? nicparams = private.filled nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name, nicparams=nicparams) nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) return (nobj, [ ("nic.%d" % idx, "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" % (mac, ip, private.filled[constants.NIC_MODE], private.filled[constants.NIC_LINK], net)), ]) def _ApplyNicMods(self, idx, nic, params, private): """Modifies a network interface. """ changes = [] for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]: if key in params: changes.append(("nic.%s/%d" % (key, idx), params[key])) setattr(nic, key, params[key]) new_net = params.get(constants.INIC_NETWORK, nic.network) new_net_uuid = self.cfg.LookupNetwork(new_net) if new_net_uuid != nic.network: changes.append(("nic.network/%d" % idx, new_net)) nic.network = new_net_uuid if private.filled: nic.nicparams = private.filled for (key, val) in nic.nicparams.items(): changes.append(("nic.%s/%d" % (key, idx), val)) return changes def Exec(self, feedback_fn): """Modifies an instance. All parameters take effect only at the next restart of the instance. """ # Process here the warnings from CheckPrereq, as we don't have a # feedback_fn there. 
# TODO: Replace with self.LogWarning for warn in self.warn: feedback_fn("WARNING: %s" % warn) assert ((self.op.disk_template is None) ^ bool(self.owned_locks(locking.LEVEL_NODE_RES))), \ "Not owning any node resource locks" result = [] # New primary node if self.op.pnode_uuid: self.instance.primary_node = self.op.pnode_uuid # runtime memory if self.op.runtime_mem: rpcres = self.rpc.call_instance_balloon_memory(self.instance.primary_node, self.instance, self.op.runtime_mem) rpcres.Raise("Cannot modify instance runtime memory") result.append(("runtime_memory", self.op.runtime_mem)) # Apply disk changes _ApplyContainerMods("disk", self.instance.disks, result, self.diskmod, self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk) _UpdateIvNames(0, self.instance.disks) if self.op.disk_template: if __debug__: check_nodes = set(self.instance.all_nodes) if self.op.remote_node_uuid: check_nodes.add(self.op.remote_node_uuid) for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: owned = self.owned_locks(level) assert not (check_nodes - owned), \ ("Not owning the correct locks, owning %r, expected at least %r" % (owned, check_nodes)) r_shut = ShutdownInstanceDisks(self, self.instance) if not r_shut: raise errors.OpExecError("Cannot shutdown instance disks, unable to" " proceed with disk template conversion") mode = (self.instance.disk_template, self.op.disk_template) try: self._DISK_CONVERSIONS[mode](self, feedback_fn) except: self.cfg.ReleaseDRBDMinors(self.instance.uuid) raise result.append(("disk_template", self.op.disk_template)) assert self.instance.disk_template == self.op.disk_template, \ ("Expected disk template '%s', found '%s'" % (self.op.disk_template, self.instance.disk_template)) # Release node and resource locks if there are any (they might already have # been released during disk conversion) ReleaseLocks(self, locking.LEVEL_NODE) ReleaseLocks(self, locking.LEVEL_NODE_RES) # Apply NIC changes if self._new_nics is not None: self.instance.nics = self._new_nics result.extend(self._nic_chgdesc) # hvparams changes if self.op.hvparams: self.instance.hvparams = self.hv_inst for key, val in self.op.hvparams.iteritems(): result.append(("hv/%s" % key, val)) # beparams changes if self.op.beparams: self.instance.beparams = self.be_inst for key, val in self.op.beparams.iteritems(): result.append(("be/%s" % key, val)) # OS change if self.op.os_name: self.instance.os = self.op.os_name # osparams changes if self.op.osparams: self.instance.osparams = self.os_inst for key, val in self.op.osparams.iteritems(): result.append(("os/%s" % key, val)) if self.op.offline is None: # Ignore pass elif self.op.offline: # Mark instance as offline self.cfg.MarkInstanceOffline(self.instance.uuid) result.append(("admin_state", constants.ADMINST_OFFLINE)) else: # Mark instance as online, but stopped self.cfg.MarkInstanceDown(self.instance.uuid) result.append(("admin_state", constants.ADMINST_DOWN)) self.cfg.Update(self.instance, feedback_fn, self.proc.GetECId()) assert not (self.owned_locks(locking.LEVEL_NODE_RES) or self.owned_locks(locking.LEVEL_NODE)), \ "All node locks should have been released by now" return result _DISK_CONVERSIONS = { (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, } class LUInstanceChangeGroup(LogicalUnit): HPATH = "instance-change-group" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_NODEGROUP: [], locking.LEVEL_NODE: [], 
locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } self._ExpandAndLockInstance() if self.op.target_groups: self.req_target_uuids = map(self.cfg.LookupNodeGroup, self.op.target_groups) else: self.req_target_uuids = None self.op.iallocator = GetDefaultIAllocator(self.cfg, self.op.iallocator) def DeclareLocks(self, level): if level == locking.LEVEL_NODEGROUP: assert not self.needed_locks[locking.LEVEL_NODEGROUP] if self.req_target_uuids: lock_groups = set(self.req_target_uuids) # Lock all groups used by instance optimistically; this requires going # via the node before it's locked, requiring verification later on instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_uuid) lock_groups.update(instance_groups) else: # No target groups, need to lock all of them lock_groups = locking.ALL_SET self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups elif level == locking.LEVEL_NODE: if self.req_target_uuids: # Lock all nodes used by instances self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND self._LockInstancesNodes() # Lock all nodes in all potential target groups lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) - self.cfg.GetInstanceNodeGroups(self.op.instance_uuid)) member_nodes = [node_uuid for group in lock_groups for node_uuid in self.cfg.GetNodeGroup(group).members] self.needed_locks[locking.LEVEL_NODE].extend(member_nodes) else: # Lock all nodes as all groups are potential targets self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET def CheckPrereq(self): owned_instance_names = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) assert (self.req_target_uuids is None or owned_groups.issuperset(self.req_target_uuids)) assert owned_instance_names == set([self.op.instance_name]) # Get instance information self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) # Check if node groups for locked instance are still correct assert owned_nodes.issuperset(self.instance.all_nodes), \ ("Instance %s's nodes changed while we kept the lock" % self.op.instance_name) inst_groups = CheckInstanceNodeGroups(self.cfg, self.op.instance_uuid, owned_groups) if self.req_target_uuids: # User requested specific target groups self.target_uuids = frozenset(self.req_target_uuids) else: # All groups except those used by the instance are potential targets self.target_uuids = owned_groups - inst_groups conflicting_groups = self.target_uuids & inst_groups if conflicting_groups: raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are" " used by the instance '%s'" % (utils.CommaJoin(conflicting_groups), self.op.instance_name), errors.ECODE_INVAL) if not self.target_uuids: raise errors.OpPrereqError("There are no possible target groups", errors.ECODE_INVAL) def BuildHooksEnv(self): """Build hooks env. """ assert self.target_uuids env = { "TARGET_GROUPS": " ".join(self.target_uuids), } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. 
""" mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): instances = list(self.owned_locks(locking.LEVEL_INSTANCE)) assert instances == [self.op.instance_name], "Instance not locked" req = iallocator.IAReqGroupChange(instances=instances, target_groups=list(self.target_uuids)) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) if not ial.success: raise errors.OpPrereqError("Can't compute solution for changing group of" " instance '%s' using iallocator '%s': %s" % (self.op.instance_name, self.op.iallocator, ial.info), errors.ECODE_NORES) jobs = LoadNodeEvacResult(self, ial.result, self.op.early_release, False) self.LogInfo("Iallocator returned %s job(s) for changing group of" " instance '%s'", len(jobs), self.op.instance_name) return ResultWithJobs(jobs) ganeti-2.9.3/lib/cmdlib/tags.py0000644000000000000000000001455112271422343016310 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Logical units dealing with tags.""" import re from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import objects from ganeti import utils from ganeti.cmdlib.base import NoHooksLU from ganeti.cmdlib.common import ExpandNodeUuidAndName, \ ExpandInstanceUuidAndName, ShareAll class TagsLU(NoHooksLU): # pylint: disable=W0223 """Generic tags LU. This is an abstract class which is the parent of all the other tags LUs. """ def ExpandNames(self): self.group_uuid = None self.needed_locks = {} if self.op.kind == constants.TAG_NODE: (self.node_uuid, _) = \ ExpandNodeUuidAndName(self.cfg, None, self.op.name) lock_level = locking.LEVEL_NODE lock_name = self.node_uuid elif self.op.kind == constants.TAG_INSTANCE: (self.inst_uuid, inst_name) = \ ExpandInstanceUuidAndName(self.cfg, None, self.op.name) lock_level = locking.LEVEL_INSTANCE lock_name = inst_name elif self.op.kind == constants.TAG_NODEGROUP: self.group_uuid = self.cfg.LookupNodeGroup(self.op.name) lock_level = locking.LEVEL_NODEGROUP lock_name = self.group_uuid elif self.op.kind == constants.TAG_NETWORK: self.network_uuid = self.cfg.LookupNetwork(self.op.name) lock_level = locking.LEVEL_NETWORK lock_name = self.network_uuid else: lock_level = None lock_name = None if lock_level and getattr(self.op, "use_locking", True): self.needed_locks[lock_level] = lock_name # FIXME: Acquire BGL for cluster tag operations (as of this writing it's # not possible to acquire the BGL based on opcode parameters) def CheckPrereq(self): """Check prerequisites. 
""" if self.op.kind == constants.TAG_CLUSTER: self.target = self.cfg.GetClusterInfo() elif self.op.kind == constants.TAG_NODE: self.target = self.cfg.GetNodeInfo(self.node_uuid) elif self.op.kind == constants.TAG_INSTANCE: self.target = self.cfg.GetInstanceInfo(self.inst_uuid) elif self.op.kind == constants.TAG_NODEGROUP: self.target = self.cfg.GetNodeGroup(self.group_uuid) elif self.op.kind == constants.TAG_NETWORK: self.target = self.cfg.GetNetwork(self.network_uuid) else: raise errors.OpPrereqError("Wrong tag type requested (%s)" % str(self.op.kind), errors.ECODE_INVAL) class LUTagsGet(TagsLU): """Returns the tags of a given object. """ REQ_BGL = False def ExpandNames(self): TagsLU.ExpandNames(self) # Share locks as this is only a read operation self.share_locks = ShareAll() def Exec(self, feedback_fn): """Returns the tag list. """ return list(self.target.GetTags()) class LUTagsSearch(NoHooksLU): """Searches the tags for a given pattern. """ REQ_BGL = False def ExpandNames(self): self.needed_locks = {} def CheckPrereq(self): """Check prerequisites. This checks the pattern passed for validity by compiling it. """ try: self.re = re.compile(self.op.pattern) except re.error, err: raise errors.OpPrereqError("Invalid search pattern '%s': %s" % (self.op.pattern, err), errors.ECODE_INVAL) @staticmethod def _ExtendTagTargets(targets, object_type_name, object_info_dict): return targets.extend(("/%s/%s" % (object_type_name, o.name), o) for o in object_info_dict.values()) def Exec(self, feedback_fn): """Returns the tag list. """ tgts = [("/cluster", self.cfg.GetClusterInfo())] LUTagsSearch._ExtendTagTargets(tgts, "instances", self.cfg.GetAllInstancesInfo()) LUTagsSearch._ExtendTagTargets(tgts, "nodes", self.cfg.GetAllNodesInfo()) LUTagsSearch._ExtendTagTargets(tgts, "nodegroup", self.cfg.GetAllNodeGroupsInfo()) LUTagsSearch._ExtendTagTargets(tgts, "network", self.cfg.GetAllNetworksInfo()) results = [] for path, target in tgts: for tag in target.GetTags(): if self.re.search(tag): results.append((path, tag)) return results class LUTagsSet(TagsLU): """Sets a tag on a given object. """ REQ_BGL = False def CheckPrereq(self): """Check prerequisites. This checks the type and length of the tag name and value. """ TagsLU.CheckPrereq(self) for tag in self.op.tags: objects.TaggableObject.ValidateTag(tag) def Exec(self, feedback_fn): """Sets the tag. """ try: for tag in self.op.tags: self.target.AddTag(tag) except errors.TagError, err: raise errors.OpExecError("Error while setting tag: %s" % str(err)) self.cfg.Update(self.target, feedback_fn) class LUTagsDel(TagsLU): """Delete a list of tags from a given object. """ REQ_BGL = False def CheckPrereq(self): """Check prerequisites. This checks that we have the given tag. """ TagsLU.CheckPrereq(self) for tag in self.op.tags: objects.TaggableObject.ValidateTag(tag) del_tags = frozenset(self.op.tags) cur_tags = self.target.GetTags() diff_tags = del_tags - cur_tags if diff_tags: diff_names = ("'%s'" % i for i in sorted(diff_tags)) raise errors.OpPrereqError("Tag(s) %s not found" % (utils.CommaJoin(diff_names), ), errors.ECODE_NOENT) def Exec(self, feedback_fn): """Remove the tag from the object. """ for tag in self.op.tags: self.target.RemoveTag(tag) self.cfg.Update(self.target, feedback_fn) ganeti-2.9.3/lib/cmdlib/common.py0000644000000000000000000011746712271422343016654 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Common functions used by multiple logical units.""" import copy import os from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import hypervisor from ganeti import locking from ganeti import objects from ganeti import opcodes from ganeti import pathutils from ganeti import rpc from ganeti import ssconf from ganeti import utils # States of instance INSTANCE_DOWN = [constants.ADMINST_DOWN] INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP] INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE] #: Instance status in which an instance can be marked as offline/online CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([ constants.ADMINST_OFFLINE, ])) def _ExpandItemName(expand_fn, name, kind): """Expand an item name. @param expand_fn: the function to use for expansion @param name: requested item name @param kind: text description ('Node' or 'Instance') @return: the result of the expand_fn, if successful @raise errors.OpPrereqError: if the item is not found """ (uuid, full_name) = expand_fn(name) if uuid is None or full_name is None: raise errors.OpPrereqError("%s '%s' not known" % (kind, name), errors.ECODE_NOENT) return (uuid, full_name) def ExpandInstanceUuidAndName(cfg, expected_uuid, name): """Wrapper over L{_ExpandItemName} for instance.""" (uuid, full_name) = _ExpandItemName(cfg.ExpandInstanceName, name, "Instance") if expected_uuid is not None and uuid != expected_uuid: raise errors.OpPrereqError( "The instance's UUID '%s' does not match the expected UUID '%s' for" " instance '%s'. Maybe the instance changed since you submitted this" " job." % (uuid, expected_uuid, full_name), errors.ECODE_NOTUNIQUE) return (uuid, full_name) def ExpandNodeUuidAndName(cfg, expected_uuid, name): """Expand a short node name into the node UUID and full name. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type expected_uuid: string @param expected_uuid: expected UUID for the node (or None if there is no expectation). If it does not match, a L{errors.OpPrereqError} is raised. @type name: string @param name: the short node name """ (uuid, full_name) = _ExpandItemName(cfg.ExpandNodeName, name, "Node") if expected_uuid is not None and uuid != expected_uuid: raise errors.OpPrereqError( "The node's UUID '%s' does not match the expected UUID '%s' for node" " '%s'. Maybe the node changed since you submitted this job." % (uuid, expected_uuid, full_name), errors.ECODE_NOTUNIQUE) return (uuid, full_name) def ShareAll(): """Returns a dict declaring all lock levels shared. """ return dict.fromkeys(locking.LEVELS, 1) def CheckNodeGroupInstances(cfg, group_uuid, owned_instance_names): """Checks if the instances in a node group are still correct.
@type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type group_uuid: string @param group_uuid: Node group UUID @type owned_instance_names: set or frozenset @param owned_instance_names: List of currently owned instances """ wanted_instances = frozenset(cfg.GetInstanceNames( cfg.GetNodeGroupInstances(group_uuid))) if owned_instance_names != wanted_instances: raise errors.OpPrereqError("Instances in node group '%s' changed since" " locks were acquired, wanted '%s', have '%s';" " retry the operation" % (group_uuid, utils.CommaJoin(wanted_instances), utils.CommaJoin(owned_instance_names)), errors.ECODE_STATE) return wanted_instances def GetWantedNodes(lu, short_node_names): """Returns list of checked and expanded node names. @type lu: L{LogicalUnit} @param lu: the logical unit on whose behalf we execute @type short_node_names: list @param short_node_names: list of node names or None for all nodes @rtype: tuple of lists @return: tuple with (list of node UUIDs, list of node names) @raise errors.ProgrammerError: if the nodes parameter is wrong type """ if short_node_names: node_uuids = [ExpandNodeUuidAndName(lu.cfg, None, name)[0] for name in short_node_names] else: node_uuids = lu.cfg.GetNodeList() return (node_uuids, [lu.cfg.GetNodeName(uuid) for uuid in node_uuids]) def GetWantedInstances(lu, short_inst_names): """Returns list of checked and expanded instance names. @type lu: L{LogicalUnit} @param lu: the logical unit on whose behalf we execute @type short_inst_names: list @param short_inst_names: list of instance names or None for all instances @rtype: tuple of lists @return: tuple of (instance UUIDs, instance names) @raise errors.OpPrereqError: if the instances parameter is wrong type @raise errors.OpPrereqError: if any of the passed instances is not found """ if short_inst_names: inst_uuids = [ExpandInstanceUuidAndName(lu.cfg, None, name)[0] for name in short_inst_names] else: inst_uuids = lu.cfg.GetInstanceList() return (inst_uuids, [lu.cfg.GetInstanceName(uuid) for uuid in inst_uuids]) def RunPostHook(lu, node_name): """Runs the post-hook for an opcode on a single node. """ hm = lu.proc.BuildHooksManager(lu) try: hm.RunPhase(constants.HOOKS_PHASE_POST, node_names=[node_name]) except Exception, err: # pylint: disable=W0703 lu.LogWarning("Errors occurred running hooks on %s: %s", node_name, err) def RedistributeAncillaryFiles(lu): """Distribute additional files which are part of the cluster configuration. ConfigWriter takes care of distributing the config and ssconf files, but there are more files which should be distributed to all nodes. This function makes sure those are copied.
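# GetWantedNodes and GetWantedInstances above share one shape: expand each
# short name to a (uuid, full_name) pair via the config, or fall back to
# "all known objects" when no names were given. A self-contained sketch of
# that shape, with a plain dict standing in for the cluster configuration
# (names here are illustrative, not Ganeti's config API):
def expand_wanted(short_names, known):
  """Return (uuids, full_names); 'known' maps uuid -> full name."""
  if short_names:
    index = dict((full.split(".")[0], (uuid, full))
                 for (uuid, full) in known.items())
    pairs = []
    for name in short_names:
      short = name.split(".")[0]
      if short not in index:
        raise ValueError("Name '%s' not known" % name)
      pairs.append(index[short])
  else:
    pairs = sorted(known.items())
  return ([uuid for (uuid, _) in pairs], [full for (_, full) in pairs])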
""" # Gather target nodes cluster = lu.cfg.GetClusterInfo() master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) online_node_uuids = lu.cfg.GetOnlineNodeList() online_node_uuid_set = frozenset(online_node_uuids) vm_node_uuids = list(online_node_uuid_set.intersection( lu.cfg.GetVmCapableNodeList())) # Never distribute to master node for node_uuids in [online_node_uuids, vm_node_uuids]: if master_info.uuid in node_uuids: node_uuids.remove(master_info.uuid) # Gather file lists (files_all, _, files_mc, files_vm) = \ ComputeAncillaryFiles(cluster, True) # Never re-distribute configuration file from here assert not (pathutils.CLUSTER_CONF_FILE in files_all or pathutils.CLUSTER_CONF_FILE in files_vm) assert not files_mc, "Master candidates not handled in this function" filemap = [ (online_node_uuids, files_all), (vm_node_uuids, files_vm), ] # Upload the files for (node_uuids, files) in filemap: for fname in files: UploadHelper(lu, node_uuids, fname) def ComputeAncillaryFiles(cluster, redist): """Compute files external to Ganeti which need to be consistent. @type redist: boolean @param redist: Whether to include files which need to be redistributed """ # Compute files for all nodes files_all = set([ pathutils.SSH_KNOWN_HOSTS_FILE, pathutils.CONFD_HMAC_KEY, pathutils.CLUSTER_DOMAIN_SECRET_FILE, pathutils.SPICE_CERT_FILE, pathutils.SPICE_CACERT_FILE, pathutils.RAPI_USERS_FILE, ]) if redist: # we need to ship at least the RAPI certificate files_all.add(pathutils.RAPI_CERT_FILE) else: files_all.update(pathutils.ALL_CERT_FILES) files_all.update(ssconf.SimpleStore().GetFileList()) if cluster.modify_etc_hosts: files_all.add(pathutils.ETC_HOSTS) if cluster.use_external_mip_script: files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) # Files which are optional, these must: # - be present in one other category as well # - either exist or not exist on all nodes of that category (mc, vm all) files_opt = set([ pathutils.RAPI_USERS_FILE, ]) # Files which should only be on master candidates files_mc = set() if not redist: files_mc.add(pathutils.CLUSTER_CONF_FILE) # File storage if (not redist and (cluster.IsFileStorageEnabled() or cluster.IsSharedFileStorageEnabled())): files_all.add(pathutils.FILE_STORAGE_PATHS_FILE) files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE) # Files which should only be on VM-capable nodes files_vm = set( filename for hv_name in cluster.enabled_hypervisors for filename in hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0]) files_opt |= set( filename for hv_name in cluster.enabled_hypervisors for filename in hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1]) # Filenames in each category must be unique all_files_set = files_all | files_mc | files_vm assert (len(all_files_set) == sum(map(len, [files_all, files_mc, files_vm]))), \ "Found file listed in more than one file list" # Optional files must be present in one other category assert all_files_set.issuperset(files_opt), \ "Optional file not in a different required list" # This one file should never ever be re-distributed via RPC assert not (redist and pathutils.FILE_STORAGE_PATHS_FILE in all_files_set) return (files_all, files_opt, files_mc, files_vm) def UploadHelper(lu, node_uuids, fname): """Helper for uploading a file and showing warnings. 
""" if os.path.exists(fname): result = lu.rpc.call_upload_file(node_uuids, fname) for to_node_uuids, to_result in result.items(): msg = to_result.fail_msg if msg: msg = ("Copy of file %s to node %s failed: %s" % (fname, lu.cfg.GetNodeName(to_node_uuids), msg)) lu.LogWarning(msg) def MergeAndVerifyHvState(op_input, obj_input): """Combines the hv state from an opcode with the one of the object @param op_input: The input dict from the opcode @param obj_input: The input dict from the objects @return: The verified and updated dict """ if op_input: invalid_hvs = set(op_input) - constants.HYPER_TYPES if invalid_hvs: raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:" " %s" % utils.CommaJoin(invalid_hvs), errors.ECODE_INVAL) if obj_input is None: obj_input = {} type_check = constants.HVSTS_PARAMETER_TYPES return _UpdateAndVerifySubDict(obj_input, op_input, type_check) return None def MergeAndVerifyDiskState(op_input, obj_input): """Combines the disk state from an opcode with the one of the object @param op_input: The input dict from the opcode @param obj_input: The input dict from the objects @return: The verified and updated dict """ if op_input: invalid_dst = set(op_input) - constants.DS_VALID_TYPES if invalid_dst: raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" % utils.CommaJoin(invalid_dst), errors.ECODE_INVAL) type_check = constants.DSS_PARAMETER_TYPES if obj_input is None: obj_input = {} return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value, type_check)) for key, value in op_input.items()) return None def CheckOSParams(lu, required, node_uuids, osname, osparams): """OS parameters validation. @type lu: L{LogicalUnit} @param lu: the logical unit for which we check @type required: boolean @param required: whether the validation should fail if the OS is not found @type node_uuids: list @param node_uuids: the list of nodes on which we should check @type osname: string @param osname: the name of the hypervisor we should use @type osparams: dict @param osparams: the parameters which we need to check @raise errors.OpPrereqError: if the parameters are not valid """ node_uuids = _FilterVmNodes(lu, node_uuids) result = lu.rpc.call_os_validate(node_uuids, required, osname, [constants.OS_VALIDATE_PARAMETERS], osparams) for node_uuid, nres in result.items(): # we don't check for offline cases since this should be run only # against the master node and/or an instance's nodes nres.Raise("OS Parameters validation failed on node %s" % lu.cfg.GetNodeName(node_uuid)) if not nres.payload: lu.LogInfo("OS %s not found on node %s, validation skipped", osname, lu.cfg.GetNodeName(node_uuid)) def CheckHVParams(lu, node_uuids, hvname, hvparams): """Hypervisor parameter validation. This function abstract the hypervisor parameter validation to be used in both instance create and instance modify. 
@type lu: L{LogicalUnit} @param lu: the logical unit for which we check @type node_uuids: list @param node_uuids: the list of nodes on which we should check @type hvname: string @param hvname: the name of the hypervisor we should use @type hvparams: dict @param hvparams: the parameters which we need to check @raise errors.OpPrereqError: if the parameters are not valid """ node_uuids = _FilterVmNodes(lu, node_uuids) cluster = lu.cfg.GetClusterInfo() hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams) hvinfo = lu.rpc.call_hypervisor_validate_params(node_uuids, hvname, hvfull) for node_uuid in node_uuids: info = hvinfo[node_uuid] if info.offline: continue info.Raise("Hypervisor parameter validation failed on node %s" % lu.cfg.GetNodeName(node_uuid)) def AdjustCandidatePool(lu, exceptions): """Adjust the candidate pool after node operations. """ mod_list = lu.cfg.MaintainCandidatePool(exceptions) if mod_list: lu.LogInfo("Promoted nodes to master candidate role: %s", utils.CommaJoin(node.name for node in mod_list)) for node in mod_list: lu.context.ReaddNode(node) mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) if mc_now > mc_max: lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % (mc_now, mc_max)) def CheckNodePVs(nresult, exclusive_storage): """Check node PVs. """ pvlist_dict = nresult.get(constants.NV_PVLIST, None) if pvlist_dict is None: return (["Can't get PV list from node"], None) pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict) errlist = [] # check that ':' is not present in PV names, since it's a # special character for lvcreate (denotes the range of PEs to # use on the PV) for pv in pvlist: if ":" in pv.name: errlist.append("Invalid character ':' in PV '%s' of VG '%s'" % (pv.name, pv.vg_name)) es_pvinfo = None if exclusive_storage: (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist) errlist.extend(errmsgs) shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None) if shared_pvs: for (pvname, lvlist) in shared_pvs: # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...) errlist.append("PV %s is shared among unrelated LVs (%s)" % (pvname, utils.CommaJoin(lvlist))) return (errlist, es_pvinfo) def _ComputeMinMaxSpec(name, qualifier, ispecs, value): """Computes if value is in the desired range. @param name: name of the parameter for which we perform the check @param qualifier: a qualifier used in the error message (e.g. 'disk/1', not just 'disk') @param ispecs: dictionary containing min and max values @param value: actual value that we want to use @return: None or an error string """ if value in [None, constants.VALUE_AUTO]: return None max_v = ispecs[constants.ISPECS_MAX].get(name, value) min_v = ispecs[constants.ISPECS_MIN].get(name, value) if value > max_v or min_v > value: if qualifier: fqn = "%s/%s" % (name, qualifier) else: fqn = name return ("%s value %s is not in range [%s, %s]" % (fqn, value, min_v, max_v)) return None def ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template, _compute_fn=_ComputeMinMaxSpec): """Verifies ipolicy against provided specs. 
@type ipolicy: dict @param ipolicy: The ipolicy @type mem_size: int @param mem_size: The memory size @type cpu_count: int @param cpu_count: Used cpu cores @type disk_count: int @param disk_count: Number of disks used @type nic_count: int @param nic_count: Number of nics used @type disk_sizes: list of ints @param disk_sizes: Disk sizes of used disks (len must match C{disk_count}) @type spindle_use: int @param spindle_use: The number of spindles this instance uses @type disk_template: string @param disk_template: The disk template of the instance @param _compute_fn: The compute function (unittest only) @return: A list of violations, or an empty list if no violations are found """ assert disk_count == len(disk_sizes) test_settings = [ (constants.ISPEC_MEM_SIZE, "", mem_size), (constants.ISPEC_CPU_COUNT, "", cpu_count), (constants.ISPEC_NIC_COUNT, "", nic_count), (constants.ISPEC_SPINDLE_USE, "", spindle_use), ] + [(constants.ISPEC_DISK_SIZE, str(idx), d) for idx, d in enumerate(disk_sizes)] if disk_template != constants.DT_DISKLESS: # This check doesn't make sense for diskless instances test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count)) ret = [] allowed_dts = ipolicy[constants.IPOLICY_DTS] if disk_template not in allowed_dts: ret.append("Disk template %s is not allowed (allowed templates: %s)" % (disk_template, utils.CommaJoin(allowed_dts))) min_errs = None for minmax in ipolicy[constants.ISPECS_MINMAX]: errs = filter(None, (_compute_fn(name, qualifier, minmax, value) for (name, qualifier, value) in test_settings)) if min_errs is None or len(errs) < len(min_errs): min_errs = errs assert min_errs is not None return ret + min_errs def ComputeIPolicyInstanceViolation(ipolicy, instance, cfg, _compute_fn=ComputeIPolicySpecViolation): """Compute if instance meets the specs of ipolicy. @type ipolicy: dict @param ipolicy: The ipolicy to verify against @type instance: L{objects.Instance} @param instance: The instance to verify @type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration @param _compute_fn: The function to verify ipolicy (unittest only) @see: L{ComputeIPolicySpecViolation} """ ret = [] be_full = cfg.GetClusterInfo().FillBE(instance) mem_size = be_full[constants.BE_MAXMEM] cpu_count = be_full[constants.BE_VCPUS] es_flags = rpc.GetExclusiveStorageForNodes(cfg, instance.all_nodes) if any(es_flags.values()): # With exclusive storage use the actual spindles try: spindle_use = sum([disk.spindles for disk in instance.disks]) except TypeError: ret.append("Number of spindles not configured for disks of instance %s" " while exclusive storage is enabled, try running gnt-cluster" " repair-disk-sizes" % instance.name) # _ComputeMinMaxSpec ignores 'None's spindle_use = None else: spindle_use = be_full[constants.BE_SPINDLE_USE] disk_count = len(instance.disks) disk_sizes = [disk.size for disk in instance.disks] nic_count = len(instance.nics) disk_template = instance.disk_template return ret + _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, disk_sizes, spindle_use, disk_template) def _ComputeViolatingInstances(ipolicy, instances, cfg): """Computes the set of instances that violate the given ipolicy.
@param ipolicy: The ipolicy to verify @type instances: list of L{objects.Instance} @param instances: List of instances to verify @type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration @return: A frozenset of instance names violating the ipolicy """ return frozenset([inst.name for inst in instances if ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)]) def ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg): """Computes a set of any instances that would violate the new ipolicy. @param old_ipolicy: The current (still in-place) ipolicy @param new_ipolicy: The new (to become) ipolicy @param instances: List of instances to verify @type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration @return: A list of instances which violate the new ipolicy but did not before """ return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) - _ComputeViolatingInstances(old_ipolicy, instances, cfg)) def GetUpdatedParams(old_params, update_dict, use_default=True, use_none=False): """Return the new version of a parameter dictionary. @type old_params: dict @param old_params: old parameters @type update_dict: dict @param update_dict: dict containing new parameter values, or constants.VALUE_DEFAULT to reset the parameter to its default value @type use_default: boolean @param use_default: whether to recognise L{constants.VALUE_DEFAULT} values as 'to be deleted' values @type use_none: boolean @param use_none: whether to recognise C{None} values as 'to be deleted' values @rtype: dict @return: the new parameter dictionary """ params_copy = copy.deepcopy(old_params) for key, val in update_dict.iteritems(): if ((use_default and val == constants.VALUE_DEFAULT) or (use_none and val is None)): try: del params_copy[key] except KeyError: pass else: params_copy[key] = val return params_copy def GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False): """Return the new version of an instance policy.
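# GetUpdatedParams above implements "merge with reset semantics": a value of
# constants.VALUE_DEFAULT (or None, when use_none is set) deletes the key,
# anything else overwrites it. A compact standalone equivalent, with the
# string "default" standing in for constants.VALUE_DEFAULT:
def get_updated_params(old, update, use_default=True, use_none=False):
  out = dict(old)
  for key, val in update.items():
    if (use_default and val == "default") or (use_none and val is None):
      out.pop(key, None)  # reset to the (implicit) default by deletion
    else:
      out[key] = val
  return out

# get_updated_params({"acpi": False}, {"acpi": "default", "boot_order": "c"})
# -> {"boot_order": "c"}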
@param group_policy: whether this policy applies to a group and thus we should support removal of policy entries """ ipolicy = copy.deepcopy(old_ipolicy) for key, value in new_ipolicy.items(): if key not in constants.IPOLICY_ALL_KEYS: raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key, errors.ECODE_INVAL) if (not value or value == [constants.VALUE_DEFAULT] or value == constants.VALUE_DEFAULT): if group_policy: if key in ipolicy: del ipolicy[key] else: raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'" " on the cluster" % key, errors.ECODE_INVAL) else: if key in constants.IPOLICY_PARAMETERS: # FIXME: we assume all such values are float try: ipolicy[key] = float(value) except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid value for attribute" " '%s': '%s', error: %s" % (key, value, err), errors.ECODE_INVAL) elif key == constants.ISPECS_MINMAX: for minmax in value: for k in minmax.keys(): utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES) ipolicy[key] = value elif key == constants.ISPECS_STD: if group_policy: msg = "%s cannot appear in group instance specs" % key raise errors.OpPrereqError(msg, errors.ECODE_INVAL) ipolicy[key] = GetUpdatedParams(old_ipolicy.get(key, {}), value, use_none=False, use_default=False) utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES) else: # FIXME: we assume all others are lists; this should be redone # in a nicer way ipolicy[key] = list(value) try: objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy) except errors.ConfigurationError, err: raise errors.OpPrereqError("Invalid instance policy: %s" % err, errors.ECODE_INVAL) return ipolicy def AnnotateDiskParams(instance, devs, cfg): """Little helper wrapper around the rpc annotation method. @param instance: The instance object @type devs: List of L{objects.Disk} @param devs: The root devices (not any of its children!) @param cfg: The config object @return: The annotated disk copies @see L{rpc.AnnotateDiskParams} """ return rpc.AnnotateDiskParams(instance.disk_template, devs, cfg.GetInstanceDiskParams(instance)) def SupportsOob(cfg, node): """Tells if node supports OOB. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type node: L{objects.Node} @param node: The node @return: The OOB script if supported or an empty string otherwise """ return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM] def _UpdateAndVerifySubDict(base, updates, type_check): """Updates and verifies a dict with sub dicts of the same type. @param base: The dict with the old data @param updates: The dict with the new data @param type_check: Dict suitable to ForceDictType to verify correct types @returns: A new dict with updated and verified values """ def fn(old, value): new = GetUpdatedParams(old, value) utils.ForceDictType(new, type_check) return new ret = copy.deepcopy(base) ret.update(dict((key, fn(base.get(key, {}), value)) for key, value in updates.items())) return ret def _FilterVmNodes(lu, node_uuids): """Filters out non-vm_capable nodes from a list. @type lu: L{LogicalUnit} @param lu: the logical unit for which we check @type node_uuids: list @param node_uuids: the list of nodes on which we should check @rtype: list @return: the list of vm-capable nodes """ vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList()) return [uuid for uuid in node_uuids if uuid not in vm_nodes] def GetDefaultIAllocator(cfg, ialloc): """Decides on which iallocator to use.
@type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration object @type ialloc: string or None @param ialloc: Iallocator specified in opcode @rtype: string @return: Iallocator name """ if not ialloc: # Use default iallocator ialloc = cfg.GetDefaultIAllocator() if not ialloc: raise errors.OpPrereqError("No iallocator was specified, neither in the" " opcode nor as a cluster-wide default", errors.ECODE_INVAL) return ialloc def CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_node_uuids, cur_group_uuid): """Checks if node groups for locked instances are still correct. @type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration @type instances: dict; string as key, L{objects.Instance} as value @param instances: Dictionary, instance UUID as key, instance object as value @type owned_groups: iterable of string @param owned_groups: List of owned groups @type owned_node_uuids: iterable of string @param owned_node_uuids: List of owned nodes @type cur_group_uuid: string or None @param cur_group_uuid: Optional group UUID to check against instance's groups """ for (uuid, inst) in instances.items(): assert owned_node_uuids.issuperset(inst.all_nodes), \ "Instance %s's nodes changed while we kept the lock" % inst.name inst_groups = CheckInstanceNodeGroups(cfg, uuid, owned_groups) assert cur_group_uuid is None or cur_group_uuid in inst_groups, \ "Instance %s has no node in group %s" % (inst.name, cur_group_uuid) def CheckInstanceNodeGroups(cfg, inst_uuid, owned_groups, primary_only=False): """Checks if the owned node groups are still correct for an instance. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type inst_uuid: string @param inst_uuid: Instance UUID @type owned_groups: set or frozenset @param owned_groups: List of currently owned node groups @type primary_only: boolean @param primary_only: Whether to check node groups for only the primary node """ inst_groups = cfg.GetInstanceNodeGroups(inst_uuid, primary_only) if not owned_groups.issuperset(inst_groups): raise errors.OpPrereqError("Instance %s's node groups changed since" " locks were acquired, current groups" " are '%s', owning groups '%s'; retry the" " operation" % (cfg.GetInstanceName(inst_uuid), utils.CommaJoin(inst_groups), utils.CommaJoin(owned_groups)), errors.ECODE_STATE) return inst_groups def LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes): """Unpacks the result of change-group and node-evacuate iallocator requests. Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and L{constants.IALLOCATOR_MODE_CHG_GROUP}.
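# LoadNodeEvacResult, continued below, consumes the (moved, failed, jobs)
# triple returned by the iallocator for both modes named above. A sketch of
# the same unpack-and-report flow, with an exception and prints standing in
# for the LU's logging helpers (names here are illustrative):
def load_evac_result(alloc_result):
  (moved, failed, jobs) = alloc_result
  if failed:
    reasons = ", ".join("%s (%s)" % (name, why) for (name, why) in failed)
    raise RuntimeError("Unable to evacuate instances %s" % reasons)
  for (name, group, node_names) in moved:
    print("Instance %s moves to %s (%s)" %
          (name, group, ", ".join(node_names)))
  return jobs  # per-job lists of opcodes, to be submitted by the caller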
@type lu: L{LogicalUnit} @param lu: Logical unit instance @type alloc_result: tuple/list @param alloc_result: Result from iallocator @type early_release: bool @param early_release: Whether to release locks early if possible @type use_nodes: bool @param use_nodes: Whether to display node names instead of groups """ (moved, failed, jobs) = alloc_result if failed: failreason = utils.CommaJoin("%s (%s)" % (name, reason) for (name, reason) in failed) lu.LogWarning("Unable to evacuate instances %s", failreason) raise errors.OpExecError("Unable to evacuate instances %s" % failreason) if moved: lu.LogInfo("Instances to be moved: %s", utils.CommaJoin( "%s (to %s)" % (name, _NodeEvacDest(use_nodes, group, node_names)) for (name, group, node_names) in moved)) return [map(compat.partial(_SetOpEarlyRelease, early_release), map(opcodes.OpCode.LoadOpCode, ops)) for ops in jobs] def _NodeEvacDest(use_nodes, group, node_names): """Returns group or nodes depending on caller's choice. """ if use_nodes: return utils.CommaJoin(node_names) else: return group def _SetOpEarlyRelease(early_release, op): """Sets C{early_release} flag on opcodes if available. """ try: op.early_release = early_release except AttributeError: assert not isinstance(op, opcodes.OpInstanceReplaceDisks) return op def MapInstanceLvsToNodes(instances): """Creates a map from (node, volume) to the instance object. @type instances: list of L{objects.Instance} @rtype: dict; tuple of (node uuid, volume name) as key, L{objects.Instance} object as value """ return dict(((node_uuid, vol), inst) for inst in instances for (node_uuid, vols) in inst.MapLVsByNode().items() for vol in vols) def CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels): """Make sure that none of the given parameters is global. If a global parameter is found, an L{errors.OpPrereqError} exception is raised. This is used to avoid setting global parameters for individual nodes. @type params: dictionary @param params: Parameters to check @type glob_pars: dictionary @param glob_pars: Forbidden parameters @type kind: string @param kind: Kind of parameters (e.g. "node") @type bad_levels: string @param bad_levels: Level(s) at which the parameters are forbidden (e.g. "instance") @type good_levels: string @param good_levels: Level(s) at which the parameters are allowed (e.g. "cluster or group") """ used_globals = glob_pars.intersection(params) if used_globals: msg = ("The following %s parameters are global and cannot" " be customized at %s level, please modify them at" " %s level: %s" % (kind, bad_levels, good_levels, utils.CommaJoin(used_globals))) raise errors.OpPrereqError(msg, errors.ECODE_INVAL) def IsExclusiveStorageEnabledNode(cfg, node): """Whether exclusive_storage is in effect for the given node. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type node: L{objects.Node} @param node: The node @rtype: bool @return: The effective value of exclusive_storage """ return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE] def CheckInstanceState(lu, instance, req_states, msg=None): """Ensure that an instance is in one of the required states.
@param lu: the LU on behalf of which we make the check @param instance: the instance to check @param msg: if passed, should be a message to replace the default one @raise errors.OpPrereqError: if the instance is not in the required state """ if msg is None: msg = ("can't use instance from outside %s states" % utils.CommaJoin(req_states)) if instance.admin_state not in req_states: raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" % (instance.name, instance.admin_state, msg), errors.ECODE_STATE) if constants.ADMINST_UP not in req_states: pnode_uuid = instance.primary_node if not lu.cfg.GetNodeInfo(pnode_uuid).offline: all_hvparams = lu.cfg.GetClusterInfo().hvparams ins_l = lu.rpc.call_instance_list( [pnode_uuid], [instance.hypervisor], all_hvparams)[pnode_uuid] ins_l.Raise("Can't contact node %s for instance information" % lu.cfg.GetNodeName(pnode_uuid), prereq=True, ecode=errors.ECODE_ENVIRON) if instance.name in ins_l.payload: raise errors.OpPrereqError("Instance %s is running, %s" % (instance.name, msg), errors.ECODE_STATE) else: lu.LogWarning("Primary node offline, ignoring check that instance" " is down") def CheckIAllocatorOrNode(lu, iallocator_slot, node_slot): """Check the sanity of iallocator and node arguments and use the cluster-wide iallocator if appropriate. Check that at most one of (iallocator, node) is specified. If none is specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, then the LU's opcode's iallocator slot is filled with the cluster-wide default iallocator. @type iallocator_slot: string @param iallocator_slot: the name of the opcode iallocator slot @type node_slot: string @param node_slot: the name of the opcode target node slot """ node = getattr(lu.op, node_slot, None) ialloc = getattr(lu.op, iallocator_slot, None) if node == []: node = None if node is not None and ialloc is not None: raise errors.OpPrereqError("Do not specify both, iallocator and node", errors.ECODE_INVAL) elif ((node is None and ialloc is None) or ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT): default_iallocator = lu.cfg.GetDefaultIAllocator() if default_iallocator: setattr(lu.op, iallocator_slot, default_iallocator) else: raise errors.OpPrereqError("No iallocator or node given and no" " cluster-wide default iallocator found;" " please specify either an iallocator or a" " node, or set a cluster-wide default" " iallocator", errors.ECODE_INVAL) def FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_uuid, prereq): faulty = [] for dev in instance.disks: cfg.SetDiskID(dev, node_uuid) result = rpc_runner.call_blockdev_getmirrorstatus( node_uuid, (instance.disks, instance)) result.Raise("Failed to get disk status from node %s" % cfg.GetNodeName(node_uuid), prereq=prereq, ecode=errors.ECODE_ENVIRON) for idx, bdev_status in enumerate(result.payload): if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: faulty.append(idx) return faulty def CheckNodeOnline(lu, node_uuid, msg=None): """Ensure that a given node is online. @param lu: the LU on behalf of which we make the check @param node_uuid: the node to check @param msg: if passed, should be a message to replace the default one @raise errors.OpPrereqError: if the node is offline """ if msg is None: msg = "Can't use offline node" if lu.cfg.GetNodeInfo(node_uuid).offline: raise errors.OpPrereqError("%s: %s" % (msg, lu.cfg.GetNodeName(node_uuid)), errors.ECODE_STATE) def CheckDiskTemplateEnabled(cluster, disk_template): """Helper function to check if a disk template is enabled. 
@type cluster: C{objects.Cluster} @param cluster: the cluster's configuration @type disk_template: str @param disk_template: the disk template to be checked """ assert disk_template is not None if disk_template not in constants.DISK_TEMPLATES: raise errors.OpPrereqError("'%s' is not a valid disk template." " Valid disk templates are: %s" % (disk_template, ",".join(constants.DISK_TEMPLATES))) if disk_template not in cluster.enabled_disk_templates: raise errors.OpPrereqError("Disk template '%s' is not enabled in cluster." " Enabled disk templates are: %s" % (disk_template, ",".join(cluster.enabled_disk_templates))) def CheckStorageTypeEnabled(cluster, storage_type): """Helper function to check if a storage type is enabled. @type cluster: C{objects.Cluster} @param cluster: the cluster's configuration @type storage_type: str @param storage_type: the storage type to be checked """ assert storage_type is not None assert storage_type in constants.STORAGE_TYPES # special case for lvm-pv, because it cannot be enabled # via disk templates if storage_type == constants.ST_LVM_PV: CheckStorageTypeEnabled(cluster, constants.ST_LVM_VG) else: possible_disk_templates = \ utils.storage.GetDiskTemplatesOfStorageType(storage_type) for disk_template in possible_disk_templates: if disk_template in cluster.enabled_disk_templates: return raise errors.OpPrereqError("No disk template of storage type '%s' is" " enabled in this cluster. Enabled disk" " templates are: %s" % (storage_type, ",".join(cluster.enabled_disk_templates))) def CheckIpolicyVsDiskTemplates(ipolicy, enabled_disk_templates): """Checks ipolicy disk templates against enabled disk templates. @type ipolicy: dict @param ipolicy: the new ipolicy @type enabled_disk_templates: list of string @param enabled_disk_templates: list of enabled disk templates on the cluster @raises errors.OpPrereqError: if there is at least one allowed disk template that is not also enabled. """ assert constants.IPOLICY_DTS in ipolicy allowed_disk_templates = ipolicy[constants.IPOLICY_DTS] not_enabled = set(allowed_disk_templates) - set(enabled_disk_templates) if not_enabled: raise errors.OpPrereqError("The following disk templates are allowed" " by the ipolicy, but not enabled on the" " cluster: %s" % utils.CommaJoin(not_enabled)) ganeti-2.9.3/lib/cmdlib/cluster.py0000644000000000000000000035353012271422343017036 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA.
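# Before cluster.py begins: CheckIpolicyVsDiskTemplates above reduces to a
# set difference -- every disk template the ipolicy allows must also be
# enabled cluster-wide. The core check in standalone form (ValueError stands
# in for errors.OpPrereqError):
def check_ipolicy_vs_disk_templates(allowed, enabled):
  """Raise if any allowed-by-ipolicy template is not enabled."""
  not_enabled = set(allowed) - set(enabled)
  if not_enabled:
    raise ValueError("Allowed by the ipolicy but not enabled on the"
                     " cluster: %s" % ", ".join(sorted(not_enabled)))

# check_ipolicy_vs_disk_templates(["plain", "drbd"], ["plain"]) raises;
# check_ipolicy_vs_disk_templates(["plain"], ["plain", "drbd"]) passes.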
"""Logical units dealing with the cluster.""" import OpenSSL import copy import itertools import logging import operator import os import re import time from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import hypervisor from ganeti import locking from ganeti import masterd from ganeti import netutils from ganeti import objects from ganeti import opcodes from ganeti import pathutils from ganeti import query from ganeti import rpc from ganeti import runtime from ganeti import ssh from ganeti import uidpool from ganeti import utils from ganeti import vcluster from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \ ResultWithJobs from ganeti.cmdlib.common import ShareAll, RunPostHook, \ ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \ GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \ GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \ CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \ ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \ CheckIpolicyVsDiskTemplates import ganeti.masterd.instance class LUClusterActivateMasterIp(NoHooksLU): """Activate the master IP on the master node. """ def Exec(self, feedback_fn): """Activate the master IP. """ master_params = self.cfg.GetMasterNetworkParameters() ems = self.cfg.GetUseExternalMipScript() result = self.rpc.call_node_activate_master_ip(master_params.uuid, master_params, ems) result.Raise("Could not activate the master IP") class LUClusterDeactivateMasterIp(NoHooksLU): """Deactivate the master IP on the master node. """ def Exec(self, feedback_fn): """Deactivate the master IP. """ master_params = self.cfg.GetMasterNetworkParameters() ems = self.cfg.GetUseExternalMipScript() result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, master_params, ems) result.Raise("Could not deactivate the master IP") class LUClusterConfigQuery(NoHooksLU): """Return configuration values. """ REQ_BGL = False def CheckArguments(self): self.cq = ClusterQuery(None, self.op.output_fields, False) def ExpandNames(self): self.cq.ExpandNames(self) def DeclareLocks(self, level): self.cq.DeclareLocks(self, level) def Exec(self, feedback_fn): result = self.cq.OldStyleQuery(self) assert len(result) == 1 return result[0] class LUClusterDestroy(LogicalUnit): """Logical unit for destroying the cluster. """ HPATH = "cluster-destroy" HTYPE = constants.HTYPE_CLUSTER def BuildHooksEnv(self): """Build hooks env. """ return { "OP_TARGET": self.cfg.GetClusterName(), } def BuildHooksNodes(self): """Build hooks nodes. """ return ([], []) def CheckPrereq(self): """Check prerequisites. This checks whether the cluster is empty. Any errors are signaled by raising errors.OpPrereqError. """ master = self.cfg.GetMasterNode() nodelist = self.cfg.GetNodeList() if len(nodelist) != 1 or nodelist[0] != master: raise errors.OpPrereqError("There are still %d node(s) in" " this cluster." % (len(nodelist) - 1), errors.ECODE_INVAL) instancelist = self.cfg.GetInstanceList() if instancelist: raise errors.OpPrereqError("There are still %d instance(s) in" " this cluster." % len(instancelist), errors.ECODE_INVAL) def Exec(self, feedback_fn): """Destroys the cluster. 
""" master_params = self.cfg.GetMasterNetworkParameters() # Run post hooks on master node before it's removed RunPostHook(self, self.cfg.GetNodeName(master_params.uuid)) ems = self.cfg.GetUseExternalMipScript() result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, master_params, ems) result.Warn("Error disabling the master IP address", self.LogWarning) return master_params.uuid class LUClusterPostInit(LogicalUnit): """Logical unit for running hooks after cluster initialization. """ HPATH = "cluster-init" HTYPE = constants.HTYPE_CLUSTER def BuildHooksEnv(self): """Build hooks env. """ return { "OP_TARGET": self.cfg.GetClusterName(), } def BuildHooksNodes(self): """Build hooks nodes. """ return ([], [self.cfg.GetMasterNode()]) def Exec(self, feedback_fn): """Nothing to do. """ return True class ClusterQuery(QueryBase): FIELDS = query.CLUSTER_FIELDS #: Do not sort (there is only one item) SORT_FIELD = None def ExpandNames(self, lu): lu.needed_locks = {} # The following variables interact with _QueryBase._GetNames self.wanted = locking.ALL_SET self.do_locking = self.use_locking if self.do_locking: raise errors.OpPrereqError("Can not use locking for cluster queries", errors.ECODE_INVAL) def DeclareLocks(self, lu, level): pass def _GetQueryData(self, lu): """Computes the list of nodes and their attributes. """ # Locking is not used assert not (compat.any(lu.glm.is_owned(level) for level in locking.LEVELS if level != locking.LEVEL_CLUSTER) or self.do_locking or self.use_locking) if query.CQ_CONFIG in self.requested_data: cluster = lu.cfg.GetClusterInfo() nodes = lu.cfg.GetAllNodesInfo() else: cluster = NotImplemented nodes = NotImplemented if query.CQ_QUEUE_DRAINED in self.requested_data: drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) else: drain_flag = NotImplemented if query.CQ_WATCHER_PAUSE in self.requested_data: master_node_uuid = lu.cfg.GetMasterNode() result = lu.rpc.call_get_watcher_pause(master_node_uuid) result.Raise("Can't retrieve watcher pause from master node '%s'" % lu.cfg.GetMasterNodeName()) watcher_pause = result.payload else: watcher_pause = NotImplemented return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause) class LUClusterQuery(NoHooksLU): """Query cluster configuration. """ REQ_BGL = False def ExpandNames(self): self.needed_locks = {} def Exec(self, feedback_fn): """Return cluster config. 
""" cluster = self.cfg.GetClusterInfo() os_hvp = {} # Filter just for enabled hypervisors for os_name, hv_dict in cluster.os_hvp.items(): os_hvp[os_name] = {} for hv_name, hv_params in hv_dict.items(): if hv_name in cluster.enabled_hypervisors: os_hvp[os_name][hv_name] = hv_params # Convert ip_family to ip_version primary_ip_version = constants.IP4_VERSION if cluster.primary_ip_family == netutils.IP6Address.family: primary_ip_version = constants.IP6_VERSION result = { "software_version": constants.RELEASE_VERSION, "protocol_version": constants.PROTOCOL_VERSION, "config_version": constants.CONFIG_VERSION, "os_api_version": max(constants.OS_API_VERSIONS), "export_version": constants.EXPORT_VERSION, "vcs_version": constants.VCS_VERSION, "architecture": runtime.GetArchInfo(), "name": cluster.cluster_name, "master": self.cfg.GetMasterNodeName(), "default_hypervisor": cluster.primary_hypervisor, "enabled_hypervisors": cluster.enabled_hypervisors, "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) for hypervisor_name in cluster.enabled_hypervisors]), "os_hvp": os_hvp, "beparams": cluster.beparams, "osparams": cluster.osparams, "ipolicy": cluster.ipolicy, "nicparams": cluster.nicparams, "ndparams": cluster.ndparams, "diskparams": cluster.diskparams, "candidate_pool_size": cluster.candidate_pool_size, "master_netdev": cluster.master_netdev, "master_netmask": cluster.master_netmask, "use_external_mip_script": cluster.use_external_mip_script, "volume_group_name": cluster.volume_group_name, "drbd_usermode_helper": cluster.drbd_usermode_helper, "file_storage_dir": cluster.file_storage_dir, "shared_file_storage_dir": cluster.shared_file_storage_dir, "maintain_node_health": cluster.maintain_node_health, "ctime": cluster.ctime, "mtime": cluster.mtime, "uuid": cluster.uuid, "tags": list(cluster.GetTags()), "uid_pool": cluster.uid_pool, "default_iallocator": cluster.default_iallocator, "reserved_lvs": cluster.reserved_lvs, "primary_ip_version": primary_ip_version, "prealloc_wipe_disks": cluster.prealloc_wipe_disks, "hidden_os": cluster.hidden_os, "blacklisted_os": cluster.blacklisted_os, "enabled_disk_templates": cluster.enabled_disk_templates, } return result class LUClusterRedistConf(NoHooksLU): """Force the redistribution of cluster configuration. This is a very simple LU. """ REQ_BGL = False def ExpandNames(self): self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } self.share_locks = ShareAll() def Exec(self, feedback_fn): """Redistribute the configuration. """ self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) RedistributeAncillaryFiles(self) class LUClusterRename(LogicalUnit): """Rename the cluster. """ HPATH = "cluster-rename" HTYPE = constants.HTYPE_CLUSTER def BuildHooksEnv(self): """Build hooks env. """ return { "OP_TARGET": self.cfg.GetClusterName(), "NEW_NAME": self.op.name, } def BuildHooksNodes(self): """Build hooks nodes. """ return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList()) def CheckPrereq(self): """Verify that the passed name is a valid one. 
""" hostname = netutils.GetHostname(name=self.op.name, family=self.cfg.GetPrimaryIPFamily()) new_name = hostname.name self.ip = new_ip = hostname.ip old_name = self.cfg.GetClusterName() old_ip = self.cfg.GetMasterIP() if new_name == old_name and new_ip == old_ip: raise errors.OpPrereqError("Neither the name nor the IP address of the" " cluster has changed", errors.ECODE_INVAL) if new_ip != old_ip: if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("The given cluster IP address (%s) is" " reachable on the network" % new_ip, errors.ECODE_NOTUNIQUE) self.op.name = new_name def Exec(self, feedback_fn): """Rename the cluster. """ clustername = self.op.name new_ip = self.ip # shutdown the master IP master_params = self.cfg.GetMasterNetworkParameters() ems = self.cfg.GetUseExternalMipScript() result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, master_params, ems) result.Raise("Could not disable the master role") try: cluster = self.cfg.GetClusterInfo() cluster.cluster_name = clustername cluster.master_ip = new_ip self.cfg.Update(cluster, feedback_fn) # update the known hosts file ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) node_list = self.cfg.GetOnlineNodeList() try: node_list.remove(master_params.uuid) except ValueError: pass UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) finally: master_params.ip = new_ip result = self.rpc.call_node_activate_master_ip(master_params.uuid, master_params, ems) result.Warn("Could not re-enable the master role on the master," " please restart manually", self.LogWarning) return clustername class LUClusterRepairDiskSizes(NoHooksLU): """Verifies the cluster disks sizes. """ REQ_BGL = False def ExpandNames(self): if self.op.instances: (_, self.wanted_names) = GetWantedInstances(self, self.op.instances) # Not getting the node allocation lock as only a specific set of # instances (and their nodes) is going to be acquired self.needed_locks = { locking.LEVEL_NODE_RES: [], locking.LEVEL_INSTANCE: self.wanted_names, } self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE else: self.wanted_names = None self.needed_locks = { locking.LEVEL_NODE_RES: locking.ALL_SET, locking.LEVEL_INSTANCE: locking.ALL_SET, # This opcode is acquires the node locks for all instances locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } self.share_locks = { locking.LEVEL_NODE_RES: 1, locking.LEVEL_INSTANCE: 0, locking.LEVEL_NODE_ALLOC: 1, } def DeclareLocks(self, level): if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: self._LockInstancesNodes(primary_only=True, level=level) def CheckPrereq(self): """Check prerequisites. This only checks the optional instance list against the existing names. """ if self.wanted_names is None: self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) self.wanted_instances = \ map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names)) def _EnsureChildSizes(self, disk): """Ensure children of the disk have the needed disk size. This is valid mainly for DRBD8 and fixes an issue where the children have smaller disk size. @param disk: an L{ganeti.objects.Disk} object """ if disk.dev_type == constants.DT_DRBD8: assert disk.children, "Empty children for DRBD8?" 
fchild = disk.children[0] mismatch = fchild.size < disk.size if mismatch: self.LogInfo("Child disk has size %d, parent %d, fixing", fchild.size, disk.size) fchild.size = disk.size # and we recurse on this child only, not on the metadev return self._EnsureChildSizes(fchild) or mismatch else: return False def Exec(self, feedback_fn): """Verify the size of cluster disks. """ # TODO: check child disks too # TODO: check differences in size between primary/secondary nodes per_node_disks = {} for instance in self.wanted_instances: pnode = instance.primary_node if pnode not in per_node_disks: per_node_disks[pnode] = [] for idx, disk in enumerate(instance.disks): per_node_disks[pnode].append((instance, idx, disk)) assert not (frozenset(per_node_disks.keys()) - self.owned_locks(locking.LEVEL_NODE_RES)), \ "Not owning correct locks" assert not self.owned_locks(locking.LEVEL_NODE) es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, per_node_disks.keys()) changed = [] for node_uuid, dskl in per_node_disks.items(): newl = [v[2].Copy() for v in dskl] for dsk in newl: self.cfg.SetDiskID(dsk, node_uuid) node_name = self.cfg.GetNodeName(node_uuid) result = self.rpc.call_blockdev_getdimensions(node_uuid, newl) if result.fail_msg: self.LogWarning("Failure in blockdev_getdimensions call to node" " %s, ignoring", node_name) continue if len(result.payload) != len(dskl): logging.warning("Invalid result from node %s: len(dskl)=%d," " result.payload=%s", node_name, len(dskl), result.payload) self.LogWarning("Invalid result from node %s, ignoring node results", node_name) continue for ((instance, idx, disk), dimensions) in zip(dskl, result.payload): if dimensions is None: self.LogWarning("Disk %d of instance %s did not return size" " information, ignoring", idx, instance.name) continue if not isinstance(dimensions, (tuple, list)): self.LogWarning("Disk %d of instance %s did not return valid" " dimension information, ignoring", idx, instance.name) continue (size, spindles) = dimensions if not isinstance(size, (int, long)): self.LogWarning("Disk %d of instance %s did not return valid" " size information, ignoring", idx, instance.name) continue size = size >> 20 if size != disk.size: self.LogInfo("Disk %d of instance %s has mismatched size," " correcting: recorded %d, actual %d", idx, instance.name, disk.size, size) disk.size = size self.cfg.Update(instance, feedback_fn) changed.append((instance.name, idx, "size", size)) if es_flags[node_uuid]: if spindles is None: self.LogWarning("Disk %d of instance %s did not return valid" " spindles information, ignoring", idx, instance.name) elif disk.spindles is None or disk.spindles != spindles: self.LogInfo("Disk %d of instance %s has mismatched spindles," " correcting: recorded %s, actual %s", idx, instance.name, disk.spindles, spindles) disk.spindles = spindles self.cfg.Update(instance, feedback_fn) changed.append((instance.name, idx, "spindles", disk.spindles)) if self._EnsureChildSizes(disk): self.cfg.Update(instance, feedback_fn) changed.append((instance.name, idx, "size", disk.size)) return changed def _ValidateNetmask(cfg, netmask): """Checks if a netmask is valid. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type netmask: int @param netmask: the netmask to be verified @raise errors.OpPrereqError: if the validation fails """ ip_family = cfg.GetPrimaryIPFamily() try: ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family) except errors.ProgrammerError: raise errors.OpPrereqError("Invalid primary ip family: %s." 
% ip_family, errors.ECODE_INVAL) if not ipcls.ValidateNetmask(netmask): raise errors.OpPrereqError("CIDR netmask (%s) not valid" % (netmask), errors.ECODE_INVAL) def CheckFileBasedStoragePathVsEnabledDiskTemplates( logging_warn_fn, file_storage_dir, enabled_disk_templates, file_disk_template): """Checks whether the given file-based storage directory is acceptable. Note: This function is public, because it is also used in bootstrap.py. @type logging_warn_fn: function @param logging_warn_fn: function which accepts a string and logs it @type file_storage_dir: string @param file_storage_dir: the directory to be used for file-based instances @type enabled_disk_templates: list of string @param enabled_disk_templates: the list of enabled disk templates @type file_disk_template: string @param file_disk_template: the file-based disk template for which the path should be checked """ assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE)) file_storage_enabled = file_disk_template in enabled_disk_templates if file_storage_dir is not None: if file_storage_dir == "": if file_storage_enabled: raise errors.OpPrereqError( "Unsetting the '%s' storage directory while having '%s' storage" " enabled is not permitted." % (file_disk_template, file_disk_template)) else: if not file_storage_enabled: logging_warn_fn( "Specified a %s storage directory, although %s storage is not" " enabled." % (file_disk_template, file_disk_template)) else: raise errors.ProgrammerError("Received %s storage dir with value" " 'None'." % file_disk_template) def CheckFileStoragePathVsEnabledDiskTemplates( logging_warn_fn, file_storage_dir, enabled_disk_templates): """Checks whether the given file storage directory is acceptable. @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates} """ CheckFileBasedStoragePathVsEnabledDiskTemplates( logging_warn_fn, file_storage_dir, enabled_disk_templates, constants.DT_FILE) def CheckSharedFileStoragePathVsEnabledDiskTemplates( logging_warn_fn, file_storage_dir, enabled_disk_templates): """Checks whether the given shared file storage directory is acceptable. @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates} """ CheckFileBasedStoragePathVsEnabledDiskTemplates( logging_warn_fn, file_storage_dir, enabled_disk_templates, constants.DT_SHARED_FILE) class LUClusterSetParams(LogicalUnit): """Change the parameters of the cluster. """ HPATH = "cluster-modify" HTYPE = constants.HTYPE_CLUSTER REQ_BGL = False def CheckArguments(self): """Check parameters """ if self.op.uid_pool: uidpool.CheckUidPool(self.op.uid_pool) if self.op.add_uids: uidpool.CheckUidPool(self.op.add_uids) if self.op.remove_uids: uidpool.CheckUidPool(self.op.remove_uids) if self.op.master_netmask is not None: _ValidateNetmask(self.cfg, self.op.master_netmask) if self.op.diskparams: for dt_params in self.op.diskparams.values(): utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) try: utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) except errors.OpPrereqError, err: raise errors.OpPrereqError("While verifying diskparams options: %s" % err, errors.ECODE_INVAL) def ExpandNames(self): # FIXME: in the future maybe other cluster params won't require checking on # all nodes to be modified. # FIXME: This opcode changes cluster-wide settings. Is acquiring all # resource locks the right thing, shouldn't it be the BGL instead? 
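    # All levels below are acquired in shared mode (see ShareAll() further
    # down), so read-only jobs can keep running while the parameters change.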
self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_INSTANCE: locking.ALL_SET, locking.LEVEL_NODEGROUP: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } self.share_locks = ShareAll() def BuildHooksEnv(self): """Build hooks env. """ return { "OP_TARGET": self.cfg.GetClusterName(), "NEW_VG_NAME": self.op.vg_name, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def _CheckVgName(self, node_uuids, enabled_disk_templates, new_enabled_disk_templates): """Check the consistency of the vg name on all nodes and in case it gets unset whether there are instances still using it. """ lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates) lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates, new_enabled_disk_templates) current_vg_name = self.cfg.GetVGName() if self.op.vg_name == '': if lvm_is_enabled: raise errors.OpPrereqError("Cannot unset volume group if lvm-based" " disk templates are or get enabled.") if self.op.vg_name is None: if current_vg_name is None and lvm_is_enabled: raise errors.OpPrereqError("Please specify a volume group when" " enabling lvm-based disk-templates.") if self.op.vg_name is not None and not self.op.vg_name: if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN): raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" " instances exist", errors.ECODE_INVAL) if (self.op.vg_name is not None and lvm_is_enabled) or \ (self.cfg.GetVGName() is not None and lvm_gets_enabled): self._CheckVgNameOnNodes(node_uuids) def _CheckVgNameOnNodes(self, node_uuids): """Check the status of the volume group on each node. """ vglist = self.rpc.call_vg_list(node_uuids) for node_uuid in node_uuids: msg = vglist[node_uuid].fail_msg if msg: # ignoring down node self.LogWarning("Error while gathering data on node %s" " (ignoring node): %s", self.cfg.GetNodeName(node_uuid), msg) continue vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload, self.op.vg_name, constants.MIN_VG_SIZE) if vgstatus: raise errors.OpPrereqError("Error on node '%s': %s" % (self.cfg.GetNodeName(node_uuid), vgstatus), errors.ECODE_ENVIRON) @staticmethod def _GetEnabledDiskTemplatesInner(op_enabled_disk_templates, old_enabled_disk_templates): """Determines the enabled disk templates and the subset of disk templates that are newly enabled by this operation. """ enabled_disk_templates = None new_enabled_disk_templates = [] if op_enabled_disk_templates: enabled_disk_templates = op_enabled_disk_templates new_enabled_disk_templates = \ list(set(enabled_disk_templates) - set(old_enabled_disk_templates)) else: enabled_disk_templates = old_enabled_disk_templates return (enabled_disk_templates, new_enabled_disk_templates) def _GetEnabledDiskTemplates(self, cluster): """Determines the enabled disk templates and the subset of disk templates that are newly enabled by this operation. """ return self._GetEnabledDiskTemplatesInner(self.op.enabled_disk_templates, cluster.enabled_disk_templates) def _CheckIpolicy(self, cluster, enabled_disk_templates): """Checks the ipolicy. 
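    Besides validating the new ipolicy's allowed disk templates against the
    enabled ones, this computes, per node group, which existing instances
    would newly violate the policy; such instances are only warned about,
    they do not fail the operation.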
@type cluster: C{objects.Cluster} @param cluster: the cluster's configuration @type enabled_disk_templates: list of string @param enabled_disk_templates: list of (possibly newly) enabled disk templates """ # FIXME: write unit tests for this if self.op.ipolicy: self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, group_policy=False) CheckIpolicyVsDiskTemplates(self.new_ipolicy, enabled_disk_templates) all_instances = self.cfg.GetAllInstancesInfo().values() violations = set() for group in self.cfg.GetAllNodeGroupsInfo().values(): instances = frozenset([inst for inst in all_instances if compat.any(nuuid in group.members for nuuid in inst.all_nodes)]) new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) ipol = masterd.instance.CalculateGroupIPolicy(cluster, group) new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances, self.cfg) if new: violations.update(new) if violations: self.LogWarning("After the ipolicy change the following instances" " violate it: %s", utils.CommaJoin(utils.NiceSort(violations))) else: CheckIpolicyVsDiskTemplates(cluster.ipolicy, enabled_disk_templates) def CheckPrereq(self): """Check prerequisites. This checks whether the given params don't conflict and if the given volume group is valid. """ if self.op.drbd_helper is not None and not self.op.drbd_helper: if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8): raise errors.OpPrereqError("Cannot disable drbd helper while" " drbd-based instances exist", errors.ECODE_INVAL) node_uuids = self.owned_locks(locking.LEVEL_NODE) self.cluster = cluster = self.cfg.GetClusterInfo() vm_capable_node_uuids = [node.uuid for node in self.cfg.GetAllNodesInfo().values() if node.uuid in node_uuids and node.vm_capable] (enabled_disk_templates, new_enabled_disk_templates) = \ self._GetEnabledDiskTemplates(cluster) self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates, new_enabled_disk_templates) if self.op.file_storage_dir is not None: CheckFileStoragePathVsEnabledDiskTemplates( self.LogWarning, self.op.file_storage_dir, enabled_disk_templates) if self.op.shared_file_storage_dir is not None: CheckSharedFileStoragePathVsEnabledDiskTemplates( self.LogWarning, self.op.shared_file_storage_dir, enabled_disk_templates) if self.op.drbd_helper: # checks given drbd helper on all nodes helpers = self.rpc.call_drbd_helper(node_uuids) for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids): if ninfo.offline: self.LogInfo("Not checking drbd helper on offline node %s", ninfo.name) continue msg = helpers[ninfo.uuid].fail_msg if msg: raise errors.OpPrereqError("Error checking drbd helper on node" " '%s': %s" % (ninfo.name, msg), errors.ECODE_ENVIRON) node_helper = helpers[ninfo.uuid].payload if node_helper != self.op.drbd_helper: raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % (ninfo.name, node_helper), errors.ECODE_ENVIRON) # validate params changes if self.op.beparams: objects.UpgradeBeParams(self.op.beparams) utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) self.new_beparams = cluster.SimpleFillBE(self.op.beparams) if self.op.ndparams: utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) # TODO: we need a more general way to handle resetting # cluster-level parameters to default values if self.new_ndparams["oob_program"] == "": self.new_ndparams["oob_program"] = \ constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] if self.op.hv_state: new_hv_state = MergeAndVerifyHvState(self.op.hv_state, 
self.cluster.hv_state_static) self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) for hv, values in new_hv_state.items()) if self.op.disk_state: new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, self.cluster.disk_state_static) self.new_disk_state = \ dict((storage, dict((name, cluster.SimpleFillDiskState(values)) for name, values in svalues.items())) for storage, svalues in new_disk_state.items()) self._CheckIpolicy(cluster, enabled_disk_templates) if self.op.nicparams: utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) objects.NIC.CheckParameterSyntax(self.new_nicparams) nic_errors = [] # check all instances for consistency for instance in self.cfg.GetAllInstancesInfo().values(): for nic_idx, nic in enumerate(instance.nics): params_copy = copy.deepcopy(nic.nicparams) params_filled = objects.FillDict(self.new_nicparams, params_copy) # check parameter syntax try: objects.NIC.CheckParameterSyntax(params_filled) except errors.ConfigurationError, err: nic_errors.append("Instance %s, nic/%d: %s" % (instance.name, nic_idx, err)) # if we're moving instances to routed, check that they have an ip target_mode = params_filled[constants.NIC_MODE] if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" " address" % (instance.name, nic_idx)) if nic_errors: raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % "\n".join(nic_errors), errors.ECODE_INVAL) # hypervisor list/parameters self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) if self.op.hvparams: for hv_name, hv_dict in self.op.hvparams.items(): if hv_name not in self.new_hvparams: self.new_hvparams[hv_name] = hv_dict else: self.new_hvparams[hv_name].update(hv_dict) # disk template parameters self.new_diskparams = objects.FillDict(cluster.diskparams, {}) if self.op.diskparams: for dt_name, dt_params in self.op.diskparams.items(): if dt_name not in self.new_diskparams: self.new_diskparams[dt_name] = dt_params else: self.new_diskparams[dt_name].update(dt_params) # os hypervisor parameters self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) if self.op.os_hvp: for os_name, hvs in self.op.os_hvp.items(): if os_name not in self.new_os_hvp: self.new_os_hvp[os_name] = hvs else: for hv_name, hv_dict in hvs.items(): if hv_dict is None: # Delete if it exists self.new_os_hvp[os_name].pop(hv_name, None) elif hv_name not in self.new_os_hvp[os_name]: self.new_os_hvp[os_name][hv_name] = hv_dict else: self.new_os_hvp[os_name][hv_name].update(hv_dict) # os parameters self.new_osp = objects.FillDict(cluster.osparams, {}) if self.op.osparams: for os_name, osp in self.op.osparams.items(): if os_name not in self.new_osp: self.new_osp[os_name] = {} self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp, use_none=True) if not self.new_osp[os_name]: # we removed all parameters del self.new_osp[os_name] else: # check the parameter validity (remote check) CheckOSParams(self, False, [self.cfg.GetMasterNode()], os_name, self.new_osp[os_name]) # changes to the hypervisor list if self.op.enabled_hypervisors is not None: self.hv_list = self.op.enabled_hypervisors for hv in self.hv_list: # if the hypervisor doesn't already exist in the cluster # hvparams, we initialize it to empty, and then (in both # cases) we make sure to fill the defaults, as we might not # have a complete defaults list if the hypervisor wasn't # enabled before if hv not in new_hvp: new_hvp[hv] 
= {} new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) else: self.hv_list = cluster.enabled_hypervisors if self.op.hvparams or self.op.enabled_hypervisors is not None: # either the enabled list has changed, or the parameters have, validate for hv_name, hv_params in self.new_hvparams.items(): if ((self.op.hvparams and hv_name in self.op.hvparams) or (self.op.enabled_hypervisors and hv_name in self.op.enabled_hypervisors)): # either this is a new hypervisor, or its parameters have changed hv_class = hypervisor.GetHypervisorClass(hv_name) utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) hv_class.CheckParameterSyntax(hv_params) CheckHVParams(self, node_uuids, hv_name, hv_params) self._CheckDiskTemplateConsistency() if self.op.os_hvp: # no need to check any newly-enabled hypervisors, since the # defaults have already been checked in the above code-block for os_name, os_hvp in self.new_os_hvp.items(): for hv_name, hv_params in os_hvp.items(): utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) # we need to fill in the new os_hvp on top of the actual hv_p cluster_defaults = self.new_hvparams.get(hv_name, {}) new_osp = objects.FillDict(cluster_defaults, hv_params) hv_class = hypervisor.GetHypervisorClass(hv_name) hv_class.CheckParameterSyntax(new_osp) CheckHVParams(self, node_uuids, hv_name, new_osp) if self.op.default_iallocator: alloc_script = utils.FindFile(self.op.default_iallocator, constants.IALLOCATOR_SEARCH_PATH, os.path.isfile) if alloc_script is None: raise errors.OpPrereqError("Invalid default iallocator script '%s'" " specified" % self.op.default_iallocator, errors.ECODE_INVAL) def _CheckDiskTemplateConsistency(self): """Check whether the disk templates that are going to be disabled are still in use by some instances. """ if self.op.enabled_disk_templates: cluster = self.cfg.GetClusterInfo() instances = self.cfg.GetAllInstancesInfo() disk_templates_to_remove = set(cluster.enabled_disk_templates) \ - set(self.op.enabled_disk_templates) for instance in instances.itervalues(): if instance.disk_template in disk_templates_to_remove: raise errors.OpPrereqError("Cannot disable disk template '%s'," " because instance '%s' is using it." % (instance.disk_template, instance.name)) def _SetVgName(self, feedback_fn): """Determines and sets the new volume group name. """ if self.op.vg_name is not None: new_volume = self.op.vg_name if not new_volume: new_volume = None if new_volume != self.cfg.GetVGName(): self.cfg.SetVGName(new_volume) else: feedback_fn("Cluster LVM configuration already in desired" " state, not changing") def _SetFileStorageDir(self, feedback_fn): """Set the file storage directory. """ if self.op.file_storage_dir is not None: if self.cluster.file_storage_dir == self.op.file_storage_dir: feedback_fn("Global file storage dir already set to value '%s'" % self.cluster.file_storage_dir) else: self.cluster.file_storage_dir = self.op.file_storage_dir def Exec(self, feedback_fn): """Change the parameters of the cluster. 
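    An illustrative client-side sketch (hypothetical usage via the standard
    cli helper; the fields shown are among those handled below):

      op = opcodes.OpClusterSetParams(vg_name="xenvg",
                                      enabled_disk_templates=["plain"])
      cli.SubmitOpCode(op)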
""" if self.op.enabled_disk_templates: self.cluster.enabled_disk_templates = \ list(set(self.op.enabled_disk_templates)) self._SetVgName(feedback_fn) self._SetFileStorageDir(feedback_fn) if self.op.drbd_helper is not None: if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates: feedback_fn("Note that you specified a drbd user helper, but did" " enabled the drbd disk template.") new_helper = self.op.drbd_helper if not new_helper: new_helper = None if new_helper != self.cfg.GetDRBDHelper(): self.cfg.SetDRBDHelper(new_helper) else: feedback_fn("Cluster DRBD helper already in desired state," " not changing") if self.op.hvparams: self.cluster.hvparams = self.new_hvparams if self.op.os_hvp: self.cluster.os_hvp = self.new_os_hvp if self.op.enabled_hypervisors is not None: self.cluster.hvparams = self.new_hvparams self.cluster.enabled_hypervisors = self.op.enabled_hypervisors if self.op.beparams: self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams if self.op.nicparams: self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams if self.op.ipolicy: self.cluster.ipolicy = self.new_ipolicy if self.op.osparams: self.cluster.osparams = self.new_osp if self.op.ndparams: self.cluster.ndparams = self.new_ndparams if self.op.diskparams: self.cluster.diskparams = self.new_diskparams if self.op.hv_state: self.cluster.hv_state_static = self.new_hv_state if self.op.disk_state: self.cluster.disk_state_static = self.new_disk_state if self.op.candidate_pool_size is not None: self.cluster.candidate_pool_size = self.op.candidate_pool_size # we need to update the pool size here, otherwise the save will fail AdjustCandidatePool(self, []) if self.op.maintain_node_health is not None: if self.op.maintain_node_health and not constants.ENABLE_CONFD: feedback_fn("Note: CONFD was disabled at build time, node health" " maintenance is not useful (still enabling it)") self.cluster.maintain_node_health = self.op.maintain_node_health if self.op.modify_etc_hosts is not None: self.cluster.modify_etc_hosts = self.op.modify_etc_hosts if self.op.prealloc_wipe_disks is not None: self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks if self.op.add_uids is not None: uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) if self.op.remove_uids is not None: uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) if self.op.uid_pool is not None: self.cluster.uid_pool = self.op.uid_pool if self.op.default_iallocator is not None: self.cluster.default_iallocator = self.op.default_iallocator if self.op.reserved_lvs is not None: self.cluster.reserved_lvs = self.op.reserved_lvs if self.op.use_external_mip_script is not None: self.cluster.use_external_mip_script = self.op.use_external_mip_script def helper_os(aname, mods, desc): desc += " OS list" lst = getattr(self.cluster, aname) for key, val in mods: if key == constants.DDM_ADD: if val in lst: feedback_fn("OS %s already in %s, ignoring" % (val, desc)) else: lst.append(val) elif key == constants.DDM_REMOVE: if val in lst: lst.remove(val) else: feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) else: raise errors.ProgrammerError("Invalid modification '%s'" % key) if self.op.hidden_os: helper_os("hidden_os", self.op.hidden_os, "hidden") if self.op.blacklisted_os: helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") if self.op.master_netdev: master_params = self.cfg.GetMasterNetworkParameters() ems = self.cfg.GetUseExternalMipScript() feedback_fn("Shutting down master ip on the current netdev (%s)" % 
self.cluster.master_netdev) result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, master_params, ems) if not self.op.force: result.Raise("Could not disable the master ip") else: if result.fail_msg: msg = ("Could not disable the master ip (continuing anyway): %s" % result.fail_msg) feedback_fn(msg) feedback_fn("Changing master_netdev from %s to %s" % (master_params.netdev, self.op.master_netdev)) self.cluster.master_netdev = self.op.master_netdev if self.op.master_netmask: master_params = self.cfg.GetMasterNetworkParameters() feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) result = self.rpc.call_node_change_master_netmask( master_params.uuid, master_params.netmask, self.op.master_netmask, master_params.ip, master_params.netdev) result.Warn("Could not change the master IP netmask", feedback_fn) self.cluster.master_netmask = self.op.master_netmask self.cfg.Update(self.cluster, feedback_fn) if self.op.master_netdev: master_params = self.cfg.GetMasterNetworkParameters() feedback_fn("Starting the master ip on the new master netdev (%s)" % self.op.master_netdev) ems = self.cfg.GetUseExternalMipScript() result = self.rpc.call_node_activate_master_ip(master_params.uuid, master_params, ems) result.Warn("Could not re-enable the master ip on the master," " please restart manually", self.LogWarning) class LUClusterVerify(NoHooksLU): """Submits all jobs necessary to verify the cluster. """ REQ_BGL = False def ExpandNames(self): self.needed_locks = {} def Exec(self, feedback_fn): jobs = [] if self.op.group_name: groups = [self.op.group_name] depends_fn = lambda: None else: groups = self.cfg.GetNodeGroupList() # Verify global configuration jobs.append([ opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), ]) # Always depend on global verification depends_fn = lambda: [(-len(jobs), [])] jobs.extend( [opcodes.OpClusterVerifyGroup(group_name=group, ignore_errors=self.op.ignore_errors, depends=depends_fn())] for group in groups) # Fix up all parameters for op in itertools.chain(*jobs): # pylint: disable=W0142 op.debug_simulate_errors = self.op.debug_simulate_errors op.verbose = self.op.verbose op.error_codes = self.op.error_codes try: op.skip_checks = self.op.skip_checks except AttributeError: assert not isinstance(op, opcodes.OpClusterVerifyGroup) return ResultWithJobs(jobs) class _VerifyErrors(object): """Mix-in for cluster/group verify LUs. It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects self.op and self._feedback_fn to be available.) """ ETYPE_FIELD = "code" ETYPE_ERROR = "ERROR" ETYPE_WARNING = "WARNING" def _Error(self, ecode, item, msg, *args, **kwargs): """Format an error message. Based on the opcode's error_codes parameter, either format a parseable error code, or a simpler error string. This must be called only from Exec and functions called from Exec. """ ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) itype, etxt, _ = ecode # If the error code is in the list of ignored errors, demote the error to a # warning if etxt in self.op.ignore_errors: # pylint: disable=E1101 ltype = self.ETYPE_WARNING # first complete the msg if args: msg = msg % args # then format the whole message if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) else: if item: item = " " + item else: item = "" msg = "%s: %s%s: %s" % (ltype, itype, item, msg) # and finally report it via the feedback_fn self._feedback_fn(" - %s" % msg) # Mix-in. 
pylint: disable=E1101 # do not mark the operation as failed for WARN cases only if ltype == self.ETYPE_ERROR: self.bad = True def _ErrorIf(self, cond, *args, **kwargs): """Log an error message if the passed condition is True. """ if (bool(cond) or self.op.debug_simulate_errors): # pylint: disable=E1101 self._Error(*args, **kwargs) def _VerifyCertificate(filename): """Verifies a certificate for L{LUClusterVerifyConfig}. @type filename: string @param filename: Path to PEM file """ try: cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, utils.ReadFile(filename)) except Exception, err: # pylint: disable=W0703 return (LUClusterVerifyConfig.ETYPE_ERROR, "Failed to load X509 certificate %s: %s" % (filename, err)) (errcode, msg) = \ utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, constants.SSL_CERT_EXPIRATION_ERROR) if msg: fnamemsg = "While verifying %s: %s" % (filename, msg) else: fnamemsg = None if errcode is None: return (None, fnamemsg) elif errcode == utils.CERT_WARNING: return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) elif errcode == utils.CERT_ERROR: return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode) def _GetAllHypervisorParameters(cluster, instances): """Compute the set of all hypervisor parameters. @type cluster: L{objects.Cluster} @param cluster: the cluster object @type instances: list of L{objects.Instance} @param instances: additional instances from which to obtain parameters @rtype: list of (origin, hypervisor, parameters) @return: a list with all parameters found, indicating the hypervisor they apply to, and the origin (can be "cluster", "os X", or "instance Y") """ hvp_data = [] for hv_name in cluster.enabled_hypervisors: hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) for os_name, os_hvp in cluster.os_hvp.items(): for hv_name, hv_params in os_hvp.items(): if hv_params: full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) hvp_data.append(("os %s" % os_name, hv_name, full_params)) # TODO: collapse identical parameter values in a single one for instance in instances: if instance.hvparams: hvp_data.append(("instance %s" % instance.name, instance.hypervisor, cluster.FillHV(instance))) return hvp_data class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors): """Verifies the cluster config. """ REQ_BGL = False def _VerifyHVP(self, hvp_data): """Verifies locally the syntax of the hypervisor parameters. """ for item, hv_name, hv_params in hvp_data: msg = ("hypervisor %s parameters syntax check (source %s): %%s" % (hv_name, item)) try: hv_class = hypervisor.GetHypervisorClass(hv_name) utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) hv_class.CheckParameterSyntax(hv_params) except errors.GenericError, err: self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err)) def ExpandNames(self): self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) self.share_locks = ShareAll() def CheckPrereq(self): """Check prerequisites. """ # Retrieve all information self.all_group_info = self.cfg.GetAllNodeGroupsInfo() self.all_node_info = self.cfg.GetAllNodesInfo() self.all_inst_info = self.cfg.GetAllInstancesInfo() def Exec(self, feedback_fn): """Verify integrity of cluster, performing various tests on nodes. 
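    Despite the generic wording, only cluster-wide items are verified here:
    the serialized configuration, the certificate files, the hypervisor
    parameters, and that all nodes belong to an existing group; per-node and
    per-instance checks are done by LUClusterVerifyGroup.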
""" self.bad = False self._feedback_fn = feedback_fn feedback_fn("* Verifying cluster config") for msg in self.cfg.VerifyConfig(): self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) feedback_fn("* Verifying cluster certificate files") for cert_filename in pathutils.ALL_CERT_FILES: (errcode, msg) = _VerifyCertificate(cert_filename) self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) self._ErrorIf(not utils.CanRead(constants.LUXID_USER, pathutils.NODED_CERT_FILE), constants.CV_ECLUSTERCERT, None, pathutils.NODED_CERT_FILE + " must be accessible by the " + constants.LUXID_USER + " user") feedback_fn("* Verifying hypervisor parameters") self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), self.all_inst_info.values())) feedback_fn("* Verifying all nodes belong to an existing group") # We do this verification here because, should this bogus circumstance # occur, it would never be caught by VerifyGroup, which only acts on # nodes/instances reachable from existing node groups. dangling_nodes = set(node for node in self.all_node_info.values() if node.group not in self.all_group_info) dangling_instances = {} no_node_instances = [] for inst in self.all_inst_info.values(): if inst.primary_node in [node.uuid for node in dangling_nodes]: dangling_instances.setdefault(inst.primary_node, []).append(inst) elif inst.primary_node not in self.all_node_info: no_node_instances.append(inst) pretty_dangling = [ "%s (%s)" % (node.name, utils.CommaJoin(inst.name for inst in dangling_instances.get(node.uuid, []))) for node in dangling_nodes] self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, None, "the following nodes (and their instances) belong to a non" " existing group: %s", utils.CommaJoin(pretty_dangling)) self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, None, "the following instances have a non-existing primary-node:" " %s", utils.CommaJoin(inst.name for inst in no_node_instances)) return not self.bad class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): """Verifies the status of a node group. """ HPATH = "cluster-verify" HTYPE = constants.HTYPE_CLUSTER REQ_BGL = False _HOOKS_INDENT_RE = re.compile("^", re.M) class NodeImage(object): """A class representing the logical and physical status of a node. 
@type uuid: string @ivar uuid: the node UUID to which this object refers @ivar volumes: a structure as returned from L{ganeti.backend.GetVolumeList} (runtime) @ivar instances: a list of running instances (runtime) @ivar pinst: list of configured primary instances (config) @ivar sinst: list of configured secondary instances (config) @ivar sbp: dictionary of {primary-node: list of instances} for all instances for which this node is secondary (config) @ivar mfree: free memory, as reported by hypervisor (runtime) @ivar dfree: free disk, as reported by the node (runtime) @ivar offline: the offline status (config) @type rpc_fail: boolean @ivar rpc_fail: whether the RPC verify call was successful (overall, not whether the individual keys were correct) (runtime) @type lvm_fail: boolean @ivar lvm_fail: whether the RPC call didn't return valid LVM data @type hyp_fail: boolean @ivar hyp_fail: whether the RPC call didn't return the instance list @type ghost: boolean @ivar ghost: whether this is a known node or not (config) @type os_fail: boolean @ivar os_fail: whether the RPC call didn't return valid OS data @type oslist: list @ivar oslist: list of OSes as diagnosed by DiagnoseOS @type vm_capable: boolean @ivar vm_capable: whether the node can host instances @type pv_min: float @ivar pv_min: size in MiB of the smallest PVs @type pv_max: float @ivar pv_max: size in MiB of the biggest PVs """ def __init__(self, offline=False, uuid=None, vm_capable=True): self.uuid = uuid self.volumes = {} self.instances = [] self.pinst = [] self.sinst = [] self.sbp = {} self.mfree = 0 self.dfree = 0 self.offline = offline self.vm_capable = vm_capable self.rpc_fail = False self.lvm_fail = False self.hyp_fail = False self.ghost = False self.os_fail = False self.oslist = {} self.pv_min = None self.pv_max = None def ExpandNames(self): # This raises errors.OpPrereqError on its own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) # Get instances in node group; this is unsafe and needs verification later inst_uuids = \ self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) self.needed_locks = { locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids), locking.LEVEL_NODEGROUP: [self.group_uuid], locking.LEVEL_NODE: [], # This opcode is run by watcher every five minutes and acquires all nodes # for a group. It doesn't run for a long time, so it's better to acquire # the node allocation lock as well. locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } self.share_locks = ShareAll() def DeclareLocks(self, level): if level == locking.LEVEL_NODE: # Get members of node group; this is unsafe and needs verification later nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) # In Exec(), we warn about mirrored instances that have primary and # secondary living in separate node groups. To fully verify that # volumes for these instances are healthy, we will need to do an # extra call to their secondaries. We ensure here those nodes will # be locked. 
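      # The union over all owned instances is built below; it can extend the
      # node lock set beyond the members of this node group.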
for inst_name in self.owned_locks(locking.LEVEL_INSTANCE): # Important: access only the instances whose lock is owned instance = self.cfg.GetInstanceInfoByName(inst_name) if instance.disk_template in constants.DTS_INT_MIRROR: nodes.update(instance.secondary_nodes) self.needed_locks[locking.LEVEL_NODE] = nodes def CheckPrereq(self): assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) self.group_info = self.cfg.GetNodeGroup(self.group_uuid) group_node_uuids = set(self.group_info.members) group_inst_uuids = \ self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) unlocked_node_uuids = \ group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE)) unlocked_inst_uuids = \ group_inst_uuids.difference( [self.cfg.GetInstanceInfoByName(name).uuid for name in self.owned_locks(locking.LEVEL_INSTANCE)]) if unlocked_node_uuids: raise errors.OpPrereqError( "Missing lock for nodes: %s" % utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)), errors.ECODE_STATE) if unlocked_inst_uuids: raise errors.OpPrereqError( "Missing lock for instances: %s" % utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)), errors.ECODE_STATE) self.all_node_info = self.cfg.GetAllNodesInfo() self.all_inst_info = self.cfg.GetAllInstancesInfo() self.my_node_uuids = group_node_uuids self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid]) for node_uuid in group_node_uuids) self.my_inst_uuids = group_inst_uuids self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid]) for inst_uuid in group_inst_uuids) # We detect here the nodes that will need the extra RPC calls for verifying # split LV volumes; they should be locked. extra_lv_nodes = set() for inst in self.my_inst_info.values(): if inst.disk_template in constants.DTS_INT_MIRROR: for nuuid in inst.all_nodes: if self.all_node_info[nuuid].group != self.group_uuid: extra_lv_nodes.add(nuuid) unlocked_lv_nodes = \ extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) if unlocked_lv_nodes: raise errors.OpPrereqError("Missing node locks for LV check: %s" % utils.CommaJoin(unlocked_lv_nodes), errors.ECODE_STATE) self.extra_lv_nodes = list(extra_lv_nodes) def _VerifyNode(self, ninfo, nresult): """Perform some basic validation on data returned from a node. 
- check the result data structure is well formed and has all the mandatory fields - check ganeti version @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the results from the node @rtype: boolean @return: whether overall this call was successful (and we can expect reasonable values in the response) """ # main result, nresult should be a non-empty dict test = not nresult or not isinstance(nresult, dict) self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, "unable to verify node: no data returned") if test: return False # compares ganeti version local_version = constants.PROTOCOL_VERSION remote_version = nresult.get("version", None) test = not (remote_version and isinstance(remote_version, (list, tuple)) and len(remote_version) == 2) self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, "connection to node returned invalid data") if test: return False test = local_version != remote_version[0] self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name, "incompatible protocol versions: master %s," " node %s", local_version, remote_version[0]) if test: return False # node seems compatible, we can actually try to look into its results # full package version self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], constants.CV_ENODEVERSION, ninfo.name, "software version mismatch: master %s, node %s", constants.RELEASE_VERSION, remote_version[1], code=self.ETYPE_WARNING) hyp_result = nresult.get(constants.NV_HYPERVISOR, None) if ninfo.vm_capable and isinstance(hyp_result, dict): for hv_name, hv_result in hyp_result.iteritems(): test = hv_result is not None self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, "hypervisor %s verify failure: '%s'", hv_name, hv_result) hvp_result = nresult.get(constants.NV_HVPARAMS, None) if ninfo.vm_capable and isinstance(hvp_result, list): for item, hv_name, hv_result in hvp_result: self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name, "hypervisor %s parameter verify failure (source %s): %s", hv_name, item, hv_result) test = nresult.get(constants.NV_NODESETUP, ["Missing NODESETUP results"]) self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name, "node setup error: %s", "; ".join(test)) return True def _VerifyNodeTime(self, ninfo, nresult, nvinfo_starttime, nvinfo_endtime): """Check the node time. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param nvinfo_starttime: the start time of the RPC call @param nvinfo_endtime: the end time of the RPC call """ ntime = nresult.get(constants.NV_TIME, None) try: ntime_merged = utils.MergeTime(ntime) except (ValueError, TypeError): self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name, "Node returned invalid time") return if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) else: ntime_diff = None self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name, "Node time diverges by at least %s from master node time", ntime_diff) def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg): """Check the node LVM results and update info for cross-node checks. 
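    Besides the per-node volume group checks, this records the node's
    smallest and biggest PV sizes in the node image, so that
    _VerifyGroupLVM can later compare PV sizes across the whole group.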
@type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param vg_name: the configured VG name @type nimg: L{NodeImage} @param nimg: node image """ if vg_name is None: return # checks vg existence and size > 20G vglist = nresult.get(constants.NV_VGLIST, None) test = not vglist self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, "unable to check volume groups") if not test: vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, constants.MIN_VG_SIZE) self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus) # Check PVs (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage) for em in errmsgs: self._Error(constants.CV_ENODELVM, ninfo.name, em) if pvminmax is not None: (nimg.pv_min, nimg.pv_max) = pvminmax def _VerifyGroupDRBDVersion(self, node_verify_infos): """Check cross-node DRBD version consistency. @type node_verify_infos: dict @param node_verify_infos: infos about nodes as returned from the node_verify call. """ node_versions = {} for node_uuid, ndata in node_verify_infos.items(): nresult = ndata.payload if nresult: version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version") node_versions[node_uuid] = version if len(set(node_versions.values())) > 1: for node_uuid, version in sorted(node_versions.items()): msg = "DRBD version mismatch: %s" % version self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg, code=self.ETYPE_WARNING) def _VerifyGroupLVM(self, node_image, vg_name): """Check cross-node consistency in LVM. @type node_image: dict @param node_image: info about nodes, mapping from node to names to L{NodeImage} objects @param vg_name: the configured VG name """ if vg_name is None: return # Only exclusive storage needs this kind of checks if not self._exclusive_storage: return # exclusive_storage wants all PVs to have the same size (approximately), # if the smallest and the biggest ones are okay, everything is fine. # pv_min is None iff pv_max is None vals = filter((lambda ni: ni.pv_min is not None), node_image.values()) if not vals: return (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals) (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals) bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax) self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name, "PV sizes differ too much in the group; smallest (%s MB) is" " on %s, biggest (%s MB) is on %s", pvmin, self.cfg.GetNodeName(minnode_uuid), pvmax, self.cfg.GetNodeName(maxnode_uuid)) def _VerifyNodeBridges(self, ninfo, nresult, bridges): """Check the node bridges. 
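    Nothing is verified if no bridges were requested; otherwise the node
    must have returned a list, and every bridge reported missing is flagged
    as a network error.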
@type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param bridges: the expected list of bridges """ if not bridges: return missing = nresult.get(constants.NV_BRIDGES, None) test = not isinstance(missing, list) self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, "did not return valid bridge information") if not test: self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name, "missing bridges: %s" % utils.CommaJoin(sorted(missing))) def _VerifyNodeUserScripts(self, ninfo, nresult): """Check the results of user script presence and executability on the node. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node """ test = constants.NV_USERSCRIPTS not in nresult self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name, "did not return user scripts information") broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) if not test: self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name, "user scripts not present or not executable: %s" % utils.CommaJoin(sorted(broken_scripts))) def _VerifyNodeNetwork(self, ninfo, nresult): """Check the node network connectivity results. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node """ test = constants.NV_NODELIST not in nresult self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name, "node hasn't returned node ssh connectivity data") if not test: if nresult[constants.NV_NODELIST]: for a_node, a_msg in nresult[constants.NV_NODELIST].items(): self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name, "ssh communication with node '%s': %s", a_node, a_msg) test = constants.NV_NODENETTEST not in nresult self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, "node hasn't returned node tcp connectivity data") if not test: if nresult[constants.NV_NODENETTEST]: nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) for anode in nlist: self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, "tcp communication with node '%s': %s", anode, nresult[constants.NV_NODENETTEST][anode]) test = constants.NV_MASTERIP not in nresult self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, "node hasn't returned node master IP reachability data") if not test: if not nresult[constants.NV_MASTERIP]: if ninfo.uuid == self.master_node: msg = "the master node cannot reach the master IP (not configured?)" else: msg = "cannot reach the master IP" self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg) def _VerifyInstance(self, instance, node_image, diskstatus): """Verify an instance. This function checks to see if the required block devices are available on the instance's node, and that the nodes are in the correct state. 
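    On top of the block device status, it also warns about ipolicy
    violations, mirrored instances whose nodes span several node groups, and
    instances placed on offline, ghost or non-vm_capable nodes.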
""" pnode_uuid = instance.primary_node pnode_img = node_image[pnode_uuid] groupinfo = self.cfg.GetAllNodeGroupsInfo() node_vol_should = {} instance.MapLVsByNode(node_vol_should) cluster = self.cfg.GetClusterInfo() ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, self.group_info) err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg) self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name, utils.CommaJoin(err), code=self.ETYPE_WARNING) for node_uuid in node_vol_should: n_img = node_image[node_uuid] if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: # ignore missing volumes on offline or broken nodes continue for volume in node_vol_should[node_uuid]: test = volume not in n_img.volumes self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name, "volume %s missing on node %s", volume, self.cfg.GetNodeName(node_uuid)) if instance.admin_state == constants.ADMINST_UP: test = instance.uuid not in pnode_img.instances and not pnode_img.offline self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name, "instance not running on its primary node %s", self.cfg.GetNodeName(pnode_uuid)) self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance.name, "instance is marked as running and lives on" " offline node %s", self.cfg.GetNodeName(pnode_uuid)) diskdata = [(nname, success, status, idx) for (nname, disks) in diskstatus.items() for idx, (success, status) in enumerate(disks)] for nname, success, bdev_status, idx in diskdata: # the 'ghost node' construction in Exec() ensures that we have a # node here snode = node_image[nname] bad_snode = snode.ghost or snode.offline self._ErrorIf(instance.disks_active and not success and not bad_snode, constants.CV_EINSTANCEFAULTYDISK, instance.name, "couldn't retrieve status for disk/%s on %s: %s", idx, self.cfg.GetNodeName(nname), bdev_status) if instance.disks_active and success and \ (bdev_status.is_degraded or bdev_status.ldisk_status != constants.LDS_OKAY): msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname)) if bdev_status.is_degraded: msg += " is degraded" if bdev_status.ldisk_status != constants.LDS_OKAY: msg += "; state is '%s'" % \ constants.LDS_NAMES[bdev_status.ldisk_status] self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg) self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid), "instance %s, connection to primary node failed", instance.name) self._ErrorIf(len(instance.secondary_nodes) > 1, constants.CV_EINSTANCELAYOUT, instance.name, "instance has multiple secondary nodes: %s", utils.CommaJoin(instance.secondary_nodes), code=self.ETYPE_WARNING) es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes) if any(es_flags.values()): if instance.disk_template not in constants.DTS_EXCL_STORAGE: # Disk template not compatible with exclusive_storage: no instance # node should have the flag set es_nodes = [n for (n, es) in es_flags.items() if es] self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name, "instance has template %s, which is not supported on nodes" " that have exclusive storage set: %s", instance.disk_template, utils.CommaJoin(self.cfg.GetNodeNames(es_nodes))) for (idx, disk) in enumerate(instance.disks): self._ErrorIf(disk.spindles is None, constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name, "number of spindles not configured for disk %s while" " exclusive storage is enabled, try running" " gnt-cluster repair-disk-sizes", idx) if instance.disk_template in 
constants.DTS_INT_MIRROR: instance_nodes = utils.NiceSort(instance.all_nodes) instance_groups = {} for node_uuid in instance_nodes: instance_groups.setdefault(self.all_node_info[node_uuid].group, []).append(node_uuid) pretty_list = [ "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)), groupinfo[group].name) # Sort so that we always list the primary node first. for group, nodes in sorted(instance_groups.items(), key=lambda (_, nodes): pnode_uuid in nodes, reverse=True)] self._ErrorIf(len(instance_groups) > 1, constants.CV_EINSTANCESPLITGROUPS, instance.name, "instance has primary and secondary nodes in" " different groups: %s", utils.CommaJoin(pretty_list), code=self.ETYPE_WARNING) inst_nodes_offline = [] for snode in instance.secondary_nodes: s_img = node_image[snode] self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, self.cfg.GetNodeName(snode), "instance %s, connection to secondary node failed", instance.name) if s_img.offline: inst_nodes_offline.append(snode) # warn that the instance lives on offline nodes self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance.name, "instance has offline secondary node(s) %s", utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline))) # ... or ghost/non-vm_capable nodes for node_uuid in instance.all_nodes: self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE, instance.name, "instance lives on ghost node %s", self.cfg.GetNodeName(node_uuid)) self._ErrorIf(not node_image[node_uuid].vm_capable, constants.CV_EINSTANCEBADNODE, instance.name, "instance lives on non-vm_capable node %s", self.cfg.GetNodeName(node_uuid)) def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved): """Verify if there are any unknown volumes in the cluster. The .os, .swap and backup volumes are ignored. All other volumes are reported as unknown. @type reserved: L{ganeti.utils.FieldSet} @param reserved: a FieldSet of reserved volume names """ for node_uuid, n_img in node_image.items(): if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or self.all_node_info[node_uuid].group != self.group_uuid): # skip non-healthy nodes continue for volume in n_img.volumes: test = ((node_uuid not in node_vol_should or volume not in node_vol_should[node_uuid]) and not reserved.Matches(volume)) self._ErrorIf(test, constants.CV_ENODEORPHANLV, self.cfg.GetNodeName(node_uuid), "volume %s is unknown", volume) def _VerifyNPlusOneMemory(self, node_image, all_insts): """Verify N+1 Memory Resilience. Check that if one single node dies we can still start all the instances it was primary for. """ cluster_info = self.cfg.GetClusterInfo() for node_uuid, n_img in node_image.items(): # This code checks that every node which is now listed as # secondary has enough memory to host all the instances it is # secondary for, should a single other node in the cluster fail. # FIXME: not ready for failover to an arbitrary node # FIXME: does not support file-backed instances # WARNING: we currently take into account down instances as well # as up ones, considering that even if they're down someone # might want to start them even in the event of a node failure.
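# Rough worked example (hypothetical numbers, not from the original code):
# if this node is secondary for instances I1 (BE_MINMEM=1024 MiB,
# auto-balanced) and I2 (BE_MINMEM=2048 MiB, auto-balanced), both with the
# same primary node P, then losing P requires 3072 MiB free here; with
# n_img.mfree == 2500 the CV_ENODEN1 error below would be reported.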
if n_img.offline or \ self.all_node_info[node_uuid].group != self.group_uuid: # we're skipping nodes marked offline and nodes in other groups from # the N+1 warning, since most likely we don't have good memory # information from them; we already list instances living on such # nodes, and that's enough warning continue #TODO(dynmem): also consider ballooning out other instances for prinode, inst_uuids in n_img.sbp.items(): needed_mem = 0 for inst_uuid in inst_uuids: bep = cluster_info.FillBE(all_insts[inst_uuid]) if bep[constants.BE_AUTO_BALANCE]: needed_mem += bep[constants.BE_MINMEM] test = n_img.mfree < needed_mem self._ErrorIf(test, constants.CV_ENODEN1, self.cfg.GetNodeName(node_uuid), "not enough memory to accommodate instance failovers" " should node %s fail (%dMiB needed, %dMiB available)", self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree) def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, (files_all, files_opt, files_mc, files_vm)): """Verifies file checksums collected from all nodes. @param nodes: List of L{objects.Node} objects @param master_node_uuid: UUID of master node @param all_nvinfo: RPC results """ # Define functions determining which nodes to consider for a file files2nodefn = [ (files_all, None), (files_mc, lambda node: (node.master_candidate or node.uuid == master_node_uuid)), (files_vm, lambda node: node.vm_capable), ] # Build mapping from filename to list of nodes which should have the file nodefiles = {} for (files, fn) in files2nodefn: if fn is None: filenodes = nodes else: filenodes = filter(fn, nodes) nodefiles.update((filename, frozenset(map(operator.attrgetter("uuid"), filenodes))) for filename in files) assert set(nodefiles) == (files_all | files_mc | files_vm) fileinfo = dict((filename, {}) for filename in nodefiles) ignore_nodes = set() for node in nodes: if node.offline: ignore_nodes.add(node.uuid) continue nresult = all_nvinfo[node.uuid] if nresult.fail_msg or not nresult.payload: node_files = None else: fingerprints = nresult.payload.get(constants.NV_FILELIST, None) node_files = dict((vcluster.LocalizeVirtualPath(key), value) for (key, value) in fingerprints.items()) del fingerprints test = not (node_files and isinstance(node_files, dict)) self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name, "Node did not return file checksum data") if test: ignore_nodes.add(node.uuid) continue # Build per-checksum mapping from filename to nodes having it for (filename, checksum) in node_files.items(): assert filename in nodefiles fileinfo[filename].setdefault(checksum, set()).add(node.uuid) for (filename, checksums) in fileinfo.items(): assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" # Nodes having the file with_file = frozenset(node_uuid for node_uuids in fileinfo[filename].values() for node_uuid in node_uuids) - ignore_nodes expected_nodes = nodefiles[filename] - ignore_nodes # Nodes missing file missing_file = expected_nodes - with_file if filename in files_opt: # All or no nodes self._ErrorIf(missing_file and missing_file != expected_nodes, constants.CV_ECLUSTERFILECHECK, None, "File %s is optional, but it must exist on all or no" " nodes (not found on %s)", filename, utils.CommaJoin( utils.NiceSort( map(self.cfg.GetNodeName, missing_file)))) else: self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None, "File %s is missing from node(s) %s", filename, utils.CommaJoin( utils.NiceSort( map(self.cfg.GetNodeName, missing_file)))) # Warn if a node has a file it shouldn't unexpected = with_file - expected_nodes
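# Hypothetical illustration of the set algebra above (names invented):
# with expected_nodes = {A, B, C} for an optional file and checksums
# reported only by {A}, missing_file = {B, C} != expected_nodes, so the
# all-or-nothing check fires; a checksum reported by a node outside
# expected_nodes lands in 'unexpected' and triggers the error below.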
self._ErrorIf(unexpected, constants.CV_ECLUSTERFILECHECK, None, "File %s should not exist on node(s) %s", filename, utils.CommaJoin( utils.NiceSort(map(self.cfg.GetNodeName, unexpected)))) # See if there are multiple versions of the file test = len(checksums) > 1 if test: variants = ["variant %s on %s" % (idx + 1, utils.CommaJoin(utils.NiceSort( map(self.cfg.GetNodeName, node_uuids)))) for (idx, (checksum, node_uuids)) in enumerate(sorted(checksums.items()))] else: variants = [] self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None, "File %s found with %s different checksums (%s)", filename, len(checksums), "; ".join(variants)) def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, drbd_map): """Verifies the node DRBD status. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param instanceinfo: the dict of instances @param drbd_helper: the configured DRBD usermode helper @param drbd_map: the DRBD map as returned by L{ganeti.config.ConfigWriter.ComputeDRBDMap} """ if drbd_helper: helper_result = nresult.get(constants.NV_DRBDHELPER, None) test = (helper_result is None) self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, "no drbd usermode helper returned") if helper_result: status, payload = helper_result test = not status self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, "drbd usermode helper check unsuccessful: %s", payload) test = status and (payload != drbd_helper) self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, "wrong drbd usermode helper: %s", payload) # compute the DRBD minors node_drbd = {} for minor, inst_uuid in drbd_map[ninfo.uuid].items(): test = inst_uuid not in instanceinfo self._ErrorIf(test, constants.CV_ECLUSTERCFG, None, "ghost instance '%s' in temporary DRBD map", inst_uuid) # ghost instance should not be running, but otherwise we # don't give double warnings (both ghost instance and # unallocated minor in use) if test: node_drbd[minor] = (inst_uuid, False) else: instance = instanceinfo[inst_uuid] node_drbd[minor] = (inst_uuid, instance.disks_active) # and now check them used_minors = nresult.get(constants.NV_DRBDLIST, []) test = not isinstance(used_minors, (tuple, list)) self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name, "cannot parse drbd status file: %s", str(used_minors)) if test: # we cannot check drbd status return for minor, (inst_uuid, must_exist) in node_drbd.items(): test = minor not in used_minors and must_exist self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name, "drbd minor %d of instance %s is not active", minor, self.cfg.GetInstanceName(inst_uuid)) for minor in used_minors: test = minor not in node_drbd self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name, "unallocated drbd minor %d is in use", minor) def _UpdateNodeOS(self, ninfo, nresult, nimg): """Builds the node OS structures.
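Each entry of the C{NV_OSLIST} payload is a 7-element list of (name, path,
status, diagnose, variants, parameters, api_versions). As a purely
illustrative example (hypothetical values), an entry such as
C{["debootstrap", "/srv/ganeti/os/debootstrap", True, "", ["default"],
[["dhcp", "use DHCP"]], [20]]} would be stored under
C{nimg.oslist["debootstrap"]} with the variants, parameters (converted to
tuples) and API versions turned into sets.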
@type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param nimg: the node image object """ remote_os = nresult.get(constants.NV_OSLIST, None) test = (not isinstance(remote_os, list) or not compat.all(isinstance(v, list) and len(v) == 7 for v in remote_os)) self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, "node hasn't returned valid OS data") nimg.os_fail = test if test: return os_dict = {} for (name, os_path, status, diagnose, variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: if name not in os_dict: os_dict[name] = [] # parameters is a list of lists instead of list of tuples due to # JSON lacking a real tuple type, fix it: parameters = [tuple(v) for v in parameters] os_dict[name].append((os_path, status, diagnose, set(variants), set(parameters), set(api_ver))) nimg.oslist = os_dict def _VerifyNodeOS(self, ninfo, nimg, base): """Verifies the node OS list. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nimg: the node image object @param base: the 'template' node we match against (e.g. from the master) """ assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] for os_name, os_data in nimg.oslist.items(): assert os_data, "Empty OS status for OS %s?!" % os_name f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name, "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name, "OS '%s' has multiple entries" " (first one shadows the rest): %s", os_name, utils.CommaJoin([v[0] for v in os_data])) # comparisons with the 'base' image test = os_name not in base.oslist self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, "Extra OS %s not present on reference node (%s)", os_name, self.cfg.GetNodeName(base.uuid)) if test: continue assert base.oslist[os_name], "Base node has empty OS status?" _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] if not b_status: # base OS is invalid, skipping continue for kind, a, b in [("API version", f_api, b_api), ("variants list", f_var, b_var), ("parameters", beautify_params(f_param), beautify_params(b_param))]: self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name, "OS %s for %s differs from reference node %s:" " [%s] vs. [%s]", kind, os_name, self.cfg.GetNodeName(base.uuid), utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) # check any missing OSes missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name, "OSes present on reference node %s" " but missing on this node: %s", self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing)) def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master): """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. 
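Only the master is expected to report C{NV_ACCEPTED_STORAGE_PATHS}. As a
hypothetical example: a master payload of C{["/"]} would be flagged as a
forbidden file storage path in use, an empty payload is fine, and a node
that should not report here triggers an error merely by returning the key.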
@type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @type is_master: bool @param is_master: Whether node is the master node """ cluster = self.cfg.GetClusterInfo() if (is_master and (cluster.IsFileStorageEnabled() or cluster.IsSharedFileStorageEnabled())): try: fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS] except KeyError: # This should never happen self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, "Node did not return forbidden file storage paths") else: self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, "Found forbidden file storage paths: %s", utils.CommaJoin(fspaths)) else: self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, "Node should not have returned forbidden file storage" " paths") def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template, verify_key, error_key): """Verifies (file) storage paths. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @type file_disk_template: string @param file_disk_template: file-based disk template, whose directory is supposed to be verified @type verify_key: string @param verify_key: key for the verification map of this file verification step @param error_key: error key to be added to the verification results in case something goes wrong in this verification step """ assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE)) cluster = self.cfg.GetClusterInfo() if cluster.IsDiskTemplateEnabled(file_disk_template): self._ErrorIf( verify_key in nresult, error_key, ninfo.name, "The configured %s storage path is unusable: %s" % (file_disk_template, nresult.get(verify_key))) def _VerifyFileStoragePaths(self, ninfo, nresult): """Verifies (file) storage paths. @see: C{_VerifyStoragePaths} """ self._VerifyStoragePaths( ninfo, nresult, constants.DT_FILE, constants.NV_FILE_STORAGE_PATH, constants.CV_ENODEFILESTORAGEPATHUNUSABLE) def _VerifySharedFileStoragePaths(self, ninfo, nresult): """Verifies (file) storage paths. @see: C{_VerifyStoragePaths} """ self._VerifyStoragePaths( ninfo, nresult, constants.DT_SHARED_FILE, constants.NV_SHARED_FILE_STORAGE_PATH, constants.CV_ENODESHAREDFILESTORAGEPATHUNUSABLE) def _VerifyOob(self, ninfo, nresult): """Verifies out of band functionality of a node. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node """ # We just have to verify the paths on master and/or master candidates # as the oob helper is invoked on the master if ((ninfo.master_candidate or ninfo.master_capable) and constants.NV_OOB_PATHS in nresult): for path_result in nresult[constants.NV_OOB_PATHS]: self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, ninfo.name, path_result) def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name): """Verifies and updates the node volume data. This function will update a L{NodeImage}'s internal structures with data from the remote call. 
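The C{NV_LVLIST} payload is expected to be a dict of volume data keyed by
volume name (typically C{"<vg>/<lv>"}; the key format here is an
assumption for illustration). A dict payload such as
C{{"xenvg/disk0.data": ...}} is stored as C{nimg.volumes}, while a plain
string payload is treated as an LVM error message and reported as
C{CV_ENODELVM}.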
@type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param nimg: the node image object @param vg_name: the configured VG name """ nimg.lvm_fail = True lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") if vg_name is None: pass elif isinstance(lvdata, basestring): self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, "LVM problem on node: %s", utils.SafeEncode(lvdata)) elif not isinstance(lvdata, dict): self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, "rpc call to node failed (lvlist)") else: nimg.volumes = lvdata nimg.lvm_fail = False def _UpdateNodeInstances(self, ninfo, nresult, nimg): """Verifies and updates the node instance list. If the listing was successful, then updates this node's instance list. Otherwise, it marks the RPC call as failed for the instance list key. @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param nimg: the node image object """ idata = nresult.get(constants.NV_INSTANCELIST, None) test = not isinstance(idata, list) self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, "rpc call to node failed (instancelist): %s", utils.SafeEncode(str(idata))) if test: nimg.hyp_fail = True else: nimg.instances = [inst.uuid for (_, inst) in self.cfg.GetMultiInstanceInfoByName(idata)] def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name): """Verifies and computes a node information map @type ninfo: L{objects.Node} @param ninfo: the node to check @param nresult: the remote results for the node @param nimg: the node image object @param vg_name: the configured VG name """ # try to read free memory (from the hypervisor) hv_info = nresult.get(constants.NV_HVINFO, None) test = not isinstance(hv_info, dict) or "memory_free" not in hv_info self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, "rpc call to node failed (hvinfo)") if not test: try: nimg.mfree = int(hv_info["memory_free"]) except (ValueError, TypeError): self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, "node returned invalid nodeinfo, check hypervisor") # FIXME: devise a free space model for file based instances as well if vg_name is not None: test = (constants.NV_VGLIST not in nresult or vg_name not in nresult[constants.NV_VGLIST]) self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, "node didn't return data for the volume group '%s'" " - it is either missing or broken", vg_name) if not test: try: nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) except (ValueError, TypeError): self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, "node returned invalid LVM info, check LVM status") def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo): """Gets per-disk status information for all instances. 
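For illustration only (hypothetical identifiers): for a two-disk DRBD
instance the result could look like C{{"inst-uuid": {"node-uuid-a":
[(True, st0), (True, st1)], "node-uuid-b": [(True, st0), (True, st1)]}}},
while diskless instances get an empty inner dictionary.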
@type node_uuids: list of strings @param node_uuids: Node UUIDs @type node_image: dict of (UUID, L{objects.Node}) @param node_image: Node objects @type instanceinfo: dict of (UUID, L{objects.Instance}) @param instanceinfo: Instance objects @rtype: {instance: {node: [(success, payload)]}} @return: a dictionary of per-instance dictionaries with nodes as keys and disk information as values; the disk information is a list of tuples (success, payload) """ node_disks = {} node_disks_devonly = {} diskless_instances = set() diskless = constants.DT_DISKLESS for nuuid in node_uuids: node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst, node_image[nuuid].sinst)) diskless_instances.update(uuid for uuid in node_inst_uuids if instanceinfo[uuid].disk_template == diskless) disks = [(inst_uuid, disk) for inst_uuid in node_inst_uuids for disk in instanceinfo[inst_uuid].disks] if not disks: # No need to collect data continue node_disks[nuuid] = disks # _AnnotateDiskParams already makes copies of the disks devonly = [] for (inst_uuid, dev) in disks: (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev], self.cfg) self.cfg.SetDiskID(anno_disk, nuuid) devonly.append(anno_disk) node_disks_devonly[nuuid] = devonly assert len(node_disks) == len(node_disks_devonly) # Collect data from all nodes with disks result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), node_disks_devonly) assert len(result) == len(node_disks) instdisk = {} for (nuuid, nres) in result.items(): node = self.cfg.GetNodeInfo(nuuid) disks = node_disks[node.uuid] if nres.offline: # No data from this node data = len(disks) * [(False, "node offline")] else: msg = nres.fail_msg self._ErrorIf(msg, constants.CV_ENODERPC, node.name, "while getting disk information: %s", msg) if msg: # No data from this node data = len(disks) * [(False, msg)] else: data = [] for idx, i in enumerate(nres.payload): if isinstance(i, (tuple, list)) and len(i) == 2: data.append(i) else: logging.warning("Invalid result from node %s, entry %d: %s", node.name, idx, i) data.append((False, "Invalid result from the remote node")) for ((inst_uuid, _), status) in zip(disks, data): instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \ .append(status) # Add empty entries for diskless instances. for inst_uuid in diskless_instances: assert inst_uuid not in instdisk instdisk[inst_uuid] = {} assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and len(nuuids) <= len(instanceinfo[inst].all_nodes) and compat.all(isinstance(s, (tuple, list)) and len(s) == 2 for s in statuses) for inst, nuuids in instdisk.items() for nuuid, statuses in nuuids.items()) if __debug__: instdisk_keys = set(instdisk) instanceinfo_keys = set(instanceinfo) assert instdisk_keys == instanceinfo_keys, \ ("instdisk keys (%s) do not match instanceinfo keys (%s)" % (instdisk_keys, instanceinfo_keys)) return instdisk @staticmethod def _SshNodeSelector(group_uuid, all_nodes): """Create endless iterators for all potential SSH check hosts. """ nodes = [node for node in all_nodes if (node.group != group_uuid and not node.offline)] keyfunc = operator.attrgetter("group") return map(itertools.cycle, [sorted(map(operator.attrgetter("name"), names)) for _, names in itertools.groupby(sorted(nodes, key=keyfunc), keyfunc)]) @classmethod def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes): """Choose which nodes should talk to which other nodes. We will make nodes contact all nodes in their group, and one node from every other group.
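As a small illustration (hypothetical names): with this group's online
nodes C{[n1, n2]} and two other groups holding C{[a, b]} and C{[c]}, the
cyclic per-group iterators from C{_SshNodeSelector} yield C{n1 -> [a, c]}
and C{n2 -> [b, c]}, so the cross-group checks rotate over the other
groups' members.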
@warning: This algorithm has a known issue if one node group is much smaller than others (e.g. just one node). In such a case all other nodes will talk to the single node. """ online_nodes = sorted(node.name for node in group_nodes if not node.offline) sel = cls._SshNodeSelector(group_uuid, all_nodes) return (online_nodes, dict((name, sorted([i.next() for i in sel])) for name in online_nodes)) def BuildHooksEnv(self): """Build hooks env. Cluster-Verify hooks are run in the post phase only; if they fail, their output is logged in the verify output and the verification fails. """ env = { "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()), } env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags())) for node in self.my_node_info.values()) return env def BuildHooksNodes(self): """Build hooks nodes. """ return ([], list(self.my_node_info.keys())) def Exec(self, feedback_fn): """Verify integrity of the node group, performing various tests on nodes. """ # This method has too many local variables. pylint: disable=R0914 feedback_fn("* Verifying group '%s'" % self.group_info.name) if not self.my_node_uuids: # empty node group feedback_fn("* Empty node group, skipping verification") return True self.bad = False verbose = self.op.verbose self._feedback_fn = feedback_fn vg_name = self.cfg.GetVGName() drbd_helper = self.cfg.GetDRBDHelper() cluster = self.cfg.GetClusterInfo() hypervisors = cluster.enabled_hypervisors node_data_list = self.my_node_info.values() i_non_redundant = [] # Non redundant instances i_non_a_balanced = [] # Non auto-balanced instances i_offline = 0 # Count of offline instances n_offline = 0 # Count of offline nodes n_drained = 0 # Count of nodes being drained node_vol_should = {} # FIXME: verify OS list # File verification filemap = ComputeAncillaryFiles(cluster, False) # do local checksums master_node_uuid = self.master_node = self.cfg.GetMasterNode() master_ip = self.cfg.GetMasterIP() feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids)) user_scripts = [] if self.cfg.GetUseExternalMipScript(): user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) node_verify_param = { constants.NV_FILELIST: map(vcluster.MakeVirtualPath, utils.UniqueSequence(filename for files in filemap for filename in files)), constants.NV_NODELIST: self._SelectSshCheckNodes(node_data_list, self.group_uuid, self.all_node_info.values()), constants.NV_HYPERVISOR: hypervisors, constants.NV_HVPARAMS: _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) for node in node_data_list if not node.offline], constants.NV_INSTANCELIST: hypervisors, constants.NV_VERSION: None, constants.NV_HVINFO: self.cfg.GetHypervisorType(), constants.NV_NODESETUP: None, constants.NV_TIME: None, constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip), constants.NV_OSLIST: None, constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), constants.NV_USERSCRIPTS: user_scripts, } if vg_name is not None: node_verify_param[constants.NV_VGLIST] = None node_verify_param[constants.NV_LVLIST] = vg_name node_verify_param[constants.NV_PVLIST] = [vg_name] if drbd_helper: node_verify_param[constants.NV_DRBDVERSION] = None node_verify_param[constants.NV_DRBDLIST] = None node_verify_param[constants.NV_DRBDHELPER] = drbd_helper if cluster.IsFileStorageEnabled() or \ cluster.IsSharedFileStorageEnabled(): # Load file storage paths only from master node node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
self.cfg.GetMasterNodeName() if cluster.IsFileStorageEnabled(): node_verify_param[constants.NV_FILE_STORAGE_PATH] = \ cluster.file_storage_dir # bridge checks # FIXME: this needs to be changed per node-group, not cluster-wide bridges = set() default_nicpp = cluster.nicparams[constants.PP_DEFAULT] if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: bridges.add(default_nicpp[constants.NIC_LINK]) for instance in self.my_inst_info.values(): for nic in instance.nics: full_nic = cluster.SimpleFillNIC(nic.nicparams) if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: bridges.add(full_nic[constants.NIC_LINK]) if bridges: node_verify_param[constants.NV_BRIDGES] = list(bridges) # Build our expected cluster state node_image = dict((node.uuid, self.NodeImage(offline=node.offline, uuid=node.uuid, vm_capable=node.vm_capable)) for node in node_data_list) # Gather OOB paths oob_paths = [] for node in self.all_node_info.values(): path = SupportsOob(self.cfg, node) if path and path not in oob_paths: oob_paths.append(path) if oob_paths: node_verify_param[constants.NV_OOB_PATHS] = oob_paths for inst_uuid in self.my_inst_uuids: instance = self.my_inst_info[inst_uuid] if instance.admin_state == constants.ADMINST_OFFLINE: i_offline += 1 for nuuid in instance.all_nodes: if nuuid not in node_image: gnode = self.NodeImage(uuid=nuuid) gnode.ghost = (nuuid not in self.all_node_info) node_image[nuuid] = gnode instance.MapLVsByNode(node_vol_should) pnode = instance.primary_node node_image[pnode].pinst.append(instance.uuid) for snode in instance.secondary_nodes: nimg = node_image[snode] nimg.sinst.append(instance.uuid) if pnode not in nimg.sbp: nimg.sbp[pnode] = [] nimg.sbp[pnode].append(instance.uuid) es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, self.my_node_info.keys()) # The value of exclusive_storage should be the same across the group, so if # it's True for at least one node, we act as if it were set for all the nodes self._exclusive_storage = compat.any(es_flags.values()) if self._exclusive_storage: node_verify_param[constants.NV_EXCLUSIVEPVS] = True # At this point, we have the in-memory data structures complete, # except for the runtime information, which we'll gather next # Due to the way our RPC system works, exact response times cannot be # guaranteed (e.g. a broken node could run into a timeout). By keeping the # time before and after executing the request, we can at least have a time # window. nvinfo_starttime = time.time() all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids, node_verify_param, self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams) nvinfo_endtime = time.time() if self.extra_lv_nodes and vg_name is not None: extra_lv_nvinfo = \ self.rpc.call_node_verify(self.extra_lv_nodes, {constants.NV_LVLIST: vg_name}, self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams) else: extra_lv_nvinfo = {} all_drbd_map = self.cfg.ComputeDRBDMap() feedback_fn("* Gathering disk information (%s nodes)" % len(self.my_node_uuids)) instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image, self.my_inst_info) feedback_fn("* Verifying configuration file consistency") # If not all nodes are being checked, we need to make sure the master node # and a non-checked vm_capable node are in the list.
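# Sketch of what the code below does: when verifying a single group, the
# checksums of the master and of one vm_capable node outside the group
# are fetched via an extra call_node_verify restricted to NV_FILELIST,
# so cluster-wide file consistency can still be asserted.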
absent_node_uuids = set(self.all_node_info).difference(self.my_node_info) if absent_node_uuids: vf_nvinfo = all_nvinfo.copy() vf_node_info = list(self.my_node_info.values()) additional_node_uuids = [] if master_node_uuid not in self.my_node_info: additional_node_uuids.append(master_node_uuid) vf_node_info.append(self.all_node_info[master_node_uuid]) # Add the first vm_capable node we find which is not included, # excluding the master node (which we already have) for node_uuid in absent_node_uuids: nodeinfo = self.all_node_info[node_uuid] if (nodeinfo.vm_capable and not nodeinfo.offline and node_uuid != master_node_uuid): additional_node_uuids.append(node_uuid) vf_node_info.append(self.all_node_info[node_uuid]) break key = constants.NV_FILELIST vf_nvinfo.update(self.rpc.call_node_verify( additional_node_uuids, {key: node_verify_param[key]}, self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams)) else: vf_nvinfo = all_nvinfo vf_node_info = self.my_node_info.values() self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap) feedback_fn("* Verifying node status") refos_img = None for node_i in node_data_list: nimg = node_image[node_i.uuid] if node_i.offline: if verbose: feedback_fn("* Skipping offline node %s" % (node_i.name,)) n_offline += 1 continue if node_i.uuid == master_node_uuid: ntype = "master" elif node_i.master_candidate: ntype = "master candidate" elif node_i.drained: ntype = "drained" n_drained += 1 else: ntype = "regular" if verbose: feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype)) msg = all_nvinfo[node_i.uuid].fail_msg self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name, "while contacting node: %s", msg) if msg: nimg.rpc_fail = True continue nresult = all_nvinfo[node_i.uuid].payload nimg.call_ok = self._VerifyNode(node_i, nresult) self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) self._VerifyNodeNetwork(node_i, nresult) self._VerifyNodeUserScripts(node_i, nresult) self._VerifyOob(node_i, nresult) self._VerifyAcceptedFileStoragePaths(node_i, nresult, node_i.uuid == master_node_uuid) self._VerifyFileStoragePaths(node_i, nresult) self._VerifySharedFileStoragePaths(node_i, nresult) if nimg.vm_capable: self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg) self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper, all_drbd_map) self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) self._UpdateNodeInstances(node_i, nresult, nimg) self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) self._UpdateNodeOS(node_i, nresult, nimg) if not nimg.os_fail: if refos_img is None: refos_img = nimg self._VerifyNodeOS(node_i, nimg, refos_img) self._VerifyNodeBridges(node_i, nresult, bridges) # Check whether all running instances are primary for the node. (This # can no longer be done from _VerifyInstance below, since some of the # wrong instances could be from other node groups.) 
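# For example (hypothetical): an instance reported by this node's
# hypervisor whose configured primary node is elsewhere yields
# CV_EINSTANCEWRONGNODE below, while an instance UUID unknown to the
# configuration yields CV_ENODEORPHANINSTANCE.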
non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst) for inst_uuid in non_primary_inst_uuids: test = inst_uuid in self.all_inst_info self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, self.cfg.GetInstanceName(inst_uuid), "instance should not run on node %s", node_i.name) self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name, "node is running unknown instance %s", inst_uuid) self._VerifyGroupDRBDVersion(all_nvinfo) self._VerifyGroupLVM(node_image, vg_name) for node_uuid, result in extra_lv_nvinfo.items(): self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload, node_image[node_uuid], vg_name) feedback_fn("* Verifying instance status") for inst_uuid in self.my_inst_uuids: instance = self.my_inst_info[inst_uuid] if verbose: feedback_fn("* Verifying instance %s" % instance.name) self._VerifyInstance(instance, node_image, instdisk[inst_uuid]) # If the instance is non-redundant we cannot survive losing its primary # node, so we are not N+1 compliant. if instance.disk_template not in constants.DTS_MIRRORED: i_non_redundant.append(instance) if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]: i_non_a_balanced.append(instance) feedback_fn("* Verifying orphan volumes") reserved = utils.FieldSet(*cluster.reserved_lvs) # We will get spurious "unknown volume" warnings if any node of this group # is secondary for an instance whose primary is in another group. To avoid # them, we find these instances and add their volumes to node_vol_should. for instance in self.all_inst_info.values(): for secondary in instance.secondary_nodes: if (secondary in self.my_node_info and instance.name not in self.my_inst_info): instance.MapLVsByNode(node_vol_should) break self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: feedback_fn("* Verifying N+1 Memory redundancy") self._VerifyNPlusOneMemory(node_image, self.my_inst_info) feedback_fn("* Other Notes") if i_non_redundant: feedback_fn(" - NOTICE: %d non-redundant instance(s) found." % len(i_non_redundant)) if i_non_a_balanced: feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." % len(i_non_a_balanced)) if i_offline: feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline) if n_offline: feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) if n_drained: feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) return not self.bad def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result): """Analyze the post-hooks' result This method analyses the hook result, handles it, and sends some nicely-formatted feedback back to the user. 
@param phase: one of L{constants.HOOKS_PHASE_POST} or L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase @param hooks_results: the results of the multi-node hooks rpc call @param feedback_fn: function used to send feedback back to the caller @param lu_result: previous Exec result @return: the new Exec result, based on the previous result and hook results """ # We only really run POST phase hooks, only for non-empty groups, # and are only interested in their results if not self.my_node_uuids: # empty node group pass elif phase == constants.HOOKS_PHASE_POST: # Used to change hooks' output to proper indentation feedback_fn("* Hooks Results") assert hooks_results, "invalid result from hooks" for node_name in hooks_results: res = hooks_results[node_name] msg = res.fail_msg test = msg and not res.offline self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, "Communication failure in hooks execution: %s", msg) if res.offline or msg: # No need to investigate payload if node is offline or gave # an error. continue for script, hkr, output in res.payload: test = hkr == constants.HKR_FAIL self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, "Script %s failed, output:", script) if test: output = self._HOOKS_INDENT_RE.sub("  ", output) feedback_fn("%s" % output) lu_result = False return lu_result class LUClusterVerifyDisks(NoHooksLU): """Verifies the cluster disks status. """ REQ_BGL = False def ExpandNames(self): self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_NODEGROUP: locking.ALL_SET, } def Exec(self, feedback_fn): group_names = self.owned_locks(locking.LEVEL_NODEGROUP) # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)] for group in group_names]) ganeti-2.9.3/lib/cmdlib/node.py0000644000000000000000000016624012271422343016302 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA.
"""Logical units dealing with nodes.""" import logging import operator from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import netutils from ganeti import objects from ganeti import opcodes from ganeti import qlang from ganeti import query from ganeti import rpc from ganeti import utils from ganeti.masterd import iallocator from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, QueryBase, \ ResultWithJobs from ganeti.cmdlib.common import CheckParamsNotGlobal, \ MergeAndVerifyHvState, MergeAndVerifyDiskState, \ IsExclusiveStorageEnabledNode, CheckNodePVs, \ RedistributeAncillaryFiles, ExpandNodeUuidAndName, ShareAll, SupportsOob, \ CheckInstanceState, INSTANCE_DOWN, GetUpdatedParams, \ AdjustCandidatePool, CheckIAllocatorOrNode, LoadNodeEvacResult, \ GetWantedNodes, MapInstanceLvsToNodes, RunPostHook, \ FindFaultyInstanceDisks, CheckStorageTypeEnabled def _DecideSelfPromotion(lu, exceptions=None): """Decide whether I should promote myself as a master candidate. """ cp_size = lu.cfg.GetClusterInfo().candidate_pool_size mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) # the new node will increase mc_max with one, so: mc_should = min(mc_should + 1, cp_size) return mc_now < mc_should def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq): """Ensure that a node has the given secondary ip. @type lu: L{LogicalUnit} @param lu: the LU on behalf of which we make the check @type node: L{objects.Node} @param node: the node to check @type secondary_ip: string @param secondary_ip: the ip to check @type prereq: boolean @param prereq: whether to throw a prerequisite or an execute error @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False """ # this can be called with a new node, which has no UUID yet, so perform the # RPC call using its name result = lu.rpc.call_node_has_ip_address(node.name, secondary_ip) result.Raise("Failure checking secondary ip on node %s" % node.name, prereq=prereq, ecode=errors.ECODE_ENVIRON) if not result.payload: msg = ("Node claims it doesn't have the secondary ip you gave (%s)," " please fix and re-run this command" % secondary_ip) if prereq: raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) else: raise errors.OpExecError(msg) class LUNodeAdd(LogicalUnit): """Logical unit for adding node to the cluster. """ HPATH = "node-add" HTYPE = constants.HTYPE_NODE _NFLAGS = ["master_capable", "vm_capable"] def CheckArguments(self): self.primary_ip_family = self.cfg.GetPrimaryIPFamily() # validate/normalize the node name self.hostname = netutils.GetHostname(name=self.op.node_name, family=self.primary_ip_family) self.op.node_name = self.hostname.name if self.op.readd and self.op.node_name == self.cfg.GetMasterNodeName(): raise errors.OpPrereqError("Cannot readd the master node", errors.ECODE_STATE) if self.op.readd and self.op.group: raise errors.OpPrereqError("Cannot pass a node group when a node is" " being readded", errors.ECODE_INVAL) def BuildHooksEnv(self): """Build hooks env. This will run on all nodes before, and on all nodes + the new node after. """ return { "OP_TARGET": self.op.node_name, "NODE_NAME": self.op.node_name, "NODE_PIP": self.op.primary_ip, "NODE_SIP": self.op.secondary_ip, "MASTER_CAPABLE": str(self.op.master_capable), "VM_CAPABLE": str(self.op.vm_capable), } def BuildHooksNodes(self): """Build hooks nodes. 
""" hook_nodes = self.cfg.GetNodeList() new_node_info = self.cfg.GetNodeInfoByName(self.op.node_name) if new_node_info is not None: # Exclude added node hook_nodes = list(set(hook_nodes) - set([new_node_info.uuid])) # add the new node as post hook node by name; it does not have an UUID yet return (hook_nodes, hook_nodes, [self.op.node_name, ]) def CheckPrereq(self): """Check prerequisites. This checks: - the new node is not already in the config - it is resolvable - its parameters (single/dual homed) matches the cluster Any errors are signaled by raising errors.OpPrereqError. """ node_name = self.hostname.name self.op.primary_ip = self.hostname.ip if self.op.secondary_ip is None: if self.primary_ip_family == netutils.IP6Address.family: raise errors.OpPrereqError("When using a IPv6 primary address, a valid" " IPv4 address must be given as secondary", errors.ECODE_INVAL) self.op.secondary_ip = self.op.primary_ip secondary_ip = self.op.secondary_ip if not netutils.IP4Address.IsValid(secondary_ip): raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" " address" % secondary_ip, errors.ECODE_INVAL) existing_node_info = self.cfg.GetNodeInfoByName(node_name) if not self.op.readd and existing_node_info is not None: raise errors.OpPrereqError("Node %s is already in the configuration" % node_name, errors.ECODE_EXISTS) elif self.op.readd and existing_node_info is None: raise errors.OpPrereqError("Node %s is not in the configuration" % node_name, errors.ECODE_NOENT) self.changed_primary_ip = False for existing_node in self.cfg.GetAllNodesInfo().values(): if self.op.readd and node_name == existing_node.name: if existing_node.secondary_ip != secondary_ip: raise errors.OpPrereqError("Readded node doesn't have the same IP" " address configuration as before", errors.ECODE_INVAL) if existing_node.primary_ip != self.op.primary_ip: self.changed_primary_ip = True continue if (existing_node.primary_ip == self.op.primary_ip or existing_node.secondary_ip == self.op.primary_ip or existing_node.primary_ip == secondary_ip or existing_node.secondary_ip == secondary_ip): raise errors.OpPrereqError("New node ip address(es) conflict with" " existing node %s" % existing_node.name, errors.ECODE_NOTUNIQUE) # After this 'if' block, None is no longer a valid value for the # _capable op attributes if self.op.readd: assert existing_node_info is not None, \ "Can't retrieve locked node %s" % node_name for attr in self._NFLAGS: if getattr(self.op, attr) is None: setattr(self.op, attr, getattr(existing_node_info, attr)) else: for attr in self._NFLAGS: if getattr(self.op, attr) is None: setattr(self.op, attr, True) if self.op.readd and not self.op.vm_capable: pri, sec = self.cfg.GetNodeInstances(existing_node_info.uuid) if pri or sec: raise errors.OpPrereqError("Node %s being re-added with vm_capable" " flag set to false, but it already holds" " instances" % node_name, errors.ECODE_STATE) # check that the type of the node (single versus dual homed) is the # same as for the master myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) master_singlehomed = myself.secondary_ip == myself.primary_ip newbie_singlehomed = secondary_ip == self.op.primary_ip if master_singlehomed != newbie_singlehomed: if master_singlehomed: raise errors.OpPrereqError("The master has no secondary ip but the" " new node has one", errors.ECODE_INVAL) else: raise errors.OpPrereqError("The master has a secondary ip but the" " new node doesn't have one", errors.ECODE_INVAL) # checks reachability if not netutils.TcpPing(self.op.primary_ip, 
constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("Node not reachable by ping", errors.ECODE_ENVIRON) if not newbie_singlehomed: # check reachability from my secondary ip to newbie's secondary ip if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, source=myself.secondary_ip): raise errors.OpPrereqError("Node secondary ip not reachable by TCP" " based ping to node daemon port", errors.ECODE_ENVIRON) if self.op.readd: exceptions = [existing_node_info.uuid] else: exceptions = [] if self.op.master_capable: self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) else: self.master_candidate = False if self.op.readd: self.new_node = existing_node_info else: node_group = self.cfg.LookupNodeGroup(self.op.group) self.new_node = objects.Node(name=node_name, primary_ip=self.op.primary_ip, secondary_ip=secondary_ip, master_candidate=self.master_candidate, offline=False, drained=False, group=node_group, ndparams={}) if self.op.ndparams: utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node", "node", "cluster or group") if self.op.hv_state: self.new_hv_state = MergeAndVerifyHvState(self.op.hv_state, None) if self.op.disk_state: self.new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, None) # TODO: If we need to have multiple DnsOnlyRunner we probably should make # it a property on the base class. rpcrunner = rpc.DnsOnlyRunner() result = rpcrunner.call_version([node_name])[node_name] result.Raise("Can't get version information from node %s" % node_name) if constants.PROTOCOL_VERSION == result.payload: logging.info("Communication to node %s fine, sw version %s match", node_name, result.payload) else: raise errors.OpPrereqError("Version mismatch master version %s," " node version %s" % (constants.PROTOCOL_VERSION, result.payload), errors.ECODE_ENVIRON) vg_name = self.cfg.GetVGName() if vg_name is not None: vparams = {constants.NV_PVLIST: [vg_name]} excl_stor = IsExclusiveStorageEnabledNode(self.cfg, self.new_node) cname = self.cfg.GetClusterName() result = rpcrunner.call_node_verify_light( [node_name], vparams, cname, self.cfg.GetClusterInfo().hvparams)[node_name] (errmsgs, _) = CheckNodePVs(result.payload, excl_stor) if errmsgs: raise errors.OpPrereqError("Checks on node PVs failed: %s" % "; ".join(errmsgs), errors.ECODE_ENVIRON) def Exec(self, feedback_fn): """Adds the new node to the cluster. 
""" assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ "Not owning BGL" # We adding a new node so we assume it's powered self.new_node.powered = True # for re-adds, reset the offline/drained/master-candidate flags; # we need to reset here, otherwise offline would prevent RPC calls # later in the procedure; this also means that if the re-add # fails, we are left with a non-offlined, broken node if self.op.readd: self.new_node.offline = False self.new_node.drained = False self.LogInfo("Readding a node, the offline/drained flags were reset") # if we demote the node, we do cleanup later in the procedure self.new_node.master_candidate = self.master_candidate if self.changed_primary_ip: self.new_node.primary_ip = self.op.primary_ip # copy the master/vm_capable flags for attr in self._NFLAGS: setattr(self.new_node, attr, getattr(self.op, attr)) # notify the user about any possible mc promotion if self.new_node.master_candidate: self.LogInfo("Node will be a master candidate") if self.op.ndparams: self.new_node.ndparams = self.op.ndparams else: self.new_node.ndparams = {} if self.op.hv_state: self.new_node.hv_state_static = self.new_hv_state if self.op.disk_state: self.new_node.disk_state_static = self.new_disk_state # Add node to our /etc/hosts, and add key to known_hosts if self.cfg.GetClusterInfo().modify_etc_hosts: master_node = self.cfg.GetMasterNode() result = self.rpc.call_etc_hosts_modify( master_node, constants.ETC_HOSTS_ADD, self.hostname.name, self.hostname.ip) result.Raise("Can't update hosts file with new host data") if self.new_node.secondary_ip != self.new_node.primary_ip: _CheckNodeHasSecondaryIP(self, self.new_node, self.new_node.secondary_ip, False) node_verifier_uuids = [self.cfg.GetMasterNode()] node_verify_param = { constants.NV_NODELIST: ([self.new_node.name], {}), # TODO: do a node-net-test as well? } result = self.rpc.call_node_verify( node_verifier_uuids, node_verify_param, self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams) for verifier in node_verifier_uuids: result[verifier].Raise("Cannot communicate with node %s" % verifier) nl_payload = result[verifier].payload[constants.NV_NODELIST] if nl_payload: for failed in nl_payload: feedback_fn("ssh/hostname verification failed" " (checking from %s): %s" % (verifier, nl_payload[failed])) raise errors.OpExecError("ssh/hostname verification failed") if self.op.readd: self.context.ReaddNode(self.new_node) RedistributeAncillaryFiles(self) # make sure we redistribute the config self.cfg.Update(self.new_node, feedback_fn) # and make sure the new node will not have old files around if not self.new_node.master_candidate: result = self.rpc.call_node_demote_from_mc(self.new_node.uuid) result.Warn("Node failed to demote itself from master candidate status", self.LogWarning) else: self.context.AddNode(self.new_node, self.proc.GetECId()) RedistributeAncillaryFiles(self) class LUNodeSetParams(LogicalUnit): """Modifies the parameters of a node. 
@cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) to the node role (as _ROLE_*) @cvar _R2F: a dictionary from node role to tuples of flags @cvar _FLAGS: a list of attribute names corresponding to the flags """ HPATH = "node-modify" HTYPE = constants.HTYPE_NODE REQ_BGL = False (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) _F2R = { (True, False, False): _ROLE_CANDIDATE, (False, True, False): _ROLE_DRAINED, (False, False, True): _ROLE_OFFLINE, (False, False, False): _ROLE_REGULAR, } _R2F = dict((v, k) for k, v in _F2R.items()) _FLAGS = ["master_candidate", "drained", "offline"] def CheckArguments(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, self.op.master_capable, self.op.vm_capable, self.op.secondary_ip, self.op.ndparams, self.op.hv_state, self.op.disk_state] if all_mods.count(None) == len(all_mods): raise errors.OpPrereqError("Please pass at least one modification", errors.ECODE_INVAL) if all_mods.count(True) > 1: raise errors.OpPrereqError("Can't set the node into more than one" " state at the same time", errors.ECODE_INVAL) # Boolean value that tells us whether we might be demoting from MC self.might_demote = (self.op.master_candidate is False or self.op.offline is True or self.op.drained is True or self.op.master_capable is False) if self.op.secondary_ip: if not netutils.IP4Address.IsValid(self.op.secondary_ip): raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" " address" % self.op.secondary_ip, errors.ECODE_INVAL) self.lock_all = self.op.auto_promote and self.might_demote self.lock_instances = self.op.secondary_ip is not None def _InstanceFilter(self, instance): """Filter for getting affected instances. """ return (instance.disk_template in constants.DTS_INT_MIRROR and self.op.node_uuid in instance.all_nodes) def ExpandNames(self): if self.lock_all: self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, # Block allocations when all nodes are locked locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } else: self.needed_locks = { locking.LEVEL_NODE: self.op.node_uuid, } # Since modifying a node can have severe effects on currently running # operations the resource lock is at least acquired in shared mode self.needed_locks[locking.LEVEL_NODE_RES] = \ self.needed_locks[locking.LEVEL_NODE] # Get all locks except nodes in shared mode; they are not used for anything # but read-only access self.share_locks = ShareAll() self.share_locks[locking.LEVEL_NODE] = 0 self.share_locks[locking.LEVEL_NODE_RES] = 0 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0 if self.lock_instances: self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetInstancesInfoByFilter(self._InstanceFilter).keys()) def BuildHooksEnv(self): """Build hooks env. This runs on the master node. """ return { "OP_TARGET": self.op.node_name, "MASTER_CANDIDATE": str(self.op.master_candidate), "OFFLINE": str(self.op.offline), "DRAINED": str(self.op.drained), "MASTER_CAPABLE": str(self.op.master_capable), "VM_CAPABLE": str(self.op.vm_capable), } def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode(), self.op.node_uuid] return (nl, nl) def CheckPrereq(self): """Check prerequisites. This only checks the instance list against the existing names. 
""" node = self.cfg.GetNodeInfo(self.op.node_uuid) if self.lock_instances: affected_instances = \ self.cfg.GetInstancesInfoByFilter(self._InstanceFilter) # Verify instance locks owned_instance_names = self.owned_locks(locking.LEVEL_INSTANCE) wanted_instance_names = frozenset([inst.name for inst in affected_instances.values()]) if wanted_instance_names - owned_instance_names: raise errors.OpPrereqError("Instances affected by changing node %s's" " secondary IP address have changed since" " locks were acquired, wanted '%s', have" " '%s'; retry the operation" % (node.name, utils.CommaJoin(wanted_instance_names), utils.CommaJoin(owned_instance_names)), errors.ECODE_STATE) else: affected_instances = None if (self.op.master_candidate is not None or self.op.drained is not None or self.op.offline is not None): # we can't change the master's node flags if node.uuid == self.cfg.GetMasterNode(): raise errors.OpPrereqError("The master role can be changed" " only via master-failover", errors.ECODE_INVAL) if self.op.master_candidate and not node.master_capable: raise errors.OpPrereqError("Node %s is not master capable, cannot make" " it a master candidate" % node.name, errors.ECODE_STATE) if self.op.vm_capable is False: (ipri, isec) = self.cfg.GetNodeInstances(node.uuid) if ipri or isec: raise errors.OpPrereqError("Node %s hosts instances, cannot unset" " the vm_capable flag" % node.name, errors.ECODE_STATE) if node.master_candidate and self.might_demote and not self.lock_all: assert not self.op.auto_promote, "auto_promote set but lock_all not" # check if after removing the current node, we're missing master # candidates (mc_remaining, mc_should, _) = \ self.cfg.GetMasterCandidateStats(exceptions=[node.uuid]) if mc_remaining < mc_should: raise errors.OpPrereqError("Not enough master candidates, please" " pass auto promote option to allow" " promotion (--auto-promote or RAPI" " auto_promote=True)", errors.ECODE_STATE) self.old_flags = old_flags = (node.master_candidate, node.drained, node.offline) assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) self.old_role = old_role = self._F2R[old_flags] # Check for ineffective changes for attr in self._FLAGS: if getattr(self.op, attr) is False and getattr(node, attr) is False: self.LogInfo("Ignoring request to unset flag %s, already unset", attr) setattr(self.op, attr, None) # Past this point, any flag change to False means a transition # away from the respective state, as only real changes are kept # TODO: We might query the real power state if it supports OOB if SupportsOob(self.cfg, node): if self.op.offline is False and not (node.powered or self.op.powered is True): raise errors.OpPrereqError(("Node %s needs to be turned on before its" " offline status can be reset") % self.op.node_name, errors.ECODE_STATE) elif self.op.powered is not None: raise errors.OpPrereqError(("Unable to change powered state for node %s" " as it does not support out-of-band" " handling") % self.op.node_name, errors.ECODE_STATE) # If we're being deofflined/drained, we'll MC ourself if needed if (self.op.drained is False or self.op.offline is False or (self.op.master_capable and not node.master_capable)): if _DecideSelfPromotion(self): self.op.master_candidate = True self.LogInfo("Auto-promoting node to master candidate") # If we're no longer master capable, we'll demote ourselves from MC if self.op.master_capable is False and node.master_candidate: self.LogInfo("Demoting from master candidate") self.op.master_candidate = False # Compute new role assert 
[getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 if self.op.master_candidate: new_role = self._ROLE_CANDIDATE elif self.op.drained: new_role = self._ROLE_DRAINED elif self.op.offline: new_role = self._ROLE_OFFLINE elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: # False is still in new flags, which means we're un-setting (the # only) True flag new_role = self._ROLE_REGULAR else: # no new flags, nothing, keep old role new_role = old_role self.new_role = new_role if old_role == self._ROLE_OFFLINE and new_role != old_role: # Trying to transition out of offline status result = self.rpc.call_version([node.uuid])[node.uuid] if result.fail_msg: raise errors.OpPrereqError("Node %s is being de-offlined but fails" " to report its version: %s" % (node.name, result.fail_msg), errors.ECODE_STATE) else: self.LogWarning("Transitioning node from offline to online state" " without using re-add. Please make sure the node" " is healthy!") # When changing the secondary ip, verify if this is a single-homed to # multi-homed transition or vice versa, and apply the relevant # restrictions. if self.op.secondary_ip: # Ok even without locking, because this can't be changed by any LU master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) master_singlehomed = master.secondary_ip == master.primary_ip if master_singlehomed and self.op.secondary_ip != node.primary_ip: if self.op.force and node.uuid == master.uuid: self.LogWarning("Transitioning from single-homed to multi-homed" " cluster; all nodes will require a secondary IP" " address") else: raise errors.OpPrereqError("Changing the secondary ip on a" " single-homed cluster requires the" " --force option to be passed, and the" " target node to be the master", errors.ECODE_INVAL) elif not master_singlehomed and self.op.secondary_ip == node.primary_ip: if self.op.force and node.uuid == master.uuid: self.LogWarning("Transitioning from multi-homed to single-homed" " cluster; secondary IP addresses will have to be" " removed") else: raise errors.OpPrereqError("Cannot set the secondary IP to be the" " same as the primary IP on a multi-homed" " cluster, unless the --force option is" " passed, and the target node is the" " master", errors.ECODE_INVAL) assert not (set([inst.name for inst in affected_instances.values()]) - self.owned_locks(locking.LEVEL_INSTANCE)) if node.offline: if affected_instances: msg = ("Cannot change secondary IP address: offline node has" " instances (%s) configured to use it" % utils.CommaJoin( [inst.name for inst in affected_instances.values()])) raise errors.OpPrereqError(msg, errors.ECODE_STATE) else: # On online nodes, check that no instances are running, and that # the node has the new ip and we can reach it. 
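# Illustrative note (an assumption based on the checks below): the
# secondary IP carries inter-node replication traffic, so every affected
# DRBD instance must be down before the address may change; the new
# address is then verified on the node and pinged from the master's
# secondary IP.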
for instance in affected_instances.values(): CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot change secondary ip") _CheckNodeHasSecondaryIP(self, node, self.op.secondary_ip, True) if master.uuid != node.uuid: # check reachability from master secondary ip to new secondary ip if not netutils.TcpPing(self.op.secondary_ip, constants.DEFAULT_NODED_PORT, source=master.secondary_ip): raise errors.OpPrereqError("Node secondary ip not reachable by TCP" " based ping to node daemon port", errors.ECODE_ENVIRON) if self.op.ndparams: new_ndparams = GetUpdatedParams(node.ndparams, self.op.ndparams) utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node", "node", "cluster or group") self.new_ndparams = new_ndparams if self.op.hv_state: self.new_hv_state = MergeAndVerifyHvState(self.op.hv_state, node.hv_state_static) if self.op.disk_state: self.new_disk_state = \ MergeAndVerifyDiskState(self.op.disk_state, node.disk_state_static) def Exec(self, feedback_fn): """Modifies a node. """ node = self.cfg.GetNodeInfo(self.op.node_uuid) result = [] if self.op.ndparams: node.ndparams = self.new_ndparams if self.op.powered is not None: node.powered = self.op.powered if self.op.hv_state: node.hv_state_static = self.new_hv_state if self.op.disk_state: node.disk_state_static = self.new_disk_state for attr in ["master_capable", "vm_capable"]: val = getattr(self.op, attr) if val is not None: setattr(node, attr, val) result.append((attr, str(val))) if self.new_role != self.old_role: # Tell the node to demote itself, if no longer MC and not offline if self.old_role == self._ROLE_CANDIDATE and \ self.new_role != self._ROLE_OFFLINE: msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg if msg: self.LogWarning("Node failed to demote itself: %s", msg) new_flags = self._R2F[self.new_role] for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): if of != nf: result.append((desc, str(nf))) (node.master_candidate, node.drained, node.offline) = new_flags # we locked all nodes, we adjust the CP before updating this node if self.lock_all: AdjustCandidatePool(self, [node.uuid]) if self.op.secondary_ip: node.secondary_ip = self.op.secondary_ip result.append(("secondary_ip", self.op.secondary_ip)) # this will trigger configuration file update, if needed self.cfg.Update(node, feedback_fn) # this will trigger job queue propagation or cleanup if the mc # flag changed if [self.old_role, self.new_role].count(self._ROLE_CANDIDATE) == 1: self.context.ReaddNode(node) return result class LUNodePowercycle(NoHooksLU): """Powercycles a node. """ REQ_BGL = False def CheckArguments(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) if self.op.node_uuid == self.cfg.GetMasterNode() and not self.op.force: raise errors.OpPrereqError("The node is the master and the force" " parameter was not set", errors.ECODE_INVAL) def ExpandNames(self): """Locking for PowercycleNode. This is a last-resort option and shouldn't block on other jobs. Therefore, we grab no locks. """ self.needed_locks = {} def Exec(self, feedback_fn): """Reboots a node. 
""" default_hypervisor = self.cfg.GetHypervisorType() hvparams = self.cfg.GetClusterInfo().hvparams[default_hypervisor] result = self.rpc.call_node_powercycle(self.op.node_uuid, default_hypervisor, hvparams) result.Raise("Failed to schedule the reboot") return result.payload def _GetNodeInstancesInner(cfg, fn): return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)] def _GetNodePrimaryInstances(cfg, node_uuid): """Returns primary instances on a node. """ return _GetNodeInstancesInner(cfg, lambda inst: node_uuid == inst.primary_node) def _GetNodeSecondaryInstances(cfg, node_uuid): """Returns secondary instances on a node. """ return _GetNodeInstancesInner(cfg, lambda inst: node_uuid in inst.secondary_nodes) def _GetNodeInstances(cfg, node_uuid): """Returns a list of all primary and secondary instances on a node. """ return _GetNodeInstancesInner(cfg, lambda inst: node_uuid in inst.all_nodes) class LUNodeEvacuate(NoHooksLU): """Evacuates instances off a list of nodes. """ REQ_BGL = False _MODE2IALLOCATOR = { constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI, constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC, constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL, } assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES assert (frozenset(_MODE2IALLOCATOR.values()) == constants.IALLOCATOR_NEVAC_MODES) def CheckArguments(self): CheckIAllocatorOrNode(self, "iallocator", "remote_node") def ExpandNames(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) if self.op.remote_node is not None: (self.op.remote_node_uuid, self.op.remote_node) = \ ExpandNodeUuidAndName(self.cfg, self.op.remote_node_uuid, self.op.remote_node) assert self.op.remote_node if self.op.node_uuid == self.op.remote_node_uuid: raise errors.OpPrereqError("Can not use evacuated node as a new" " secondary node", errors.ECODE_INVAL) if self.op.mode != constants.NODE_EVAC_SEC: raise errors.OpPrereqError("Without the use of an iallocator only" " secondary instances can be evacuated", errors.ECODE_INVAL) # Declare locks self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [], locking.LEVEL_NODE: [], } # Determine nodes (via group) optimistically, needs verification once locks # have been acquired self.lock_nodes = self._DetermineNodes() def _DetermineNodes(self): """Gets the list of node UUIDs to operate on. """ if self.op.remote_node is None: # Iallocator will choose any node(s) in the same group group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_uuid]) else: group_nodes = frozenset([self.op.remote_node_uuid]) # Determine nodes to be locked return set([self.op.node_uuid]) | group_nodes def _DetermineInstances(self): """Builds list of instances to operate on. 
""" assert self.op.mode in constants.NODE_EVAC_MODES if self.op.mode == constants.NODE_EVAC_PRI: # Primary instances only inst_fn = _GetNodePrimaryInstances assert self.op.remote_node is None, \ "Evacuating primary instances requires iallocator" elif self.op.mode == constants.NODE_EVAC_SEC: # Secondary instances only inst_fn = _GetNodeSecondaryInstances else: # All instances assert self.op.mode == constants.NODE_EVAC_ALL inst_fn = _GetNodeInstances # TODO: In 2.6, change the iallocator interface to take an evacuation mode # per instance raise errors.OpPrereqError("Due to an issue with the iallocator" " interface it is not possible to evacuate" " all instances at once; specify explicitly" " whether to evacuate primary or secondary" " instances", errors.ECODE_INVAL) return inst_fn(self.cfg, self.op.node_uuid) def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: # Lock instances optimistically, needs verification once node and group # locks have been acquired self.needed_locks[locking.LEVEL_INSTANCE] = \ set(i.name for i in self._DetermineInstances()) elif level == locking.LEVEL_NODEGROUP: # Lock node groups for all potential target nodes optimistically, needs # verification once nodes have been acquired self.needed_locks[locking.LEVEL_NODEGROUP] = \ self.cfg.GetNodeGroupsFromNodes(self.lock_nodes) elif level == locking.LEVEL_NODE: self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes def CheckPrereq(self): # Verify locks owned_instance_names = self.owned_locks(locking.LEVEL_INSTANCE) owned_nodes = self.owned_locks(locking.LEVEL_NODE) owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) need_nodes = self._DetermineNodes() if not owned_nodes.issuperset(need_nodes): raise errors.OpPrereqError("Nodes in same group as '%s' changed since" " locks were acquired, current nodes are" " are '%s', used to be '%s'; retry the" " operation" % (self.op.node_name, utils.CommaJoin(need_nodes), utils.CommaJoin(owned_nodes)), errors.ECODE_STATE) wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes) if owned_groups != wanted_groups: raise errors.OpExecError("Node groups changed since locks were acquired," " current groups are '%s', used to be '%s';" " retry the operation" % (utils.CommaJoin(wanted_groups), utils.CommaJoin(owned_groups))) # Determine affected instances self.instances = self._DetermineInstances() self.instance_names = [i.name for i in self.instances] if set(self.instance_names) != owned_instance_names: raise errors.OpExecError("Instances on node '%s' changed since locks" " were acquired, current instances are '%s'," " used to be '%s'; retry the operation" % (self.op.node_name, utils.CommaJoin(self.instance_names), utils.CommaJoin(owned_instance_names))) if self.instance_names: self.LogInfo("Evacuating instances from node '%s': %s", self.op.node_name, utils.CommaJoin(utils.NiceSort(self.instance_names))) else: self.LogInfo("No instances to evacuate from node '%s'", self.op.node_name) if self.op.remote_node is not None: for i in self.instances: if i.primary_node == self.op.remote_node_uuid: raise errors.OpPrereqError("Node %s is the primary node of" " instance %s, cannot use it as" " secondary" % (self.op.remote_node, i.name), errors.ECODE_INVAL) def Exec(self, feedback_fn): assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None) if not self.instance_names: # No instances to evacuate jobs = [] elif self.op.iallocator is not None: # TODO: Implement relocation to other group evac_mode = self._MODE2IALLOCATOR[self.op.mode] req = 
iallocator.IAReqNodeEvac(evac_mode=evac_mode, instances=list(self.instance_names)) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) if not ial.success: raise errors.OpPrereqError("Can't compute node evacuation using" " iallocator '%s': %s" % (self.op.iallocator, ial.info), errors.ECODE_NORES) jobs = LoadNodeEvacResult(self, ial.result, self.op.early_release, True) elif self.op.remote_node is not None: assert self.op.mode == constants.NODE_EVAC_SEC jobs = [ [opcodes.OpInstanceReplaceDisks(instance_name=instance_name, remote_node=self.op.remote_node, disks=[], mode=constants.REPLACE_DISK_CHG, early_release=self.op.early_release)] for instance_name in self.instance_names] else: raise errors.ProgrammerError("No iallocator or remote node") return ResultWithJobs(jobs) class LUNodeMigrate(LogicalUnit): """Migrate all instances from a node. """ HPATH = "node-migrate" HTYPE = constants.HTYPE_NODE REQ_BGL = False def CheckArguments(self): pass def ExpandNames(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_NODE: [self.op.node_uuid], } def BuildHooksEnv(self): """Build hooks env. This runs on the master, the primary and all the secondaries. """ return { "NODE_NAME": self.op.node_name, "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, } def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] return (nl, nl) def CheckPrereq(self): pass def Exec(self, feedback_fn): # Prepare jobs for migration instances jobs = [ [opcodes.OpInstanceMigrate( instance_name=inst.name, mode=self.op.mode, live=self.op.live, iallocator=self.op.iallocator, target_node=self.op.target_node, allow_runtime_changes=self.op.allow_runtime_changes, ignore_ipolicy=self.op.ignore_ipolicy)] for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_uuid)] # TODO: Run iallocator in this opcode and pass correct placement options to # OpInstanceMigrate. Since other jobs can modify the cluster between # running the iallocator and the actual migration, a good consistency model # will have to be found. assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == frozenset([self.op.node_uuid])) return ResultWithJobs(jobs) def _GetStorageTypeArgs(cfg, storage_type): """Returns the arguments for a storage type. """ # Special case for file storage if storage_type == constants.ST_FILE: # storage.FileStorage wants a list of storage directories return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]] return [] class LUNodeModifyStorage(NoHooksLU): """Logical unit for modifying a storage volume on a node. """ REQ_BGL = False def CheckArguments(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) storage_type = self.op.storage_type try: modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] except KeyError: raise errors.OpPrereqError("Storage units of type '%s' can not be" " modified" % storage_type, errors.ECODE_INVAL) diff = set(self.op.changes.keys()) - modifiable if diff: raise errors.OpPrereqError("The following fields can not be modified for" " storage units of type '%s': %r" % (storage_type, list(diff)), errors.ECODE_INVAL) def CheckPrereq(self): """Check prerequisites. 
""" CheckStorageTypeEnabled(self.cfg.GetClusterInfo(), self.op.storage_type) def ExpandNames(self): self.needed_locks = { locking.LEVEL_NODE: self.op.node_uuid, } def Exec(self, feedback_fn): """Computes the list of nodes and their attributes. """ st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) result = self.rpc.call_storage_modify(self.op.node_uuid, self.op.storage_type, st_args, self.op.name, self.op.changes) result.Raise("Failed to modify storage unit '%s' on %s" % (self.op.name, self.op.node_name)) class NodeQuery(QueryBase): FIELDS = query.NODE_FIELDS def ExpandNames(self, lu): lu.needed_locks = {} lu.share_locks = ShareAll() if self.names: (self.wanted, _) = GetWantedNodes(lu, self.names) else: self.wanted = locking.ALL_SET self.do_locking = (self.use_locking and query.NQ_LIVE in self.requested_data) if self.do_locking: # If any non-static field is requested we need to lock the nodes lu.needed_locks[locking.LEVEL_NODE] = self.wanted lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET def DeclareLocks(self, lu, level): pass def _GetQueryData(self, lu): """Computes the list of nodes and their attributes. """ all_info = lu.cfg.GetAllNodesInfo() node_uuids = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE) # Gather data as requested if query.NQ_LIVE in self.requested_data: # filter out non-vm_capable nodes toquery_node_uuids = [node.uuid for node in all_info.values() if node.vm_capable and node.uuid in node_uuids] lvm_enabled = utils.storage.IsLvmEnabled( lu.cfg.GetClusterInfo().enabled_disk_templates) # FIXME: this per default asks for storage space information for all # enabled disk templates. Fix this by making it possible to specify # space report fields for specific disk templates. raw_storage_units = utils.storage.GetStorageUnitsOfCluster( lu.cfg, include_spindles=lvm_enabled) storage_units = rpc.PrepareStorageUnitsForNodes( lu.cfg, raw_storage_units, toquery_node_uuids) default_hypervisor = lu.cfg.GetHypervisorType() hvparams = lu.cfg.GetClusterInfo().hvparams[default_hypervisor] hvspecs = [(default_hypervisor, hvparams)] node_data = lu.rpc.call_node_info(toquery_node_uuids, storage_units, hvspecs) live_data = dict( (uuid, rpc.MakeLegacyNodeInfo(nresult.payload, require_spindles=lvm_enabled)) for (uuid, nresult) in node_data.items() if not nresult.fail_msg and nresult.payload) else: live_data = None if query.NQ_INST in self.requested_data: node_to_primary = dict([(uuid, set()) for uuid in node_uuids]) node_to_secondary = dict([(uuid, set()) for uuid in node_uuids]) inst_data = lu.cfg.GetAllInstancesInfo() inst_uuid_to_inst_name = {} for inst in inst_data.values(): inst_uuid_to_inst_name[inst.uuid] = inst.name if inst.primary_node in node_to_primary: node_to_primary[inst.primary_node].add(inst.uuid) for secnode in inst.secondary_nodes: if secnode in node_to_secondary: node_to_secondary[secnode].add(inst.uuid) else: node_to_primary = None node_to_secondary = None inst_uuid_to_inst_name = None if query.NQ_OOB in self.requested_data: oob_support = dict((uuid, bool(SupportsOob(lu.cfg, node))) for uuid, node in all_info.iteritems()) else: oob_support = None if query.NQ_GROUP in self.requested_data: groups = lu.cfg.GetAllNodeGroupsInfo() else: groups = {} return query.NodeQueryData([all_info[uuid] for uuid in node_uuids], live_data, lu.cfg.GetMasterNode(), node_to_primary, node_to_secondary, inst_uuid_to_inst_name, groups, oob_support, lu.cfg.GetClusterInfo()) class LUNodeQuery(NoHooksLU): """Logical unit for querying nodes. 
""" # pylint: disable=W0142 REQ_BGL = False def CheckArguments(self): self.nq = NodeQuery(qlang.MakeSimpleFilter("name", self.op.names), self.op.output_fields, self.op.use_locking) def ExpandNames(self): self.nq.ExpandNames(self) def DeclareLocks(self, level): self.nq.DeclareLocks(self, level) def Exec(self, feedback_fn): return self.nq.OldStyleQuery(self) def _CheckOutputFields(static, dynamic, selected): """Checks whether all selected fields are valid. @type static: L{utils.FieldSet} @param static: static fields set @type dynamic: L{utils.FieldSet} @param dynamic: dynamic fields set """ f = utils.FieldSet() f.Extend(static) f.Extend(dynamic) delta = f.NonMatching(selected) if delta: raise errors.OpPrereqError("Unknown output fields selected: %s" % ",".join(delta), errors.ECODE_INVAL) class LUNodeQueryvols(NoHooksLU): """Logical unit for getting volumes on node(s). """ REQ_BGL = False _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") _FIELDS_STATIC = utils.FieldSet("node") def CheckArguments(self): _CheckOutputFields(static=self._FIELDS_STATIC, dynamic=self._FIELDS_DYNAMIC, selected=self.op.output_fields) def ExpandNames(self): self.share_locks = ShareAll() if self.op.nodes: self.needed_locks = { locking.LEVEL_NODE: GetWantedNodes(self, self.op.nodes)[0], } else: self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } def Exec(self, feedback_fn): """Computes the list of nodes and their attributes. """ node_uuids = self.owned_locks(locking.LEVEL_NODE) volumes = self.rpc.call_node_volumes(node_uuids) ilist = self.cfg.GetAllInstancesInfo() vol2inst = MapInstanceLvsToNodes(ilist.values()) output = [] for node_uuid in node_uuids: nresult = volumes[node_uuid] if nresult.offline: continue msg = nresult.fail_msg if msg: self.LogWarning("Can't compute volume data on node %s: %s", self.cfg.GetNodeName(node_uuid), msg) continue node_vols = sorted(nresult.payload, key=operator.itemgetter("dev")) for vol in node_vols: node_output = [] for field in self.op.output_fields: if field == "node": val = self.cfg.GetNodeName(node_uuid) elif field == "phys": val = vol["dev"] elif field == "vg": val = vol["vg"] elif field == "name": val = vol["name"] elif field == "size": val = int(float(vol["size"])) elif field == "instance": inst = vol2inst.get((node_uuid, vol["vg"] + "/" + vol["name"]), None) if inst is not None: val = inst.name else: val = "-" else: raise errors.ParameterError(field) node_output.append(str(val)) output.append(node_output) return output class LUNodeQueryStorage(NoHooksLU): """Logical unit for getting information on storage units on node(s). """ _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) REQ_BGL = False def CheckArguments(self): _CheckOutputFields(static=self._FIELDS_STATIC, dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), selected=self.op.output_fields) def ExpandNames(self): self.share_locks = ShareAll() if self.op.nodes: self.needed_locks = { locking.LEVEL_NODE: GetWantedNodes(self, self.op.nodes)[0], } else: self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } def CheckPrereq(self): """Check prerequisites. """ CheckStorageTypeEnabled(self.cfg.GetClusterInfo(), self.op.storage_type) def Exec(self, feedback_fn): """Computes the list of nodes and their attributes. 
""" self.node_uuids = self.owned_locks(locking.LEVEL_NODE) # Always get name to sort by if constants.SF_NAME in self.op.output_fields: fields = self.op.output_fields[:] else: fields = [constants.SF_NAME] + self.op.output_fields # Never ask for node or type as it's only known to the LU for extra in [constants.SF_NODE, constants.SF_TYPE]: while extra in fields: fields.remove(extra) field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) name_idx = field_idx[constants.SF_NAME] st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) data = self.rpc.call_storage_list(self.node_uuids, self.op.storage_type, st_args, self.op.name, fields) result = [] for node_uuid in utils.NiceSort(self.node_uuids): node_name = self.cfg.GetNodeName(node_uuid) nresult = data[node_uuid] if nresult.offline: continue msg = nresult.fail_msg if msg: self.LogWarning("Can't get storage data from node %s: %s", node_name, msg) continue rows = dict([(row[name_idx], row) for row in nresult.payload]) for name in utils.NiceSort(rows.keys()): row = rows[name] out = [] for field in self.op.output_fields: if field == constants.SF_NODE: val = node_name elif field == constants.SF_TYPE: val = self.op.storage_type elif field in field_idx: val = row[field_idx[field]] else: raise errors.ParameterError(field) out.append(val) result.append(out) return result class LUNodeRemove(LogicalUnit): """Logical unit for removing a node. """ HPATH = "node-remove" HTYPE = constants.HTYPE_NODE def BuildHooksEnv(self): """Build hooks env. """ return { "OP_TARGET": self.op.node_name, "NODE_NAME": self.op.node_name, } def BuildHooksNodes(self): """Build hooks nodes. This doesn't run on the target node in the pre phase as a failed node would then be impossible to remove. """ all_nodes = self.cfg.GetNodeList() try: all_nodes.remove(self.op.node_uuid) except ValueError: pass return (all_nodes, all_nodes) def CheckPrereq(self): """Check prerequisites. This checks: - the node exists in the configuration - it does not have primary or secondary instances - it's not the master Any errors are signaled by raising errors.OpPrereqError. """ (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) node = self.cfg.GetNodeInfo(self.op.node_uuid) assert node is not None masternode = self.cfg.GetMasterNode() if node.uuid == masternode: raise errors.OpPrereqError("Node is the master node, failover to another" " node is required", errors.ECODE_INVAL) for _, instance in self.cfg.GetAllInstancesInfo().items(): if node.uuid in instance.all_nodes: raise errors.OpPrereqError("Instance %s is still running on the node," " please remove first" % instance.name, errors.ECODE_INVAL) self.op.node_name = node.name self.node = node def Exec(self, feedback_fn): """Removes the node from the cluster. 
""" logging.info("Stopping the node daemon and removing configs from node %s", self.node.name) modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ "Not owning BGL" # Promote nodes to master candidate as needed AdjustCandidatePool(self, exceptions=[self.node.uuid]) self.context.RemoveNode(self.node) # Run post hooks on the node before it's removed RunPostHook(self, self.node.name) # we have to call this by name rather than by UUID, as the node is no longer # in the config result = self.rpc.call_node_leave_cluster(self.node.name, modify_ssh_setup) msg = result.fail_msg if msg: self.LogWarning("Errors encountered on the remote node while leaving" " the cluster: %s", msg) # Remove node from our /etc/hosts if self.cfg.GetClusterInfo().modify_etc_hosts: master_node_uuid = self.cfg.GetMasterNode() result = self.rpc.call_etc_hosts_modify(master_node_uuid, constants.ETC_HOSTS_REMOVE, self.node.name, None) result.Raise("Can't update hosts file with new host data") RedistributeAncillaryFiles(self) class LURepairNodeStorage(NoHooksLU): """Repairs the volume group on a node. """ REQ_BGL = False def CheckArguments(self): (self.op.node_uuid, self.op.node_name) = \ ExpandNodeUuidAndName(self.cfg, self.op.node_uuid, self.op.node_name) storage_type = self.op.storage_type if (constants.SO_FIX_CONSISTENCY not in constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): raise errors.OpPrereqError("Storage units of type '%s' can not be" " repaired" % storage_type, errors.ECODE_INVAL) def ExpandNames(self): self.needed_locks = { locking.LEVEL_NODE: [self.op.node_uuid], } def _CheckFaultyDisks(self, instance, node_uuid): """Ensure faulty disks abort the opcode or at least warn.""" try: if FindFaultyInstanceDisks(self.cfg, self.rpc, instance, node_uuid, True): raise errors.OpPrereqError("Instance '%s' has faulty disks on" " node '%s'" % (instance.name, self.cfg.GetNodeName(node_uuid)), errors.ECODE_STATE) except errors.OpPrereqError, err: if self.op.ignore_consistency: self.LogWarning(str(err.args[0])) else: raise def CheckPrereq(self): """Check prerequisites. """ CheckStorageTypeEnabled(self.cfg.GetClusterInfo(), self.op.storage_type) # Check whether any instance on this node has faulty disks for inst in _GetNodeInstances(self.cfg, self.op.node_uuid): if not inst.disks_active: continue check_nodes = set(inst.all_nodes) check_nodes.discard(self.op.node_uuid) for inst_node_uuid in check_nodes: self._CheckFaultyDisks(inst, inst_node_uuid) def Exec(self, feedback_fn): feedback_fn("Repairing storage unit '%s' on %s ..." % (self.op.name, self.op.node_name)) st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) result = self.rpc.call_storage_execute(self.op.node_uuid, self.op.storage_type, st_args, self.op.name, constants.SO_FIX_CONSISTENCY) result.Raise("Failed to repair storage unit '%s' on %s" % (self.op.name, self.op.node_name)) ganeti-2.9.3/lib/cmdlib/instance_migration.py0000644000000000000000000011365312271422343021232 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Logical units dealing with instance migration and failover."""

import logging
import time

from ganeti import constants
from ganeti import errors
from ganeti import locking
from ganeti.masterd import iallocator
from ganeti import utils
from ganeti.cmdlib.base import LogicalUnit, Tasklet
from ganeti.cmdlib.common import ExpandInstanceUuidAndName, \
  CheckIAllocatorOrNode, ExpandNodeUuidAndName
from ganeti.cmdlib.instance_storage import CheckDiskConsistency, \
  ExpandCheckDisks, ShutdownInstanceDisks, AssembleInstanceDisks
from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \
  CheckTargetNodeIPolicy, ReleaseLocks, CheckNodeNotDrained, \
  CopyLockList, CheckNodeFreeMemory, CheckInstanceBridgesExist

import ganeti.masterd.instance


def _ExpandNamesForMigration(lu):
  """Expands names for use with L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}

  """
  if lu.op.target_node is not None:
    (lu.op.target_node_uuid, lu.op.target_node) = \
      ExpandNodeUuidAndName(lu.cfg, lu.op.target_node_uuid, lu.op.target_node)

  lu.needed_locks[locking.LEVEL_NODE] = []
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  lu.needed_locks[locking.LEVEL_NODE_RES] = []
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for externally replicated
  # instances (e.g. sharedfile or RBD) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []


def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(lu.op.instance_uuid)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      if lu.op.target_node is None:
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      else:
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               lu.op.target_node_uuid]
      del lu.recalculate_locks[locking.LEVEL_NODE]
    else:
      lu._LockInstancesNodes() # pylint: disable=W0212

  elif level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # Copy node locks
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
      CopyLockList(lu.needed_locks[locking.LEVEL_NODE])


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.
""" self.iallocator = getattr(self.op, "iallocator", None) self.target_node = getattr(self.op, "target_node", None) def ExpandNames(self): self._ExpandAndLockInstance() _ExpandNamesForMigration(self) self._migrater = \ TLMigrateInstance(self, self.op.instance_uuid, self.op.instance_name, self.op.cleanup, True, False, self.op.ignore_consistency, True, self.op.shutdown_timeout, self.op.ignore_ipolicy) self.tasklets = [self._migrater] def DeclareLocks(self, level): _DeclareLocksForMigration(self, level) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ instance = self._migrater.instance source_node_uuid = instance.primary_node env = { "IGNORE_CONSISTENCY": self.op.ignore_consistency, "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, "OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid), "NEW_PRIMARY": self.op.target_node, "FAILOVER_CLEANUP": self.op.cleanup, } if instance.disk_template in constants.DTS_INT_MIRROR: env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0]) env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid) else: env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = "" env.update(BuildInstanceHookEnvByObject(self, instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ instance = self._migrater.instance nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) return (nl, nl + [instance.primary_node]) class LUInstanceMigrate(LogicalUnit): """Migrate an instance. This is migration without shutting down, compared to the failover, which is done with shutdown. """ HPATH = "instance-migrate" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() _ExpandNamesForMigration(self) self._migrater = \ TLMigrateInstance(self, self.op.instance_uuid, self.op.instance_name, self.op.cleanup, False, self.op.allow_failover, False, self.op.allow_runtime_changes, constants.DEFAULT_SHUTDOWN_TIMEOUT, self.op.ignore_ipolicy) self.tasklets = [self._migrater] def DeclareLocks(self, level): _DeclareLocksForMigration(self, level) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ instance = self._migrater.instance source_node_uuid = instance.primary_node env = BuildInstanceHookEnvByObject(self, instance) env.update({ "MIGRATE_LIVE": self._migrater.live, "MIGRATE_CLEANUP": self.op.cleanup, "OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid), "NEW_PRIMARY": self.op.target_node, "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, }) if instance.disk_template in constants.DTS_INT_MIRROR: env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0]) env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid) else: env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None return env def BuildHooksNodes(self): """Build hooks nodes. """ instance = self._migrater.instance snode_uuids = list(instance.secondary_nodes) nl = [self.cfg.GetMasterNode(), instance.primary_node] + snode_uuids return (nl, nl) class TLMigrateInstance(Tasklet): """Tasklet class for instance migration. 
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live; this
      variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node_uuid: string
  @ivar target_node_uuid: If given, the target node UUID to reallocate the
      instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
      possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
      and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the instance
      shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """
  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_uuid, instance_name, cleanup, failover,
               fallback, ignore_consistency, allow_runtime_changes,
               shutdown_timeout, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_uuid = instance_uuid
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    (self.instance_uuid, self.instance_name) = \
      ExpandInstanceUuidAndName(self.lu.cfg, self.instance_uuid,
                                self.instance_name)
    self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)
    assert self.instance is not None
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not self.instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if self.instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (self.instance.disk_template, text),
                                 errors.ECODE_STATE)

    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()
      else:
        # We set self.target_node_uuid as it is required by
        # BuildHooksEnv
        self.target_node_uuid = self.lu.op.target_node_uuid

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node_uuid)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                             self.cfg, ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node_uuid = self.target_node_uuid
      if self.target_node_uuid == self.instance.primary_node:
        raise errors.OpPrereqError(
          "Cannot migrate instance %s to its primary (%s)" %
          (self.instance.name,
           self.cfg.GetNodeName(self.instance.primary_node)),
errors.ECODE_STATE) if len(self.lu.tasklets) == 1: # It is safe to release locks only when we're the only tasklet # in the LU ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=[self.instance.primary_node, self.target_node_uuid]) ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC) else: assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC) secondary_node_uuids = self.instance.secondary_nodes if not secondary_node_uuids: raise errors.ConfigurationError("No secondary node but using" " %s disk template" % self.instance.disk_template) target_node_uuid = secondary_node_uuids[0] if self.lu.op.iallocator or \ (self.lu.op.target_node_uuid and self.lu.op.target_node_uuid != target_node_uuid): if self.failover: text = "failed over" else: text = "migrated" raise errors.OpPrereqError("Instances with disk template %s cannot" " be %s to arbitrary nodes" " (neither an iallocator nor a target" " node can be passed)" % (self.instance.disk_template, text), errors.ECODE_INVAL) nodeinfo = self.cfg.GetNodeInfo(target_node_uuid) group_info = self.cfg.GetNodeGroup(nodeinfo.group) ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info) CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo, self.cfg, ignore=self.ignore_ipolicy) i_be = cluster.FillBE(self.instance) # check memory requirements on the secondary node if (not self.cleanup and (not self.failover or self.instance.admin_state == constants.ADMINST_UP)): self.tgt_free_mem = CheckNodeFreeMemory( self.lu, target_node_uuid, "migrating instance %s" % self.instance.name, i_be[constants.BE_MINMEM], self.instance.hypervisor, self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor]) else: self.lu.LogInfo("Not checking memory on the secondary node as" " instance will not be started") # check if failover must be forced instead of migration if (not self.cleanup and not self.failover and i_be[constants.BE_ALWAYS_FAILOVER]): self.lu.LogInfo("Instance configured to always failover; fallback" " to failover") self.failover = True # check bridge existance CheckInstanceBridgesExist(self.lu, self.instance, node_uuid=target_node_uuid) if not self.cleanup: CheckNodeNotDrained(self.lu, target_node_uuid) if not self.failover: result = self.rpc.call_instance_migratable(self.instance.primary_node, self.instance) if result.fail_msg and self.fallback: self.lu.LogInfo("Can't migrate, instance offline, fallback to" " failover") self.failover = True else: result.Raise("Can't migrate, please use failover", prereq=True, ecode=errors.ECODE_STATE) assert not (self.failover and self.cleanup) if not self.failover: if self.lu.op.live is not None and self.lu.op.mode is not None: raise errors.OpPrereqError("Only one of the 'live' and 'mode'" " parameters are accepted", errors.ECODE_INVAL) if self.lu.op.live is not None: if self.lu.op.live: self.lu.op.mode = constants.HT_MIGRATION_LIVE else: self.lu.op.mode = constants.HT_MIGRATION_NONLIVE # reset the 'live' parameter to None so that repeated # invocations of CheckPrereq do not raise an exception self.lu.op.live = None elif self.lu.op.mode is None: # read the default value from the hypervisor i_hv = cluster.FillHV(self.instance, skip_globals=False) self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE else: # Failover is never live self.live = False if not (self.failover or self.cleanup): remote_info = self.rpc.call_instance_info( self.instance.primary_node, self.instance.name, self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor]) 
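# NOTE (illustrative sketch, not upstream Ganeti code): the live/mode
# handling just above reduces to a small pure decision, shown here in
# isolation. The two constants below are hypothetical stand-ins for
# constants.HT_MIGRATION_LIVE and constants.HT_MIGRATION_NONLIVE.
EXAMPLE_HT_MIGRATION_LIVE = "live"
EXAMPLE_HT_MIGRATION_NONLIVE = "non-live"


def ExampleResolveMigrationMode(live, mode, hv_default):
  """Return the effective migration mode for an instance.

  Mirrors the checks above: 'live' and 'mode' are mutually exclusive,
  and the hypervisor's default mode applies when neither is given.

  """
  if live is not None and mode is not None:
    raise ValueError("only one of 'live' and 'mode' may be given")
  if live is not None:
    if live:
      return EXAMPLE_HT_MIGRATION_LIVE
    return EXAMPLE_HT_MIGRATION_NONLIVE
  if mode is None:
    return hv_default
  return mode

# e.g. ExampleResolveMigrationMode(True, None, "non-live") == "live",
# while ExampleResolveMigrationMode(None, None, "non-live") == "non-live".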
remote_info.Raise("Error checking instance on node %s" % self.cfg.GetNodeName(self.instance.primary_node)) instance_running = bool(remote_info.payload) if instance_running: self.current_mem = int(remote_info.payload["memory"]) def _RunAllocator(self): """Run the allocator based on input opcode. """ assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC) # FIXME: add a self.ignore_ipolicy option req = iallocator.IAReqRelocate( inst_uuid=self.instance_uuid, relocate_from_node_uuids=[self.instance.primary_node]) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.lu.op.iallocator) if not ial.success: raise errors.OpPrereqError("Can't compute nodes using" " iallocator '%s': %s" % (self.lu.op.iallocator, ial.info), errors.ECODE_NORES) self.target_node_uuid = self.cfg.GetNodeInfoByName(ial.result[0]).uuid self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s", self.instance_name, self.lu.op.iallocator, utils.CommaJoin(ial.result)) def _WaitUntilSync(self): """Poll with custom rpc for disk sync. This uses our own step-based rpc call. """ self.feedback_fn("* wait until resync is done") all_done = False while not all_done: all_done = True result = self.rpc.call_drbd_wait_sync(self.all_node_uuids, self.nodes_ip, (self.instance.disks, self.instance)) min_percent = 100 for node_uuid, nres in result.items(): nres.Raise("Cannot resync disks on node %s" % self.cfg.GetNodeName(node_uuid)) node_done, node_percent = nres.payload all_done = all_done and node_done if node_percent is not None: min_percent = min(min_percent, node_percent) if not all_done: if min_percent < 100: self.feedback_fn(" - progress: %.1f%%" % min_percent) time.sleep(2) def _EnsureSecondary(self, node_uuid): """Demote a node to secondary. """ self.feedback_fn("* switching node %s to secondary mode" % self.cfg.GetNodeName(node_uuid)) for dev in self.instance.disks: self.cfg.SetDiskID(dev, node_uuid) result = self.rpc.call_blockdev_close(node_uuid, self.instance.name, self.instance.disks) result.Raise("Cannot change disk to secondary on node %s" % self.cfg.GetNodeName(node_uuid)) def _GoStandalone(self): """Disconnect from the network. """ self.feedback_fn("* changing into standalone mode") result = self.rpc.call_drbd_disconnect_net(self.all_node_uuids, self.nodes_ip, self.instance.disks) for node_uuid, nres in result.items(): nres.Raise("Cannot disconnect disks node %s" % self.cfg.GetNodeName(node_uuid)) def _GoReconnect(self, multimaster): """Reconnect to the network. """ if multimaster: msg = "dual-master" else: msg = "single-master" self.feedback_fn("* changing disks into %s mode" % msg) result = self.rpc.call_drbd_attach_net(self.all_node_uuids, self.nodes_ip, (self.instance.disks, self.instance), self.instance.name, multimaster) for node_uuid, nres in result.items(): nres.Raise("Cannot change disks config on node %s" % self.cfg.GetNodeName(node_uuid)) def _ExecCleanup(self): """Try to cleanup after a failed migration. 
    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    cluster_hvparams = self.cfg.GetClusterInfo().hvparams
    ins_l = self.rpc.call_instance_list(self.all_node_uuids,
                                        [self.instance.hypervisor],
                                        cluster_hvparams)
    for node_uuid, result in ins_l.items():
      result.Raise("Can't contact node %s" % node_uuid)

    runningon_source = self.instance.name in \
                         ins_l[self.source_node_uuid].payload
    runningon_target = self.instance.name in \
                         ins_l[self.target_node_uuid].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" %
                       self.cfg.GetNodeName(self.target_node_uuid))
      self.instance.primary_node = self.target_node_uuid
      self.cfg.Update(self.instance, self.feedback_fn)
      demoted_node_uuid = self.source_node_uuid
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" %
                       self.cfg.GetNodeName(self.source_node_uuid))
      demoted_node_uuid = self.target_node_uuid

    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node_uuid)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(self.target_node_uuid)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    abort_result = self.rpc.call_instance_finalize_migration_dst(
                     self.target_node_uuid, self.instance,
                     self.migration_info, False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    self.cfg.GetNodeName(self.target_node_uuid), abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(
      self.source_node_uuid, self.instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    self.cfg.GetNodeName(self.source_node_uuid), abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.
The migrate is done by: - change the disks into dual-master mode - wait until disks are fully synchronized again - migrate the instance - change disks on the new secondary node (the old primary) to secondary - wait until disks are fully synchronized - change disks into single-master mode """ # Check for hypervisor version mismatch and warn the user. hvspecs = [(self.instance.hypervisor, self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])] nodeinfo = self.rpc.call_node_info( [self.source_node_uuid, self.target_node_uuid], None, hvspecs) for ninfo in nodeinfo.values(): ninfo.Raise("Unable to retrieve node information from node '%s'" % ninfo.node) (_, _, (src_info, )) = nodeinfo[self.source_node_uuid].payload (_, _, (dst_info, )) = nodeinfo[self.target_node_uuid].payload if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and (constants.HV_NODEINFO_KEY_VERSION in dst_info)): src_version = src_info[constants.HV_NODEINFO_KEY_VERSION] dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION] if src_version != dst_version: self.feedback_fn("* warning: hypervisor version mismatch between" " source (%s) and target (%s) node" % (src_version, dst_version)) self.feedback_fn("* checking disk consistency between source and target") for (idx, dev) in enumerate(self.instance.disks): if not CheckDiskConsistency(self.lu, self.instance, dev, self.target_node_uuid, False): raise errors.OpExecError("Disk %s is degraded or not fully" " synchronized on target node," " aborting migration" % idx) if self.current_mem > self.tgt_free_mem: if not self.allow_runtime_changes: raise errors.OpExecError("Memory ballooning not allowed and not enough" " free memory to fit instance %s on target" " node %s (have %dMB, need %dMB)" % (self.instance.name, self.cfg.GetNodeName(self.target_node_uuid), self.tgt_free_mem, self.current_mem)) self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem) rpcres = self.rpc.call_instance_balloon_memory(self.instance.primary_node, self.instance, self.tgt_free_mem) rpcres.Raise("Cannot modify instance runtime memory") # First get the migration information from the remote node result = self.rpc.call_migration_info(self.source_node_uuid, self.instance) msg = result.fail_msg if msg: log_err = ("Failed fetching source migration information from %s: %s" % (self.cfg.GetNodeName(self.source_node_uuid), msg)) logging.error(log_err) raise errors.OpExecError(log_err) self.migration_info = migration_info = result.payload if self.instance.disk_template not in constants.DTS_EXT_MIRROR: # Then switch the disks to master/master mode self._EnsureSecondary(self.target_node_uuid) self._GoStandalone() self._GoReconnect(True) self._WaitUntilSync() self.feedback_fn("* preparing %s to accept the instance" % self.cfg.GetNodeName(self.target_node_uuid)) # This fills physical_id slot that may be missing on newly created disks for disk in self.instance.disks: self.cfg.SetDiskID(disk, self.target_node_uuid) result = self.rpc.call_accept_instance(self.target_node_uuid, self.instance, migration_info, self.nodes_ip[self.target_node_uuid]) msg = result.fail_msg if msg: logging.error("Instance pre-migration failed, trying to revert" " disk status: %s", msg) self.feedback_fn("Pre-migration failed, aborting") self._AbortMigration() self._RevertDiskStatus() raise errors.OpExecError("Could not pre-migrate instance %s: %s" % (self.instance.name, msg)) self.feedback_fn("* migrating instance to %s" % self.cfg.GetNodeName(self.target_node_uuid)) cluster = self.cfg.GetClusterInfo() result = 
self.rpc.call_instance_migrate( self.source_node_uuid, cluster.cluster_name, self.instance, self.nodes_ip[self.target_node_uuid], self.live) msg = result.fail_msg if msg: logging.error("Instance migration failed, trying to revert" " disk status: %s", msg) self.feedback_fn("Migration failed, aborting") self._AbortMigration() self._RevertDiskStatus() raise errors.OpExecError("Could not migrate instance %s: %s" % (self.instance.name, msg)) self.feedback_fn("* starting memory transfer") last_feedback = time.time() while True: result = self.rpc.call_instance_get_migration_status( self.source_node_uuid, self.instance) msg = result.fail_msg ms = result.payload # MigrationStatus instance if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES): logging.error("Instance migration failed, trying to revert" " disk status: %s", msg) self.feedback_fn("Migration failed, aborting") self._AbortMigration() self._RevertDiskStatus() if not msg: msg = "hypervisor returned failure" raise errors.OpExecError("Could not migrate instance %s: %s" % (self.instance.name, msg)) if result.payload.status != constants.HV_MIGRATION_ACTIVE: self.feedback_fn("* memory transfer complete") break if (utils.TimeoutExpired(last_feedback, self._MIGRATION_FEEDBACK_INTERVAL) and ms.transferred_ram is not None): mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram) self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress) last_feedback = time.time() time.sleep(self._MIGRATION_POLL_INTERVAL) result = self.rpc.call_instance_finalize_migration_src( self.source_node_uuid, self.instance, True, self.live) msg = result.fail_msg if msg: logging.error("Instance migration succeeded, but finalization failed" " on the source node: %s", msg) raise errors.OpExecError("Could not finalize instance migration: %s" % msg) self.instance.primary_node = self.target_node_uuid # distribute new instance config to the other nodes self.cfg.Update(self.instance, self.feedback_fn) result = self.rpc.call_instance_finalize_migration_dst( self.target_node_uuid, self.instance, migration_info, True) msg = result.fail_msg if msg: logging.error("Instance migration succeeded, but finalization failed" " on the target node: %s", msg) raise errors.OpExecError("Could not finalize instance migration: %s" % msg) if self.instance.disk_template not in constants.DTS_EXT_MIRROR: self._EnsureSecondary(self.source_node_uuid) self._WaitUntilSync() self._GoStandalone() self._GoReconnect(False) self._WaitUntilSync() # If the instance's disk template is `rbd' or `ext' and there was a # successful migration, unmap the device from the source node. if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT): disks = ExpandCheckDisks(self.instance, self.instance.disks) self.feedback_fn("* unmapping instance's disks from %s" % self.cfg.GetNodeName(self.source_node_uuid)) for disk in disks: result = self.rpc.call_blockdev_shutdown(self.source_node_uuid, (disk, self.instance)) msg = result.fail_msg if msg: logging.error("Migration was successful, but couldn't unmap the" " block device %s on source node %s: %s", disk.iv_name, self.cfg.GetNodeName(self.source_node_uuid), msg) logging.error("You need to unmap the device %s manually on %s", disk.iv_name, self.cfg.GetNodeName(self.source_node_uuid)) self.feedback_fn("* done") def _ExecFailover(self): """Failover an instance. The failover is done by shutting it down on its present node and starting it on the secondary. 
""" primary_node = self.cfg.GetNodeInfo(self.instance.primary_node) source_node_uuid = self.instance.primary_node if self.instance.disks_active: self.feedback_fn("* checking disk consistency between source and target") for (idx, dev) in enumerate(self.instance.disks): # for drbd, these are drbd over lvm if not CheckDiskConsistency(self.lu, self.instance, dev, self.target_node_uuid, False): if primary_node.offline: self.feedback_fn("Node %s is offline, ignoring degraded disk %s on" " target node %s" % (primary_node.name, idx, self.cfg.GetNodeName(self.target_node_uuid))) elif not self.ignore_consistency: raise errors.OpExecError("Disk %s is degraded on target node," " aborting failover" % idx) else: self.feedback_fn("* not checking disk consistency as instance is not" " running") self.feedback_fn("* shutting down instance on source node") logging.info("Shutting down instance %s on node %s", self.instance.name, self.cfg.GetNodeName(source_node_uuid)) result = self.rpc.call_instance_shutdown(source_node_uuid, self.instance, self.shutdown_timeout, self.lu.op.reason) msg = result.fail_msg if msg: if self.ignore_consistency or primary_node.offline: self.lu.LogWarning("Could not shutdown instance %s on node %s," " proceeding anyway; please make sure node" " %s is down; error details: %s", self.instance.name, self.cfg.GetNodeName(source_node_uuid), self.cfg.GetNodeName(source_node_uuid), msg) else: raise errors.OpExecError("Could not shutdown instance %s on" " node %s: %s" % (self.instance.name, self.cfg.GetNodeName(source_node_uuid), msg)) self.feedback_fn("* deactivating the instance's disks on source node") if not ShutdownInstanceDisks(self.lu, self.instance, ignore_primary=True): raise errors.OpExecError("Can't shut down the instance's disks") self.instance.primary_node = self.target_node_uuid # distribute new instance config to the other nodes self.cfg.Update(self.instance, self.feedback_fn) # Only start the instance if it's marked as up if self.instance.admin_state == constants.ADMINST_UP: self.feedback_fn("* activating the instance's disks on target node %s" % self.cfg.GetNodeName(self.target_node_uuid)) logging.info("Starting instance %s on node %s", self.instance.name, self.cfg.GetNodeName(self.target_node_uuid)) disks_ok, _ = AssembleInstanceDisks(self.lu, self.instance, ignore_secondaries=True) if not disks_ok: ShutdownInstanceDisks(self.lu, self.instance) raise errors.OpExecError("Can't activate the instance's disks") self.feedback_fn("* starting the instance on the target node %s" % self.cfg.GetNodeName(self.target_node_uuid)) result = self.rpc.call_instance_start(self.target_node_uuid, (self.instance, None, None), False, self.lu.op.reason) msg = result.fail_msg if msg: ShutdownInstanceDisks(self.lu, self.instance) raise errors.OpExecError("Could not start instance %s on node %s: %s" % (self.instance.name, self.cfg.GetNodeName(self.target_node_uuid), msg)) def Exec(self, feedback_fn): """Perform the migration. """ self.feedback_fn = feedback_fn self.source_node_uuid = self.instance.primary_node # FIXME: if we implement migrate-to-any in DRBD, this needs fixing if self.instance.disk_template in constants.DTS_INT_MIRROR: self.target_node_uuid = self.instance.secondary_nodes[0] # Otherwise self.target_node has been populated either # directly, or through an iallocator. 
self.all_node_uuids = [self.source_node_uuid, self.target_node_uuid] self.nodes_ip = dict((uuid, node.secondary_ip) for (uuid, node) in self.cfg.GetMultiNodeInfo(self.all_node_uuids)) if self.failover: feedback_fn("Failover instance %s" % self.instance.name) self._ExecFailover() else: feedback_fn("Migrating instance %s" % self.instance.name) if self.cleanup: return self._ExecCleanup() else: return self._ExecMigration() ganeti-2.9.3/lib/cmdlib/__init__.py0000644000000000000000000000627512271422343017115 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the master-side code. This file only imports all LU's (and other classes) in order to re-export them to clients of cmdlib. """ from ganeti.cmdlib.base import \ LogicalUnit, \ NoHooksLU, \ ResultWithJobs from ganeti.cmdlib.cluster import \ LUClusterActivateMasterIp, \ LUClusterDeactivateMasterIp, \ LUClusterConfigQuery, \ LUClusterDestroy, \ LUClusterPostInit, \ LUClusterQuery, \ LUClusterRedistConf, \ LUClusterRename, \ LUClusterRepairDiskSizes, \ LUClusterSetParams, \ LUClusterVerify, \ LUClusterVerifyConfig, \ LUClusterVerifyGroup, \ LUClusterVerifyDisks from ganeti.cmdlib.group import \ LUGroupAdd, \ LUGroupAssignNodes, \ LUGroupQuery, \ LUGroupSetParams, \ LUGroupRemove, \ LUGroupRename, \ LUGroupEvacuate, \ LUGroupVerifyDisks from ganeti.cmdlib.node import \ LUNodeAdd, \ LUNodeSetParams, \ LUNodePowercycle, \ LUNodeEvacuate, \ LUNodeMigrate, \ LUNodeModifyStorage, \ LUNodeQuery, \ LUNodeQueryvols, \ LUNodeQueryStorage, \ LUNodeRemove, \ LURepairNodeStorage from ganeti.cmdlib.instance import \ LUInstanceCreate, \ LUInstanceRename, \ LUInstanceRemove, \ LUInstanceMove, \ LUInstanceMultiAlloc, \ LUInstanceSetParams, \ LUInstanceChangeGroup from ganeti.cmdlib.instance_storage import \ LUInstanceRecreateDisks, \ LUInstanceGrowDisk, \ LUInstanceReplaceDisks, \ LUInstanceActivateDisks, \ LUInstanceDeactivateDisks from ganeti.cmdlib.instance_migration import \ LUInstanceFailover, \ LUInstanceMigrate from ganeti.cmdlib.instance_operation import \ LUInstanceStartup, \ LUInstanceShutdown, \ LUInstanceReinstall, \ LUInstanceReboot, \ LUInstanceConsole from ganeti.cmdlib.instance_query import \ LUInstanceQuery, \ LUInstanceQueryData from ganeti.cmdlib.backup import \ LUBackupQuery, \ LUBackupPrepare, \ LUBackupExport, \ LUBackupRemove from ganeti.cmdlib.query import \ LUQuery, \ LUQueryFields from ganeti.cmdlib.operating_system import \ LUOsDiagnose from ganeti.cmdlib.tags import \ LUTagsGet, \ LUTagsSearch, \ LUTagsSet, \ LUTagsDel from ganeti.cmdlib.network import \ LUNetworkAdd, \ LUNetworkRemove, \ LUNetworkSetParams, \ LUNetworkQuery, \ LUNetworkConnect, \ LUNetworkDisconnect from ganeti.cmdlib.misc import \ LUOobCommand, \ LUExtStorageDiagnose, \ LURestrictedCommand 
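# NOTE (illustrative sketch, not upstream Ganeti code): the re-exports in
# this module exist so that opcode dispatch can find every logical unit in
# one namespace. Dispatch follows the OpX -> LUX naming convention; the
# helper below is a hypothetical sketch of that convention only, not the
# actual dispatch-table code in ganeti.mcpu.
def ExampleResolveLu(op_class_name, namespace):
  """Map an opcode class name such as 'OpNodeMigrate' to LUNodeMigrate."""
  assert op_class_name.startswith("Op"), "opcode classes are named OpX"
  return namespace["LU" + op_class_name[2:]]

# Once the imports in this module have run,
# ExampleResolveLu("OpNodeMigrate", globals()) would yield LUNodeMigrate.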
from ganeti.cmdlib.test import \ LUTestDelay, \ LUTestJqueue, \ LUTestAllocator ganeti-2.9.3/lib/cmdlib/operating_system.py0000644000000000000000000001425712267470014020754 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Logical units dealing with OS.""" from ganeti import compat from ganeti import locking from ganeti import qlang from ganeti import query from ganeti.cmdlib.base import QueryBase, NoHooksLU class OsQuery(QueryBase): FIELDS = query.OS_FIELDS def ExpandNames(self, lu): # Lock all nodes in shared mode # Temporary removal of locks, should be reverted later # TODO: reintroduce locks when they are lighter-weight lu.needed_locks = {} #self.share_locks[locking.LEVEL_NODE] = 1 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET # The following variables interact with _QueryBase._GetNames if self.names: self.wanted = self.names else: self.wanted = locking.ALL_SET self.do_locking = self.use_locking def DeclareLocks(self, lu, level): pass @staticmethod def _DiagnoseByOS(rlist): """Remaps a per-node return list into a per-os per-node dictionary. @param rlist: a map with node UUIDs as keys and RPC results as values @rtype: dict @return: a dictionary with osnames as keys and as value another map, with node UUIDs as keys and tuples of (path, status, diagnose, variants, parameters, api_versions) as values, e.g.:: {"debian-etch": {"node1-uuid": [(/usr/lib/..., True, "", [], []), (/srv/..., False, "invalid api")], "node2-uuid": [(/srv/..., True, "", [], [])]} } """ all_os = {} # we build here the list of nodes that didn't fail the RPC (at RPC # level), so that nodes with a non-responding node daemon don't # make all OSes invalid good_node_uuids = [node_uuid for node_uuid in rlist if not rlist[node_uuid].fail_msg] for node_uuid, nr in rlist.items(): if nr.fail_msg or not nr.payload: continue for (name, path, status, diagnose, variants, params, api_versions) in nr.payload: if name not in all_os: # build a list of nodes for this os containing empty lists # for each node in node_list all_os[name] = {} for nuuid in good_node_uuids: all_os[name][nuuid] = [] # convert params from [name, help] to (name, help) params = [tuple(v) for v in params] all_os[name][node_uuid].append((path, status, diagnose, variants, params, api_versions)) return all_os def _GetQueryData(self, lu): """Computes the list of nodes and their attributes.
""" # Locking is not used assert not (compat.any(lu.glm.is_owned(level) for level in locking.LEVELS if level != locking.LEVEL_CLUSTER) or self.do_locking or self.use_locking) valid_node_uuids = [node.uuid for node in lu.cfg.GetAllNodesInfo().values() if not node.offline and node.vm_capable] pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_node_uuids)) cluster = lu.cfg.GetClusterInfo() data = {} for (os_name, os_data) in pol.items(): info = query.OsInfo(name=os_name, valid=True, node_status=os_data, hidden=(os_name in cluster.hidden_os), blacklisted=(os_name in cluster.blacklisted_os)) variants = set() parameters = set() api_versions = set() for idx, osl in enumerate(os_data.values()): info.valid = bool(info.valid and osl and osl[0][1]) if not info.valid: break (node_variants, node_params, node_api) = osl[0][3:6] if idx == 0: # First entry variants.update(node_variants) parameters.update(node_params) api_versions.update(node_api) else: # Filter out inconsistent values variants.intersection_update(node_variants) parameters.intersection_update(node_params) api_versions.intersection_update(node_api) info.variants = list(variants) info.parameters = list(parameters) info.api_versions = list(api_versions) data[os_name] = info # Prepare data in requested order return [data[name] for name in self._GetNames(lu, pol.keys(), None) if name in data] class LUOsDiagnose(NoHooksLU): """Logical unit for OS diagnose/query. """ REQ_BGL = False @staticmethod def _BuildFilter(fields, names): """Builds a filter for querying OSes. """ name_filter = qlang.MakeSimpleFilter("name", names) # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the # respective field is not requested status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]] for fname in ["hidden", "blacklisted"] if fname not in fields] if "valid" not in fields: status_filter.append([qlang.OP_TRUE, "valid"]) if status_filter: status_filter.insert(0, qlang.OP_AND) else: status_filter = None if name_filter and status_filter: return [qlang.OP_AND, name_filter, status_filter] elif name_filter: return name_filter else: return status_filter def CheckArguments(self): self.oq = OsQuery(self._BuildFilter(self.op.output_fields, self.op.names), self.op.output_fields, False) def ExpandNames(self): self.oq.ExpandNames(self) def Exec(self, feedback_fn): return self.oq.OldStyleQuery(self) ganeti-2.9.3/lib/cmdlib/query.py0000644000000000000000000000553312271422343016517 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Logical units for queries.""" from ganeti import constants from ganeti import errors from ganeti import query from ganeti.cmdlib.backup import ExportQuery from ganeti.cmdlib.base import NoHooksLU from ganeti.cmdlib.cluster import ClusterQuery from ganeti.cmdlib.group import GroupQuery from ganeti.cmdlib.instance_query import InstanceQuery from ganeti.cmdlib.misc import ExtStorageQuery from ganeti.cmdlib.network import NetworkQuery from ganeti.cmdlib.node import NodeQuery from ganeti.cmdlib.operating_system import OsQuery #: Query type implementations _QUERY_IMPL = { constants.QR_CLUSTER: ClusterQuery, constants.QR_INSTANCE: InstanceQuery, constants.QR_NODE: NodeQuery, constants.QR_GROUP: GroupQuery, constants.QR_NETWORK: NetworkQuery, constants.QR_OS: OsQuery, constants.QR_EXTSTORAGE: ExtStorageQuery, constants.QR_EXPORT: ExportQuery, } assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP def _GetQueryImplementation(name): """Returns the implemtnation for a query type. @param name: Query type, must be one of L{constants.QR_VIA_OP} """ try: return _QUERY_IMPL[name] except KeyError: raise errors.OpPrereqError("Unknown query resource '%s'" % name, errors.ECODE_INVAL) class LUQuery(NoHooksLU): """Query for resources/items of a certain kind. """ # pylint: disable=W0142 REQ_BGL = False def CheckArguments(self): qcls = _GetQueryImplementation(self.op.what) self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking) def ExpandNames(self): self.impl.ExpandNames(self) def DeclareLocks(self, level): self.impl.DeclareLocks(self, level) def Exec(self, feedback_fn): return self.impl.NewStyleQuery(self) class LUQueryFields(NoHooksLU): """Query for resources/items of a certain kind. """ # pylint: disable=W0142 REQ_BGL = False def CheckArguments(self): self.qcls = _GetQueryImplementation(self.op.what) def ExpandNames(self): self.needed_locks = {} def Exec(self, feedback_fn): return query.QueryFields(self.qcls.FIELDS, self.op.fields) ganeti-2.9.3/lib/cmdlib/group.py0000644000000000000000000010471412271422343016507 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Logical units dealing with node groups.""" import itertools import logging from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import objects from ganeti import qlang from ganeti import query from ganeti import utils from ganeti.masterd import iallocator from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, QueryBase, \ ResultWithJobs from ganeti.cmdlib.common import MergeAndVerifyHvState, \ MergeAndVerifyDiskState, GetWantedNodes, GetUpdatedParams, \ CheckNodeGroupInstances, GetUpdatedIPolicy, \ ComputeNewInstanceViolations, GetDefaultIAllocator, ShareAll, \ CheckInstancesNodeGroups, LoadNodeEvacResult, MapInstanceLvsToNodes, \ CheckIpolicyVsDiskTemplates import ganeti.masterd.instance class LUGroupAdd(LogicalUnit): """Logical unit for creating node groups. """ HPATH = "group-add" HTYPE = constants.HTYPE_GROUP REQ_BGL = False def ExpandNames(self): # We need the new group's UUID here so that we can create and acquire the # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup # that it should not check whether the UUID exists in the configuration. self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) self.needed_locks = {} self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid def _CheckIpolicy(self): """Checks the group's ipolicy for consistency and validity. """ if self.op.ipolicy: cluster = self.cfg.GetClusterInfo() full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy) try: objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False) except errors.ConfigurationError, err: raise errors.OpPrereqError("Invalid instance policy: %s" % err, errors.ECODE_INVAL) CheckIpolicyVsDiskTemplates(full_ipolicy, cluster.enabled_disk_templates) def CheckPrereq(self): """Check prerequisites. This checks that the given group name is not an existing node group already. """ try: existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name) except errors.OpPrereqError: pass else: raise errors.OpPrereqError("Desired group name '%s' already exists as a" " node group (UUID: %s)" % (self.op.group_name, existing_uuid), errors.ECODE_EXISTS) if self.op.ndparams: utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) if self.op.hv_state: self.new_hv_state = MergeAndVerifyHvState(self.op.hv_state, None) else: self.new_hv_state = None if self.op.disk_state: self.new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, None) else: self.new_disk_state = None if self.op.diskparams: for templ in constants.DISK_TEMPLATES: if templ in self.op.diskparams: utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES) self.new_diskparams = self.op.diskparams try: utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) except errors.OpPrereqError, err: raise errors.OpPrereqError("While verify diskparams options: %s" % err, errors.ECODE_INVAL) else: self.new_diskparams = {} self._CheckIpolicy() def BuildHooksEnv(self): """Build hooks env. """ return { "GROUP_NAME": self.op.group_name, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): """Add the node group to the cluster. 
""" group_obj = objects.NodeGroup(name=self.op.group_name, members=[], uuid=self.group_uuid, alloc_policy=self.op.alloc_policy, ndparams=self.op.ndparams, diskparams=self.new_diskparams, ipolicy=self.op.ipolicy, hv_state_static=self.new_hv_state, disk_state_static=self.new_disk_state) self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False) del self.remove_locks[locking.LEVEL_NODEGROUP] class LUGroupAssignNodes(NoHooksLU): """Logical unit for assigning nodes to groups. """ REQ_BGL = False def ExpandNames(self): # These raise errors.OpPrereqError on their own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) (self.op.node_uuids, self.op.nodes) = GetWantedNodes(self, self.op.nodes) # We want to lock all the affected nodes and groups. We have readily # available the list of nodes, and the *destination* group. To gather the # list of "source" groups, we need to fetch node information later on. self.needed_locks = { locking.LEVEL_NODEGROUP: set([self.group_uuid]), locking.LEVEL_NODE: self.op.node_uuids, } def DeclareLocks(self, level): if level == locking.LEVEL_NODEGROUP: assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1 # Try to get all affected nodes' groups without having the group or node # lock yet. Needs verification later in the code flow. groups = self.cfg.GetNodeGroupsFromNodes(self.op.node_uuids) self.needed_locks[locking.LEVEL_NODEGROUP].update(groups) def CheckPrereq(self): """Check prerequisites. """ assert self.needed_locks[locking.LEVEL_NODEGROUP] assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == frozenset(self.op.node_uuids)) expected_locks = (set([self.group_uuid]) | self.cfg.GetNodeGroupsFromNodes(self.op.node_uuids)) actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP) if actual_locks != expected_locks: raise errors.OpExecError("Nodes changed groups since locks were acquired," " current groups are '%s', used to be '%s'" % (utils.CommaJoin(expected_locks), utils.CommaJoin(actual_locks))) self.node_data = self.cfg.GetAllNodesInfo() self.group = self.cfg.GetNodeGroup(self.group_uuid) instance_data = self.cfg.GetAllInstancesInfo() if self.group is None: raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % (self.op.group_name, self.group_uuid)) (new_splits, previous_splits) = \ self.CheckAssignmentForSplitInstances([(uuid, self.group_uuid) for uuid in self.op.node_uuids], self.node_data, instance_data) if new_splits: fmt_new_splits = utils.CommaJoin(utils.NiceSort( self.cfg.GetInstanceNames(new_splits))) if not self.op.force: raise errors.OpExecError("The following instances get split by this" " change and --force was not given: %s" % fmt_new_splits) else: self.LogWarning("This operation will split the following instances: %s", fmt_new_splits) if previous_splits: self.LogWarning("In addition, these already-split instances continue" " to be split across groups: %s", utils.CommaJoin(utils.NiceSort( self.cfg.GetInstanceNames(previous_splits)))) def Exec(self, feedback_fn): """Assign nodes to a new group. """ mods = [(node_uuid, self.group_uuid) for node_uuid in self.op.node_uuids] self.cfg.AssignGroupNodes(mods) @staticmethod def CheckAssignmentForSplitInstances(changes, node_data, instance_data): """Check for split instances after a node assignment. This method considers a series of node assignments as an atomic operation, and returns information about split instances after applying the set of changes. 
In particular, it returns information about newly split instances, and instances that were already split, and remain so after the change. Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered. @type changes: list of (node_uuid, new_group_uuid) pairs. @param changes: list of node assignments to consider. @param node_data: a dict with data for all nodes @param instance_data: a dict with all instances to consider @rtype: a two-tuple @return: a list of instances that were previously okay and result split as a consequence of this change, and a list of instances that were previously split and this change does not fix. """ changed_nodes = dict((uuid, group) for uuid, group in changes if node_data[uuid].group != group) all_split_instances = set() previously_split_instances = set() for inst in instance_data.values(): if inst.disk_template not in constants.DTS_INT_MIRROR: continue if len(set(node_data[node_uuid].group for node_uuid in inst.all_nodes)) > 1: previously_split_instances.add(inst.uuid) if len(set(changed_nodes.get(node_uuid, node_data[node_uuid].group) for node_uuid in inst.all_nodes)) > 1: all_split_instances.add(inst.uuid) return (list(all_split_instances - previously_split_instances), list(previously_split_instances & all_split_instances)) class GroupQuery(QueryBase): FIELDS = query.GROUP_FIELDS def ExpandNames(self, lu): lu.needed_locks = {} self._all_groups = lu.cfg.GetAllNodeGroupsInfo() self._cluster = lu.cfg.GetClusterInfo() name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values()) if not self.names: self.wanted = [name_to_uuid[name] for name in utils.NiceSort(name_to_uuid.keys())] else: # Accept names to be either names or UUIDs. missing = [] self.wanted = [] all_uuid = frozenset(self._all_groups.keys()) for name in self.names: if name in all_uuid: self.wanted.append(name) elif name in name_to_uuid: self.wanted.append(name_to_uuid[name]) else: missing.append(name) if missing: raise errors.OpPrereqError("Some groups do not exist: %s" % utils.CommaJoin(missing), errors.ECODE_NOENT) def DeclareLocks(self, lu, level): pass def _GetQueryData(self, lu): """Computes the list of node groups and their attributes. """ do_nodes = query.GQ_NODE in self.requested_data do_instances = query.GQ_INST in self.requested_data group_to_nodes = None group_to_instances = None # For GQ_NODE, we need to map group->[nodes], and group->[instances] for # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the # latter GetAllInstancesInfo() is not enough, for we have to go through # instance->node. Hence, we will need to process nodes even if we only need # instance information. if do_nodes or do_instances: all_nodes = lu.cfg.GetAllNodesInfo() group_to_nodes = dict((uuid, []) for uuid in self.wanted) node_to_group = {} for node in all_nodes.values(): if node.group in group_to_nodes: group_to_nodes[node.group].append(node.uuid) node_to_group[node.uuid] = node.group if do_instances: all_instances = lu.cfg.GetAllInstancesInfo() group_to_instances = dict((uuid, []) for uuid in self.wanted) for instance in all_instances.values(): node = instance.primary_node if node in node_to_group: group_to_instances[node_to_group[node]].append(instance.uuid) if not do_nodes: # Do not pass on node information if it was not requested. 
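# Illustrative (hypothetical) shape of the mappings built above: # group_to_nodes = {"grp1-uuid": ["node1-uuid", "node2-uuid"]}, # group_to_instances = {"grp1-uuid": ["inst1-uuid"]}; the next # statement drops group_to_nodes again when GQ_NODE was not requested.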
group_to_nodes = None return query.GroupQueryData(self._cluster, [self._all_groups[uuid] for uuid in self.wanted], group_to_nodes, group_to_instances, query.GQ_DISKPARAMS in self.requested_data) class LUGroupQuery(NoHooksLU): """Logical unit for querying node groups. """ REQ_BGL = False def CheckArguments(self): self.gq = GroupQuery(qlang.MakeSimpleFilter("name", self.op.names), self.op.output_fields, False) def ExpandNames(self): self.gq.ExpandNames(self) def DeclareLocks(self, level): self.gq.DeclareLocks(self, level) def Exec(self, feedback_fn): return self.gq.OldStyleQuery(self) class LUGroupSetParams(LogicalUnit): """Modifies the parameters of a node group. """ HPATH = "group-modify" HTYPE = constants.HTYPE_GROUP REQ_BGL = False def CheckArguments(self): all_changes = [ self.op.ndparams, self.op.diskparams, self.op.alloc_policy, self.op.hv_state, self.op.disk_state, self.op.ipolicy, ] if all_changes.count(None) == len(all_changes): raise errors.OpPrereqError("Please pass at least one modification", errors.ECODE_INVAL) def ExpandNames(self): # This raises errors.OpPrereqError on its own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [self.group_uuid], } self.share_locks[locking.LEVEL_INSTANCE] = 1 def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: assert not self.needed_locks[locking.LEVEL_INSTANCE] # Lock instances optimistically, needs verification once group lock has # been acquired self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetNodeGroupInstances(self.group_uuid)) @staticmethod def _UpdateAndVerifyDiskParams(old, new): """Updates and verifies disk parameters. """ new_params = GetUpdatedParams(old, new) utils.ForceDictType(new_params, constants.DISK_DT_TYPES) return new_params def _CheckIpolicy(self, cluster, owned_instance_names): """Sanity checks for the ipolicy. @type cluster: C{objects.Cluster} @param cluster: the cluster's configuration @type owned_instance_names: list of string @param owned_instance_names: list of instances """ if self.op.ipolicy: self.new_ipolicy = GetUpdatedIPolicy(self.group.ipolicy, self.op.ipolicy, group_policy=True) new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy) CheckIpolicyVsDiskTemplates(new_ipolicy, cluster.enabled_disk_templates) instances = \ dict(self.cfg.GetMultiInstanceInfoByName(owned_instance_names)) gmi = ganeti.masterd.instance violations = \ ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster, self.group), new_ipolicy, instances.values(), self.cfg) if violations: self.LogWarning("After the ipolicy change the following instances" " violate them: %s", utils.CommaJoin(violations)) def CheckPrereq(self): """Check prerequisites. 
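This re-checks the optimistically locked instance list against the group and validates every requested change (ndparams, diskparams, hv/disk state and ipolicy) before Exec applies it.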
""" owned_instance_names = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) # Check if locked instances are still correct CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instance_names) self.group = self.cfg.GetNodeGroup(self.group_uuid) cluster = self.cfg.GetClusterInfo() if self.group is None: raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % (self.op.group_name, self.group_uuid)) if self.op.ndparams: new_ndparams = GetUpdatedParams(self.group.ndparams, self.op.ndparams) utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) self.new_ndparams = new_ndparams if self.op.diskparams: diskparams = self.group.diskparams uavdp = self._UpdateAndVerifyDiskParams # For each disktemplate subdict update and verify the values new_diskparams = dict((dt, uavdp(diskparams.get(dt, {}), self.op.diskparams[dt])) for dt in constants.DISK_TEMPLATES if dt in self.op.diskparams) # As we've all subdicts of diskparams ready, lets merge the actual # dict with all updated subdicts self.new_diskparams = objects.FillDict(diskparams, new_diskparams) try: utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) except errors.OpPrereqError, err: raise errors.OpPrereqError("While verify diskparams options: %s" % err, errors.ECODE_INVAL) if self.op.hv_state: self.new_hv_state = MergeAndVerifyHvState(self.op.hv_state, self.group.hv_state_static) if self.op.disk_state: self.new_disk_state = \ MergeAndVerifyDiskState(self.op.disk_state, self.group.disk_state_static) self._CheckIpolicy(cluster, owned_instance_names) def BuildHooksEnv(self): """Build hooks env. """ return { "GROUP_NAME": self.op.group_name, "NEW_ALLOC_POLICY": self.op.alloc_policy, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): """Modifies the node group. """ result = [] if self.op.ndparams: self.group.ndparams = self.new_ndparams result.append(("ndparams", str(self.group.ndparams))) if self.op.diskparams: self.group.diskparams = self.new_diskparams result.append(("diskparams", str(self.group.diskparams))) if self.op.alloc_policy: self.group.alloc_policy = self.op.alloc_policy if self.op.hv_state: self.group.hv_state_static = self.new_hv_state if self.op.disk_state: self.group.disk_state_static = self.new_disk_state if self.op.ipolicy: self.group.ipolicy = self.new_ipolicy self.cfg.Update(self.group, feedback_fn) return result class LUGroupRemove(LogicalUnit): HPATH = "group-remove" HTYPE = constants.HTYPE_GROUP REQ_BGL = False def ExpandNames(self): # This will raises errors.OpPrereqError on its own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) self.needed_locks = { locking.LEVEL_NODEGROUP: [self.group_uuid], } def CheckPrereq(self): """Check prerequisites. This checks that the given group name exists as a node group, that is empty (i.e., contains no nodes), and that is not the last group of the cluster. """ # Verify that the group is empty. group_nodes = [node.uuid for node in self.cfg.GetAllNodesInfo().values() if node.group == self.group_uuid] if group_nodes: raise errors.OpPrereqError("Group '%s' not empty, has the following" " nodes: %s" % (self.op.group_name, utils.CommaJoin(utils.NiceSort(group_nodes))), errors.ECODE_STATE) # Verify the cluster would not be left group-less. if len(self.cfg.GetNodeGroupList()) == 1: raise errors.OpPrereqError("Group '%s' is the only group, cannot be" " removed" % self.op.group_name, errors.ECODE_STATE) def BuildHooksEnv(self): """Build hooks env. 
""" return { "GROUP_NAME": self.op.group_name, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): """Remove the node group. """ try: self.cfg.RemoveNodeGroup(self.group_uuid) except errors.ConfigurationError: raise errors.OpExecError("Group '%s' with UUID %s disappeared" % (self.op.group_name, self.group_uuid)) self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid class LUGroupRename(LogicalUnit): HPATH = "group-rename" HTYPE = constants.HTYPE_GROUP REQ_BGL = False def ExpandNames(self): # This raises errors.OpPrereqError on its own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) self.needed_locks = { locking.LEVEL_NODEGROUP: [self.group_uuid], } def CheckPrereq(self): """Check prerequisites. Ensures requested new name is not yet used. """ try: new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name) except errors.OpPrereqError: pass else: raise errors.OpPrereqError("Desired new name '%s' clashes with existing" " node group (UUID: %s)" % (self.op.new_name, new_name_uuid), errors.ECODE_EXISTS) def BuildHooksEnv(self): """Build hooks env. """ return { "OLD_NAME": self.op.group_name, "NEW_NAME": self.op.new_name, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() all_nodes = self.cfg.GetAllNodesInfo() all_nodes.pop(mn, None) run_nodes = [mn] run_nodes.extend(node.uuid for node in all_nodes.values() if node.group == self.group_uuid) return (run_nodes, run_nodes) def Exec(self, feedback_fn): """Rename the node group. """ group = self.cfg.GetNodeGroup(self.group_uuid) if group is None: raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % (self.op.group_name, self.group_uuid)) group.name = self.op.new_name self.cfg.Update(group, feedback_fn) return self.op.new_name class LUGroupEvacuate(LogicalUnit): HPATH = "group-evacuate" HTYPE = constants.HTYPE_GROUP REQ_BGL = False def ExpandNames(self): # This raises errors.OpPrereqError on its own: self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) if self.op.target_groups: self.req_target_uuids = map(self.cfg.LookupNodeGroup, self.op.target_groups) else: self.req_target_uuids = [] if self.group_uuid in self.req_target_uuids: raise errors.OpPrereqError("Group to be evacuated (%s) can not be used" " as a target group (targets are %s)" % (self.group_uuid, utils.CommaJoin(self.req_target_uuids)), errors.ECODE_INVAL) self.op.iallocator = GetDefaultIAllocator(self.cfg, self.op.iallocator) self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [], locking.LEVEL_NODE: [], } def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: assert not self.needed_locks[locking.LEVEL_INSTANCE] # Lock instances optimistically, needs verification once node and group # locks have been acquired self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetNodeGroupInstances(self.group_uuid)) elif level == locking.LEVEL_NODEGROUP: assert not self.needed_locks[locking.LEVEL_NODEGROUP] if self.req_target_uuids: lock_groups = set([self.group_uuid] + self.req_target_uuids) # Lock all groups used by instances optimistically; this requires going # via the node before it's locked, requiring verification later on lock_groups.update(group_uuid for instance_name in self.owned_locks(locking.LEVEL_INSTANCE) for group_uuid in self.cfg.GetInstanceNodeGroups( self.cfg.GetInstanceInfoByName(instance_name) .uuid)) else: # No target groups, need to 
lock all of them lock_groups = locking.ALL_SET self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups elif level == locking.LEVEL_NODE: # This will only lock the nodes in the group to be evacuated which # contain actual instances self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND self._LockInstancesNodes() # Lock all nodes in group to be evacuated and target groups owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) assert self.group_uuid in owned_groups member_node_uuids = [node_uuid for group in owned_groups for node_uuid in self.cfg.GetNodeGroup(group).members] self.needed_locks[locking.LEVEL_NODE].extend(member_node_uuids) def CheckPrereq(self): owned_instance_names = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) owned_node_uuids = frozenset(self.owned_locks(locking.LEVEL_NODE)) assert owned_groups.issuperset(self.req_target_uuids) assert self.group_uuid in owned_groups # Check if locked instances are still correct CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instance_names) # Get instance information self.instances = \ dict(self.cfg.GetMultiInstanceInfoByName(owned_instance_names)) # Check if node groups for locked instances are still correct CheckInstancesNodeGroups(self.cfg, self.instances, owned_groups, owned_node_uuids, self.group_uuid) if self.req_target_uuids: # User requested specific target groups self.target_uuids = self.req_target_uuids else: # All groups except the one to be evacuated are potential targets self.target_uuids = [group_uuid for group_uuid in owned_groups if group_uuid != self.group_uuid] if not self.target_uuids: raise errors.OpPrereqError("There are no possible target groups", errors.ECODE_INVAL) def BuildHooksEnv(self): """Build hooks env. """ return { "GROUP_NAME": self.op.group_name, "TARGET_GROUPS": " ".join(self.target_uuids), } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members return (run_nodes, run_nodes) def Exec(self, feedback_fn): inst_names = list(self.owned_locks(locking.LEVEL_INSTANCE)) assert self.group_uuid not in self.target_uuids req = iallocator.IAReqGroupChange(instances=inst_names, target_groups=self.target_uuids) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) if not ial.success: raise errors.OpPrereqError("Can't compute group evacuation using" " iallocator '%s': %s" % (self.op.iallocator, ial.info), errors.ECODE_NORES) jobs = LoadNodeEvacResult(self, ial.result, self.op.early_release, False) self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s", len(jobs), self.op.group_name) return ResultWithJobs(jobs) class LUGroupVerifyDisks(NoHooksLU): """Verifies the status of all disks in a node group. """ REQ_BGL = False def ExpandNames(self): # Raises errors.OpPrereqError on its own if group can't be found self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) self.share_locks = ShareAll() self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [], locking.LEVEL_NODE: [], # This opcode acquires all node locks in a group. LUClusterVerifyDisks # starts one instance of this opcode for every group, which means all # nodes will be locked for a short amount of time, so it's better to # acquire the node allocation lock as well.
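# Note that self.share_locks = ShareAll() above makes this (and the # other locks declared here) a shared, not exclusive, acquisition.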
locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: assert not self.needed_locks[locking.LEVEL_INSTANCE] # Lock instances optimistically, needs verification once node and group # locks have been acquired self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetNodeGroupInstances(self.group_uuid)) elif level == locking.LEVEL_NODEGROUP: assert not self.needed_locks[locking.LEVEL_NODEGROUP] self.needed_locks[locking.LEVEL_NODEGROUP] = \ set([self.group_uuid] + # Lock all groups used by instances optimistically; this requires # going via the node before it's locked, requiring verification # later on [group_uuid for instance_name in self.owned_locks(locking.LEVEL_INSTANCE) for group_uuid in self.cfg.GetInstanceNodeGroups( self.cfg.GetInstanceInfoByName(instance_name).uuid)]) elif level == locking.LEVEL_NODE: # This will only lock the nodes in the group to be verified which contain # actual instances self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND self._LockInstancesNodes() # Lock all nodes in group to be verified assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) member_node_uuids = self.cfg.GetNodeGroup(self.group_uuid).members self.needed_locks[locking.LEVEL_NODE].extend(member_node_uuids) def CheckPrereq(self): owned_inst_names = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) owned_node_uuids = frozenset(self.owned_locks(locking.LEVEL_NODE)) assert self.group_uuid in owned_groups # Check if locked instances are still correct CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_inst_names) # Get instance information self.instances = dict(self.cfg.GetMultiInstanceInfoByName(owned_inst_names)) # Check if node groups for locked instances are still correct CheckInstancesNodeGroups(self.cfg, self.instances, owned_groups, owned_node_uuids, self.group_uuid) def _VerifyInstanceLvs(self, node_errors, offline_disk_instance_names, missing_disks): node_lv_to_inst = MapInstanceLvsToNodes( [inst for inst in self.instances.values() if inst.disks_active]) if node_lv_to_inst: node_uuids = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) & set(self.cfg.GetVmCapableNodeList())) node_lvs = self.rpc.call_lv_list(node_uuids, []) for (node_uuid, node_res) in node_lvs.items(): if node_res.offline: continue msg = node_res.fail_msg if msg: logging.warning("Error enumerating LVs on node %s: %s", self.cfg.GetNodeName(node_uuid), msg) node_errors[node_uuid] = msg continue for lv_name, (_, _, lv_online) in node_res.payload.items(): inst = node_lv_to_inst.pop((node_uuid, lv_name), None) if not lv_online and inst is not None: offline_disk_instance_names.add(inst.name) # any leftover items in nv_dict are missing LVs, let's arrange the data # better for key, inst in node_lv_to_inst.iteritems(): missing_disks.setdefault(inst.name, []).append(list(key)) def _VerifyDrbdStates(self, node_errors, offline_disk_instance_names): node_to_inst = {} for inst in self.instances.values(): if not inst.disks_active or inst.disk_template != constants.DT_DRBD8: continue for node_uuid in itertools.chain([inst.primary_node], inst.secondary_nodes): node_to_inst.setdefault(node_uuid, []).append(inst) nodes_ip = dict((uuid, node.secondary_ip) for (uuid, node) in self.cfg.GetMultiNodeInfo(node_to_inst.keys())) for (node_uuid, insts) in node_to_inst.items(): node_disks = [(inst.disks, inst) for inst in insts] node_res = 
self.rpc.call_drbd_needs_activation(node_uuid, nodes_ip, node_disks) msg = node_res.fail_msg if msg: logging.warning("Error getting DRBD status on node %s: %s", self.cfg.GetNodeName(node_uuid), msg) node_errors[node_uuid] = msg continue faulty_disk_uuids = set(node_res.payload) for inst in self.instances.values(): inst_disk_uuids = set([disk.uuid for disk in inst.disks]) if inst_disk_uuids.intersection(faulty_disk_uuids): offline_disk_instance_names.add(inst.name) def Exec(self, feedback_fn): """Verify integrity of cluster disks. @rtype: tuple of three items @return: a tuple of (dict of node-to-node_error, list of instances which need activate-disks, dict of instance: (node, volume) for missing volumes """ node_errors = {} offline_disk_instance_names = set() missing_disks = {} self._VerifyInstanceLvs(node_errors, offline_disk_instance_names, missing_disks) self._VerifyDrbdStates(node_errors, offline_disk_instance_names) return (node_errors, list(offline_disk_instance_names), missing_disks) ganeti-2.9.3/lib/cmdlib/backup.py0000644000000000000000000004502012271422343016612 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Logical units dealing with backup operations.""" import OpenSSL import logging from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import masterd from ganeti import qlang from ganeti import query from ganeti import utils from ganeti.cmdlib.base import QueryBase, NoHooksLU, LogicalUnit from ganeti.cmdlib.common import GetWantedNodes, ShareAll, CheckNodeOnline, \ ExpandNodeUuidAndName from ganeti.cmdlib.instance_storage import StartInstanceDisks, \ ShutdownInstanceDisks from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \ BuildInstanceHookEnvByObject, CheckNodeNotDrained, RemoveInstance class ExportQuery(QueryBase): FIELDS = query.EXPORT_FIELDS #: The node name is not a unique key for this query SORT_FIELD = "node" def ExpandNames(self, lu): lu.needed_locks = {} # The following variables interact with _QueryBase._GetNames if self.names: (self.wanted, _) = GetWantedNodes(lu, self.names) else: self.wanted = locking.ALL_SET self.do_locking = self.use_locking if self.do_locking: lu.share_locks = ShareAll() lu.needed_locks = { locking.LEVEL_NODE: self.wanted, } if not self.names: lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET def DeclareLocks(self, lu, level): pass def _GetQueryData(self, lu): """Computes the list of nodes and their attributes. 
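Each result row is a (node_uuid, export_name) pair; a row of (node_uuid, None) marks a node whose export list could not be retrieved.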
""" # Locking is not used # TODO assert not (compat.any(lu.glm.is_owned(level) for level in locking.LEVELS if level != locking.LEVEL_CLUSTER) or self.do_locking or self.use_locking) node_uuids = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE) result = [] for (node_uuid, nres) in lu.rpc.call_export_list(node_uuids).items(): if nres.fail_msg: result.append((node_uuid, None)) else: result.extend((node_uuid, expname) for expname in nres.payload) return result class LUBackupQuery(NoHooksLU): """Query the exports list """ REQ_BGL = False def CheckArguments(self): self.expq = ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes), ["node", "export"], self.op.use_locking) def ExpandNames(self): self.expq.ExpandNames(self) def DeclareLocks(self, level): self.expq.DeclareLocks(self, level) def Exec(self, feedback_fn): result = {} for (node, expname) in self.expq.OldStyleQuery(self): if expname is None: result[node] = False else: result.setdefault(node, []).append(expname) return result class LUBackupPrepare(NoHooksLU): """Prepares an instance for an export and returns useful information. """ REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() def CheckPrereq(self): """Check prerequisites. """ self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckNodeOnline(self, self.instance.primary_node) self._cds = GetClusterDomainSecret() def Exec(self, feedback_fn): """Prepares an instance for an export. """ if self.op.mode == constants.EXPORT_MODE_REMOTE: salt = utils.GenerateSecret(8) feedback_fn("Generating X509 certificate on %s" % self.cfg.GetNodeName(self.instance.primary_node)) result = self.rpc.call_x509_cert_create(self.instance.primary_node, constants.RIE_CERT_VALIDITY) result.Raise("Can't create X509 key and certificate on %s" % self.cfg.GetNodeName(result.node)) (name, cert_pem) = result.payload cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) return { "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), salt), "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), } return None class LUBackupExport(LogicalUnit): """Export an instance to an image in the cluster. """ HPATH = "instance-export" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def CheckArguments(self): """Check the arguments. """ self.x509_key_name = self.op.x509_key_name self.dest_x509_ca_pem = self.op.destination_x509_ca if self.op.mode == constants.EXPORT_MODE_REMOTE: if not self.x509_key_name: raise errors.OpPrereqError("Missing X509 key name for encryption", errors.ECODE_INVAL) if not self.dest_x509_ca_pem: raise errors.OpPrereqError("Missing destination X509 CA", errors.ECODE_INVAL) def ExpandNames(self): self._ExpandAndLockInstance() # Lock all nodes for local exports if self.op.mode == constants.EXPORT_MODE_LOCAL: (self.op.target_node_uuid, self.op.target_node) = \ ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid, self.op.target_node) # FIXME: lock only instance primary and destination node # # Sad but true, for now we have do lock all nodes, as we don't know where # the previous export might be, and in this LU we search for it and # remove it from its current node. 
In the future we could fix this by: # - making a tasklet to search (share-lock all), then create the # new one, then one to remove, after # - removing the removal operation altogether self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET # Allocations should be stopped while this LU runs with node locks, but # it doesn't have to be exclusive self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET def DeclareLocks(self, level): """Last minute lock declaration.""" # All nodes are locked anyway, so nothing to do here. def BuildHooksEnv(self): """Build hooks env. This will run on the master, primary node and target node. """ env = { "EXPORT_MODE": self.op.mode, "EXPORT_NODE": self.op.target_node, "EXPORT_DO_SHUTDOWN": self.op.shutdown, "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, # TODO: Generic function for boolean env variables "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode(), self.instance.primary_node] if self.op.mode == constants.EXPORT_MODE_LOCAL: nl.append(self.op.target_node_uuid) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance and node names are valid. """ self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckNodeOnline(self, self.instance.primary_node) if (self.op.remove_instance and self.instance.admin_state == constants.ADMINST_UP and not self.op.shutdown): raise errors.OpPrereqError("Can not remove instance without shutting it" " down before", errors.ECODE_STATE) if self.op.mode == constants.EXPORT_MODE_LOCAL: self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid) assert self.dst_node is not None CheckNodeOnline(self, self.dst_node.uuid) CheckNodeNotDrained(self, self.dst_node.uuid) self._cds = None self.dest_disk_info = None self.dest_x509_ca = None elif self.op.mode == constants.EXPORT_MODE_REMOTE: self.dst_node = None if len(self.op.target_node) != len(self.instance.disks): raise errors.OpPrereqError(("Received destination information for %s" " disks, but instance %s has %s disks") % (len(self.op.target_node), self.op.instance_name, len(self.instance.disks)), errors.ECODE_INVAL) cds = GetClusterDomainSecret() # Check X509 key name try: (key_name, hmac_digest, hmac_salt) = self.x509_key_name except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err, errors.ECODE_INVAL) if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): raise errors.OpPrereqError("HMAC for X509 key name is wrong", errors.ECODE_INVAL) # Load and verify CA try: (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) except OpenSSL.crypto.Error, err: raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % (err, ), errors.ECODE_INVAL) (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) if errcode is not None: raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ), errors.ECODE_INVAL) self.dest_x509_ca = cert # Verify target information disk_info = [] for idx, disk_data in enumerate(self.op.target_node): try: (host, port, magic) = \ masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) except errors.GenericError, err: raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err), 
errors.ECODE_INVAL) disk_info.append((host, port, magic)) assert len(disk_info) == len(self.op.target_node) self.dest_disk_info = disk_info else: raise errors.ProgrammerError("Unhandled export mode %r" % self.op.mode) # instance disk type verification # TODO: Implement export support for file-based disks for disk in self.instance.disks: if disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]: raise errors.OpPrereqError("Export not supported for instances with" " file-based disks", errors.ECODE_INVAL) def _CleanupExports(self, feedback_fn): """Removes exports of current instance from all other nodes. If an instance in a cluster with nodes A..D was exported to node C, its exports will be removed from the nodes A, B and D. """ assert self.op.mode != constants.EXPORT_MODE_REMOTE node_uuids = self.cfg.GetNodeList() node_uuids.remove(self.dst_node.uuid) # on one-node clusters nodelist will be empty after the removal # if we proceed the backup would be removed because OpBackupQuery # substitutes an empty list with the full cluster node list. iname = self.instance.name if node_uuids: feedback_fn("Removing old exports for instance %s" % iname) exportlist = self.rpc.call_export_list(node_uuids) for node_uuid in exportlist: if exportlist[node_uuid].fail_msg: continue if iname in exportlist[node_uuid].payload: msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg if msg: self.LogWarning("Could not remove older export for instance %s" " on node %s: %s", iname, self.cfg.GetNodeName(node_uuid), msg) def Exec(self, feedback_fn): """Export an instance to an image in the cluster. """ assert self.op.mode in constants.EXPORT_MODES src_node_uuid = self.instance.primary_node if self.op.shutdown: # shutdown the instance, but not the disks feedback_fn("Shutting down instance %s" % self.instance.name) result = self.rpc.call_instance_shutdown(src_node_uuid, self.instance, self.op.shutdown_timeout, self.op.reason) # TODO: Maybe ignore failures if ignore_remove_failures is set result.Raise("Could not shutdown instance %s on" " node %s" % (self.instance.name, self.cfg.GetNodeName(src_node_uuid))) # set the disks ID correctly since call_instance_start needs the # correct drbd minor to create the symlinks for disk in self.instance.disks: self.cfg.SetDiskID(disk, src_node_uuid) activate_disks = not self.instance.disks_active if activate_disks: # Activate the instance disks if we're exporting a stopped instance feedback_fn("Activating disks for %s" % self.instance.name) StartInstanceDisks(self, self.instance, None) try: helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, self.instance) helper.CreateSnapshots() try: if (self.op.shutdown and self.instance.admin_state == constants.ADMINST_UP and not self.op.remove_instance): assert not activate_disks feedback_fn("Starting instance %s" % self.instance.name) result = self.rpc.call_instance_start(src_node_uuid, (self.instance, None, None), False, self.op.reason) msg = result.fail_msg if msg: feedback_fn("Failed to start instance: %s" % msg) ShutdownInstanceDisks(self, self.instance) raise errors.OpExecError("Could not start instance: %s" % msg) if self.op.mode == constants.EXPORT_MODE_LOCAL: (fin_resu, dresults) = helper.LocalExport(self.dst_node) elif self.op.mode == constants.EXPORT_MODE_REMOTE: connect_timeout = constants.RIE_CONNECT_TIMEOUT timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) (key_name, _, _) = self.x509_key_name dest_ca_pem = \ OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, self.dest_x509_ca)
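# Both export paths return a (fin_resu, dresults) pair: the overall # finalization status plus one boolean per instance disk, e.g. # (True, [True, False]) would mean the second disk failed to export; # both values are checked after the inner try/finally below.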
(fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info, key_name, dest_ca_pem, timeouts) finally: helper.Cleanup() # Check for backwards compatibility assert len(dresults) == len(self.instance.disks) assert compat.all(isinstance(i, bool) for i in dresults), \ "Not all results are boolean: %r" % dresults finally: if activate_disks: feedback_fn("Deactivating disks for %s" % self.instance.name) ShutdownInstanceDisks(self, self.instance) if not (compat.all(dresults) and fin_resu): failures = [] if not fin_resu: failures.append("export finalization") if not compat.all(dresults): fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) if not dsk) failures.append("disk export: disk(s) %s" % fdsk) raise errors.OpExecError("Export failed, errors in %s" % utils.CommaJoin(failures)) # At this point, the export was successful, we can cleanup/finish # Remove instance if requested if self.op.remove_instance: feedback_fn("Removing instance %s" % self.instance.name) RemoveInstance(self, feedback_fn, self.instance, self.op.ignore_remove_failures) if self.op.mode == constants.EXPORT_MODE_LOCAL: self._CleanupExports(feedback_fn) return fin_resu, dresults class LUBackupRemove(NoHooksLU): """Remove exports related to the named instance. """ REQ_BGL = False def ExpandNames(self): self.needed_locks = { # We need all nodes to be locked in order for RemoveExport to work, but # we don't need to lock the instance itself, as nothing will happen to it # (and we can remove exports also for a removed instance) locking.LEVEL_NODE: locking.ALL_SET, # Removing backups is quick, so blocking allocations is justified locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } # Allocations should be stopped while this LU runs with node locks, but it # doesn't have to be exclusive self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 def Exec(self, feedback_fn): """Remove any export. """ (_, inst_name) = self.cfg.ExpandInstanceName(self.op.instance_name) # If the instance was not found we'll try with the name that was passed in. # This will only work if it was an FQDN, though. fqdn_warn = False if not inst_name: fqdn_warn = True inst_name = self.op.instance_name locked_nodes = self.owned_locks(locking.LEVEL_NODE) exportlist = self.rpc.call_export_list(locked_nodes) found = False for node_uuid in exportlist: msg = exportlist[node_uuid].fail_msg if msg: self.LogWarning("Failed to query node %s (continuing): %s", self.cfg.GetNodeName(node_uuid), msg) continue if inst_name in exportlist[node_uuid].payload: found = True result = self.rpc.call_export_remove(node_uuid, inst_name) msg = result.fail_msg if msg: logging.error("Could not remove export for instance %s" " on node %s: %s", inst_name, self.cfg.GetNodeName(node_uuid), msg) if fqdn_warn and not found: feedback_fn("Export not found. If trying to remove an export belonging" " to a deleted instance please use its Fully Qualified" " Domain Name.") ganeti-2.9.3/lib/cmdlib/network.py0000644000000000000000000005507212271422343017046 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Logical units dealing with networks.""" from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import network from ganeti import objects from ganeti import qlang from ganeti import query from ganeti import utils from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, QueryBase from ganeti.cmdlib.common import ShareAll, CheckNodeGroupInstances def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6, mac_prefix, tags): """Builds network related env variables for hooks This builds the hook environment from individual variables. @type name: string @param name: the name of the network @type subnet: string @param subnet: the ipv4 subnet @type gateway: string @param gateway: the ipv4 gateway @type network6: string @param network6: the ipv6 subnet @type gateway6: string @param gateway6: the ipv6 gateway @type mac_prefix: string @param mac_prefix: the mac_prefix @type tags: list @param tags: the tags of the network """ env = {} if name: env["NETWORK_NAME"] = name if subnet: env["NETWORK_SUBNET"] = subnet if gateway: env["NETWORK_GATEWAY"] = gateway if network6: env["NETWORK_SUBNET6"] = network6 if gateway6: env["NETWORK_GATEWAY6"] = gateway6 if mac_prefix: env["NETWORK_MAC_PREFIX"] = mac_prefix if tags: env["NETWORK_TAGS"] = " ".join(tags) return env class LUNetworkAdd(LogicalUnit): """Logical unit for creating networks. """ HPATH = "network-add" HTYPE = constants.HTYPE_NETWORK REQ_BGL = False def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def CheckArguments(self): if self.op.mac_prefix: self.op.mac_prefix = \ utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix) def ExpandNames(self): self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) if self.op.conflicts_check: self.share_locks[locking.LEVEL_NODE] = 1 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, locking.LEVEL_NODE_ALLOC: locking.ALL_SET, } else: self.needed_locks = {} self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid def CheckPrereq(self): if self.op.network is None: raise errors.OpPrereqError("Network must be given", errors.ECODE_INVAL) try: existing_uuid = self.cfg.LookupNetwork(self.op.network_name) except errors.OpPrereqError: pass else: raise errors.OpPrereqError("Desired network name '%s' already exists as a" " network (UUID: %s)" % (self.op.network_name, existing_uuid), errors.ECODE_EXISTS) # Check tag validity for tag in self.op.tags: objects.TaggableObject.ValidateTag(tag) def BuildHooksEnv(self): """Build hooks env. """ args = { "name": self.op.network_name, "subnet": self.op.network, "gateway": self.op.gateway, "network6": self.op.network6, "gateway6": self.op.gateway6, "mac_prefix": self.op.mac_prefix, "tags": self.op.tags, } return _BuildNetworkHookEnv(**args) # pylint: disable=W0142 def Exec(self, feedback_fn): """Add the ip pool to the cluster. 
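This creates the network object with its address pool and, when conflicts_check is set, reserves node and cluster master IP addresses falling inside the pool, as well as any explicitly requested reserved IPs.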
""" nobj = objects.Network(name=self.op.network_name, network=self.op.network, gateway=self.op.gateway, network6=self.op.network6, gateway6=self.op.gateway6, mac_prefix=self.op.mac_prefix, uuid=self.network_uuid) # Initialize the associated address pool try: pool = network.AddressPool.InitializeNetwork(nobj) except errors.AddressPoolError, err: raise errors.OpExecError("Cannot create IP address pool for network" " '%s': %s" % (self.op.network_name, err)) # Check if we need to reserve the nodes and the cluster master IP # These may not be allocated to any instances in routed mode, as # they wouldn't function anyway. if self.op.conflicts_check: for node in self.cfg.GetAllNodesInfo().values(): for ip in [node.primary_ip, node.secondary_ip]: try: if pool.Contains(ip): pool.Reserve(ip) self.LogInfo("Reserved IP address of node '%s' (%s)", node.name, ip) except errors.AddressPoolError, err: self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s", ip, node.name, err) master_ip = self.cfg.GetClusterInfo().master_ip try: if pool.Contains(master_ip): pool.Reserve(master_ip) self.LogInfo("Reserved cluster master IP address (%s)", master_ip) except errors.AddressPoolError, err: self.LogWarning("Cannot reserve cluster master IP address (%s): %s", master_ip, err) if self.op.add_reserved_ips: for ip in self.op.add_reserved_ips: try: pool.Reserve(ip, external=True) except errors.AddressPoolError, err: raise errors.OpExecError("Cannot reserve IP address '%s': %s" % (ip, err)) if self.op.tags: for tag in self.op.tags: nobj.AddTag(tag) self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False) del self.remove_locks[locking.LEVEL_NETWORK] class LUNetworkRemove(LogicalUnit): HPATH = "network-remove" HTYPE = constants.HTYPE_NETWORK REQ_BGL = False def ExpandNames(self): self.network_uuid = self.cfg.LookupNetwork(self.op.network_name) self.share_locks[locking.LEVEL_NODEGROUP] = 1 self.needed_locks = { locking.LEVEL_NETWORK: [self.network_uuid], locking.LEVEL_NODEGROUP: locking.ALL_SET, } def CheckPrereq(self): """Check prerequisites. This checks that the given network name exists as a network, that is empty (i.e., contains no nodes), and that is not the last group of the cluster. """ # Verify that the network is not conncted. node_groups = [group.name for group in self.cfg.GetAllNodeGroupsInfo().values() if self.network_uuid in group.networks] if node_groups: self.LogWarning("Network '%s' is connected to the following" " node groups: %s" % (self.op.network_name, utils.CommaJoin(utils.NiceSort(node_groups)))) raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE) def BuildHooksEnv(self): """Build hooks env. """ return { "NETWORK_NAME": self.op.network_name, } def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): """Remove the network. """ try: self.cfg.RemoveNetwork(self.network_uuid) except errors.ConfigurationError: raise errors.OpExecError("Network '%s' with UUID %s disappeared" % (self.op.network_name, self.network_uuid)) class LUNetworkSetParams(LogicalUnit): """Modifies the parameters of a network. 
""" HPATH = "network-modify" HTYPE = constants.HTYPE_NETWORK REQ_BGL = False def CheckArguments(self): if (self.op.gateway and (self.op.add_reserved_ips or self.op.remove_reserved_ips)): raise errors.OpPrereqError("Cannot modify gateway and reserved ips" " at once", errors.ECODE_INVAL) def ExpandNames(self): self.network_uuid = self.cfg.LookupNetwork(self.op.network_name) self.needed_locks = { locking.LEVEL_NETWORK: [self.network_uuid], } def CheckPrereq(self): """Check prerequisites. """ self.network = self.cfg.GetNetwork(self.network_uuid) self.gateway = self.network.gateway self.mac_prefix = self.network.mac_prefix self.network6 = self.network.network6 self.gateway6 = self.network.gateway6 self.tags = self.network.tags self.pool = network.AddressPool(self.network) if self.op.gateway: if self.op.gateway == constants.VALUE_NONE: self.gateway = None else: self.gateway = self.op.gateway if self.pool.IsReserved(self.gateway): raise errors.OpPrereqError("Gateway IP address '%s' is already" " reserved" % self.gateway, errors.ECODE_STATE) if self.op.mac_prefix: if self.op.mac_prefix == constants.VALUE_NONE: self.mac_prefix = None else: self.mac_prefix = \ utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix) if self.op.gateway6: if self.op.gateway6 == constants.VALUE_NONE: self.gateway6 = None else: self.gateway6 = self.op.gateway6 if self.op.network6: if self.op.network6 == constants.VALUE_NONE: self.network6 = None else: self.network6 = self.op.network6 def BuildHooksEnv(self): """Build hooks env. """ args = { "name": self.op.network_name, "subnet": self.network.network, "gateway": self.gateway, "network6": self.network6, "gateway6": self.gateway6, "mac_prefix": self.mac_prefix, "tags": self.tags, } return _BuildNetworkHookEnv(**args) # pylint: disable=W0142 def BuildHooksNodes(self): """Build hooks nodes. """ mn = self.cfg.GetMasterNode() return ([mn], [mn]) def Exec(self, feedback_fn): """Modifies the network. 
""" #TODO: reserve/release via temporary reservation manager # extend cfg.ReserveIp/ReleaseIp with the external flag if self.op.gateway: if self.gateway == self.network.gateway: self.LogWarning("Gateway is already %s", self.gateway) else: if self.gateway: self.pool.Reserve(self.gateway, external=True) if self.network.gateway: self.pool.Release(self.network.gateway, external=True) self.network.gateway = self.gateway if self.op.add_reserved_ips: for ip in self.op.add_reserved_ips: try: if self.pool.IsReserved(ip): self.LogWarning("IP address %s is already reserved", ip) else: self.pool.Reserve(ip, external=True) except errors.AddressPoolError, err: self.LogWarning("Cannot reserve IP address %s: %s", ip, err) if self.op.remove_reserved_ips: for ip in self.op.remove_reserved_ips: if ip == self.network.gateway: self.LogWarning("Cannot unreserve Gateway's IP") continue try: if not self.pool.IsReserved(ip): self.LogWarning("IP address %s is already unreserved", ip) else: self.pool.Release(ip, external=True) except errors.AddressPoolError, err: self.LogWarning("Cannot release IP address %s: %s", ip, err) if self.op.mac_prefix: self.network.mac_prefix = self.mac_prefix if self.op.network6: self.network.network6 = self.network6 if self.op.gateway6: self.network.gateway6 = self.gateway6 self.pool.Validate() self.cfg.Update(self.network, feedback_fn) class NetworkQuery(QueryBase): FIELDS = query.NETWORK_FIELDS def ExpandNames(self, lu): lu.needed_locks = {} lu.share_locks = ShareAll() self.do_locking = self.use_locking all_networks = lu.cfg.GetAllNetworksInfo() name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values()) if self.names: missing = [] self.wanted = [] for name in self.names: if name in name_to_uuid: self.wanted.append(name_to_uuid[name]) else: missing.append(name) if missing: raise errors.OpPrereqError("Some networks do not exist: %s" % missing, errors.ECODE_NOENT) else: self.wanted = locking.ALL_SET if self.do_locking: lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted if query.NETQ_INST in self.requested_data: lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET if query.NETQ_GROUP in self.requested_data: lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET def DeclareLocks(self, lu, level): pass def _GetQueryData(self, lu): """Computes the list of networks and their attributes. 
""" all_networks = lu.cfg.GetAllNetworksInfo() network_uuids = self._GetNames(lu, all_networks.keys(), locking.LEVEL_NETWORK) do_instances = query.NETQ_INST in self.requested_data do_groups = query.NETQ_GROUP in self.requested_data network_to_instances = None network_to_groups = None # For NETQ_GROUP, we need to map network->[groups] if do_groups: all_groups = lu.cfg.GetAllNodeGroupsInfo() network_to_groups = dict((uuid, []) for uuid in network_uuids) for _, group in all_groups.iteritems(): for net_uuid in network_uuids: netparams = group.networks.get(net_uuid, None) if netparams: info = (group.name, netparams[constants.NIC_MODE], netparams[constants.NIC_LINK]) network_to_groups[net_uuid].append(info) if do_instances: all_instances = lu.cfg.GetAllInstancesInfo() network_to_instances = dict((uuid, []) for uuid in network_uuids) for instance in all_instances.values(): for nic in instance.nics: if nic.network in network_uuids: network_to_instances[nic.network].append(instance.uuid) break if query.NETQ_STATS in self.requested_data: stats = \ dict((uuid, self._GetStats(network.AddressPool(all_networks[uuid]))) for uuid in network_uuids) else: stats = None return query.NetworkQueryData([all_networks[uuid] for uuid in network_uuids], network_to_groups, network_to_instances, stats) @staticmethod def _GetStats(pool): """Returns statistics for a network address pool. """ return { "free_count": pool.GetFreeCount(), "reserved_count": pool.GetReservedCount(), "map": pool.GetMap(), "external_reservations": utils.CommaJoin(pool.GetExternalReservations()), } class LUNetworkQuery(NoHooksLU): """Logical unit for querying networks. """ REQ_BGL = False def CheckArguments(self): self.nq = NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names), self.op.output_fields, self.op.use_locking) def ExpandNames(self): self.nq.ExpandNames(self) def Exec(self, feedback_fn): return self.nq.OldStyleQuery(self) def _FmtNetworkConflict(details): """Utility for L{_NetworkConflictCheck}. """ return utils.CommaJoin("nic%s/%s" % (idx, ipaddr) for (idx, ipaddr) in details) def _NetworkConflictCheck(lu, check_fn, action, instances): """Checks for network interface conflicts with a network. @type lu: L{LogicalUnit} @type check_fn: callable receiving one parameter (L{objects.NIC}) and returning boolean @param check_fn: Function checking for conflict @type action: string @param action: Part of error message (see code) @param instances: the instances to check @type instances: list of instance objects @raise errors.OpPrereqError: If conflicting IP addresses are found. 
""" conflicts = [] for instance in instances: instconflicts = [(idx, nic.ip) for (idx, nic) in enumerate(instance.nics) if check_fn(nic)] if instconflicts: conflicts.append((instance.name, instconflicts)) if conflicts: lu.LogWarning("IP addresses from network '%s', which is about to %s" " node group '%s', are in use: %s" % (lu.network_name, action, lu.group.name, utils.CommaJoin(("%s: %s" % (name, _FmtNetworkConflict(details))) for (name, details) in conflicts))) raise errors.OpPrereqError("Conflicting IP addresses found; " " remove/modify the corresponding network" " interfaces", errors.ECODE_STATE) class LUNetworkConnect(LogicalUnit): """Connect a network to a nodegroup """ HPATH = "network-connect" HTYPE = constants.HTYPE_NETWORK REQ_BGL = False def ExpandNames(self): self.network_name = self.op.network_name self.group_name = self.op.group_name self.network_mode = self.op.network_mode self.network_link = self.op.network_link self.network_uuid = self.cfg.LookupNetwork(self.network_name) self.group_uuid = self.cfg.LookupNodeGroup(self.group_name) self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [self.group_uuid], } self.share_locks[locking.LEVEL_INSTANCE] = 1 if self.op.conflicts_check: self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid] self.share_locks[locking.LEVEL_NETWORK] = 1 def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: assert not self.needed_locks[locking.LEVEL_INSTANCE] # Lock instances optimistically, needs verification once group lock has # been acquired if self.op.conflicts_check: self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetNodeGroupInstances(self.group_uuid)) def BuildHooksEnv(self): ret = { "GROUP_NAME": self.group_name, "GROUP_NETWORK_MODE": self.network_mode, "GROUP_NETWORK_LINK": self.network_link, } return ret def BuildHooksNodes(self): node_uuids = self.cfg.GetNodeGroup(self.group_uuid).members return (node_uuids, node_uuids) def CheckPrereq(self): owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) assert self.group_uuid in owned_groups # Check if locked instances are still correct owned_instance_names = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) if self.op.conflicts_check: CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instance_names) self.netparams = { constants.NIC_MODE: self.network_mode, constants.NIC_LINK: self.network_link, } objects.NIC.CheckParameterSyntax(self.netparams) self.group = self.cfg.GetNodeGroup(self.group_uuid) #if self.network_mode == constants.NIC_MODE_BRIDGED: # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid) self.connected = False if self.network_uuid in self.group.networks: self.LogWarning("Network '%s' is already mapped to group '%s'" % (self.network_name, self.group.name)) self.connected = True # check only if not already connected elif self.op.conflicts_check: pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid)) _NetworkConflictCheck( self, lambda nic: pool.Contains(nic.ip), "connect to", [instance_info for (_, instance_info) in self.cfg.GetMultiInstanceInfoByName(owned_instance_names)]) def Exec(self, feedback_fn): # Connect the network and update the group only if not already connected if not self.connected: self.group.networks[self.network_uuid] = self.netparams self.cfg.Update(self.group, feedback_fn) class LUNetworkDisconnect(LogicalUnit): """Disconnect a network to a nodegroup """ HPATH = "network-disconnect" HTYPE = constants.HTYPE_NETWORK REQ_BGL = False def 
ExpandNames(self): self.network_name = self.op.network_name self.group_name = self.op.group_name self.network_uuid = self.cfg.LookupNetwork(self.network_name) self.group_uuid = self.cfg.LookupNodeGroup(self.group_name) self.needed_locks = { locking.LEVEL_INSTANCE: [], locking.LEVEL_NODEGROUP: [self.group_uuid], } self.share_locks[locking.LEVEL_INSTANCE] = 1 def DeclareLocks(self, level): if level == locking.LEVEL_INSTANCE: assert not self.needed_locks[locking.LEVEL_INSTANCE] # Lock instances optimistically, needs verification once group lock has # been acquired self.needed_locks[locking.LEVEL_INSTANCE] = \ self.cfg.GetInstanceNames( self.cfg.GetNodeGroupInstances(self.group_uuid)) def BuildHooksEnv(self): ret = { "GROUP_NAME": self.group_name, } return ret def BuildHooksNodes(self): nodes = self.cfg.GetNodeGroup(self.group_uuid).members return (nodes, nodes) def CheckPrereq(self): owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) assert self.group_uuid in owned_groups # Check if locked instances are still correct owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) self.group = self.cfg.GetNodeGroup(self.group_uuid) self.connected = True if self.network_uuid not in self.group.networks: self.LogWarning("Network '%s' is not mapped to group '%s'", self.network_name, self.group.name) self.connected = False # We need this check only if the network is still connected else: _NetworkConflictCheck( self, lambda nic: nic.network == self.network_uuid, "disconnect from", [instance_info for (_, instance_info) in self.cfg.GetMultiInstanceInfoByName(owned_instances)]) def Exec(self, feedback_fn): # Disconnect the network and update the group only if network is connected if self.connected: del self.group.networks[self.network_uuid] self.cfg.Update(self.group, feedback_fn) ganeti-2.9.3/lib/cmdlib/instance_query.py0000644000000000000000000004066012271422343020403 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA.
"""Logical units for querying instances.""" import itertools import logging import operator from ganeti import compat from ganeti import constants from ganeti import locking from ganeti import qlang from ganeti import query from ganeti.cmdlib.base import QueryBase, NoHooksLU from ganeti.cmdlib.common import ShareAll, GetWantedInstances, \ CheckInstanceNodeGroups, CheckInstancesNodeGroups, AnnotateDiskParams from ganeti.cmdlib.instance_operation import GetInstanceConsole from ganeti.cmdlib.instance_utils import NICListToTuple import ganeti.masterd.instance class InstanceQuery(QueryBase): FIELDS = query.INSTANCE_FIELDS def ExpandNames(self, lu): lu.needed_locks = {} lu.share_locks = ShareAll() if self.names: (_, self.wanted) = GetWantedInstances(lu, self.names) else: self.wanted = locking.ALL_SET self.do_locking = (self.use_locking and query.IQ_LIVE in self.requested_data) if self.do_locking: lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted lu.needed_locks[locking.LEVEL_NODEGROUP] = [] lu.needed_locks[locking.LEVEL_NODE] = [] lu.needed_locks[locking.LEVEL_NETWORK] = [] lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE self.do_grouplocks = (self.do_locking and query.IQ_NODES in self.requested_data) def DeclareLocks(self, lu, level): if self.do_locking: if level == locking.LEVEL_NODEGROUP and self.do_grouplocks: assert not lu.needed_locks[locking.LEVEL_NODEGROUP] # Lock all groups used by instances optimistically; this requires going # via the node before it's locked, requiring verification later on lu.needed_locks[locking.LEVEL_NODEGROUP] = \ set(group_uuid for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) for group_uuid in lu.cfg.GetInstanceNodeGroups( lu.cfg.GetInstanceInfoByName(instance_name).uuid)) elif level == locking.LEVEL_NODE: lu._LockInstancesNodes() # pylint: disable=W0212 elif level == locking.LEVEL_NETWORK: lu.needed_locks[locking.LEVEL_NETWORK] = \ frozenset(net_uuid for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) for net_uuid in lu.cfg.GetInstanceNetworks( lu.cfg.GetInstanceInfoByName(instance_name).uuid)) @staticmethod def _CheckGroupLocks(lu): owned_instance_names = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE)) owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP)) # Check if node groups for locked instances are still correct for instance_name in owned_instance_names: instance = lu.cfg.GetInstanceInfoByName(instance_name) CheckInstanceNodeGroups(lu.cfg, instance.uuid, owned_groups) def _GetQueryData(self, lu): """Computes the list of instances and their attributes. 
""" if self.do_grouplocks: self._CheckGroupLocks(lu) cluster = lu.cfg.GetClusterInfo() insts_by_name = dict((inst.name, inst) for inst in lu.cfg.GetAllInstancesInfo().values()) instance_names = self._GetNames(lu, insts_by_name.keys(), locking.LEVEL_INSTANCE) instance_list = [insts_by_name[node] for node in instance_names] node_uuids = frozenset(itertools.chain(*(inst.all_nodes for inst in instance_list))) hv_list = list(set([inst.hypervisor for inst in instance_list])) bad_node_uuids = [] offline_node_uuids = [] wrongnode_inst_uuids = set() # Gather data as requested if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]): live_data = {} node_data = lu.rpc.call_all_instances_info(node_uuids, hv_list, cluster.hvparams) for node_uuid in node_uuids: result = node_data[node_uuid] if result.offline: # offline nodes will be in both lists assert result.fail_msg offline_node_uuids.append(node_uuid) if result.fail_msg: bad_node_uuids.append(node_uuid) elif result.payload: for inst_name in result.payload: if inst_name in insts_by_name: instance = insts_by_name[inst_name] if instance.primary_node == node_uuid: for iname in result.payload: live_data[insts_by_name[iname].uuid] = result.payload[iname] else: wrongnode_inst_uuids.add(instance.uuid) else: # orphan instance; we don't list it here as we don't # handle this case yet in the output of instance listing logging.warning("Orphan instance '%s' found on node %s", inst_name, lu.cfg.GetNodeName(node_uuid)) # else no instance is alive else: live_data = {} if query.IQ_DISKUSAGE in self.requested_data: gmi = ganeti.masterd.instance disk_usage = dict((inst.uuid, gmi.ComputeDiskSize(inst.disk_template, [{constants.IDISK_SIZE: disk.size} for disk in inst.disks])) for inst in instance_list) else: disk_usage = None if query.IQ_CONSOLE in self.requested_data: consinfo = {} for inst in instance_list: if inst.uuid in live_data: # Instance is running consinfo[inst.uuid] = \ GetInstanceConsole(cluster, inst, lu.cfg.GetNodeInfo(inst.primary_node)) else: consinfo[inst.uuid] = None else: consinfo = None if query.IQ_NODES in self.requested_data: nodes = dict(lu.cfg.GetMultiNodeInfo(node_uuids)) groups = dict((uuid, lu.cfg.GetNodeGroup(uuid)) for uuid in set(map(operator.attrgetter("group"), nodes.values()))) else: nodes = None groups = None if query.IQ_NETWORKS in self.requested_data: net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.uuid) for i in instance_list)) networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids) else: networks = None return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(), disk_usage, offline_node_uuids, bad_node_uuids, live_data, wrongnode_inst_uuids, consinfo, nodes, groups, networks) class LUInstanceQuery(NoHooksLU): """Logical unit for querying instances. """ # pylint: disable=W0142 REQ_BGL = False def CheckArguments(self): self.iq = InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names), self.op.output_fields, self.op.use_locking) def ExpandNames(self): self.iq.ExpandNames(self) def DeclareLocks(self, level): self.iq.DeclareLocks(self, level) def Exec(self, feedback_fn): return self.iq.OldStyleQuery(self) class LUInstanceQueryData(NoHooksLU): """Query runtime instance data. 
""" REQ_BGL = False def ExpandNames(self): self.needed_locks = {} # Use locking if requested or when non-static information is wanted if not (self.op.static or self.op.use_locking): self.LogWarning("Non-static data requested, locks need to be acquired") self.op.use_locking = True if self.op.instances or not self.op.use_locking: # Expand instance names right here (_, self.wanted_names) = GetWantedInstances(self, self.op.instances) else: # Will use acquired locks self.wanted_names = None if self.op.use_locking: self.share_locks = ShareAll() if self.wanted_names is None: self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET else: self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names self.needed_locks[locking.LEVEL_NODEGROUP] = [] self.needed_locks[locking.LEVEL_NODE] = [] self.needed_locks[locking.LEVEL_NETWORK] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if self.op.use_locking: owned_instances = dict(self.cfg.GetMultiInstanceInfoByName( self.owned_locks(locking.LEVEL_INSTANCE))) if level == locking.LEVEL_NODEGROUP: # Lock all groups used by instances optimistically; this requires going # via the node before it's locked, requiring verification later on self.needed_locks[locking.LEVEL_NODEGROUP] = \ frozenset(group_uuid for instance_uuid in owned_instances.keys() for group_uuid in self.cfg.GetInstanceNodeGroups(instance_uuid)) elif level == locking.LEVEL_NODE: self._LockInstancesNodes() elif level == locking.LEVEL_NETWORK: self.needed_locks[locking.LEVEL_NETWORK] = \ frozenset(net_uuid for instance_uuid in owned_instances.keys() for net_uuid in self.cfg.GetInstanceNetworks(instance_uuid)) def CheckPrereq(self): """Check prerequisites. This only checks the optional instance list against the existing names. """ owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) owned_node_uuids = frozenset(self.owned_locks(locking.LEVEL_NODE)) owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK)) if self.wanted_names is None: assert self.op.use_locking, "Locking was not used" self.wanted_names = owned_instances instances = dict(self.cfg.GetMultiInstanceInfoByName(self.wanted_names)) if self.op.use_locking: CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_node_uuids, None) else: assert not (owned_instances or owned_groups or owned_node_uuids or owned_networks) self.wanted_instances = instances.values() def _ComputeBlockdevStatus(self, node_uuid, instance, dev): """Returns the status of a block device """ if self.op.static or not node_uuid: return None self.cfg.SetDiskID(dev, node_uuid) result = self.rpc.call_blockdev_find(node_uuid, dev) if result.offline: return None result.Raise("Can't compute disk status for %s" % instance.name) status = result.payload if status is None: return None return (status.dev_path, status.major, status.minor, status.sync_percent, status.estimated_time, status.is_degraded, status.ldisk_status) def _ComputeDiskStatus(self, instance, node_uuid2name_fn, dev): """Compute block device status. """ (anno_dev,) = AnnotateDiskParams(instance, [dev], self.cfg) return self._ComputeDiskStatusInner(instance, None, node_uuid2name_fn, anno_dev) def _ComputeDiskStatusInner(self, instance, snode_uuid, node_uuid2name_fn, dev): """Compute block device status. @attention: The device has to be annotated already. 
""" drbd_info = None if dev.dev_type in constants.DTS_DRBD: # we change the snode then (otherwise we use the one passed in) if dev.logical_id[0] == instance.primary_node: snode_uuid = dev.logical_id[1] else: snode_uuid = dev.logical_id[0] drbd_info = { "primary_node": node_uuid2name_fn(instance.primary_node), "primary_minor": dev.logical_id[3], "secondary_node": node_uuid2name_fn(snode_uuid), "secondary_minor": dev.logical_id[4], "port": dev.logical_id[2], "secret": dev.logical_id[5], } dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, instance, dev) dev_sstatus = self._ComputeBlockdevStatus(snode_uuid, instance, dev) if dev.children: dev_children = map(compat.partial(self._ComputeDiskStatusInner, instance, snode_uuid, node_uuid2name_fn), dev.children) else: dev_children = [] return { "iv_name": dev.iv_name, "dev_type": dev.dev_type, "logical_id": dev.logical_id, "drbd_info": drbd_info, "physical_id": dev.physical_id, "pstatus": dev_pstatus, "sstatus": dev_sstatus, "children": dev_children, "mode": dev.mode, "size": dev.size, "spindles": dev.spindles, "name": dev.name, "uuid": dev.uuid, } def Exec(self, feedback_fn): """Gather and return data""" result = {} cluster = self.cfg.GetClusterInfo() node_uuids = itertools.chain(*(i.all_nodes for i in self.wanted_instances)) nodes = dict(self.cfg.GetMultiNodeInfo(node_uuids)) groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group for node in nodes.values())) for instance in self.wanted_instances: pnode = nodes[instance.primary_node] if self.op.static or pnode.offline: remote_state = None if pnode.offline: self.LogWarning("Primary node %s is marked offline, returning static" " information only for instance %s" % (pnode.name, instance.name)) else: remote_info = self.rpc.call_instance_info( instance.primary_node, instance.name, instance.hypervisor, cluster.hvparams[instance.hypervisor]) remote_info.Raise("Error checking node %s" % pnode.name) remote_info = remote_info.payload if remote_info and "state" in remote_info: remote_state = "up" else: if instance.admin_state == constants.ADMINST_UP: remote_state = "down" else: remote_state = instance.admin_state group2name_fn = lambda uuid: groups[uuid].name node_uuid2name_fn = lambda uuid: nodes[uuid].name disks = map(compat.partial(self._ComputeDiskStatus, instance, node_uuid2name_fn), instance.disks) snodes_group_uuids = [nodes[snode_uuid].group for snode_uuid in instance.secondary_nodes] result[instance.name] = { "name": instance.name, "config_state": instance.admin_state, "run_state": remote_state, "pnode": pnode.name, "pnode_group_uuid": pnode.group, "pnode_group_name": group2name_fn(pnode.group), "snodes": map(node_uuid2name_fn, instance.secondary_nodes), "snodes_group_uuids": snodes_group_uuids, "snodes_group_names": map(group2name_fn, snodes_group_uuids), "os": instance.os, # this happens to be the same format used for hooks "nics": NICListToTuple(self, instance.nics), "disk_template": instance.disk_template, "disks": disks, "hypervisor": instance.hypervisor, "network_port": instance.network_port, "hv_instance": instance.hvparams, "hv_actual": cluster.FillHV(instance, skip_globals=True), "be_instance": instance.beparams, "be_actual": cluster.FillBE(instance), "os_instance": instance.osparams, "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), "serial_no": instance.serial_no, "mtime": instance.mtime, "ctime": instance.ctime, "uuid": instance.uuid, } return result ganeti-2.9.3/lib/cmdlib/misc.py0000644000000000000000000003412312267470014016305 
0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Miscellaneous logical units that don't fit into any category.""" import logging import time from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import qlang from ganeti import query from ganeti import utils from ganeti.cmdlib.base import NoHooksLU, QueryBase from ganeti.cmdlib.common import GetWantedNodes, SupportsOob class LUOobCommand(NoHooksLU): """Logical unit for OOB handling. """ REQ_BGL = False _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE) def ExpandNames(self): """Gather locks we need. """ if self.op.node_names: (self.op.node_uuids, self.op.node_names) = \ GetWantedNodes(self, self.op.node_names) lock_node_uuids = self.op.node_uuids else: lock_node_uuids = locking.ALL_SET self.needed_locks = { locking.LEVEL_NODE: lock_node_uuids, } self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 if not self.op.node_names: # Acquire node allocation lock only if all nodes are affected self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET def CheckPrereq(self): """Check prerequisites. This checks: - the node exists in the configuration - OOB is supported Any errors are signaled by raising errors.OpPrereqError. """ self.nodes = [] self.master_node_uuid = self.cfg.GetMasterNode() master_node_obj = self.cfg.GetNodeInfo(self.master_node_uuid) assert self.op.power_delay >= 0.0 if self.op.node_uuids: if (self.op.command in self._SKIP_MASTER and master_node_obj.uuid in self.op.node_uuids): master_oob_handler = SupportsOob(self.cfg, master_node_obj) if master_oob_handler: additional_text = ("run '%s %s %s' if you want to operate on the" " master regardless") % (master_oob_handler, self.op.command, master_node_obj.name) else: additional_text = "it does not support out-of-band operations" raise errors.OpPrereqError(("Operating on the master node %s is not" " allowed for %s; %s") % (master_node_obj.name, self.op.command, additional_text), errors.ECODE_INVAL) else: self.op.node_uuids = self.cfg.GetNodeList() if self.op.command in self._SKIP_MASTER: self.op.node_uuids.remove(master_node_obj.uuid) if self.op.command in self._SKIP_MASTER: assert master_node_obj.uuid not in self.op.node_uuids for node_uuid in self.op.node_uuids: node = self.cfg.GetNodeInfo(node_uuid) if node is None: raise errors.OpPrereqError("Node %s not found" % node_uuid, errors.ECODE_NOENT) self.nodes.append(node) if (not self.op.ignore_status and (self.op.command == constants.OOB_POWER_OFF and not node.offline)): raise errors.OpPrereqError(("Cannot power off node %s because it is" " not marked offline") % node.name, errors.ECODE_STATE) def Exec(self, feedback_fn): """Execute OOB and return result if we expect any. 
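The result holds one entry per node, each led by the node name, eg. (payload being whatever the OOB helper returned)::

  [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, payload)],
   [(constants.RS_NORMAL, "node2"), (constants.RS_UNAVAIL, None)]]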
""" ret = [] for idx, node in enumerate(utils.NiceSort(self.nodes, key=lambda node: node.name)): node_entry = [(constants.RS_NORMAL, node.name)] ret.append(node_entry) oob_program = SupportsOob(self.cfg, node) if not oob_program: node_entry.append((constants.RS_UNAVAIL, None)) continue logging.info("Executing out-of-band command '%s' using '%s' on %s", self.op.command, oob_program, node.name) result = self.rpc.call_run_oob(self.master_node_uuid, oob_program, self.op.command, node.name, self.op.timeout) if result.fail_msg: self.LogWarning("Out-of-band RPC failed on node '%s': %s", node.name, result.fail_msg) node_entry.append((constants.RS_NODATA, None)) else: try: self._CheckPayload(result) except errors.OpExecError, err: self.LogWarning("Payload returned by node '%s' is not valid: %s", node.name, err) node_entry.append((constants.RS_NODATA, None)) else: if self.op.command == constants.OOB_HEALTH: # For health we should log important events for item, status in result.payload: if status in [constants.OOB_STATUS_WARNING, constants.OOB_STATUS_CRITICAL]: self.LogWarning("Item '%s' on node '%s' has status '%s'", item, node.name, status) if self.op.command == constants.OOB_POWER_ON: node.powered = True elif self.op.command == constants.OOB_POWER_OFF: node.powered = False elif self.op.command == constants.OOB_POWER_STATUS: powered = result.payload[constants.OOB_POWER_STATUS_POWERED] if powered != node.powered: logging.warning(("Recorded power state (%s) of node '%s' does not" " match actual power state (%s)"), node.powered, node.name, powered) # For configuration changing commands we should update the node if self.op.command in (constants.OOB_POWER_ON, constants.OOB_POWER_OFF): self.cfg.Update(node, feedback_fn) node_entry.append((constants.RS_NORMAL, result.payload)) if (self.op.command == constants.OOB_POWER_ON and idx < len(self.nodes) - 1): time.sleep(self.op.power_delay) return ret def _CheckPayload(self, result): """Checks if the payload is valid. 
@param result: RPC result @raises errors.OpExecError: If payload is not valid """ errs = [] if self.op.command == constants.OOB_HEALTH: if not isinstance(result.payload, list): errs.append("command 'health' is expected to return a list but got %s" % type(result.payload)) else: for item, status in result.payload: if status not in constants.OOB_STATUSES: errs.append("health item '%s' has invalid status '%s'" % (item, status)) if self.op.command == constants.OOB_POWER_STATUS: if not isinstance(result.payload, dict): errs.append("power-status is expected to return a dict but got %s" % type(result.payload)) if self.op.command in [ constants.OOB_POWER_ON, constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE, ]: if result.payload is not None: errs.append("%s is expected to not return payload but got '%s'" % (self.op.command, result.payload)) if errs: raise errors.OpExecError("Check of out-of-band payload failed due to %s" % utils.CommaJoin(errs)) class ExtStorageQuery(QueryBase): FIELDS = query.EXTSTORAGE_FIELDS def ExpandNames(self, lu): # Lock all nodes in shared mode # Temporary removal of locks, should be reverted later # TODO: reintroduce locks when they are lighter-weight lu.needed_locks = {} #self.share_locks[locking.LEVEL_NODE] = 1 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET # The following variables interact with _QueryBase._GetNames if self.names: self.wanted = [lu.cfg.GetNodeInfoByName(name).uuid for name in self.names] else: self.wanted = locking.ALL_SET self.do_locking = self.use_locking def DeclareLocks(self, lu, level): pass @staticmethod def _DiagnoseByProvider(rlist): """Remaps a per-node return list into a per-provider, per-node dictionary. @param rlist: a map with node uuids as keys and ExtStorage objects as values @rtype: dict @return: a dictionary with extstorage providers as keys and as value another map, with node uuids as keys and tuples of (path, status, diagnose, parameters) as values, eg:: {"provider1": {"node_uuid1": [(/usr/lib/..., True, "", [])], "node_uuid2": [(/srv/..., False, "missing file")], "node_uuid3": [(/srv/..., True, "", [])]}} """ all_es = {} # we build here the list of nodes that didn't fail the RPC (at RPC # level), so that nodes with a non-responding node daemon don't # make all providers invalid good_nodes = [node_uuid for node_uuid in rlist if not rlist[node_uuid].fail_msg] for node_uuid, nr in rlist.items(): if nr.fail_msg or not nr.payload: continue for (name, path, status, diagnose, params) in nr.payload: if name not in all_es: # build a list of nodes for this provider containing empty lists # for each node in node_list all_es[name] = {} for nuuid in good_nodes: all_es[name][nuuid] = [] # convert params from [name, help] to (name, help) params = [tuple(v) for v in params] all_es[name][node_uuid].append((path, status, diagnose, params)) return all_es def _GetQueryData(self, lu): """Computes the list of nodes and their attributes. """ # Locking is not used assert not (compat.any(lu.glm.is_owned(level) for level in locking.LEVELS if level != locking.LEVEL_CLUSTER) or self.do_locking or self.use_locking) valid_nodes = [node.uuid for node in lu.cfg.GetAllNodesInfo().values() if not node.offline and node.vm_capable] pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes)) data = {} nodegroup_list = lu.cfg.GetNodeGroupList() for (es_name, es_data) in pol.items(): # For every provider compute the nodegroup validity.
# To do this we need to check the validity of each node in es_data # and then construct the corresponding nodegroup dict: # { nodegroup1: status # nodegroup2: status # } ndgrp_data = {} for nodegroup in nodegroup_list: ndgrp = lu.cfg.GetNodeGroup(nodegroup) nodegroup_nodes = ndgrp.members nodegroup_name = ndgrp.name node_statuses = [] for node in nodegroup_nodes: if node in valid_nodes: if es_data[node] != []: node_status = es_data[node][0][1] node_statuses.append(node_status) else: node_statuses.append(False) if False in node_statuses: ndgrp_data[nodegroup_name] = False else: ndgrp_data[nodegroup_name] = True # Compute the provider's parameters parameters = set() for idx, esl in enumerate(es_data.values()): valid = bool(esl and esl[0][1]) if not valid: break node_params = esl[0][3] if idx == 0: # First entry parameters.update(node_params) else: # Filter out inconsistent values parameters.intersection_update(node_params) params = list(parameters) # Now fill all the info for this provider info = query.ExtStorageInfo(name=es_name, node_status=es_data, nodegroup_status=ndgrp_data, parameters=params) data[es_name] = info # Prepare data in requested order return [data[name] for name in self._GetNames(lu, pol.keys(), None) if name in data] class LUExtStorageDiagnose(NoHooksLU): """Logical unit for ExtStorage diagnose/query. """ REQ_BGL = False def CheckArguments(self): self.eq = ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names), self.op.output_fields, False) def ExpandNames(self): self.eq.ExpandNames(self) def Exec(self, feedback_fn): return self.eq.OldStyleQuery(self) class LURestrictedCommand(NoHooksLU): """Logical unit for executing restricted commands. """ REQ_BGL = False def ExpandNames(self): if self.op.nodes: (self.op.node_uuids, self.op.nodes) = GetWantedNodes(self, self.op.nodes) self.needed_locks = { locking.LEVEL_NODE: self.op.node_uuids, } self.share_locks = { locking.LEVEL_NODE: not self.op.use_locking, } def CheckPrereq(self): """Check prerequisites. """ def Exec(self, feedback_fn): """Execute restricted command and return output. """ owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) # Check if correct locks are held assert set(self.op.node_uuids).issubset(owned_nodes) rpcres = self.rpc.call_restricted_command(self.op.node_uuids, self.op.command) result = [] for node_uuid in self.op.node_uuids: nres = rpcres[node_uuid] if nres.fail_msg: msg = ("Command '%s' on node '%s' failed: %s" % (self.op.command, self.cfg.GetNodeName(node_uuid), nres.fail_msg)) result.append((False, msg)) else: result.append((True, nres.payload)) return result ganeti-2.9.3/lib/cmdlib/instance_operation.py0000644000000000000000000004040612271422343021234 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Logical units dealing with instance operations (start/stop/...). Those operations have in common that they affect the operating system in a running instance directly. """ import logging from ganeti import constants from ganeti import errors from ganeti import hypervisor from ganeti import locking from ganeti import objects from ganeti import utils from ganeti.cmdlib.base import LogicalUnit, NoHooksLU from ganeti.cmdlib.common import INSTANCE_ONLINE, INSTANCE_DOWN, \ CheckHVParams, CheckInstanceState, CheckNodeOnline, GetUpdatedParams, \ CheckOSParams, ShareAll from ganeti.cmdlib.instance_storage import StartInstanceDisks, \ ShutdownInstanceDisks from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \ CheckInstanceBridgesExist, CheckNodeFreeMemory, CheckNodeHasOS class LUInstanceStartup(LogicalUnit): """Starts an instance. """ HPATH = "instance-start" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def CheckArguments(self): # extra beparams if self.op.beparams: # fill the beparams dict objects.UpgradeBeParams(self.op.beparams) utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) def ExpandNames(self): self._ExpandAndLockInstance() self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if level == locking.LEVEL_NODE_RES: self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = { "FORCE": self.op.force, } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. 
""" self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name cluster = self.cfg.GetClusterInfo() # extra hvparams if self.op.hvparams: # check hypervisor parameter syntax (locally) utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) filled_hvp = cluster.FillHV(self.instance) filled_hvp.update(self.op.hvparams) hv_type = hypervisor.GetHypervisorClass(self.instance.hypervisor) hv_type.CheckParameterSyntax(filled_hvp) CheckHVParams(self, self.instance.all_nodes, self.instance.hypervisor, filled_hvp) CheckInstanceState(self, self.instance, INSTANCE_ONLINE) self.primary_offline = \ self.cfg.GetNodeInfo(self.instance.primary_node).offline if self.primary_offline and self.op.ignore_offline_nodes: self.LogWarning("Ignoring offline primary node") if self.op.hvparams or self.op.beparams: self.LogWarning("Overridden parameters are ignored") else: CheckNodeOnline(self, self.instance.primary_node) bep = self.cfg.GetClusterInfo().FillBE(self.instance) bep.update(self.op.beparams) # check bridges existence CheckInstanceBridgesExist(self, self.instance) remote_info = self.rpc.call_instance_info( self.instance.primary_node, self.instance.name, self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor]) remote_info.Raise("Error checking node %s" % self.cfg.GetNodeName(self.instance.primary_node), prereq=True, ecode=errors.ECODE_ENVIRON) if not remote_info.payload: # not running already CheckNodeFreeMemory( self, self.instance.primary_node, "starting instance %s" % self.instance.name, bep[constants.BE_MINMEM], self.instance.hypervisor, self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor]) def Exec(self, feedback_fn): """Start the instance. """ if not self.op.no_remember: self.cfg.MarkInstanceUp(self.instance.uuid) if self.primary_offline: assert self.op.ignore_offline_nodes self.LogInfo("Primary node offline, marked instance as started") else: StartInstanceDisks(self, self.instance, self.op.force) result = \ self.rpc.call_instance_start(self.instance.primary_node, (self.instance, self.op.hvparams, self.op.beparams), self.op.startup_paused, self.op.reason) msg = result.fail_msg if msg: ShutdownInstanceDisks(self, self.instance) raise errors.OpExecError("Could not start instance: %s" % msg) class LUInstanceShutdown(LogicalUnit): """Shutdown an instance. """ HPATH = "instance-stop" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = BuildInstanceHookEnvByObject(self, self.instance) env["TIMEOUT"] = self.op.timeout return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. 
""" self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name if not self.op.force: CheckInstanceState(self, self.instance, INSTANCE_ONLINE) else: self.LogWarning("Ignoring offline instance check") self.primary_offline = \ self.cfg.GetNodeInfo(self.instance.primary_node).offline if self.primary_offline and self.op.ignore_offline_nodes: self.LogWarning("Ignoring offline primary node") else: CheckNodeOnline(self, self.instance.primary_node) def Exec(self, feedback_fn): """Shutdown the instance. """ # If the instance is offline we shouldn't mark it as down, as that # resets the offline flag. if not self.op.no_remember and self.instance.admin_state in INSTANCE_ONLINE: self.cfg.MarkInstanceDown(self.instance.uuid) if self.primary_offline: assert self.op.ignore_offline_nodes self.LogInfo("Primary node offline, marked instance as stopped") else: result = self.rpc.call_instance_shutdown(self.instance.primary_node, self.instance, self.op.timeout, self.op.reason) msg = result.fail_msg if msg: self.LogWarning("Could not shutdown instance: %s", msg) ShutdownInstanceDisks(self, self.instance) class LUInstanceReinstall(LogicalUnit): """Reinstall an instance. """ HPATH = "instance-reinstall" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ return BuildInstanceHookEnvByObject(self, self.instance) def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster and is not running. """ instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckNodeOnline(self, instance.primary_node, "Instance primary node" " offline, cannot reinstall") if instance.disk_template == constants.DT_DISKLESS: raise errors.OpPrereqError("Instance '%s' has no disks" % self.op.instance_name, errors.ECODE_INVAL) CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall") if self.op.os_type is not None: # OS verification CheckNodeHasOS(self, instance.primary_node, self.op.os_type, self.op.force_variant) instance_os = self.op.os_type else: instance_os = instance.os node_uuids = list(instance.all_nodes) if self.op.osparams: i_osdict = GetUpdatedParams(instance.osparams, self.op.osparams) CheckOSParams(self, True, node_uuids, instance_os, i_osdict) self.os_inst = i_osdict # the new dict (without defaults) else: self.os_inst = None self.instance = instance def Exec(self, feedback_fn): """Reinstall the instance. """ if self.op.os_type is not None: feedback_fn("Changing OS to '%s'..." 
% self.op.os_type) self.instance.os = self.op.os_type # Write to configuration self.cfg.Update(self.instance, feedback_fn) StartInstanceDisks(self, self.instance, None) try: feedback_fn("Running the instance OS create scripts...") # FIXME: pass debug option from opcode to backend result = self.rpc.call_instance_os_add(self.instance.primary_node, (self.instance, self.os_inst), True, self.op.debug_level) result.Raise("Could not install OS for instance %s on node %s" % (self.instance.name, self.cfg.GetNodeName(self.instance.primary_node))) finally: ShutdownInstanceDisks(self, self.instance) class LUInstanceReboot(LogicalUnit): """Reboot an instance. """ HPATH = "instance-reboot" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ env = { "IGNORE_SECONDARIES": self.op.ignore_secondaries, "REBOOT_TYPE": self.op.reboot_type, "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckInstanceState(self, self.instance, INSTANCE_ONLINE) CheckNodeOnline(self, self.instance.primary_node) # check bridges existence CheckInstanceBridgesExist(self, self.instance) def Exec(self, feedback_fn): """Reboot the instance. """ cluster = self.cfg.GetClusterInfo() remote_info = self.rpc.call_instance_info( self.instance.primary_node, self.instance.name, self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor]) remote_info.Raise("Error checking node %s" % self.cfg.GetNodeName(self.instance.primary_node)) instance_running = bool(remote_info.payload) current_node_uuid = self.instance.primary_node if instance_running and \ self.op.reboot_type in [constants.INSTANCE_REBOOT_SOFT, constants.INSTANCE_REBOOT_HARD]: for disk in self.instance.disks: self.cfg.SetDiskID(disk, current_node_uuid) result = self.rpc.call_instance_reboot(current_node_uuid, self.instance, self.op.reboot_type, self.op.shutdown_timeout, self.op.reason) result.Raise("Could not reboot instance") else: if instance_running: result = self.rpc.call_instance_shutdown(current_node_uuid, self.instance, self.op.shutdown_timeout, self.op.reason) result.Raise("Could not shutdown instance for full reboot") ShutdownInstanceDisks(self, self.instance) else: self.LogInfo("Instance %s was already stopped, starting now", self.instance.name) StartInstanceDisks(self, self.instance, self.op.ignore_secondaries) result = self.rpc.call_instance_start(current_node_uuid, (self.instance, None, None), False, self.op.reason) msg = result.fail_msg if msg: ShutdownInstanceDisks(self, self.instance) raise errors.OpExecError("Could not start instance for" " full reboot: %s" % msg) self.cfg.MarkInstanceUp(self.instance.uuid) def GetInstanceConsole(cluster, instance, primary_node): """Returns console information for an instance. 
@type cluster: L{objects.Cluster} @type instance: L{objects.Instance} @type primary_node: L{objects.Node} @rtype: dict """ hyper = hypervisor.GetHypervisorClass(instance.hypervisor) # beparams and hvparams are passed separately, to avoid editing the # instance and then saving the defaults in the instance itself. hvparams = cluster.FillHV(instance) beparams = cluster.FillBE(instance) console = hyper.GetInstanceConsole(instance, primary_node, hvparams, beparams) assert console.instance == instance.name assert console.Validate() return console.ToDict() class LUInstanceConsole(NoHooksLU): """Connect to an instance's console. This is somewhat special in that it returns the command line that you need to run on the master node in order to connect to the console. """ REQ_BGL = False def ExpandNames(self): self.share_locks = ShareAll() self._ExpandAndLockInstance() def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckNodeOnline(self, self.instance.primary_node) def Exec(self, feedback_fn): """Connect to the console of an instance """ node_uuid = self.instance.primary_node cluster_hvparams = self.cfg.GetClusterInfo().hvparams node_insts = self.rpc.call_instance_list( [node_uuid], [self.instance.hypervisor], cluster_hvparams)[node_uuid] node_insts.Raise("Can't get node information from %s" % self.cfg.GetNodeName(node_uuid)) if self.instance.name not in node_insts.payload: if self.instance.admin_state == constants.ADMINST_UP: state = constants.INSTST_ERRORDOWN elif self.instance.admin_state == constants.ADMINST_DOWN: state = constants.INSTST_ADMINDOWN else: state = constants.INSTST_ADMINOFFLINE raise errors.OpExecError("Instance %s is not running (state %s)" % (self.instance.name, state)) logging.debug("Connecting to console of %s on %s", self.instance.name, self.cfg.GetNodeName(node_uuid)) return GetInstanceConsole(self.cfg.GetClusterInfo(), self.instance, self.cfg.GetNodeInfo(self.instance.primary_node)) ganeti-2.9.3/lib/cmdlib/instance_storage.py0000644000000000000000000030705112271422343020702 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Logical units dealing with storage of instances.""" import itertools import logging import os import time from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import ht from ganeti import locking from ganeti.masterd import iallocator from ganeti import objects from ganeti import utils from ganeti import rpc from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, Tasklet from ganeti.cmdlib.common import INSTANCE_DOWN, INSTANCE_NOT_RUNNING, \ AnnotateDiskParams, CheckIAllocatorOrNode, ExpandNodeUuidAndName, \ CheckNodeOnline, CheckInstanceNodeGroups, CheckInstanceState, \ IsExclusiveStorageEnabledNode, FindFaultyInstanceDisks, GetWantedNodes, \ CheckDiskTemplateEnabled from ganeti.cmdlib.instance_utils import GetInstanceInfoText, \ CopyLockList, ReleaseLocks, CheckNodeVmCapable, \ BuildInstanceHookEnvByObject, CheckNodeNotDrained, CheckTargetNodeIPolicy import ganeti.masterd.instance _DISK_TEMPLATE_NAME_PREFIX = { constants.DT_PLAIN: "", constants.DT_RBD: ".rbd", constants.DT_EXT: ".ext", } def CreateSingleBlockDev(lu, node_uuid, instance, device, info, force_open, excl_stor): """Create a single block device on a given node. This will not recurse over children of the device, so they must be created in advance. @param lu: the lu on whose behalf we execute @param node_uuid: the node on which to create the device @type instance: L{objects.Instance} @param instance: the instance which owns the device @type device: L{objects.Disk} @param device: the device to create @param info: the extra 'metadata' we should attach to the device (this will be represented as a LVM tag) @type force_open: boolean @param force_open: this parameter will be passes to the L{backend.BlockdevCreate} function where it specifies whether we run on primary or not, and it affects both the child assembly and the device own Open() execution @type excl_stor: boolean @param excl_stor: Whether exclusive_storage is active for the node """ lu.cfg.SetDiskID(device, node_uuid) result = lu.rpc.call_blockdev_create(node_uuid, device, device.size, instance.name, force_open, info, excl_stor) result.Raise("Can't create block device %s on" " node %s for instance %s" % (device, lu.cfg.GetNodeName(node_uuid), instance.name)) if device.physical_id is None: device.physical_id = result.payload def _CreateBlockDevInner(lu, node_uuid, instance, device, force_create, info, force_open, excl_stor): """Create a tree of block devices on a given node. If this device type has to be created on secondaries, create it and all its children. If not, just recurse to children keeping the same 'force' value. @attention: The device has to be annotated already. 
@param lu: the lu on whose behalf we execute @param node_uuid: the node on which to create the device @type instance: L{objects.Instance} @param instance: the instance which owns the device @type device: L{objects.Disk} @param device: the device to create @type force_create: boolean @param force_create: whether to force creation of this device; this will be changed to True whenever we find a device for which CreateOnSecondary() returns True @param info: the extra 'metadata' we should attach to the device (this will be represented as an LVM tag) @type force_open: boolean @param force_open: this parameter will be passed to the L{backend.BlockdevCreate} function where it specifies whether we run on primary or not, and it affects both the child assembly and the device's own Open() execution @type excl_stor: boolean @param excl_stor: Whether exclusive_storage is active for the node @return: list of created devices """ created_devices = [] try: if device.CreateOnSecondary(): force_create = True if device.children: for child in device.children: devs = _CreateBlockDevInner(lu, node_uuid, instance, child, force_create, info, force_open, excl_stor) created_devices.extend(devs) if not force_create: return created_devices CreateSingleBlockDev(lu, node_uuid, instance, device, info, force_open, excl_stor) # The device has been completely created, so there is no point in keeping # its subdevices in the list. We just add the device itself instead. created_devices = [(node_uuid, device)] return created_devices except errors.DeviceCreationError, e: e.created_devices.extend(created_devices) raise e except errors.OpExecError, e: raise errors.DeviceCreationError(str(e), created_devices) def IsExclusiveStorageEnabledNodeUuid(cfg, node_uuid): """Whether exclusive_storage is in effect for the given node. @type cfg: L{config.ConfigWriter} @param cfg: The cluster configuration @type node_uuid: string @param node_uuid: The node UUID @rtype: bool @return: The effective value of exclusive_storage @raise errors.OpPrereqError: if no node exists with the given name """ ni = cfg.GetNodeInfo(node_uuid) if ni is None: raise errors.OpPrereqError("Invalid node UUID %s" % node_uuid, errors.ECODE_NOENT) return IsExclusiveStorageEnabledNode(cfg, ni) def _CreateBlockDev(lu, node_uuid, instance, device, force_create, info, force_open): """Wrapper around L{_CreateBlockDevInner}. This method annotates the root device first. """ (disk,) = AnnotateDiskParams(instance, [device], lu.cfg) excl_stor = IsExclusiveStorageEnabledNodeUuid(lu.cfg, node_uuid) return _CreateBlockDevInner(lu, node_uuid, instance, disk, force_create, info, force_open, excl_stor) def _UndoCreateDisks(lu, disks_created): """Undo the work performed by L{CreateDisks}. This function is called in case of an error to undo the work of L{CreateDisks}. @type lu: L{LogicalUnit} @param lu: the logical unit on whose behalf we execute @param disks_created: the result returned by L{CreateDisks} """ for (node_uuid, disk) in disks_created: lu.cfg.SetDiskID(disk, node_uuid) result = lu.rpc.call_blockdev_remove(node_uuid, disk) result.Warn("Failed to remove newly-created disk %s on node %s" % (disk, lu.cfg.GetNodeName(node_uuid)), logging.warning) def CreateDisks(lu, instance, to_skip=None, target_node_uuid=None, disks=None): """Create all disks for an instance. This abstracts away some work from AddInstance.
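The return value is a list of (node_uuid, disk) tuples, which can be passed directly to L{_UndoCreateDisks} should a later step fail.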
def CreateDisks(lu, instance, to_skip=None, target_node_uuid=None, disks=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node_uuid: string
  @param target_node_uuid: if passed, overrides the target node for creation
  @type disks: list of L{objects.Disk}
  @param disks: the disks to create; if not specified, all the disks of the
      instance are created
  @return: information about the created disks, to be used to call
      L{_UndoCreateDisks}
  @raise errors.OpExecError: in case of error

  """
  info = GetInstanceInfoText(instance)
  if target_node_uuid is None:
    pnode_uuid = instance.primary_node
    all_node_uuids = instance.all_nodes
  else:
    pnode_uuid = target_node_uuid
    all_node_uuids = [pnode_uuid]

  if disks is None:
    disks = instance.disks

  CheckDiskTemplateEnabled(lu.cfg.GetClusterInfo(), instance.disk_template)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode_uuid, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir,
                               lu.cfg.GetNodeName(pnode_uuid)))

  disks_created = []
  for idx, device in enumerate(disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node_uuid in all_node_uuids:
      f_create = node_uuid == pnode_uuid
      try:
        _CreateBlockDev(lu, node_uuid, instance, device, f_create, info,
                        f_create)
        disks_created.append((node_uuid, device))
      except errors.DeviceCreationError, e:
        logging.warning("Creating disk %s for instance '%s' failed",
                        idx, instance.name)
        disks_created.extend(e.created_devices)
        _UndoCreateDisks(lu, disks_created)
        raise errors.OpExecError(e.message)
  return disks_created


def ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # Accumulate per actual VG name; using the IDISK_VG constant itself as
      # the lookup key would reset the running total for every disk.
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def ComputeDisks(op, default_vg):
  """Computes the instance disks.
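
  For instance (illustrative values), an opcode entry
  C{{constants.IDISK_SIZE: 1024}} with C{op.disk_template ==
  constants.DT_PLAIN} is normalized to
  C{{IDISK_SIZE: 1024, IDISK_MODE: constants.DISK_RDWR,
  IDISK_VG: default_vg, IDISK_NAME: None}}; optional keys such as
  C{IDISK_METAVG}, C{IDISK_ADOPT} and C{IDISK_SPINDLES} are copied
  through only when present.
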
@param op: The instance opcode @param default_vg: The default_vg to assume @return: The computed disks """ disks = [] for disk in op.disks: mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR) if mode not in constants.DISK_ACCESS_SET: raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, errors.ECODE_INVAL) size = disk.get(constants.IDISK_SIZE, None) if size is None: raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) try: size = int(size) except (TypeError, ValueError): raise errors.OpPrereqError("Invalid disk size '%s'" % size, errors.ECODE_INVAL) ext_provider = disk.get(constants.IDISK_PROVIDER, None) if ext_provider and op.disk_template != constants.DT_EXT: raise errors.OpPrereqError("The '%s' option is only valid for the %s" " disk template, not %s" % (constants.IDISK_PROVIDER, constants.DT_EXT, op.disk_template), errors.ECODE_INVAL) data_vg = disk.get(constants.IDISK_VG, default_vg) name = disk.get(constants.IDISK_NAME, None) if name is not None and name.lower() == constants.VALUE_NONE: name = None new_disk = { constants.IDISK_SIZE: size, constants.IDISK_MODE: mode, constants.IDISK_VG: data_vg, constants.IDISK_NAME: name, } for key in [ constants.IDISK_METAVG, constants.IDISK_ADOPT, constants.IDISK_SPINDLES, ]: if key in disk: new_disk[key] = disk[key] # For extstorage, demand the `provider' option and add any # additional parameters (ext-params) to the dict if op.disk_template == constants.DT_EXT: if ext_provider: new_disk[constants.IDISK_PROVIDER] = ext_provider for key in disk: if key not in constants.IDISK_PARAMS: new_disk[key] = disk[key] else: raise errors.OpPrereqError("Missing provider for template '%s'" % constants.DT_EXT, errors.ECODE_INVAL) disks.append(new_disk) return disks def CheckRADOSFreeSpace(): """Compute disk size requirements inside the RADOS cluster. """ # For the RADOS cluster we assume there is always enough space. pass def _GenerateDRBD8Branch(lu, primary_uuid, secondary_uuid, size, vgnames, names, iv_name, p_minor, s_minor): """Generate a drbd8 device complete with its children. """ assert len(vgnames) == len(names) == 2 port = lu.cfg.AllocatePort() shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) dev_data = objects.Disk(dev_type=constants.DT_PLAIN, size=size, logical_id=(vgnames[0], names[0]), params={}) dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) dev_meta = objects.Disk(dev_type=constants.DT_PLAIN, size=constants.DRBD_META_SIZE, logical_id=(vgnames[1], names[1]), params={}) dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) drbd_dev = objects.Disk(dev_type=constants.DT_DRBD8, size=size, logical_id=(primary_uuid, secondary_uuid, port, p_minor, s_minor, shared_secret), children=[dev_data, dev_meta], iv_name=iv_name, params={}) drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) return drbd_dev def GenerateDiskTemplate( lu, template_name, instance_uuid, primary_node_uuid, secondary_node_uuids, disk_info, file_storage_dir, file_driver, base_index, feedback_fn, full_disk_params): """Generate the entire disk layout for a given template type. 
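
  As a sketch of the resulting layout (names follow the generators below):
  for C{constants.DT_DRBD8} every disk becomes a DRBD8 device whose two
  children are the C{<uuid>.disk<N>_data} and C{<uuid>.disk<N>_meta} plain
  LVs built by L{_GenerateDRBD8Branch}; for C{constants.DT_PLAIN} each disk
  is a single C{<uuid>.disk<N>} LV in its requested volume group.
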
""" vgname = lu.cfg.GetVGName() disk_count = len(disk_info) disks = [] CheckDiskTemplateEnabled(lu.cfg.GetClusterInfo(), template_name) if template_name == constants.DT_DISKLESS: pass elif template_name == constants.DT_DRBD8: if len(secondary_node_uuids) != 1: raise errors.ProgrammerError("Wrong template configuration") remote_node_uuid = secondary_node_uuids[0] minors = lu.cfg.AllocateDRBDMinor( [primary_node_uuid, remote_node_uuid] * len(disk_info), instance_uuid) (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name, full_disk_params) drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG] names = [] for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) for i in range(disk_count)]): names.append(lv_prefix + "_data") names.append(lv_prefix + "_meta") for idx, disk in enumerate(disk_info): disk_index = idx + base_index data_vg = disk.get(constants.IDISK_VG, vgname) meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg) disk_dev = _GenerateDRBD8Branch(lu, primary_node_uuid, remote_node_uuid, disk[constants.IDISK_SIZE], [data_vg, meta_vg], names[idx * 2:idx * 2 + 2], "disk/%d" % disk_index, minors[idx * 2], minors[idx * 2 + 1]) disk_dev.mode = disk[constants.IDISK_MODE] disk_dev.name = disk.get(constants.IDISK_NAME, None) disks.append(disk_dev) else: if secondary_node_uuids: raise errors.ProgrammerError("Wrong template configuration") name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None) if name_prefix is None: names = None else: names = _GenerateUniqueNames(lu, ["%s.disk%s" % (name_prefix, base_index + i) for i in range(disk_count)]) if template_name == constants.DT_PLAIN: def logical_id_fn(idx, _, disk): vg = disk.get(constants.IDISK_VG, vgname) return (vg, names[idx]) elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE): logical_id_fn = \ lambda _, disk_index, disk: (file_driver, "%s/disk%d" % (file_storage_dir, disk_index)) elif template_name == constants.DT_BLOCK: logical_id_fn = \ lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT]) elif template_name == constants.DT_RBD: logical_id_fn = lambda idx, _, disk: ("rbd", names[idx]) elif template_name == constants.DT_EXT: def logical_id_fn(idx, _, disk): provider = disk.get(constants.IDISK_PROVIDER, None) if provider is None: raise errors.ProgrammerError("Disk template is %s, but '%s' is" " not found", constants.DT_EXT, constants.IDISK_PROVIDER) return (provider, names[idx]) else: raise errors.ProgrammerError("Unknown disk template '%s'" % template_name) dev_type = template_name for idx, disk in enumerate(disk_info): params = {} # Only for the Ext template add disk_info to params if template_name == constants.DT_EXT: params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER] for key in disk: if key not in constants.IDISK_PARAMS: params[key] = disk[key] disk_index = idx + base_index size = disk[constants.IDISK_SIZE] feedback_fn("* disk %s, size %s" % (disk_index, utils.FormatUnit(size, "h"))) disk_dev = objects.Disk(dev_type=dev_type, size=size, logical_id=logical_id_fn(idx, disk_index, disk), iv_name="disk/%d" % disk_index, mode=disk[constants.IDISK_MODE], params=params, spindles=disk.get(constants.IDISK_SPINDLES)) disk_dev.name = disk.get(constants.IDISK_NAME, None) disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) disks.append(disk_dev) return disks def CheckSpindlesExclusiveStorage(diskdict, es_flag, required): """Check the presence of the spindle options with exclusive_storage. 
  @type diskdict: dict
  @param diskdict: disk parameters
  @type es_flag: bool
  @param es_flag: the effective value of the exclusive_storage flag
  @type required: bool
  @param required: whether spindles are required or just optional
  @raise errors.OpPrereqError: when spindles are given and they should not be

  """
  if (not es_flag and constants.IDISK_SPINDLES in diskdict and
      diskdict[constants.IDISK_SPINDLES] is not None):
    raise errors.OpPrereqError("Spindles in instance disks cannot be specified"
                               " when exclusive storage is not active",
                               errors.ECODE_INVAL)
  if (es_flag and required and
      (constants.IDISK_SPINDLES not in diskdict or
       diskdict[constants.IDISK_SPINDLES] is None)):
    raise errors.OpPrereqError("You must specify spindles in instance disks"
                               " when exclusive storage is active",
                               errors.ECODE_INVAL)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    constants.IDISK_SPINDLES,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support for changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    constants.IDISK_PROVIDER,
    constants.IDISK_NAME,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
disk_template = self.instance.disk_template spindle_use = be_full[constants.BE_SPINDLE_USE] disks = [{ constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode, constants.IDISK_SPINDLES: d.spindles, } for d in self.instance.disks] req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name, disk_template=disk_template, tags=list(self.instance.GetTags()), os=self.instance.os, nics=[{}], vcpus=be_full[constants.BE_VCPUS], memory=be_full[constants.BE_MAXMEM], spindle_use=spindle_use, disks=disks, hypervisor=self.instance.hypervisor, node_whitelist=None) ial = iallocator.IAllocator(self.cfg, self.rpc, req) ial.Run(self.op.iallocator) assert req.RequiredNodes() == len(self.instance.all_nodes) if not ial.success: raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" " %s" % (self.op.iallocator, ial.info), errors.ECODE_NORES) (self.op.node_uuids, self.op.nodes) = GetWantedNodes(self, ial.result) self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", self.op.instance_name, self.op.iallocator, utils.CommaJoin(self.op.nodes)) def CheckArguments(self): if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]): # Normalize and convert deprecated list of disk indices self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))] duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks)) if duplicates: raise errors.OpPrereqError("Some disks have been specified more than" " once: %s" % utils.CommaJoin(duplicates), errors.ECODE_INVAL) # We don't want _CheckIAllocatorOrNode selecting the default iallocator # when neither iallocator nor nodes are specified if self.op.iallocator or self.op.nodes: CheckIAllocatorOrNode(self, "iallocator", "nodes") for (idx, params) in self.op.disks: utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES) unsupported = frozenset(params.keys()) - self._MODIFYABLE if unsupported: raise errors.OpPrereqError("Parameters for disk %s try to change" " unmodifyable parameter(s): %s" % (idx, utils.CommaJoin(unsupported)), errors.ECODE_INVAL) def ExpandNames(self): self._ExpandAndLockInstance() self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND if self.op.nodes: (self.op.node_uuids, self.op.nodes) = GetWantedNodes(self, self.op.nodes) self.needed_locks[locking.LEVEL_NODE] = list(self.op.node_uuids) else: self.needed_locks[locking.LEVEL_NODE] = [] if self.op.iallocator: # iallocator will select a new node in the same group self.needed_locks[locking.LEVEL_NODEGROUP] = [] self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET self.needed_locks[locking.LEVEL_NODE_RES] = [] def DeclareLocks(self, level): if level == locking.LEVEL_NODEGROUP: assert self.op.iallocator is not None assert not self.op.nodes assert not self.needed_locks[locking.LEVEL_NODEGROUP] self.share_locks[locking.LEVEL_NODEGROUP] = 1 # Lock the primary group used by the instance optimistically; this # requires going via the node before it's locked, requiring # verification later on self.needed_locks[locking.LEVEL_NODEGROUP] = \ self.cfg.GetInstanceNodeGroups(self.op.instance_uuid, primary_only=True) elif level == locking.LEVEL_NODE: # If an allocator is used, then we lock all the nodes in the current # instance group, as we don't know yet which ones will be selected; # if we replace the nodes without using an allocator, locks are # already declared in ExpandNames; otherwise, we need to lock all the # instance nodes for disk re-creation if self.op.iallocator: assert not self.op.nodes assert not self.needed_locks[locking.LEVEL_NODE] assert 
len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1 # Lock member nodes of the group of the primary node for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP): self.needed_locks[locking.LEVEL_NODE].extend( self.cfg.GetNodeGroup(group_uuid).members) assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC) elif not self.op.nodes: self._LockInstancesNodes(primary_only=False) elif level == locking.LEVEL_NODE_RES: # Copy node locks self.needed_locks[locking.LEVEL_NODE_RES] = \ CopyLockList(self.needed_locks[locking.LEVEL_NODE]) def BuildHooksEnv(self): """Build hooks env. This runs on master, primary and secondary nodes of the instance. """ return BuildInstanceHookEnvByObject(self, self.instance) def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster and is not running. """ instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name if self.op.node_uuids: if len(self.op.node_uuids) != len(instance.all_nodes): raise errors.OpPrereqError("Instance %s currently has %d nodes, but" " %d replacement nodes were specified" % (instance.name, len(instance.all_nodes), len(self.op.node_uuids)), errors.ECODE_INVAL) assert instance.disk_template != constants.DT_DRBD8 or \ len(self.op.node_uuids) == 2 assert instance.disk_template != constants.DT_PLAIN or \ len(self.op.node_uuids) == 1 primary_node = self.op.node_uuids[0] else: primary_node = instance.primary_node if not self.op.iallocator: CheckNodeOnline(self, primary_node) if instance.disk_template == constants.DT_DISKLESS: raise errors.OpPrereqError("Instance '%s' has no disks" % self.op.instance_name, errors.ECODE_INVAL) # Verify if node group locks are still correct owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) if owned_groups: # Node group locks are acquired only for the primary node (and only # when the allocator is used) CheckInstanceNodeGroups(self.cfg, instance.uuid, owned_groups, primary_only=True) # if we replace nodes *and* the old primary is offline, we don't # check the instance state old_pnode = self.cfg.GetNodeInfo(instance.primary_node) if not ((self.op.iallocator or self.op.node_uuids) and old_pnode.offline): CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING, msg="cannot recreate disks") if self.op.disks: self.disks = dict(self.op.disks) else: self.disks = dict((idx, {}) for idx in range(len(instance.disks))) maxidx = max(self.disks.keys()) if maxidx >= len(instance.disks): raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx, errors.ECODE_INVAL) if ((self.op.node_uuids or self.op.iallocator) and sorted(self.disks.keys()) != range(len(instance.disks))): raise errors.OpPrereqError("Can't recreate disks partially and" " change the nodes at the same time", errors.ECODE_INVAL) self.instance = instance if self.op.iallocator: self._RunAllocator() # Release unneeded node and node resource locks ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.node_uuids) ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.node_uuids) ReleaseLocks(self, locking.LEVEL_NODE_ALLOC) assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC) if self.op.node_uuids: node_uuids = self.op.node_uuids else: node_uuids = instance.all_nodes excl_stor = compat.any( rpc.GetExclusiveStorageForNodes(self.cfg, node_uuids).values() ) for new_params in self.disks.values(): 
CheckSpindlesExclusiveStorage(new_params, excl_stor, False) def Exec(self, feedback_fn): """Recreate the disks. """ assert (self.owned_locks(locking.LEVEL_NODE) == self.owned_locks(locking.LEVEL_NODE_RES)) to_skip = [] mods = [] # keeps track of needed changes for idx, disk in enumerate(self.instance.disks): try: changes = self.disks[idx] except KeyError: # Disk should not be recreated to_skip.append(idx) continue # update secondaries for disks, if needed if self.op.node_uuids and disk.dev_type == constants.DT_DRBD8: # need to update the nodes and minors assert len(self.op.node_uuids) == 2 assert len(disk.logical_id) == 6 # otherwise disk internals # have changed (_, _, old_port, _, _, old_secret) = disk.logical_id new_minors = self.cfg.AllocateDRBDMinor(self.op.node_uuids, self.instance.uuid) new_id = (self.op.node_uuids[0], self.op.node_uuids[1], old_port, new_minors[0], new_minors[1], old_secret) assert len(disk.logical_id) == len(new_id) else: new_id = None mods.append((idx, new_id, changes)) # now that we have passed all asserts above, we can apply the mods # in a single run (to avoid partial changes) for idx, new_id, changes in mods: disk = self.instance.disks[idx] if new_id is not None: assert disk.dev_type == constants.DT_DRBD8 disk.logical_id = new_id if changes: disk.Update(size=changes.get(constants.IDISK_SIZE, None), mode=changes.get(constants.IDISK_MODE, None), spindles=changes.get(constants.IDISK_SPINDLES, None)) # change primary node, if needed if self.op.node_uuids: self.instance.primary_node = self.op.node_uuids[0] self.LogWarning("Changing the instance's nodes, you will have to" " remove any disks left on the older nodes manually") if self.op.node_uuids: self.cfg.Update(self.instance, feedback_fn) # All touched nodes must be locked mylocks = self.owned_locks(locking.LEVEL_NODE) assert mylocks.issuperset(frozenset(self.instance.all_nodes)) new_disks = CreateDisks(self, self.instance, to_skip=to_skip) # TODO: Release node locks before wiping, or explain why it's not possible if self.cfg.GetClusterInfo().prealloc_wipe_disks: wipedisks = [(idx, disk, 0) for (idx, disk) in enumerate(self.instance.disks) if idx not in to_skip] WipeOrCleanupDisks(self, self.instance, disks=wipedisks, cleanup=new_disks) def _PerformNodeInfoCall(lu, node_uuids, vg): """Prepares the input and performs a node info call. @type lu: C{LogicalUnit} @param lu: a logical unit from which we get configuration data @type node_uuids: list of string @param node_uuids: list of node UUIDs to perform the call for @type vg: string @param vg: the volume group's name """ lvm_storage_units = [(constants.ST_LVM_VG, vg)] storage_units = rpc.PrepareStorageUnitsForNodes(lu.cfg, lvm_storage_units, node_uuids) hvname = lu.cfg.GetHypervisorType() hvparams = lu.cfg.GetClusterInfo().hvparams nodeinfo = lu.rpc.call_node_info(node_uuids, storage_units, [(hvname, hvparams[hvname])]) return nodeinfo def _CheckVgCapacityForNode(node_name, node_info, vg, requested): """Checks the vg capacity for a given node. 
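
  A sketch of the expected input (C{"storage_free"} is the key actually
  read below; the other field names and all values are hypothetical):

    node_info = (None,
                 [{"type": constants.ST_LVM_VG, "name": "xenvg",
                   "storage_free": 10240}],
                 None)
    _CheckVgCapacityForNode("node1", node_info, "xenvg", 4096)   # passes
    _CheckVgCapacityForNode("node1", node_info, "xenvg", 16384)  # raises
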
@type node_info: tuple (_, list of dicts, _) @param node_info: the result of the node info call for one node @type node_name: string @param node_name: the name of the node @type vg: string @param vg: volume group name @type requested: int @param requested: the amount of disk in MiB to check for @raise errors.OpPrereqError: if the node doesn't have enough disk, or we cannot check the node """ (_, space_info, _) = node_info lvm_vg_info = utils.storage.LookupSpaceInfoByStorageType( space_info, constants.ST_LVM_VG) if not lvm_vg_info: raise errors.OpPrereqError("Can't retrieve storage information for LVM") vg_free = lvm_vg_info.get("storage_free", None) if not isinstance(vg_free, int): raise errors.OpPrereqError("Can't compute free disk space on node" " %s for vg %s, result was '%s'" % (node_name, vg, vg_free), errors.ECODE_ENVIRON) if requested > vg_free: raise errors.OpPrereqError("Not enough disk space on target node %s" " vg %s: required %d MiB, available %d MiB" % (node_name, vg, requested, vg_free), errors.ECODE_NORES) def _CheckNodesFreeDiskOnVG(lu, node_uuids, vg, requested): """Checks if nodes have enough free disk space in the specified VG. This function checks if all given nodes have the needed amount of free disk. In case any node has less disk or we cannot get the information from the node, this function raises an OpPrereqError exception. @type lu: C{LogicalUnit} @param lu: a logical unit from which we get configuration data @type node_uuids: C{list} @param node_uuids: the list of node UUIDs to check @type vg: C{str} @param vg: the volume group to check @type requested: C{int} @param requested: the amount of disk in MiB to check for @raise errors.OpPrereqError: if the node doesn't have enough disk, or we cannot check the node """ nodeinfo = _PerformNodeInfoCall(lu, node_uuids, vg) for node in node_uuids: node_name = lu.cfg.GetNodeName(node) info = nodeinfo[node] info.Raise("Cannot get current information from node %s" % node_name, prereq=True, ecode=errors.ECODE_ENVIRON) _CheckVgCapacityForNode(node_name, info.payload, vg, requested) def CheckNodesFreeDiskPerVG(lu, node_uuids, req_sizes): """Checks if nodes have enough free disk space in all the VGs. This function checks if all given nodes have the needed amount of free disk. In case any node has less disk or we cannot get the information from the node, this function raises an OpPrereqError exception. @type lu: C{LogicalUnit} @param lu: a logical unit from which we get configuration data @type node_uuids: C{list} @param node_uuids: the list of node UUIDs to check @type req_sizes: C{dict} @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for @raise errors.OpPrereqError: if the node doesn't have enough disk, or we cannot check the node """ for vg, req_size in req_sizes.items(): _CheckNodesFreeDiskOnVG(lu, node_uuids, vg, req_size) def _DiskSizeInBytesToMebibytes(lu, size): """Converts a disk size in bytes to mebibytes. Warns and rounds up if the size isn't an even multiple of 1 MiB. """ (mib, remainder) = divmod(size, 1024 * 1024) if remainder != 0: lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up" " to not overwrite existing data (%s bytes will not be" " wiped)", (1024 * 1024) - remainder) mib += 1 return mib def _CalcEta(time_taken, written, total_size): """Calculates the ETA based on size written and total size. 
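
  A worked example (values are illustrative): if 100 MiB out of 1000 MiB
  were written in 10 seconds, the average is 0.1 s/MiB, so the remaining
  900 MiB need roughly C{_CalcEta(10.0, 100, 1000) == 90.0} seconds.
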
@param time_taken: The time taken so far @param written: amount written so far @param total_size: The total size of data to be written @return: The remaining time in seconds """ avg_time = time_taken / float(written) return (total_size - written) * avg_time def WipeDisks(lu, instance, disks=None): """Wipes instance disks. @type lu: L{LogicalUnit} @param lu: the logical unit on whose behalf we execute @type instance: L{objects.Instance} @param instance: the instance whose disks we should create @type disks: None or list of tuple of (number, L{objects.Disk}, number) @param disks: Disk details; tuple contains disk index, disk object and the start offset """ node_uuid = instance.primary_node node_name = lu.cfg.GetNodeName(node_uuid) if disks is None: disks = [(idx, disk, 0) for (idx, disk) in enumerate(instance.disks)] for (_, device, _) in disks: lu.cfg.SetDiskID(device, node_uuid) logging.info("Pausing synchronization of disks of instance '%s'", instance.name) result = lu.rpc.call_blockdev_pause_resume_sync(node_uuid, (map(compat.snd, disks), instance), True) result.Raise("Failed to pause disk synchronization on node '%s'" % node_name) for idx, success in enumerate(result.payload): if not success: logging.warn("Pausing synchronization of disk %s of instance '%s'" " failed", idx, instance.name) try: for (idx, device, offset) in disks: # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors. wipe_chunk_size = \ int(min(constants.MAX_WIPE_CHUNK, device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT)) size = device.size last_output = 0 start_time = time.time() if offset == 0: info_text = "" else: info_text = (" (from %s to %s)" % (utils.FormatUnit(offset, "h"), utils.FormatUnit(size, "h"))) lu.LogInfo("* Wiping disk %s%s", idx, info_text) logging.info("Wiping disk %d for instance %s on node %s using" " chunk size %s", idx, instance.name, node_name, wipe_chunk_size) while offset < size: wipe_size = min(wipe_chunk_size, size - offset) logging.debug("Wiping disk %d, offset %s, chunk %s", idx, offset, wipe_size) result = lu.rpc.call_blockdev_wipe(node_uuid, (device, instance), offset, wipe_size) result.Raise("Could not wipe disk %d at offset %d for size %d" % (idx, offset, wipe_size)) now = time.time() offset += wipe_size if now - last_output >= 60: eta = _CalcEta(now - start_time, offset, size) lu.LogInfo(" - done: %.1f%% ETA: %s", offset / float(size) * 100, utils.FormatSeconds(eta)) last_output = now finally: logging.info("Resuming synchronization of disks for instance '%s'", instance.name) result = lu.rpc.call_blockdev_pause_resume_sync(node_uuid, (map(compat.snd, disks), instance), False) if result.fail_msg: lu.LogWarning("Failed to resume disk synchronization on node '%s': %s", node_name, result.fail_msg) else: for idx, success in enumerate(result.payload): if not success: lu.LogWarning("Resuming synchronization of disk %s of instance '%s'" " failed", idx, instance.name) def WipeOrCleanupDisks(lu, instance, disks=None, cleanup=None): """Wrapper for L{WipeDisks} that handles errors. 
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @param disks: see L{WipeDisks}
  @param cleanup: the result returned by L{CreateDisks}, used for cleanup in
      case of error
  @raise errors.OpExecError: in case of failure

  """
  try:
    WipeDisks(lu, instance, disks=disks)
  except errors.OpExecError:
    logging.warning("Wiping disks for instance '%s' failed",
                    instance.name)
    _UndoCreateDisks(lu, cleanup)
    raise


def ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance: expected a subset of %r,"
                                   " got %r" % (instance.disks, disks))
    return disks


def WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)

  node_uuid = instance.primary_node
  node_name = lu.cfg.GetNodeName(node_uuid)

  for dev in disks:
    lu.cfg.SetDiskID(dev, node_uuid)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node_uuid, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node_name, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node_name)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node_name, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.LogInfo("- device %s: %5.2f%% done, %s",
                   disks[i].iv_name, mstat.sync_percent, rem_time)

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.LogInfo("Instance %s's disks are in sync", instance.name)

  return not cumul_degraded


def ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored.

  """
  lu.cfg.MarkInstanceDisksInactive(instance.uuid)
  all_result = True
  disks = ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node_uuid, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node_uuid)
      result = lu.rpc.call_blockdev_shutdown(node_uuid, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, lu.cfg.GetNodeName(node_uuid), msg)
        if ((node_uuid == instance.primary_node and not ignore_primary) or
            (node_uuid != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  L{ShutdownInstanceDisks}.

  """
  CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  ShutdownInstanceDisks(lu, instance, disks=disks)


def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                          ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True

  disks = ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
# mark instance disks as active before doing actual work, so watcher does # not try to shut them down erroneously lu.cfg.MarkInstanceDisksActive(instance.uuid) # 1st pass, assemble on all nodes in secondary mode for idx, inst_disk in enumerate(disks): for node_uuid, node_disk in inst_disk.ComputeNodeTree( instance.primary_node): if ignore_size: node_disk = node_disk.Copy() node_disk.UnsetSize() lu.cfg.SetDiskID(node_disk, node_uuid) result = lu.rpc.call_blockdev_assemble(node_uuid, (node_disk, instance), instance.name, False, idx) msg = result.fail_msg if msg: is_offline_secondary = (node_uuid in instance.secondary_nodes and result.offline) lu.LogWarning("Could not prepare block device %s on node %s" " (is_primary=False, pass=1): %s", inst_disk.iv_name, lu.cfg.GetNodeName(node_uuid), msg) if not (ignore_secondaries or is_offline_secondary): disks_ok = False # FIXME: race condition on drbd migration to primary # 2nd pass, do only the primary node for idx, inst_disk in enumerate(disks): dev_path = None for node_uuid, node_disk in inst_disk.ComputeNodeTree( instance.primary_node): if node_uuid != instance.primary_node: continue if ignore_size: node_disk = node_disk.Copy() node_disk.UnsetSize() lu.cfg.SetDiskID(node_disk, node_uuid) result = lu.rpc.call_blockdev_assemble(node_uuid, (node_disk, instance), instance.name, True, idx) msg = result.fail_msg if msg: lu.LogWarning("Could not prepare block device %s on node %s" " (is_primary=True, pass=2): %s", inst_disk.iv_name, lu.cfg.GetNodeName(node_uuid), msg) disks_ok = False else: dev_path = result.payload device_info.append((lu.cfg.GetNodeName(instance.primary_node), inst_disk.iv_name, dev_path)) # leave the disks configured for the primary node # this is a workaround that would be fixed better by # improving the logical/physical id handling for disk in disks: lu.cfg.SetDiskID(disk, instance.primary_node) if not disks_ok: lu.cfg.MarkInstanceDisksInactive(instance.uuid) return disks_ok, device_info def StartInstanceDisks(lu, instance, force): """Start the disks of an instance. """ disks_ok, _ = AssembleInstanceDisks(lu, instance, ignore_secondaries=force) if not disks_ok: ShutdownInstanceDisks(lu, instance) if force is not None and not force: lu.LogWarning("", hint=("If the message above refers to a secondary node," " you can retry the operation using '--force'")) raise errors.OpExecError("Disk consistency error") class LUInstanceGrowDisk(LogicalUnit): """Grow a disk of an instance. """ HPATH = "disk-grow" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] self.needed_locks[locking.LEVEL_NODE_RES] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes() elif level == locking.LEVEL_NODE_RES: # Copy node locks self.needed_locks[locking.LEVEL_NODE_RES] = \ CopyLockList(self.needed_locks[locking.LEVEL_NODE]) def BuildHooksEnv(self): """Build hooks env. This runs on the master, the primary and all the secondaries. """ env = { "DISK": self.op.disk, "AMOUNT": self.op.amount, "ABSOLUTE": self.op.absolute, } env.update(BuildInstanceHookEnvByObject(self, self.instance)) return env def BuildHooksNodes(self): """Build hooks nodes. """ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) return (nl, nl) def CheckPrereq(self): """Check prerequisites. 
This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name node_uuids = list(self.instance.all_nodes) for node_uuid in node_uuids: CheckNodeOnline(self, node_uuid) self.node_es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, node_uuids) if self.instance.disk_template not in constants.DTS_GROWABLE: raise errors.OpPrereqError("Instance's disk layout does not support" " growing", errors.ECODE_INVAL) self.disk = self.instance.FindDisk(self.op.disk) if self.op.absolute: self.target = self.op.amount self.delta = self.target - self.disk.size if self.delta < 0: raise errors.OpPrereqError("Requested size (%s) is smaller than " "current disk size (%s)" % (utils.FormatUnit(self.target, "h"), utils.FormatUnit(self.disk.size, "h")), errors.ECODE_STATE) else: self.delta = self.op.amount self.target = self.disk.size + self.delta if self.delta < 0: raise errors.OpPrereqError("Requested increment (%s) is negative" % utils.FormatUnit(self.delta, "h"), errors.ECODE_INVAL) self._CheckDiskSpace(node_uuids, self.disk.ComputeGrowth(self.delta)) def _CheckDiskSpace(self, node_uuids, req_vgspace): template = self.instance.disk_template if (template not in (constants.DTS_NO_FREE_SPACE_CHECK) and not any(self.node_es_flags.values())): # TODO: check the free disk space for file, when that feature will be # supported # With exclusive storage we need to do something smarter than just looking # at free space, which, in the end, is basically a dry run. So we rely on # the dry run performed in Exec() instead. CheckNodesFreeDiskPerVG(self, node_uuids, req_vgspace) def Exec(self, feedback_fn): """Execute disk grow. """ assert set([self.instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE) assert (self.owned_locks(locking.LEVEL_NODE) == self.owned_locks(locking.LEVEL_NODE_RES)) wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks disks_ok, _ = AssembleInstanceDisks(self, self.instance, disks=[self.disk]) if not disks_ok: raise errors.OpExecError("Cannot activate block device to grow") feedback_fn("Growing disk %s of instance '%s' by %s to %s" % (self.op.disk, self.instance.name, utils.FormatUnit(self.delta, "h"), utils.FormatUnit(self.target, "h"))) # First run all grow ops in dry-run mode for node_uuid in self.instance.all_nodes: self.cfg.SetDiskID(self.disk, node_uuid) result = self.rpc.call_blockdev_grow(node_uuid, (self.disk, self.instance), self.delta, True, True, self.node_es_flags[node_uuid]) result.Raise("Dry-run grow request failed to node %s" % self.cfg.GetNodeName(node_uuid)) if wipe_disks: # Get disk size from primary node for wiping self.cfg.SetDiskID(self.disk, self.instance.primary_node) result = self.rpc.call_blockdev_getdimensions(self.instance.primary_node, [self.disk]) result.Raise("Failed to retrieve disk size from node '%s'" % self.instance.primary_node) (disk_dimensions, ) = result.payload if disk_dimensions is None: raise errors.OpExecError("Failed to retrieve disk size from primary" " node '%s'" % self.instance.primary_node) (disk_size_in_bytes, _) = disk_dimensions old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes) assert old_disk_size >= self.disk.size, \ ("Retrieved disk size too small (got %s, should be at least %s)" % (old_disk_size, self.disk.size)) else: old_disk_size = None # We know that (as far as we can test) operations across different # nodes will succeed, time to run it for real on the backing storage 
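    # For reference, the grow sequence in this method is (sketch; the 4th and
    # 5th positional arguments of call_blockdev_grow, as used here, are the
    # dry-run and, by the pattern in this module, the backing-storage flags):
    #   1. dry-run on every node        (True, True)   - done above
    #   2. real grow on every node      (False, True)  - loop below
    #   3. logical grow on the primary  (False, False) - after the loop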
for node_uuid in self.instance.all_nodes: self.cfg.SetDiskID(self.disk, node_uuid) result = self.rpc.call_blockdev_grow(node_uuid, (self.disk, self.instance), self.delta, False, True, self.node_es_flags[node_uuid]) result.Raise("Grow request failed to node %s" % self.cfg.GetNodeName(node_uuid)) # And now execute it for logical storage, on the primary node node_uuid = self.instance.primary_node self.cfg.SetDiskID(self.disk, node_uuid) result = self.rpc.call_blockdev_grow(node_uuid, (self.disk, self.instance), self.delta, False, False, self.node_es_flags[node_uuid]) result.Raise("Grow request failed to node %s" % self.cfg.GetNodeName(node_uuid)) self.disk.RecordGrow(self.delta) self.cfg.Update(self.instance, feedback_fn) # Changes have been recorded, release node lock ReleaseLocks(self, locking.LEVEL_NODE) # Downgrade lock while waiting for sync self.glm.downgrade(locking.LEVEL_INSTANCE) assert wipe_disks ^ (old_disk_size is None) if wipe_disks: assert self.instance.disks[self.op.disk] == self.disk # Wipe newly added disk space WipeDisks(self, self.instance, disks=[(self.op.disk, self.disk, old_disk_size)]) if self.op.wait_for_sync: disk_abort = not WaitForSync(self, self.instance, disks=[self.disk]) if disk_abort: self.LogWarning("Disk syncing has not returned a good status; check" " the instance") if not self.instance.disks_active: _SafeShutdownInstanceDisks(self, self.instance, disks=[self.disk]) elif not self.instance.disks_active: self.LogWarning("Not shutting down the disk even if the instance is" " not supposed to be running because no wait for" " sync mode was requested") assert self.owned_locks(locking.LEVEL_NODE_RES) assert set([self.instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE) class LUInstanceReplaceDisks(LogicalUnit): """Replace the disks of an instance. """ HPATH = "mirrors-replace" HTYPE = constants.HTYPE_INSTANCE REQ_BGL = False def CheckArguments(self): """Check arguments. 
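
    Accepted combinations, as enforced below: with
    C{mode == constants.REPLACE_DISK_CHG} exactly one of C{iallocator} or
    C{remote_node} must be given; with any other mode, neither may be
    given. For example (hypothetical opcode construction, field names as
    used in this class):

      OpInstanceReplaceDisks(instance_name="inst1",
                             mode=constants.REPLACE_DISK_CHG,
                             remote_node="node3.example.com")
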
""" if self.op.mode == constants.REPLACE_DISK_CHG: if self.op.remote_node is None and self.op.iallocator is None: raise errors.OpPrereqError("When changing the secondary either an" " iallocator script must be used or the" " new node given", errors.ECODE_INVAL) else: CheckIAllocatorOrNode(self, "iallocator", "remote_node") elif self.op.remote_node is not None or self.op.iallocator is not None: # Not replacing the secondary raise errors.OpPrereqError("The iallocator and new node options can" " only be used when changing the" " secondary node", errors.ECODE_INVAL) def ExpandNames(self): self._ExpandAndLockInstance() assert locking.LEVEL_NODE not in self.needed_locks assert locking.LEVEL_NODE_RES not in self.needed_locks assert locking.LEVEL_NODEGROUP not in self.needed_locks assert self.op.iallocator is None or self.op.remote_node is None, \ "Conflicting options" if self.op.remote_node is not None: (self.op.remote_node_uuid, self.op.remote_node) = \ ExpandNodeUuidAndName(self.cfg, self.op.remote_node_uuid, self.op.remote_node) # Warning: do not remove the locking of the new secondary here # unless DRBD8Dev.AddChildren is changed to work in parallel; # currently it doesn't since parallel invocations of # FindUnusedMinor will conflict self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node_uuid] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND else: self.needed_locks[locking.LEVEL_NODE] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE if self.op.iallocator is not None: # iallocator will select a new node in the same group self.needed_locks[locking.LEVEL_NODEGROUP] = [] self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET self.needed_locks[locking.LEVEL_NODE_RES] = [] self.replacer = TLReplaceDisks(self, self.op.instance_uuid, self.op.instance_name, self.op.mode, self.op.iallocator, self.op.remote_node_uuid, self.op.disks, self.op.early_release, self.op.ignore_ipolicy) self.tasklets = [self.replacer] def DeclareLocks(self, level): if level == locking.LEVEL_NODEGROUP: assert self.op.remote_node_uuid is None assert self.op.iallocator is not None assert not self.needed_locks[locking.LEVEL_NODEGROUP] self.share_locks[locking.LEVEL_NODEGROUP] = 1 # Lock all groups used by instance optimistically; this requires going # via the node before it's locked, requiring verification later on self.needed_locks[locking.LEVEL_NODEGROUP] = \ self.cfg.GetInstanceNodeGroups(self.op.instance_uuid) elif level == locking.LEVEL_NODE: if self.op.iallocator is not None: assert self.op.remote_node_uuid is None assert not self.needed_locks[locking.LEVEL_NODE] assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC) # Lock member nodes of all locked groups self.needed_locks[locking.LEVEL_NODE] = \ [node_uuid for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) for node_uuid in self.cfg.GetNodeGroup(group_uuid).members] else: assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC) self._LockInstancesNodes() elif level == locking.LEVEL_NODE_RES: # Reuse node locks self.needed_locks[locking.LEVEL_NODE_RES] = \ self.needed_locks[locking.LEVEL_NODE] def BuildHooksEnv(self): """Build hooks env. This runs on the master, the primary and all the secondaries. """ instance = self.replacer.instance env = { "MODE": self.op.mode, "NEW_SECONDARY": self.op.remote_node, "OLD_SECONDARY": self.cfg.GetNodeName(instance.secondary_nodes[0]), } env.update(BuildInstanceHookEnvByObject(self, instance)) return env def BuildHooksNodes(self): """Build hooks nodes. 
""" instance = self.replacer.instance nl = [ self.cfg.GetMasterNode(), instance.primary_node, ] if self.op.remote_node_uuid is not None: nl.append(self.op.remote_node_uuid) return nl, nl def CheckPrereq(self): """Check prerequisites. """ assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or self.op.iallocator is None) # Verify if node group locks are still correct owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) if owned_groups: CheckInstanceNodeGroups(self.cfg, self.op.instance_uuid, owned_groups) return LogicalUnit.CheckPrereq(self) class LUInstanceActivateDisks(NoHooksLU): """Bring up an instance's disks. """ REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes() def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name CheckNodeOnline(self, self.instance.primary_node) def Exec(self, feedback_fn): """Activate the disks. """ disks_ok, disks_info = \ AssembleInstanceDisks(self, self.instance, ignore_size=self.op.ignore_size) if not disks_ok: raise errors.OpExecError("Cannot activate block devices") if self.op.wait_for_sync: if not WaitForSync(self, self.instance): self.cfg.MarkInstanceDisksInactive(self.instance.uuid) raise errors.OpExecError("Some disks of the instance are degraded!") return disks_info class LUInstanceDeactivateDisks(NoHooksLU): """Shutdown an instance's disks. """ REQ_BGL = False def ExpandNames(self): self._ExpandAndLockInstance() self.needed_locks[locking.LEVEL_NODE] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes() def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.op.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name def Exec(self, feedback_fn): """Deactivate the disks """ if self.op.force: ShutdownInstanceDisks(self, self.instance) else: _SafeShutdownInstanceDisks(self, self.instance) def _CheckDiskConsistencyInner(lu, instance, dev, node_uuid, on_primary, ldisk=False): """Check that mirrors are not degraded. @attention: The device has to be annotated already. The ldisk parameter, if True, will change the test from the is_degraded attribute (which represents overall non-ok status for the device(s)) to the ldisk (representing the local storage status). 
""" lu.cfg.SetDiskID(dev, node_uuid) result = True if on_primary or dev.AssembleOnSecondary(): rstats = lu.rpc.call_blockdev_find(node_uuid, dev) msg = rstats.fail_msg if msg: lu.LogWarning("Can't find disk on node %s: %s", lu.cfg.GetNodeName(node_uuid), msg) result = False elif not rstats.payload: lu.LogWarning("Can't find disk on node %s", lu.cfg.GetNodeName(node_uuid)) result = False else: if ldisk: result = result and rstats.payload.ldisk_status == constants.LDS_OKAY else: result = result and not rstats.payload.is_degraded if dev.children: for child in dev.children: result = result and _CheckDiskConsistencyInner(lu, instance, child, node_uuid, on_primary) return result def CheckDiskConsistency(lu, instance, dev, node_uuid, on_primary, ldisk=False): """Wrapper around L{_CheckDiskConsistencyInner}. """ (disk,) = AnnotateDiskParams(instance, [dev], lu.cfg) return _CheckDiskConsistencyInner(lu, instance, disk, node_uuid, on_primary, ldisk=ldisk) def _BlockdevFind(lu, node_uuid, dev, instance): """Wrapper around call_blockdev_find to annotate diskparams. @param lu: A reference to the lu object @param node_uuid: The node to call out @param dev: The device to find @param instance: The instance object the device belongs to @returns The result of the rpc call """ (disk,) = AnnotateDiskParams(instance, [dev], lu.cfg) return lu.rpc.call_blockdev_find(node_uuid, disk) def _GenerateUniqueNames(lu, exts): """Generate a suitable LV name. This will generate a logical volume name for the given instance. """ results = [] for val in exts: new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) results.append("%s%s" % (new_id, val)) return results class TLReplaceDisks(Tasklet): """Replaces disks for an instance. Note: Locking is not within the scope of this class. """ def __init__(self, lu, instance_uuid, instance_name, mode, iallocator_name, remote_node_uuid, disks, early_release, ignore_ipolicy): """Initializes this class. """ Tasklet.__init__(self, lu) # Parameters self.instance_uuid = instance_uuid self.instance_name = instance_name self.mode = mode self.iallocator_name = iallocator_name self.remote_node_uuid = remote_node_uuid self.disks = disks self.early_release = early_release self.ignore_ipolicy = ignore_ipolicy # Runtime data self.instance = None self.new_node_uuid = None self.target_node_uuid = None self.other_node_uuid = None self.remote_node_info = None self.node_secondary_ip = None @staticmethod def _RunAllocator(lu, iallocator_name, instance_uuid, relocate_from_node_uuids): """Compute a new secondary node using an IAllocator. """ req = iallocator.IAReqRelocate( inst_uuid=instance_uuid, relocate_from_node_uuids=list(relocate_from_node_uuids)) ial = iallocator.IAllocator(lu.cfg, lu.rpc, req) ial.Run(iallocator_name) if not ial.success: raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" " %s" % (iallocator_name, ial.info), errors.ECODE_NORES) remote_node_name = ial.result[0] remote_node = lu.cfg.GetNodeInfoByName(remote_node_name) if remote_node is None: raise errors.OpPrereqError("Node %s not found in configuration" % remote_node_name, errors.ECODE_NOENT) lu.LogInfo("Selected new secondary for instance '%s': %s", instance_uuid, remote_node_name) return remote_node.uuid def _FindFaultyDisks(self, node_uuid): """Wrapper for L{FindFaultyInstanceDisks}. """ return FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, node_uuid, True) def _CheckDisksActivated(self, instance): """Checks if the instance disks are activated. 
@param instance: The instance to check disks @return: True if they are activated, False otherwise """ node_uuids = instance.all_nodes for idx, dev in enumerate(instance.disks): for node_uuid in node_uuids: self.lu.LogInfo("Checking disk/%d on %s", idx, self.cfg.GetNodeName(node_uuid)) self.cfg.SetDiskID(dev, node_uuid) result = _BlockdevFind(self, node_uuid, dev, instance) if result.offline: continue elif result.fail_msg or not result.payload: return False return True def CheckPrereq(self): """Check prerequisites. This checks that the instance is in the cluster. """ self.instance = self.cfg.GetInstanceInfo(self.instance_uuid) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.instance_name if self.instance.disk_template != constants.DT_DRBD8: raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" " instances", errors.ECODE_INVAL) if len(self.instance.secondary_nodes) != 1: raise errors.OpPrereqError("The instance has a strange layout," " expected one secondary but found %d" % len(self.instance.secondary_nodes), errors.ECODE_FAULT) secondary_node_uuid = self.instance.secondary_nodes[0] if self.iallocator_name is None: remote_node_uuid = self.remote_node_uuid else: remote_node_uuid = self._RunAllocator(self.lu, self.iallocator_name, self.instance.uuid, self.instance.secondary_nodes) if remote_node_uuid is None: self.remote_node_info = None else: assert remote_node_uuid in self.lu.owned_locks(locking.LEVEL_NODE), \ "Remote node '%s' is not locked" % remote_node_uuid self.remote_node_info = self.cfg.GetNodeInfo(remote_node_uuid) assert self.remote_node_info is not None, \ "Cannot retrieve locked node %s" % remote_node_uuid if remote_node_uuid == self.instance.primary_node: raise errors.OpPrereqError("The specified node is the primary node of" " the instance", errors.ECODE_INVAL) if remote_node_uuid == secondary_node_uuid: raise errors.OpPrereqError("The specified node is already the" " secondary node of the instance", errors.ECODE_INVAL) if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, constants.REPLACE_DISK_CHG): raise errors.OpPrereqError("Cannot specify disks to be replaced", errors.ECODE_INVAL) if self.mode == constants.REPLACE_DISK_AUTO: if not self._CheckDisksActivated(self.instance): raise errors.OpPrereqError("Please run activate-disks on instance %s" " first" % self.instance_name, errors.ECODE_STATE) faulty_primary = self._FindFaultyDisks(self.instance.primary_node) faulty_secondary = self._FindFaultyDisks(secondary_node_uuid) if faulty_primary and faulty_secondary: raise errors.OpPrereqError("Instance %s has faulty disks on more than" " one node and can not be repaired" " automatically" % self.instance_name, errors.ECODE_STATE) if faulty_primary: self.disks = faulty_primary self.target_node_uuid = self.instance.primary_node self.other_node_uuid = secondary_node_uuid check_nodes = [self.target_node_uuid, self.other_node_uuid] elif faulty_secondary: self.disks = faulty_secondary self.target_node_uuid = secondary_node_uuid self.other_node_uuid = self.instance.primary_node check_nodes = [self.target_node_uuid, self.other_node_uuid] else: self.disks = [] check_nodes = [] else: # Non-automatic modes if self.mode == constants.REPLACE_DISK_PRI: self.target_node_uuid = self.instance.primary_node self.other_node_uuid = secondary_node_uuid check_nodes = [self.target_node_uuid, self.other_node_uuid] elif self.mode == constants.REPLACE_DISK_SEC: self.target_node_uuid = secondary_node_uuid self.other_node_uuid = self.instance.primary_node 
check_nodes = [self.target_node_uuid, self.other_node_uuid] elif self.mode == constants.REPLACE_DISK_CHG: self.new_node_uuid = remote_node_uuid self.other_node_uuid = self.instance.primary_node self.target_node_uuid = secondary_node_uuid check_nodes = [self.new_node_uuid, self.other_node_uuid] CheckNodeNotDrained(self.lu, remote_node_uuid) CheckNodeVmCapable(self.lu, remote_node_uuid) old_node_info = self.cfg.GetNodeInfo(secondary_node_uuid) assert old_node_info is not None if old_node_info.offline and not self.early_release: # doesn't make sense to delay the release self.early_release = True self.lu.LogInfo("Old secondary %s is offline, automatically enabling" " early-release mode", secondary_node_uuid) else: raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % self.mode) # If not specified all disks should be replaced if not self.disks: self.disks = range(len(self.instance.disks)) # TODO: This is ugly, but right now we can't distinguish between internal # submitted opcode and external one. We should fix that. if self.remote_node_info: # We change the node, lets verify it still meets instance policy new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group) cluster = self.cfg.GetClusterInfo() ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, new_group_info) CheckTargetNodeIPolicy(self, ipolicy, self.instance, self.remote_node_info, self.cfg, ignore=self.ignore_ipolicy) for node_uuid in check_nodes: CheckNodeOnline(self.lu, node_uuid) touched_nodes = frozenset(node_uuid for node_uuid in [self.new_node_uuid, self.other_node_uuid, self.target_node_uuid] if node_uuid is not None) # Release unneeded node and node resource locks ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes) ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes) ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC) # Release any owned node group ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP) # Check whether disks are valid for disk_idx in self.disks: self.instance.FindDisk(disk_idx) # Get secondary node IP addresses self.node_secondary_ip = dict((uuid, node.secondary_ip) for (uuid, node) in self.cfg.GetMultiNodeInfo(touched_nodes)) def Exec(self, feedback_fn): """Execute disk replacement. This dispatches the disk replacement to the appropriate handler. 
""" if __debug__: # Verify owned locks before starting operation owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE) assert set(owned_nodes) == set(self.node_secondary_ip), \ ("Incorrect node locks, owning %s, expected %s" % (owned_nodes, self.node_secondary_ip.keys())) assert (self.lu.owned_locks(locking.LEVEL_NODE) == self.lu.owned_locks(locking.LEVEL_NODE_RES)) assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC) owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE) assert list(owned_instances) == [self.instance_name], \ "Instance '%s' not locked" % self.instance_name assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \ "Should not own any node group lock at this point" if not self.disks: feedback_fn("No disks need replacement for instance '%s'" % self.instance.name) return feedback_fn("Replacing disk(s) %s for instance '%s'" % (utils.CommaJoin(self.disks), self.instance.name)) feedback_fn("Current primary node: %s" % self.cfg.GetNodeName(self.instance.primary_node)) feedback_fn("Current seconary node: %s" % utils.CommaJoin(self.cfg.GetNodeNames( self.instance.secondary_nodes))) activate_disks = not self.instance.disks_active # Activate the instance disks if we're replacing them on a down instance if activate_disks: StartInstanceDisks(self.lu, self.instance, True) try: # Should we replace the secondary node? if self.new_node_uuid is not None: fn = self._ExecDrbd8Secondary else: fn = self._ExecDrbd8DiskOnly result = fn(feedback_fn) finally: # Deactivate the instance disks if we're replacing them on a # down instance if activate_disks: _SafeShutdownInstanceDisks(self.lu, self.instance) assert not self.lu.owned_locks(locking.LEVEL_NODE) if __debug__: # Verify owned locks owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES) nodes = frozenset(self.node_secondary_ip) assert ((self.early_release and not owned_nodes) or (not self.early_release and not (set(owned_nodes) - nodes))), \ ("Not owning the correct locks, early_release=%s, owned=%r," " nodes=%r" % (self.early_release, owned_nodes, nodes)) return result def _CheckVolumeGroup(self, node_uuids): self.lu.LogInfo("Checking volume groups") vgname = self.cfg.GetVGName() # Make sure volume group exists on all involved nodes results = self.rpc.call_vg_list(node_uuids) if not results: raise errors.OpExecError("Can't list volume groups on the nodes") for node_uuid in node_uuids: res = results[node_uuid] res.Raise("Error checking node %s" % self.cfg.GetNodeName(node_uuid)) if vgname not in res.payload: raise errors.OpExecError("Volume group '%s' not found on node %s" % (vgname, self.cfg.GetNodeName(node_uuid))) def _CheckDisksExistence(self, node_uuids): # Check disk existence for idx, dev in enumerate(self.instance.disks): if idx not in self.disks: continue for node_uuid in node_uuids: self.lu.LogInfo("Checking disk/%d on %s", idx, self.cfg.GetNodeName(node_uuid)) self.cfg.SetDiskID(dev, node_uuid) result = _BlockdevFind(self, node_uuid, dev, self.instance) msg = result.fail_msg if msg or not result.payload: if not msg: msg = "disk not found" if not self._CheckDisksActivated(self.instance): extra_hint = ("\nDisks seem to be not properly activated. 
Try" " running activate-disks on the instance before" " using replace-disks.") else: extra_hint = "" raise errors.OpExecError("Can't find disk/%d on node %s: %s%s" % (idx, self.cfg.GetNodeName(node_uuid), msg, extra_hint)) def _CheckDisksConsistency(self, node_uuid, on_primary, ldisk): for idx, dev in enumerate(self.instance.disks): if idx not in self.disks: continue self.lu.LogInfo("Checking disk/%d consistency on node %s" % (idx, self.cfg.GetNodeName(node_uuid))) if not CheckDiskConsistency(self.lu, self.instance, dev, node_uuid, on_primary, ldisk=ldisk): raise errors.OpExecError("Node %s has degraded storage, unsafe to" " replace disks for instance %s" % (self.cfg.GetNodeName(node_uuid), self.instance.name)) def _CreateNewStorage(self, node_uuid): """Create new storage on the primary or secondary node. This is only used for same-node replaces, not for changing the secondary node, hence we don't want to modify the existing disk. """ iv_names = {} disks = AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) for idx, dev in enumerate(disks): if idx not in self.disks: continue self.lu.LogInfo("Adding storage on %s for disk/%d", self.cfg.GetNodeName(node_uuid), idx) self.cfg.SetDiskID(dev, node_uuid) lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] names = _GenerateUniqueNames(self.lu, lv_names) (data_disk, meta_disk) = dev.children vg_data = data_disk.logical_id[0] lv_data = objects.Disk(dev_type=constants.DT_PLAIN, size=dev.size, logical_id=(vg_data, names[0]), params=data_disk.params) vg_meta = meta_disk.logical_id[0] lv_meta = objects.Disk(dev_type=constants.DT_PLAIN, size=constants.DRBD_META_SIZE, logical_id=(vg_meta, names[1]), params=meta_disk.params) new_lvs = [lv_data, lv_meta] old_lvs = [child.Copy() for child in dev.children] iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) excl_stor = IsExclusiveStorageEnabledNodeUuid(self.lu.cfg, node_uuid) # we pass force_create=True to force the LVM creation for new_lv in new_lvs: try: _CreateBlockDevInner(self.lu, node_uuid, self.instance, new_lv, True, GetInstanceInfoText(self.instance), False, excl_stor) except errors.DeviceCreationError, e: raise errors.OpExecError("Can't create block device: %s" % e.message) return iv_names def _CheckDevices(self, node_uuid, iv_names): for name, (dev, _, _) in iv_names.iteritems(): self.cfg.SetDiskID(dev, node_uuid) result = _BlockdevFind(self, node_uuid, dev, self.instance) msg = result.fail_msg if msg or not result.payload: if not msg: msg = "disk not found" raise errors.OpExecError("Can't find DRBD device %s: %s" % (name, msg)) if result.payload.is_degraded: raise errors.OpExecError("DRBD device %s is degraded!" % name) def _RemoveOldStorage(self, node_uuid, iv_names): for name, (_, old_lvs, _) in iv_names.iteritems(): self.lu.LogInfo("Remove logical volumes for %s", name) for lv in old_lvs: self.cfg.SetDiskID(lv, node_uuid) msg = self.rpc.call_blockdev_remove(node_uuid, lv).fail_msg if msg: self.lu.LogWarning("Can't remove old LV: %s", msg, hint="remove unused LVs manually") def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613 """Replace a disk on the primary or secondary for DRBD 8. The algorithm for replace is quite complicated: 1. for each disk to be replaced: 1. create new LVs on the target node with unique names 1. detach old LVs from the drbd device 1. rename old LVs to name_replaced. 1. rename new LVs to old LVs 1. attach the new LVs (with the old names now) to the drbd device 1. wait for sync across all devices 1. for each modified disk: 1. 
remove old LVs (which have the name name_replaced) Failures are not very well handled. """ steps_total = 6 # Step: check device activation self.lu.LogStep(1, steps_total, "Check device existence") self._CheckDisksExistence([self.other_node_uuid, self.target_node_uuid]) self._CheckVolumeGroup([self.target_node_uuid, self.other_node_uuid]) # Step: check other node consistency self.lu.LogStep(2, steps_total, "Check peer consistency") self._CheckDisksConsistency( self.other_node_uuid, self.other_node_uuid == self.instance.primary_node, False) # Step: create new storage self.lu.LogStep(3, steps_total, "Allocate new storage") iv_names = self._CreateNewStorage(self.target_node_uuid) # Step: for each lv, detach+rename*2+attach self.lu.LogStep(4, steps_total, "Changing drbd configuration") for dev, old_lvs, new_lvs in iv_names.itervalues(): self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name) result = self.rpc.call_blockdev_removechildren(self.target_node_uuid, dev, old_lvs) result.Raise("Can't detach drbd from local storage on node" " %s for device %s" % (self.cfg.GetNodeName(self.target_node_uuid), dev.iv_name)) #dev.children = [] #cfg.Update(instance) # ok, we created the new LVs, so now we know we have the needed # storage; as such, we proceed on the target node to rename # old_lv to _old, and new_lv to old_lv; note that we rename LVs # using the assumption that logical_id == physical_id (which in # turn is the unique_id on that node) # FIXME(iustin): use a better name for the replaced LVs temp_suffix = int(time.time()) ren_fn = lambda d, suff: (d.physical_id[0], d.physical_id[1] + "_replaced-%s" % suff) # Build the rename list based on what LVs exist on the node rename_old_to_new = [] for to_ren in old_lvs: result = self.rpc.call_blockdev_find(self.target_node_uuid, to_ren) if not result.fail_msg and result.payload: # device exists rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) self.lu.LogInfo("Renaming the old LVs on the target node") result = self.rpc.call_blockdev_rename(self.target_node_uuid, rename_old_to_new) result.Raise("Can't rename old LVs on node %s" % self.cfg.GetNodeName(self.target_node_uuid)) # Now we rename the new LVs to the old LVs self.lu.LogInfo("Renaming the new LVs on the target node") rename_new_to_old = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)] result = self.rpc.call_blockdev_rename(self.target_node_uuid, rename_new_to_old) result.Raise("Can't rename new LVs on node %s" % self.cfg.GetNodeName(self.target_node_uuid)) # Intermediate steps of in memory modifications for old, new in zip(old_lvs, new_lvs): new.logical_id = old.logical_id self.cfg.SetDiskID(new, self.target_node_uuid) # We need to modify old_lvs so that removal later removes the # right LVs, not the newly added ones; note that old_lvs is a # copy here for disk in old_lvs: disk.logical_id = ren_fn(disk, temp_suffix) self.cfg.SetDiskID(disk, self.target_node_uuid) # Now that the new lvs have the old name, we can add them to the device self.lu.LogInfo("Adding new mirror component on %s", self.cfg.GetNodeName(self.target_node_uuid)) result = self.rpc.call_blockdev_addchildren(self.target_node_uuid, (dev, self.instance), new_lvs) msg = result.fail_msg if msg: for new_lv in new_lvs: msg2 = self.rpc.call_blockdev_remove(self.target_node_uuid, new_lv).fail_msg if msg2: self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, hint=("manually clean up the unused logical" " volumes")) raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) cstep = 
itertools.count(5) if self.early_release: self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") self._RemoveOldStorage(self.target_node_uuid, iv_names) # TODO: Check if releasing locks early still makes sense ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) else: # Release all resource locks except those used by the instance ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=self.node_secondary_ip.keys()) # Release all node locks while waiting for sync ReleaseLocks(self.lu, locking.LEVEL_NODE) # TODO: Can the instance lock be downgraded here? Take the optional disk # shutdown in the caller into consideration. # Wait for sync # This can fail as the old devices are degraded and _WaitForSync # does a combined result over all disks, so we don't check its return value self.lu.LogStep(cstep.next(), steps_total, "Sync devices") WaitForSync(self.lu, self.instance) # Check all devices manually self._CheckDevices(self.instance.primary_node, iv_names) # Step: remove old storage if not self.early_release: self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") self._RemoveOldStorage(self.target_node_uuid, iv_names) def _ExecDrbd8Secondary(self, feedback_fn): """Replace the secondary node for DRBD 8. The algorithm for replace is quite complicated: - for all disks of the instance: - create new LVs on the new node with same names - shutdown the drbd device on the old secondary - disconnect the drbd network on the primary - create the drbd device on the new secondary - network attach the drbd on the primary, using an artifice: the drbd code for Attach() will connect to the network if it finds a device which is connected to the good local disks but not network enabled - wait for sync across all devices - remove all disks from the old secondary Failures are not very well handled. 
""" steps_total = 6 pnode = self.instance.primary_node # Step: check device activation self.lu.LogStep(1, steps_total, "Check device existence") self._CheckDisksExistence([self.instance.primary_node]) self._CheckVolumeGroup([self.instance.primary_node]) # Step: check other node consistency self.lu.LogStep(2, steps_total, "Check peer consistency") self._CheckDisksConsistency(self.instance.primary_node, True, True) # Step: create new storage self.lu.LogStep(3, steps_total, "Allocate new storage") disks = AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) excl_stor = IsExclusiveStorageEnabledNodeUuid(self.lu.cfg, self.new_node_uuid) for idx, dev in enumerate(disks): self.lu.LogInfo("Adding new local storage on %s for disk/%d" % (self.cfg.GetNodeName(self.new_node_uuid), idx)) # we pass force_create=True to force LVM creation for new_lv in dev.children: try: _CreateBlockDevInner(self.lu, self.new_node_uuid, self.instance, new_lv, True, GetInstanceInfoText(self.instance), False, excl_stor) except errors.DeviceCreationError, e: raise errors.OpExecError("Can't create block device: %s" % e.message) # Step 4: dbrd minors and drbd setups changes # after this, we must manually remove the drbd minors on both the # error and the success paths self.lu.LogStep(4, steps_total, "Changing drbd configuration") minors = self.cfg.AllocateDRBDMinor([self.new_node_uuid for _ in self.instance.disks], self.instance.uuid) logging.debug("Allocated minors %r", minors) iv_names = {} for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.cfg.GetNodeName(self.new_node_uuid), idx)) # create new devices on new_node; note that we create two IDs: # one without port, so the drbd will be activated without # networking information on the new node at this stage, and one # with network, for the latter activation in step 4 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id if self.instance.primary_node == o_node1: p_minor = o_minor1 else: assert self.instance.primary_node == o_node2, "Three-node instance?" 
p_minor = o_minor2 new_alone_id = (self.instance.primary_node, self.new_node_uuid, None, p_minor, new_minor, o_secret) new_net_id = (self.instance.primary_node, self.new_node_uuid, o_port, p_minor, new_minor, o_secret) iv_names[idx] = (dev, dev.children, new_net_id) logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, new_net_id) new_drbd = objects.Disk(dev_type=constants.DT_DRBD8, logical_id=new_alone_id, children=dev.children, size=dev.size, params={}) (anno_new_drbd,) = AnnotateDiskParams(self.instance, [new_drbd], self.cfg) try: CreateSingleBlockDev(self.lu, self.new_node_uuid, self.instance, anno_new_drbd, GetInstanceInfoText(self.instance), False, excl_stor) except errors.GenericError: self.cfg.ReleaseDRBDMinors(self.instance.uuid) raise # We have new devices, shutdown the drbd on the old secondary for idx, dev in enumerate(self.instance.disks): self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx) self.cfg.SetDiskID(dev, self.target_node_uuid) msg = self.rpc.call_blockdev_shutdown(self.target_node_uuid, (dev, self.instance)).fail_msg if msg: self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" " node: %s" % (idx, msg), hint=("Please clean up this device manually as" " soon as possible")) self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip, self.instance.disks)[pnode] msg = result.fail_msg if msg: # detaches didn't succeed (unlikely) self.cfg.ReleaseDRBDMinors(self.instance.uuid) raise errors.OpExecError("Can't detach the disks from the network on" " old node: %s" % (msg,)) # if we managed to detach at least one, we update all the disks of # the instance to point to the new secondary self.lu.LogInfo("Updating instance configuration") for dev, _, new_logical_id in iv_names.itervalues(): dev.logical_id = new_logical_id self.cfg.SetDiskID(dev, self.instance.primary_node) self.cfg.Update(self.instance, feedback_fn) # Release all node locks (the configuration has been updated) ReleaseLocks(self.lu, locking.LEVEL_NODE) # and now perform the drbd attach self.lu.LogInfo("Attaching primary drbds to new secondary" " (standalone => connected)") result = self.rpc.call_drbd_attach_net([self.instance.primary_node, self.new_node_uuid], self.node_secondary_ip, (self.instance.disks, self.instance), self.instance.name, False) for to_node, to_result in result.items(): msg = to_result.fail_msg if msg: self.lu.LogWarning("Can't attach drbd disks on node %s: %s", self.cfg.GetNodeName(to_node), msg, hint=("please do a gnt-instance info to see the" " status of disks")) cstep = itertools.count(5) if self.early_release: self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") self._RemoveOldStorage(self.target_node_uuid, iv_names) # TODO: Check if releasing locks early still makes sense ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) else: # Release all resource locks except those used by the instance ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=self.node_secondary_ip.keys()) # TODO: Can the instance lock be downgraded here? Take the optional disk # shutdown in the caller into consideration. 
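# Note on early_release (same logic in both replace paths): when set, the
# old storage is removed and all node resource locks are dropped before
# the resync below; when unset, resource locks for the instance's own
# nodes are kept and the old LVs are only removed after WaitForSync has
# finished.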
# Wait for sync # This can fail as the old devices are degraded and _WaitForSync # does a combined result over all disks, so we don't check its return value self.lu.LogStep(cstep.next(), steps_total, "Sync devices") WaitForSync(self.lu, self.instance) # Check all devices manually self._CheckDevices(self.instance.primary_node, iv_names) # Step: remove old storage if not self.early_release: self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") self._RemoveOldStorage(self.target_node_uuid, iv_names) ganeti-2.9.3/lib/cmdlib/test.py0000644000000000000000000003062212271422343016326 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Test logical units.""" import logging import shutil import socket import tempfile from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import locking from ganeti import utils from ganeti.masterd import iallocator from ganeti.cmdlib.base import NoHooksLU from ganeti.cmdlib.common import ExpandInstanceUuidAndName, GetWantedNodes, \ GetWantedInstances class LUTestDelay(NoHooksLU): """Sleep for a specified amount of time. This LU sleeps on the master and/or nodes for a specified amount of time. """ REQ_BGL = False def ExpandNames(self): """Expand names and set required locks. This expands the node list, if any. """ self.needed_locks = {} if self.op.on_nodes or self.op.on_master: self.needed_locks[locking.LEVEL_NODE] = [] if self.op.on_nodes: # _GetWantedNodes can be used here, but is not always appropriate to use # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for # more information. (self.op.on_node_uuids, self.op.on_nodes) = \ GetWantedNodes(self, self.op.on_nodes) self.needed_locks[locking.LEVEL_NODE].extend(self.op.on_node_uuids) if self.op.on_master: # The node lock should be acquired for the master as well. self.needed_locks[locking.LEVEL_NODE].append(self.cfg.GetMasterNode()) def _TestDelay(self): """Do the actual sleep. """ if self.op.on_master: if not utils.TestDelay(self.op.duration): raise errors.OpExecError("Error during master delay test") if self.op.on_node_uuids: result = self.rpc.call_test_delay(self.op.on_node_uuids, self.op.duration) for node_uuid, node_result in result.items(): node_result.Raise("Failure during rpc call to node %s" % self.cfg.GetNodeName(node_uuid)) def Exec(self, feedback_fn): """Execute the test delay opcode, with the wanted repetitions. """ if self.op.repeat == 0: self._TestDelay() else: top_value = self.op.repeat - 1 for i in range(self.op.repeat): self.LogInfo("Test delay iteration %d/%d", i, top_value) self._TestDelay() class LUTestJqueue(NoHooksLU): """Utility LU to test some aspects of the job queue. 
""" REQ_BGL = False # Must be lower than default timeout for WaitForJobChange to see whether it # notices changed jobs _CLIENT_CONNECT_TIMEOUT = 20.0 _CLIENT_CONFIRM_TIMEOUT = 60.0 @classmethod def _NotifyUsingSocket(cls, cb, errcls): """Opens a Unix socket and waits for another program to connect. @type cb: callable @param cb: Callback to send socket name to client @type errcls: class @param errcls: Exception class to use for errors """ # Using a temporary directory as there's no easy way to create temporary # sockets without writing a custom loop around tempfile.mktemp and # socket.bind tmpdir = tempfile.mkdtemp() try: tmpsock = utils.PathJoin(tmpdir, "sock") logging.debug("Creating temporary socket at %s", tmpsock) sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: sock.bind(tmpsock) sock.listen(1) # Send details to client cb(tmpsock) # Wait for client to connect before continuing sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) try: (conn, _) = sock.accept() except socket.error, err: raise errcls("Client didn't connect in time (%s)" % err) finally: sock.close() finally: # Remove as soon as client is connected shutil.rmtree(tmpdir) # Wait for client to close try: try: # pylint: disable=E1101 # Instance of '_socketobject' has no ... member conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) conn.recv(1) except socket.error, err: raise errcls("Client failed to confirm notification (%s)" % err) finally: conn.close() def _SendNotification(self, test, arg, sockname): """Sends a notification to the client. @type test: string @param test: Test name @param arg: Test argument (depends on test) @type sockname: string @param sockname: Socket path """ self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg)) def _Notify(self, prereq, test, arg): """Notifies the client of a test. @type prereq: bool @param prereq: Whether this is a prereq-phase test @type test: string @param test: Test name @param arg: Test argument (depends on test) """ if prereq: errcls = errors.OpPrereqError else: errcls = errors.OpExecError return self._NotifyUsingSocket(compat.partial(self._SendNotification, test, arg), errcls) def CheckArguments(self): self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 self.expandnames_calls = 0 def ExpandNames(self): checkargs_calls = getattr(self, "checkargs_calls", 0) if checkargs_calls < 1: raise errors.ProgrammerError("CheckArguments was not called") self.expandnames_calls += 1 if self.op.notify_waitlock: self._Notify(True, constants.JQT_EXPANDNAMES, None) self.LogInfo("Expanding names") # Get lock on master node (just to get a lock, not for a particular reason) self.needed_locks = { locking.LEVEL_NODE: self.cfg.GetMasterNode(), } def Exec(self, feedback_fn): if self.expandnames_calls < 1: raise errors.ProgrammerError("ExpandNames was not called") if self.op.notify_exec: self._Notify(False, constants.JQT_EXEC, None) self.LogInfo("Executing") if self.op.log_messages: self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) for idx, msg in enumerate(self.op.log_messages): self.LogInfo("Sending log message %s", idx + 1) feedback_fn(constants.JQT_MSGPREFIX + msg) # Report how many test messages have been sent self._Notify(False, constants.JQT_LOGMSG, idx + 1) if self.op.fail: raise errors.OpExecError("Opcode failure was requested") return True class LUTestAllocator(NoHooksLU): """Run allocator tests. This LU runs the allocator tests """ def CheckPrereq(self): """Check prerequisites. This checks the opcode parameters depending on the director and mode test. 
""" if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC, constants.IALLOCATOR_MODE_MULTI_ALLOC): for attr in ["memory", "disks", "disk_template", "os", "tags", "nics", "vcpus"]: if not hasattr(self.op, attr): raise errors.OpPrereqError("Missing attribute '%s' on opcode input" % attr, errors.ECODE_INVAL) (self.inst_uuid, iname) = self.cfg.ExpandInstanceName(self.op.name) if iname is not None: raise errors.OpPrereqError("Instance '%s' already in the cluster" % iname, errors.ECODE_EXISTS) if not isinstance(self.op.nics, list): raise errors.OpPrereqError("Invalid parameter 'nics'", errors.ECODE_INVAL) if not isinstance(self.op.disks, list): raise errors.OpPrereqError("Invalid parameter 'disks'", errors.ECODE_INVAL) for row in self.op.disks: if (not isinstance(row, dict) or constants.IDISK_SIZE not in row or not isinstance(row[constants.IDISK_SIZE], int) or constants.IDISK_MODE not in row or row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET): raise errors.OpPrereqError("Invalid contents of the 'disks'" " parameter", errors.ECODE_INVAL) if self.op.hypervisor is None: self.op.hypervisor = self.cfg.GetHypervisorType() elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: (fuuid, fname) = ExpandInstanceUuidAndName(self.cfg, None, self.op.name) self.op.name = fname self.relocate_from_node_uuids = \ list(self.cfg.GetInstanceInfo(fuuid).secondary_nodes) elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP, constants.IALLOCATOR_MODE_NODE_EVAC): if not self.op.instances: raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL) (_, self.op.instances) = GetWantedInstances(self, self.op.instances) else: raise errors.OpPrereqError("Invalid test allocator mode '%s'" % self.op.mode, errors.ECODE_INVAL) if self.op.direction == constants.IALLOCATOR_DIR_OUT: if self.op.iallocator is None: raise errors.OpPrereqError("Missing allocator name", errors.ECODE_INVAL) elif self.op.direction != constants.IALLOCATOR_DIR_IN: raise errors.OpPrereqError("Wrong allocator test '%s'" % self.op.direction, errors.ECODE_INVAL) def Exec(self, feedback_fn): """Run the allocator test. 
""" if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: req = iallocator.IAReqInstanceAlloc(name=self.op.name, memory=self.op.memory, disks=self.op.disks, disk_template=self.op.disk_template, os=self.op.os, tags=self.op.tags, nics=self.op.nics, vcpus=self.op.vcpus, spindle_use=self.op.spindle_use, hypervisor=self.op.hypervisor, node_whitelist=None) elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: req = iallocator.IAReqRelocate( inst_uuid=self.inst_uuid, relocate_from_node_uuids=list(self.relocate_from_node_uuids)) elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP: req = iallocator.IAReqGroupChange(instances=self.op.instances, target_groups=self.op.target_groups) elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC: req = iallocator.IAReqNodeEvac(instances=self.op.instances, evac_mode=self.op.evac_mode) elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC: disk_template = self.op.disk_template insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx), memory=self.op.memory, disks=self.op.disks, disk_template=disk_template, os=self.op.os, tags=self.op.tags, nics=self.op.nics, vcpus=self.op.vcpus, spindle_use=self.op.spindle_use, hypervisor=self.op.hypervisor) for idx in range(self.op.count)] req = iallocator.IAReqMultiInstanceAlloc(instances=insts) else: raise errors.ProgrammerError("Uncatched mode %s in" " LUTestAllocator.Exec", self.op.mode) ial = iallocator.IAllocator(self.cfg, self.rpc, req) if self.op.direction == constants.IALLOCATOR_DIR_IN: result = ial.in_text else: ial.Run(self.op.iallocator, validate=False) result = ial.out_text return result ganeti-2.9.3/lib/outils.py0000644000000000000000000001051212244641676015444 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module for object related utils.""" #: Supported container types for serialization/de-serialization (must be a #: tuple as it's used as a parameter for C{isinstance}) _SEQUENCE_TYPES = (list, tuple, set, frozenset) class AutoSlots(type): """Meta base class for __slots__ definitions. """ def __new__(mcs, name, bases, attrs): """Called when a class should be created. @param mcs: The meta class @param name: Name of created class @param bases: Base classes @type attrs: dict @param attrs: Class attributes """ assert "__slots__" not in attrs, \ "Class '%s' defines __slots__ when it should not" % name attrs["__slots__"] = mcs._GetSlots(attrs) return type.__new__(mcs, name, bases, attrs) @classmethod def _GetSlots(mcs, attrs): """Used to get the list of defined slots. @param attrs: The attributes of the class """ raise NotImplementedError class ValidatedSlots(object): """Sets and validates slots. """ __slots__ = [] def __init__(self, **kwargs): """Constructor for BaseOpCode. 
The constructor takes only keyword arguments and will set attributes on this object based on the passed arguments. As such, it means that you should not pass arguments which are not in the __slots__ attribute for this class. """ slots = self.GetAllSlots() for (key, value) in kwargs.items(): if key not in slots: raise TypeError("Object %s doesn't support the parameter '%s'" % (self.__class__.__name__, key)) setattr(self, key, value) @classmethod def GetAllSlots(cls): """Compute the list of all declared slots for a class. """ slots = [] for parent in cls.__mro__: slots.extend(getattr(parent, "__slots__", [])) return slots def Validate(self): """Validates the slots. This method must be implemented by the child classes. """ raise NotImplementedError def ContainerToDicts(container): """Convert the elements of a container to standard Python types. This method converts a container with elements to standard Python types. If the input container is of the type C{dict}, only its values are touched. Those values, as well as all elements of input sequences, must support a C{ToDict} method returning a serialized version. @type container: dict or sequence (see L{_SEQUENCE_TYPES}) """ if isinstance(container, dict): ret = dict([(k, v.ToDict()) for k, v in container.items()]) elif isinstance(container, _SEQUENCE_TYPES): ret = [elem.ToDict() for elem in container] else: raise TypeError("Unknown container type '%s'" % type(container)) return ret def ContainerFromDicts(source, c_type, e_type): """Convert a container from standard python types. This method converts a container with standard Python types to objects. If the container is a dict, we don't touch the keys, only the values. @type source: None, dict or sequence (see L{_SEQUENCE_TYPES}) @param source: Input data @type c_type: type class @param c_type: Desired type for returned container @type e_type: element type class @param e_type: Item type for elements in returned container (must have a C{FromDict} class method) """ if not isinstance(c_type, type): raise TypeError("Container type '%s' is not a type" % type(c_type)) if source is None: source = c_type() if c_type is dict: ret = dict([(k, e_type.FromDict(v)) for k, v in source.items()]) elif c_type in _SEQUENCE_TYPES: ret = c_type(map(e_type.FromDict, source)) else: raise TypeError("Unknown container type '%s'" % c_type) return ret ganeti-2.9.3/lib/netutils.py0000644000000000000000000004613012271422343015765 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti network utility module. This module holds functions that can be used in both daemons (all) and the command line scripts. 
""" import errno import os import re import socket import struct import IN import logging from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import vcluster # Structure definition for getsockopt(SOL_SOCKET, SO_PEERCRED, ...): # struct ucred { pid_t pid; uid_t uid; gid_t gid; }; # # The GNU C Library defines gid_t and uid_t to be "unsigned int" and # pid_t to "int". # # IEEE Std 1003.1-2008: # "nlink_t, uid_t, gid_t, and id_t shall be integer types" # "blksize_t, pid_t, and ssize_t shall be signed integer types" _STRUCT_UCRED = "iII" _STRUCT_UCRED_SIZE = struct.calcsize(_STRUCT_UCRED) # Workaround a bug in some linux distributions that don't define SO_PEERCRED try: # pylint: disable=E1101 _SO_PEERCRED = IN.SO_PEERCRED except AttributeError: _SO_PEERCRED = 17 # Regexes used to find IP addresses in the output of ip. _IP_RE_TEXT = r"[.:a-z0-9]+" # separate for testing purposes _IP_FAMILY_RE = re.compile(r"(?Pinet6?)\s+(?P%s)/" % _IP_RE_TEXT, re.IGNORECASE) # Dict used to convert from a string representing an IP family to an IP # version _NAME_TO_IP_VER = { "inet": constants.IP4_VERSION, "inet6": constants.IP6_VERSION, } def _GetIpAddressesFromIpOutput(ip_output): """Parses the output of the ip command and retrieves the IP addresses and version. @param ip_output: string containing the output of the ip command; @rtype: dict; (int, list) @return: a dict having as keys the IP versions and as values the corresponding list of addresses found in the IP output. """ addr = dict((i, []) for i in _NAME_TO_IP_VER.values()) for row in ip_output.splitlines(): match = _IP_FAMILY_RE.search(row) if match and IPAddress.IsValid(match.group("ip")): addr[_NAME_TO_IP_VER[match.group("family")]].append(match.group("ip")) return addr def GetSocketCredentials(sock): """Returns the credentials of the foreign process connected to a socket. @param sock: Unix socket @rtype: tuple; (number, number, number) @return: The PID, UID and GID of the connected foreign process. """ peercred = sock.getsockopt(socket.SOL_SOCKET, _SO_PEERCRED, _STRUCT_UCRED_SIZE) return struct.unpack(_STRUCT_UCRED, peercred) def IsValidInterface(ifname): """Validate an interface name. @type ifname: string @param ifname: Name of the network interface @return: boolean indicating whether the interface name is valid or not. """ return os.path.exists(utils.PathJoin("/sys/class/net", ifname)) def GetInterfaceIpAddresses(ifname): """Returns the IP addresses associated to the interface. @type ifname: string @param ifname: Name of the network interface @return: A dict having for keys the IP version (either L{constants.IP4_VERSION} or L{constants.IP6_VERSION}) and for values the lists of IP addresses of the respective version associated to the interface """ result = utils.RunCmd([constants.IP_COMMAND_PATH, "-o", "addr", "show", ifname]) if result.failed: logging.error("Error running the ip command while getting the IP" " addresses of %s", ifname) return None return _GetIpAddressesFromIpOutput(result.output) def GetHostname(name=None, family=None): """Returns a Hostname object. 
@type name: str @param name: hostname or None @type family: int @param family: AF_INET | AF_INET6 | None @rtype: L{Hostname} @return: Hostname object @raise errors.OpPrereqError: in case of errors in resolving """ try: return Hostname(name=name, family=family) except errors.ResolverError, err: raise errors.OpPrereqError("The given name (%s) does not resolve: %s" % (err[0], err[2]), errors.ECODE_RESOLVER) class Hostname: """Class implementing resolver and hostname functionality. """ _VALID_NAME_RE = re.compile("^[a-z0-9._-]{1,255}$") def __init__(self, name=None, family=None): """Initialize the host name object. If the name argument is None, it will use this system's name. @type family: int @param family: AF_INET | AF_INET6 | None @type name: str @param name: hostname or None """ self.name = self.GetFqdn(name) self.ip = self.GetIP(self.name, family=family) @classmethod def GetSysName(cls): """Legacy method to get the current system's name. """ return cls.GetFqdn() @classmethod def GetFqdn(cls, hostname=None): """Return fqdn. If hostname is None the system's fqdn is returned. @type hostname: str @param hostname: name to be fqdn'ed @rtype: str @return: fqdn of given name, if it exists, unmodified name otherwise """ if hostname is None: virtfqdn = vcluster.GetVirtualHostname() if virtfqdn: result = virtfqdn else: result = socket.getfqdn() else: result = socket.getfqdn(hostname) return cls.GetNormalizedName(result) @staticmethod def GetIP(hostname, family=None): """Return IP address of given hostname. Supports both IPv4 and IPv6. @type hostname: str @param hostname: hostname to look up @type family: int @param family: AF_INET | AF_INET6 | None @rtype: str @return: IP address @raise errors.ResolverError: in case of errors in resolving """ try: if family in (socket.AF_INET, socket.AF_INET6): result = socket.getaddrinfo(hostname, None, family) else: result = socket.getaddrinfo(hostname, None) except (socket.gaierror, socket.herror, socket.error), err: # hostname not found in DNS, or other socket exception in the # (code, description) format raise errors.ResolverError(hostname, err.args[0], err.args[1]) # getaddrinfo() returns a list of 5-tuples (family, socktype, proto, # canonname, sockaddr). We return the first tuple's first address in # sockaddr try: return result[0][4][0] except IndexError, err: # we don't have here an actual error code, it's just that the # data type returned by getaddrinfo is not what we expected; # let's keep the same format in the exception arguments with a # dummy error code raise errors.ResolverError(hostname, 0, "Unknown error in getaddrinfo(): %s" % err) @classmethod def GetNormalizedName(cls, hostname): """Validate and normalize the given hostname. @attention: the validation is a bit more relaxed than the standards require; most importantly, we allow underscores in names @raise errors.OpPrereqError: when the name is not valid """ hostname = hostname.lower() if (not cls._VALID_NAME_RE.match(hostname) or # double-dots, meaning empty label ".." in hostname or # empty initial label hostname.startswith(".")): raise errors.OpPrereqError("Invalid hostname '%s'" % hostname, errors.ECODE_INVAL) if hostname.endswith("."): hostname = hostname.rstrip(".") return hostname def TcpPing(target, port, timeout=10, live_port_needed=False, source=None): """Simple ping implementation using TCP connect(2). Check if the given IP is reachable by attempting a TCP connect to it. 
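Illustrative example (target assumed): TcpPing("192.0.2.1", 22, timeout=5)
returns True if the connection attempt succeeds within five seconds; note
that with live_port_needed left False, a refused connection also counts as
success, since it still proves the target host is reachable.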
@type target: str @param target: the IP to ping @type port: int @param port: the port to connect to @type timeout: int @param timeout: the timeout on the connection attempt @type live_port_needed: boolean @param live_port_needed: whether a closed port will cause the function to return failure, as if there was a timeout @type source: str or None @param source: if specified, will cause the connect to be made from this specific source address; failures to bind other than C{EADDRNOTAVAIL} will be ignored """ logging.debug("Attempting to reach TCP port %s on target %s with a timeout" " of %s seconds", port, target, timeout) try: family = IPAddress.GetAddressFamily(target) except errors.IPAddressError, err: raise errors.ProgrammerError("Family of IP address given in parameter" " 'target' can't be determined: %s" % err) sock = socket.socket(family, socket.SOCK_STREAM) success = False if source is not None: try: sock.bind((source, 0)) except socket.error, err: if err[0] == errno.EADDRNOTAVAIL: success = False sock.settimeout(timeout) try: sock.connect((target, port)) sock.close() success = True except socket.timeout: success = False except socket.error, err: success = (not live_port_needed) and (err[0] == errno.ECONNREFUSED) return success def GetDaemonPort(daemon_name): """Get the daemon port for this cluster. Note that this routine does not read a ganeti-specific file, but instead uses C{socket.getservbyname} to allow pre-customization of this parameter outside of Ganeti. @type daemon_name: string @param daemon_name: daemon name (in constants.DAEMONS_PORTS) @rtype: int """ if daemon_name not in constants.DAEMONS_PORTS: raise errors.ProgrammerError("Unknown daemon: %s" % daemon_name) (proto, default_port) = constants.DAEMONS_PORTS[daemon_name] try: port = socket.getservbyname(daemon_name, proto) except socket.error: port = default_port return port class IPAddress(object): """Class that represents an IP address. """ iplen = 0 family = None loopback_cidr = None @staticmethod def _GetIPIntFromString(address): """Abstract method to please pylint. """ raise NotImplementedError @classmethod def IsValid(cls, address): """Validate an IP address. @type address: str @param address: IP address to be checked @rtype: bool @return: True if valid, False otherwise """ if cls.family is None: try: family = cls.GetAddressFamily(address) except errors.IPAddressError: return False else: family = cls.family try: socket.inet_pton(family, address) return True except socket.error: return False @classmethod def ValidateNetmask(cls, netmask): """Validate a netmask suffix in CIDR notation. @type netmask: int @param netmask: netmask suffix to validate @rtype: bool @return: True if valid, False otherwise """ assert (isinstance(netmask, (int, long))) return 0 < netmask <= cls.iplen @classmethod def Own(cls, address): """Check if the current host has the given IP address. This is done by trying to bind the given address. We return True if we succeed or False if a socket.error is raised. @type address: str @param address: IP address to be checked @rtype: bool @return: True if we own the address, False otherwise """ if cls.family is None: try: family = cls.GetAddressFamily(address) except errors.IPAddressError: return False else: family = cls.family s = socket.socket(family, socket.SOCK_DGRAM) success = False try: try: s.bind((address, 0)) success = True except socket.error: success = False finally: s.close() return success @classmethod def InNetwork(cls, cidr, address): """Determine whether an address is within a network. 
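Illustrative example (addresses assumed):
IP4Address.InNetwork("192.0.2.0/24", "192.0.2.10") evaluates to True,
while the same check for "198.51.100.1" evaluates to False.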
@type cidr: string @param cidr: Network in CIDR notation, e.g. '192.0.2.0/24', '2001:db8::/64' @type address: str @param address: IP address @rtype: bool @return: True if address is in cidr, False otherwise """ address_int = cls._GetIPIntFromString(address) subnet = cidr.split("/") assert len(subnet) == 2 try: prefix = int(subnet[1]) except ValueError: return False assert 0 <= prefix <= cls.iplen target_int = cls._GetIPIntFromString(subnet[0]) # Convert prefix netmask to integer value of netmask netmask_int = (2 ** cls.iplen) - 1 ^ ((2 ** cls.iplen) - 1 >> prefix) # Calculate hostmask hostmask_int = netmask_int ^ (2 ** cls.iplen) - 1 # Calculate network address by and'ing netmask network_int = target_int & netmask_int # Calculate broadcast address by or'ing hostmask broadcast_int = target_int | hostmask_int return network_int <= address_int <= broadcast_int @staticmethod def GetAddressFamily(address): """Get the address family of the given address. @type address: str @param address: ip address whose family will be returned @rtype: int @return: C{socket.AF_INET} or C{socket.AF_INET6} @raise errors.GenericError: for invalid addresses """ try: return IP4Address(address).family except errors.IPAddressError: pass try: return IP6Address(address).family except errors.IPAddressError: pass raise errors.IPAddressError("Invalid address '%s'" % address) @staticmethod def GetVersionFromAddressFamily(family): """Convert an IP address family to the corresponding IP version. @type family: int @param family: IP address family, one of socket.AF_INET or socket.AF_INET6 @return: an int containing the IP version, one of L{constants.IP4_VERSION} or L{constants.IP6_VERSION} @raise errors.ProgrammerError: for unknown families """ if family == socket.AF_INET: return constants.IP4_VERSION elif family == socket.AF_INET6: return constants.IP6_VERSION raise errors.ProgrammerError("%s is not a valid IP address family" % family) @staticmethod def GetAddressFamilyFromVersion(version): """Convert an IP version to the corresponding IP address family. @type version: int @param version: IP version, one of L{constants.IP4_VERSION} or L{constants.IP6_VERSION} @return: an int containing the IP address family, one of C{socket.AF_INET} or C{socket.AF_INET6} @raise errors.ProgrammerError: for unknown IP versions """ if version == constants.IP4_VERSION: return socket.AF_INET elif version == constants.IP6_VERSION: return socket.AF_INET6 raise errors.ProgrammerError("%s is not a valid IP version" % version) @staticmethod def GetClassFromIpVersion(version): """Return the IPAddress subclass for the given IP version. @type version: int @param version: IP version, one of L{constants.IP4_VERSION} or L{constants.IP6_VERSION} @return: a subclass of L{netutils.IPAddress} @raise errors.ProgrammerError: for unknown IP versions """ if version == constants.IP4_VERSION: return IP4Address elif version == constants.IP6_VERSION: return IP6Address raise errors.ProgrammerError("%s is not a valid IP version" % version) @staticmethod def GetClassFromIpFamily(family): """Return the IPAddress subclass for the given IP family. @param family: IP family (one of C{socket.AF_INET} or C{socket.AF_INET6}) @return: a subclass of L{netutils.IPAddress} @raise errors.ProgrammerError: for unknown IP versions """ return IPAddress.GetClassFromIpVersion( IPAddress.GetVersionFromAddressFamily(family)) @classmethod def IsLoopback(cls, address): """Determine whether it is a loopback address. 
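Illustrative example: IP4Address.IsLoopback("127.0.0.1") is True, since the
address falls inside the IPv4 loopback network 127.0.0.0/8; likewise
IP6Address.IsLoopback("::1") is True for the IPv6 loopback ::1/128.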
@type address: str @param address: IP address to be checked @rtype: bool @return: True if loopback, False otherwise """ try: return cls.InNetwork(cls.loopback_cidr, address) except errors.IPAddressError: return False class IP4Address(IPAddress): """IPv4 address class. """ iplen = 32 family = socket.AF_INET loopback_cidr = "127.0.0.0/8" def __init__(self, address): """Constructor for IPv4 address. @type address: str @param address: IP address @raises errors.IPAddressError: if address invalid """ IPAddress.__init__(self) if not self.IsValid(address): raise errors.IPAddressError("IPv4 Address %s invalid" % address) self.address = address @staticmethod def _GetIPIntFromString(address): """Get integer value of IPv4 address. @type address: str @param address: IPv4 address @rtype: int @return: integer value of given IP address """ address_int = 0 parts = address.split(".") assert len(parts) == 4 for part in parts: address_int = (address_int << 8) | int(part) return address_int class IP6Address(IPAddress): """IPv6 address class. """ iplen = 128 family = socket.AF_INET6 loopback_cidr = "::1/128" def __init__(self, address): """Constructor for IPv6 address. @type address: str @param address: IP address @raises errors.IPAddressError: if address invalid """ IPAddress.__init__(self) if not self.IsValid(address): raise errors.IPAddressError("IPv6 Address [%s] invalid" % address) self.address = address @staticmethod def _GetIPIntFromString(address): """Get integer value of IPv6 address. @type address: str @param address: IPv6 address @rtype: int @return: integer value of given IP address """ doublecolons = address.count("::") assert not doublecolons > 1 if doublecolons == 1: # We have a shorthand address, expand it parts = [] twoparts = address.split("::") sep = len(twoparts[0].split(":")) + len(twoparts[1].split(":")) parts = twoparts[0].split(":") parts.extend(["0"] * (8 - sep)) parts += twoparts[1].split(":") else: parts = address.split(":") address_int = 0 for part in parts: address_int = (address_int << 16) + int(part or "0", 16) return address_int def FormatAddress(address, family=None): """Format a socket address. @type address: family specific (usually tuple) @param address: address, as reported by this class @type family: integer @param family: socket family (one of socket.AF_*) or None """ if family is None: try: family = IPAddress.GetAddressFamily(address[0]) except errors.IPAddressError: raise errors.ParameterError(address) if family == socket.AF_UNIX and len(address) == 3: return "pid=%s, uid=%s, gid=%s" % address if family in (socket.AF_INET, socket.AF_INET6) and len(address) == 2: host, port = address if family == socket.AF_INET6: res = "[%s]" % host else: res = host if port is not None: res += ":%s" % port return res raise errors.ParameterError(family, address) ganeti-2.9.3/lib/build/0000755000000000000000000000000012271445544014647 5ustar00rootroot00000000000000ganeti-2.9.3/lib/build/sphinx_ext.py0000644000000000000000000004142712271422343017412 0ustar00rootroot00000000000000# # # Copyright (C) 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Sphinx extension for building opcode documentation. """ import re from cStringIO import StringIO import docutils.statemachine import docutils.nodes import docutils.utils import docutils.parsers.rst import sphinx.errors import sphinx.util.compat import sphinx.roles import sphinx.addnodes s_compat = sphinx.util.compat try: # Access to a protected member of a client class # pylint: disable=W0212 orig_manpage_role = docutils.parsers.rst.roles._roles["manpage"] except (AttributeError, ValueError, KeyError), err: # Normally the "manpage" role is registered by sphinx/roles.py raise Exception("Can't find reST role named 'manpage': %s" % err) from ganeti import constants from ganeti import compat from ganeti import errors from ganeti import utils from ganeti import opcodes from ganeti import ht from ganeti import rapi from ganeti import luxi from ganeti import objects from ganeti import http from ganeti import _autoconf import ganeti.rapi.rlib2 # pylint: disable=W0611 import ganeti.rapi.connector # pylint: disable=W0611 #: Regular expression for man page names _MAN_RE = re.compile(r"^(?P<name>[-\w_]+)\((?P<section>
\d+)\)$") _TAB_WIDTH = 2 RAPI_URI_ENCODE_RE = re.compile("[^_a-z0-9]+", re.I) class ReSTError(Exception): """Custom class for generating errors in Sphinx. """ def _GetCommonParamNames(): """Builds a list of parameters common to all opcodes. """ names = set(map(compat.fst, opcodes.OpCode.OP_PARAMS)) # The "depends" attribute should be listed names.remove(opcodes.DEPEND_ATTR) return names COMMON_PARAM_NAMES = _GetCommonParamNames() #: Namespace for evaluating expressions EVAL_NS = dict(compat=compat, constants=constants, utils=utils, errors=errors, rlib2=rapi.rlib2, luxi=luxi, rapi=rapi, objects=objects, http=http) # Constants documentation for man pages CV_ECODES_DOC = "ecodes" # We don't care about the leak of variables _, name and doc here. # pylint: disable=W0621 CV_ECODES_DOC_LIST = [(name, doc) for (_, name, doc) in constants.CV_ALL_ECODES] DOCUMENTED_CONSTANTS = { CV_ECODES_DOC: CV_ECODES_DOC_LIST, } class OpcodeError(sphinx.errors.SphinxError): category = "Opcode error" def _SplitOption(text): """Split simple option list. @type text: string @param text: Options, e.g. "foo, bar, baz" """ return [i.strip(",").strip() for i in text.split()] def _ParseAlias(text): """Parse simple assignment option. @type text: string @param text: Assignments, e.g. "foo=bar, hello=world" @rtype: dict """ result = {} for part in _SplitOption(text): if "=" not in part: raise OpcodeError("Invalid option format, missing equal sign") (name, value) = part.split("=", 1) result[name.strip()] = value.strip() return result def _BuildOpcodeParams(op_id, include, exclude, alias): """Build opcode parameter documentation. @type op_id: string @param op_id: Opcode ID """ op_cls = opcodes.OP_MAPPING[op_id] params_with_alias = \ utils.NiceSort([(alias.get(name, name), name, default, test, doc) for (name, default, test, doc) in op_cls.GetAllParams()], key=compat.fst) for (rapi_name, name, default, test, doc) in params_with_alias: # Hide common parameters if not explicitly included if (name in COMMON_PARAM_NAMES and (not include or name not in include)): continue if exclude is not None and name in exclude: continue if include is not None and name not in include: continue has_default = default is not ht.NoDefault has_test = not (test is None or test is ht.NoType) buf = StringIO() buf.write("``%s``" % (rapi_name,)) if has_default or has_test: buf.write(" (") if has_default: buf.write("defaults to ``%s``" % (default,)) if has_test: buf.write(", ") if has_test: buf.write("must be ``%s``" % (test,)) buf.write(")") yield buf.getvalue() # Add text for line in doc.splitlines(): yield " %s" % line def _BuildOpcodeResult(op_id): """Build opcode result documentation. @type op_id: string @param op_id: Opcode ID """ op_cls = opcodes.OP_MAPPING[op_id] result_fn = getattr(op_cls, "OP_RESULT", None) if not result_fn: raise OpcodeError("Opcode '%s' has no result description" % op_id) return "``%s``" % result_fn class OpcodeParams(s_compat.Directive): """Custom directive for opcode parameters. See also . 
""" has_content = False required_arguments = 1 optional_arguments = 0 final_argument_whitespace = False option_spec = dict(include=_SplitOption, exclude=_SplitOption, alias=_ParseAlias) def run(self): op_id = self.arguments[0] include = self.options.get("include", None) exclude = self.options.get("exclude", None) alias = self.options.get("alias", {}) path = op_id include_text = "\n".join(_BuildOpcodeParams(op_id, include, exclude, alias)) # Inject into state machine include_lines = docutils.statemachine.string2lines(include_text, _TAB_WIDTH, convert_whitespace=1) self.state_machine.insert_input(include_lines, path) return [] class OpcodeResult(s_compat.Directive): """Custom directive for opcode result. See also . """ has_content = False required_arguments = 1 optional_arguments = 0 final_argument_whitespace = False def run(self): op_id = self.arguments[0] path = op_id include_text = _BuildOpcodeResult(op_id) # Inject into state machine include_lines = docutils.statemachine.string2lines(include_text, _TAB_WIDTH, convert_whitespace=1) self.state_machine.insert_input(include_lines, path) return [] def PythonEvalRole(role, rawtext, text, lineno, inliner, options={}, content=[]): """Custom role to evaluate Python expressions. The expression's result is included as a literal. """ # pylint: disable=W0102,W0613,W0142 # W0102: Dangerous default value as argument # W0142: Used * or ** magic # W0613: Unused argument code = docutils.utils.unescape(text, restore_backslashes=True) try: result = eval(code, EVAL_NS) except Exception, err: # pylint: disable=W0703 msg = inliner.reporter.error("Failed to evaluate %r: %s" % (code, err), line=lineno) return ([inliner.problematic(rawtext, rawtext, msg)], [msg]) node = docutils.nodes.literal("", unicode(result), **options) return ([node], []) class PythonAssert(s_compat.Directive): """Custom directive for writing assertions. The content must be a valid Python expression. If its result does not evaluate to C{True}, the assertion fails. """ has_content = True required_arguments = 0 optional_arguments = 0 final_argument_whitespace = False def run(self): # Handle combinations of Sphinx and docutils not providing the wanted method if hasattr(self, "assert_has_content"): self.assert_has_content() else: assert self.content code = "\n".join(self.content) try: result = eval(code, EVAL_NS) except Exception, err: raise self.error("Failed to evaluate %r: %s" % (code, err)) if not result: raise self.error("Assertion failed: %s" % (code, )) return [] def BuildQueryFields(fields): """Build query fields documentation. @type fields: dict (field name as key, field details as value) """ defs = [(fdef.name, fdef.doc) for (_, (fdef, _, _, _)) in utils.NiceSort(fields.items(), key=compat.fst)] return BuildValuesDoc(defs) def BuildValuesDoc(values): """Builds documentation for a list of values @type values: list of tuples in the form (value, documentation) """ for name, doc in values: assert len(doc.splitlines()) == 1 yield "``%s``" % (name,) yield " %s" % (doc,) def _ManPageNodeClass(*args, **kwargs): """Generates a pending XRef like a ":doc:`...`" reference. """ # Type for sphinx/environment.py:BuildEnvironment.resolve_references kwargs["reftype"] = "doc" # Force custom title kwargs["refexplicit"] = True return sphinx.addnodes.pending_xref(*args, **kwargs) class _ManPageXRefRole(sphinx.roles.XRefRole): def __init__(self): """Initializes this class. 
""" sphinx.roles.XRefRole.__init__(self, nodeclass=_ManPageNodeClass, warn_dangling=True) assert not hasattr(self, "converted"), \ "Sphinx base class gained an attribute named 'converted'" self.converted = None def process_link(self, env, refnode, has_explicit_title, title, target): """Specialization for man page links. """ if has_explicit_title: raise ReSTError("Setting explicit title is not allowed for man pages") # Check format and extract name and section m = _MAN_RE.match(title) if not m: raise ReSTError("Man page reference '%s' does not match regular" " expression '%s'" % (title, _MAN_RE.pattern)) name = m.group("name") section = int(m.group("section")) wanted_section = _autoconf.MAN_PAGES.get(name, None) if not (wanted_section is None or wanted_section == section): raise ReSTError("Referenced man page '%s' has section number %s, but the" " reference uses section %s" % (name, wanted_section, section)) self.converted = bool(wanted_section is not None and env.app.config.enable_manpages) if self.converted: # Create link to known man page return (title, "man-%s" % name) else: # No changes return (title, target) def _ManPageRole(typ, rawtext, text, lineno, inliner, # pylint: disable=W0102 options={}, content=[]): """Custom role for man page references. Converts man pages to links if enabled during the build. """ xref = _ManPageXRefRole() assert ht.TNone(xref.converted) # Check if it's a known man page try: result = xref(typ, rawtext, text, lineno, inliner, options=options, content=content) except ReSTError, err: msg = inliner.reporter.error(str(err), line=lineno) return ([inliner.problematic(rawtext, rawtext, msg)], [msg]) assert ht.TBool(xref.converted) # Return if the conversion was successful (i.e. the man page was known and # conversion was enabled) if xref.converted: return result # Fallback if man page links are disabled or an unknown page is referenced return orig_manpage_role(typ, rawtext, text, lineno, inliner, options=options, content=content) def _EncodeRapiResourceLink(method, uri): """Encodes a RAPI resource URI for use as a link target. """ parts = [RAPI_URI_ENCODE_RE.sub("-", uri.lower()).strip("-")] if method is not None: parts.append(method.lower()) return "rapi-res-%s" % "+".join(filter(None, parts)) def _MakeRapiResourceLink(method, uri): """Generates link target name for RAPI resource. """ if uri in ["/", "/2"]: # Don't link these return None elif uri == "/version": return _EncodeRapiResourceLink(method, uri) elif uri.startswith("/2/"): return _EncodeRapiResourceLink(method, uri[len("/2/"):]) else: raise ReSTError("Unhandled URI '%s'" % uri) def _GetHandlerMethods(handler): """Returns list of HTTP methods supported by handler class. @type handler: L{rapi.baserlib.ResourceBase} @param handler: Handler class @rtype: list of strings """ return sorted(method for (method, op_attr, _, _) in rapi.baserlib.OPCODE_ATTRS # Only if handler supports method if hasattr(handler, method) or hasattr(handler, op_attr)) def _DescribeHandlerAccess(handler, method): """Returns textual description of required RAPI permissions. @type handler: L{rapi.baserlib.ResourceBase} @param handler: Handler class @type method: string @param method: HTTP method (e.g. L{http.HTTP_GET}) @rtype: string """ access = rapi.baserlib.GetHandlerAccess(handler, method) if access: return utils.CommaJoin(sorted(access)) else: return "*(none)*" class _RapiHandlersForDocsHelper(object): @classmethod def Build(cls): """Returns dictionary of resource handlers. 
""" resources = \ rapi.connector.GetHandlers("[node_name]", "[instance_name]", "[group_name]", "[network_name]", "[job_id]", "[disk_index]", "[resource]", translate=cls._TranslateResourceUri) return resources @classmethod def _TranslateResourceUri(cls, *args): """Translates a resource URI for use in documentation. @see: L{rapi.connector.GetHandlers} """ return "".join(map(cls._UriPatternToString, args)) @staticmethod def _UriPatternToString(value): """Converts L{rapi.connector.UriPattern} to strings. """ if isinstance(value, rapi.connector.UriPattern): return value.content else: return value _RAPI_RESOURCES_FOR_DOCS = _RapiHandlersForDocsHelper.Build() def _BuildRapiAccessTable(res): """Build a table with access permissions needed for all RAPI resources. """ for (uri, handler) in utils.NiceSort(res.items(), key=compat.fst): reslink = _MakeRapiResourceLink(None, uri) if not reslink: # No link was generated continue yield ":ref:`%s <%s>`" % (uri, reslink) for method in _GetHandlerMethods(handler): yield (" | :ref:`%s <%s>`: %s" % (method, _MakeRapiResourceLink(method, uri), _DescribeHandlerAccess(handler, method))) class RapiAccessTable(s_compat.Directive): """Custom directive to generate table of all RAPI resources. See also . """ has_content = False required_arguments = 0 optional_arguments = 0 final_argument_whitespace = False option_spec = {} def run(self): include_text = "\n".join(_BuildRapiAccessTable(_RAPI_RESOURCES_FOR_DOCS)) # Inject into state machine include_lines = docutils.statemachine.string2lines(include_text, _TAB_WIDTH, convert_whitespace=1) self.state_machine.insert_input(include_lines, self.__class__.__name__) return [] class RapiResourceDetails(s_compat.Directive): """Custom directive for RAPI resource details. See also . """ has_content = False required_arguments = 1 optional_arguments = 0 final_argument_whitespace = False def run(self): uri = self.arguments[0] try: handler = _RAPI_RESOURCES_FOR_DOCS[uri] except KeyError: raise self.error("Unknown resource URI '%s'" % uri) lines = [ ".. list-table::", " :widths: 1 4", " :header-rows: 1", "", " * - Method", " - :ref:`Required permissions `", ] for method in _GetHandlerMethods(handler): lines.extend([ " * - :ref:`%s <%s>`" % (method, _MakeRapiResourceLink(method, uri)), " - %s" % _DescribeHandlerAccess(handler, method), ]) # Inject into state machine include_lines = \ docutils.statemachine.string2lines("\n".join(lines), _TAB_WIDTH, convert_whitespace=1) self.state_machine.insert_input(include_lines, self.__class__.__name__) return [] def setup(app): """Sphinx extension callback. """ # TODO: Implement Sphinx directive for query fields app.add_directive("opcode_params", OpcodeParams) app.add_directive("opcode_result", OpcodeResult) app.add_directive("pyassert", PythonAssert) app.add_role("pyeval", PythonEvalRole) app.add_directive("rapi_access_table", RapiAccessTable) app.add_directive("rapi_resource_details", RapiResourceDetails) app.add_config_value("enable_manpages", False, True) app.add_role("manpage", _ManPageRole) ganeti-2.9.3/lib/build/__init__.py0000644000000000000000000000234212230001635016741 0ustar00rootroot00000000000000# # # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module used during the Ganeti build process""" import imp import os def LoadModule(filename): """Loads an external module by filename. Use this function with caution. Python will always write the compiled source to a file named "${filename}c". @type filename: string @param filename: Path to module """ (name, ext) = os.path.splitext(filename) fh = open(filename, "U") try: return imp.load_module(name, fh, filename, (ext, "U", imp.PY_SOURCE)) finally: fh.close() ganeti-2.9.3/lib/build/shell_example_lexer.py0000644000000000000000000000413512230001635021225 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Pygments lexer for our custom shell example sessions. The lexer support the following custom markup: - comments: # this is a comment - command lines: '$ ' at the beginning of a line denotes a command - variable input: %input% (works in both commands and screen output) - otherwise, regular text output from commands will be plain """ from pygments.lexer import RegexLexer, bygroups, include from pygments.token import Name, Text, Generic, Comment class ShellExampleLexer(RegexLexer): name = "ShellExampleLexer" aliases = "shell-example" filenames = [] tokens = { "root": [ include("comments"), include("userinput"), # switch to state input on '$ ' at the start of the line (r"^\$ ", Text, "input"), (r"\s+", Text), (r"[^#%\s\\]+", Text), (r"\\", Text), ], "input": [ include("comments"), include("userinput"), (r"[^#%\s\\]+", Generic.Strong), (r"\\\n", Generic.Strong), (r"\\", Generic.Strong), # switch to prev state at non-escaped new-line (r"\n", Text, "#pop"), (r"\s+", Text), ], "comments": [ (r"#.*\n", Comment.Single), ], "userinput": [ (r"(\\)(%)", bygroups(None, Text)), (r"(%)([^%]*)(%)", bygroups(None, Name.Variable, None)), ], } def setup(app): app.add_lexer("shell-example", ShellExampleLexer()) ganeti-2.9.3/lib/jstore.py0000644000000000000000000001405212271422343015422 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the job queue handling.""" import errno import os from ganeti import constants from ganeti import errors from ganeti import runtime from ganeti import utils from ganeti import pathutils JOBS_PER_ARCHIVE_DIRECTORY = 10000 def _ReadNumericFile(file_name): """Reads a file containing a number. @rtype: None or int @return: None if file is not found, otherwise number """ try: contents = utils.ReadFile(file_name) except EnvironmentError, err: if err.errno in (errno.ENOENT, ): return None raise try: return int(contents) except (ValueError, TypeError), err: # Couldn't convert to int raise errors.JobQueueError("Content of file '%s' is not numeric: %s" % (file_name, err)) def ReadSerial(): """Read the serial file. The queue should be locked while this function is called. """ return _ReadNumericFile(pathutils.JOB_QUEUE_SERIAL_FILE) def ReadVersion(): """Read the queue version. The queue should be locked while this function is called. """ return _ReadNumericFile(pathutils.JOB_QUEUE_VERSION_FILE) def InitAndVerifyQueue(must_lock): """Open and lock job queue. If necessary, the queue is automatically initialized. @type must_lock: bool @param must_lock: Whether an exclusive lock must be held. @rtype: utils.FileLock @return: Lock object for the queue. This can be used to change the locking mode. """ getents = runtime.GetEnts() # Lock queue queue_lock = utils.FileLock.Open(pathutils.JOB_QUEUE_LOCK_FILE) try: # The queue needs to be locked in exclusive mode to write to the serial and # version files. if must_lock: queue_lock.Exclusive(blocking=True) holding_lock = True else: try: queue_lock.Exclusive(blocking=False) holding_lock = True except errors.LockError: # Ignore errors and assume the process keeping the lock checked # everything. holding_lock = False if holding_lock: # Verify version version = ReadVersion() if version is None: # Write new version file utils.WriteFile(pathutils.JOB_QUEUE_VERSION_FILE, uid=getents.masterd_uid, gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS, data="%s\n" % constants.JOB_QUEUE_VERSION) # Read again version = ReadVersion() if version != constants.JOB_QUEUE_VERSION: raise errors.JobQueueError("Found job queue version %s, expected %s", version, constants.JOB_QUEUE_VERSION) serial = ReadSerial() if serial is None: # Write new serial file utils.WriteFile(pathutils.JOB_QUEUE_SERIAL_FILE, uid=getents.masterd_uid, gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS, data="%s\n" % 0) # Read again serial = ReadSerial() if serial is None: # There must be a serious problem raise errors.JobQueueError("Can't read/parse the job queue" " serial file") if not must_lock: # There's no need for more error handling. Closing the lock # file below in case of an error will unlock it anyway. queue_lock.Unlock() except: queue_lock.Close() raise return queue_lock def CheckDrainFlag(): """Check if the queue is marked to be drained. This currently uses the queue drain file, which makes it a per-node flag. In the future this can be moved to the config file. 
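The check is deliberately cheap (a single C{os.path.exists} call), so callers can afford to perform it frequently.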
@rtype: boolean @return: True if the job queue is marked drained """ return os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) def SetDrainFlag(drain_flag): """Sets the drain flag for the queue. @type drain_flag: boolean @param drain_flag: Whether to set or unset the drain flag @attention: This function should only be called by the current holder of the queue lock """ getents = runtime.GetEnts() if drain_flag: utils.WriteFile(pathutils.JOB_QUEUE_DRAIN_FILE, data="", uid=getents.masterd_uid, gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) else: utils.RemoveFile(pathutils.JOB_QUEUE_DRAIN_FILE) assert (not drain_flag) ^ CheckDrainFlag() def FormatJobID(job_id): """Convert a job ID to int format. Currently this is just a no-op that performs some checks, but if we want to change the job id format this will abstract this change. @type job_id: int or long @param job_id: the numeric job id @rtype: int @return: the formatted job id """ if not isinstance(job_id, (int, long)): raise errors.ProgrammerError("Job ID '%s' not numeric" % job_id) if job_id < 0: raise errors.ProgrammerError("Job ID %s is negative" % job_id) return job_id def GetArchiveDirectory(job_id): """Returns the archive directory for a job. @type job_id: str @param job_id: Job identifier @rtype: str @return: Directory name """ return str(ParseJobId(job_id) / JOBS_PER_ARCHIVE_DIRECTORY) def ParseJobId(job_id): """Parses a job ID and converts it to integer. """ try: return int(job_id) except (ValueError, TypeError): raise errors.ParameterError("Invalid job ID '%s'" % job_id) ganeti-2.9.3/lib/ssh.py0000644000000000000000000002452712271422343014721 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module encapsulating ssh functionality. """ import os import logging from ganeti import utils from ganeti import errors from ganeti import constants from ganeti import netutils from ganeti import pathutils from ganeti import vcluster from ganeti import compat def GetUserFiles(user, mkdir=False, dircheck=True, kind=constants.SSHK_DSA, _homedir_fn=None): """Return the paths of a user's SSH files. 
@type user: string @param user: Username @type mkdir: bool @param mkdir: Whether to create ".ssh" directory if it doesn't exist @type dircheck: bool @param dircheck: Whether to check if ".ssh" directory exists @type kind: string @param kind: One of L{constants.SSHK_ALL} @rtype: tuple; (string, string, string) @return: Tuple containing three file system paths; the private SSH key file, the public SSH key file and the user's C{authorized_keys} file @raise errors.OpExecError: When home directory of the user can not be determined @raise errors.OpExecError: Regardless of the C{mkdir} parameters, this exception is raised if C{~$user/.ssh} is not a directory and C{dircheck} is set to C{True} """ if _homedir_fn is None: _homedir_fn = utils.GetHomeDir user_dir = _homedir_fn(user) if not user_dir: raise errors.OpExecError("Cannot resolve home of user '%s'" % user) if kind == constants.SSHK_DSA: suffix = "dsa" elif kind == constants.SSHK_RSA: suffix = "rsa" else: raise errors.ProgrammerError("Unknown SSH key kind '%s'" % kind) ssh_dir = utils.PathJoin(user_dir, ".ssh") if mkdir: utils.EnsureDirs([(ssh_dir, constants.SECURE_DIR_MODE)]) elif dircheck and not os.path.isdir(ssh_dir): raise errors.OpExecError("Path %s is not a directory" % ssh_dir) return [utils.PathJoin(ssh_dir, base) for base in ["id_%s" % suffix, "id_%s.pub" % suffix, "authorized_keys"]] def GetAllUserFiles(user, mkdir=False, dircheck=True, _homedir_fn=None): """Wrapper over L{GetUserFiles} to retrieve files for all SSH key types. See L{GetUserFiles} for details. @rtype: tuple; (string, dict with string as key, tuple of (string, string) as value) """ helper = compat.partial(GetUserFiles, user, mkdir=mkdir, dircheck=dircheck, _homedir_fn=_homedir_fn) result = [(kind, helper(kind=kind)) for kind in constants.SSHK_ALL] authorized_keys = [i for (_, (_, _, i)) in result] assert len(frozenset(authorized_keys)) == 1, \ "Different paths for authorized_keys were returned" return (authorized_keys[0], dict((kind, (privkey, pubkey)) for (kind, (privkey, pubkey, _)) in result)) class SshRunner: """Wrapper for SSH commands. """ def __init__(self, cluster_name, ipv6=False): """Initializes this class. @type cluster_name: str @param cluster_name: name of the cluster @type ipv6: bool @param ipv6: If true, force ssh to use IPv6 addresses only """ self.cluster_name = cluster_name self.ipv6 = ipv6 def _BuildSshOptions(self, batch, ask_key, use_cluster_key, strict_host_check, private_key=None, quiet=True): """Builds a list with needed SSH options. @param batch: same as ssh's batch option @param ask_key: allows ssh to ask for key confirmation; this parameter conflicts with the batch one @param use_cluster_key: if True, use the cluster name as the HostKeyAlias name @param strict_host_check: this makes the host key checking strict @param private_key: use this private key instead of the default @param quiet: whether to enable -q to ssh @rtype: list @return: the list of options ready to use in L{utils.process.RunCmd} """ options = [ "-oEscapeChar=none", "-oHashKnownHosts=no", "-oGlobalKnownHostsFile=%s" % pathutils.SSH_KNOWN_HOSTS_FILE, "-oUserKnownHostsFile=/dev/null", "-oCheckHostIp=no", ] if use_cluster_key: options.append("-oHostKeyAlias=%s" % self.cluster_name) if quiet: options.append("-q") if private_key: options.append("-i%s" % private_key) # TODO: Too many boolean options, maybe convert them to more descriptive # constants. 
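# For reference, the branches below resolve StrictHostKeyChecking as
# follows: batch & strict -> yes, batch & !strict -> no, non-batch with
# ask_key -> ask, non-batch & strict -> yes, otherwise -> no; requesting
# ask_key together with batch mode is a programming error.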
# Note: ask_key conflicts with batch mode if batch: if ask_key: raise errors.ProgrammerError("SSH call requested conflicting options") options.append("-oBatchMode=yes") if strict_host_check: options.append("-oStrictHostKeyChecking=yes") else: options.append("-oStrictHostKeyChecking=no") else: # non-batch mode if ask_key: options.append("-oStrictHostKeyChecking=ask") elif strict_host_check: options.append("-oStrictHostKeyChecking=yes") else: options.append("-oStrictHostKeyChecking=no") if self.ipv6: options.append("-6") else: options.append("-4") return options def BuildCmd(self, hostname, user, command, batch=True, ask_key=False, tty=False, use_cluster_key=True, strict_host_check=True, private_key=None, quiet=True): """Build an ssh command to execute a command on a remote node. @param hostname: the target host, string @param user: user to auth as @param command: the command @param batch: if true, ssh will run in batch mode with no prompting @param ask_key: if true, ssh will run with StrictHostKeyChecking=ask, so that we can connect to an unknown host (not valid in batch mode) @param use_cluster_key: whether to expect and use the cluster-global SSH key @param strict_host_check: whether to check the host's SSH key at all @param private_key: use this private key instead of the default @param quiet: whether to enable -q to ssh @return: the ssh call to run 'command' on the remote host. """ argv = [constants.SSH] argv.extend(self._BuildSshOptions(batch, ask_key, use_cluster_key, strict_host_check, private_key, quiet=quiet)) if tty: argv.extend(["-t", "-t"]) argv.append("%s@%s" % (user, hostname)) # Insert variables for virtual nodes argv.extend("export %s=%s;" % (utils.ShellQuote(name), utils.ShellQuote(value)) for (name, value) in vcluster.EnvironmentForHost(hostname).items()) argv.append(command) return argv def Run(self, *args, **kwargs): """Runs a command on a remote node. This method has the same return value as `utils.RunCmd()`, which it uses to launch ssh. Args: see SshRunner.BuildCmd. @rtype: L{utils.process.RunResult} @return: the result as from L{utils.process.RunCmd()} """ return utils.RunCmd(self.BuildCmd(*args, **kwargs)) def CopyFileToNode(self, node, filename): """Copy a file to another node with scp. @param node: node in the cluster @param filename: absolute pathname of a local file @rtype: boolean @return: the success of the operation """ if not os.path.isabs(filename): logging.error("File %s must be an absolute path", filename) return False if not os.path.isfile(filename): logging.error("File %s does not exist", filename) return False command = [constants.SCP, "-p"] command.extend(self._BuildSshOptions(True, False, True, True)) command.append(filename) if netutils.IP6Address.IsValid(node): node = netutils.FormatAddress((node, None)) command.append("%s:%s" % (node, vcluster.ExchangeNodeRoot(node, filename))) result = utils.RunCmd(command) if result.failed: logging.error("Copy to node %s failed (%s) error '%s'," " command was '%s'", node, result.fail_reason, result.output, result.cmd) return not result.failed def VerifyNodeHostname(self, node): """Verify hostname consistency via SSH. This function connects via ssh to a node and compares the hostname reported by the node to the name we have (the one that we connected to). 
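On virtual clusters the remote side reports C{$GANETI_HOSTNAME} instead of its real hostname, which is why the shell snippet in the implementation below checks that variable before falling back to C{hostname --fqdn}.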
This is used to detect problems in ssh known_hosts files (conflicting known hosts) and inconsistencies between dns/hosts entries and local machine names @param node: nodename of a host to check; can be short or full qualified hostname @return: (success, detail), where: - success: True/False - detail: string with details """ cmd = ("if test -z \"$GANETI_HOSTNAME\"; then" " hostname --fqdn;" "else" " echo \"$GANETI_HOSTNAME\";" "fi") retval = self.Run(node, constants.SSH_LOGIN_USER, cmd, quiet=False) if retval.failed: msg = "ssh problem" output = retval.output if output: msg += ": %s" % output else: msg += ": %s (no output)" % retval.fail_reason logging.error("Command %s failed: %s", retval.cmd, msg) return False, msg remotehostname = retval.stdout.strip() if not remotehostname or remotehostname != node: if node.startswith(remotehostname + "."): msg = "hostname not FQDN" else: msg = "hostname mismatch" return False, ("%s: expected %s but got %s" % (msg, node, remotehostname)) return True, "host matches" def WriteKnownHostsFile(cfg, file_name): """Writes the cluster-wide equally known_hosts file. """ data = "" if cfg.GetRsaHostKey(): data += "%s ssh-rsa %s\n" % (cfg.GetClusterName(), cfg.GetRsaHostKey()) if cfg.GetDsaHostKey(): data += "%s ssh-dss %s\n" % (cfg.GetClusterName(), cfg.GetDsaHostKey()) utils.WriteFile(file_name, mode=0600, data=data) ganeti-2.9.3/lib/__init__.py0000644000000000000000000000225412244641676015670 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # empty file for package definition """Ganeti python modules""" try: from ganeti import ganeti except ImportError: pass else: raise Exception("A module named \"ganeti.ganeti\" was successfully imported" " and should be removed as it can lead to importing the" " wrong module(s) in other parts of the code, consequently" " leading to failures which are difficult to debug") ganeti-2.9.3/lib/opcodes.py0000644000000000000000000021130712271422343015552 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""OpCodes module This module implements the data structures which define the cluster operations - the so-called opcodes. Every operation which modifies the cluster state is expressed via opcodes. """ # this are practically structures, so disable the message about too # few public methods: # pylint: disable=R0903 import logging import re import ipaddr from ganeti import constants from ganeti import errors from ganeti import ht from ganeti import objects from ganeti import outils # Common opcode attributes #: output fields for a query operation _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString), "Selected output fields") #: the shutdown timeout _PShutdownTimeout = \ ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TNonNegativeInt, "How long to wait for instance to shut down") #: the force parameter _PForce = ("force", False, ht.TBool, "Whether to force the operation") #: a required instance name (for single-instance LUs) _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString, "Instance name") #: a instance UUID (for single-instance LUs) _PInstanceUuid = ("instance_uuid", None, ht.TMaybeString, "Instance UUID") #: Whether to ignore offline nodes _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool, "Whether to ignore offline nodes") #: a required node name (for single-node LUs) _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString, "Node name") #: a node UUID (for use with _PNodeName) _PNodeUuid = ("node_uuid", None, ht.TMaybeString, "Node UUID") #: a required node group name (for single-group LUs) _PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString, "Group name") #: Migration type (live/non-live) _PMigrationMode = ("mode", None, ht.TMaybe(ht.TElemOf(constants.HT_MIGRATION_MODES)), "Migration mode") #: Obsolete 'live' migration mode (boolean) _PMigrationLive = ("live", None, ht.TMaybeBool, "Legacy setting for live migration, do not use") #: Tag type _PTagKind = ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES), "Tag kind") #: List of tag strings _PTags = ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString), "List of tag names") _PForceVariant = ("force_variant", False, ht.TBool, "Whether to force an unknown OS variant") _PWaitForSync = ("wait_for_sync", True, ht.TBool, "Whether to wait for the disk to synchronize") _PWaitForSyncFalse = ("wait_for_sync", False, ht.TBool, "Whether to wait for the disk to synchronize" " (defaults to false)") _PIgnoreConsistency = ("ignore_consistency", False, ht.TBool, "Whether to ignore disk consistency") _PStorageName = ("name", ht.NoDefault, ht.TMaybeString, "Storage name") _PUseLocking = ("use_locking", False, ht.TBool, "Whether to use synchronization") _PNameCheck = ("name_check", True, ht.TBool, "Whether to check name") _PNodeGroupAllocPolicy = \ ("alloc_policy", None, ht.TMaybe(ht.TElemOf(constants.VALID_ALLOC_POLICIES)), "Instance allocation policy") _PGroupNodeParams = ("ndparams", None, ht.TMaybeDict, "Default node parameters for group") _PQueryWhat = ("what", ht.NoDefault, ht.TElemOf(constants.QR_VIA_OP), "Resource(s) to query for") _PEarlyRelease = ("early_release", False, ht.TBool, "Whether to release locks as soon as possible") _PIpCheckDoc = "Whether to ensure instance's IP address is inactive" #: Do not remember instance state changes _PNoRemember = ("no_remember", False, ht.TBool, "Do not remember the state change") #: Target node for instance migration/failover _PMigrationTargetNode = ("target_node", None, ht.TMaybeString, "Target node for 
shared-storage instances") _PMigrationTargetNodeUuid = ("target_node_uuid", None, ht.TMaybeString, "Target node UUID for shared-storage instances") _PStartupPaused = ("startup_paused", False, ht.TBool, "Pause instance at startup") _PVerbose = ("verbose", False, ht.TBool, "Verbose mode") # Parameters for cluster verification _PDebugSimulateErrors = ("debug_simulate_errors", False, ht.TBool, "Whether to simulate errors (useful for debugging)") _PErrorCodes = ("error_codes", False, ht.TBool, "Error codes") _PSkipChecks = ("skip_checks", ht.EmptyList, ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS)), "Which checks to skip") _PIgnoreErrors = ("ignore_errors", ht.EmptyList, ht.TListOf(ht.TElemOf(constants.CV_ALL_ECODES_STRINGS)), "List of error codes that should be treated as warnings") # Disk parameters _PDiskParams = \ ("diskparams", None, ht.TMaybe(ht.TDictOf(ht.TElemOf(constants.DISK_TEMPLATES), ht.TDict)), "Disk templates' parameter defaults") # Parameters for node resource model _PHvState = ("hv_state", None, ht.TMaybeDict, "Set hypervisor states") _PDiskState = ("disk_state", None, ht.TMaybeDict, "Set disk states") #: Opportunistic locking _POpportunisticLocking = \ ("opportunistic_locking", False, ht.TBool, ("Whether to employ opportunistic locking for nodes, meaning nodes" " already locked by another opcode won't be considered for instance" " allocation (only when an iallocator is used)")) _PIgnoreIpolicy = ("ignore_ipolicy", False, ht.TBool, "Whether to ignore ipolicy violations") # Allow runtime changes while migrating _PAllowRuntimeChgs = ("allow_runtime_changes", True, ht.TBool, "Allow runtime changes (eg. memory ballooning)") #: IAllocator field builder _PIAllocFromDesc = lambda desc: ("iallocator", None, ht.TMaybeString, desc) #: a required network name _PNetworkName = ("network_name", ht.NoDefault, ht.TNonEmptyString, "Set network name") _PTargetGroups = \ ("target_groups", None, ht.TMaybeListOf(ht.TNonEmptyString), "Destination group names or UUIDs (defaults to \"all but current group\")") #: OP_ID conversion regular expression _OPID_RE = re.compile("([a-z])([A-Z])") #: Utility function for L{OpClusterSetParams} _TestClusterOsListItem = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.TElemOf(constants.DDMS_VALUES), ht.TNonEmptyString, ])) _TestClusterOsList = ht.TMaybeListOf(_TestClusterOsListItem) # TODO: Generate check from constants.INIC_PARAMS_TYPES #: Utility function for testing NIC definitions _TestNicDef = \ ht.Comment("NIC parameters")(ht.TDictOf(ht.TElemOf(constants.INIC_PARAMS), ht.TMaybeString)) _TSetParamsResultItemItems = [ ht.Comment("name of changed parameter")(ht.TNonEmptyString), ht.Comment("new value")(ht.TAny), ] _TSetParamsResult = \ ht.TListOf(ht.TAnd(ht.TIsLength(len(_TSetParamsResultItemItems)), ht.TItems(_TSetParamsResultItemItems))) # In the disks option we can provide arbitrary parameters too, which # we may not be able to validate at this level, so we just check the # format of the dict here and the checks concerning IDISK_PARAMS will # happen at the LU level _TDiskParams = \ ht.Comment("Disk parameters")(ht.TDictOf(ht.TNonEmptyString, ht.TOr(ht.TNonEmptyString, ht.TInt))) _TQueryRow = \ ht.TListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ht.TElemOf(constants.RS_ALL), ht.TAny]))) _TQueryResult = ht.TListOf(_TQueryRow) _TOldQueryRow = ht.TListOf(ht.TAny) _TOldQueryResult = ht.TListOf(_TOldQueryRow) _SUMMARY_PREFIX = { "CLUSTER_": "C_", "GROUP_": "G_", "NODE_": "N_", "INSTANCE_": "I_", } #: Attribute name for dependencies DEPEND_ATTR = "depends" #: 
Attribute name for comment COMMENT_ATTR = "comment" def _NameComponents(name): """Split an opcode class name into its components @type name: string @param name: the class name, as OpXxxYyy @rtype: array of strings @return: the components of the name """ assert name.startswith("Op") # Note: (?<=[a-z])(?=[A-Z]) would be ideal, since it wouldn't # consume any input, and hence we would just have all the elements # in the list, one by one; but it seems that split doesn't work on # non-consuming input, hence we have to process the input string a # bit name = _OPID_RE.sub(r"\1,\2", name) elems = name.split(",") return elems def _NameToId(name): """Convert an opcode class name to an OP_ID. @type name: string @param name: the class name, as OpXxxYyy @rtype: string @return: the name in the OP_XXXX_YYYY format """ if not name.startswith("Op"): return None return "_".join(n.upper() for n in _NameComponents(name)) def NameToReasonSrc(name): """Convert an opcode class name to a source string for the reason trail @type name: string @param name: the class name, as OpXxxYyy @rtype: string @return: the name in the OP_XXXX_YYYY format """ if not name.startswith("Op"): return None return "%s:%s" % (constants.OPCODE_REASON_SRC_OPCODE, "_".join(n.lower() for n in _NameComponents(name))) def _GenerateObjectTypeCheck(obj, fields_types): """Helper to generate type checks for objects. @param obj: The object to generate type checks @param fields_types: The fields and their types as a dict @return: A ht type check function """ assert set(obj.GetAllSlots()) == set(fields_types.keys()), \ "%s != %s" % (set(obj.GetAllSlots()), set(fields_types.keys())) return ht.TStrictDict(True, True, fields_types) _TQueryFieldDef = \ _GenerateObjectTypeCheck(objects.QueryFieldDefinition, { "name": ht.TNonEmptyString, "title": ht.TNonEmptyString, "kind": ht.TElemOf(constants.QFT_ALL), "doc": ht.TNonEmptyString, }) def _BuildDiskTemplateCheck(accept_none): """Builds check for disk template. @type accept_none: bool @param accept_none: whether to accept None as a correct value @rtype: callable """ template_check = ht.TElemOf(constants.DISK_TEMPLATES) if accept_none: template_check = ht.TMaybe(template_check) return template_check def _CheckStorageType(storage_type): """Ensure a given storage type is valid. """ if storage_type not in constants.STORAGE_TYPES: raise errors.OpPrereqError("Unknown storage type: %s" % storage_type, errors.ECODE_INVAL) return True #: Storage type parameter _PStorageType = ("storage_type", ht.NoDefault, _CheckStorageType, "Storage type") @ht.WithDesc("IPv4 network") def _CheckCIDRNetNotation(value): """Ensure a given CIDR notation type is valid. """ try: ipaddr.IPv4Network(value) except ipaddr.AddressValueError: return False return True @ht.WithDesc("IPv4 address") def _CheckCIDRAddrNotation(value): """Ensure a given CIDR notation type is valid. """ try: ipaddr.IPv4Address(value) except ipaddr.AddressValueError: return False return True @ht.WithDesc("IPv6 address") def _CheckCIDR6AddrNotation(value): """Ensure a given CIDR notation type is valid. """ try: ipaddr.IPv6Address(value) except ipaddr.AddressValueError: return False return True @ht.WithDesc("IPv6 network") def _CheckCIDR6NetNotation(value): """Ensure a given CIDR notation type is valid. 
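The value is accepted exactly when C{ipaddr.IPv6Network} can parse it; a malformed address makes the check return C{False}.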
""" try: ipaddr.IPv6Network(value) except ipaddr.AddressValueError: return False return True _TIpAddress4 = ht.TAnd(ht.TString, _CheckCIDRAddrNotation) _TIpAddress6 = ht.TAnd(ht.TString, _CheckCIDR6AddrNotation) _TIpNetwork4 = ht.TAnd(ht.TString, _CheckCIDRNetNotation) _TIpNetwork6 = ht.TAnd(ht.TString, _CheckCIDR6NetNotation) _TMaybeAddr4List = ht.TMaybe(ht.TListOf(_TIpAddress4)) class _AutoOpParamSlots(outils.AutoSlots): """Meta class for opcode definitions. """ def __new__(mcs, name, bases, attrs): """Called when a class should be created. @param mcs: The meta class @param name: Name of created class @param bases: Base classes @type attrs: dict @param attrs: Class attributes """ assert "OP_ID" not in attrs, "Class '%s' defining OP_ID" % name slots = mcs._GetSlots(attrs) assert "OP_DSC_FIELD" not in attrs or attrs["OP_DSC_FIELD"] in slots, \ "Class '%s' uses unknown field in OP_DSC_FIELD" % name assert ("OP_DSC_FORMATTER" not in attrs or callable(attrs["OP_DSC_FORMATTER"])), \ ("Class '%s' uses non-callable in OP_DSC_FORMATTER (%s)" % (name, type(attrs["OP_DSC_FORMATTER"]))) attrs["OP_ID"] = _NameToId(name) return outils.AutoSlots.__new__(mcs, name, bases, attrs) @classmethod def _GetSlots(mcs, attrs): """Build the slots out of OP_PARAMS. """ # Always set OP_PARAMS to avoid duplicates in BaseOpCode.GetAllParams params = attrs.setdefault("OP_PARAMS", []) # Use parameter names as slots return [pname for (pname, _, _, _) in params] class BaseOpCode(outils.ValidatedSlots): """A simple serializable object. This object serves as a parent class for OpCode without any custom field handling. """ # pylint: disable=E1101 # as OP_ID is dynamically defined __metaclass__ = _AutoOpParamSlots def __getstate__(self): """Generic serializer. This method just returns the contents of the instance as a dictionary. @rtype: C{dict} @return: the instance attributes and their values """ state = {} for name in self.GetAllSlots(): if hasattr(self, name): state[name] = getattr(self, name) return state def __setstate__(self, state): """Generic unserializer. This method just restores from the serialized state the attributes of the current instance. @param state: the serialized opcode data @type state: C{dict} """ if not isinstance(state, dict): raise ValueError("Invalid data to __setstate__: expected dict, got %s" % type(state)) for name in self.GetAllSlots(): if name not in state and hasattr(self, name): delattr(self, name) for name in state: setattr(self, name, state[name]) @classmethod def GetAllParams(cls): """Compute list of all parameters for an opcode. """ slots = [] for parent in cls.__mro__: slots.extend(getattr(parent, "OP_PARAMS", [])) return slots def Validate(self, set_defaults): # pylint: disable=W0221 """Validate opcode parameters, optionally setting default values. 
@type set_defaults: bool @param set_defaults: Whether to set default values @raise errors.OpPrereqError: When a parameter value doesn't match requirements """ for (attr_name, default, test, _) in self.GetAllParams(): assert test == ht.NoType or callable(test) if not hasattr(self, attr_name): if default == ht.NoDefault: raise errors.OpPrereqError("Required parameter '%s.%s' missing" % (self.OP_ID, attr_name), errors.ECODE_INVAL) elif set_defaults: if callable(default): dval = default() else: dval = default setattr(self, attr_name, dval) if test == ht.NoType: # no tests here continue if set_defaults or hasattr(self, attr_name): attr_val = getattr(self, attr_name) if not test(attr_val): logging.error("OpCode %s, parameter %s, has invalid type %s/value" " '%s' expecting type %s", self.OP_ID, attr_name, type(attr_val), attr_val, test) raise errors.OpPrereqError("Parameter '%s.%s' fails validation" % (self.OP_ID, attr_name), errors.ECODE_INVAL) def _BuildJobDepCheck(relative): """Builds check for job dependencies (L{DEPEND_ATTR}). @type relative: bool @param relative: Whether to accept relative job IDs (negative) @rtype: callable """ if relative: job_id = ht.TOr(ht.TJobId, ht.TRelativeJobId) else: job_id = ht.TJobId job_dep = \ ht.TAnd(ht.TOr(ht.TList, ht.TTuple), ht.TIsLength(2), ht.TItems([job_id, ht.TListOf(ht.TElemOf(constants.JOBS_FINALIZED))])) return ht.TMaybeListOf(job_dep) TNoRelativeJobDependencies = _BuildJobDepCheck(False) #: List of submission status and job ID as returned by C{SubmitManyJobs} _TJobIdListItem = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ht.Comment("success")(ht.TBool), ht.Comment("Job ID if successful, error message" " otherwise")(ht.TOr(ht.TString, ht.TJobId))])) TJobIdList = ht.TListOf(_TJobIdListItem) #: Result containing only list of submitted jobs TJobIdListOnly = ht.TStrictDict(True, True, { constants.JOB_IDS_KEY: ht.Comment("List of submitted jobs")(TJobIdList), }) class OpCode(BaseOpCode): """Abstract OpCode. This is the root of the actual OpCode hierarchy. All clases derived from this class should override OP_ID. @cvar OP_ID: The ID of this opcode. This should be unique amongst all children of this class. @cvar OP_DSC_FIELD: The name of a field whose value will be included in the string returned by Summary(); see the docstring of that method for details). @cvar OP_DSC_FORMATTER: A callable that should format the OP_DSC_FIELD; if not present, then the field will be simply converted to string @cvar OP_PARAMS: List of opcode attributes, the default values they should get if not already defined, and types they must match. @cvar OP_RESULT: Callable to verify opcode result @cvar WITH_LU: Boolean that specifies whether this should be included in mcpu's dispatch table @ivar dry_run: Whether the LU should be run in dry-run mode, i.e. 
just the check steps @ivar priority: Opcode priority for queue """ # pylint: disable=E1101 # as OP_ID is dynamically defined WITH_LU = True OP_PARAMS = [ ("dry_run", None, ht.TMaybeBool, "Run checks only, don't execute"), ("debug_level", None, ht.TMaybe(ht.TNonNegativeInt), "Debug level"), ("priority", constants.OP_PRIO_DEFAULT, ht.TElemOf(constants.OP_PRIO_SUBMIT_VALID), "Opcode priority"), (DEPEND_ATTR, None, _BuildJobDepCheck(True), "Job dependencies; if used through ``SubmitManyJobs`` relative (negative)" " job IDs can be used; see :doc:`design document `" " for details"), (COMMENT_ATTR, None, ht.TMaybeString, "Comment describing the purpose of the opcode"), (constants.OPCODE_REASON, ht.EmptyList, ht.TMaybeList, "The reason trail, describing why the OpCode is executed"), ] OP_RESULT = None def __getstate__(self): """Specialized getstate for opcodes. This method adds to the state dictionary the OP_ID of the class, so that on unload we can identify the correct class for instantiating the opcode. @rtype: C{dict} @return: the state as a dictionary """ data = BaseOpCode.__getstate__(self) data["OP_ID"] = self.OP_ID return data @classmethod def LoadOpCode(cls, data): """Generic load opcode method. The method identifies the correct opcode class from the dict-form by looking for a OP_ID key, if this is not found, or its value is not available in this module as a child of this class, we fail. @type data: C{dict} @param data: the serialized opcode """ if not isinstance(data, dict): raise ValueError("Invalid data to LoadOpCode (%s)" % type(data)) if "OP_ID" not in data: raise ValueError("Invalid data to LoadOpcode, missing OP_ID") op_id = data["OP_ID"] op_class = None if op_id in OP_MAPPING: op_class = OP_MAPPING[op_id] else: raise ValueError("Invalid data to LoadOpCode: OP_ID %s unsupported" % op_id) op = op_class() new_data = data.copy() del new_data["OP_ID"] op.__setstate__(new_data) return op def Summary(self): """Generates a summary description of this opcode. The summary is the value of the OP_ID attribute (without the "OP_" prefix), plus the value of the OP_DSC_FIELD attribute, if one was defined; this field should allow to easily identify the operation (for an instance creation job, e.g., it would be the instance name). """ assert self.OP_ID is not None and len(self.OP_ID) > 3 # all OP_ID start with OP_, we remove that txt = self.OP_ID[3:] field_name = getattr(self, "OP_DSC_FIELD", None) if field_name: field_value = getattr(self, field_name, None) field_formatter = getattr(self, "OP_DSC_FORMATTER", None) if callable(field_formatter): field_value = field_formatter(field_value) elif isinstance(field_value, (list, tuple)): field_value = ",".join(str(i) for i in field_value) txt = "%s(%s)" % (txt, field_value) return txt def TinySummary(self): """Generates a compact summary description of the opcode. """ assert self.OP_ID.startswith("OP_") text = self.OP_ID[3:] for (prefix, supplement) in _SUMMARY_PREFIX.items(): if text.startswith(prefix): return supplement + text[len(prefix):] return text # cluster opcodes class OpClusterPostInit(OpCode): """Post cluster initialization. This opcode does not touch the cluster at all. Its purpose is to run hooks after the cluster has been initialized. """ OP_RESULT = ht.TBool class OpClusterDestroy(OpCode): """Destroy the cluster. This opcode has no other parameters. All the state is irreversibly lost after the execution of this opcode. 
""" OP_RESULT = ht.TNonEmptyString class OpClusterQuery(OpCode): """Query cluster information.""" OP_RESULT = ht.TDictOf(ht.TNonEmptyString, ht.TAny) class OpClusterVerify(OpCode): """Submits all jobs necessary to verify the cluster. """ OP_PARAMS = [ _PDebugSimulateErrors, _PErrorCodes, _PSkipChecks, _PIgnoreErrors, _PVerbose, ("group_name", None, ht.TMaybeString, "Group to verify"), ] OP_RESULT = TJobIdListOnly class OpClusterVerifyConfig(OpCode): """Verify the cluster config. """ OP_PARAMS = [ _PDebugSimulateErrors, _PErrorCodes, _PIgnoreErrors, _PVerbose, ] OP_RESULT = ht.TBool class OpClusterVerifyGroup(OpCode): """Run verify on a node group from the cluster. @type skip_checks: C{list} @ivar skip_checks: steps to be skipped from the verify process; this needs to be a subset of L{constants.VERIFY_OPTIONAL_CHECKS}; currently only L{constants.VERIFY_NPLUSONE_MEM} can be passed """ OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, _PDebugSimulateErrors, _PErrorCodes, _PSkipChecks, _PIgnoreErrors, _PVerbose, ] OP_RESULT = ht.TBool class OpClusterVerifyDisks(OpCode): """Verify the cluster disks. """ OP_RESULT = TJobIdListOnly class OpGroupVerifyDisks(OpCode): """Verifies the status of all disks in a node group. Result: a tuple of three elements: - dict of node names with issues (values: error msg) - list of instances with degraded disks (that should be activated) - dict of instances with missing logical volumes (values: (node, vol) pairs with details about the missing volumes) In normal operation, all lists should be empty. A non-empty instance list (3rd element of the result) is still ok (errors were fixed) but non-empty node list means some node is down, and probably there are unfixable drbd errors. Note that only instances that are drbd-based are taken into consideration. This might need to be revisited in the future. """ OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, ] OP_RESULT = \ ht.TAnd(ht.TIsLength(3), ht.TItems([ht.TDictOf(ht.TString, ht.TString), ht.TListOf(ht.TString), ht.TDictOf(ht.TString, ht.TListOf(ht.TListOf(ht.TString)))])) class OpClusterRepairDiskSizes(OpCode): """Verify the disk sizes of the instances and fixes configuration mimatches. Parameters: optional instances list, in case we want to restrict the checks to only a subset of the instances. Result: a list of tuples, (instance, disk, parameter, new-size) for changed configurations. In normal operation, the list should be empty. @type instances: list @ivar instances: the list of instances to check, or empty for all instances """ OP_PARAMS = [ ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), None), ] OP_RESULT = ht.TListOf(ht.TAnd(ht.TIsLength(4), ht.TItems([ht.TNonEmptyString, ht.TNonNegativeInt, ht.TNonEmptyString, ht.TNonNegativeInt]))) class OpClusterConfigQuery(OpCode): """Query cluster configuration values.""" OP_PARAMS = [ _POutputFields, ] OP_RESULT = ht.TListOf(ht.TAny) class OpClusterRename(OpCode): """Rename the cluster. @type name: C{str} @ivar name: The new name of the cluster. The name and/or the master IP address will be changed to match the new name and its IP address. """ OP_DSC_FIELD = "name" OP_PARAMS = [ ("name", ht.NoDefault, ht.TNonEmptyString, None), ] OP_RESULT = ht.TNonEmptyString class OpClusterSetParams(OpCode): """Change the parameters of the cluster. @type vg_name: C{str} or C{None} @ivar vg_name: The new volume group name or None to disable LVM usage. 
""" OP_PARAMS = [ _PForce, _PHvState, _PDiskState, ("vg_name", None, ht.TMaybe(ht.TString), "Volume group name"), ("enabled_hypervisors", None, ht.TMaybe(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue)), "List of enabled hypervisors"), ("hvparams", None, ht.TMaybe(ht.TDictOf(ht.TNonEmptyString, ht.TDict)), "Cluster-wide hypervisor parameter defaults, hypervisor-dependent"), ("beparams", None, ht.TMaybeDict, "Cluster-wide backend parameter defaults"), ("os_hvp", None, ht.TMaybe(ht.TDictOf(ht.TNonEmptyString, ht.TDict)), "Cluster-wide per-OS hypervisor parameter defaults"), ("osparams", None, ht.TMaybe(ht.TDictOf(ht.TNonEmptyString, ht.TDict)), "Cluster-wide OS parameter defaults"), _PDiskParams, ("candidate_pool_size", None, ht.TMaybe(ht.TPositiveInt), "Master candidate pool size"), ("uid_pool", None, ht.NoType, "Set UID pool, must be list of lists describing UID ranges (two items," " start and end inclusive)"), ("add_uids", None, ht.NoType, "Extend UID pool, must be list of lists describing UID ranges (two" " items, start and end inclusive) to be added"), ("remove_uids", None, ht.NoType, "Shrink UID pool, must be list of lists describing UID ranges (two" " items, start and end inclusive) to be removed"), ("maintain_node_health", None, ht.TMaybeBool, "Whether to automatically maintain node health"), ("prealloc_wipe_disks", None, ht.TMaybeBool, "Whether to wipe disks before allocating them to instances"), ("nicparams", None, ht.TMaybeDict, "Cluster-wide NIC parameter defaults"), ("ndparams", None, ht.TMaybeDict, "Cluster-wide node parameter defaults"), ("ipolicy", None, ht.TMaybeDict, "Cluster-wide :ref:`instance policy ` specs"), ("drbd_helper", None, ht.TMaybe(ht.TString), "DRBD helper program"), ("default_iallocator", None, ht.TMaybe(ht.TString), "Default iallocator for cluster"), ("master_netdev", None, ht.TMaybe(ht.TString), "Master network device"), ("master_netmask", None, ht.TMaybe(ht.TNonNegativeInt), "Netmask of the master IP"), ("reserved_lvs", None, ht.TMaybeListOf(ht.TNonEmptyString), "List of reserved LVs"), ("hidden_os", None, _TestClusterOsList, "Modify list of hidden operating systems: each modification must have" " two items, the operation and the OS name; the operation can be" " ``%s`` or ``%s``" % (constants.DDM_ADD, constants.DDM_REMOVE)), ("blacklisted_os", None, _TestClusterOsList, "Modify list of blacklisted operating systems: each modification must" " have two items, the operation and the OS name; the operation can be" " ``%s`` or ``%s``" % (constants.DDM_ADD, constants.DDM_REMOVE)), ("use_external_mip_script", None, ht.TMaybeBool, "Whether to use an external master IP address setup script"), ("enabled_disk_templates", None, ht.TMaybe(ht.TAnd(ht.TListOf(ht.TElemOf(constants.DISK_TEMPLATES)), ht.TTrue)), "List of enabled disk templates"), ("modify_etc_hosts", None, ht.TMaybeBool, "Whether the cluster can modify and keep in sync the /etc/hosts files"), ("file_storage_dir", None, ht.TMaybe(ht.TString), "Default directory for storing file-backed disks"), ("shared_file_storage_dir", None, ht.TMaybe(ht.TString), "Default directory for storing shared-file-backed disks"), ] OP_RESULT = ht.TNone class OpClusterRedistConf(OpCode): """Force a full push of the cluster configuration. """ OP_RESULT = ht.TNone class OpClusterActivateMasterIp(OpCode): """Activate the master IP on the master node. """ OP_RESULT = ht.TNone class OpClusterDeactivateMasterIp(OpCode): """Deactivate the master IP on the master node. 
""" OP_RESULT = ht.TNone class OpQuery(OpCode): """Query for resources/items. @ivar what: Resources to query for, must be one of L{constants.QR_VIA_OP} @ivar fields: List of fields to retrieve @ivar qfilter: Query filter """ OP_DSC_FIELD = "what" OP_PARAMS = [ _PQueryWhat, _PUseLocking, ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString), "Requested fields"), ("qfilter", None, ht.TMaybe(ht.TList), "Query filter"), ] OP_RESULT = \ _GenerateObjectTypeCheck(objects.QueryResponse, { "fields": ht.TListOf(_TQueryFieldDef), "data": _TQueryResult, }) class OpQueryFields(OpCode): """Query for available resource/item fields. @ivar what: Resources to query for, must be one of L{constants.QR_VIA_OP} @ivar fields: List of fields to retrieve """ OP_DSC_FIELD = "what" OP_PARAMS = [ _PQueryWhat, ("fields", None, ht.TMaybeListOf(ht.TNonEmptyString), "Requested fields; if not given, all are returned"), ] OP_RESULT = \ _GenerateObjectTypeCheck(objects.QueryFieldsResponse, { "fields": ht.TListOf(_TQueryFieldDef), }) class OpOobCommand(OpCode): """Interact with OOB.""" OP_PARAMS = [ ("node_names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "List of node names to run the OOB command against"), ("node_uuids", None, ht.TMaybeListOf(ht.TNonEmptyString), "List of node UUIDs to run the OOB command against"), ("command", ht.NoDefault, ht.TElemOf(constants.OOB_COMMANDS), "OOB command to be run"), ("timeout", constants.OOB_TIMEOUT, ht.TInt, "Timeout before the OOB helper will be terminated"), ("ignore_status", False, ht.TBool, "Ignores the node offline status for power off"), ("power_delay", constants.OOB_POWER_DELAY, ht.TNonNegativeFloat, "Time in seconds to wait between powering on nodes"), ] # Fixme: Make it more specific with all the special cases in LUOobCommand OP_RESULT = _TQueryResult class OpRestrictedCommand(OpCode): """Runs a restricted command on node(s). """ OP_PARAMS = [ _PUseLocking, ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString), "Nodes on which the command should be run (at least one)"), ("node_uuids", None, ht.TMaybeListOf(ht.TNonEmptyString), "Node UUIDs on which the command should be run (at least one)"), ("command", ht.NoDefault, ht.TNonEmptyString, "Command name (no parameters)"), ] _RESULT_ITEMS = [ ht.Comment("success")(ht.TBool), ht.Comment("output or error message")(ht.TString), ] OP_RESULT = \ ht.TListOf(ht.TAnd(ht.TIsLength(len(_RESULT_ITEMS)), ht.TItems(_RESULT_ITEMS))) # node opcodes class OpNodeRemove(OpCode): """Remove a node. @type node_name: C{str} @ivar node_name: The name of the node to remove. If the node still has instances on it, the operation will fail. """ OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid ] OP_RESULT = ht.TNone class OpNodeAdd(OpCode): """Add a node to the cluster. @type node_name: C{str} @ivar node_name: The name of the node to add. This can be a short name, but it will be expanded to the FQDN. @type primary_ip: IP address @ivar primary_ip: The primary IP of the node. This will be ignored when the opcode is submitted, but will be filled during the node add (so it will be visible in the job query). @type secondary_ip: IP address @ivar secondary_ip: The secondary IP of the node. This needs to be passed if the cluster has been initialized in 'dual-network' mode, otherwise it must not be given. @type readd: C{bool} @ivar readd: Whether to re-add an existing node to the cluster. 
If this is not passed, then the operation will abort if the node name is already in the cluster; use this parameter to 'repair' a node that had its configuration broken, or was reinstalled without removal from the cluster. @type group: C{str} @ivar group: The node group to which this node will belong. @type vm_capable: C{bool} @ivar vm_capable: The vm_capable node attribute @type master_capable: C{bool} @ivar master_capable: The master_capable node attribute """ OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PHvState, _PDiskState, ("primary_ip", None, ht.NoType, "Primary IP address"), ("secondary_ip", None, ht.TMaybeString, "Secondary IP address"), ("readd", False, ht.TBool, "Whether node is re-added to cluster"), ("group", None, ht.TMaybeString, "Initial node group"), ("master_capable", None, ht.TMaybeBool, "Whether node can become master or master candidate"), ("vm_capable", None, ht.TMaybeBool, "Whether node can host instances"), ("ndparams", None, ht.TMaybeDict, "Node parameters"), ] OP_RESULT = ht.TNone class OpNodeQuery(OpCode): """Compute the list of nodes.""" OP_PARAMS = [ _POutputFields, _PUseLocking, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all nodes, node names otherwise"), ] OP_RESULT = _TOldQueryResult class OpNodeQueryvols(OpCode): """Get list of volumes on node.""" OP_PARAMS = [ _POutputFields, ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all nodes, node names otherwise"), ] OP_RESULT = ht.TListOf(ht.TAny) class OpNodeQueryStorage(OpCode): """Get information on storage for node(s).""" OP_PARAMS = [ _POutputFields, _PStorageType, ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "List of nodes"), ("name", None, ht.TMaybeString, "Storage name"), ] OP_RESULT = _TOldQueryResult class OpNodeModifyStorage(OpCode): """Modifies the properies of a storage unit""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid, _PStorageType, _PStorageName, ("changes", ht.NoDefault, ht.TDict, "Requested changes"), ] OP_RESULT = ht.TNone class OpRepairNodeStorage(OpCode): """Repairs the volume group on a node.""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid, _PStorageType, _PStorageName, _PIgnoreConsistency, ] OP_RESULT = ht.TNone class OpNodeSetParams(OpCode): """Change the parameters of a node.""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid, _PForce, _PHvState, _PDiskState, ("master_candidate", None, ht.TMaybeBool, "Whether the node should become a master candidate"), ("offline", None, ht.TMaybeBool, "Whether the node should be marked as offline"), ("drained", None, ht.TMaybeBool, "Whether the node should be marked as drained"), ("auto_promote", False, ht.TBool, "Whether node(s) should be promoted to master candidate if necessary"), ("master_capable", None, ht.TMaybeBool, "Denote whether node can become master or master candidate"), ("vm_capable", None, ht.TMaybeBool, "Denote whether node can host instances"), ("secondary_ip", None, ht.TMaybeString, "Change node's secondary IP address"), ("ndparams", None, ht.TMaybeDict, "Set node parameters"), ("powered", None, ht.TMaybeBool, "Whether the node should be marked as powered"), ] OP_RESULT = _TSetParamsResult class OpNodePowercycle(OpCode): """Tries to powercycle a node.""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid, _PForce, ] OP_RESULT = ht.TMaybeString class OpNodeMigrate(OpCode): """Migrate all instances from a node.""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PNodeName, _PNodeUuid, 
_PMigrationMode, _PMigrationLive, _PMigrationTargetNode, _PMigrationTargetNodeUuid, _PAllowRuntimeChgs, _PIgnoreIpolicy, _PIAllocFromDesc("Iallocator for deciding the target node" " for shared-storage instances"), ] OP_RESULT = TJobIdListOnly class OpNodeEvacuate(OpCode): """Evacuate instances off a number of nodes.""" OP_DSC_FIELD = "node_name" OP_PARAMS = [ _PEarlyRelease, _PNodeName, _PNodeUuid, ("remote_node", None, ht.TMaybeString, "New secondary node"), ("remote_node_uuid", None, ht.TMaybeString, "New secondary node UUID"), _PIAllocFromDesc("Iallocator for computing solution"), ("mode", ht.NoDefault, ht.TElemOf(constants.NODE_EVAC_MODES), "Node evacuation mode"), ] OP_RESULT = TJobIdListOnly # instance opcodes class OpInstanceCreate(OpCode): """Create an instance. @ivar instance_name: Instance name @ivar mode: Instance creation mode (one of L{constants.INSTANCE_CREATE_MODES}) @ivar source_handshake: Signed handshake from source (remote import only) @ivar source_x509_ca: Source X509 CA in PEM format (remote import only) @ivar source_instance_name: Previous name of instance (remote import only) @ivar source_shutdown_timeout: Shutdown timeout used for source instance (remote import only) """ OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PForceVariant, _PWaitForSync, _PNameCheck, _PIgnoreIpolicy, _POpportunisticLocking, ("beparams", ht.EmptyDict, ht.TDict, "Backend parameters for instance"), ("disks", ht.NoDefault, ht.TListOf(_TDiskParams), "Disk descriptions, for example ``[{\"%s\": 100}, {\"%s\": 5}]``;" " each disk definition must contain a ``%s`` value and" " can contain an optional ``%s`` value denoting the disk access mode" " (%s)" % (constants.IDISK_SIZE, constants.IDISK_SIZE, constants.IDISK_SIZE, constants.IDISK_MODE, " or ".join("``%s``" % i for i in sorted(constants.DISK_ACCESS_SET)))), ("disk_template", ht.NoDefault, _BuildDiskTemplateCheck(True), "Disk template"), ("file_driver", None, ht.TMaybe(ht.TElemOf(constants.FILE_DRIVER)), "Driver for file-backed disks"), ("file_storage_dir", None, ht.TMaybeString, "Directory for storing file-backed disks"), ("hvparams", ht.EmptyDict, ht.TDict, "Hypervisor parameters for instance, hypervisor-dependent"), ("hypervisor", None, ht.TMaybeString, "Hypervisor"), _PIAllocFromDesc("Iallocator for deciding which node(s) to use"), ("identify_defaults", False, ht.TBool, "Reset instance parameters to default if equal"), ("ip_check", True, ht.TBool, _PIpCheckDoc), ("conflicts_check", True, ht.TBool, "Check for conflicting IPs"), ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES), "Instance creation mode"), ("nics", ht.NoDefault, ht.TListOf(_TestNicDef), "List of NIC (network interface) definitions, for example" " ``[{}, {}, {\"%s\": \"198.51.100.4\"}]``; each NIC definition can" " contain the optional values %s" % (constants.INIC_IP, ", ".join("``%s``" % i for i in sorted(constants.INIC_PARAMS)))), ("no_install", None, ht.TMaybeBool, "Do not install the OS (will disable automatic start)"), ("osparams", ht.EmptyDict, ht.TDict, "OS parameters for instance"), ("os_type", None, ht.TMaybeString, "Operating system"), ("pnode", None, ht.TMaybeString, "Primary node"), ("pnode_uuid", None, ht.TMaybeString, "Primary node UUID"), ("snode", None, ht.TMaybeString, "Secondary node"), ("snode_uuid", None, ht.TMaybeString, "Secondary node UUID"), ("source_handshake", None, ht.TMaybe(ht.TList), "Signed handshake from source (remote import only)"), ("source_instance_name", None, ht.TMaybeString, "Source instance name (remote import 
only)"), ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TNonNegativeInt, "How long source instance was given to shut down (remote import only)"), ("source_x509_ca", None, ht.TMaybeString, "Source X509 CA in PEM format (remote import only)"), ("src_node", None, ht.TMaybeString, "Source node for import"), ("src_node_uuid", None, ht.TMaybeString, "Source node UUID for import"), ("src_path", None, ht.TMaybeString, "Source directory for import"), ("start", True, ht.TBool, "Whether to start instance after creation"), ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Instance tags"), ] OP_RESULT = ht.Comment("instance nodes")(ht.TListOf(ht.TNonEmptyString)) class OpInstanceMultiAlloc(OpCode): """Allocates multiple instances. """ OP_PARAMS = [ _POpportunisticLocking, _PIAllocFromDesc("Iallocator used to allocate all the instances"), ("instances", ht.EmptyList, ht.TListOf(ht.TInstanceOf(OpInstanceCreate)), "List of instance create opcodes describing the instances to allocate"), ] _JOB_LIST = ht.Comment("List of submitted jobs")(TJobIdList) ALLOCATABLE_KEY = "allocatable" FAILED_KEY = "allocatable" OP_RESULT = ht.TStrictDict(True, True, { constants.JOB_IDS_KEY: _JOB_LIST, ALLOCATABLE_KEY: ht.TListOf(ht.TNonEmptyString), FAILED_KEY: ht.TListOf(ht.TNonEmptyString), }) def __getstate__(self): """Generic serializer. """ state = OpCode.__getstate__(self) if hasattr(self, "instances"): # pylint: disable=E1101 state["instances"] = [inst.__getstate__() for inst in self.instances] return state def __setstate__(self, state): """Generic unserializer. This method just restores from the serialized state the attributes of the current instance. @param state: the serialized opcode data @type state: C{dict} """ if not isinstance(state, dict): raise ValueError("Invalid data to __setstate__: expected dict, got %s" % type(state)) if "instances" in state: state["instances"] = map(OpCode.LoadOpCode, state["instances"]) return OpCode.__setstate__(self, state) def Validate(self, set_defaults): """Validates this opcode. We do this recursively. 
""" OpCode.Validate(self, set_defaults) for inst in self.instances: # pylint: disable=E1101 inst.Validate(set_defaults) class OpInstanceReinstall(OpCode): """Reinstall an instance's OS.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PForceVariant, ("os_type", None, ht.TMaybeString, "Instance operating system"), ("osparams", None, ht.TMaybeDict, "Temporary OS parameters"), ] OP_RESULT = ht.TNone class OpInstanceRemove(OpCode): """Remove an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PShutdownTimeout, ("ignore_failures", False, ht.TBool, "Whether to ignore failures during removal"), ] OP_RESULT = ht.TNone class OpInstanceRename(OpCode): """Rename an instance.""" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PNameCheck, ("new_name", ht.NoDefault, ht.TNonEmptyString, "New instance name"), ("ip_check", False, ht.TBool, _PIpCheckDoc), ] OP_RESULT = ht.Comment("New instance name")(ht.TNonEmptyString) class OpInstanceStartup(OpCode): """Startup an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PForce, _PIgnoreOfflineNodes, ("hvparams", ht.EmptyDict, ht.TDict, "Temporary hypervisor parameters, hypervisor-dependent"), ("beparams", ht.EmptyDict, ht.TDict, "Temporary backend parameters"), _PNoRemember, _PStartupPaused, ] OP_RESULT = ht.TNone class OpInstanceShutdown(OpCode): """Shutdown an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PForce, _PIgnoreOfflineNodes, ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TNonNegativeInt, "How long to wait for instance to shut down"), _PNoRemember, ] OP_RESULT = ht.TNone class OpInstanceReboot(OpCode): """Reboot an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PShutdownTimeout, ("ignore_secondaries", False, ht.TBool, "Whether to start the instance even if secondary disks are failing"), ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES), "How to reboot instance"), ] OP_RESULT = ht.TNone class OpInstanceReplaceDisks(OpCode): """Replace the disks of an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PEarlyRelease, _PIgnoreIpolicy, ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES), "Replacement mode"), ("disks", ht.EmptyList, ht.TListOf(ht.TNonNegativeInt), "Disk indexes"), ("remote_node", None, ht.TMaybeString, "New secondary node"), ("remote_node_uuid", None, ht.TMaybeString, "New secondary node UUID"), _PIAllocFromDesc("Iallocator for deciding new secondary node"), ] OP_RESULT = ht.TNone class OpInstanceFailover(OpCode): """Failover an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PShutdownTimeout, _PIgnoreConsistency, _PMigrationTargetNode, _PMigrationTargetNodeUuid, _PIgnoreIpolicy, _PIAllocFromDesc("Iallocator for deciding the target node for" " shared-storage instances"), ("cleanup", False, ht.TBool, "Whether a previously failed failover should be cleaned up"), ] OP_RESULT = ht.TNone class OpInstanceMigrate(OpCode): """Migrate an instance. This migrates (without shutting down an instance) to its secondary node. 
@ivar instance_name: the name of the instance @ivar mode: the migration mode (live, non-live or None for auto) """ OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PMigrationMode, _PMigrationLive, _PMigrationTargetNode, _PMigrationTargetNodeUuid, _PAllowRuntimeChgs, _PIgnoreIpolicy, ("cleanup", False, ht.TBool, "Whether a previously failed migration should be cleaned up"), _PIAllocFromDesc("Iallocator for deciding the target node for" " shared-storage instances"), ("allow_failover", False, ht.TBool, "Whether we can fall back to failover if migration is not possible"), ] OP_RESULT = ht.TNone class OpInstanceMove(OpCode): """Move an instance. This moves the instance (shutting it down and copying its data) to an arbitrary node. @ivar instance_name: the name of the instance @ivar target_node: the destination node """ OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PShutdownTimeout, _PIgnoreIpolicy, ("target_node", ht.NoDefault, ht.TNonEmptyString, "Target node"), ("target_node_uuid", None, ht.TMaybeString, "Target node UUID"), _PIgnoreConsistency, ] OP_RESULT = ht.TNone class OpInstanceConsole(OpCode): """Connect to an instance's console.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, ] OP_RESULT = ht.TDict class OpInstanceActivateDisks(OpCode): """Activate an instance's disks.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, ("ignore_size", False, ht.TBool, "Whether to ignore recorded size"), _PWaitForSyncFalse, ] OP_RESULT = ht.TListOf(ht.TAnd(ht.TIsLength(3), ht.TItems([ht.TNonEmptyString, ht.TNonEmptyString, ht.TNonEmptyString]))) class OpInstanceDeactivateDisks(OpCode): """Deactivate an instance's disks.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PForce, ] OP_RESULT = ht.TNone class OpInstanceRecreateDisks(OpCode): """Recreate an instance's disks.""" _TDiskChanges = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ht.Comment("Disk index")(ht.TNonNegativeInt), ht.Comment("Parameters")(_TDiskParams)])) OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, ("disks", ht.EmptyList, ht.TOr(ht.TListOf(ht.TNonNegativeInt), ht.TListOf(_TDiskChanges)), "List of disk indexes (deprecated) or a list of tuples containing a disk" " index and a possibly empty dictionary with disk parameter changes"), ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "New instance nodes, if relocation is desired"), ("node_uuids", None, ht.TMaybeListOf(ht.TNonEmptyString), "New instance node UUIDs, if relocation is desired"), _PIAllocFromDesc("Iallocator for deciding new nodes"), ] OP_RESULT = ht.TNone class OpInstanceQuery(OpCode): """Compute the list of instances.""" OP_PARAMS = [ _POutputFields, _PUseLocking, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all instances, instance names otherwise"), ] OP_RESULT = _TOldQueryResult class OpInstanceQueryData(OpCode): """Compute the run-time status of instances.""" OP_PARAMS = [ _PUseLocking, ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Instance names"), ("static", False, ht.TBool, "Whether to only return configuration data without querying" " nodes"), ] OP_RESULT = ht.TDictOf(ht.TNonEmptyString, ht.TDict) def _TestInstSetParamsModList(fn): """Generates a check for modification lists.
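Both accepted layouts can be illustrated with hypothetical values (here
C{fn} would check the per-item parameter dictionary)::

  # Recommended format: (op, identifier, settings)
  [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "rw"})]

  # Deprecated format: (op-or-index, settings)
  [("add", {"size": 1024}), (0, {"mode": "rw"})]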
""" # Old format # TODO: Remove in version 2.8 including support in LUInstanceSetParams old_mod_item_fn = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.TOr(ht.TElemOf(constants.DDMS_VALUES), ht.TNonNegativeInt), fn, ])) # New format, supporting adding/removing disks/NICs at arbitrary indices mod_item_fn = \ ht.TAnd(ht.TIsLength(3), ht.TItems([ ht.TElemOf(constants.DDMS_VALUES_WITH_MODIFY), ht.Comment("Device index, can be negative, e.g. -1 for last disk") (ht.TOr(ht.TInt, ht.TString)), fn, ])) return ht.TOr(ht.Comment("Recommended")(ht.TListOf(mod_item_fn)), ht.Comment("Deprecated")(ht.TListOf(old_mod_item_fn))) class OpInstanceSetParams(OpCode): """Change the parameters of an instance. """ TestNicModifications = _TestInstSetParamsModList(_TestNicDef) TestDiskModifications = _TestInstSetParamsModList(_TDiskParams) OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PForce, _PForceVariant, _PIgnoreIpolicy, ("nics", ht.EmptyList, TestNicModifications, "List of NIC changes: each item is of the form" " ``(op, identifier, settings)``, ``op`` is one of ``%s``, ``%s`` or" " ``%s``, ``identifier`` can be a zero-based index number (or -1 to refer" " to the last position), the NIC's UUID of the NIC's name; a" " deprecated version of this parameter used the form ``(op, settings)``," " where ``op`` can be ``%s`` to add a new NIC with the specified" " settings, ``%s`` to remove the last NIC or a number to modify the" " settings of the NIC with that index" % (constants.DDM_ADD, constants.DDM_MODIFY, constants.DDM_REMOVE, constants.DDM_ADD, constants.DDM_REMOVE)), ("disks", ht.EmptyList, TestDiskModifications, "List of disk changes; see ``nics``"), ("beparams", ht.EmptyDict, ht.TDict, "Per-instance backend parameters"), ("runtime_mem", None, ht.TMaybePositiveInt, "New runtime memory"), ("hvparams", ht.EmptyDict, ht.TDict, "Per-instance hypervisor parameters, hypervisor-dependent"), ("disk_template", None, ht.TMaybe(_BuildDiskTemplateCheck(False)), "Disk template for instance"), ("pnode", None, ht.TMaybeString, "New primary node"), ("pnode_uuid", None, ht.TMaybeString, "New primary node UUID"), ("remote_node", None, ht.TMaybeString, "Secondary node (used when changing disk template)"), ("remote_node_uuid", None, ht.TMaybeString, "Secondary node UUID (used when changing disk template)"), ("os_name", None, ht.TMaybeString, "Change the instance's OS without reinstalling the instance"), ("osparams", None, ht.TMaybeDict, "Per-instance OS parameters"), ("wait_for_sync", True, ht.TBool, "Whether to wait for the disk to synchronize, when changing template"), ("offline", None, ht.TMaybeBool, "Whether to mark instance as offline"), ("conflicts_check", True, ht.TBool, "Check for conflicting IPs"), ] OP_RESULT = _TSetParamsResult class OpInstanceGrowDisk(OpCode): """Grow a disk of an instance.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PWaitForSync, ("disk", ht.NoDefault, ht.TInt, "Disk index"), ("amount", ht.NoDefault, ht.TNonNegativeInt, "Amount of disk space to add (megabytes)"), ("absolute", False, ht.TBool, "Whether the amount parameter is an absolute target or a relative one"), ] OP_RESULT = ht.TNone class OpInstanceChangeGroup(OpCode): """Moves an instance to another node group.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PEarlyRelease, _PIAllocFromDesc("Iallocator for computing solution"), _PTargetGroups, ] OP_RESULT = TJobIdListOnly # Node group opcodes class OpGroupAdd(OpCode): """Add a node group to the 
cluster.""" OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, _PNodeGroupAllocPolicy, _PGroupNodeParams, _PDiskParams, _PHvState, _PDiskState, ("ipolicy", None, ht.TMaybeDict, "Group-wide :ref:`instance policy ` specs"), ] OP_RESULT = ht.TNone class OpGroupAssignNodes(OpCode): """Assign nodes to a node group.""" OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, _PForce, ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString), "List of nodes to assign"), ("node_uuids", None, ht.TMaybeListOf(ht.TNonEmptyString), "List of node UUIDs to assign"), ] OP_RESULT = ht.TNone class OpGroupQuery(OpCode): """Compute the list of node groups.""" OP_PARAMS = [ _POutputFields, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all groups, group names otherwise"), ] OP_RESULT = _TOldQueryResult class OpGroupSetParams(OpCode): """Change the parameters of a node group.""" OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, _PNodeGroupAllocPolicy, _PGroupNodeParams, _PDiskParams, _PHvState, _PDiskState, ("ipolicy", None, ht.TMaybeDict, "Group-wide instance policy specs"), ] OP_RESULT = _TSetParamsResult class OpGroupRemove(OpCode): """Remove a node group from the cluster.""" OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, ] OP_RESULT = ht.TNone class OpGroupRename(OpCode): """Rename a node group in the cluster.""" OP_PARAMS = [ _PGroupName, ("new_name", ht.NoDefault, ht.TNonEmptyString, "New group name"), ] OP_RESULT = ht.Comment("New group name")(ht.TNonEmptyString) class OpGroupEvacuate(OpCode): """Evacuate a node group in the cluster.""" OP_DSC_FIELD = "group_name" OP_PARAMS = [ _PGroupName, _PEarlyRelease, _PIAllocFromDesc("Iallocator for computing solution"), _PTargetGroups, ] OP_RESULT = TJobIdListOnly # OS opcodes class OpOsDiagnose(OpCode): """Compute the list of guest operating systems.""" OP_PARAMS = [ _POutputFields, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Which operating systems to diagnose"), ] OP_RESULT = _TOldQueryResult # ExtStorage opcodes class OpExtStorageDiagnose(OpCode): """Compute the list of external storage providers.""" OP_PARAMS = [ _POutputFields, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Which ExtStorage Provider to diagnose"), ] OP_RESULT = _TOldQueryResult # Exports opcodes class OpBackupQuery(OpCode): """Compute the list of exported images.""" OP_PARAMS = [ _PUseLocking, ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all nodes, node names otherwise"), ] OP_RESULT = ht.TDictOf(ht.TNonEmptyString, ht.TOr(ht.Comment("False on error")(ht.TBool), ht.TListOf(ht.TNonEmptyString))) class OpBackupPrepare(OpCode): """Prepares an instance export. @ivar instance_name: Instance name @ivar mode: Export mode (one of L{constants.EXPORT_MODES}) """ OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES), "Export mode"), ] OP_RESULT = ht.TMaybeDict class OpBackupExport(OpCode): """Export an instance. For local exports, the export destination is the node name. For remote exports, the export destination is a list of tuples, each consisting of hostname/IP address, port, magic, HMAC and HMAC salt. The HMAC is calculated using the cluster domain secret over the value "${index}:${hostname}:${port}". The destination X509 CA must be a signed certificate. 
@ivar mode: Export mode (one of L{constants.EXPORT_MODES}) @ivar target_node: Export destination @ivar x509_key_name: X509 key to use (remote export only) @ivar destination_x509_ca: Destination X509 CA in PEM format (remote export only) """ OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, _PShutdownTimeout, # TODO: Rename target_node as it changes meaning for different export modes # (e.g. "destination") ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList), "Destination information, depends on export mode"), ("target_node_uuid", None, ht.TMaybeString, "Target node UUID (if local export)"), ("shutdown", True, ht.TBool, "Whether to shutdown instance before export"), ("remove_instance", False, ht.TBool, "Whether to remove instance after export"), ("ignore_remove_failures", False, ht.TBool, "Whether to ignore failures while removing instances"), ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES), "Export mode"), ("x509_key_name", None, ht.TMaybe(ht.TList), "Name of X509 key (remote export only)"), ("destination_x509_ca", None, ht.TMaybeString, "Destination X509 CA (remote export only)"), ] OP_RESULT = \ ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.Comment("Finalizing status")(ht.TBool), ht.Comment("Status for every exported disk")(ht.TListOf(ht.TBool)), ])) class OpBackupRemove(OpCode): """Remove an instance's export.""" OP_DSC_FIELD = "instance_name" OP_PARAMS = [ _PInstanceName, _PInstanceUuid, ] OP_RESULT = ht.TNone # Tags opcodes class OpTagsGet(OpCode): """Returns the tags of the given object.""" OP_DSC_FIELD = "name" OP_PARAMS = [ _PTagKind, # Not using _PUseLocking as the default is different for historical reasons ("use_locking", True, ht.TBool, "Whether to use synchronization"), # Name is only meaningful for nodes and instances ("name", ht.NoDefault, ht.TMaybeString, "Name of object to retrieve tags from"), ] OP_RESULT = ht.TListOf(ht.TNonEmptyString) class OpTagsSearch(OpCode): """Searches the tags in the cluster for a given pattern.""" OP_DSC_FIELD = "pattern" OP_PARAMS = [ ("pattern", ht.NoDefault, ht.TNonEmptyString, "Search pattern (regular expression)"), ] OP_RESULT = ht.TListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ ht.TNonEmptyString, ht.TNonEmptyString, ]))) class OpTagsSet(OpCode): """Add a list of tags on a given object.""" OP_PARAMS = [ _PTagKind, _PTags, # Name is only meaningful for groups, nodes and instances ("name", ht.NoDefault, ht.TMaybeString, "Name of object where tag(s) should be added"), ] OP_RESULT = ht.TNone class OpTagsDel(OpCode): """Remove a list of tags from a given object.""" OP_PARAMS = [ _PTagKind, _PTags, # Name is only meaningful for groups, nodes and instances ("name", ht.NoDefault, ht.TMaybeString, "Name of object where tag(s) should be deleted"), ] OP_RESULT = ht.TNone # Test opcodes class OpTestDelay(OpCode): """Sleeps for a configured amount of time. This is used just for debugging and testing. Parameters: - duration: the time to sleep, in seconds - on_master: if true, sleep on the master - on_nodes: list of nodes in which to sleep If the on_master parameter is true, it will execute a sleep on the master (before any node sleep). If the on_nodes list is not empty, it will sleep on those nodes (after the sleep on the master, if that is enabled). As an additional feature, the case of duration < 0 will be reported as an execution error, so this opcode can be used as a failure generator. The case of duration == 0 will not be treated specially. 
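For example (a sketch with hypothetical node names), a two-second sleep on
the master followed by a one-second sleep on two nodes::

  OpTestDelay(duration=2.0, on_master=True)
  OpTestDelay(duration=1.0, on_master=False,
              on_nodes=["node1.example.com", "node2.example.com"])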
""" OP_DSC_FIELD = "duration" OP_PARAMS = [ ("duration", ht.NoDefault, ht.TNumber, None), ("on_master", True, ht.TBool, None), ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), None), ("on_node_uuids", None, ht.TMaybeListOf(ht.TNonEmptyString), None), ("repeat", 0, ht.TNonNegativeInt, None), ] def OP_DSC_FORMATTER(self, value): # pylint: disable=C0103,R0201 """Custom formatter for duration. """ try: v = float(value) except TypeError: v = value return str(v) class OpTestAllocator(OpCode): """Allocator framework testing. This opcode has two modes: - gather and return allocator input for a given mode (allocate new or replace secondary) and a given instance definition (direction 'in') - run a selected allocator for a given operation (as above) and return the allocator output (direction 'out') """ OP_DSC_FIELD = "iallocator" OP_PARAMS = [ ("direction", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS), None), ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES), None), ("name", ht.NoDefault, ht.TNonEmptyString, None), ("nics", ht.NoDefault, ht.TMaybeListOf(ht.TDictOf(ht.TElemOf([constants.INIC_MAC, constants.INIC_IP, "bridge"]), ht.TMaybeString)), None), ("disks", ht.NoDefault, ht.TMaybe(ht.TList), None), ("hypervisor", None, ht.TMaybeString, None), _PIAllocFromDesc(None), ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), None), ("memory", None, ht.TMaybe(ht.TNonNegativeInt), None), ("vcpus", None, ht.TMaybe(ht.TNonNegativeInt), None), ("os", None, ht.TMaybeString, None), ("disk_template", None, ht.TMaybeString, None), ("instances", None, ht.TMaybeListOf(ht.TNonEmptyString), None), ("evac_mode", None, ht.TMaybe(ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)), None), ("target_groups", None, ht.TMaybeListOf(ht.TNonEmptyString), None), ("spindle_use", 1, ht.TNonNegativeInt, None), ("count", 1, ht.TNonNegativeInt, None), ] class OpTestJqueue(OpCode): """Utility opcode to test some aspects of the job queue. """ OP_PARAMS = [ ("notify_waitlock", False, ht.TBool, None), ("notify_exec", False, ht.TBool, None), ("log_messages", ht.EmptyList, ht.TListOf(ht.TString), None), ("fail", False, ht.TBool, None), ] class OpTestDummy(OpCode): """Utility opcode used by unittests. """ OP_PARAMS = [ ("result", ht.NoDefault, ht.NoType, None), ("messages", ht.NoDefault, ht.NoType, None), ("fail", ht.NoDefault, ht.NoType, None), ("submit_jobs", None, ht.NoType, None), ] WITH_LU = False # Network opcodes # Add a new network in the cluster class OpNetworkAdd(OpCode): """Add an IP network to the cluster.""" OP_DSC_FIELD = "network_name" OP_PARAMS = [ _PNetworkName, ("network", ht.NoDefault, _TIpNetwork4, "IPv4 subnet"), ("gateway", None, ht.TMaybe(_TIpAddress4), "IPv4 gateway"), ("network6", None, ht.TMaybe(_TIpNetwork6), "IPv6 subnet"), ("gateway6", None, ht.TMaybe(_TIpAddress6), "IPv6 gateway"), ("mac_prefix", None, ht.TMaybeString, "MAC address prefix that overrides cluster one"), ("add_reserved_ips", None, _TMaybeAddr4List, "Which IP addresses to reserve"), ("conflicts_check", True, ht.TBool, "Whether to check for conflicting IP addresses"), ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Network tags"), ] OP_RESULT = ht.TNone class OpNetworkRemove(OpCode): """Remove an existing network from the cluster. Must not be connected to any nodegroup. 
""" OP_DSC_FIELD = "network_name" OP_PARAMS = [ _PNetworkName, _PForce, ] OP_RESULT = ht.TNone class OpNetworkSetParams(OpCode): """Modify Network's parameters except for IPv4 subnet""" OP_DSC_FIELD = "network_name" OP_PARAMS = [ _PNetworkName, ("gateway", None, ht.TMaybeValueNone(_TIpAddress4), "IPv4 gateway"), ("network6", None, ht.TMaybeValueNone(_TIpNetwork6), "IPv6 subnet"), ("gateway6", None, ht.TMaybeValueNone(_TIpAddress6), "IPv6 gateway"), ("mac_prefix", None, ht.TMaybeValueNone(ht.TString), "MAC address prefix that overrides cluster one"), ("add_reserved_ips", None, _TMaybeAddr4List, "Which external IP addresses to reserve"), ("remove_reserved_ips", None, _TMaybeAddr4List, "Which external IP addresses to release"), ] OP_RESULT = ht.TNone class OpNetworkConnect(OpCode): """Connect a Network to a specific Nodegroup with the defined netparams (mode, link). Nics in this Network will inherit those params. Produce errors if a NIC (that its not already assigned to a network) has an IP that is contained in the Network this will produce error unless --no-conflicts-check is passed. """ OP_DSC_FIELD = "network_name" OP_PARAMS = [ _PGroupName, _PNetworkName, ("network_mode", ht.NoDefault, ht.TElemOf(constants.NIC_VALID_MODES), "Connectivity mode"), ("network_link", ht.NoDefault, ht.TString, "Connectivity link"), ("conflicts_check", True, ht.TBool, "Whether to check for conflicting IPs"), ] OP_RESULT = ht.TNone class OpNetworkDisconnect(OpCode): """Disconnect a Network from a Nodegroup. Produce errors if NICs are present in the Network unless --no-conficts-check option is passed. """ OP_DSC_FIELD = "network_name" OP_PARAMS = [ _PGroupName, _PNetworkName, ] OP_RESULT = ht.TNone class OpNetworkQuery(OpCode): """Compute the list of networks.""" OP_PARAMS = [ _POutputFields, _PUseLocking, ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString), "Empty list to query all groups, group names otherwise"), ] OP_RESULT = _TOldQueryResult def _GetOpList(): """Returns list of all defined opcodes. Does not eliminate duplicates by C{OP_ID}. """ return [v for v in globals().values() if (isinstance(v, type) and issubclass(v, OpCode) and hasattr(v, "OP_ID") and v is not OpCode)] OP_MAPPING = dict((v.OP_ID, v) for v in _GetOpList()) ganeti-2.9.3/lib/vcluster.py0000644000000000000000000001657312271422343015775 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module containing utilities for virtual clusters. Most functions manipulate file system paths and are no-ops when the environment variables C{GANETI_ROOTDIR} and C{GANETI_HOSTNAME} are not set. See the functions' docstrings for details. 
""" import os from ganeti import compat ETC_HOSTS = "/etc/hosts" _VIRT_PATH_PREFIX = "/###-VIRTUAL-PATH-###," _ROOTDIR_ENVNAME = "GANETI_ROOTDIR" _HOSTNAME_ENVNAME = "GANETI_HOSTNAME" #: List of paths which shouldn't be virtualized _VPATH_WHITELIST = compat.UniqueFrozenset([ ETC_HOSTS, ]) def _GetRootDirectory(envname): """Retrieves root directory from an environment variable. @type envname: string @param envname: Environment variable name @rtype: string @return: Root directory (can be empty) """ path = os.getenv(envname) if path: if not os.path.isabs(path): raise RuntimeError("Root directory in '%s' must be absolute: %s" % (envname, path)) return os.path.normpath(path) return "" def _GetHostname(envname): """Retrieves virtual hostname from an environment variable. @type envname: string @param envname: Environment variable name @rtype: string @return: Host name (can be empty) """ return os.getenv(envname, default="") def _CheckHostname(hostname): """Very basic check for hostnames. @type hostname: string @param hostname: Hostname """ if os.path.basename(hostname) != hostname: raise RuntimeError("Hostname '%s' can not be used for a file system" " path" % hostname) def _PreparePaths(rootdir, hostname): """Checks if the root directory and hostname are acceptable. The (node-specific) root directory must have the hostname as its last component. The parent directory then becomes the cluster-wide root directory. This is necessary as some components must be able to predict the root path on a remote node (e.g. copying files via scp). @type rootdir: string @param rootdir: Root directory (from environment) @type hostname: string @param hostname: Hostname (from environment) @rtype: tuple; (string, string, string or None) @return: Tuple containing cluster-global root directory, node root directory and virtual hostname """ if bool(rootdir) ^ bool(hostname): raise RuntimeError("Both root directory and hostname must be specified" " using the environment variables %s and %s" % (_ROOTDIR_ENVNAME, _HOSTNAME_ENVNAME)) if rootdir: assert rootdir == os.path.normpath(rootdir) _CheckHostname(hostname) if os.path.basename(rootdir) != hostname: raise RuntimeError("Last component of root directory ('%s') must match" " hostname ('%s')" % (rootdir, hostname)) return (os.path.dirname(rootdir), rootdir, hostname) else: return ("", "", None) (_VIRT_BASEDIR, _VIRT_NODEROOT, _VIRT_HOSTNAME) = \ _PreparePaths(_GetRootDirectory(_ROOTDIR_ENVNAME), _GetHostname(_HOSTNAME_ENVNAME)) assert (compat.all([_VIRT_BASEDIR, _VIRT_NODEROOT, _VIRT_HOSTNAME]) or not compat.any([_VIRT_BASEDIR, _VIRT_NODEROOT, _VIRT_HOSTNAME])) def GetVirtualHostname(): """Returns the virtual hostname. @rtype: string or L{None} """ return _VIRT_HOSTNAME def MakeNodeRoot(base, node_name): """Appends a node name to the base directory. """ _CheckHostname(node_name) return os.path.normpath("%s/%s" % (base, node_name)) def ExchangeNodeRoot(node_name, filename, _basedir=_VIRT_BASEDIR, _noderoot=_VIRT_NODEROOT): """Replaces the node-specific root directory in a path. Replaces it with the root directory for another node. Assuming C{/tmp/vcluster/node1} is the root directory for C{node1}, the result will be C{/tmp/vcluster/node3} for C{node3} (as long as a root directory is specified in the environment). 
""" if _basedir: pure = _RemoveNodePrefix(filename, _noderoot=_noderoot) result = "%s/%s" % (MakeNodeRoot(_basedir, node_name), pure) else: result = filename return os.path.normpath(result) def EnvironmentForHost(hostname, _basedir=_VIRT_BASEDIR): """Returns the environment variables for a host. """ if _basedir: return { _ROOTDIR_ENVNAME: MakeNodeRoot(_basedir, hostname), _HOSTNAME_ENVNAME: hostname, } else: return {} def AddNodePrefix(path, _noderoot=_VIRT_NODEROOT): """Adds a node-specific prefix to a path in a virtual cluster. Returned path includes user-specified root directory if specified in environment. As an example, the path C{/var/lib/ganeti} becomes C{/tmp/vcluster/node1/var/lib/ganeti} if C{/tmp/vcluster/node1} is the root directory specified in the environment. """ assert os.path.isabs(path) if _noderoot: result = "%s/%s" % (_noderoot, path) else: result = path assert os.path.isabs(result) return os.path.normpath(result) def _RemoveNodePrefix(path, _noderoot=_VIRT_NODEROOT): """Removes the node-specific prefix from a path. This is the opposite of L{AddNodePrefix} and removes a node-local prefix path. """ assert os.path.isabs(path) norm_path = os.path.normpath(path) if _noderoot: # Make sure path is actually below node root norm_root = os.path.normpath(_noderoot) root_with_sep = "%s%s" % (norm_root, os.sep) prefix = os.path.commonprefix([root_with_sep, norm_path]) if prefix == root_with_sep: result = norm_path[len(norm_root):] else: raise RuntimeError("Path '%s' is not below node root '%s'" % (path, _noderoot)) else: result = norm_path assert os.path.isabs(result) return result def MakeVirtualPath(path, _noderoot=_VIRT_NODEROOT): """Virtualizes a path. A path is "virtualized" by stripping it of its node-specific directory and prepending a prefix (L{_VIRT_PATH_PREFIX}). Use L{LocalizeVirtualPath} to undo the process. Virtual paths are meant to be transported via RPC. """ assert os.path.isabs(path) if _noderoot and path not in _VPATH_WHITELIST: return _VIRT_PATH_PREFIX + _RemoveNodePrefix(path, _noderoot=_noderoot) else: return path def LocalizeVirtualPath(path, _noderoot=_VIRT_NODEROOT): """Localizes a virtual path. A "virtualized" path consists of a prefix (L{LocalizeVirtualPath}) and a local path. This function adds the node-specific directory to the local path. Virtual paths are meant to be transported via RPC. """ assert os.path.isabs(path) if _noderoot and path not in _VPATH_WHITELIST: if path.startswith(_VIRT_PATH_PREFIX): return AddNodePrefix(path[len(_VIRT_PATH_PREFIX):], _noderoot=_noderoot) else: raise RuntimeError("Path '%s' is not a virtual path" % path) else: return path ganeti-2.9.3/lib/utils/0000755000000000000000000000000012271445544014710 5ustar00rootroot00000000000000ganeti-2.9.3/lib/utils/hash.py0000644000000000000000000000470612230001635016174 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for hashing. """ import os import hmac from ganeti import compat def Sha1Hmac(key, text, salt=None): """Calculates the HMAC-SHA1 digest of a text. HMAC is defined in RFC2104. @type key: string @param key: Secret key @type text: string """ if salt: salted_text = salt + text else: salted_text = text return hmac.new(key, salted_text, compat.sha1).hexdigest() def VerifySha1Hmac(key, text, digest, salt=None): """Verifies the HMAC-SHA1 digest of a text. HMAC is defined in RFC2104. @type key: string @param key: Secret key @type text: string @type digest: string @param digest: Expected digest @rtype: bool @return: Whether HMAC-SHA1 digest matches """ return digest.lower() == Sha1Hmac(key, text, salt=salt).lower() def _FingerprintFile(filename): """Compute the fingerprint of a file. If the file does not exist, a None will be returned instead. @type filename: str @param filename: the filename to checksum @rtype: str @return: the hex digest of the sha checksum of the contents of the file """ if not (os.path.exists(filename) and os.path.isfile(filename)): return None f = open(filename) fp = compat.sha1_hash() while True: data = f.read(4096) if not data: break fp.update(data) return fp.hexdigest() def FingerprintFiles(files): """Compute fingerprints for a list of files. @type files: list @param files: the list of filename to fingerprint @rtype: dict @return: a dictionary filename: fingerprint, holding only existing files """ ret = {} for filename in files: cksum = _FingerprintFile(filename) if cksum: ret[filename] = cksum return ret ganeti-2.9.3/lib/utils/text.py0000644000000000000000000004207412271422343016245 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for manipulating or working with text. """ import re import os import time import collections from ganeti import errors #: Unit checker regexp _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") #: Characters which don't need to be quoted for shell commands _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") #: Shell param checker regexp _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) _ASCII_ELLIPSIS = "..." #: MAC address octet _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" def MatchNameComponent(key, name_list, case_sensitive=True): """Try to match a name against a list. This function will try to match a name like test1 against a list like C{['test1.example.com', 'test2.example.com', ...]}. 
Against this list, I{'test1'} as well as I{'test1.example'} will match, but not I{'test1.ex'}. A multiple match will be considered as no match at all (e.g. I{'test1'} against C{['test1.example.com', 'test1.example.org']}), except when the key fully matches an entry (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). @type key: str @param key: the name to be searched @type name_list: list @param name_list: the list of strings against which to search the key @type case_sensitive: boolean @param case_sensitive: whether to provide a case-sensitive match @rtype: None or str @return: None if there is no match I{or} if there are multiple matches, otherwise the element from the list which matches """ if key in name_list: return key re_flags = 0 if not case_sensitive: re_flags |= re.IGNORECASE key = key.upper() name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) names_filtered = [] string_matches = [] for name in name_list: if name_re.match(name) is not None: names_filtered.append(name) if not case_sensitive and key == name.upper(): string_matches.append(name) if len(string_matches) == 1: return string_matches[0] if len(names_filtered) == 1: return names_filtered[0] return None def _DnsNameGlobHelper(match): """Helper function for L{DnsNameGlobPattern}. Returns regular expression pattern for parts of the pattern. """ text = match.group(0) if text == "*": return "[^.]*" elif text == "?": return "[^.]" else: return re.escape(text) def DnsNameGlobPattern(pattern): """Generates regular expression from DNS name globbing pattern. A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular expression. Escape sequences or ranges (e.g. [a-z]) are not supported. Matching always starts at the leftmost part. An asterisk (*) matches all characters except the dot (.) separating DNS name parts. A question mark (?) matches a single character except the dot (.). @type pattern: string @param pattern: DNS name globbing pattern @rtype: string @return: Regular expression """ return r"^%s(\..*)?$" % re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern) def FormatUnit(value, units): """Formats an incoming number of MiB with the appropriate unit. @type value: int @param value: integer representing the value in MiB (1048576) @type units: char @param units: the type of formatting we should do: - 'h' for automatic scaling - 'm' for MiBs - 'g' for GiBs - 't' for TiBs @rtype: str @return: the formatted value (with suffix) """ if units not in ("m", "g", "t", "h"): raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units)) suffix = "" if units == "m" or (units == "h" and value < 1024): if units == "h": suffix = "M" return "%d%s" % (round(value, 0), suffix) elif units == "g" or (units == "h" and value < (1024 * 1024)): if units == "h": suffix = "G" return "%0.1f%s" % (round(float(value) / 1024, 1), suffix) else: if units == "h": suffix = "T" return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix) def ParseUnit(input_string): """Tries to extract number and scale from the given string. Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE* [UNIT]}. If no unit is specified, it defaults to MiB. Return value is always an int in MiB. 
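For example (note the rounding up to a multiple of four MiB)::

  >>> ParseUnit("1.5g")
  1536
  >>> ParseUnit("1.3")
  4
  >>> ParseUnit("4t")
  4194304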
""" m = _PARSEUNIT_REGEX.match(str(input_string)) if not m: raise errors.UnitParseError("Invalid format") value = float(m.groups()[0]) unit = m.groups()[1] if unit: lcunit = unit.lower() else: lcunit = "m" if lcunit in ("m", "mb", "mib"): # Value already in MiB pass elif lcunit in ("g", "gb", "gib"): value *= 1024 elif lcunit in ("t", "tb", "tib"): value *= 1024 * 1024 else: raise errors.UnitParseError("Unknown unit: %s" % unit) # Make sure we round up if int(value) < value: value += 1 # Round up to the next multiple of 4 value = int(value) if value % 4: value += 4 - value % 4 return value def ShellQuote(value): """Quotes shell argument according to POSIX. @type value: str @param value: the argument to be quoted @rtype: str @return: the quoted value """ if _SHELL_UNQUOTED_RE.match(value): return value else: return "'%s'" % value.replace("'", "'\\''") def ShellQuoteArgs(args): """Quotes a list of shell arguments. @type args: list @param args: list of arguments to be quoted @rtype: str @return: the quoted arguments concatenated with spaces """ return " ".join([ShellQuote(i) for i in args]) class ShellWriter: """Helper class to write scripts with indentation. """ INDENT_STR = " " def __init__(self, fh, indent=True): """Initializes this class. """ self._fh = fh self._indent_enabled = indent self._indent = 0 def IncIndent(self): """Increase indentation level by 1. """ self._indent += 1 def DecIndent(self): """Decrease indentation level by 1. """ assert self._indent > 0 self._indent -= 1 def Write(self, txt, *args): """Write line to output file. """ assert self._indent >= 0 if args: line = txt % args else: line = txt if line and self._indent_enabled: # Indent only if there's something on the line self._fh.write(self._indent * self.INDENT_STR) self._fh.write(line) self._fh.write("\n") def GenerateSecret(numbytes=20): """Generates a random secret. This will generate a pseudo-random secret returning an hex string (so that it can be used where an ASCII string is needed). @param numbytes: the number of bytes which will be represented by the returned string (defaulting to 20, the length of a SHA1 hash) @rtype: str @return: an hex representation of the pseudo-random sequence """ return os.urandom(numbytes).encode("hex") def _MakeMacAddrRegexp(octets): """Builds a regular expression for verifying MAC addresses. @type octets: integer @param octets: How many octets to expect (1-6) @return: Compiled regular expression """ assert octets > 0 assert octets <= 6 return re.compile("^%s$" % ":".join([_MAC_ADDR_OCTET_RE] * octets), re.I) #: Regular expression for full MAC address _MAC_CHECK_RE = _MakeMacAddrRegexp(6) #: Regular expression for half a MAC address _MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3) def _MacAddressCheck(check_re, mac, msg): """Checks a MAC address using a regular expression. @param check_re: Compiled regular expression as returned by C{re.compile} @type mac: string @param mac: MAC address to be validated @type msg: string @param msg: Error message (%s will be replaced with MAC address) """ if check_re.match(mac): return mac.lower() raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL) def NormalizeAndValidateMac(mac): """Normalizes and check if a MAC address is valid and contains six octets. Checks whether the supplied MAC address is formally correct. Accepts colon-separated format only. Normalize it to all lower case. 
@type mac: string @param mac: MAC address to be validated @rtype: string @return: Normalized and validated MAC address @raise errors.OpPrereqError: If the MAC address isn't valid """ return _MacAddressCheck(_MAC_CHECK_RE, mac, "Invalid MAC address '%s'") def NormalizeAndValidateThreeOctetMacPrefix(mac): """Normalizes a potential MAC address prefix (three octets). Checks whether the supplied string is a valid MAC address prefix consisting of three colon-separated octets. The result is normalized to all lower case. @type mac: string @param mac: Prefix to be validated @rtype: string @return: Normalized and validated prefix @raise errors.OpPrereqError: If the MAC address prefix isn't valid """ return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, "Invalid MAC address prefix '%s'") def SafeEncode(text): """Return a 'safe' version of a source string. This function mangles the input string and returns a version that should be safe to display/encode as ASCII. To this end, we first convert it to ASCII using the 'backslashreplace' encoding which should get rid of any non-ASCII chars, and then we process it through a loop copied from the string repr sources in the python; we don't use string_escape anymore since that escape single quotes and backslashes too, and that is too much; and that escaping is not stable, i.e. string_escape(string_escape(x)) != string_escape(x). @type text: str or unicode @param text: input data @rtype: str @return: a safe version of text """ if isinstance(text, unicode): # only if unicode; if str already, we handle it below text = text.encode("ascii", "backslashreplace") resu = "" for char in text: c = ord(char) if char == "\t": resu += r"\t" elif char == "\n": resu += r"\n" elif char == "\r": resu += r'\'r' elif c < 32 or c >= 127: # non-printable resu += "\\x%02x" % (c & 0xff) else: resu += char return resu def UnescapeAndSplit(text, sep=","): r"""Split and unescape a string based on a given separator. This function splits a string based on a separator where the separator itself can be escape in order to be an element of the elements. The escaping rules are (assuming coma being the separator): - a plain , separates the elements - a sequence \\\\, (double backslash plus comma) is handled as a backslash plus a separator comma - a sequence \, (backslash plus comma) is handled as a non-separator comma @type text: string @param text: the string to split @type sep: string @param text: the separator @rtype: string @return: a list of strings """ # we split the list by sep (with no escaping at this stage) slist = text.split(sep) # next, we revisit the elements and if any of them ended with an odd # number of backslashes, then we join it with the next rlist = [] while slist: e1 = slist.pop(0) if e1.endswith("\\"): num_b = len(e1) - len(e1.rstrip("\\")) if num_b % 2 == 1 and slist: e2 = slist.pop(0) # Merge the two elements and push the result back to the source list for # revisiting. If e2 ended with backslashes, further merging may need to # be done. slist.insert(0, e1 + sep + e2) continue # here the backslashes remain (all), and will be reduced in the next step rlist.append(e1) # finally, replace backslash-something with something rlist = [re.sub(r"\\(.)", r"\1", v) for v in rlist] return rlist def CommaJoin(names): """Nicely join a set of identifiers. @param names: set, list or tuple @return: a string with the formatted results """ return ", ".join([str(val) for val in names]) def FormatTime(val, usecs=None): """Formats a time value. 
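For example (the exact output depends on the local timezone; shown here for
UTC)::

  >>> FormatTime(1325376000.0)
  '2012-01-01 00:00:00'
  >>> FormatTime(1325376000.0, usecs=500000)
  '2012-01-01 00:00:00.500000'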
@type val: float or None @param val: Timestamp as returned by time.time() (seconds since Epoch, 1970-01-01 00:00:00 UTC) @return: a string value or N/A if we don't have a valid timestamp """ if val is None or not isinstance(val, (int, float)): return "N/A" # these two codes works on Linux, but they are not guaranteed on all # platforms result = time.strftime("%F %T", time.localtime(val)) if usecs is not None: result += ".%06d" % usecs return result def FormatSeconds(secs): """Formats seconds for easier reading. @type secs: number @param secs: Number of seconds @rtype: string @return: Formatted seconds (e.g. "2d 9h 19m 49s") """ parts = [] secs = round(secs, 0) if secs > 0: # Negative values would be a bit tricky for unit, one in [("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)]: (complete, secs) = divmod(secs, one) if complete or parts: parts.append("%d%s" % (complete, unit)) parts.append("%ds" % secs) return " ".join(parts) class LineSplitter: """Splits data chunks into lines separated by newline. Instances provide a file-like interface. """ def __init__(self, line_fn, *args): """Initializes this class. @type line_fn: callable @param line_fn: Function called for each line, first parameter is line @param args: Extra arguments for L{line_fn} """ assert callable(line_fn) if args: # Python 2.4 doesn't have functools.partial yet self._line_fn = \ lambda line: line_fn(line, *args) # pylint: disable=W0142 else: self._line_fn = line_fn self._lines = collections.deque() self._buffer = "" def write(self, data): parts = (self._buffer + data).split("\n") self._buffer = parts.pop() self._lines.extend(parts) def flush(self): while self._lines: self._line_fn(self._lines.popleft().rstrip("\r\n")) def close(self): self.flush() if self._buffer: self._line_fn(self._buffer) def IsValidShellParam(word): """Verifies is the given word is safe from the shell's p.o.v. This means that we can pass this to a command via the shell and be sure that it doesn't alter the command line and is passed as such to the actual command. Note that we are overly restrictive here, in order to be on the safe side. @type word: str @param word: the word to check @rtype: boolean @return: True if the word is 'safe' """ return bool(_SHELLPARAM_REGEX.match(word)) def BuildShellCmd(template, *args): """Build a safe shell command line from the given arguments. This function will check all arguments in the args list so that they are valid shell parameters (i.e. they don't contain shell metacharacters). If everything is ok, it will return the result of template % args. @type template: str @param template: the string holding the template for the string formatting @rtype: str @return: the expanded command line """ for word in args: if not IsValidShellParam(word): raise errors.ProgrammerError("Shell argument '%s' contains" " invalid characters" % word) return template % args def FormatOrdinal(value): """Formats a number as an ordinal in the English language. E.g. the number 1 becomes "1st", 22 becomes "22nd". @type value: integer @param value: Number @rtype: string """ tens = value % 10 if value > 10 and value < 20: suffix = "th" elif tens == 1: suffix = "st" elif tens == 2: suffix = "nd" elif tens == 3: suffix = "rd" else: suffix = "th" return "%s%s" % (value, suffix) def Truncate(text, length): """Truncate string and add ellipsis if needed. 
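For example::

  >>> Truncate("hello world", 8)
  'hello...'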
@type text: string @param text: Text @type length: integer @param length: Desired length @rtype: string @return: Truncated text """ assert length > len(_ASCII_ELLIPSIS) # Serialize if necessary if not isinstance(text, basestring): text = str(text) if len(text) <= length: return text else: return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS def FilterEmptyLinesAndComments(text): """Filters empty lines and comments from a line-based string. Whitespace is also removed from the beginning and end of all lines. @type text: string @param text: Input string @rtype: list """ return [line for line in map(lambda s: s.strip(), text.splitlines()) # Ignore empty lines and comments if line and not line.startswith("#")] def FormatKeyValue(data): """Formats a dictionary as "key=value" parameters. The keys are sorted to have a stable order. @type data: dict @rtype: list of string """ return ["%s=%s" % (key, value) for (key, value) in sorted(data.items())] ganeti-2.9.3/lib/utils/mlock.py0000644000000000000000000000446012230001635016353 0ustar00rootroot00000000000000# # # Copyright (C) 2009, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Wrapper around mlockall(2). """ import os import logging from ganeti import errors try: # pylint: disable=F0401 import ctypes except ImportError: ctypes = None # Flags for mlockall(2) (from bits/mman.h) _MCL_CURRENT = 1 _MCL_FUTURE = 2 def Mlockall(_ctypes=ctypes): """Lock current process' virtual address space into RAM. This is equivalent to the C call C{mlockall(MCL_CURRENT | MCL_FUTURE)}. See mlockall(2) for more details. This function requires the C{ctypes} module. @raises errors.NoCtypesError: If the C{ctypes} module is not found """ if _ctypes is None: raise errors.NoCtypesError() try: libc = _ctypes.cdll.LoadLibrary("libc.so.6") except EnvironmentError, err: logging.error("Failure trying to load libc: %s", err) libc = None if libc is None: logging.error("Cannot set memory lock, ctypes cannot load libc") return # The ctypes module before Python 2.6 does not have built-in functionality to # access the global errno global (which, depending on the libc and build # options, is per thread), where function error codes are stored. Use GNU # libc's way to retrieve errno(3) instead, which is to use the pointer named # "__errno_location" (see errno.h and bits/errno.h). # pylint: disable=W0212 libc.__errno_location.restype = _ctypes.POINTER(_ctypes.c_int) if libc.mlockall(_MCL_CURRENT | _MCL_FUTURE): # pylint: disable=W0212 logging.error("Cannot set memory lock: %s", os.strerror(libc.__errno_location().contents.value)) return logging.debug("Memory lock set") ganeti-2.9.3/lib/utils/algo.py0000644000000000000000000001642512267470014016207 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions with algorithms. """ import re import time import itertools from ganeti import compat from ganeti.utils import text _SORTER_GROUPS = 8 _SORTER_RE = re.compile("^%s(.*)$" % (_SORTER_GROUPS * r"(\D+|\d+)?")) def UniqueSequence(seq): """Returns a list with unique elements. Element order is preserved. @type seq: sequence @param seq: the sequence with the source elements @rtype: list @return: list of unique elements from seq """ seen = set() return [i for i in seq if i not in seen and not seen.add(i)] def JoinDisjointDicts(dict_a, dict_b): """Joins dictionaries with no conflicting keys. Enforces the constraint that the two key sets must be disjoint, and then merges the two dictionaries in a new dictionary that is returned to the caller. @type dict_a: dict @param dict_a: the first dictionary @type dict_b: dict @param dict_b: the second dictionary @rtype: dict @return: a new dictionary containing all the key/value pairs contained in the two dictionaries. """ assert not (set(dict_a) & set(dict_b)), ("Duplicate keys found while joining" " %s and %s" % (dict_a, dict_b)) result = dict_a.copy() result.update(dict_b) return result def FindDuplicates(seq): """Identifies duplicates in a list. Does not preserve element order. @type seq: sequence @param seq: Sequence with source elements @rtype: list @return: List of duplicate elements from seq """ dup = set() seen = set() for item in seq: if item in seen: dup.add(item) else: seen.add(item) return list(dup) def _NiceSortTryInt(val): """Attempts to convert a string to an integer. """ if val and val.isdigit(): return int(val) else: return val def NiceSortKey(value): """Extract key for sorting. """ return [_NiceSortTryInt(grp) for grp in _SORTER_RE.match(value).groups()] def NiceSort(values, key=None): """Sort a list of strings based on digit and non-digit groupings. Given a list of names C{['a1', 'a10', 'a11', 'a2']} this function will sort the list in the logical order C{['a1', 'a2', 'a10', 'a11']}. The sort algorithm breaks each name in groups of either only-digits or no-digits. Only the first eight such groups are considered, and after that we just use what's left of the string. @type values: list @param values: the names to be sorted @type key: callable or None @param key: function of one argument to extract a comparison key from each list element, must return string @rtype: list @return: a copy of the name list sorted with our algorithm """ if key is None: keyfunc = NiceSortKey else: keyfunc = lambda value: NiceSortKey(key(value)) return sorted(values, key=keyfunc) def InvertDict(dict_in): """Inverts the key/value mapping of a dict. @param dict_in: The dict to invert @return: the inverted dict """ return dict(zip(dict_in.values(), dict_in.keys())) def InsertAtPos(src, pos, other): """Inserts C{other} at given C{pos} into C{src}. 
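For example::

  >>> InsertAtPos([1, 2, 5, 6], 2, [3, 4])
  [1, 2, 3, 4, 5, 6]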
@note: This function does not modify C{src} in place but returns a new copy @type src: list @param src: The source list in which we want to insert elements @type pos: int @param pos: The position where we want to start inserting C{other} @type other: list @param other: The other list to insert into C{src} @return: A copy of C{src} with C{other} inserted at C{pos} """ new = src[:pos] new.extend(other) new.extend(src[pos:]) return new def SequenceToDict(seq, key=compat.fst): """Converts a sequence to a dictionary with duplicate detection. @type seq: sequence @param seq: Input sequence @type key: callable @param key: Function for retrieving dictionary key from sequence element @rtype: dict """ keys = map(key, seq) duplicates = FindDuplicates(keys) if duplicates: raise ValueError("Duplicate keys found: %s" % text.CommaJoin(duplicates)) assert len(keys) == len(seq) return dict(zip(keys, seq)) def _MakeFlatToDict(data): """Helper function for C{FlatToDict}. This function is called recursively. @param data: The input data as described in C{FlatToDict}, already split @returns: The so far converted dict """ if not compat.fst(compat.fst(data)): assert len(data) == 1, \ "not bottom most element, found %d elements, expected 1" % len(data) return compat.snd(compat.fst(data)) keyfn = lambda e: compat.fst(e).pop(0) return dict([(k, _MakeFlatToDict(list(g))) for (k, g) in itertools.groupby(sorted(data), keyfn)]) def FlatToDict(data, field_sep="/"): """Converts a flat structure to a fully fledged dict. It accepts a list of tuples in the form:: [ ("foo/bar", {"key1": "data1", "key2": "data2"}), ("foo/baz", {"key3" :"data3" }), ] where the first element is the key separated by C{field_sep}. This would then return:: { "foo": { "bar": {"key1": "data1", "key2": "data2"}, "baz": {"key3" :"data3" }, }, } @type data: list of tuple @param data: Input list to convert @type field_sep: str @param field_sep: The separator for the first field of the tuple @returns: A dict based on the input list """ return _MakeFlatToDict([(keys.split(field_sep), value) for (keys, value) in data]) class RunningTimeout(object): """Class to calculate remaining timeout when doing several operations. """ __slots__ = [ "_allow_negative", "_start_time", "_time_fn", "_timeout", ] def __init__(self, timeout, allow_negative, _time_fn=time.time): """Initializes this class. @type timeout: float @param timeout: Timeout duration @type allow_negative: bool @param allow_negative: Whether to return values below zero @param _time_fn: Time function for unittests """ object.__init__(self) if timeout is not None and timeout < 0.0: raise ValueError("Timeout must not be negative") self._timeout = timeout self._allow_negative = allow_negative self._time_fn = _time_fn self._start_time = None def Remaining(self): """Returns the remaining timeout. """ if self._timeout is None: return None # Get start time on first calculation if self._start_time is None: self._start_time = self._time_fn() # Calculate remaining time remaining_timeout = self._start_time + self._timeout - self._time_fn() if not self._allow_negative: # Ensure timeout is always >= 0 return max(0.0, remaining_timeout) return remaining_timeout ganeti-2.9.3/lib/utils/retry.py0000644000000000000000000001435312244641676016431 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for retrying function calls with a timeout. """ import time from ganeti import errors #: Special delay to specify whole remaining timeout RETRY_REMAINING_TIME = object() class RetryTimeout(Exception): """Retry loop timed out. Any arguments that were passed by the retried function to RetryAgain are preserved in RetryTimeout, if it is raised. If such an argument was an exception, the RaiseInner helper method will reraise it. """ def RaiseInner(self): if self.args and isinstance(self.args[0], Exception): raise self.args[0] else: raise RetryTimeout(*self.args) class RetryAgain(Exception): """Retry again. Any arguments passed to RetryAgain will be preserved, if a timeout occurs, as arguments to RetryTimeout. If an exception is passed, the RaiseInner() method of the resulting RetryTimeout exception can be used to reraise it. """ class _RetryDelayCalculator(object): """Calculator for increasing delays. """ __slots__ = [ "_factor", "_limit", "_next", "_start", ] def __init__(self, start, factor, limit): """Initializes this class. @type start: float @param start: Initial delay @type factor: float @param factor: Factor for delay increase @type limit: float or None @param limit: Upper limit for delay or None for no limit """ assert start > 0.0 assert factor >= 1.0 assert limit is None or limit >= 0.0 self._start = start self._factor = factor self._limit = limit self._next = start def __call__(self): """Returns current delay and calculates the next one. """ current = self._next # Update for next run; apply the upper limit only if one was given, as # min() with None would yield None if self._limit is None or self._next < self._limit: self._next = self._next * self._factor if self._limit is not None: self._next = min(self._limit, self._next) return current def Retry(fn, delay, timeout, args=None, wait_fn=time.sleep, _time_fn=time.time): """Call a function repeatedly until it succeeds. The function C{fn} is called repeatedly until it doesn't throw L{RetryAgain} anymore. Between calls a delay, specified by C{delay}, is inserted. After a total of C{timeout} seconds, this function throws L{RetryTimeout}. 
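A minimal usage sketch (C{_IsDaemonAlive} is a hypothetical predicate; any callable that raises L{RetryAgain} to request another attempt works the same way)::

  def _CheckDaemon():
    if not _IsDaemonAlive():
      raise RetryAgain()
    return True

  # Poll every 0.5 seconds, giving up after 30 seconds in total
  Retry(_CheckDaemon, 0.5, 30.0)
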
C{delay} can be one of the following: - callable returning the delay length as a float - Tuple of (start, factor, limit) - L{RETRY_REMAINING_TIME} to sleep until the timeout expires (this is useful when overriding L{wait_fn} to wait for an external event) - A static delay as a number (int or float) @type fn: callable @param fn: Function to be called @param delay: Either a callable (returning the delay), a tuple of (start, factor, limit) (see L{_RetryDelayCalculator}), L{RETRY_REMAINING_TIME} or a number (int or float) @type timeout: float @param timeout: Total timeout @type wait_fn: callable @param wait_fn: Waiting function @return: Return value of function """ assert callable(fn) assert callable(wait_fn) assert callable(_time_fn) if args is None: args = [] end_time = _time_fn() + timeout if callable(delay): # External function to calculate delay calc_delay = delay elif isinstance(delay, (tuple, list)): # Increasing delay with optional upper boundary (start, factor, limit) = delay calc_delay = _RetryDelayCalculator(start, factor, limit) elif delay is RETRY_REMAINING_TIME: # Always use the remaining time calc_delay = None else: # Static delay calc_delay = lambda: delay assert calc_delay is None or callable(calc_delay) while True: retry_args = [] try: # pylint: disable=W0142 return fn(*args) except RetryAgain, err: retry_args = err.args except RetryTimeout: raise errors.ProgrammerError("Nested retry loop detected that didn't" " handle RetryTimeout") remaining_time = end_time - _time_fn() if remaining_time <= 0.0: # pylint: disable=W0142 raise RetryTimeout(*retry_args) assert remaining_time > 0.0 if calc_delay is None: wait_fn(remaining_time) else: current_delay = calc_delay() if current_delay > 0.0: wait_fn(current_delay) def SimpleRetry(expected, fn, delay, timeout, args=None, wait_fn=time.sleep, _time_fn=time.time): """A wrapper over L{Retry} implementing a simpler interface. All the parameters are the same as for L{Retry}, except it has one extra argument: expected, which can be either a value (which will be compared with the result of the function), or a callable (which will get the result passed and has to return a boolean). If the test is false, we will retry until either the timeout has passed or the test succeeds. In both cases, the last result from calling the function will be returned. Note that this function is not expected to raise any retry-related exceptions, always simply returning values. As such, the function is designed to allow easy wrapping of code that doesn't use retry at all (e.g. "if fn(args)" replaced with "if SimpleRetry(True, fn, ...)"). @see: L{Retry} """ rdict = {} def helper(*innerargs): # pylint: disable=W0142 result = rdict["result"] = fn(*innerargs) if not ((callable(expected) and expected(result)) or result == expected): raise RetryAgain() return result try: result = Retry(helper, delay, timeout, args=args, wait_fn=wait_fn, _time_fn=_time_fn) except RetryTimeout: assert "result" in rdict result = rdict["result"] return result ganeti-2.9.3/lib/utils/x509.py0000644000000000000000000003043212244641676015775 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for X509. """ import time import OpenSSL import re import datetime import calendar import errno import logging from ganeti import errors from ganeti import constants from ganeti import pathutils from ganeti.utils import text as utils_text from ganeti.utils import io as utils_io from ganeti.utils import hash as utils_hash HEX_CHAR_RE = r"[a-zA-Z0-9]" VALID_X509_SIGNATURE_SALT = re.compile("^%s+$" % HEX_CHAR_RE, re.S) X509_SIGNATURE = re.compile(r"^%s:\s*(?P<salt>%s+)/(?P<sign>%s+)$" % (re.escape(constants.X509_CERT_SIGNATURE_HEADER), HEX_CHAR_RE, HEX_CHAR_RE), re.S | re.I) # Certificate verification results (CERT_WARNING, CERT_ERROR) = range(1, 3) #: ASN1 time regexp _ASN1_TIME_REGEX = re.compile(r"^(\d+)([-+]\d\d)(\d\d)$") def _ParseAsn1Generalizedtime(value): """Parses an ASN1 GENERALIZEDTIME timestamp as used by pyOpenSSL. @type value: string @param value: ASN1 GENERALIZEDTIME timestamp @return: Seconds since the Epoch (1970-01-01 00:00:00 UTC) """ m = _ASN1_TIME_REGEX.match(value) if m: # We have an offset asn1time = m.group(1) hours = int(m.group(2)) minutes = int(m.group(3)) utcoffset = (60 * hours) + minutes else: if not value.endswith("Z"): raise ValueError("Missing timezone") asn1time = value[:-1] utcoffset = 0 parsed = time.strptime(asn1time, "%Y%m%d%H%M%S") tt = datetime.datetime(*(parsed[:7])) - datetime.timedelta(minutes=utcoffset) return calendar.timegm(tt.utctimetuple()) def GetX509CertValidity(cert): """Returns the validity period of the certificate. @type cert: OpenSSL.crypto.X509 @param cert: X509 certificate object """ # The get_notBefore and get_notAfter functions are only supported in # pyOpenSSL 0.7 and above. try: get_notbefore_fn = cert.get_notBefore except AttributeError: not_before = None else: not_before_asn1 = get_notbefore_fn() if not_before_asn1 is None: not_before = None else: not_before = _ParseAsn1Generalizedtime(not_before_asn1) try: get_notafter_fn = cert.get_notAfter except AttributeError: not_after = None else: not_after_asn1 = get_notafter_fn() if not_after_asn1 is None: not_after = None else: not_after = _ParseAsn1Generalizedtime(not_after_asn1) return (not_before, not_after) def _VerifyCertificateInner(expired, not_before, not_after, now, warn_days, error_days): """Verifies certificate validity. 
@type expired: bool @param expired: Whether pyOpenSSL considers the certificate as expired @type not_before: number or None @param not_before: Unix timestamp before which certificate is not valid @type not_after: number or None @param not_after: Unix timestamp after which certificate is invalid @type now: number @param now: Current time as Unix timestamp @type warn_days: number or None @param warn_days: How many days before expiration a warning should be reported @type error_days: number or None @param error_days: How many days before expiration an error should be reported """ if expired: msg = "Certificate is expired" if not_before is not None and not_after is not None: msg += (" (valid from %s to %s)" % (utils_text.FormatTime(not_before), utils_text.FormatTime(not_after))) elif not_before is not None: msg += " (valid from %s)" % utils_text.FormatTime(not_before) elif not_after is not None: msg += " (valid until %s)" % utils_text.FormatTime(not_after) return (CERT_ERROR, msg) elif not_before is not None and not_before > now: return (CERT_WARNING, "Certificate not yet valid (valid from %s)" % utils_text.FormatTime(not_before)) elif not_after is not None: remaining_days = int((not_after - now) / (24 * 3600)) msg = "Certificate expires in about %d days" % remaining_days if error_days is not None and remaining_days <= error_days: return (CERT_ERROR, msg) if warn_days is not None and remaining_days <= warn_days: return (CERT_WARNING, msg) return (None, None) def VerifyX509Certificate(cert, warn_days, error_days): """Verifies a certificate for LUClusterVerify. @type cert: OpenSSL.crypto.X509 @param cert: X509 certificate object @type warn_days: number or None @param warn_days: How many days before expiration a warning should be reported @type error_days: number or None @param error_days: How many days before expiration an error should be reported """ # Depending on the pyOpenSSL version, this can just return (None, None) (not_before, not_after) = GetX509CertValidity(cert) now = time.time() + constants.NODE_MAX_CLOCK_SKEW return _VerifyCertificateInner(cert.has_expired(), not_before, not_after, now, warn_days, error_days) def SignX509Certificate(cert, key, salt): """Sign a X509 certificate. An RFC822-like signature header is added in front of the certificate. @type cert: OpenSSL.crypto.X509 @param cert: X509 certificate object @type key: string @param key: Key for HMAC @type salt: string @param salt: Salt for HMAC @rtype: string @return: Serialized and signed certificate in PEM format """ if not VALID_X509_SIGNATURE_SALT.match(salt): raise errors.GenericError("Invalid salt: %r" % salt) # Dumping as PEM here ensures the certificate is in a sane format cert_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert) return ("%s: %s/%s\n\n%s" % (constants.X509_CERT_SIGNATURE_HEADER, salt, utils_hash.Sha1Hmac(key, cert_pem, salt=salt), cert_pem)) def _ExtractX509CertificateSignature(cert_pem): """Helper function to extract signature from X509 certificate. """ # Extract signature from original PEM data for line in cert_pem.splitlines(): if line.startswith("---"): break m = X509_SIGNATURE.match(line.strip()) if m: return (m.group("salt"), m.group("sign")) raise errors.GenericError("X509 certificate signature is missing") def LoadSignedX509Certificate(cert_pem, key): """Verifies a signed X509 certificate. 
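Round-trip sketch using the helpers in this module (the common name, HMAC key and salt are made-up values)::

  (_, cert_pem) = GenerateSelfSignedX509Cert("node1.example.com", 3600)
  (cert, _) = ExtractX509Certificate(cert_pem)
  signed_pem = SignX509Certificate(cert, "hmac-key", "abc123")
  (cert2, salt) = LoadSignedX509Certificate(signed_pem, "hmac-key")
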
@type cert_pem: string @param cert_pem: Certificate in PEM format and with signature header @type key: string @param key: Key for HMAC @rtype: tuple; (OpenSSL.crypto.X509, string) @return: X509 certificate object and salt """ (salt, signature) = _ExtractX509CertificateSignature(cert_pem) # Load and dump certificate to ensure it's in a sane format (cert, sane_pem) = ExtractX509Certificate(cert_pem) if not utils_hash.VerifySha1Hmac(key, sane_pem, signature, salt=salt): raise errors.GenericError("X509 certificate signature is invalid") return (cert, salt) def GenerateSelfSignedX509Cert(common_name, validity): """Generates a self-signed X509 certificate. @type common_name: string @param common_name: commonName value @type validity: int @param validity: Validity for certificate in seconds @return: a tuple of strings containing the PEM-encoded private key and certificate """ # Create private and public key key = OpenSSL.crypto.PKey() key.generate_key(OpenSSL.crypto.TYPE_RSA, constants.RSA_KEY_BITS) # Create self-signed certificate cert = OpenSSL.crypto.X509() if common_name: cert.get_subject().CN = common_name cert.set_serial_number(1) cert.gmtime_adj_notBefore(0) cert.gmtime_adj_notAfter(validity) cert.set_issuer(cert.get_subject()) cert.set_pubkey(key) cert.sign(key, constants.X509_CERT_SIGN_DIGEST) key_pem = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key) cert_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert) return (key_pem, cert_pem) def GenerateSelfSignedSslCert(filename, common_name=constants.X509_CERT_CN, validity=constants.X509_CERT_DEFAULT_VALIDITY): """Legacy function to generate self-signed X509 certificate. @type filename: str @param filename: path to write certificate to @type common_name: string @param common_name: commonName value @type validity: int @param validity: validity of certificate in number of days @return: a tuple of strings containing the PEM-encoded private key and certificate """ # TODO: Investigate using the cluster name instead of X509_CERT_CN for # common_name, as cluster-renames are very seldom, and it'd be nice if RAPI # and node daemon certificates have the proper Subject/Issuer. (key_pem, cert_pem) = GenerateSelfSignedX509Cert(common_name, validity * 24 * 60 * 60) utils_io.WriteFile(filename, mode=0400, data=key_pem + cert_pem) return (key_pem, cert_pem) def ExtractX509Certificate(pem): """Extracts the certificate from a PEM-formatted string. @type pem: string @rtype: tuple; (OpenSSL.X509 object, string) @return: Certificate object and PEM-formatted certificate """ cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem) return (cert, OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)) def PrepareX509CertKeyCheck(cert, key): """Get function for verifying certificate with a certain private key. @type key: OpenSSL.crypto.PKey @param key: Private key object @type cert: OpenSSL.crypto.X509 @param cert: X509 certificate object @rtype: callable @return: Callable doing the actual check; will raise C{OpenSSL.SSL.Error} if certificate is not signed by given private key """ ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD) ctx.use_privatekey(key) ctx.use_certificate(cert) return ctx.check_privatekey def CheckNodeCertificate(cert, _noded_cert_file=pathutils.NODED_CERT_FILE): """Checks the local node daemon certificate against given certificate. Both certificates must be signed with the same key (as stored in the local L{pathutils.NODED_CERT_FILE} file). 
No error is raised if no local certificate can be found. @type cert: OpenSSL.crypto.X509 @param cert: X509 certificate object @raise errors.X509CertError: When an error related to X509 occurred @raise errors.GenericError: When the verification failed """ try: noded_pem = utils_io.ReadFile(_noded_cert_file) except EnvironmentError, err: if err.errno != errno.ENOENT: raise logging.debug("Node certificate file '%s' was not found", _noded_cert_file) return try: noded_cert = \ OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, noded_pem) except Exception, err: raise errors.X509CertError(_noded_cert_file, "Unable to load certificate: %s" % err) try: noded_key = \ OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, noded_pem) except Exception, err: raise errors.X509CertError(_noded_cert_file, "Unable to load private key: %s" % err) # Check consistency of server.pem file check_fn = PrepareX509CertKeyCheck(noded_cert, noded_key) try: check_fn() except OpenSSL.SSL.Error: # This should never happen as it would mean the certificate in server.pem # is out of sync with the private key stored in the same file raise errors.X509CertError(_noded_cert_file, "Certificate does not match with private key") # Check with supplied certificate with local key check_fn = PrepareX509CertKeyCheck(cert, noded_key) try: check_fn() except OpenSSL.SSL.Error: raise errors.GenericError("Given cluster certificate does not match" " local key") ganeti-2.9.3/lib/utils/storage.py0000644000000000000000000001356712271422343016732 0ustar00rootroot00000000000000# # # Copyright (C) 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for storage. """ import logging from ganeti import constants def GetDiskTemplatesOfStorageType(storage_type): """Given the storage type, returns a list of disk templates based on that storage type.""" return [dt for dt in constants.DISK_TEMPLATES if constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[dt] == storage_type] def GetLvmDiskTemplates(): """Returns all disk templates that use LVM.""" return GetDiskTemplatesOfStorageType(constants.ST_LVM_VG) def IsDiskTemplateEnabled(disk_template, enabled_disk_templates): """Checks if a particular disk template is enabled. """ return disk_template in enabled_disk_templates def IsFileStorageEnabled(enabled_disk_templates): """Checks if file storage is enabled. """ return IsDiskTemplateEnabled(constants.DT_FILE, enabled_disk_templates) def IsSharedFileStorageEnabled(enabled_disk_templates): """Checks if shared file storage is enabled. 
""" return IsDiskTemplateEnabled(constants.DT_SHARED_FILE, enabled_disk_templates) def IsLvmEnabled(enabled_disk_templates): """Check whether or not any lvm-based disk templates are enabled.""" return len(set(GetLvmDiskTemplates()) .intersection(set(enabled_disk_templates))) != 0 def LvmGetsEnabled(enabled_disk_templates, new_enabled_disk_templates): """Checks whether lvm was not enabled before, but will be enabled after the operation. """ if IsLvmEnabled(enabled_disk_templates): return False return set(GetLvmDiskTemplates()).intersection( set(new_enabled_disk_templates)) def _GetDefaultStorageUnitForDiskTemplate(cfg, disk_template): """Retrieves the identifier of the default storage entity for the given storage type. @type cfg: C{objects.ConfigData} @param cfg: the configuration data @type disk_template: string @param disk_template: a disk template, for example 'drbd' @rtype: string @return: identifier for a storage unit, for example the vg_name for lvm storage """ storage_type = constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[disk_template] cluster = cfg.GetClusterInfo() if disk_template in GetLvmDiskTemplates(): return (storage_type, cfg.GetVGName()) elif disk_template == constants.DT_FILE: return (storage_type, cluster.file_storage_dir) elif disk_template == constants.DT_SHARED_FILE: return (storage_type, cluster.shared_file_storage_dir) else: return (storage_type, None) def _GetDefaultStorageUnitForSpindles(cfg): """Creates a 'spindle' storage unit, by retrieving the volume group name and associating it to the lvm-pv storage type. @rtype: (string, string) @return: tuple (storage_type, storage_key), where storage type is 'lvm-pv' and storage_key the name of the default volume group """ return (constants.ST_LVM_PV, cfg.GetVGName()) def GetStorageUnitsOfCluster(cfg, include_spindles=False): """Examines the cluster's configuration and returns a list of storage units and their storage keys, ordered by the order in which they are enabled. @type cfg: L{config.ConfigWriter} @param cfg: Cluster configuration @type include_spindles: boolean @param include_spindles: flag to include an extra storage unit for physical volumes @rtype: list of tuples (string, string) @return: list of storage units, each storage unit being a tuple of (storage_type, storage_key); storage_type is in C{constants.STORAGE_TYPES} and the storage_key a string to identify an entity of that storage type, for example a volume group name for LVM storage or a file for file storage. """ cluster_config = cfg.GetClusterInfo() storage_units = [] for disk_template in cluster_config.enabled_disk_templates: if constants.MAP_DISK_TEMPLATE_STORAGE_TYPE[disk_template]\ in constants.STS_REPORT: storage_units.append( _GetDefaultStorageUnitForDiskTemplate(cfg, disk_template)) if include_spindles: included_storage_types = set([st for (st, _) in storage_units]) if not constants.ST_LVM_PV in included_storage_types: storage_units.append( _GetDefaultStorageUnitForSpindles(cfg)) return storage_units def LookupSpaceInfoByStorageType(storage_space_info, storage_type): """Looks up the storage space info for a given storage type. Note that this lookup can be ambiguous if storage space reporting for several units of the same storage type was requested. This function is only supposed to be used for legacy code in situations where it actually is unambiguous. 
@type storage_space_info: list of dicts @param storage_space_info: result of C{GetNodeInfo} @type storage_type: string @param storage_type: a storage type, which is included in the storage_units list @rtype: tuple @return: returns the element of storage_space_info that matches the given storage type """ result = None for unit_info in storage_space_info: if unit_info["type"] == storage_type: if result is None: result = unit_info else: # There is more than one storage type in the query, log a warning logging.warning("Storage space information requested for" " ambiguous storage type '%s'.", storage_type) return result ganeti-2.9.3/lib/utils/log.py0000644000000000000000000002302012244641676016044 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for logging. """ import os.path import logging import logging.handlers from cStringIO import StringIO from ganeti import constants from ganeti import compat class _ReopenableLogHandler(logging.handlers.BaseRotatingHandler): """Log handler with ability to reopen log file on request. In combination with a SIGHUP handler this class can reopen the log file on user request. """ def __init__(self, filename): """Initializes this class. @type filename: string @param filename: Path to logfile """ logging.handlers.BaseRotatingHandler.__init__(self, filename, "a") assert self.encoding is None, "Encoding not supported for logging" assert not hasattr(self, "_reopen"), "Base class has '_reopen' attribute" self._reopen = False def shouldRollover(self, _): # pylint: disable=C0103 """Determine whether log file should be reopened. """ return self._reopen or not self.stream def doRollover(self): # pylint: disable=C0103 """Reopens the log file. """ if self.stream: self.stream.flush() self.stream.close() self.stream = None # Reopen file # TODO: Handle errors? self.stream = open(self.baseFilename, "a") # Don't reopen on the next message self._reopen = False def RequestReopen(self): """Register a request to reopen the file. The file will be reopened before writing the next log record. """ self._reopen = True def _LogErrorsToConsole(base): """Create wrapper class writing errors to console. This needs to be in a function for unittesting. """ class wrapped(base): # pylint: disable=C0103 """Log handler that doesn't fallback to stderr. When an error occurs while writing on the logfile, logging.FileHandler tries to log on stderr. This doesn't work in Ganeti since stderr is redirected to a logfile. This class avoids failures by reporting errors to /dev/console. """ def __init__(self, console, *args, **kwargs): """Initializes this class. 
@type console: file-like object or None @param console: Open file-like object for console """ base.__init__(self, *args, **kwargs) assert not hasattr(self, "_console") self._console = console def handleError(self, record): # pylint: disable=C0103 """Handle errors which occur during an emit() call. Try to handle errors with the FileHandler method; if that fails, write to /dev/console. """ try: base.handleError(record) except Exception: # pylint: disable=W0703 if self._console: try: # Ignore warning about "self.format", pylint: disable=E1101 self._console.write("Cannot log message:\n%s\n" % self.format(record)) except Exception: # pylint: disable=W0703 # Log handler tried everything it could, now just give up pass return wrapped #: Custom log handler for writing to console with a reopenable handler _LogHandler = _LogErrorsToConsole(_ReopenableLogHandler) def _GetLogFormatter(program, multithreaded, debug, syslog): """Build log formatter. @param program: Program name @param multithreaded: Whether to add thread name to log messages @param debug: Whether to enable debug messages @param syslog: Whether the formatter will be used for syslog """ parts = [] if syslog: parts.append(program + "[%(process)d]:") else: parts.append("%(asctime)s: " + program + " pid=%(process)d") if multithreaded: if syslog: parts.append(" (%(threadName)s)") else: parts.append("/%(threadName)s") # Add debug info for non-syslog loggers if debug and not syslog: parts.append(" %(module)s:%(lineno)s") # Yes, we do want the textual level, as remote syslog will probably lose the # error level, and it's easier to grep for it. parts.append(" %(levelname)s %(message)s") return logging.Formatter("".join(parts)) def _ReopenLogFiles(handlers): """Wrapper for reopening all log handler's files in a sequence. """ for handler in handlers: handler.RequestReopen() logging.info("Received request to reopen log files") def SetupLogging(logfile, program, debug=0, stderr_logging=False, multithreaded=False, syslog=constants.SYSLOG_USAGE, console_logging=False, root_logger=None): """Configures the logging module. 
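A minimal daemon-style sketch (the log file path and program name are made up)::

  reopen_fn = SetupLogging("/var/log/ganeti/example.log", "example-daemon",
                           debug=1, multithreaded=True)
  # Later, e.g. from a SIGHUP handler, reopen the log file:
  reopen_fn()
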
@type logfile: str @param logfile: the filename to which we should log @type program: str @param program: the name under which we should log messages @type debug: integer @param debug: if greater than zero, enable debug messages, otherwise only those at C{INFO} and above level @type stderr_logging: boolean @param stderr_logging: whether we should also log to the standard error @type multithreaded: boolean @param multithreaded: if True, will add the thread name to the log file @type syslog: string @param syslog: one of 'no', 'yes', 'only': - if no, syslog is not used - if yes, syslog is used (in addition to file-logging) - if only, only syslog is used @type console_logging: boolean @param console_logging: if True, will use a FileHandler which falls back to the system console if logging fails @type root_logger: logging.Logger @param root_logger: Root logger to use (for unittests) @raise EnvironmentError: if we can't open the log file and syslog/stderr logging is disabled @rtype: callable @return: Function reopening all open log files when called """ progname = os.path.basename(program) formatter = _GetLogFormatter(progname, multithreaded, debug, False) syslog_fmt = _GetLogFormatter(progname, multithreaded, debug, True) reopen_handlers = [] if root_logger is None: root_logger = logging.getLogger("") root_logger.setLevel(logging.NOTSET) # Remove all previously setup handlers for handler in root_logger.handlers: handler.close() root_logger.removeHandler(handler) if stderr_logging: stderr_handler = logging.StreamHandler() stderr_handler.setFormatter(formatter) if debug: stderr_handler.setLevel(logging.NOTSET) else: stderr_handler.setLevel(logging.CRITICAL) root_logger.addHandler(stderr_handler) if syslog in (constants.SYSLOG_YES, constants.SYSLOG_ONLY): facility = logging.handlers.SysLogHandler.LOG_DAEMON syslog_handler = logging.handlers.SysLogHandler(constants.SYSLOG_SOCKET, facility) syslog_handler.setFormatter(syslog_fmt) # Never enable debug over syslog syslog_handler.setLevel(logging.INFO) root_logger.addHandler(syslog_handler) if syslog != constants.SYSLOG_ONLY: # this can fail, if the logging directories are not set up or we have # a permission problem; in this case, it's best to log but ignore # the error if stderr_logging is True, and if false we re-raise the # exception since otherwise we could run but without any logs at all try: if console_logging: logfile_handler = _LogHandler(open(constants.DEV_CONSOLE, "a"), logfile) else: logfile_handler = _ReopenableLogHandler(logfile) logfile_handler.setFormatter(formatter) if debug: logfile_handler.setLevel(logging.DEBUG) else: logfile_handler.setLevel(logging.INFO) root_logger.addHandler(logfile_handler) reopen_handlers.append(logfile_handler) except EnvironmentError: if stderr_logging or syslog == constants.SYSLOG_YES: logging.exception("Failed to enable logging to file '%s'", logfile) else: # we need to re-raise the exception raise return compat.partial(_ReopenLogFiles, reopen_handlers) def SetupToolLogging(debug, verbose, threadname=False, _root_logger=None, _stream=None): """Configures the logging module for tools. All log messages are sent to stderr. 
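Example (a minimal sketch)::

  # Log INFO and above to stderr, without debug messages
  SetupToolLogging(False, True)
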
@type debug: boolean @param debug: Disable log message filtering @type verbose: boolean @param verbose: Enable verbose log messages @type threadname: boolean @param threadname: Whether to include thread name in output """ if _root_logger is None: root_logger = logging.getLogger("") else: root_logger = _root_logger fmt = StringIO() fmt.write("%(asctime)s:") if threadname: fmt.write(" %(threadName)s") if debug or verbose: fmt.write(" %(levelname)s") fmt.write(" %(message)s") formatter = logging.Formatter(fmt.getvalue()) stderr_handler = logging.StreamHandler(_stream) stderr_handler.setFormatter(formatter) if debug: stderr_handler.setLevel(logging.NOTSET) elif verbose: stderr_handler.setLevel(logging.INFO) else: stderr_handler.setLevel(logging.WARNING) root_logger.setLevel(logging.NOTSET) root_logger.addHandler(stderr_handler) ganeti-2.9.3/lib/utils/lvm.py0000644000000000000000000000545612244641676016076 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for LVM. """ from ganeti import constants def CheckVolumeGroupSize(vglist, vgname, minsize): """Checks if the volume group list is valid. The function will check if a given volume group is in the list of volume groups and has a minimum size. @type vglist: dict @param vglist: dictionary of volume group names and their size @type vgname: str @param vgname: the volume group we should check @type minsize: int @param minsize: the minimum size we accept @rtype: None or str @return: None for success, otherwise the error message """ vgsize = vglist.get(vgname, None) if vgsize is None: return "volume group '%s' missing" % vgname elif vgsize < minsize: return ("volume group '%s' too small (%s MiB required, %d MiB found)" % (vgname, minsize, vgsize)) return None def LvmExclusiveCheckNodePvs(pvs_info): """Check consistency of PV sizes in a node for exclusive storage. @type pvs_info: list @param pvs_info: list of L{LvmPvInfo} objects @rtype: tuple @return: A pair composed of: 1. a list of error strings describing the violations found, or an empty list if everything is ok; 2. a pair containing the sizes of the smallest and biggest PVs, in MiB. """ errmsgs = [] sizes = [pv.size for pv in pvs_info] # The sizes of PVs must be the same (tolerance is constants.PART_MARGIN) small = min(sizes) big = max(sizes) if LvmExclusiveTestBadPvSizes(small, big): m = ("Sizes of PVs are too different: min=%d max=%d" % (small, big)) errmsgs.append(m) return (errmsgs, (small, big)) def LvmExclusiveTestBadPvSizes(small, big): """Test if the given PV sizes are permitted with exclusive storage. 
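Illustrative values, assuming C{constants.PART_MARGIN} were 0.01 (the shipped value may differ)::

  LvmExclusiveTestBadPvSizes(1000, 1005)  # => False, within margin
  LvmExclusiveTestBadPvSizes(1000, 2000)  # => True, sizes too different
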
@param small: size of the smallest PV @param big: size of the biggest PV @return: True when the given sizes are bad, False otherwise """ # Test whether no X exists such that: # small >= X * (1 - constants.PART_MARGIN) and # big <= X * (1 + constants.PART_MARGIN) return (small * (1 + constants.PART_MARGIN) < big * (1 - constants.PART_MARGIN)) ganeti-2.9.3/lib/utils/__init__.py0000644000000000000000000005762112271422343017024 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti utility module. This module holds functions that can be used in both daemons (all) and the command line scripts. """ # Allow wildcard import in pylint: disable=W0401 import os import re import errno import pwd import time import itertools import select import logging import signal from ganeti import errors from ganeti import constants from ganeti import compat from ganeti import pathutils from ganeti.utils.algo import * from ganeti.utils.filelock import * from ganeti.utils.hash import * from ganeti.utils.io import * from ganeti.utils.log import * from ganeti.utils.lvm import * from ganeti.utils.mlock import * from ganeti.utils.nodesetup import * from ganeti.utils.process import * from ganeti.utils.retry import * from ganeti.utils.storage import * from ganeti.utils.text import * from ganeti.utils.wrapper import * from ganeti.utils.x509 import * _VALID_SERVICE_NAME_RE = re.compile("^[-_.a-zA-Z0-9]{1,128}$") UUID_RE = re.compile(constants.UUID_REGEX) def ForceDictType(target, key_types, allowed_values=None): """Force the values of a dict to have certain types. 
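Example (an illustrative sketch; the parameter names are made up)::

  data = {"mem": "128", "auto_balance": "true"}
  ForceDictType(data, {"mem": constants.VTYPE_SIZE,
                       "auto_balance": constants.VTYPE_BOOL})
  # data is now {"mem": 128, "auto_balance": True}
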
@type target: dict @param target: the dict to update @type key_types: dict @param key_types: dict mapping target dict keys to types in constants.ENFORCEABLE_TYPES @type allowed_values: list @keyword allowed_values: list of specially allowed values """ if allowed_values is None: allowed_values = [] if not isinstance(target, dict): msg = "Expected dictionary, got '%s'" % target raise errors.TypeEnforcementError(msg) for key in target: if key not in key_types: msg = "Unknown parameter '%s'" % key raise errors.TypeEnforcementError(msg) if target[key] in allowed_values: continue ktype = key_types[key] if ktype not in constants.ENFORCEABLE_TYPES: msg = "'%s' has non-enforceable type %s" % (key, ktype) raise errors.ProgrammerError(msg) if ktype in (constants.VTYPE_STRING, constants.VTYPE_MAYBE_STRING): if target[key] is None and ktype == constants.VTYPE_MAYBE_STRING: pass elif not isinstance(target[key], basestring): if isinstance(target[key], bool) and not target[key]: target[key] = "" else: msg = "'%s' (value %s) is not a valid string" % (key, target[key]) raise errors.TypeEnforcementError(msg) elif ktype == constants.VTYPE_BOOL: if isinstance(target[key], basestring) and target[key]: if target[key].lower() == constants.VALUE_FALSE: target[key] = False elif target[key].lower() == constants.VALUE_TRUE: target[key] = True else: msg = "'%s' (value %s) is not a valid boolean" % (key, target[key]) raise errors.TypeEnforcementError(msg) elif target[key]: target[key] = True else: target[key] = False elif ktype == constants.VTYPE_SIZE: try: target[key] = ParseUnit(target[key]) except errors.UnitParseError, err: msg = "'%s' (value %s) is not a valid size. error: %s" % \ (key, target[key], err) raise errors.TypeEnforcementError(msg) elif ktype == constants.VTYPE_INT: try: target[key] = int(target[key]) except (ValueError, TypeError): msg = "'%s' (value %s) is not a valid integer" % (key, target[key]) raise errors.TypeEnforcementError(msg) def ValidateServiceName(name): """Validate the given service name. @type name: number or string @param name: Service name or port specification """ try: numport = int(name) except (ValueError, TypeError): # Non-numeric service name valid = _VALID_SERVICE_NAME_RE.match(name) else: # Numeric port (protocols other than TCP or UDP might need adjustments # here) valid = (numport >= 0 and numport < (1 << 16)) if not valid: raise errors.OpPrereqError("Invalid service name '%s'" % name, errors.ECODE_INVAL) return name def _ComputeMissingKeys(key_path, options, defaults): """Helper function to compute which keys are invalid. @param key_path: The current key path (if any) @param options: The user provided options @param defaults: The default dictionary @return: A list of invalid keys """ defaults_keys = frozenset(defaults.keys()) invalid = [] for key, value in options.items(): if key_path: new_path = "%s/%s" % (key_path, key) else: new_path = key if key not in defaults_keys: invalid.append(new_path) elif isinstance(value, dict): invalid.extend(_ComputeMissingKeys(new_path, value, defaults[key])) return invalid def VerifyDictOptions(options, defaults): """Verify that a dict has only keys which also appear in the defaults dict. 
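Example (an illustrative sketch with made-up option names)::

  defaults = {"vnc": {"bind_address": "0.0.0.0", "password_file": None}}
  VerifyDictOptions({"vnc": {"bind_addres": "127.0.0.1"}}, defaults)
  # raises errors.OpPrereqError, naming the unknown key "vnc/bind_addres"
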
@param options: The user provided options @param defaults: The default dictionary @raise errors.OpPrereqError: If one of the keys is not supported """ invalid = _ComputeMissingKeys("", options, defaults) if invalid: raise errors.OpPrereqError("Provided option keys not supported: %s" % CommaJoin(invalid), errors.ECODE_INVAL) def ListVolumeGroups(): """List volume groups and their size. @rtype: dict @return: Dictionary with volume group names as keys and their sizes as values """ command = "vgs --noheadings --units m --nosuffix -o name,size" result = RunCmd(command) retval = {} if result.failed: return retval for line in result.stdout.splitlines(): try: name, size = line.split() size = int(float(size)) except (IndexError, ValueError), err: logging.error("Invalid output from vgs (%s): %s", err, line) continue retval[name] = size return retval def BridgeExists(bridge): """Check whether the given bridge exists in the system. @type bridge: str @param bridge: the bridge name to check @rtype: boolean @return: True if it does """ return os.path.isdir("/sys/class/net/%s/bridge" % bridge) def TryConvert(fn, val): """Try to convert a value ignoring errors. This function tries to apply function I{fn} to I{val}. If no C{ValueError} or C{TypeError} exceptions are raised, it will return the result, else it will return the original value. Any other exceptions are propagated to the caller. @type fn: callable @param fn: function to apply to the value @param val: the value to be converted @return: The converted value if the conversion was successful, otherwise the original value. """ try: nv = fn(val) except (ValueError, TypeError): nv = val return nv def ParseCpuMask(cpu_mask): """Parse a CPU mask definition and return the list of CPU IDs. CPU mask format: comma-separated list of CPU IDs or dash-separated ID ranges Example: "0-2,5" -> "0,1,2,5" @type cpu_mask: str @param cpu_mask: CPU mask definition @rtype: list of int @return: list of CPU IDs """ if not cpu_mask: return [] cpu_list = [] for range_def in cpu_mask.split(","): boundaries = range_def.split("-") n_elements = len(boundaries) if n_elements > 2: raise errors.ParseError("Invalid CPU ID range definition" " (only one hyphen allowed): %s" % range_def) try: lower = int(boundaries[0]) except (ValueError, TypeError), err: raise errors.ParseError("Invalid CPU ID value for lower boundary of" " CPU ID range: %s" % str(err)) try: higher = int(boundaries[-1]) except (ValueError, TypeError), err: raise errors.ParseError("Invalid CPU ID value for higher boundary of" " CPU ID range: %s" % str(err)) if lower > higher: raise errors.ParseError("Invalid CPU ID range definition" " (%d > %d): %s" % (lower, higher, range_def)) cpu_list.extend(range(lower, higher + 1)) return cpu_list def ParseMultiCpuMask(cpu_mask): """Parse a multiple CPU mask definition and return the list of CPU IDs. 
CPU mask format: colon-separated list of comma-separated lists of CPU IDs or dash-separated ID ranges, with optional "all" as CPU value Example: "0-2,5:all:1,5,6:2" -> [ [ 0,1,2,5 ], [ -1 ], [ 1, 5, 6 ], [ 2 ] ] @type cpu_mask: str @param cpu_mask: multiple CPU mask definition @rtype: list of lists of int @return: list of lists of CPU IDs """ if not cpu_mask: return [] cpu_list = [] for range_def in cpu_mask.split(constants.CPU_PINNING_SEP): if range_def == constants.CPU_PINNING_ALL: cpu_list.append([constants.CPU_PINNING_ALL_VAL, ]) else: # Uniquify and sort the list before adding cpu_list.append(sorted(set(ParseCpuMask(range_def)))) return cpu_list def GetHomeDir(user, default=None): """Try to get the homedir of the given user. The user can be passed either as a string (denoting the name) or as an integer (denoting the user id). If the user is not found, the C{default} argument is returned, which defaults to C{None}. """ try: if isinstance(user, basestring): result = pwd.getpwnam(user) elif isinstance(user, (int, long)): result = pwd.getpwuid(user) else: raise errors.ProgrammerError("Invalid type passed to GetHomeDir (%s)" % type(user)) except KeyError: return default return result.pw_dir def FirstFree(seq, base=0): """Returns the first non-existing integer from seq. The seq argument should be a sorted list of positive integers. The first time the index of an element is smaller than the element value, the index will be returned. The base argument is used to start at a different offset, i.e. C{[3, 4, 6]} with I{base=3} will return 5. Example: C{[0, 1, 3]} will return I{2}. @type seq: sequence @param seq: the sequence to be analyzed. @type base: int @param base: use this value as the base index of the sequence @rtype: int @return: the first non-used index in the sequence """ for idx, elem in enumerate(seq): assert elem >= base, "Passed element is higher than base offset" if elem > idx + base: # idx is not used return idx + base return None def SingleWaitForFdCondition(fdobj, event, timeout): """Waits for a condition to occur on the socket. Immediately returns at the first interruption. @type fdobj: integer or object supporting a fileno() method @param fdobj: entity to wait for events on @type event: integer @param event: ORed condition (see select module) @type timeout: float or None @param timeout: Timeout in seconds @rtype: int or None @return: None for timeout, otherwise occurred conditions """ check = (event | select.POLLPRI | select.POLLNVAL | select.POLLHUP | select.POLLERR) if timeout is not None: # Poller object expects milliseconds timeout *= 1000 poller = select.poll() poller.register(fdobj, event) try: # TODO: If the main thread receives a signal and we have no timeout, we # could wait forever. This should check a global "quit" flag or something # every so often. io_events = poller.poll(timeout) except select.error, err: if err[0] != errno.EINTR: raise io_events = [] if io_events and io_events[0][1] & check: return io_events[0][1] else: return None class FdConditionWaiterHelper(object): """Retry helper for WaitForFdCondition. This class contains the retried and wait functions that make sure WaitForFdCondition can continue waiting until the timeout is actually expired. 
""" def __init__(self, timeout): self.timeout = timeout def Poll(self, fdobj, event): result = SingleWaitForFdCondition(fdobj, event, self.timeout) if result is None: raise RetryAgain() else: return result def UpdateTimeout(self, timeout): self.timeout = timeout def WaitForFdCondition(fdobj, event, timeout): """Waits for a condition to occur on the socket. Retries until the timeout is expired, even if interrupted. @type fdobj: integer or object supporting a fileno() method @param fdobj: entity to wait for events on @type event: integer @param event: ORed condition (see select module) @type timeout: float or None @param timeout: Timeout in seconds @rtype: int or None @return: None for timeout, otherwise occured conditions """ if timeout is not None: retrywaiter = FdConditionWaiterHelper(timeout) try: result = Retry(retrywaiter.Poll, RETRY_REMAINING_TIME, timeout, args=(fdobj, event), wait_fn=retrywaiter.UpdateTimeout) except RetryTimeout: result = None else: result = None while result is None: result = SingleWaitForFdCondition(fdobj, event, timeout) return result def EnsureDaemon(name): """Check for and start daemon if not alive. """ result = RunCmd([pathutils.DAEMON_UTIL, "check-and-start", name]) if result.failed: logging.error("Can't start daemon '%s', failure %s, output: %s", name, result.fail_reason, result.output) return False return True def StopDaemon(name): """Stop daemon """ result = RunCmd([pathutils.DAEMON_UTIL, "stop", name]) if result.failed: logging.error("Can't stop daemon '%s', failure %s, output: %s", name, result.fail_reason, result.output) return False return True def SplitTime(value): """Splits time as floating point number into a tuple. @param value: Time in seconds @type value: int or float @return: Tuple containing (seconds, microseconds) """ (seconds, microseconds) = divmod(int(value * 1000000), 1000000) assert 0 <= seconds, \ "Seconds must be larger than or equal to 0, but are %s" % seconds assert 0 <= microseconds <= 999999, \ "Microseconds must be 0-999999, but are %s" % microseconds return (int(seconds), int(microseconds)) def MergeTime(timetuple): """Merges a tuple into time as a floating point number. @param timetuple: Time as tuple, (seconds, microseconds) @type timetuple: tuple @return: Time as a floating point number expressed in seconds """ (seconds, microseconds) = timetuple assert 0 <= seconds, \ "Seconds must be larger than or equal to 0, but are %s" % seconds assert 0 <= microseconds <= 999999, \ "Microseconds must be 0-999999, but are %s" % microseconds return float(seconds) + (float(microseconds) * 0.000001) def EpochNano(): """Return the current timestamp expressed as number of nanoseconds since the unix epoch @return: nanoseconds since the Unix epoch """ return int(time.time() * 1000000000) def FindMatch(data, name): """Tries to find an item in a dictionary matching a name. Callers have to ensure the data names aren't contradictory (e.g. a regexp that matches a string). If the name isn't a direct key, all regular expression objects in the dictionary are matched against it. @type data: dict @param data: Dictionary containing data @type name: string @param name: Name to look for @rtype: tuple; (value in dictionary, matched groups as list) """ if name in data: return (data[name], []) for key, value in data.items(): # Regex objects if hasattr(key, "match"): m = key.match(name) if m: return (value, list(m.groups())) return None def GetMounts(filename=constants.PROC_MOUNTS): """Returns the list of mounted filesystems. This function is Linux-specific. 
@param filename: path of mounts file (/proc/mounts by default) @rtype: list of tuples @return: list of mount entries (device, mountpoint, fstype, options) """ # TODO(iustin): investigate non-Linux options (e.g. via mount output) data = [] mountlines = ReadFile(filename).splitlines() for line in mountlines: device, mountpoint, fstype, options, _ = line.split(None, 4) data.append((device, mountpoint, fstype, options)) return data def SignalHandled(signums): """Signal handling decorator. This special decorator installs a signal handler and then calls the target function. The function must accept a 'signal_handlers' keyword argument, which will contain a dict indexed by signal number, with SignalHandler objects as values. The decorator can be safely stacked with itself, to handle multiple signals with different handlers. @type signums: list @param signums: signals to intercept """ def wrap(fn): def sig_function(*args, **kwargs): assert "signal_handlers" not in kwargs or \ kwargs["signal_handlers"] is None or \ isinstance(kwargs["signal_handlers"], dict), \ "Wrong signal_handlers parameter in original function call" if "signal_handlers" in kwargs and kwargs["signal_handlers"] is not None: signal_handlers = kwargs["signal_handlers"] else: signal_handlers = {} kwargs["signal_handlers"] = signal_handlers sighandler = SignalHandler(signums) try: for sig in signums: signal_handlers[sig] = sighandler return fn(*args, **kwargs) finally: sighandler.Reset() return sig_function return wrap def TimeoutExpired(epoch, timeout, _time_fn=time.time): """Checks whether a timeout has expired. """ return _time_fn() > (epoch + timeout) class SignalWakeupFd(object): try: # This is only supported in Python 2.5 and above (some distributions # backported it to Python 2.4) _set_wakeup_fd_fn = signal.set_wakeup_fd except AttributeError: # Not supported def _SetWakeupFd(self, _): # pylint: disable=R0201 return -1 else: def _SetWakeupFd(self, fd): return self._set_wakeup_fd_fn(fd) def __init__(self): """Initializes this class. """ (read_fd, write_fd) = os.pipe() # Once these succeeded, the file descriptors will be closed automatically. # Buffer size 0 is important, otherwise .read() with a specified length # might buffer data and the file descriptors won't be marked readable. self._read_fh = os.fdopen(read_fd, "r", 0) self._write_fh = os.fdopen(write_fd, "w", 0) self._previous = self._SetWakeupFd(self._write_fh.fileno()) # Utility functions self.fileno = self._read_fh.fileno self.read = self._read_fh.read def Reset(self): """Restores the previous wakeup file descriptor. """ if hasattr(self, "_previous") and self._previous is not None: self._SetWakeupFd(self._previous) self._previous = None def Notify(self): """Notifies the wakeup file descriptor. """ self._write_fh.write(chr(0)) def __del__(self): """Called before object deletion. """ self.Reset() class SignalHandler(object): """Generic signal handler class. It automatically restores the original handler when deconstructed or when L{Reset} is called. You can either pass your own handler function in or query the L{called} attribute to detect whether the signal was sent. @type signum: list @ivar signum: the signals we handle @type called: boolean @ivar called: tracks whether any of the signals have been raised """ def __init__(self, signum, handler_fn=None, wakeup=None): """Constructs a new SignalHandler instance. 
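A minimal usage sketch (C{_DoWork} is a hypothetical work function)::

  handler = SignalHandler([signal.SIGTERM])
  try:
    while not handler.called:
      _DoWork()
  finally:
    handler.Reset()
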
@type signum: int or list of ints @param signum: Single signal number or set of signal numbers @type handler_fn: callable @param handler_fn: Signal handling function """ assert handler_fn is None or callable(handler_fn) self.signum = set(signum) self.called = False self._handler_fn = handler_fn self._wakeup = wakeup self._previous = {} try: for signum in self.signum: # Setup handler prev_handler = signal.signal(signum, self._HandleSignal) try: self._previous[signum] = prev_handler except: # Restore previous handler signal.signal(signum, prev_handler) raise except: # Reset all handlers self.Reset() # Here we have a race condition: a handler may have already been called, # but there's not much we can do about it at this point. raise def __del__(self): self.Reset() def Reset(self): """Restore previous handler. This will reset all the signals to their previous handlers. """ for signum, prev_handler in self._previous.items(): signal.signal(signum, prev_handler) # If successful, remove from dict del self._previous[signum] def Clear(self): """Unsets the L{called} flag. This function can be used in case a signal may arrive several times. """ self.called = False def _HandleSignal(self, signum, frame): """Actual signal handling function. """ # This is not nice and not absolutely atomic, but it appears to be the only # solution in Python -- there are no atomic types. self.called = True if self._wakeup: # Notify whoever is interested in signals self._wakeup.Notify() if self._handler_fn: self._handler_fn(signum, frame) class FieldSet(object): """A simple field set. Among the features are: - checking if a string is among a list of static string or regex objects - checking if a whole list of string matches - returning the matching groups from a regex match Internally, all fields are held as regular expression objects. """ def __init__(self, *items): self.items = [re.compile("^%s$" % value) for value in items] def Extend(self, other_set): """Extend the field set with the items from another one""" self.items.extend(other_set.items) def Matches(self, field): """Checks if a field matches the current set @type field: str @param field: the string to match @return: either None or a regular expression match object """ for m in itertools.ifilter(None, (val.match(field) for val in self.items)): return m return None def NonMatching(self, items): """Returns the list of fields not matching the current set @type items: list @param items: the list of fields to check @rtype: list @return: list of non-matching fields """ return [val for val in items if not self.Matches(val)] def ValidateDeviceNames(kind, container): """Validate instance device names. Check that a device container contains only unique and valid names. @type kind: string @param kind: One-word item description @type container: list @param container: Container containing the devices """ valid = [] for device in container: if isinstance(device, dict): if kind == "NIC": name = device.get(constants.INIC_NAME, None) elif kind == "disk": name = device.get(constants.IDISK_NAME, None) else: raise errors.OpPrereqError("Invalid container kind '%s'" % kind, errors.ECODE_INVAL) else: name = device.name # Check that a device name is not the UUID of another device valid.append(device.uuid) try: int(name) except (ValueError, TypeError): pass else: raise errors.OpPrereqError("Invalid name '%s'. 
Purely numeric %s names" " are not allowed" % (name, kind), errors.ECODE_INVAL) if name is not None and name.lower() != constants.VALUE_NONE: if name in valid: raise errors.OpPrereqError("%s name '%s' already used" % (kind, name), errors.ECODE_NOTUNIQUE) else: valid.append(name) ganeti-2.9.3/lib/utils/process.py0000644000000000000000000007350412271422343016741 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for processes. """ import os import sys import subprocess import errno import select import logging import signal import resource from cStringIO import StringIO from ganeti import errors from ganeti import constants from ganeti import compat from ganeti.utils import retry as utils_retry from ganeti.utils import wrapper as utils_wrapper from ganeti.utils import text as utils_text from ganeti.utils import io as utils_io from ganeti.utils import algo as utils_algo #: when set to True, L{RunCmd} is disabled _no_fork = False (_TIMEOUT_NONE, _TIMEOUT_TERM, _TIMEOUT_KILL) = range(3) def DisableFork(): """Disables the use of fork(2). """ global _no_fork # pylint: disable=W0603 _no_fork = True class RunResult(object): """Holds the result of running external programs. 
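A typical caller only inspects C{failed}, C{fail_reason} and C{output} (an illustrative sketch; L{RunCmd} below returns instances of this class)::

    result = RunCmd(["/bin/true"])
    if result.failed:
      raise errors.OpExecError("Command failed: %s" % result.fail_reason)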
@type exit_code: int @ivar exit_code: the exit code of the program, or None (if the program didn't exit()) @type signal: int or None @ivar signal: the signal that caused the program to finish, or None (if the program wasn't terminated by a signal) @type stdout: str @ivar stdout: the standard output of the program @type stderr: str @ivar stderr: the standard error of the program @type failed: boolean @ivar failed: True in case the program was terminated by a signal or exited with a non-zero exit code @ivar fail_reason: a string detailing the termination reason """ __slots__ = ["exit_code", "signal", "stdout", "stderr", "failed", "fail_reason", "cmd"] def __init__(self, exit_code, signal_, stdout, stderr, cmd, timeout_action, timeout): self.cmd = cmd self.exit_code = exit_code self.signal = signal_ self.stdout = stdout self.stderr = stderr self.failed = (signal_ is not None or exit_code != 0) fail_msgs = [] if self.signal is not None: fail_msgs.append("terminated by signal %s" % self.signal) elif self.exit_code is not None: fail_msgs.append("exited with exit code %s" % self.exit_code) else: fail_msgs.append("unable to determine termination reason") if timeout_action == _TIMEOUT_TERM: fail_msgs.append("terminated after timeout of %.2f seconds" % timeout) elif timeout_action == _TIMEOUT_KILL: fail_msgs.append(("force termination after timeout of %.2f seconds" " and linger for another %.2f seconds") % (timeout, constants.CHILD_LINGER_TIMEOUT)) if fail_msgs and self.failed: self.fail_reason = utils_text.CommaJoin(fail_msgs) else: self.fail_reason = None if self.failed: logging.debug("Command '%s' failed (%s); output: %s", self.cmd, self.fail_reason, self.output) def _GetOutput(self): """Returns the combined stdout and stderr for easier usage. """ return self.stdout + self.stderr output = property(_GetOutput, None, None, "Return full output") def _BuildCmdEnvironment(env, reset): """Builds the environment for an external program. """ if reset: cmd_env = {} else: cmd_env = os.environ.copy() cmd_env["LC_ALL"] = "C" if env is not None: cmd_env.update(env) return cmd_env def RunCmd(cmd, env=None, output=None, cwd="/", reset_env=False, interactive=False, timeout=None, noclose_fds=None, input_fd=None, postfork_fn=None): """Execute a (shell) command. The command should not read from its standard input, as it will be closed. 
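Example (illustrative only; any command list and timeout work the same way)::

    result = RunCmd(["/bin/ls", "/etc"], timeout=30)
    if not result.failed:
      print result.output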
@type cmd: string or list @param cmd: Command to run @type env: dict @param env: Additional environment variables @type output: str @param output: if desired, the output of the command can be saved in a file instead of the RunResult instance; this parameter denotes the file name (if not None) @type cwd: string @param cwd: if specified, will be used as the working directory for the command; the default will be / @type reset_env: boolean @param reset_env: whether to reset or keep the default os environment @type interactive: boolean @param interactive: whether we pipe stdin, stdout and stderr (default behaviour) or run the command interactively @type timeout: int @param timeout: If not None, timeout in seconds until child process gets killed @type noclose_fds: list @param noclose_fds: list of additional (fd >=3) file descriptors to leave open for the child process @type input_fd: C{file}-like object or numeric file descriptor @param input_fd: File descriptor for process' standard input @type postfork_fn: Callable receiving PID as parameter @param postfork_fn: Callback run after fork but before timeout @rtype: L{RunResult} @return: RunResult instance @raise errors.ProgrammerError: if we call this when forks are disabled """ if _no_fork: raise errors.ProgrammerError("utils.RunCmd() called with fork() disabled") if output and interactive: raise errors.ProgrammerError("Parameters 'output' and 'interactive' can" " not be provided at the same time") if not (output is None or input_fd is None): # The current logic in "_RunCmdFile", which is used when output is defined, # does not support input files (not hard to implement, though) raise errors.ProgrammerError("Parameters 'output' and 'input_fd' can" " not be used at the same time") if isinstance(cmd, basestring): strcmd = cmd shell = True else: cmd = [str(val) for val in cmd] strcmd = utils_text.ShellQuoteArgs(cmd) shell = False if output: logging.info("RunCmd %s, output file '%s'", strcmd, output) else: logging.info("RunCmd %s", strcmd) cmd_env = _BuildCmdEnvironment(env, reset_env) try: if output is None: out, err, status, timeout_action = _RunCmdPipe(cmd, cmd_env, shell, cwd, interactive, timeout, noclose_fds, input_fd, postfork_fn=postfork_fn) else: if postfork_fn: raise errors.ProgrammerError("postfork_fn is not supported if output" " should be captured") assert input_fd is None timeout_action = _TIMEOUT_NONE status = _RunCmdFile(cmd, cmd_env, shell, output, cwd, noclose_fds) out = err = "" except OSError, err: if err.errno == errno.ENOENT: raise errors.OpExecError("Can't execute '%s': not found (%s)" % (strcmd, err)) else: raise if status >= 0: exitcode = status signal_ = None else: exitcode = None signal_ = -status return RunResult(exitcode, signal_, out, err, strcmd, timeout_action, timeout) def SetupDaemonEnv(cwd="/", umask=077): """Set up a daemon's environment. This should be called between the first and second fork, due to setsid usage. @param cwd: the directory to which to chdir @param umask: the umask to set """ os.chdir(cwd) os.umask(umask) os.setsid() def SetupDaemonFDs(output_file, output_fd): """Sets up a daemon's file descriptors.
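This is normally called in a freshly forked child process, after L{SetupDaemonEnv}; a minimal sketch (the log file path is made up)::

    SetupDaemonEnv()
    SetupDaemonFDs("/var/log/example-daemon.log", None)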
@param output_file: if not None, the file to which to redirect stdout/stderr @param output_fd: if not None, the file descriptor for stdout/stderr """ # check that at most one is defined assert [output_file, output_fd].count(None) >= 1 # Open /dev/null (read-only, only for stdin) devnull_fd = os.open(os.devnull, os.O_RDONLY) output_close = True if output_fd is not None: output_close = False elif output_file is not None: # Open output file try: output_fd = os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0600) except EnvironmentError, err: raise Exception("Opening output file failed: %s" % err) else: output_fd = os.open(os.devnull, os.O_WRONLY) # Redirect standard I/O os.dup2(devnull_fd, 0) os.dup2(output_fd, 1) os.dup2(output_fd, 2) if devnull_fd > 2: utils_wrapper.CloseFdNoError(devnull_fd) if output_close and output_fd > 2: utils_wrapper.CloseFdNoError(output_fd) def StartDaemon(cmd, env=None, cwd="/", output=None, output_fd=None, pidfile=None): """Start a daemon process after forking twice. @type cmd: string or list @param cmd: Command to run @type env: dict @param env: Additional environment variables @type cwd: string @param cwd: Working directory for the program @type output: string @param output: Path to file in which to save the output @type output_fd: int @param output_fd: File descriptor for output @type pidfile: string @param pidfile: Process ID file @rtype: int @return: Daemon process ID @raise errors.ProgrammerError: if we call this when forks are disabled """ if _no_fork: raise errors.ProgrammerError("utils.StartDaemon() called with fork()" " disabled") if output and not (bool(output) ^ (output_fd is not None)): raise errors.ProgrammerError("Only one of 'output' and 'output_fd' can be" " specified") if isinstance(cmd, basestring): cmd = ["/bin/sh", "-c", cmd] strcmd = utils_text.ShellQuoteArgs(cmd) if output: logging.debug("StartDaemon %s, output file '%s'", strcmd, output) else: logging.debug("StartDaemon %s", strcmd) cmd_env = _BuildCmdEnvironment(env, False) # Create pipe for sending PID back (pidpipe_read, pidpipe_write) = os.pipe() try: try: # Create pipe for sending error messages (errpipe_read, errpipe_write) = os.pipe() try: try: # First fork pid = os.fork() if pid == 0: try: # Child process, won't return _StartDaemonChild(errpipe_read, errpipe_write, pidpipe_read, pidpipe_write, cmd, cmd_env, cwd, output, output_fd, pidfile) finally: # Well, maybe child process failed os._exit(1) # pylint: disable=W0212 finally: utils_wrapper.CloseFdNoError(errpipe_write) # Wait for daemon to be started (or an error message to # arrive) and read up to 100 KB as an error message errormsg = utils_wrapper.RetryOnSignal(os.read, errpipe_read, 100 * 1024) finally: utils_wrapper.CloseFdNoError(errpipe_read) finally: utils_wrapper.CloseFdNoError(pidpipe_write) # Read up to 128 bytes for PID pidtext = utils_wrapper.RetryOnSignal(os.read, pidpipe_read, 128) finally: utils_wrapper.CloseFdNoError(pidpipe_read) # Try to avoid zombies by waiting for child process try: os.waitpid(pid, 0) except OSError: pass if errormsg: raise errors.OpExecError("Error when starting daemon process: %r" % errormsg) try: return int(pidtext) except (ValueError, TypeError), err: raise errors.OpExecError("Error while trying to parse PID %r: %s" % (pidtext, err)) def _StartDaemonChild(errpipe_read, errpipe_write, pidpipe_read, pidpipe_write, args, env, cwd, output, fd_output, pidfile): """Child process for starting daemon. 
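This is the second stage of the double fork started in L{StartDaemon}: it runs in the first child, calls L{SetupDaemonEnv} (chdir, umask and setsid), forks once more and exits from the intermediate process; the remaining grandchild sends its PID back through C{pidpipe_write}, reports failures through C{errpipe_write} and finally replaces itself with the daemon via C{os.execvp}/C{os.execvpe}.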
""" try: # Close parent's side utils_wrapper.CloseFdNoError(errpipe_read) utils_wrapper.CloseFdNoError(pidpipe_read) # First child process SetupDaemonEnv() # And fork for the second time pid = os.fork() if pid != 0: # Exit first child process os._exit(0) # pylint: disable=W0212 # Make sure pipe is closed on execv* (and thereby notifies # original process) utils_wrapper.SetCloseOnExecFlag(errpipe_write, True) # List of file descriptors to be left open noclose_fds = [errpipe_write] # Open PID file if pidfile: fd_pidfile = utils_io.WritePidFile(pidfile) # Keeping the file open to hold the lock noclose_fds.append(fd_pidfile) utils_wrapper.SetCloseOnExecFlag(fd_pidfile, False) else: fd_pidfile = None SetupDaemonFDs(output, fd_output) # Send daemon PID to parent utils_wrapper.RetryOnSignal(os.write, pidpipe_write, str(os.getpid())) # Close all file descriptors except stdio and error message pipe CloseFDs(noclose_fds=noclose_fds) # Change working directory os.chdir(cwd) if env is None: os.execvp(args[0], args) else: os.execvpe(args[0], args, env) except: # pylint: disable=W0702 try: # Report errors to original process WriteErrorToFD(errpipe_write, str(sys.exc_info()[1])) except: # pylint: disable=W0702 # Ignore errors in error handling pass os._exit(1) # pylint: disable=W0212 def WriteErrorToFD(fd, err): """Possibly write an error message to a fd. @type fd: None or int (file descriptor) @param fd: if not None, the error will be written to this fd @param err: string, the error message """ if fd is None: return if not err: err = "" utils_wrapper.RetryOnSignal(os.write, fd, err) def _CheckIfAlive(child): """Raises L{utils_retry.RetryAgain} if child is still alive. @raises utils_retry.RetryAgain: If child is still alive """ if child.poll() is None: raise utils_retry.RetryAgain() def _WaitForProcess(child, timeout): """Waits for the child to terminate or until we reach timeout. """ try: utils_retry.Retry(_CheckIfAlive, (1.0, 1.2, 5.0), max(0, timeout), args=[child]) except utils_retry.RetryTimeout: pass def _RunCmdPipe(cmd, env, via_shell, cwd, interactive, timeout, noclose_fds, input_fd, postfork_fn=None, _linger_timeout=constants.CHILD_LINGER_TIMEOUT): """Run a command and return its output. 
@type cmd: string or list @param cmd: Command to run @type env: dict @param env: The environment to use @type via_shell: bool @param via_shell: if we should run via the shell @type cwd: string @param cwd: the working directory for the program @type interactive: boolean @param interactive: Run command interactively (without piping) @type timeout: int @param timeout: Timeout in seconds after which the child process gets terminated @type noclose_fds: list @param noclose_fds: list of additional (fd >=3) file descriptors to leave open for the child process @type input_fd: C{file}-like object or numeric file descriptor @param input_fd: File descriptor for process' standard input @type postfork_fn: Callable receiving PID as parameter @param postfork_fn: Function run after fork but before timeout @rtype: tuple @return: (out, err, status, timeout_action) """ poller = select.poll() if interactive: stderr = None stdout = None else: stderr = subprocess.PIPE stdout = subprocess.PIPE if input_fd: stdin = input_fd elif interactive: stdin = None else: stdin = subprocess.PIPE if noclose_fds: preexec_fn = lambda: CloseFDs(noclose_fds) close_fds = False else: preexec_fn = None close_fds = True child = subprocess.Popen(cmd, shell=via_shell, stderr=stderr, stdout=stdout, stdin=stdin, close_fds=close_fds, env=env, cwd=cwd, preexec_fn=preexec_fn) if postfork_fn: postfork_fn(child.pid) out = StringIO() err = StringIO() linger_timeout = None if timeout is None: poll_timeout = None else: poll_timeout = utils_algo.RunningTimeout(timeout, True).Remaining msg_timeout = ("Command %s (%d) run into execution timeout, terminating" % (cmd, child.pid)) msg_linger = ("Command %s (%d) run into linger timeout, killing" % (cmd, child.pid)) timeout_action = _TIMEOUT_NONE # subprocess: "If the stdin argument is PIPE, this attribute is a file object # that provides input to the child process. Otherwise, it is None."
assert (stdin == subprocess.PIPE) ^ (child.stdin is None), \ "subprocess' stdin did not behave as documented" if not interactive: if child.stdin is not None: child.stdin.close() poller.register(child.stdout, select.POLLIN) poller.register(child.stderr, select.POLLIN) fdmap = { child.stdout.fileno(): (out, child.stdout), child.stderr.fileno(): (err, child.stderr), } for fd in fdmap: utils_wrapper.SetNonblockFlag(fd, True) while fdmap: if poll_timeout: pt = poll_timeout() * 1000 if pt < 0: if linger_timeout is None: logging.warning(msg_timeout) if child.poll() is None: timeout_action = _TIMEOUT_TERM utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM) linger_timeout = \ utils_algo.RunningTimeout(_linger_timeout, True).Remaining pt = linger_timeout() * 1000 if pt < 0: break else: pt = None pollresult = utils_wrapper.RetryOnSignal(poller.poll, pt) for fd, event in pollresult: if event & select.POLLIN or event & select.POLLPRI: data = fdmap[fd][1].read() # no data from read signifies EOF (the same as POLLHUP) if not data: poller.unregister(fd) del fdmap[fd] continue fdmap[fd][0].write(data) if (event & select.POLLNVAL or event & select.POLLHUP or event & select.POLLERR): poller.unregister(fd) del fdmap[fd] if timeout is not None: assert callable(poll_timeout) # We have no I/O left but it might still run if child.poll() is None: _WaitForProcess(child, poll_timeout()) # Terminate if still alive after timeout if child.poll() is None: if linger_timeout is None: logging.warning(msg_timeout) timeout_action = _TIMEOUT_TERM utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGTERM) lt = _linger_timeout else: lt = linger_timeout() _WaitForProcess(child, lt) # Okay, still alive after timeout and linger timeout? Kill it! if child.poll() is None: timeout_action = _TIMEOUT_KILL logging.warning(msg_linger) utils_wrapper.IgnoreProcessNotFound(os.kill, child.pid, signal.SIGKILL) out = out.getvalue() err = err.getvalue() status = child.wait() return out, err, status, timeout_action def _RunCmdFile(cmd, env, via_shell, output, cwd, noclose_fds): """Run a command and save its output to a file. 
@type cmd: string or list @param cmd: Command to run @type env: dict @param env: The environment to use @type via_shell: bool @param via_shell: if we should run via the shell @type output: str @param output: the filename in which to save the output @type cwd: string @param cwd: the working directory for the program @type noclose_fds: list @param noclose_fds: list of additional (fd >=3) file descriptors to leave open for the child process @rtype: int @return: the exit status """ fh = open(output, "a") if noclose_fds: preexec_fn = lambda: CloseFDs(noclose_fds + [fh.fileno()]) close_fds = False else: preexec_fn = None close_fds = True try: child = subprocess.Popen(cmd, shell=via_shell, stderr=subprocess.STDOUT, stdout=fh, stdin=subprocess.PIPE, close_fds=close_fds, env=env, cwd=cwd, preexec_fn=preexec_fn) child.stdin.close() status = child.wait() finally: fh.close() return status def RunParts(dir_name, env=None, reset_env=False): """Run Scripts or programs in a directory @type dir_name: string @param dir_name: absolute path to a directory @type env: dict @param env: The environment to use @type reset_env: boolean @param reset_env: whether to reset or keep the default os environment @rtype: list of tuples @return: list of (name, (one of RUNDIR_STATUS), RunResult) """ rr = [] try: dir_contents = utils_io.ListVisibleFiles(dir_name) except OSError, err: logging.warning("RunParts: skipping %s (cannot list: %s)", dir_name, err) return rr for relname in sorted(dir_contents): fname = utils_io.PathJoin(dir_name, relname) if not (constants.EXT_PLUGIN_MASK.match(relname) is not None and utils_wrapper.IsExecutable(fname)): rr.append((relname, constants.RUNPARTS_SKIP, None)) else: try: result = RunCmd([fname], env=env, reset_env=reset_env) except Exception, err: # pylint: disable=W0703 rr.append((relname, constants.RUNPARTS_ERR, str(err))) else: rr.append((relname, constants.RUNPARTS_RUN, result)) return rr def _GetProcStatusPath(pid): """Returns the path for a PID's proc status file. @type pid: int @param pid: Process ID @rtype: string """ return "/proc/%d/status" % pid def IsProcessAlive(pid): """Check if a given pid exists on the system. @note: zombie status is not handled, so zombie processes will be returned as alive @type pid: int @param pid: the process ID to check @rtype: boolean @return: True if the process exists """ def _TryStat(name): try: os.stat(name) return True except EnvironmentError, err: if err.errno in (errno.ENOENT, errno.ENOTDIR): return False elif err.errno == errno.EINVAL: raise utils_retry.RetryAgain(err) raise assert isinstance(pid, int), "pid must be an integer" if pid <= 0: return False # /proc in a multiprocessor environment can have strange behaviors. # Retry the os.stat a few times until we get a good result. try: return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5, args=[_GetProcStatusPath(pid)]) except utils_retry.RetryTimeout, err: err.RaiseInner() def _ParseSigsetT(sigset): """Parse a rendered sigset_t value. This is the opposite of the Linux kernel's fs/proc/array.c:render_sigset_t function. 
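For example, a C{SigCgt} value of C{"0000000000014002"} has the bits for signals 2 (SIGINT), 15 (SIGTERM) and 17 (SIGCHLD) set::

    >>> sorted(_ParseSigsetT("0000000000014002"))
    [2, 15, 17]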
@type sigset: string @param sigset: Rendered signal set from /proc/$pid/status @rtype: set @return: Set of all enabled signal numbers """ result = set() signum = 0 for ch in reversed(sigset): chv = int(ch, 16) # The following could be done in a loop, but it's easier to read and # understand in the unrolled form if chv & 1: result.add(signum + 1) if chv & 2: result.add(signum + 2) if chv & 4: result.add(signum + 3) if chv & 8: result.add(signum + 4) signum += 4 return result def _GetProcStatusField(pstatus, field): """Retrieves a field from the contents of a proc status file. @type pstatus: string @param pstatus: Contents of /proc/$pid/status @type field: string @param field: Name of field whose value should be returned @rtype: string """ for line in pstatus.splitlines(): parts = line.split(":", 1) if len(parts) < 2 or parts[0] != field: continue return parts[1].strip() return None def IsProcessHandlingSignal(pid, signum, status_path=None): """Checks whether a process is handling a signal. @type pid: int @param pid: Process ID @type signum: int @param signum: Signal number @rtype: bool """ if status_path is None: status_path = _GetProcStatusPath(pid) try: proc_status = utils_io.ReadFile(status_path) except EnvironmentError, err: # In at least one case, reading /proc/$pid/status failed with ESRCH. if err.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL, errno.ESRCH): return False raise sigcgt = _GetProcStatusField(proc_status, "SigCgt") if sigcgt is None: raise RuntimeError("%s is missing 'SigCgt' field" % status_path) # Now check whether signal is handled return signum in _ParseSigsetT(sigcgt) def Daemonize(logfile): """Daemonize the current process. This detaches the current process from the controlling terminal and runs it in the background as a daemon. @type logfile: str @param logfile: the logfile to which we should redirect stdout/stderr @rtype: tuple; (int, callable) @return: File descriptor of pipe(2) which must be closed to notify parent process and a callable to reopen log files """ # pylint: disable=W0212 # yes, we really want os._exit # TODO: do another attempt to merge Daemonize and StartDaemon, or at # least abstract the pipe functionality between them # Create pipe for sending error messages (rpipe, wpipe) = os.pipe() # this might fail pid = os.fork() if (pid == 0): # The first child. SetupDaemonEnv() # this might fail pid = os.fork() # Fork a second child. if (pid == 0): # The second child. utils_wrapper.CloseFdNoError(rpipe) else: # exit() or _exit()? See below. os._exit(0) # Exit parent (the first child) of the second child. else: utils_wrapper.CloseFdNoError(wpipe) # Wait for daemon to be started (or an error message to # arrive) and read up to 100 KB as an error message errormsg = utils_wrapper.RetryOnSignal(os.read, rpipe, 100 * 1024) if errormsg: sys.stderr.write("Error when starting daemon process: %r\n" % errormsg) rcode = 1 else: rcode = 0 os._exit(rcode) # Exit parent of the first child. reopen_fn = compat.partial(SetupDaemonFDs, logfile, None) # Open logs for the first time reopen_fn() return (wpipe, reopen_fn) def KillProcess(pid, signal_=signal.SIGTERM, timeout=30, waitpid=False): """Kill a process given by its pid. @type pid: int @param pid: The PID to terminate. @type signal_: int @param signal_: The signal to send, by default SIGTERM @type timeout: int @param timeout: The timeout after which, if the process is still alive, a SIGKILL will be sent. 
If not positive, no such checking will be done @type waitpid: boolean @param waitpid: If true, we should waitpid on this process after sending signals, since it's our own child and otherwise it would remain as zombie """ def _helper(pid, signal_, wait): """Simple helper to encapsulate the kill/waitpid sequence""" if utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signal_) and wait: try: os.waitpid(pid, os.WNOHANG) except OSError: pass if pid <= 0: # kill with pid=0 == suicide raise errors.ProgrammerError("Invalid pid given '%s'" % pid) if not IsProcessAlive(pid): return _helper(pid, signal_, waitpid) if timeout <= 0: return def _CheckProcess(): if not IsProcessAlive(pid): return try: (result_pid, _) = os.waitpid(pid, os.WNOHANG) except OSError: raise utils_retry.RetryAgain() if result_pid > 0: return raise utils_retry.RetryAgain() try: # Wait up to $timeout seconds utils_retry.Retry(_CheckProcess, (0.01, 1.5, 0.1), timeout) except utils_retry.RetryTimeout: pass if IsProcessAlive(pid): # Kill process if it's still alive _helper(pid, signal.SIGKILL, waitpid) def RunInSeparateProcess(fn, *args): """Runs a function in a separate process. Note: Only boolean return values are supported. @type fn: callable @param fn: Function to be called @rtype: bool @return: Function's result """ pid = os.fork() if pid == 0: # Child process try: # In case the function uses temporary files utils_wrapper.ResetTempfileModule() # Call function result = int(bool(fn(*args))) assert result in (0, 1) except: # pylint: disable=W0702 logging.exception("Error while calling function in separate process") # 0 and 1 are reserved for the return value result = 33 os._exit(result) # pylint: disable=W0212 # Parent process # Avoid zombies and check exit code (_, status) = os.waitpid(pid, 0) if os.WIFSIGNALED(status): exitcode = None signum = os.WTERMSIG(status) else: exitcode = os.WEXITSTATUS(status) signum = None if not (exitcode in (0, 1) and signum is None): raise errors.GenericError("Child program failed (code=%s, signal=%s)" % (exitcode, signum)) return bool(exitcode) def CloseFDs(noclose_fds=None): """Close file descriptors. This closes all file descriptors above 2 (i.e. except stdin/out/err). @type noclose_fds: list or None @param noclose_fds: if given, it denotes a list of file descriptor that should not be closed """ # Default maximum for the number of available file descriptors. if 'SC_OPEN_MAX' in os.sysconf_names: try: MAXFD = os.sysconf('SC_OPEN_MAX') if MAXFD < 0: MAXFD = 1024 except OSError: MAXFD = 1024 else: MAXFD = 1024 maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1] if (maxfd == resource.RLIM_INFINITY): maxfd = MAXFD # Iterate through and close all file descriptors (except the standard ones) for fd in range(3, maxfd): if noclose_fds and fd in noclose_fds: continue utils_wrapper.CloseFdNoError(fd) ganeti-2.9.3/lib/utils/io.py0000644000000000000000000007361412271422343015674 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for I/O. """ import os import logging import shutil import tempfile import errno import time import stat import grp import pwd from ganeti import errors from ganeti import constants from ganeti import pathutils from ganeti.utils import filelock #: Directory used by fsck(8) to store recovered data, usually at a file #: system's root directory _LOST_AND_FOUND = "lost+found" # Possible values for keep_perms in WriteFile() KP_NEVER = 0 KP_ALWAYS = 1 KP_IF_EXISTS = 2 KEEP_PERMS_VALUES = [ KP_NEVER, KP_ALWAYS, KP_IF_EXISTS, ] def ErrnoOrStr(err): """Format an EnvironmentError exception. If the L{err} argument has an errno attribute, it will be looked up and converted into a textual C{E...} description. Otherwise the string representation of the error will be returned. @type err: L{EnvironmentError} @param err: the exception to format """ if hasattr(err, "errno"): detail = errno.errorcode[err.errno] else: detail = str(err) return detail class FileStatHelper: """Helper to store file handle's C{fstat}. Useful in combination with L{ReadFile}'s C{preread} parameter. """ def __init__(self): """Initializes this class. """ self.st = None def __call__(self, fh): """Calls C{fstat} on file handle. """ self.st = os.fstat(fh.fileno()) def ReadFile(file_name, size=-1, preread=None): """Reads a file. @type size: int @param size: Read at most size bytes (if negative, entire file) @type preread: callable receiving file handle as single parameter @param preread: Function called before file is read @rtype: str @return: the (possibly partial) content of the file """ f = open(file_name, "r") try: if preread: preread(f) return f.read(size) finally: f.close() def WriteFile(file_name, fn=None, data=None, mode=None, uid=-1, gid=-1, atime=None, mtime=None, close=True, dry_run=False, backup=False, prewrite=None, postwrite=None, keep_perms=KP_NEVER): """(Over)write a file atomically. The file_name and either fn (a function taking one argument, the file descriptor, and which should write the data to it) or data (the contents of the file) must be passed. The other arguments are optional and allow setting the file mode, owner and group, and the mtime/atime of the file. If the function doesn't raise an exception, it has succeeded and the target file has the new contents. If the function has raised an exception, an existing target file should be unmodified and the temporary file should be removed. 
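Example (an illustrative sketch; the path and contents are made up)::

    WriteFile("/etc/example.conf", data="key = value\n",
              mode=0644, backup=True)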
@type file_name: str @param file_name: the target filename @type fn: callable @param fn: content writing function, called with file descriptor as parameter @type data: str @param data: contents of the file @type mode: int @param mode: file mode @type uid: int @param uid: the owner of the file @type gid: int @param gid: the group of the file @type atime: int @param atime: a custom access time to be set on the file @type mtime: int @param mtime: a custom modification time to be set on the file @type close: boolean @param close: whether to close file after writing it @type prewrite: callable @param prewrite: function to be called before writing content @type postwrite: callable @param postwrite: function to be called after writing content @type keep_perms: members of L{KEEP_PERMS_VALUES} @param keep_perms: if L{KP_NEVER} (default), owner, group, and mode are taken from the other parameters; if L{KP_ALWAYS}, owner, group, and mode are copied from the existing file; if L{KP_IF_EXISTS}, owner, group, and mode are taken from the file, and if the file doesn't exist, they are taken from the other parameters. It is an error to pass L{KP_ALWAYS} when the file doesn't exist or when C{uid}, C{gid}, or C{mode} are set to non-default values. @rtype: None or int @return: None if the 'close' parameter evaluates to True, otherwise the file descriptor @raise errors.ProgrammerError: if any of the arguments are not valid """ if not os.path.isabs(file_name): raise errors.ProgrammerError("Path passed to WriteFile is not" " absolute: '%s'" % file_name) if [fn, data].count(None) != 1: raise errors.ProgrammerError("fn or data required") if [atime, mtime].count(None) == 1: raise errors.ProgrammerError("Both atime and mtime must be either" " set or None") if not keep_perms in KEEP_PERMS_VALUES: raise errors.ProgrammerError("Invalid value for keep_perms: %s" % keep_perms) if keep_perms == KP_ALWAYS and (uid != -1 or gid != -1 or mode is not None): raise errors.ProgrammerError("When keep_perms==KP_ALWAYS, 'uid', 'gid'," " and 'mode' cannot be set") if backup and not dry_run and os.path.isfile(file_name): CreateBackup(file_name) if keep_perms == KP_ALWAYS or keep_perms == KP_IF_EXISTS: # os.stat() raises an exception if the file doesn't exist try: file_stat = os.stat(file_name) mode = stat.S_IMODE(file_stat.st_mode) uid = file_stat.st_uid gid = file_stat.st_gid except OSError: if keep_perms == KP_ALWAYS: raise # else: if keeep_perms == KP_IF_EXISTS it's ok if the file doesn't exist # Whether temporary file needs to be removed (e.g. 
if any error occurs) do_remove = True # Function result result = None (dir_name, base_name) = os.path.split(file_name) (fd, new_name) = tempfile.mkstemp(suffix=".new", prefix=base_name, dir=dir_name) try: try: if uid != -1 or gid != -1: os.chown(new_name, uid, gid) if mode: os.chmod(new_name, mode) if callable(prewrite): prewrite(fd) if data is not None: if isinstance(data, unicode): data = data.encode() assert isinstance(data, str) to_write = len(data) offset = 0 while offset < to_write: written = os.write(fd, buffer(data, offset)) assert written >= 0 assert written <= to_write - offset offset += written assert offset == to_write else: fn(fd) if callable(postwrite): postwrite(fd) os.fsync(fd) if atime is not None and mtime is not None: os.utime(new_name, (atime, mtime)) finally: # Close file unless the file descriptor should be returned if close: os.close(fd) else: result = fd # Rename file to destination name if not dry_run: os.rename(new_name, file_name) # Successful, no need to remove anymore do_remove = False finally: if do_remove: RemoveFile(new_name) return result def GetFileID(path=None, fd=None): """Returns the file 'id', i.e. the dev/inode and mtime information. Either the path to the file or the fd must be given. @param path: the file path @param fd: a file descriptor @return: a tuple of (device number, inode number, mtime) """ if [path, fd].count(None) != 1: raise errors.ProgrammerError("One and only one of fd/path must be given") if fd is None: st = os.stat(path) else: st = os.fstat(fd) return (st.st_dev, st.st_ino, st.st_mtime) def VerifyFileID(fi_disk, fi_ours): """Verifies that two file IDs are matching. Differences in the inode/device are not accepted, but and older timestamp for fi_disk is accepted. @param fi_disk: tuple (dev, inode, mtime) representing the actual file data @param fi_ours: tuple (dev, inode, mtime) representing the last written file data @rtype: boolean """ (d1, i1, m1) = fi_disk (d2, i2, m2) = fi_ours return (d1, i1) == (d2, i2) and m1 <= m2 def SafeWriteFile(file_name, file_id, **kwargs): """Wraper over L{WriteFile} that locks the target file. By keeping the target file locked during WriteFile, we ensure that cooperating writers will safely serialise access to the file. @type file_name: str @param file_name: the target filename @type file_id: tuple @param file_id: a result from L{GetFileID} """ fd = os.open(file_name, os.O_RDONLY | os.O_CREAT) try: filelock.LockFile(fd) if file_id is not None: disk_id = GetFileID(fd=fd) if not VerifyFileID(disk_id, file_id): raise errors.LockError("Cannot overwrite file %s, it has been modified" " since last written" % file_name) return WriteFile(file_name, **kwargs) finally: os.close(fd) def ReadOneLineFile(file_name, strict=False): """Return the first non-empty line from a file. @type strict: boolean @param strict: if True, abort if the file has more than one non-empty line """ file_lines = ReadFile(file_name).splitlines() full_lines = filter(bool, file_lines) if not file_lines or not full_lines: raise errors.GenericError("No data in one-liner file %s" % file_name) elif strict and len(full_lines) > 1: raise errors.GenericError("Too many lines in one-liner file %s" % file_name) return full_lines[0] def RemoveFile(filename): """Remove a file ignoring some errors. Remove a file, ignoring non-existing ones or directories. Other errors are passed. 
@type filename: str @param filename: the file to be removed """ try: os.unlink(filename) except OSError, err: if err.errno not in (errno.ENOENT, errno.EISDIR): raise def RemoveDir(dirname): """Remove an empty directory. Remove a directory, ignoring non-existing ones. Other errors are passed. This includes the case, where the directory is not empty, so it can't be removed. @type dirname: str @param dirname: the empty directory to be removed """ try: os.rmdir(dirname) except OSError, err: if err.errno != errno.ENOENT: raise def RenameFile(old, new, mkdir=False, mkdir_mode=0750, dir_uid=None, dir_gid=None): """Renames a file. This just creates the very least directory if it does not exist and C{mkdir} is set to true. @type old: string @param old: Original path @type new: string @param new: New path @type mkdir: bool @param mkdir: Whether to create target directory if it doesn't exist @type mkdir_mode: int @param mkdir_mode: Mode for newly created directories @type dir_uid: int @param dir_uid: The uid for the (if fresh created) dir @type dir_gid: int @param dir_gid: The gid for the (if fresh created) dir """ try: return os.rename(old, new) except OSError, err: # In at least one use case of this function, the job queue, directory # creation is very rare. Checking for the directory before renaming is not # as efficient. if mkdir and err.errno == errno.ENOENT: # Create directory and try again dir_path = os.path.dirname(new) MakeDirWithPerm(dir_path, mkdir_mode, dir_uid, dir_gid) return os.rename(old, new) raise def EnforcePermission(path, mode, uid=None, gid=None, must_exist=True, _chmod_fn=os.chmod, _chown_fn=os.chown, _stat_fn=os.stat): """Enforces that given path has given permissions. @param path: The path to the file @param mode: The mode of the file @param uid: The uid of the owner of this file @param gid: The gid of the owner of this file @param must_exist: Specifies if non-existance of path will be an error @param _chmod_fn: chmod function to use (unittest only) @param _chown_fn: chown function to use (unittest only) """ logging.debug("Checking %s", path) # chown takes -1 if you want to keep one part of the ownership, however # None is Python standard for that. So we remap them here. if uid is None: uid = -1 if gid is None: gid = -1 try: st = _stat_fn(path) fmode = stat.S_IMODE(st[stat.ST_MODE]) if fmode != mode: logging.debug("Changing mode of %s from %#o to %#o", path, fmode, mode) _chmod_fn(path, mode) if max(uid, gid) > -1: fuid = st[stat.ST_UID] fgid = st[stat.ST_GID] if fuid != uid or fgid != gid: logging.debug("Changing owner of %s from UID %s/GID %s to" " UID %s/GID %s", path, fuid, fgid, uid, gid) _chown_fn(path, uid, gid) except EnvironmentError, err: if err.errno == errno.ENOENT: if must_exist: raise errors.GenericError("Path %s should exist, but does not" % path) else: raise errors.GenericError("Error while changing permissions on %s: %s" % (path, err)) def MakeDirWithPerm(path, mode, uid, gid, _lstat_fn=os.lstat, _mkdir_fn=os.mkdir, _perm_fn=EnforcePermission): """Enforces that given path is a dir and has given mode, uid and gid set. 
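Example (illustrative; path and ownership values are made up)::

    MakeDirWithPerm("/var/lib/example", 0750, 0, 0)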
@param path: The path to the file @param mode: The mode of the file @param uid: The uid of the owner of this file @param gid: The gid of the owner of this file @param _lstat_fn: Stat function to use (unittest only) @param _mkdir_fn: mkdir function to use (unittest only) @param _perm_fn: permission setter function to use (unittest only) """ logging.debug("Checking directory %s", path) try: # We don't want to follow symlinks st = _lstat_fn(path) except EnvironmentError, err: if err.errno != errno.ENOENT: raise errors.GenericError("stat(2) on %s failed: %s" % (path, err)) _mkdir_fn(path) else: if not stat.S_ISDIR(st[stat.ST_MODE]): raise errors.GenericError(("Path %s is expected to be a directory, but " "isn't") % path) _perm_fn(path, mode, uid=uid, gid=gid) def Makedirs(path, mode=0750): """Super-mkdir; create a leaf directory and all intermediate ones. This is a wrapper around C{os.makedirs} adding error handling not implemented before Python 2.5. """ try: os.makedirs(path, mode) except OSError, err: # Ignore EEXIST. This is only handled in os.makedirs as included in # Python 2.5 and above. if err.errno != errno.EEXIST or not os.path.exists(path): raise def TimestampForFilename(): """Returns the current time formatted for filenames. The format doesn't contain colons as some shells and applications treat them as separators. Uses the local timezone. """ return time.strftime("%Y-%m-%d_%H_%M_%S") def CreateBackup(file_name): """Creates a backup of a file. @type file_name: str @param file_name: file to be backed up @rtype: str @return: the path to the newly created backup @raise errors.ProgrammerError: for invalid file names """ if not os.path.isfile(file_name): raise errors.ProgrammerError("Can't make a backup of a non-file '%s'" % file_name) prefix = ("%s.backup-%s." % (os.path.basename(file_name), TimestampForFilename())) dir_name = os.path.dirname(file_name) fsrc = open(file_name, "rb") try: (fd, backup_name) = tempfile.mkstemp(prefix=prefix, dir=dir_name) fdst = os.fdopen(fd, "wb") try: logging.debug("Backing up %s at %s", file_name, backup_name) shutil.copyfileobj(fsrc, fdst) finally: fdst.close() finally: fsrc.close() return backup_name def ListVisibleFiles(path, _is_mountpoint=os.path.ismount): """Returns a list of visible files in a directory. @type path: str @param path: the directory to enumerate @rtype: list @return: the list of all files not starting with a dot @raise ProgrammerError: if L{path} is not an absolue and normalized path """ if not IsNormAbsPath(path): raise errors.ProgrammerError("Path passed to ListVisibleFiles is not" " absolute/normalized: '%s'" % path) mountpoint = _is_mountpoint(path) def fn(name): """File name filter. Ignores files starting with a dot (".") as by Unix convention they're considered hidden. The "lost+found" directory found at the root of some filesystems is also hidden. """ return not (name.startswith(".") or (mountpoint and name == _LOST_AND_FOUND and os.path.isdir(os.path.join(path, name)))) return filter(fn, os.listdir(path)) def EnsureDirs(dirs): """Make required directories, if they don't exist. 
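Example (illustrative)::

    EnsureDirs([
      ("/var/run/example", 0755),
      ("/var/log/example", 0750),
      ])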
@param dirs: list of tuples (dir_name, dir_mode) @type dirs: list of (string, integer) """ for dir_name, dir_mode in dirs: try: os.mkdir(dir_name, dir_mode) except EnvironmentError, err: if err.errno != errno.EEXIST: raise errors.GenericError("Cannot create needed directory" " '%s': %s" % (dir_name, err)) try: os.chmod(dir_name, dir_mode) except EnvironmentError, err: raise errors.GenericError("Cannot change directory permissions on" " '%s': %s" % (dir_name, err)) if not os.path.isdir(dir_name): raise errors.GenericError("%s is not a directory" % dir_name) def FindFile(name, search_path, test=os.path.exists): """Look for a filesystem object in a given path. This is an abstract method to search for filesystem object (files, dirs) under a given search path. @type name: str @param name: the name to look for @type search_path: str @param search_path: location to start at @type test: callable @param test: a function taking one argument that should return True if the a given object is valid; the default value is os.path.exists, causing only existing files to be returned @rtype: str or None @return: full path to the object if found, None otherwise """ # validate the filename mask if constants.EXT_PLUGIN_MASK.match(name) is None: logging.critical("Invalid value passed for external script name: '%s'", name) return None for dir_name in search_path: # FIXME: investigate switch to PathJoin item_name = os.path.sep.join([dir_name, name]) # check the user test and that we're indeed resolving to the given # basename if test(item_name) and os.path.basename(item_name) == name: return item_name return None def IsNormAbsPath(path): """Check whether a path is absolute and also normalized This avoids things like /dir/../../other/path to be valid. """ return os.path.normpath(path) == path and os.path.isabs(path) def IsBelowDir(root, other_path): """Check whether a path is below a root dir. This works around the nasty byte-byte comparison of commonprefix. """ if not (os.path.isabs(root) and os.path.isabs(other_path)): raise ValueError("Provided paths '%s' and '%s' are not absolute" % (root, other_path)) norm_other = os.path.normpath(other_path) if norm_other == os.sep: # The root directory can never be below another path return False norm_root = os.path.normpath(root) if norm_root == os.sep: # This is the root directory, no need to add another slash prepared_root = norm_root else: prepared_root = "%s%s" % (norm_root, os.sep) return os.path.commonprefix([prepared_root, norm_other]) == prepared_root def PathJoin(*args): """Safe-join a list of path components. Requirements: - the first argument must be an absolute path - no component in the path must have backtracking (e.g. /../), since we check for normalization at the end @param args: the path components to be joined @raise ValueError: for invalid paths """ # ensure we're having at least one path passed in assert args # ensure the first component is an absolute and normalized path name root = args[0] if not IsNormAbsPath(root): raise ValueError("Invalid parameter to PathJoin: '%s'" % str(args[0])) result = os.path.join(*args) # ensure that the whole path is normalized if not IsNormAbsPath(result): raise ValueError("Invalid parameters to PathJoin: '%s'" % str(args)) # check that we're still under the original prefix if not IsBelowDir(root, result): raise ValueError("Error: path joining resulted in different prefix" " (%s != %s)" % (result, root)) return result def TailFile(fname, lines=20): """Return the last lines from a file. 
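Example (illustrative)::

    last_lines = TailFile("/var/log/example.log", lines=50)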
@note: this function will only read and parse the last 4KB of the file; if the lines are very long, it could be that less than the requested number of lines are returned @param fname: the file name @type lines: int @param lines: the (maximum) number of lines to return """ fd = open(fname, "r") try: fd.seek(0, 2) pos = fd.tell() pos = max(0, pos - 4096) fd.seek(pos, 0) raw_data = fd.read() finally: fd.close() rows = raw_data.splitlines() return rows[-lines:] def BytesToMebibyte(value): """Converts bytes to mebibytes. @type value: int @param value: Value in bytes @rtype: int @return: Value in mebibytes """ return int(round(value / (1024.0 * 1024.0), 0)) def CalculateDirectorySize(path): """Calculates the size of a directory recursively. @type path: string @param path: Path to directory @rtype: int @return: Size in mebibytes """ size = 0 for (curpath, _, files) in os.walk(path): for filename in files: st = os.lstat(PathJoin(curpath, filename)) size += st.st_size return BytesToMebibyte(size) def GetFilesystemStats(path): """Returns the total and free space on a filesystem. @type path: string @param path: Path on filesystem to be examined @rtype: int @return: tuple of (Total space, Free space) in mebibytes """ st = os.statvfs(path) fsize = BytesToMebibyte(st.f_bavail * st.f_frsize) tsize = BytesToMebibyte(st.f_blocks * st.f_frsize) return (tsize, fsize) def ReadPidFile(pidfile): """Read a pid from a file. @type pidfile: string @param pidfile: path to the file containing the pid @rtype: int @return: The process id, if the file exists and contains a valid PID, otherwise 0 """ try: raw_data = ReadOneLineFile(pidfile) except EnvironmentError, err: if err.errno != errno.ENOENT: logging.exception("Can't read pid file") return 0 return _ParsePidFileContents(raw_data) def _ParsePidFileContents(data): """Tries to extract a process ID from a PID file's content. @type data: string @rtype: int @return: Zero if nothing could be read, PID otherwise """ try: pid = int(data) except (TypeError, ValueError): logging.info("Can't parse pid file contents", exc_info=True) return 0 else: return pid def ReadLockedPidFile(path): """Reads a locked PID file. This can be used together with L{utils.process.StartDaemon}. @type path: string @param path: Path to PID file @return: PID as integer or, if file was unlocked or couldn't be opened, None """ try: fd = os.open(path, os.O_RDONLY) except EnvironmentError, err: if err.errno == errno.ENOENT: # PID file doesn't exist return None raise try: try: # Try to acquire lock filelock.LockFile(fd) except errors.LockError: # Couldn't lock, daemon is running return int(os.read(fd, 100)) finally: os.close(fd) return None def _SplitSshKey(key): """Splits a line for SSH's C{authorized_keys} file. If the line has no options (e.g. no C{command="..."}), only the significant parts, the key type and its hash, are used. Otherwise the whole line is used (split at whitespace). @type key: string @param key: Key line @rtype: tuple """ parts = key.split() if parts and parts[0] in constants.SSHAK_ALL: # If the key has no options in front of it, we only want the significant # fields return (False, parts[:2]) else: # Can't properly split the line, so use everything return (True, parts) def AddAuthorizedKey(file_obj, key): """Adds an SSH public key to an authorized_keys file. 
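Example (illustrative; the key material is abbreviated)::

    AddAuthorizedKey("/root/.ssh/authorized_keys",
                     "ssh-rsa AAAA... root@node1")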
@type file_obj: str or file handle @param file_obj: path to authorized_keys file @type key: str @param key: string containing key """ key_fields = _SplitSshKey(key) if isinstance(file_obj, basestring): f = open(file_obj, "a+") else: f = file_obj try: nl = True for line in f: # Ignore whitespace changes if _SplitSshKey(line) == key_fields: break nl = line.endswith("\n") else: if not nl: f.write("\n") f.write(key.rstrip("\r\n")) f.write("\n") f.flush() finally: f.close() def RemoveAuthorizedKey(file_name, key): """Removes an SSH public key from an authorized_keys file. @type file_name: str @param file_name: path to authorized_keys file @type key: str @param key: string containing key """ key_fields = _SplitSshKey(key) fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(file_name)) try: out = os.fdopen(fd, "w") try: f = open(file_name, "r") try: for line in f: # Ignore whitespace changes while comparing lines if _SplitSshKey(line) != key_fields: out.write(line) out.flush() os.rename(tmpname, file_name) finally: f.close() finally: out.close() except: RemoveFile(tmpname) raise def DaemonPidFileName(name): """Compute a ganeti pid file absolute path @type name: str @param name: the daemon name @rtype: str @return: the full path to the pidfile corresponding to the given daemon name """ return PathJoin(pathutils.RUN_DIR, "%s.pid" % name) def WritePidFile(pidfile): """Write the current process pidfile. @type pidfile: string @param pidfile: the path to the file to be written @raise errors.LockError: if the pid file already exists and points to a live process @rtype: int @return: the file descriptor of the lock file; do not close this unless you want to unlock the pid file """ # We don't rename nor truncate the file to not drop locks under # existing processes fd_pidfile = os.open(pidfile, os.O_RDWR | os.O_CREAT, 0600) # Lock the PID file (and fail if not possible to do so). Any code # wanting to send a signal to the daemon should try to lock the PID # file before reading it. If acquiring the lock succeeds, the daemon is # no longer running and the signal should not be sent. try: filelock.LockFile(fd_pidfile) except errors.LockError: msg = ["PID file '%s' is already locked by another process" % pidfile] # Try to read PID file pid = _ParsePidFileContents(os.read(fd_pidfile, 100)) if pid > 0: msg.append(", PID read from file is %s" % pid) raise errors.PidFileLockError("".join(msg)) os.write(fd_pidfile, "%d\n" % os.getpid()) return fd_pidfile def ReadWatcherPauseFile(filename, now=None, remove_after=3600): """Reads the watcher pause file. @type filename: string @param filename: Path to watcher pause file @type now: None, float or int @param now: Current time as Unix timestamp @type remove_after: int @param remove_after: Remove watcher pause file after specified amount of seconds past the pause end time """ if now is None: now = time.time() try: value = ReadFile(filename) except IOError, err: if err.errno != errno.ENOENT: raise value = None if value is not None: try: value = int(value) except ValueError: logging.warning(("Watcher pause file (%s) contains invalid value," " removing it"), filename) RemoveFile(filename) value = None if value is not None: # Remove file if it's outdated if now > (value + remove_after): RemoveFile(filename) value = None elif now > value: value = None return value def NewUUID(): """Returns a random UUID. @note: This is a Linux-specific method as it uses the /proc filesystem. 
@rtype: str """ return ReadFile(constants.RANDOM_UUID_FILE, size=128).rstrip("\n") class TemporaryFileManager(object): """Stores the list of files to be deleted and removes them on demand. """ def __init__(self): self._files = [] def __del__(self): self.Cleanup() def Add(self, filename): """Add file to list of files to be deleted. @type filename: string @param filename: path to filename to be added """ self._files.append(filename) def Remove(self, filename): """Remove file from list of files to be deleted. @type filename: string @param filename: path to filename to be deleted """ self._files.remove(filename) def Cleanup(self): """Delete all files marked for deletion """ while self._files: RemoveFile(self._files.pop()) def IsUserInGroup(uid, gid): """Returns True if the user belongs to the group. @type uid: int @param uid: the user id @type gid: int @param gid: the group id @rtype: bool """ user = pwd.getpwuid(uid) group = grp.getgrgid(gid) return user.pw_gid == gid or user.pw_name in group.gr_mem def CanRead(username, filename): """Returns True if the user can access (read) the file. @type username: string @param username: the name of the user @type filename: string @param filename: the name of the file @rtype: bool """ filestats = os.stat(filename) user = pwd.getpwnam(username) uid = user.pw_uid user_readable = filestats.st_mode & stat.S_IRUSR != 0 group_readable = filestats.st_mode & stat.S_IRGRP != 0 return ((filestats.st_uid == uid and user_readable) or (filestats.st_uid != uid and IsUserInGroup(uid, filestats.st_gid) and group_readable)) ganeti-2.9.3/lib/utils/filelock.py0000644000000000000000000001206112244641676017056 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for file-based locks. """ import fcntl import errno import os import logging from ganeti import errors from ganeti.utils import retry def LockFile(fd): """Locks a file using POSIX locks. @type fd: int @param fd: the file descriptor we need to lock """ try: fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, err: if err.errno == errno.EAGAIN: raise errors.LockError("File already locked") raise class FileLock(object): """Utility class for file locks. """ def __init__(self, fd, filename): """Constructor for FileLock. @type fd: file @param fd: File object @type filename: str @param filename: Path of the file opened at I{fd} """ self.fd = fd self.filename = filename @classmethod def Open(cls, filename): """Creates and opens a file to be used as a file-based lock. @type filename: string @param filename: path to the file to be locked """ # Using "os.open" is necessary to allow both opening existing file # read/write and creating if not existing. Vanilla "open" will truncate an # existing file -or- allow creating if not existing. 
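    # Note: os.fdopen only wraps the descriptor obtained above in a file
    # object; unlike the built-in open(), the "w+" mode given here does not
    # truncate the (possibly pre-existing) lock file.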
return cls(os.fdopen(os.open(filename, os.O_RDWR | os.O_CREAT, 0664), "w+"), filename) def __del__(self): self.Close() def Close(self): """Close the file and release the lock. """ if hasattr(self, "fd") and self.fd: self.fd.close() self.fd = None def _flock(self, flag, blocking, timeout, errmsg): """Wrapper for fcntl.flock. @type flag: int @param flag: operation flag @type blocking: bool @param blocking: whether the operation should be done in blocking mode. @type timeout: None or float @param timeout: for how long the operation should be retried (implies non-blocking mode). @type errmsg: string @param errmsg: error message in case operation fails. """ assert self.fd, "Lock was closed" assert timeout is None or timeout >= 0, \ "If specified, timeout must be positive" assert not (flag & fcntl.LOCK_NB), "LOCK_NB must not be set" # When a timeout is used, LOCK_NB must always be set if not (timeout is None and blocking): flag |= fcntl.LOCK_NB if timeout is None: self._Lock(self.fd, flag, timeout) else: try: retry.Retry(self._Lock, (0.1, 1.2, 1.0), timeout, args=(self.fd, flag, timeout)) except retry.RetryTimeout: raise errors.LockError(errmsg) @staticmethod def _Lock(fd, flag, timeout): try: fcntl.flock(fd, flag) except IOError, err: if timeout is not None and err.errno == errno.EAGAIN: raise retry.RetryAgain() logging.exception("fcntl.flock failed") raise def Exclusive(self, blocking=False, timeout=None): """Locks the file in exclusive mode. @type blocking: boolean @param blocking: whether to block and wait until we can lock the file or return immediately @type timeout: int or None @param timeout: if not None, the duration to wait for the lock (in blocking mode) """ self._flock(fcntl.LOCK_EX, blocking, timeout, "Failed to lock %s in exclusive mode" % self.filename) def Shared(self, blocking=False, timeout=None): """Locks the file in shared mode. @type blocking: boolean @param blocking: whether to block and wait until we can lock the file or return immediately @type timeout: int or None @param timeout: if not None, the duration to wait for the lock (in blocking mode) """ self._flock(fcntl.LOCK_SH, blocking, timeout, "Failed to lock %s in shared mode" % self.filename) def Unlock(self, blocking=True, timeout=None): """Unlocks the file. According to C{flock(2)}, unlocking can also be a nonblocking operation:: To make a non-blocking request, include LOCK_NB with any of the above operations. @type blocking: boolean @param blocking: whether to block and wait until we can lock the file or return immediately @type timeout: int or None @param timeout: if not None, the duration to wait for the lock (in blocking mode) """ self._flock(fcntl.LOCK_UN, blocking, timeout, "Failed to unlock %s" % self.filename) ganeti-2.9.3/lib/utils/wrapper.py0000644000000000000000000001253212244641676016751 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions wrapping other functions. """ import sys import time import socket import errno import tempfile import fcntl import os import select import logging def TestDelay(duration): """Sleep for a fixed amount of time. @type duration: float @param duration: the sleep duration, in seconds @rtype: (boolean, str) @return: False for negative value, and an accompanying error message; True otherwise (and msg is None) """ if duration < 0: return False, "Invalid sleep duration" time.sleep(duration) return True, None def CloseFdNoError(fd, retries=5): """Close a file descriptor ignoring errors. @type fd: int @param fd: the file descriptor @type retries: int @param retries: how many retries to make, in case we get any other error than EBADF """ try: os.close(fd) except OSError, err: if err.errno != errno.EBADF: if retries > 0: CloseFdNoError(fd, retries - 1) # else either it's closed already or we're out of retries, so we # ignore this and go on def SetCloseOnExecFlag(fd, enable): """Sets or unsets the close-on-exec flag on a file descriptor. @type fd: int @param fd: File descriptor @type enable: bool @param enable: Whether to set or unset it. """ flags = fcntl.fcntl(fd, fcntl.F_GETFD) if enable: flags |= fcntl.FD_CLOEXEC else: flags &= ~fcntl.FD_CLOEXEC fcntl.fcntl(fd, fcntl.F_SETFD, flags) def SetNonblockFlag(fd, enable): """Sets or unsets the O_NONBLOCK flag on on a file descriptor. @type fd: int @param fd: File descriptor @type enable: bool @param enable: Whether to set or unset it """ flags = fcntl.fcntl(fd, fcntl.F_GETFL) if enable: flags |= os.O_NONBLOCK else: flags &= ~os.O_NONBLOCK fcntl.fcntl(fd, fcntl.F_SETFL, flags) def RetryOnSignal(fn, *args, **kwargs): """Calls a function again if it failed due to EINTR. """ while True: try: return fn(*args, **kwargs) except EnvironmentError, err: if err.errno != errno.EINTR: raise except (socket.error, select.error), err: # In python 2.6 and above select.error is an IOError, so it's handled # above, in 2.5 and below it's not, and it's handled here. if not (err.args and err.args[0] == errno.EINTR): raise def IgnoreProcessNotFound(fn, *args, **kwargs): """Ignores ESRCH when calling a process-related function. ESRCH is raised when a process is not found. @rtype: bool @return: Whether process was found """ try: fn(*args, **kwargs) except EnvironmentError, err: # Ignore ESRCH if err.errno == errno.ESRCH: return False raise return True def IgnoreSignals(fn, *args, **kwargs): """Tries to call a function ignoring failures due to EINTR. """ try: return fn(*args, **kwargs) except EnvironmentError, err: if err.errno == errno.EINTR: return None else: raise except (select.error, socket.error), err: # In python 2.6 and above select.error is an IOError, so it's handled # above, in 2.5 and below it's not, and it's handled here. if err.args and err.args[0] == errno.EINTR: return None else: raise def GetClosedTempfile(*args, **kwargs): """Creates a temporary file and returns its path. """ (fd, path) = tempfile.mkstemp(*args, **kwargs) CloseFdNoError(fd) return path def IsExecutable(filename): """Checks whether a file exists and is executable. 
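A minimal usage sketch (the path shown is illustrative)::

  if IsExecutable("/usr/bin/kvm"):
    pass  # e.g. proceed to spawn the hypervisor
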
@type filename: string @param filename: Filename @rtype: bool """ return os.path.isfile(filename) and os.access(filename, os.X_OK) def ResetTempfileModule(_time=time.time): """Resets the random name generator of the tempfile module. This function should be called after C{os.fork} in the child process to ensure it creates a newly seeded random generator. Otherwise it would generate the same random parts as the parent process. If several processes race for the creation of a temporary file, this could lead to one not getting a temporary name. """ # pylint: disable=W0212 if ((sys.hexversion >= 0x020703F0 and sys.hexversion < 0x03000000) or sys.hexversion >= 0x030203F0): # Python 2.7 automatically resets the RNG on pid changes (i.e. forking) return try: lock = tempfile._once_lock lock.acquire() try: # Re-seed random name generator if tempfile._name_sequence: tempfile._name_sequence.rng.seed(hash(_time()) ^ os.getpid()) finally: lock.release() except AttributeError: logging.critical("The tempfile module misses at least one of the" " '_once_lock' and '_name_sequence' attributes") ganeti-2.9.3/lib/utils/nodesetup.py0000644000000000000000000000664012244641676017302 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Utility functions for manipulating /etc/hosts. """ from cStringIO import StringIO from ganeti import pathutils from ganeti.utils import algo from ganeti.utils import io def SetEtcHostsEntry(file_name, ip, hostname, aliases): """Sets the name of an IP address and hostname in /etc/hosts. @type file_name: str @param file_name: path to the file to modify (usually C{/etc/hosts}) @type ip: str @param ip: the IP address @type hostname: str @param hostname: the hostname to be added @type aliases: list @param aliases: the list of aliases to add for the hostname """ # Ensure aliases are unique names = algo.UniqueSequence([hostname] + aliases) out = StringIO() def _write_entry(written): if not written: out.write("%s\t%s\n" % (ip, " ".join(names))) return True written = False for line in io.ReadFile(file_name).splitlines(True): fields = line.split() if fields and not fields[0].startswith("#") and ip == fields[0]: written = _write_entry(written) else: out.write(line) _write_entry(written) io.WriteFile(file_name, data=out.getvalue(), uid=0, gid=0, mode=0644, keep_perms=io.KP_IF_EXISTS) def AddHostToEtcHosts(hostname, ip): """Wrapper around SetEtcHostsEntry. @type hostname: str @param hostname: a hostname that will be resolved and added to L{pathutils.ETC_HOSTS} @type ip: str @param ip: The ip address of the host """ SetEtcHostsEntry(pathutils.ETC_HOSTS, ip, hostname, [hostname.split(".")[0]]) def RemoveEtcHostsEntry(file_name, hostname): """Removes a hostname from /etc/hosts. IP addresses without names are removed from the file. 
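For example (addresses illustrative): removing C{foo.example.com} from the line C{192.0.2.1 foo.example.com foo} leaves C{192.0.2.1 foo}, and removing the last remaining name afterwards drops the line entirely.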
@type file_name: str @param file_name: path to the file to modify (usually C{/etc/hosts}) @type hostname: str @param hostname: the hostname to be removed """ out = StringIO() for line in io.ReadFile(file_name).splitlines(True): fields = line.split() if len(fields) > 1 and not fields[0].startswith("#"): names = fields[1:] if hostname in names: while hostname in names: names.remove(hostname) if names: out.write("%s %s\n" % (fields[0], " ".join(names))) continue out.write(line) io.WriteFile(file_name, data=out.getvalue(), uid=0, gid=0, mode=0644, keep_perms=io.KP_IF_EXISTS) def RemoveHostFromEtcHosts(hostname): """Wrapper around RemoveEtcHostsEntry. @type hostname: str @param hostname: hostname that will be resolved and its full and short name will be removed from L{pathutils.ETC_HOSTS} """ RemoveEtcHostsEntry(pathutils.ETC_HOSTS, hostname) RemoveEtcHostsEntry(pathutils.ETC_HOSTS, hostname.split(".")[0]) ganeti-2.9.3/lib/rpc.py0000644000000000000000000007541312271422343014710 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Inter-node RPC library. """ # pylint: disable=C0103,R0201,R0904 # C0103: Invalid name, since call_ are not valid # R0201: Method could be a function, we keep all rpcs instance methods # as not to change them back and forth between static/instance methods # if they need to start using instance attributes # R0904: Too many public methods import logging import zlib import base64 import pycurl import threading import copy from ganeti import utils from ganeti import objects from ganeti import http from ganeti import serializer from ganeti import constants from ganeti import errors from ganeti import netutils from ganeti import ssconf from ganeti import runtime from ganeti import compat from ganeti import rpc_defs from ganeti import pathutils from ganeti import vcluster # Special module generated at build time from ganeti import _generated_rpc # pylint has a bug here, doesn't see this import import ganeti.http.client # pylint: disable=W0611 _RPC_CLIENT_HEADERS = [ "Content-type: %s" % http.HTTP_APP_JSON, "Expect:", ] #: Special value to describe an offline host _OFFLINE = object() def Init(): """Initializes the module-global HTTP client manager. Must be called before using any RPC function and while exactly one thread is running. """ # curl_global_init(3) and curl_global_cleanup(3) must be called with only # one thread running. This check is just a safety measure -- it doesn't # cover all cases. assert threading.activeCount() == 1, \ "Found more than one active thread when initializing pycURL" logging.info("Using PycURL %s", pycurl.version) pycurl.global_init(pycurl.GLOBAL_ALL) def Shutdown(): """Stops the module-global HTTP client manager. Must be called before quitting the program and while exactly one thread is running.
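A minimal calling sketch (the body of the C{try} block is illustrative)::

  Init()
  try:
    pass  # perform RPC calls here
  finally:
    Shutdown()

The L{RunWithRPC} decorator defined below implements exactly this pattern.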
""" pycurl.global_cleanup() def _ConfigRpcCurl(curl): noded_cert = str(pathutils.NODED_CERT_FILE) curl.setopt(pycurl.FOLLOWLOCATION, False) curl.setopt(pycurl.CAINFO, noded_cert) curl.setopt(pycurl.SSL_VERIFYHOST, 0) curl.setopt(pycurl.SSL_VERIFYPEER, True) curl.setopt(pycurl.SSLCERTTYPE, "PEM") curl.setopt(pycurl.SSLCERT, noded_cert) curl.setopt(pycurl.SSLKEYTYPE, "PEM") curl.setopt(pycurl.SSLKEY, noded_cert) curl.setopt(pycurl.CONNECTTIMEOUT, constants.RPC_CONNECT_TIMEOUT) def RunWithRPC(fn): """RPC-wrapper decorator. When applied to a function, it runs it with the RPC system initialized, and it shutsdown the system afterwards. This means the function must be called without RPC being initialized. """ def wrapper(*args, **kwargs): Init() try: return fn(*args, **kwargs) finally: Shutdown() return wrapper def _Compress(data): """Compresses a string for transport over RPC. Small amounts of data are not compressed. @type data: str @param data: Data @rtype: tuple @return: Encoded data to send """ # Small amounts of data are not compressed if len(data) < 512: return (constants.RPC_ENCODING_NONE, data) # Compress with zlib and encode in base64 return (constants.RPC_ENCODING_ZLIB_BASE64, base64.b64encode(zlib.compress(data, 3))) class RpcResult(object): """RPC Result class. This class holds an RPC result. It is needed since in multi-node calls we can't raise an exception just because one out of many failed, and therefore we use this class to encapsulate the result. @ivar data: the data payload, for successful results, or None @ivar call: the name of the RPC call @ivar node: the name of the node to which we made the call @ivar offline: whether the operation failed because the node was offline, as opposed to actual failure; offline=True will always imply failed=True, in order to allow simpler checking if the user doesn't care about the exact failure mode @ivar fail_msg: the error message if the call failed """ def __init__(self, data=None, failed=False, offline=False, call=None, node=None): self.offline = offline self.call = call self.node = node if offline: self.fail_msg = "Node is marked offline" self.data = self.payload = None elif failed: self.fail_msg = self._EnsureErr(data) self.data = self.payload = None else: self.data = data if not isinstance(self.data, (tuple, list)): self.fail_msg = ("RPC layer error: invalid result type (%s)" % type(self.data)) self.payload = None elif len(data) != 2: self.fail_msg = ("RPC layer error: invalid result length (%d), " "expected 2" % len(self.data)) self.payload = None elif not self.data[0]: self.fail_msg = self._EnsureErr(self.data[1]) self.payload = None else: # finally success self.fail_msg = None self.payload = data[1] for attr_name in ["call", "data", "fail_msg", "node", "offline", "payload"]: assert hasattr(self, attr_name), "Missing attribute %s" % attr_name @staticmethod def _EnsureErr(val): """Helper to ensure we return a 'True' value for error.""" if val: return val else: return "No error information" def Raise(self, msg, prereq=False, ecode=None): """If the result has failed, raise an OpExecError. This is used so that LU code doesn't have to check for each result, but instead can call this function. 
""" if not self.fail_msg: return if not msg: # one could pass None for default message msg = ("Call '%s' to node '%s' has failed: %s" % (self.call, self.node, self.fail_msg)) else: msg = "%s: %s" % (msg, self.fail_msg) if prereq: ec = errors.OpPrereqError else: ec = errors.OpExecError if ecode is not None: args = (msg, ecode) else: args = (msg, ) raise ec(*args) # pylint: disable=W0142 def Warn(self, msg, feedback_fn): """If the result has failed, call the feedback_fn. This is used to in cases were LU wants to warn the user about a failure, but continue anyway. """ if not self.fail_msg: return msg = "%s: %s" % (msg, self.fail_msg) feedback_fn(msg) def _SsconfResolver(ssconf_ips, node_list, _, ssc=ssconf.SimpleStore, nslookup_fn=netutils.Hostname.GetIP): """Return addresses for given node names. @type ssconf_ips: bool @param ssconf_ips: Use the ssconf IPs @type node_list: list @param node_list: List of node names @type ssc: class @param ssc: SimpleStore class that is used to obtain node->ip mappings @type nslookup_fn: callable @param nslookup_fn: function use to do NS lookup @rtype: list of tuple; (string, string) @return: List of tuples containing node name and IP address """ ss = ssc() family = ss.GetPrimaryIPFamily() if ssconf_ips: iplist = ss.GetNodePrimaryIPList() ipmap = dict(entry.split() for entry in iplist) else: ipmap = {} result = [] for node in node_list: ip = ipmap.get(node) if ip is None: ip = nslookup_fn(node, family=family) result.append((node, ip, node)) return result class _StaticResolver: def __init__(self, addresses): """Initializes this class. """ self._addresses = addresses def __call__(self, hosts, _): """Returns static addresses for hosts. """ assert len(hosts) == len(self._addresses) return zip(hosts, self._addresses, hosts) def _CheckConfigNode(node_uuid_or_name, node, accept_offline_node): """Checks if a node is online. @type node_uuid_or_name: string @param node_uuid_or_name: Node UUID @type node: L{objects.Node} or None @param node: Node object """ if node is None: # Assume that the passed parameter was actually a node name, so depend on # DNS for name resolution return (node_uuid_or_name, node_uuid_or_name, node_uuid_or_name) else: if node.offline and not accept_offline_node: ip = _OFFLINE else: ip = node.primary_ip return (node.name, ip, node_uuid_or_name) def _NodeConfigResolver(single_node_fn, all_nodes_fn, node_uuids, opts): """Calculate node addresses using configuration. Note that strings in node_uuids are treated as node names if the UUID is not found in the configuration. """ accept_offline_node = (opts is rpc_defs.ACCEPT_OFFLINE_NODE) assert accept_offline_node or opts is None, "Unknown option" # Special case for single-host lookups if len(node_uuids) == 1: (uuid, ) = node_uuids return [_CheckConfigNode(uuid, single_node_fn(uuid), accept_offline_node)] else: all_nodes = all_nodes_fn() return [_CheckConfigNode(uuid, all_nodes.get(uuid, None), accept_offline_node) for uuid in node_uuids] class _RpcProcessor: def __init__(self, resolver, port, lock_monitor_cb=None): """Initializes this class. @param resolver: callable accepting a list of node UUIDs or hostnames, returning a list of tuples containing name, IP address and original name of the resolved node. IP address can be the name or the special value L{_OFFLINE} to mark offline machines. 
@type port: int @param port: TCP port @param lock_monitor_cb: Callable for registering with lock monitor """ self._resolver = resolver self._port = port self._lock_monitor_cb = lock_monitor_cb @staticmethod def _PrepareRequests(hosts, port, procedure, body, read_timeout): """Prepares requests by sorting offline hosts into separate list. @type body: dict @param body: a dictionary with per-host body data """ results = {} requests = {} assert isinstance(body, dict) assert len(body) == len(hosts) assert compat.all(isinstance(v, str) for v in body.values()) assert frozenset(map(lambda x: x[2], hosts)) == frozenset(body.keys()), \ "%s != %s" % (hosts, body.keys()) for (name, ip, original_name) in hosts: if ip is _OFFLINE: # Node is marked as offline results[original_name] = RpcResult(node=name, offline=True, call=procedure) else: requests[original_name] = \ http.client.HttpClientRequest(str(ip), port, http.HTTP_POST, str("/%s" % procedure), headers=_RPC_CLIENT_HEADERS, post_data=body[original_name], read_timeout=read_timeout, nicename="%s/%s" % (name, procedure), curl_config_fn=_ConfigRpcCurl) return (results, requests) @staticmethod def _CombineResults(results, requests, procedure): """Combines pre-computed results for offline hosts with actual call results. """ for name, req in requests.items(): if req.success and req.resp_status_code == http.HTTP_OK: host_result = RpcResult(data=serializer.LoadJson(req.resp_body), node=name, call=procedure) else: # TODO: Better error reporting if req.error: msg = req.error else: msg = req.resp_body logging.error("RPC error in %s on node %s: %s", procedure, name, msg) host_result = RpcResult(data=msg, failed=True, node=name, call=procedure) results[name] = host_result return results def __call__(self, nodes, procedure, body, read_timeout, resolver_opts, _req_process_fn=None): """Makes an RPC request to a number of nodes. @type nodes: sequence @param nodes: node UUIDs or Hostnames @type procedure: string @param procedure: Request path @type body: dictionary @param body: dictionary with request bodies per host @type read_timeout: int or None @param read_timeout: Read timeout for request @rtype: dictionary @return: a dictionary mapping host names to rpc.RpcResult objects """ assert read_timeout is not None, \ "Missing RPC read timeout for procedure '%s'" % procedure if _req_process_fn is None: _req_process_fn = http.client.ProcessRequests (results, requests) = \ self._PrepareRequests(self._resolver(nodes, resolver_opts), self._port, procedure, body, read_timeout) _req_process_fn(requests.values(), lock_monitor_cb=self._lock_monitor_cb) assert not frozenset(results).intersection(requests) return self._CombineResults(results, requests, procedure) class _RpcClientBase: def __init__(self, resolver, encoder_fn, lock_monitor_cb=None, _req_process_fn=None): """Initializes this class. """ proc = _RpcProcessor(resolver, netutils.GetDaemonPort(constants.NODED), lock_monitor_cb=lock_monitor_cb) self._proc = compat.partial(proc, _req_process_fn=_req_process_fn) self._encoder = compat.partial(self._EncodeArg, encoder_fn) @staticmethod def _EncodeArg(encoder_fn, (argkind, value)): """Encode argument. """ if argkind is None: return value else: return encoder_fn(argkind)(value) def _Call(self, cdef, node_list, args): """Entry point for automatically generated RPC wrappers. 
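The C{cdef} tuple originates from the build-time generated RPC definitions (see L{_generated_rpc}) and is unpacked below into the procedure name, resolver options, read timeout, argument definitions and the optional pre- and post-processing callbacks; the timeout and resolver options may themselves be callables that are evaluated against the call arguments.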
""" (procedure, _, resolver_opts, timeout, argdefs, prep_fn, postproc_fn, _) = cdef if callable(timeout): read_timeout = timeout(args) else: read_timeout = timeout if callable(resolver_opts): req_resolver_opts = resolver_opts(args) else: req_resolver_opts = resolver_opts if len(args) != len(argdefs): raise errors.ProgrammerError("Number of passed arguments doesn't match") enc_args = map(self._encoder, zip(map(compat.snd, argdefs), args)) if prep_fn is None: # for a no-op prep_fn, we serialise the body once, and then we # reuse it in the dictionary values body = serializer.DumpJson(enc_args) pnbody = dict((n, body) for n in node_list) else: # for a custom prep_fn, we pass the encoded arguments and the # node name to the prep_fn, and we serialise its return value assert callable(prep_fn) pnbody = dict((n, serializer.DumpJson(prep_fn(n, enc_args))) for n in node_list) result = self._proc(node_list, procedure, pnbody, read_timeout, req_resolver_opts) if postproc_fn: return dict(map(lambda (key, value): (key, postproc_fn(value)), result.items())) else: return result def _ObjectToDict(value): """Converts an object to a dictionary. @note: See L{objects}. """ return value.ToDict() def _ObjectListToDict(value): """Converts a list of L{objects} to dictionaries. """ return map(_ObjectToDict, value) def _EncodeNodeToDiskDict(value): """Encodes a dictionary with node name as key and disk objects as values. """ return dict((name, _ObjectListToDict(disks)) for name, disks in value.items()) def _PrepareFileUpload(getents_fn, filename): """Loads a file and prepares it for an upload to nodes. """ statcb = utils.FileStatHelper() data = _Compress(utils.ReadFile(filename, preread=statcb)) st = statcb.st if getents_fn is None: getents_fn = runtime.GetEnts getents = getents_fn() virt_filename = vcluster.MakeVirtualPath(filename) return [virt_filename, data, st.st_mode, getents.LookupUid(st.st_uid), getents.LookupGid(st.st_gid), st.st_atime, st.st_mtime] def _PrepareFinalizeExportDisks(snap_disks): """Encodes disks for finalizing export. """ flat_disks = [] for disk in snap_disks: if isinstance(disk, bool): flat_disks.append(disk) else: flat_disks.append(disk.ToDict()) return flat_disks def _EncodeImportExportIO((ieio, ieioargs)): """Encodes import/export I/O information. """ if ieio == constants.IEIO_RAW_DISK: assert len(ieioargs) == 1 return (ieio, (ieioargs[0].ToDict(), )) if ieio == constants.IEIO_SCRIPT: assert len(ieioargs) == 2 return (ieio, (ieioargs[0].ToDict(), ieioargs[1])) return (ieio, ieioargs) def _EncodeBlockdevRename(value): """Encodes information for renaming block devices. """ return [(d.ToDict(), uid) for d, uid in value] def _AddSpindlesToLegacyNodeInfo(result, space_info): """Extracts the spindle information from the space info and adds it to the result dictionary. 
@type result: dict of strings @param result: dictionary holding the result of the legacy node info @type space_info: list of dicts of strings @param space_info: list, each row holding space information of one storage unit @rtype: None @return: does not return anything, manipulates the C{result} variable """ lvm_pv_info = utils.storage.LookupSpaceInfoByStorageType( space_info, constants.ST_LVM_PV) if lvm_pv_info: result["spindles_free"] = lvm_pv_info["storage_free"] result["spindles_total"] = lvm_pv_info["storage_size"] else: raise errors.OpExecError("No spindle storage information available.") def _AddDefaultStorageInfoToLegacyNodeInfo(result, space_info): """Extracts the storage space information of the default storage type from the space info and adds it to the result dictionary. @see: C{_AddSpindlesToLegacyNodeInfo} for parameter information. """ # Check if there is at least one row for non-spindle storage info. no_defaults = (len(space_info) < 1) or \ (space_info[0]["type"] == constants.ST_LVM_PV and len(space_info) == 1) default_space_info = None if no_defaults: logging.warning("No storage info provided for default storage type.") else: default_space_info = space_info[0] if default_space_info: result["name"] = default_space_info["name"] result["storage_free"] = default_space_info["storage_free"] result["storage_size"] = default_space_info["storage_size"] def MakeLegacyNodeInfo(data, require_spindles=False): """Formats the data returned by L{rpc.RpcRunner.call_node_info}. Converts the data into a single dictionary. This is fine for most use cases, but some require information from more than one volume group or hypervisor. @param require_spindles: add spindle storage information to the legacy node info """ (bootid, space_info, (hv_info, )) = data ret = utils.JoinDisjointDicts(hv_info, {"bootid": bootid}) if require_spindles: _AddSpindlesToLegacyNodeInfo(ret, space_info) _AddDefaultStorageInfoToLegacyNodeInfo(ret, space_info) return ret def _AnnotateDParamsDRBD(disk, (drbd_params, data_params, meta_params)): """Annotates just DRBD disks layouts. """ assert disk.dev_type == constants.DT_DRBD8 disk.params = objects.FillDict(drbd_params, disk.params) (dev_data, dev_meta) = disk.children dev_data.params = objects.FillDict(data_params, dev_data.params) dev_meta.params = objects.FillDict(meta_params, dev_meta.params) return disk def _AnnotateDParamsGeneric(disk, (params, )): """Generic disk parameter annotation routine. """ assert disk.dev_type != constants.DT_DRBD8 disk.params = objects.FillDict(params, disk.params) return disk def AnnotateDiskParams(template, disks, disk_params): """Annotates the disk objects with the disk parameters. @param template: The disk template used @param disks: The list of disks objects to annotate @param disk_params: The disk paramaters for annotation @returns: A list of disk objects annotated """ ld_params = objects.Disk.ComputeLDParams(template, disk_params) if template == constants.DT_DRBD8: annotation_fn = _AnnotateDParamsDRBD elif template == constants.DT_DISKLESS: annotation_fn = lambda disk, _: disk else: annotation_fn = _AnnotateDParamsGeneric return [annotation_fn(disk.Copy(), ld_params) for disk in disks] def _GetExclusiveStorageFlag(cfg, node_uuid): ni = cfg.GetNodeInfo(node_uuid) if ni is None: raise errors.OpPrereqError("Invalid node name %s" % node_uuid, errors.ECODE_NOENT) return cfg.GetNdParams(ni)[constants.ND_EXCLUSIVE_STORAGE] def _AddExclusiveStorageFlagToLvmStorageUnits(storage_units, es_flag): """Adds the exclusive storage flag to lvm units. 
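For example (names invented): with C{es_flag=True}, the unit C{("lvm-vg", "xenvg")} becomes C{("lvm-vg", "xenvg", [True])}, while non-LVM units get an empty parameter list.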
This function creates a copy of the storage_units list, with the es_flag being added to all lvm storage units. @type storage_units: list of pairs (string, string) @param storage_units: list of 'raw' storage units, consisting only of (storage_type, storage_key) @type es_flag: boolean @param es_flag: exclusive storage flag @rtype: list of tuples (string, string, list) @return: list of storage units (storage_type, storage_key, params) with the params containing the es_flag for lvm-vg storage units """ result = [] for (storage_type, storage_key) in storage_units: if storage_type in [constants.ST_LVM_VG, constants.ST_LVM_PV]: result.append((storage_type, storage_key, [es_flag])) else: result.append((storage_type, storage_key, [])) return result def GetExclusiveStorageForNodes(cfg, node_uuids): """Return the exclusive storage flag for all the given nodes. @type cfg: L{config.ConfigWriter} @param cfg: cluster configuration @type node_uuids: list or tuple @param node_uuids: node UUIDs for which to read the flag @rtype: dict @return: mapping from node uuids to exclusive storage flags @raise errors.OpPrereqError: if any given node name has no corresponding node """ getflag = lambda n: _GetExclusiveStorageFlag(cfg, n) flags = map(getflag, node_uuids) return dict(zip(node_uuids, flags)) def PrepareStorageUnitsForNodes(cfg, storage_units, node_uuids): """Return the lvm storage unit for all the given nodes. The main purpose of this function is to map the exclusive storage flag, which can be different for each node, to the default LVM storage unit. @type cfg: L{config.ConfigWriter} @param cfg: cluster configuration @type storage_units: list of pairs (string, string) @param storage_units: list of 'raw' storage units, e.g. pairs of (storage_type, storage_key) @type node_uuids: list or tuple @param node_uuids: node UUIDs for which to read the flag @rtype: dict @return: mapping from node uuids to a list of storage units which include the exclusive storage flag for lvm storage @raise errors.OpPrereqError: if any given node name has no corresponding node """ getunit = lambda n: _AddExclusiveStorageFlagToLvmStorageUnits( storage_units, _GetExclusiveStorageFlag(cfg, n)) flags = map(getunit, node_uuids) return dict(zip(node_uuids, flags)) #: Generic encoders _ENCODERS = { rpc_defs.ED_OBJECT_DICT: _ObjectToDict, rpc_defs.ED_OBJECT_DICT_LIST: _ObjectListToDict, rpc_defs.ED_NODE_TO_DISK_DICT: _EncodeNodeToDiskDict, rpc_defs.ED_COMPRESS: _Compress, rpc_defs.ED_FINALIZE_EXPORT_DISKS: _PrepareFinalizeExportDisks, rpc_defs.ED_IMPEXP_IO: _EncodeImportExportIO, rpc_defs.ED_BLOCKDEV_RENAME: _EncodeBlockdevRename, } class RpcRunner(_RpcClientBase, _generated_rpc.RpcClientDefault, _generated_rpc.RpcClientBootstrap, _generated_rpc.RpcClientDnsOnly, _generated_rpc.RpcClientConfig): """RPC runner class. """ def __init__(self, cfg, lock_monitor_cb, _req_process_fn=None, _getents=None): """Initializes the RPC runner.
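In addition to the generic encoders, this constructor binds the instance-, NIC- and disk-related encoders to the given configuration object (see the C{encoders.update} call below).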
@type cfg: L{config.ConfigWriter} @param cfg: Configuration @type lock_monitor_cb: callable @param lock_monitor_cb: Lock monitor callback """ self._cfg = cfg encoders = _ENCODERS.copy() encoders.update({ # Encoders requiring configuration object rpc_defs.ED_INST_DICT: self._InstDict, rpc_defs.ED_INST_DICT_HVP_BEP_DP: self._InstDictHvpBepDp, rpc_defs.ED_INST_DICT_OSP_DP: self._InstDictOspDp, rpc_defs.ED_NIC_DICT: self._NicDict, # Encoders annotating disk parameters rpc_defs.ED_DISKS_DICT_DP: self._DisksDictDP, rpc_defs.ED_MULTI_DISKS_DICT_DP: self._MultiDiskDictDP, rpc_defs.ED_SINGLE_DISK_DICT_DP: self._SingleDiskDictDP, # Encoders with special requirements rpc_defs.ED_FILE_DETAILS: compat.partial(_PrepareFileUpload, _getents), }) # Resolver using configuration resolver = compat.partial(_NodeConfigResolver, cfg.GetNodeInfo, cfg.GetAllNodesInfo) # Pylint doesn't recognize multiple inheritance properly, see # and # # pylint: disable=W0233 _RpcClientBase.__init__(self, resolver, encoders.get, lock_monitor_cb=lock_monitor_cb, _req_process_fn=_req_process_fn) _generated_rpc.RpcClientConfig.__init__(self) _generated_rpc.RpcClientBootstrap.__init__(self) _generated_rpc.RpcClientDnsOnly.__init__(self) _generated_rpc.RpcClientDefault.__init__(self) def _NicDict(self, nic): """Convert the given nic to a dict and encapsulate netinfo """ n = copy.deepcopy(nic) if n.network: net_uuid = self._cfg.LookupNetwork(n.network) if net_uuid: nobj = self._cfg.GetNetwork(net_uuid) n.netinfo = objects.Network.ToDict(nobj) return n.ToDict() def _InstDict(self, instance, hvp=None, bep=None, osp=None): """Convert the given instance to a dict. This is done via the instance's ToDict() method and additionally we fill the hvparams with the cluster defaults. @type instance: L{objects.Instance} @param instance: an Instance object @type hvp: dict or None @param hvp: a dictionary with overridden hypervisor parameters @type bep: dict or None @param bep: a dictionary with overridden backend parameters @type osp: dict or None @param osp: a dictionary with overridden os parameters @rtype: dict @return: the instance dict, with the hvparams filled with the cluster defaults """ idict = instance.ToDict() cluster = self._cfg.GetClusterInfo() idict["hvparams"] = cluster.FillHV(instance) if hvp is not None: idict["hvparams"].update(hvp) idict["beparams"] = cluster.FillBE(instance) if bep is not None: idict["beparams"].update(bep) idict["osparams"] = cluster.SimpleFillOS(instance.os, instance.osparams) if osp is not None: idict["osparams"].update(osp) idict["disks"] = self._DisksDictDP((instance.disks, instance)) for nic in idict["nics"]: nic["nicparams"] = objects.FillDict( cluster.nicparams[constants.PP_DEFAULT], nic["nicparams"]) network = nic.get("network", None) if network: net_uuid = self._cfg.LookupNetwork(network) if net_uuid: nobj = self._cfg.GetNetwork(net_uuid) nic["netinfo"] = objects.Network.ToDict(nobj) return idict def _InstDictHvpBepDp(self, (instance, hvp, bep)): """Wrapper for L{_InstDict}. """ return self._InstDict(instance, hvp=hvp, bep=bep) def _InstDictOspDp(self, (instance, osparams)): """Wrapper for L{_InstDict}. """ return self._InstDict(instance, osp=osparams) def _DisksDictDP(self, (disks, instance)): """Wrapper for L{AnnotateDiskParams}. """ diskparams = self._cfg.GetInstanceDiskParams(instance) return [disk.ToDict() for disk in AnnotateDiskParams(instance.disk_template, disks, diskparams)] def _MultiDiskDictDP(self, disks_insts): """Wrapper for L{AnnotateDiskParams}. 
Supports a list of (disk, instance) tuples. """ return [disk for disk_inst in disks_insts for disk in self._DisksDictDP(disk_inst)] def _SingleDiskDictDP(self, (disk, instance)): """Wrapper for L{AnnotateDiskParams}. """ (anno_disk,) = self._DisksDictDP(([disk], instance)) return anno_disk class JobQueueRunner(_RpcClientBase, _generated_rpc.RpcClientJobQueue): """RPC wrappers for job queue. """ def __init__(self, context, address_list): """Initializes this class. """ if address_list is None: resolver = compat.partial(_SsconfResolver, True) else: # Caller provided an address list resolver = _StaticResolver(address_list) _RpcClientBase.__init__(self, resolver, _ENCODERS.get, lock_monitor_cb=context.glm.AddToLockMonitor) _generated_rpc.RpcClientJobQueue.__init__(self) class BootstrapRunner(_RpcClientBase, _generated_rpc.RpcClientBootstrap, _generated_rpc.RpcClientDnsOnly): """RPC wrappers for bootstrapping. """ def __init__(self): """Initializes this class. """ # Pylint doesn't recognize multiple inheritance properly, see # and # # pylint: disable=W0233 _RpcClientBase.__init__(self, compat.partial(_SsconfResolver, True), _ENCODERS.get) _generated_rpc.RpcClientBootstrap.__init__(self) _generated_rpc.RpcClientDnsOnly.__init__(self) class DnsOnlyRunner(_RpcClientBase, _generated_rpc.RpcClientDnsOnly): """RPC wrappers for calls using only DNS. """ def __init__(self): """Initialize this class. """ _RpcClientBase.__init__(self, compat.partial(_SsconfResolver, False), _ENCODERS.get) _generated_rpc.RpcClientDnsOnly.__init__(self) class ConfigRunner(_RpcClientBase, _generated_rpc.RpcClientConfig): """RPC wrappers for L{config}. """ def __init__(self, context, address_list, _req_process_fn=None, _getents=None): """Initializes this class. """ if context: lock_monitor_cb = context.glm.AddToLockMonitor else: lock_monitor_cb = None if address_list is None: resolver = compat.partial(_SsconfResolver, True) else: # Caller provided an address list resolver = _StaticResolver(address_list) encoders = _ENCODERS.copy() encoders.update({ rpc_defs.ED_FILE_DETAILS: compat.partial(_PrepareFileUpload, _getents), }) _RpcClientBase.__init__(self, resolver, encoders.get, lock_monitor_cb=lock_monitor_cb, _req_process_fn=_req_process_fn) _generated_rpc.RpcClientConfig.__init__(self) ganeti-2.9.3/lib/hooksmaster.py0000644000000000000000000002322112267470014016454 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the logic for running hooks. """ from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import compat from ganeti import pathutils def _RpcResultsToHooksResults(rpc_results): """Function to convert RPC results to the format expected by HooksMaster. 
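An illustrative converted entry (node and script names invented): C{"node1": (None, False, [("00-myhook", constants.HKR_SUCCESS, "")])}.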
@type rpc_results: dict(node: L{rpc.RpcResult}) @param rpc_results: RPC results @rtype: dict(node: (fail_msg, offline, hooks_results)) @return: RPC results unpacked according to the format expected by L({hooksmaster.HooksMaster} """ return dict((node, (rpc_res.fail_msg, rpc_res.offline, rpc_res.payload)) for (node, rpc_res) in rpc_results.items()) class HooksMaster(object): def __init__(self, opcode, hooks_path, nodes, hooks_execution_fn, hooks_results_adapt_fn, build_env_fn, log_fn, htype=None, cluster_name=None, master_name=None): """Base class for hooks masters. This class invokes the execution of hooks according to the behaviour specified by its parameters. @type opcode: string @param opcode: opcode of the operation to which the hooks are tied @type hooks_path: string @param hooks_path: prefix of the hooks directories @type nodes: 2-tuple of lists @param nodes: 2-tuple of lists containing nodes on which pre-hooks must be run and nodes on which post-hooks must be run @type hooks_execution_fn: function that accepts the following parameters: (node_list, hooks_path, phase, environment) @param hooks_execution_fn: function that will execute the hooks; can be None, indicating that no conversion is necessary. @type hooks_results_adapt_fn: function @param hooks_results_adapt_fn: function that will adapt the return value of hooks_execution_fn to the format expected by RunPhase @type build_env_fn: function that returns a dictionary having strings as keys @param build_env_fn: function that builds the environment for the hooks @type log_fn: function that accepts a string @param log_fn: logging function @type htype: string or None @param htype: None or one of L{constants.HTYPE_CLUSTER}, L{constants.HTYPE_NODE}, L{constants.HTYPE_INSTANCE} @type cluster_name: string @param cluster_name: name of the cluster @type master_name: string @param master_name: name of the master """ self.opcode = opcode self.hooks_path = hooks_path self.hooks_execution_fn = hooks_execution_fn self.hooks_results_adapt_fn = hooks_results_adapt_fn self.build_env_fn = build_env_fn self.log_fn = log_fn self.htype = htype self.cluster_name = cluster_name self.master_name = master_name self.pre_env = self._BuildEnv(constants.HOOKS_PHASE_PRE) (self.pre_nodes, self.post_nodes) = nodes def _BuildEnv(self, phase): """Compute the environment and the target nodes. Based on the opcode and the current node list, this builds the environment for the hooks and the target node list for the run. """ if phase == constants.HOOKS_PHASE_PRE: prefix = "GANETI_" elif phase == constants.HOOKS_PHASE_POST: prefix = "GANETI_POST_" else: raise AssertionError("Unknown phase '%s'" % phase) env = {} if self.hooks_path is not None: phase_env = self.build_env_fn() if phase_env: assert not compat.any(key.upper().startswith(prefix) for key in phase_env) env.update(("%s%s" % (prefix, key), value) for (key, value) in phase_env.items()) if phase == constants.HOOKS_PHASE_PRE: assert compat.all((key.startswith("GANETI_") and not key.startswith("GANETI_POST_")) for key in env) elif phase == constants.HOOKS_PHASE_POST: assert compat.all(key.startswith("GANETI_POST_") for key in env) assert isinstance(self.pre_env, dict) # Merge with pre-phase environment assert not compat.any(key.startswith("GANETI_POST_") for key in self.pre_env) env.update(self.pre_env) else: raise AssertionError("Unknown phase '%s'" % phase) return env def _RunWrapper(self, node_list, hpath, phase, phase_env): """Simple wrapper over self.callfn. 
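(Here C{self.callfn} refers to the configured C{hooks_execution_fn}.)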
This method fixes the environment before executing the hooks. """ env = { "PATH": constants.HOOKS_PATH, "GANETI_HOOKS_VERSION": constants.HOOKS_VERSION, "GANETI_OP_CODE": self.opcode, "GANETI_DATA_DIR": pathutils.DATA_DIR, "GANETI_HOOKS_PHASE": phase, "GANETI_HOOKS_PATH": hpath, } if self.htype: env["GANETI_OBJECT_TYPE"] = self.htype if self.cluster_name is not None: env["GANETI_CLUSTER"] = self.cluster_name if self.master_name is not None: env["GANETI_MASTER"] = self.master_name if phase_env: env = utils.algo.JoinDisjointDicts(env, phase_env) # Convert everything to strings env = dict([(str(key), str(val)) for key, val in env.iteritems()]) assert compat.all(key == "PATH" or key.startswith("GANETI_") for key in env) return self.hooks_execution_fn(node_list, hpath, phase, env) def RunPhase(self, phase, node_names=None): """Run all the scripts for a phase. This is the main function of the HookMaster. It executes self.hooks_execution_fn, and after running self.hooks_results_adapt_fn on its results it expects them to be in the form {node_name: (fail_msg, [(script, result, output), ...]}). @param phase: one of L{constants.HOOKS_PHASE_POST} or L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase @param node_names: overrides the predefined list of nodes for the given phase @return: the processed results of the hooks multi-node rpc call @raise errors.HooksFailure: on communication failure to the nodes @raise errors.HooksAbort: on failure of one of the hooks """ if phase == constants.HOOKS_PHASE_PRE: if node_names is None: node_names = self.pre_nodes env = self.pre_env elif phase == constants.HOOKS_PHASE_POST: if node_names is None: node_names = self.post_nodes env = self._BuildEnv(phase) else: raise AssertionError("Unknown phase '%s'" % phase) if not node_names: # empty node list, we should not attempt to run this as either # we're in the cluster init phase and the rpc client part can't # even attempt to run, or this LU doesn't do hooks at all return results = self._RunWrapper(node_names, self.hooks_path, phase, env) if not results: msg = "Communication Failure" if phase == constants.HOOKS_PHASE_PRE: raise errors.HooksFailure(msg) else: self.log_fn(msg) return results converted_res = results if self.hooks_results_adapt_fn: converted_res = self.hooks_results_adapt_fn(results) errs = [] for node_name, (fail_msg, offline, hooks_results) in converted_res.items(): if offline: continue if fail_msg: self.log_fn("Communication failure to node %s: %s", node_name, fail_msg) continue for script, hkr, output in hooks_results: if hkr == constants.HKR_FAIL: if phase == constants.HOOKS_PHASE_PRE: errs.append((node_name, script, output)) else: if not output: output = "(no output)" self.log_fn("On %s script %s failed, output: %s" % (node_name, script, output)) if errs and phase == constants.HOOKS_PHASE_PRE: raise errors.HooksAbort(errs) return results def RunConfigUpdate(self): """Run the special configuration update hook This is a special hook that runs only on the master after each top-level LI if the configuration has been updated. 
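It reuses the pre-phase hook environment (C{self.pre_env}) and runs in the post phase on the master node only.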
""" phase = constants.HOOKS_PHASE_POST hpath = constants.HOOKS_NAME_CFGUPDATE nodes = [self.master_name] self._RunWrapper(nodes, hpath, phase, self.pre_env) @staticmethod def BuildFromLu(hooks_execution_fn, lu): if lu.HPATH is None: nodes = (None, None) else: hooks_nodes = lu.BuildHooksNodes() to_name = lambda node_uuids: frozenset(lu.cfg.GetNodeNames(node_uuids)) if len(hooks_nodes) == 2: nodes = (to_name(hooks_nodes[0]), to_name(hooks_nodes[1])) elif len(hooks_nodes) == 3: nodes = (to_name(hooks_nodes[0]), to_name(hooks_nodes[1]) | frozenset(hooks_nodes[2])) else: raise errors.ProgrammerError( "LogicalUnit.BuildHooksNodes must return a 2- or 3-tuple") master_name = cluster_name = None if lu.cfg: master_name = lu.cfg.GetMasterNodeName() cluster_name = lu.cfg.GetClusterName() return HooksMaster(lu.op.OP_ID, lu.HPATH, nodes, hooks_execution_fn, _RpcResultsToHooksResults, lu.BuildHooksEnv, lu.LogWarning, lu.HTYPE, cluster_name, master_name) ganeti-2.9.3/lib/confd/0000755000000000000000000000000012271445544014641 5ustar00rootroot00000000000000ganeti-2.9.3/lib/confd/client.py0000644000000000000000000005266512244641676016513 0ustar00rootroot00000000000000# # # Copyright (C) 2009, 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti confd client Clients can use the confd client library to send requests to a group of master candidates running confd. The expected usage is through the asyncore framework, by sending queries, and asynchronously receiving replies through a callback. This way the client library doesn't ever need to "wait" on a particular answer, and can proceed even if some udp packets are lost. It's up to the user to reschedule queries if they haven't received responses and they need them. Example usage:: client = ConfdClient(...) # includes callback specification req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_PING) client.SendRequest(req) # then make sure your client calls asyncore.loop() or daemon.Mainloop.Run() # ... wait ... # And your callback will be called by asyncore, when your query gets a # response, or when it expires. You can use the provided ConfdFilterCallback to act as a filter, only passing "newer" answer to your callback, and filtering out outdated ones, or ones confirming what you already got. 
""" # pylint: disable=E0203 # E0203: Access to member %r before its definition, since we use # objects.py which doesn't explicitly initialise its members import time import random from ganeti import utils from ganeti import constants from ganeti import objects from ganeti import serializer from ganeti import daemon # contains AsyncUDPSocket from ganeti import errors from ganeti import confd from ganeti import ssconf from ganeti import compat from ganeti import netutils from ganeti import pathutils class ConfdAsyncUDPClient(daemon.AsyncUDPSocket): """Confd udp asyncore client This is kept separate from the main ConfdClient to make sure it's easy to implement a non-asyncore based client library. """ def __init__(self, client, family): """Constructor for ConfdAsyncUDPClient @type client: L{ConfdClient} @param client: client library, to pass the datagrams to """ daemon.AsyncUDPSocket.__init__(self, family) self.client = client # this method is overriding a daemon.AsyncUDPSocket method def handle_datagram(self, payload, ip, port): self.client.HandleResponse(payload, ip, port) class _Request(object): """Request status structure. @ivar request: the request data @ivar args: any extra arguments for the callback @ivar expiry: the expiry timestamp of the request @ivar sent: the set of contacted peers @ivar rcvd: the set of peers who replied """ def __init__(self, request, args, expiry, sent): self.request = request self.args = args self.expiry = expiry self.sent = frozenset(sent) self.rcvd = set() class ConfdClient: """Send queries to confd, and get back answers. Since the confd model works by querying multiple master candidates, and getting back answers, this is an asynchronous library. It can either work through asyncore or with your own handling. @type _requests: dict @ivar _requests: dictionary indexes by salt, which contains data about the outstanding requests; the values are objects of type L{_Request} """ def __init__(self, hmac_key, peers, callback, port=None, logger=None): """Constructor for ConfdClient @type hmac_key: string @param hmac_key: hmac key to talk to confd @type peers: list @param peers: list of peer nodes @type callback: f(L{ConfdUpcallPayload}) @param callback: function to call when getting answers @type port: integer @param port: confd port (default: use GetDaemonPort) @type logger: logging.Logger @param logger: optional logger for internal conditions """ if not callable(callback): raise errors.ProgrammerError("callback must be callable") self.UpdatePeerList(peers) self._SetPeersAddressFamily() self._hmac_key = hmac_key self._socket = ConfdAsyncUDPClient(self, self._family) self._callback = callback self._confd_port = port self._logger = logger self._requests = {} if self._confd_port is None: self._confd_port = netutils.GetDaemonPort(constants.CONFD) def UpdatePeerList(self, peers): """Update the list of peers @type peers: list @param peers: list of peer nodes """ # we are actually called from init, so: # pylint: disable=W0201 if not isinstance(peers, list): raise errors.ProgrammerError("peers must be a list") # make a copy of peers, since we're going to shuffle the list, later self._peers = list(peers) def _PackRequest(self, request, now=None): """Prepare a request to be sent on the wire. This function puts a proper salt in a confd request, puts the proper salt, and adds the correct magic number. 
""" if now is None: now = time.time() tstamp = "%d" % now req = serializer.DumpSignedJson(request.ToDict(), self._hmac_key, tstamp) return confd.PackMagic(req) def _UnpackReply(self, payload): in_payload = confd.UnpackMagic(payload) (dict_answer, salt) = serializer.LoadSignedJson(in_payload, self._hmac_key) answer = objects.ConfdReply.FromDict(dict_answer) return answer, salt def ExpireRequests(self): """Delete all the expired requests. """ now = time.time() for rsalt, rq in self._requests.items(): if now >= rq.expiry: del self._requests[rsalt] client_reply = ConfdUpcallPayload(salt=rsalt, type=UPCALL_EXPIRE, orig_request=rq.request, extra_args=rq.args, client=self, ) self._callback(client_reply) def SendRequest(self, request, args=None, coverage=0, async=True): """Send a confd request to some MCs @type request: L{objects.ConfdRequest} @param request: the request to send @type args: tuple @param args: additional callback arguments @type coverage: integer @param coverage: number of remote nodes to contact; if default (0), it will use a reasonable default (L{ganeti.constants.CONFD_DEFAULT_REQ_COVERAGE}), if -1 is passed, it will use the maximum number of peers, otherwise the number passed in will be used @type async: boolean @param async: handle the write asynchronously """ if coverage == 0: coverage = min(len(self._peers), constants.CONFD_DEFAULT_REQ_COVERAGE) elif coverage == -1: coverage = len(self._peers) if coverage > len(self._peers): raise errors.ConfdClientError("Not enough MCs known to provide the" " desired coverage") if not request.rsalt: raise errors.ConfdClientError("Missing request rsalt") self.ExpireRequests() if request.rsalt in self._requests: raise errors.ConfdClientError("Duplicate request rsalt") if request.type not in constants.CONFD_REQS: raise errors.ConfdClientError("Invalid request type") random.shuffle(self._peers) targets = self._peers[:coverage] now = time.time() payload = self._PackRequest(request, now=now) for target in targets: try: self._socket.enqueue_send(target, self._confd_port, payload) except errors.UdpDataSizeError: raise errors.ConfdClientError("Request too big") expire_time = now + constants.CONFD_CLIENT_EXPIRE_TIMEOUT self._requests[request.rsalt] = _Request(request, args, expire_time, targets) if not async: self.FlushSendQueue() def HandleResponse(self, payload, ip, port): """Asynchronous handler for a confd reply Call the relevant callback associated to the current request. """ try: try: answer, salt = self._UnpackReply(payload) except (errors.SignatureError, errors.ConfdMagicError), err: if self._logger: self._logger.debug("Discarding broken package: %s" % err) return try: rq = self._requests[salt] except KeyError: if self._logger: self._logger.debug("Discarding unknown (expired?) reply: %s" % err) return rq.rcvd.add(ip) client_reply = ConfdUpcallPayload(salt=salt, type=UPCALL_REPLY, server_reply=answer, orig_request=rq.request, server_ip=ip, server_port=port, extra_args=rq.args, client=self, ) self._callback(client_reply) finally: self.ExpireRequests() def FlushSendQueue(self): """Send out all pending requests. Can be used for synchronous client use. """ while self._socket.writable(): self._socket.handle_write() def ReceiveReply(self, timeout=1): """Receive one reply. 
@type timeout: float @param timeout: how long to wait for the reply @rtype: boolean @return: True if some data has been handled, False otherwise """ return self._socket.process_next_packet(timeout=timeout) @staticmethod def _NeededReplies(peer_cnt): """Compute the minimum safe number of replies for a query. The algorithm is designed to work well for both small and big number of peers: - for less than three, we require all responses - for less than five, we allow one miss - otherwise, half the number plus one This guarantees that we progress monotonically: 1->1, 2->2, 3->2, 4->2, 5->3, 6->3, 7->4, etc. @type peer_cnt: int @param peer_cnt: the number of peers contacted @rtype: int @return: the number of replies which should give a safe coverage """ if peer_cnt < 3: return peer_cnt elif peer_cnt < 5: return peer_cnt - 1 else: return int(peer_cnt / 2) + 1 def WaitForReply(self, salt, timeout=constants.CONFD_CLIENT_EXPIRE_TIMEOUT): """Wait for replies to a given request. This method will wait until either the timeout expires or a minimum number (computed using L{_NeededReplies}) of replies are received for the given salt. It is useful when doing synchronous calls to this library. @param salt: the salt of the request we want responses for @param timeout: the maximum timeout (should be less or equal to L{ganeti.constants.CONFD_CLIENT_EXPIRE_TIMEOUT} @rtype: tuple @return: a tuple of (timed_out, sent_cnt, recv_cnt); if the request is unknown, timed_out will be true and the counters will be zero """ def _CheckResponse(): if salt not in self._requests: # expired? if self._logger: self._logger.debug("Discarding unknown/expired request: %s" % salt) return MISSING rq = self._requests[salt] if len(rq.rcvd) >= expected: # already got all replies return (False, len(rq.sent), len(rq.rcvd)) # else wait, using default timeout self.ReceiveReply() raise utils.RetryAgain() MISSING = (True, 0, 0) if salt not in self._requests: return MISSING # extend the expire time with the current timeout, so that we # don't get the request expired from under us rq = self._requests[salt] rq.expiry += timeout sent = len(rq.sent) expected = self._NeededReplies(sent) try: return utils.Retry(_CheckResponse, 0, timeout) except utils.RetryTimeout: if salt in self._requests: rq = self._requests[salt] return (True, len(rq.sent), len(rq.rcvd)) else: return MISSING def _SetPeersAddressFamily(self): if not self._peers: raise errors.ConfdClientError("Peer list empty") try: peer = self._peers[0] self._family = netutils.IPAddress.GetAddressFamily(peer) for peer in self._peers[1:]: if netutils.IPAddress.GetAddressFamily(peer) != self._family: raise errors.ConfdClientError("Peers must be of same address family") except errors.IPAddressError: raise errors.ConfdClientError("Peer address %s invalid" % peer) # UPCALL_REPLY: server reply upcall # has all ConfdUpcallPayload fields populated UPCALL_REPLY = 1 # UPCALL_EXPIRE: internal library request expire # has only salt, type, orig_request and extra_args UPCALL_EXPIRE = 2 CONFD_UPCALL_TYPES = compat.UniqueFrozenset([ UPCALL_REPLY, UPCALL_EXPIRE, ]) class ConfdUpcallPayload(objects.ConfigObject): """Callback argument for confd replies @type salt: string @ivar salt: salt associated with the query @type type: one of confd.client.CONFD_UPCALL_TYPES @ivar type: upcall type (server reply, expired request, ...) 
@type orig_request: L{objects.ConfdRequest} @ivar orig_request: original request @type server_reply: L{objects.ConfdReply} @ivar server_reply: server reply @type server_ip: string @ivar server_ip: answering server ip address @type server_port: int @ivar server_port: answering server port @type extra_args: any @ivar extra_args: 'args' argument of the SendRequest function @type client: L{ConfdClient} @ivar client: current confd client instance """ __slots__ = [ "salt", "type", "orig_request", "server_reply", "server_ip", "server_port", "extra_args", "client", ] class ConfdClientRequest(objects.ConfdRequest): """This is the client-side version of ConfdRequest. This version of the class helps creating requests, on the client side, by filling in some default values. """ def __init__(self, **kwargs): objects.ConfdRequest.__init__(self, **kwargs) if not self.rsalt: self.rsalt = utils.NewUUID() if not self.protocol: self.protocol = constants.CONFD_PROTOCOL_VERSION if self.type not in constants.CONFD_REQS: raise errors.ConfdClientError("Invalid request type") class ConfdFilterCallback: """Callback that calls another callback, but filters duplicate results. @ivar consistent: a dictionary indexed by salt; for each salt, if all responses ware identical, this will be True; this is the expected state on a healthy cluster; on inconsistent or partitioned clusters, this might be False, if we see answers with the same serial but different contents """ def __init__(self, callback, logger=None): """Constructor for ConfdFilterCallback @type callback: f(L{ConfdUpcallPayload}) @param callback: function to call when getting answers @type logger: logging.Logger @param logger: optional logger for internal conditions """ if not callable(callback): raise errors.ProgrammerError("callback must be callable") self._callback = callback self._logger = logger # answers contains a dict of salt -> answer self._answers = {} self.consistent = {} def _LogFilter(self, salt, new_reply, old_reply): if not self._logger: return if new_reply.serial > old_reply.serial: self._logger.debug("Filtering confirming answer, with newer" " serial for query %s" % salt) elif new_reply.serial == old_reply.serial: if new_reply.answer != old_reply.answer: self._logger.warning("Got incoherent answers for query %s" " (serial: %s)" % (salt, new_reply.serial)) else: self._logger.debug("Filtering confirming answer, with same" " serial for query %s" % salt) else: self._logger.debug("Filtering outdated answer for query %s" " serial: (%d < %d)" % (salt, old_reply.serial, new_reply.serial)) def _HandleExpire(self, up): # if we have no answer we have received none, before the expiration. if up.salt in self._answers: del self._answers[up.salt] if up.salt in self.consistent: del self.consistent[up.salt] def _HandleReply(self, up): """Handle a single confd reply, and decide whether to filter it. 
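In short: the first answer for a salt always passes; a newer serial passes only when its content differs; identical or older serials are filtered, and an equal serial with different content additionally marks the salt as inconsistent.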
    @rtype: boolean
    @return: True if the reply should be filtered, False if it should be
        passed on to the up-callback

    """
    filter_upcall = False
    salt = up.salt
    if salt not in self.consistent:
      self.consistent[salt] = True
    if salt not in self._answers:
      # first answer for a query (don't filter, and record)
      self._answers[salt] = up.server_reply
    elif up.server_reply.serial > self._answers[salt].serial:
      # newer answer (record, and compare contents)
      old_answer = self._answers[salt]
      self._answers[salt] = up.server_reply
      if up.server_reply.answer == old_answer.answer:
        # same content (filter) (version upgrade was unrelated)
        filter_upcall = True
        self._LogFilter(salt, up.server_reply, old_answer)
      # else: different content, pass up a second answer
    else:
      # older or same-version answer (duplicate or outdated, filter)
      if (up.server_reply.serial == self._answers[salt].serial and
          up.server_reply.answer != self._answers[salt].answer):
        self.consistent[salt] = False
      filter_upcall = True
      self._LogFilter(salt, up.server_reply, self._answers[salt])

    return filter_upcall

  def __call__(self, up):
    """Filtering callback

    @type up: L{ConfdUpcallPayload}
    @param up: upcall payload

    """
    filter_upcall = False
    if up.type == UPCALL_REPLY:
      filter_upcall = self._HandleReply(up)
    elif up.type == UPCALL_EXPIRE:
      self._HandleExpire(up)

    if not filter_upcall:
      self._callback(up)


class ConfdCountingCallback:
  """Callback that calls another callback, and counts the answers

  """
  def __init__(self, callback, logger=None):
    """Constructor for ConfdCountingCallback

    @type callback: f(L{ConfdUpcallPayload})
    @param callback: function to call when getting answers
    @type logger: logging.Logger
    @param logger: optional logger for internal conditions

    """
    if not callable(callback):
      raise errors.ProgrammerError("callback must be callable")

    self._callback = callback
    self._logger = logger
    # answers contains a dict of salt -> count
    self._answers = {}

  def RegisterQuery(self, salt):
    if salt in self._answers:
      raise errors.ProgrammerError("query already registered")
    self._answers[salt] = 0

  def AllAnswered(self):
    """Have all the registered queries received at least one answer?

    """
    return compat.all(self._answers.values())

  def _HandleExpire(self, up):
    # if we have no answer we have received none, before the expiration.
    if up.salt in self._answers:
      del self._answers[up.salt]

  def _HandleReply(self, up):
    """Handle a single confd reply by incrementing the answer count for
    its query, if the query is registered.

    """
    if up.salt in self._answers:
      self._answers[up.salt] += 1

  def __call__(self, up):
    """Counting callback

    @type up: L{ConfdUpcallPayload}
    @param up: upcall payload

    """
    if up.type == UPCALL_REPLY:
      self._HandleReply(up)
    elif up.type == UPCALL_EXPIRE:
      self._HandleExpire(up)
    self._callback(up)


class StoreResultCallback:
  """Callback that simply stores the most recent answer.

  @ivar _answers: dict of salt to (have_answer, reply)

  """
  _NO_KEY = (False, None)

  def __init__(self):
    """Constructor for StoreResultCallback

    """
    # answers contains a dict of salt -> best result
    self._answers = {}

  def GetResponse(self, salt):
    """Return the best match for a salt

    """
    return self._answers.get(salt, self._NO_KEY)

  def _HandleExpire(self, up):
    """Expiration handler.

    """
    if up.salt in self._answers and self._answers[up.salt] == self._NO_KEY:
      del self._answers[up.salt]

  def _HandleReply(self, up):
    """Handle a single confd reply, storing it as the current best answer.
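
    Illustrative usage sketch for the class (a C{client} dispatching
    replies to C{cb}, and a C{salt} from a previous request, are
    assumed)::

      cb = StoreResultCallback()
      # ... the client processes packets and invokes cb with upcalls ...
      (have_answer, reply) = cb.GetResponse(salt)
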
""" self._answers[up.salt] = (True, up) def __call__(self, up): """Filtering callback @type up: L{ConfdUpcallPayload} @param up: upper callback """ if up.type == UPCALL_REPLY: self._HandleReply(up) elif up.type == UPCALL_EXPIRE: self._HandleExpire(up) def GetConfdClient(callback): """Return a client configured using the given callback. This is handy to abstract the MC list and HMAC key reading. @attention: This should only be called on nodes which are part of a cluster, since it depends on a valid (ganeti) data directory; for code running outside of a cluster, you need to create the client manually """ ss = ssconf.SimpleStore() mc_file = ss.KeyToFilename(constants.SS_MASTER_CANDIDATES_IPS) mc_list = utils.ReadFile(mc_file).splitlines() hmac_key = utils.ReadFile(pathutils.CONFD_HMAC_KEY) return ConfdClient(hmac_key, mc_list, callback) ganeti-2.9.3/lib/confd/__init__.py0000644000000000000000000000346112230001635016736 0ustar00rootroot00000000000000# # # Copyright (C) 2009, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti confd client/server library """ from ganeti import constants from ganeti import errors from ganeti import ht _FOURCC_LEN = 4 #: Items in the individual rows of the NodeDrbd query _HTNodeDrbdItems = [ht.TString, ht.TInt, ht.TString, ht.TString, ht.TString, ht.TString] #: Type for the (top-level) result of NodeDrbd query HTNodeDrbd = ht.TListOf(ht.TAnd(ht.TList, ht.TIsLength(len(_HTNodeDrbdItems)), ht.TItems(_HTNodeDrbdItems))) def PackMagic(payload): """Prepend the confd magic fourcc to a payload. """ return "".join([constants.CONFD_MAGIC_FOURCC, payload]) def UnpackMagic(payload): """Unpack and check the confd magic fourcc from a payload. """ if len(payload) < _FOURCC_LEN: raise errors.ConfdMagicError("UDP payload too short to contain the" " fourcc code") magic_number = payload[:_FOURCC_LEN] if magic_number != constants.CONFD_MAGIC_FOURCC: raise errors.ConfdMagicError("UDP payload contains an unkown fourcc") return payload[_FOURCC_LEN:] ganeti-2.9.3/lib/query.py0000644000000000000000000025122012271422343015261 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
"""Module for query operations How it works: - Add field definitions - See how L{NODE_FIELDS} is built - Each field gets: - Query field definition (L{objects.QueryFieldDefinition}, use L{_MakeField} for creating), containing: - Name, must be lowercase and match L{FIELD_NAME_RE} - Title for tables, must not contain whitespace and match L{TITLE_RE} - Value data type, e.g. L{constants.QFT_NUMBER} - Human-readable description, must not end with punctuation or contain newlines - Data request type, see e.g. C{NQ_*} - OR-ed flags, see C{QFF_*} - A retrieval function, see L{Query.__init__} for description - Pass list of fields through L{_PrepareFieldList} for preparation and checks - Instantiate L{Query} with prepared field list definition and selected fields - Call L{Query.RequestedData} to determine what data to collect/compute - Call L{Query.Query} or L{Query.OldStyleQuery} with collected data and use result - Data container must support iteration using C{__iter__} - Items are passed to retrieval functions and can have any format - Call L{Query.GetFields} to get list of definitions for selected fields @attention: Retrieval functions must be idempotent. They can be called multiple times, in any order and any number of times. """ import logging import operator import re from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import compat from ganeti import objects from ganeti import ht from ganeti import runtime from ganeti import qlang from ganeti import jstore from ganeti.constants import (QFT_UNKNOWN, QFT_TEXT, QFT_BOOL, QFT_NUMBER, QFT_UNIT, QFT_TIMESTAMP, QFT_OTHER, RS_NORMAL, RS_UNKNOWN, RS_NODATA, RS_UNAVAIL, RS_OFFLINE) (NETQ_CONFIG, NETQ_GROUP, NETQ_STATS, NETQ_INST) = range(300, 304) # Constants for requesting data from the caller/data provider. Each property # collected/computed separately by the data provider should have its own to # only collect the requested data and not more. 
(NQ_CONFIG, NQ_INST, NQ_LIVE, NQ_GROUP, NQ_OOB) = range(1, 6) (IQ_CONFIG, IQ_LIVE, IQ_DISKUSAGE, IQ_CONSOLE, IQ_NODES, IQ_NETWORKS) = range(100, 106) (LQ_MODE, LQ_OWNER, LQ_PENDING) = range(10, 13) (GQ_CONFIG, GQ_NODE, GQ_INST, GQ_DISKPARAMS) = range(200, 204) (CQ_CONFIG, CQ_QUEUE_DRAINED, CQ_WATCHER_PAUSE) = range(300, 303) (JQ_ARCHIVED, ) = range(400, 401) # Query field flags QFF_HOSTNAME = 0x01 QFF_IP_ADDRESS = 0x02 QFF_JOB_ID = 0x04 QFF_SPLIT_TIMESTAMP = 0x08 # Next values: 0x10, 0x20, 0x40, 0x80, 0x100, 0x200 QFF_ALL = (QFF_HOSTNAME | QFF_IP_ADDRESS | QFF_JOB_ID | QFF_SPLIT_TIMESTAMP) FIELD_NAME_RE = re.compile(r"^[a-z0-9/._]+$") TITLE_RE = re.compile(r"^[^\s]+$") DOC_RE = re.compile(r"^[A-Z].*[^.,?!]$") #: Verification function for each field type _VERIFY_FN = { QFT_UNKNOWN: ht.TNone, QFT_TEXT: ht.TString, QFT_BOOL: ht.TBool, QFT_NUMBER: ht.TInt, QFT_UNIT: ht.TInt, QFT_TIMESTAMP: ht.TNumber, QFT_OTHER: lambda _: True, } # Unique objects for special field statuses _FS_UNKNOWN = object() _FS_NODATA = object() _FS_UNAVAIL = object() _FS_OFFLINE = object() #: List of all special status _FS_ALL = compat.UniqueFrozenset([ _FS_UNKNOWN, _FS_NODATA, _FS_UNAVAIL, _FS_OFFLINE, ]) #: VType to QFT mapping _VTToQFT = { # TODO: fix validation of empty strings constants.VTYPE_STRING: QFT_OTHER, # since VTYPE_STRINGs can be empty constants.VTYPE_MAYBE_STRING: QFT_OTHER, constants.VTYPE_BOOL: QFT_BOOL, constants.VTYPE_SIZE: QFT_UNIT, constants.VTYPE_INT: QFT_NUMBER, } _SERIAL_NO_DOC = "%s object serial number, incremented on each modification" def _GetUnknownField(ctx, item): # pylint: disable=W0613 """Gets the contents of an unknown field. """ return _FS_UNKNOWN def _GetQueryFields(fielddefs, selected): """Calculates the internal list of selected fields. Unknown fields are returned as L{constants.QFT_UNKNOWN}. @type fielddefs: dict @param fielddefs: Field definitions @type selected: list of strings @param selected: List of selected fields """ result = [] for name in selected: try: fdef = fielddefs[name] except KeyError: fdef = (_MakeField(name, name, QFT_UNKNOWN, "Unknown field '%s'" % name), None, 0, _GetUnknownField) assert len(fdef) == 4 result.append(fdef) return result def GetAllFields(fielddefs): """Extract L{objects.QueryFieldDefinition} from field definitions. @rtype: list of L{objects.QueryFieldDefinition} """ return [fdef for (fdef, _, _, _) in fielddefs] class _FilterHints: """Class for filter analytics. When filters are used, the user of the L{Query} class usually doesn't know exactly which items will be necessary for building the result. It therefore has to prepare and compute the input data for potentially returning everything. There are two ways to optimize this. The first, and simpler, is to assign each field a group of data, so that the caller can determine which computations are necessary depending on the data groups requested. The list of referenced groups must also be computed for fields referenced in the filter. The second is restricting the items based on a primary key. The primary key is usually a unique name (e.g. a node name). This class extracts all referenced names from a filter. If it encounters any filter condition which disallows such a list to be determined (e.g. a non-equality filter), all names will be requested. The end-effect is that any operation other than L{qlang.OP_OR} and L{qlang.OP_EQUAL} will make the query more expensive. """ def __init__(self, namefield): """Initializes this class. 
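
    Illustrative example (a sketch of the note calls the filter compiler
    would make for the hypothetical filter
    C{[OP_OR, [OP_EQUAL, "name", "a"], [OP_EQUAL, "name", "b"]]})::

      hints = _FilterHints("name")
      hints.NoteLogicOp(qlang.OP_OR)
      hints.NoteBinaryOp(qlang.OP_EQUAL, None, "name", "a")
      hints.NoteBinaryOp(qlang.OP_EQUAL, None, "name", "b")
      hints.RequestedNames()  # returns ["a", "b"]
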
@type namefield: string @param namefield: Field caller is interested in """ self._namefield = namefield #: Whether all names need to be requested (e.g. if a non-equality operator #: has been used) self._allnames = False #: Which names to request self._names = None #: Data kinds referenced by the filter (used by L{Query.RequestedData}) self._datakinds = set() def RequestedNames(self): """Returns all requested values. Returns C{None} if list of values can't be determined (e.g. encountered non-equality operators). @rtype: list """ if self._allnames or self._names is None: return None return utils.UniqueSequence(self._names) def ReferencedData(self): """Returns all kinds of data referenced by the filter. """ return frozenset(self._datakinds) def _NeedAllNames(self): """Changes internal state to request all names. """ self._allnames = True self._names = None def NoteLogicOp(self, op): """Called when handling a logic operation. @type op: string @param op: Operator """ if op != qlang.OP_OR: self._NeedAllNames() def NoteUnaryOp(self, op, datakind): # pylint: disable=W0613 """Called when handling an unary operation. @type op: string @param op: Operator """ if datakind is not None: self._datakinds.add(datakind) self._NeedAllNames() def NoteBinaryOp(self, op, datakind, name, value): """Called when handling a binary operation. @type op: string @param op: Operator @type name: string @param name: Left-hand side of operator (field name) @param value: Right-hand side of operator """ if datakind is not None: self._datakinds.add(datakind) if self._allnames: return # If any operator other than equality was used, all names need to be # retrieved if op == qlang.OP_EQUAL and name == self._namefield: if self._names is None: self._names = [] self._names.append(value) else: self._NeedAllNames() def _WrapLogicOp(op_fn, sentences, ctx, item): """Wrapper for logic operator functions. """ return op_fn(fn(ctx, item) for fn in sentences) def _WrapUnaryOp(op_fn, inner, ctx, item): """Wrapper for unary operator functions. """ return op_fn(inner(ctx, item)) def _WrapBinaryOp(op_fn, retrieval_fn, value, ctx, item): """Wrapper for binary operator functions. """ return op_fn(retrieval_fn(ctx, item), value) def _WrapNot(fn, lhs, rhs): """Negates the result of a wrapped function. """ return not fn(lhs, rhs) def _PrepareRegex(pattern): """Compiles a regular expression. """ try: return re.compile(pattern) except re.error, err: raise errors.ParameterError("Invalid regex pattern (%s)" % err) def _PrepareSplitTimestamp(value): """Prepares a value for comparison by L{_MakeSplitTimestampComparison}. """ if ht.TNumber(value): return value else: return utils.MergeTime(value) def _MakeSplitTimestampComparison(fn): """Compares split timestamp values after converting to float. """ return lambda lhs, rhs: fn(utils.MergeTime(lhs), rhs) def _MakeComparisonChecks(fn): """Prepares flag-specific comparisons using a comparison function. """ return [ (QFF_SPLIT_TIMESTAMP, _MakeSplitTimestampComparison(fn), _PrepareSplitTimestamp), (QFF_JOB_ID, lambda lhs, rhs: fn(jstore.ParseJobId(lhs), rhs), jstore.ParseJobId), (None, fn, None), ] class _FilterCompilerHelper: """Converts a query filter to a callable usable for filtering. """ # String statement has no effect, pylint: disable=W0105 #: How deep filters can be nested _LEVELS_MAX = 10 # Unique identifiers for operator groups (_OPTYPE_LOGIC, _OPTYPE_UNARY, _OPTYPE_BINARY) = range(1, 4) """Functions for equality checks depending on field flags. 
List of tuples containing flags and a callable receiving the left- and right-hand side of the operator. The flags are an OR-ed value of C{QFF_*} (e.g. L{QFF_HOSTNAME} or L{QFF_SPLIT_TIMESTAMP}). Order matters. The first item with flags will be used. Flags are checked using binary AND. """ _EQUALITY_CHECKS = [ (QFF_HOSTNAME, lambda lhs, rhs: utils.MatchNameComponent(rhs, [lhs], case_sensitive=False), None), (QFF_SPLIT_TIMESTAMP, _MakeSplitTimestampComparison(operator.eq), _PrepareSplitTimestamp), (None, operator.eq, None), ] """Known operators Operator as key (C{qlang.OP_*}), value a tuple of operator group (C{_OPTYPE_*}) and a group-specific value: - C{_OPTYPE_LOGIC}: Callable taking any number of arguments; used by L{_HandleLogicOp} - C{_OPTYPE_UNARY}: Always C{None}; details handled by L{_HandleUnaryOp} - C{_OPTYPE_BINARY}: Callable taking exactly two parameters, the left- and right-hand side of the operator, used by L{_HandleBinaryOp} """ _OPS = { # Logic operators qlang.OP_OR: (_OPTYPE_LOGIC, compat.any), qlang.OP_AND: (_OPTYPE_LOGIC, compat.all), # Unary operators qlang.OP_NOT: (_OPTYPE_UNARY, None), qlang.OP_TRUE: (_OPTYPE_UNARY, None), # Binary operators qlang.OP_EQUAL: (_OPTYPE_BINARY, _EQUALITY_CHECKS), qlang.OP_NOT_EQUAL: (_OPTYPE_BINARY, [(flags, compat.partial(_WrapNot, fn), valprepfn) for (flags, fn, valprepfn) in _EQUALITY_CHECKS]), qlang.OP_LT: (_OPTYPE_BINARY, _MakeComparisonChecks(operator.lt)), qlang.OP_LE: (_OPTYPE_BINARY, _MakeComparisonChecks(operator.le)), qlang.OP_GT: (_OPTYPE_BINARY, _MakeComparisonChecks(operator.gt)), qlang.OP_GE: (_OPTYPE_BINARY, _MakeComparisonChecks(operator.ge)), qlang.OP_REGEXP: (_OPTYPE_BINARY, [ (None, lambda lhs, rhs: rhs.search(lhs), _PrepareRegex), ]), qlang.OP_CONTAINS: (_OPTYPE_BINARY, [ (None, operator.contains, None), ]), } def __init__(self, fields): """Initializes this class. @param fields: Field definitions (return value of L{_PrepareFieldList}) """ self._fields = fields self._hints = None self._op_handler = None def __call__(self, hints, qfilter): """Converts a query filter into a callable function. @type hints: L{_FilterHints} or None @param hints: Callbacks doing analysis on filter @type qfilter: list @param qfilter: Filter structure @rtype: callable @return: Function receiving context and item as parameters, returning boolean as to whether item matches filter """ self._op_handler = { self._OPTYPE_LOGIC: (self._HandleLogicOp, getattr(hints, "NoteLogicOp", None)), self._OPTYPE_UNARY: (self._HandleUnaryOp, getattr(hints, "NoteUnaryOp", None)), self._OPTYPE_BINARY: (self._HandleBinaryOp, getattr(hints, "NoteBinaryOp", None)), } try: filter_fn = self._Compile(qfilter, 0) finally: self._op_handler = None return filter_fn def _Compile(self, qfilter, level): """Inner function for converting filters. Calls the correct handler functions for the top-level operator. This function is called recursively (e.g. for logic operators). 
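
    Illustrative example of a filter structure handled here (the field
    names are hypothetical)::

      [qlang.OP_AND,
       [qlang.OP_EQUAL, "name", "node1"],
       [qlang.OP_TRUE, "master_candidate"]]

    C{qlang.OP_AND} is looked up in L{_OPS} and dispatched to
    L{_HandleLogicOp}, which in turn compiles both operands recursively.
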
""" if not (isinstance(qfilter, (list, tuple)) and qfilter): raise errors.ParameterError("Invalid filter on level %s" % level) # Limit recursion if level >= self._LEVELS_MAX: raise errors.ParameterError("Only up to %s levels are allowed (filter" " nested too deep)" % self._LEVELS_MAX) # Create copy to be modified operands = qfilter[:] op = operands.pop(0) try: (kind, op_data) = self._OPS[op] except KeyError: raise errors.ParameterError("Unknown operator '%s'" % op) (handler, hints_cb) = self._op_handler[kind] return handler(hints_cb, level, op, op_data, operands) def _LookupField(self, name): """Returns a field definition by name. """ try: return self._fields[name] except KeyError: raise errors.ParameterError("Unknown field '%s'" % name) def _HandleLogicOp(self, hints_fn, level, op, op_fn, operands): """Handles logic operators. @type hints_fn: callable @param hints_fn: Callback doing some analysis on the filter @type level: integer @param level: Current depth @type op: string @param op: Operator @type op_fn: callable @param op_fn: Function implementing operator @type operands: list @param operands: List of operands """ if hints_fn: hints_fn(op) return compat.partial(_WrapLogicOp, op_fn, [self._Compile(op, level + 1) for op in operands]) def _HandleUnaryOp(self, hints_fn, level, op, op_fn, operands): """Handles unary operators. @type hints_fn: callable @param hints_fn: Callback doing some analysis on the filter @type level: integer @param level: Current depth @type op: string @param op: Operator @type op_fn: callable @param op_fn: Function implementing operator @type operands: list @param operands: List of operands """ assert op_fn is None if len(operands) != 1: raise errors.ParameterError("Unary operator '%s' expects exactly one" " operand" % op) if op == qlang.OP_TRUE: (_, datakind, _, retrieval_fn) = self._LookupField(operands[0]) if hints_fn: hints_fn(op, datakind) op_fn = operator.truth arg = retrieval_fn elif op == qlang.OP_NOT: if hints_fn: hints_fn(op, None) op_fn = operator.not_ arg = self._Compile(operands[0], level + 1) else: raise errors.ProgrammerError("Can't handle operator '%s'" % op) return compat.partial(_WrapUnaryOp, op_fn, arg) def _HandleBinaryOp(self, hints_fn, level, op, op_data, operands): """Handles binary operators. @type hints_fn: callable @param hints_fn: Callback doing some analysis on the filter @type level: integer @param level: Current depth @type op: string @param op: Operator @param op_data: Functions implementing operators @type operands: list @param operands: List of operands """ # Unused arguments, pylint: disable=W0613 try: (name, value) = operands except (ValueError, TypeError): raise errors.ParameterError("Invalid binary operator, expected exactly" " two operands") (fdef, datakind, field_flags, retrieval_fn) = self._LookupField(name) assert fdef.kind != QFT_UNKNOWN # TODO: Type conversions? verify_fn = _VERIFY_FN[fdef.kind] if not verify_fn(value): raise errors.ParameterError("Unable to compare field '%s' (type '%s')" " with '%s', expected %s" % (name, fdef.kind, value.__class__.__name__, verify_fn)) if hints_fn: hints_fn(op, datakind, name, value) for (fn_flags, fn, valprepfn) in op_data: if fn_flags is None or fn_flags & field_flags: # Prepare value if necessary (e.g. 
compile regular expression) if valprepfn: value = valprepfn(value) return compat.partial(_WrapBinaryOp, fn, retrieval_fn, value) raise errors.ProgrammerError("Unable to find operator implementation" " (op '%s', flags %s)" % (op, field_flags)) def _CompileFilter(fields, hints, qfilter): """Converts a query filter into a callable function. See L{_FilterCompilerHelper} for details. @rtype: callable """ return _FilterCompilerHelper(fields)(hints, qfilter) class Query: def __init__(self, fieldlist, selected, qfilter=None, namefield=None): """Initializes this class. The field definition is a dictionary with the field's name as a key and a tuple containing, in order, the field definition object (L{objects.QueryFieldDefinition}, the data kind to help calling code collect data and a retrieval function. The retrieval function is called with two parameters, in order, the data container and the item in container (see L{Query.Query}). Users of this class can call L{RequestedData} before preparing the data container to determine what data is needed. @type fieldlist: dictionary @param fieldlist: Field definitions @type selected: list of strings @param selected: List of selected fields """ assert namefield is None or namefield in fieldlist self._fields = _GetQueryFields(fieldlist, selected) self._filter_fn = None self._requested_names = None self._filter_datakinds = frozenset() if qfilter is not None: # Collect requested names if wanted if namefield: hints = _FilterHints(namefield) else: hints = None # Build filter function self._filter_fn = _CompileFilter(fieldlist, hints, qfilter) if hints: self._requested_names = hints.RequestedNames() self._filter_datakinds = hints.ReferencedData() if namefield is None: self._name_fn = None else: (_, _, _, self._name_fn) = fieldlist[namefield] def RequestedNames(self): """Returns all names referenced in the filter. If there is no filter or operators are preventing determining the exact names, C{None} is returned. """ return self._requested_names def RequestedData(self): """Gets requested kinds of data. @rtype: frozenset """ return (self._filter_datakinds | frozenset(datakind for (_, datakind, _, _) in self._fields if datakind is not None)) def GetFields(self): """Returns the list of fields for this query. Includes unknown fields. @rtype: List of L{objects.QueryFieldDefinition} """ return GetAllFields(self._fields) def Query(self, ctx, sort_by_name=True): """Execute a query. @param ctx: Data container passed to field retrieval functions, must support iteration using C{__iter__} @type sort_by_name: boolean @param sort_by_name: Whether to sort by name or keep the input data's ordering """ sort = (self._name_fn and sort_by_name) result = [] for idx, item in enumerate(ctx): if not (self._filter_fn is None or self._filter_fn(ctx, item)): continue row = [_ProcessResult(fn(ctx, item)) for (_, _, _, fn) in self._fields] # Verify result if __debug__: _VerifyResultRow(self._fields, row) if sort: (status, name) = _ProcessResult(self._name_fn(ctx, item)) assert status == constants.RS_NORMAL # TODO: Are there cases where we wouldn't want to use NiceSort? # Answer: if the name field is non-string... result.append((utils.NiceSortKey(name), idx, row)) else: result.append(row) if not sort: return result # TODO: Would "heapq" be more efficient than sorting? 
# Sorting in-place instead of using "sorted()" result.sort() assert not result or (len(result[0]) == 3 and len(result[-1]) == 3) return map(operator.itemgetter(2), result) def OldStyleQuery(self, ctx, sort_by_name=True): """Query with "old" query result format. See L{Query.Query} for arguments. """ unknown = set(fdef.name for (fdef, _, _, _) in self._fields if fdef.kind == QFT_UNKNOWN) if unknown: raise errors.OpPrereqError("Unknown output fields selected: %s" % (utils.CommaJoin(unknown), ), errors.ECODE_INVAL) return [[value for (_, value) in row] for row in self.Query(ctx, sort_by_name=sort_by_name)] def _ProcessResult(value): """Converts result values into externally-visible ones. """ if value is _FS_UNKNOWN: return (RS_UNKNOWN, None) elif value is _FS_NODATA: return (RS_NODATA, None) elif value is _FS_UNAVAIL: return (RS_UNAVAIL, None) elif value is _FS_OFFLINE: return (RS_OFFLINE, None) else: return (RS_NORMAL, value) def _VerifyResultRow(fields, row): """Verifies the contents of a query result row. @type fields: list @param fields: Field definitions for result @type row: list of tuples @param row: Row data """ assert len(row) == len(fields) errs = [] for ((status, value), (fdef, _, _, _)) in zip(row, fields): if status == RS_NORMAL: if not _VERIFY_FN[fdef.kind](value): errs.append("normal field %s fails validation (value is %s)" % (fdef.name, value)) elif value is not None: errs.append("abnormal field %s has a non-None value" % fdef.name) assert not errs, ("Failed validation: %s in row %s" % (utils.CommaJoin(errs), row)) def _FieldDictKey((fdef, _, flags, fn)): """Generates key for field dictionary. """ assert fdef.name and fdef.title, "Name and title are required" assert FIELD_NAME_RE.match(fdef.name) assert TITLE_RE.match(fdef.title) assert (DOC_RE.match(fdef.doc) and len(fdef.doc.splitlines()) == 1 and fdef.doc.strip() == fdef.doc), \ "Invalid description for field '%s'" % fdef.name assert callable(fn) assert (flags & ~QFF_ALL) == 0, "Unknown flags for field '%s'" % fdef.name return fdef.name def _PrepareFieldList(fields, aliases): """Prepares field list for use by L{Query}. Converts the list to a dictionary and does some verification. @type fields: list of tuples; (L{objects.QueryFieldDefinition}, data kind, retrieval function) @param fields: List of fields, see L{Query.__init__} for a better description @type aliases: list of tuples; (alias, target) @param aliases: list of tuples containing aliases; for each alias/target pair, a duplicate will be created in the field list @rtype: dict @return: Field dictionary for L{Query} """ if __debug__: duplicates = utils.FindDuplicates(fdef.title.lower() for (fdef, _, _, _) in fields) assert not duplicates, "Duplicate title(s) found: %r" % duplicates result = utils.SequenceToDict(fields, key=_FieldDictKey) for alias, target in aliases: assert alias not in result, "Alias %s overrides an existing field" % alias assert target in result, "Missing target %s for alias %s" % (target, alias) (fdef, k, flags, fn) = result[target] fdef = fdef.Copy() fdef.name = alias result[alias] = (fdef, k, flags, fn) assert len(result) == len(fields) + len(aliases) assert compat.all(name == fdef.name for (name, (fdef, _, _, _)) in result.items()) return result def GetQueryResponse(query, ctx, sort_by_name=True): """Prepares the response for a query. 
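
  Illustrative usage sketch (assuming C{fielddefs} is a field dictionary
  prepared with L{_PrepareFieldList} and C{data} a matching container)::

    query = Query(fielddefs, ["name", "pip"], namefield="name")
    response = GetQueryResponse(query, data)
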
@type query: L{Query} @param ctx: Data container, see L{Query.Query} @type sort_by_name: boolean @param sort_by_name: Whether to sort by name or keep the input data's ordering """ return objects.QueryResponse(data=query.Query(ctx, sort_by_name=sort_by_name), fields=query.GetFields()).ToDict() def QueryFields(fielddefs, selected): """Returns list of available fields. @type fielddefs: dict @param fielddefs: Field definitions @type selected: list of strings @param selected: List of selected fields @return: List of L{objects.QueryFieldDefinition} """ if selected is None: # Client requests all fields, sort by name fdefs = utils.NiceSort(GetAllFields(fielddefs.values()), key=operator.attrgetter("name")) else: # Keep order as requested by client fdefs = Query(fielddefs, selected).GetFields() return objects.QueryFieldsResponse(fields=fdefs).ToDict() def _MakeField(name, title, kind, doc): """Wrapper for creating L{objects.QueryFieldDefinition} instances. @param name: Field name as a regular expression @param title: Human-readable title @param kind: Field type @param doc: Human-readable description """ return objects.QueryFieldDefinition(name=name, title=title, kind=kind, doc=doc) def _StaticValueInner(value, ctx, _): # pylint: disable=W0613 """Returns a static value. """ return value def _StaticValue(value): """Prepares a function to return a static value. """ return compat.partial(_StaticValueInner, value) def _GetNodeRole(node, master_uuid): """Determine node role. @type node: L{objects.Node} @param node: Node object @type master_uuid: string @param master_uuid: Master node UUID """ if node.uuid == master_uuid: return constants.NR_MASTER elif node.master_candidate: return constants.NR_MCANDIDATE elif node.drained: return constants.NR_DRAINED elif node.offline: return constants.NR_OFFLINE else: return constants.NR_REGULAR def _GetItemAttr(attr): """Returns a field function to return an attribute of the item. @param attr: Attribute name """ getter = operator.attrgetter(attr) return lambda _, item: getter(item) def _GetItemMaybeAttr(attr): """Returns a field function to return a not-None attribute of the item. If the value is None, then C{_FS_UNAVAIL} will be returned instead. @param attr: Attribute name """ def _helper(_, obj): val = getattr(obj, attr) if val is None: return _FS_UNAVAIL else: return val return _helper def _GetNDParam(name): """Return a field function to return an ND parameter out of the context. """ def _helper(ctx, _): if ctx.ndparams is None: return _FS_UNAVAIL else: return ctx.ndparams.get(name, None) return _helper def _BuildNDFields(is_group): """Builds all the ndparam fields. @param is_group: whether this is called at group or node level """ if is_group: field_kind = GQ_CONFIG else: field_kind = NQ_GROUP return [(_MakeField("ndp/%s" % name, constants.NDS_PARAMETER_TITLES.get(name, "ndp/%s" % name), _VTToQFT[kind], "The \"%s\" node parameter" % name), field_kind, 0, _GetNDParam(name)) for name, kind in constants.NDS_PARAMETER_TYPES.items()] def _ConvWrapInner(convert, fn, ctx, item): """Wrapper for converting values. @param convert: Conversion function receiving value as single parameter @param fn: Retrieval function """ value = fn(ctx, item) # Is the value an abnormal status? if compat.any(value is fs for fs in _FS_ALL): # Return right away return value # TODO: Should conversion function also receive context, item or both? return convert(value) def _ConvWrap(convert, fn): """Convenience wrapper for L{_ConvWrapInner}. 
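
  Illustrative example (a sketch; the C{mem_size} attribute and the
  mebibyte-to-byte conversion are hypothetical)::

    fn = _ConvWrap(lambda mib: mib * 1024 * 1024,
                   _GetItemAttr("mem_size"))
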
@param convert: Conversion function receiving value as single parameter @param fn: Retrieval function """ return compat.partial(_ConvWrapInner, convert, fn) def _GetItemTimestamp(getter): """Returns function for getting timestamp of item. @type getter: callable @param getter: Function to retrieve timestamp attribute """ def fn(_, item): """Returns a timestamp of item. """ timestamp = getter(item) if timestamp is None: # Old configs might not have all timestamps return _FS_UNAVAIL else: return timestamp return fn def _GetItemTimestampFields(datatype): """Returns common timestamp fields. @param datatype: Field data type for use by L{Query.RequestedData} """ return [ (_MakeField("ctime", "CTime", QFT_TIMESTAMP, "Creation timestamp"), datatype, 0, _GetItemTimestamp(operator.attrgetter("ctime"))), (_MakeField("mtime", "MTime", QFT_TIMESTAMP, "Modification timestamp"), datatype, 0, _GetItemTimestamp(operator.attrgetter("mtime"))), ] class NodeQueryData: """Data container for node data queries. """ def __init__(self, nodes, live_data, master_uuid, node_to_primary, node_to_secondary, inst_uuid_to_inst_name, groups, oob_support, cluster): """Initializes this class. """ self.nodes = nodes self.live_data = live_data self.master_uuid = master_uuid self.node_to_primary = node_to_primary self.node_to_secondary = node_to_secondary self.inst_uuid_to_inst_name = inst_uuid_to_inst_name self.groups = groups self.oob_support = oob_support self.cluster = cluster # Used for individual rows self.curlive_data = None self.ndparams = None def __iter__(self): """Iterate over all nodes. This function has side-effects and only one instance of the resulting generator should be used at a time. """ for node in self.nodes: group = self.groups.get(node.group, None) if group is None: self.ndparams = None else: self.ndparams = self.cluster.FillND(node, group) if self.live_data: self.curlive_data = self.live_data.get(node.uuid, None) else: self.curlive_data = None yield node #: Fields that are direct attributes of an L{objects.Node} object _NODE_SIMPLE_FIELDS = { "drained": ("Drained", QFT_BOOL, 0, "Whether node is drained"), "master_candidate": ("MasterC", QFT_BOOL, 0, "Whether node is a master candidate"), "master_capable": ("MasterCapable", QFT_BOOL, 0, "Whether node can become a master candidate"), "name": ("Node", QFT_TEXT, QFF_HOSTNAME, "Node name"), "offline": ("Offline", QFT_BOOL, 0, "Whether node is marked offline"), "serial_no": ("SerialNo", QFT_NUMBER, 0, _SERIAL_NO_DOC % "Node"), "uuid": ("UUID", QFT_TEXT, 0, "Node UUID"), "vm_capable": ("VMCapable", QFT_BOOL, 0, "Whether node can host instances"), } #: Fields requiring talking to the node # Note that none of these are available for non-vm_capable nodes _NODE_LIVE_FIELDS = { "bootid": ("BootID", QFT_TEXT, "bootid", "Random UUID renewed for each system reboot, can be used" " for detecting reboots by tracking changes"), "cnodes": ("CNodes", QFT_NUMBER, "cpu_nodes", "Number of NUMA domains on node (if exported by hypervisor)"), "cnos": ("CNOs", QFT_NUMBER, "cpu_dom0", "Number of logical processors used by the node OS (dom0 for Xen)"), "csockets": ("CSockets", QFT_NUMBER, "cpu_sockets", "Number of physical CPU sockets (if exported by hypervisor)"), "ctotal": ("CTotal", QFT_NUMBER, "cpu_total", "Number of logical processors"), "dfree": ("DFree", QFT_UNIT, "storage_free", "Available storage space in storage unit"), "dtotal": ("DTotal", QFT_UNIT, "storage_size", "Total storage space in storage unit used for instance disk" " allocation"), "spfree": ("SpFree", QFT_NUMBER, 
"spindles_free", "Available spindles in volume group (exclusive storage only)"), "sptotal": ("SpTotal", QFT_NUMBER, "spindles_total", "Total spindles in volume group (exclusive storage only)"), "mfree": ("MFree", QFT_UNIT, "memory_free", "Memory available for instance allocations"), "mnode": ("MNode", QFT_UNIT, "memory_dom0", "Amount of memory used by node (dom0 for Xen)"), "mtotal": ("MTotal", QFT_UNIT, "memory_total", "Total amount of memory of physical machine"), } def _GetGroup(cb): """Build function for calling another function with an node group. @param cb: The callback to be called with the nodegroup """ def fn(ctx, node): """Get group data for a node. @type ctx: L{NodeQueryData} @type inst: L{objects.Node} @param inst: Node object """ ng = ctx.groups.get(node.group, None) if ng is None: # Nodes always have a group, or the configuration is corrupt return _FS_UNAVAIL return cb(ctx, node, ng) return fn def _GetNodeGroup(ctx, node, ng): # pylint: disable=W0613 """Returns the name of a node's group. @type ctx: L{NodeQueryData} @type node: L{objects.Node} @param node: Node object @type ng: L{objects.NodeGroup} @param ng: The node group this node belongs to """ return ng.name def _GetNodePower(ctx, node): """Returns the node powered state @type ctx: L{NodeQueryData} @type node: L{objects.Node} @param node: Node object """ if ctx.oob_support[node.uuid]: return node.powered return _FS_UNAVAIL def _GetNdParams(ctx, node, ng): """Returns the ndparams for this node. @type ctx: L{NodeQueryData} @type node: L{objects.Node} @param node: Node object @type ng: L{objects.NodeGroup} @param ng: The node group this node belongs to """ return ctx.cluster.SimpleFillND(ng.FillND(node)) def _GetLiveNodeField(field, kind, ctx, node): """Gets the value of a "live" field from L{NodeQueryData}. @param field: Live field name @param kind: Data kind, one of L{constants.QFT_ALL} @type ctx: L{NodeQueryData} @type node: L{objects.Node} @param node: Node object """ if node.offline: return _FS_OFFLINE if not node.vm_capable: return _FS_UNAVAIL if not ctx.curlive_data: return _FS_NODATA return _GetStatsField(field, kind, ctx.curlive_data) def _GetStatsField(field, kind, data): """Gets a value from live statistics. If the value is not found, L{_FS_UNAVAIL} is returned. If the field kind is numeric a conversion to integer is attempted. If that fails, L{_FS_UNAVAIL} is returned. @param field: Live field name @param kind: Data kind, one of L{constants.QFT_ALL} @type data: dict @param data: Statistics """ try: value = data[field] except KeyError: return _FS_UNAVAIL if kind == QFT_TEXT: return value assert kind in (QFT_NUMBER, QFT_UNIT) # Try to convert into number try: return int(value) except (ValueError, TypeError): logging.exception("Failed to convert node field '%s' (value %r) to int", field, value) return _FS_UNAVAIL def _GetNodeHvState(_, node): """Converts node's hypervisor state for query result. """ hv_state = node.hv_state if hv_state is None: return _FS_UNAVAIL return dict((name, value.ToDict()) for (name, value) in hv_state.items()) def _GetNodeDiskState(_, node): """Converts node's disk state for query result. """ disk_state = node.disk_state if disk_state is None: return _FS_UNAVAIL return dict((disk_kind, dict((name, value.ToDict()) for (name, value) in kind_state.items())) for (disk_kind, kind_state) in disk_state.items()) def _BuildNodeFields(): """Builds list of fields for node queries. 
""" fields = [ (_MakeField("pip", "PrimaryIP", QFT_TEXT, "Primary IP address"), NQ_CONFIG, 0, _GetItemAttr("primary_ip")), (_MakeField("sip", "SecondaryIP", QFT_TEXT, "Secondary IP address"), NQ_CONFIG, 0, _GetItemAttr("secondary_ip")), (_MakeField("tags", "Tags", QFT_OTHER, "Tags"), NQ_CONFIG, 0, lambda ctx, node: list(node.GetTags())), (_MakeField("master", "IsMaster", QFT_BOOL, "Whether node is master"), NQ_CONFIG, 0, lambda ctx, node: node.uuid == ctx.master_uuid), (_MakeField("group", "Group", QFT_TEXT, "Node group"), NQ_GROUP, 0, _GetGroup(_GetNodeGroup)), (_MakeField("group.uuid", "GroupUUID", QFT_TEXT, "UUID of node group"), NQ_CONFIG, 0, _GetItemAttr("group")), (_MakeField("powered", "Powered", QFT_BOOL, "Whether node is thought to be powered on"), NQ_OOB, 0, _GetNodePower), (_MakeField("ndparams", "NodeParameters", QFT_OTHER, "Merged node parameters"), NQ_GROUP, 0, _GetGroup(_GetNdParams)), (_MakeField("custom_ndparams", "CustomNodeParameters", QFT_OTHER, "Custom node parameters"), NQ_GROUP, 0, _GetItemAttr("ndparams")), (_MakeField("hv_state", "HypervisorState", QFT_OTHER, "Hypervisor state"), NQ_CONFIG, 0, _GetNodeHvState), (_MakeField("disk_state", "DiskState", QFT_OTHER, "Disk state"), NQ_CONFIG, 0, _GetNodeDiskState), ] fields.extend(_BuildNDFields(False)) # Node role role_values = (constants.NR_MASTER, constants.NR_MCANDIDATE, constants.NR_REGULAR, constants.NR_DRAINED, constants.NR_OFFLINE) role_doc = ("Node role; \"%s\" for master, \"%s\" for master candidate," " \"%s\" for regular, \"%s\" for drained, \"%s\" for offline" % role_values) fields.append((_MakeField("role", "Role", QFT_TEXT, role_doc), NQ_CONFIG, 0, lambda ctx, node: _GetNodeRole(node, ctx.master_uuid))) assert set(role_values) == constants.NR_ALL def _GetLength(getter): return lambda ctx, node: len(getter(ctx)[node.uuid]) def _GetList(getter): return lambda ctx, node: utils.NiceSort( [ctx.inst_uuid_to_inst_name[uuid] for uuid in getter(ctx)[node.uuid]]) # Add fields operating on instance lists for prefix, titleprefix, docword, getter in \ [("p", "Pri", "primary", operator.attrgetter("node_to_primary")), ("s", "Sec", "secondary", operator.attrgetter("node_to_secondary"))]: # TODO: Allow filterting by hostname in list fields.extend([ (_MakeField("%sinst_cnt" % prefix, "%sinst" % prefix.upper(), QFT_NUMBER, "Number of instances with this node as %s" % docword), NQ_INST, 0, _GetLength(getter)), (_MakeField("%sinst_list" % prefix, "%sInstances" % titleprefix, QFT_OTHER, "List of instances with this node as %s" % docword), NQ_INST, 0, _GetList(getter)), ]) # Add simple fields fields.extend([ (_MakeField(name, title, kind, doc), NQ_CONFIG, flags, _GetItemAttr(name)) for (name, (title, kind, flags, doc)) in _NODE_SIMPLE_FIELDS.items()]) # Add fields requiring live data fields.extend([ (_MakeField(name, title, kind, doc), NQ_LIVE, 0, compat.partial(_GetLiveNodeField, nfield, kind)) for (name, (title, kind, nfield, doc)) in _NODE_LIVE_FIELDS.items()]) # Add timestamps fields.extend(_GetItemTimestampFields(NQ_CONFIG)) return _PrepareFieldList(fields, []) class InstanceQueryData: """Data container for instance data queries. """ def __init__(self, instances, cluster, disk_usage, offline_node_uuids, bad_node_uuids, live_data, wrongnode_inst, console, nodes, groups, networks): """Initializes this class. 
@param instances: List of instance objects @param cluster: Cluster object @type disk_usage: dict; instance UUID as key @param disk_usage: Per-instance disk usage @type offline_node_uuids: list of strings @param offline_node_uuids: List of offline nodes @type bad_node_uuids: list of strings @param bad_node_uuids: List of faulty nodes @type live_data: dict; instance UUID as key @param live_data: Per-instance live data @type wrongnode_inst: set @param wrongnode_inst: Set of instances running on wrong node(s) @type console: dict; instance UUID as key @param console: Per-instance console information @type nodes: dict; node UUID as key @param nodes: Node objects @type networks: dict; net_uuid as key @param networks: Network objects """ assert len(set(bad_node_uuids) & set(offline_node_uuids)) == \ len(offline_node_uuids), \ "Offline nodes not included in bad nodes" assert not (set(live_data.keys()) & set(bad_node_uuids)), \ "Found live data for bad or offline nodes" self.instances = instances self.cluster = cluster self.disk_usage = disk_usage self.offline_nodes = offline_node_uuids self.bad_nodes = bad_node_uuids self.live_data = live_data self.wrongnode_inst = wrongnode_inst self.console = console self.nodes = nodes self.groups = groups self.networks = networks # Used for individual rows self.inst_hvparams = None self.inst_beparams = None self.inst_osparams = None self.inst_nicparams = None def __iter__(self): """Iterate over all instances. This function has side-effects and only one instance of the resulting generator should be used at a time. """ for inst in self.instances: self.inst_hvparams = self.cluster.FillHV(inst, skip_globals=True) self.inst_beparams = self.cluster.FillBE(inst) self.inst_osparams = self.cluster.SimpleFillOS(inst.os, inst.osparams) self.inst_nicparams = [self.cluster.SimpleFillNIC(nic.nicparams) for nic in inst.nics] yield inst def _GetInstOperState(ctx, inst): """Get instance's operational status. @type ctx: L{InstanceQueryData} @type inst: L{objects.Instance} @param inst: Instance object """ # Can't use RS_OFFLINE here as it would describe the instance to # be offline when we actually don't know due to missing data if inst.primary_node in ctx.bad_nodes: return _FS_NODATA else: return bool(ctx.live_data.get(inst.uuid)) def _GetInstLiveData(name): """Build function for retrieving live data. @type name: string @param name: Live data field name """ def fn(ctx, inst): """Get live data for an instance. @type ctx: L{InstanceQueryData} @type inst: L{objects.Instance} @param inst: Instance object """ if (inst.primary_node in ctx.bad_nodes or inst.primary_node in ctx.offline_nodes): # Can't use RS_OFFLINE here as it would describe the instance to be # offline when we actually don't know due to missing data return _FS_NODATA if inst.uuid in ctx.live_data: data = ctx.live_data[inst.uuid] if name in data: return data[name] return _FS_UNAVAIL return fn def _GetInstStatus(ctx, inst): """Get instance status. 
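
  The checks below are ordered by precedence: the state of the primary
  node shadows the live data, and the live data in turn shadows the
  configured admin state. For example, an instance configured to run
  whose primary node is offline is reported as C{INSTST_NODEOFFLINE},
  not as C{INSTST_ERRORDOWN}.
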
  @type ctx: L{InstanceQueryData}
  @type inst: L{objects.Instance}
  @param inst: Instance object

  """
  if inst.primary_node in ctx.offline_nodes:
    return constants.INSTST_NODEOFFLINE

  if inst.primary_node in ctx.bad_nodes:
    return constants.INSTST_NODEDOWN

  if bool(ctx.live_data.get(inst.uuid)):
    if inst.uuid in ctx.wrongnode_inst:
      return constants.INSTST_WRONGNODE
    elif inst.admin_state == constants.ADMINST_UP:
      return constants.INSTST_RUNNING
    else:
      return constants.INSTST_ERRORUP

  if inst.admin_state == constants.ADMINST_UP:
    return constants.INSTST_ERRORDOWN
  elif inst.admin_state == constants.ADMINST_DOWN:
    return constants.INSTST_ADMINDOWN

  return constants.INSTST_ADMINOFFLINE


def _GetInstDisk(index, cb):
  """Build function for calling another function with an instance Disk.

  @type index: int
  @param index: Disk index
  @type cb: callable
  @param cb: Callback

  """
  def fn(ctx, inst):
    """Call helper function with instance Disk.

    @type ctx: L{InstanceQueryData}
    @type inst: L{objects.Instance}
    @param inst: Instance object

    """
    try:
      disk = inst.disks[index]
    except IndexError:
      return _FS_UNAVAIL

    return cb(ctx, index, disk)

  return fn


def _GetInstDiskSize(ctx, _, disk): # pylint: disable=W0613
  """Get a Disk's size.

  @type ctx: L{InstanceQueryData}
  @type disk: L{objects.Disk}
  @param disk: The Disk object

  """
  if disk.size is None:
    return _FS_UNAVAIL
  else:
    return disk.size


def _GetInstDiskSpindles(ctx, _, disk): # pylint: disable=W0613
  """Get a Disk's spindles.

  @type disk: L{objects.Disk}
  @param disk: The Disk object

  """
  if disk.spindles is None:
    return _FS_UNAVAIL
  else:
    return disk.spindles


def _GetInstDeviceName(ctx, _, device): # pylint: disable=W0613
  """Get a Device's Name.

  @type ctx: L{InstanceQueryData}
  @type device: L{objects.NIC} or L{objects.Disk}
  @param device: The NIC or Disk object

  """
  if device.name is None:
    return _FS_UNAVAIL
  else:
    return device.name


def _GetInstDeviceUUID(ctx, _, device): # pylint: disable=W0613
  """Get a Device's UUID.

  @type ctx: L{InstanceQueryData}
  @type device: L{objects.NIC} or L{objects.Disk}
  @param device: The NIC or Disk object

  """
  if device.uuid is None:
    return _FS_UNAVAIL
  else:
    return device.uuid


def _GetInstNic(index, cb):
  """Build function for calling another function with an instance NIC.

  @type index: int
  @param index: NIC index
  @type cb: callable
  @param cb: Callback

  """
  def fn(ctx, inst):
    """Call helper function with instance NIC.

    @type ctx: L{InstanceQueryData}
    @type inst: L{objects.Instance}
    @param inst: Instance object

    """
    try:
      nic = inst.nics[index]
    except IndexError:
      return _FS_UNAVAIL

    return cb(ctx, index, nic)

  return fn


def _GetInstNicNetworkName(ctx, _, nic): # pylint: disable=W0613
  """Get a NIC's network name.

  @type ctx: L{InstanceQueryData}
  @type nic: L{objects.NIC}
  @param nic: NIC object

  """
  if nic.network is None:
    return _FS_UNAVAIL
  else:
    return ctx.networks[nic.network].name


def _GetInstNicNetwork(ctx, _, nic): # pylint: disable=W0613
  """Get a NIC's Network.

  @type ctx: L{InstanceQueryData}
  @type nic: L{objects.NIC}
  @param nic: NIC object

  """
  if nic.network is None:
    return _FS_UNAVAIL
  else:
    return nic.network


def _GetInstNicIp(ctx, _, nic): # pylint: disable=W0613
  """Get a NIC's IP address.

  @type ctx: L{InstanceQueryData}
  @type nic: L{objects.NIC}
  @param nic: NIC object

  """
  if nic.ip is None:
    return _FS_UNAVAIL
  else:
    return nic.ip


def _GetInstNicBridge(ctx, index, _):
  """Get a NIC's bridge.
  @type ctx: L{InstanceQueryData}
  @type index: int
  @param index: NIC index

  """
  assert len(ctx.inst_nicparams) >= index

  nicparams = ctx.inst_nicparams[index]

  if nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
    return nicparams[constants.NIC_LINK]
  else:
    return _FS_UNAVAIL


def _GetInstAllNicNetworkNames(ctx, inst):
  """Get all network names for an instance.

  @type ctx: L{InstanceQueryData}
  @type inst: L{objects.Instance}
  @param inst: Instance object

  """
  result = []

  for nic in inst.nics:
    name = None
    if nic.network:
      name = ctx.networks[nic.network].name
    result.append(name)

  assert len(result) == len(inst.nics)

  return result


def _GetInstAllNicBridges(ctx, inst):
  """Get all network bridges for an instance.

  @type ctx: L{InstanceQueryData}
  @type inst: L{objects.Instance}
  @param inst: Instance object

  """
  assert len(ctx.inst_nicparams) == len(inst.nics)

  result = []

  for nicp in ctx.inst_nicparams:
    if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      result.append(nicp[constants.NIC_LINK])
    else:
      result.append(None)

  assert len(result) == len(inst.nics)

  return result


def _GetInstNicParam(name):
  """Build function for retrieving a NIC parameter.

  @type name: string
  @param name: Parameter name

  """
  def fn(ctx, index, _):
    """Get the given NIC parameter for the NIC at the given index.

    @type ctx: L{InstanceQueryData}
    @type index: int
    @param index: NIC index

    """
    assert len(ctx.inst_nicparams) >= index
    return ctx.inst_nicparams[index][name]

  return fn


def _GetInstanceNetworkFields():
  """Get instance fields involving network interfaces.

  @return: Tuple containing list of field definitions used as input for
    L{_PrepareFieldList} and a list of aliases

  """
  nic_mac_fn = lambda ctx, _, nic: nic.mac
  nic_mode_fn = _GetInstNicParam(constants.NIC_MODE)
  nic_link_fn = _GetInstNicParam(constants.NIC_LINK)

  fields = [
    # All NICs
    (_MakeField("nic.count", "NICs", QFT_NUMBER,
                "Number of network interfaces"),
     IQ_CONFIG, 0, lambda ctx, inst: len(inst.nics)),
    (_MakeField("nic.macs", "NIC_MACs", QFT_OTHER,
                "List containing each network interface's MAC address"),
     IQ_CONFIG, 0, lambda ctx, inst: [nic.mac for nic in inst.nics]),
    (_MakeField("nic.ips", "NIC_IPs", QFT_OTHER,
                "List containing each network interface's IP address"),
     IQ_CONFIG, 0, lambda ctx, inst: [nic.ip for nic in inst.nics]),
    (_MakeField("nic.names", "NIC_Names", QFT_OTHER,
                "List containing each network interface's name"),
     IQ_CONFIG, 0, lambda ctx, inst: [nic.name for nic in inst.nics]),
    (_MakeField("nic.uuids", "NIC_UUIDs", QFT_OTHER,
                "List containing each network interface's UUID"),
     IQ_CONFIG, 0, lambda ctx, inst: [nic.uuid for nic in inst.nics]),
    (_MakeField("nic.modes", "NIC_modes", QFT_OTHER,
                "List containing each network interface's mode"),
     IQ_CONFIG, 0, lambda ctx, inst: [nicp[constants.NIC_MODE]
                                      for nicp in ctx.inst_nicparams]),
    (_MakeField("nic.links", "NIC_links", QFT_OTHER,
                "List containing each network interface's link"),
     IQ_CONFIG, 0, lambda ctx, inst: [nicp[constants.NIC_LINK]
                                      for nicp in ctx.inst_nicparams]),
    (_MakeField("nic.bridges", "NIC_bridges", QFT_OTHER,
                "List containing each network interface's bridge"),
     IQ_CONFIG, 0, _GetInstAllNicBridges),
    (_MakeField("nic.networks", "NIC_networks", QFT_OTHER,
                "List containing each interface's network"),
     IQ_CONFIG, 0, lambda ctx, inst: [nic.network for nic in inst.nics]),
    (_MakeField("nic.networks.names", "NIC_networks_names", QFT_OTHER,
                "List containing the name of each interface's network"),
     IQ_NETWORKS, 0, _GetInstAllNicNetworkNames)
    ]

  # NICs by number
  for i in range(constants.MAX_NICS):
    numtext = utils.FormatOrdinal(i + 1)
    fields.extend([
      (_MakeField("nic.ip/%s" % i, "NicIP/%s" % i, QFT_TEXT,
                  "IP address of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, _GetInstNicIp)),
      (_MakeField("nic.mac/%s" % i, "NicMAC/%s" % i, QFT_TEXT,
                  "MAC address of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, nic_mac_fn)),
      (_MakeField("nic.name/%s" % i, "NicName/%s" % i, QFT_TEXT,
                  "Name of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, _GetInstDeviceName)),
      (_MakeField("nic.uuid/%s" % i, "NicUUID/%s" % i, QFT_TEXT,
                  "UUID of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, _GetInstDeviceUUID)),
      (_MakeField("nic.mode/%s" % i, "NicMode/%s" % i, QFT_TEXT,
                  "Mode of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, nic_mode_fn)),
      (_MakeField("nic.link/%s" % i, "NicLink/%s" % i, QFT_TEXT,
                  "Link of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, nic_link_fn)),
      (_MakeField("nic.bridge/%s" % i, "NicBridge/%s" % i, QFT_TEXT,
                  "Bridge of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, _GetInstNicBridge)),
      (_MakeField("nic.network/%s" % i, "NicNetwork/%s" % i, QFT_TEXT,
                  "Network of %s network interface" % numtext),
       IQ_CONFIG, 0, _GetInstNic(i, _GetInstNicNetwork)),
      (_MakeField("nic.network.name/%s" % i, "NicNetworkName/%s" % i,
                  QFT_TEXT,
                  "Network name of %s network interface" % numtext),
       IQ_NETWORKS, 0, _GetInstNic(i, _GetInstNicNetworkName)),
      ])

  aliases = [
    # Legacy fields for first NIC
    ("ip", "nic.ip/0"),
    ("mac", "nic.mac/0"),
    ("bridge", "nic.bridge/0"),
    ("nic_mode", "nic.mode/0"),
    ("nic_link", "nic.link/0"),
    ("nic_network", "nic.network/0"),
    ]

  return (fields, aliases)


def _GetInstDiskUsage(ctx, inst):
  """Get disk usage for an instance.

  @type ctx: L{InstanceQueryData}
  @type inst: L{objects.Instance}
  @param inst: Instance object

  """
  usage = ctx.disk_usage[inst.uuid]

  if usage is None:
    usage = 0

  return usage


def _GetInstanceConsole(ctx, inst):
  """Get console information for instance.

  @type ctx: L{InstanceQueryData}
  @type inst: L{objects.Instance}
  @param inst: Instance object

  """
  consinfo = ctx.console[inst.uuid]

  if consinfo is None:
    return _FS_UNAVAIL

  return consinfo


def _GetInstanceDiskFields():
  """Get instance fields involving disks.
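
  Illustrative examples of the generated field names (a sketch; the
  per-disk fields are repeated up to L{constants.MAX_DISKS} times)::

    disk.count    # total number of disks
    disk.sizes    # list of all disk sizes
    disk.size/0   # size of the first disk
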
@return: List of field definitions used as input for L{_PrepareFieldList} """ fields = [ (_MakeField("disk_usage", "DiskUsage", QFT_UNIT, "Total disk space used by instance on each of its nodes;" " this is not the disk size visible to the instance, but" " the usage on the node"), IQ_DISKUSAGE, 0, _GetInstDiskUsage), (_MakeField("disk.count", "Disks", QFT_NUMBER, "Number of disks"), IQ_CONFIG, 0, lambda ctx, inst: len(inst.disks)), (_MakeField("disk.sizes", "Disk_sizes", QFT_OTHER, "List of disk sizes"), IQ_CONFIG, 0, lambda ctx, inst: [disk.size for disk in inst.disks]), (_MakeField("disk.spindles", "Disk_spindles", QFT_OTHER, "List of disk spindles"), IQ_CONFIG, 0, lambda ctx, inst: [disk.spindles for disk in inst.disks]), (_MakeField("disk.names", "Disk_names", QFT_OTHER, "List of disk names"), IQ_CONFIG, 0, lambda ctx, inst: [disk.name for disk in inst.disks]), (_MakeField("disk.uuids", "Disk_UUIDs", QFT_OTHER, "List of disk UUIDs"), IQ_CONFIG, 0, lambda ctx, inst: [disk.uuid for disk in inst.disks]), ] # Disks by number for i in range(constants.MAX_DISKS): numtext = utils.FormatOrdinal(i + 1) fields.extend([ (_MakeField("disk.size/%s" % i, "Disk/%s" % i, QFT_UNIT, "Disk size of %s disk" % numtext), IQ_CONFIG, 0, _GetInstDisk(i, _GetInstDiskSize)), (_MakeField("disk.spindles/%s" % i, "DiskSpindles/%s" % i, QFT_NUMBER, "Spindles of %s disk" % numtext), IQ_CONFIG, 0, _GetInstDisk(i, _GetInstDiskSpindles)), (_MakeField("disk.name/%s" % i, "DiskName/%s" % i, QFT_TEXT, "Name of %s disk" % numtext), IQ_CONFIG, 0, _GetInstDisk(i, _GetInstDeviceName)), (_MakeField("disk.uuid/%s" % i, "DiskUUID/%s" % i, QFT_TEXT, "UUID of %s disk" % numtext), IQ_CONFIG, 0, _GetInstDisk(i, _GetInstDeviceUUID))]) return fields def _GetInstanceParameterFields(): """Get instance fields involving parameters. 
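
  Illustrative examples of the generated field names (the exact set
  depends on the parameter definitions in L{constants})::

    hvparams          # merged hypervisor parameters
    hv/kernel_path    # a single hypervisor parameter, if defined
    be/vcpus          # a single backend parameter, if defined
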
@return: List of field definitions used as input for L{_PrepareFieldList} """ fields = [ # Filled parameters (_MakeField("hvparams", "HypervisorParameters", QFT_OTHER, "Hypervisor parameters (merged)"), IQ_CONFIG, 0, lambda ctx, _: ctx.inst_hvparams), (_MakeField("beparams", "BackendParameters", QFT_OTHER, "Backend parameters (merged)"), IQ_CONFIG, 0, lambda ctx, _: ctx.inst_beparams), (_MakeField("osparams", "OpSysParameters", QFT_OTHER, "Operating system parameters (merged)"), IQ_CONFIG, 0, lambda ctx, _: ctx.inst_osparams), # Unfilled parameters (_MakeField("custom_hvparams", "CustomHypervisorParameters", QFT_OTHER, "Custom hypervisor parameters"), IQ_CONFIG, 0, _GetItemAttr("hvparams")), (_MakeField("custom_beparams", "CustomBackendParameters", QFT_OTHER, "Custom backend parameters",), IQ_CONFIG, 0, _GetItemAttr("beparams")), (_MakeField("custom_osparams", "CustomOpSysParameters", QFT_OTHER, "Custom operating system parameters",), IQ_CONFIG, 0, _GetItemAttr("osparams")), (_MakeField("custom_nicparams", "CustomNicParameters", QFT_OTHER, "Custom network interface parameters"), IQ_CONFIG, 0, lambda ctx, inst: [nic.nicparams for nic in inst.nics]), ] # HV params def _GetInstHvParam(name): return lambda ctx, _: ctx.inst_hvparams.get(name, _FS_UNAVAIL) fields.extend([ (_MakeField("hv/%s" % name, constants.HVS_PARAMETER_TITLES.get(name, "hv/%s" % name), _VTToQFT[kind], "The \"%s\" hypervisor parameter" % name), IQ_CONFIG, 0, _GetInstHvParam(name)) for name, kind in constants.HVS_PARAMETER_TYPES.items() if name not in constants.HVC_GLOBALS]) # BE params def _GetInstBeParam(name): return lambda ctx, _: ctx.inst_beparams.get(name, None) fields.extend([ (_MakeField("be/%s" % name, constants.BES_PARAMETER_TITLES.get(name, "be/%s" % name), _VTToQFT[kind], "The \"%s\" backend parameter" % name), IQ_CONFIG, 0, _GetInstBeParam(name)) for name, kind in constants.BES_PARAMETER_TYPES.items()]) return fields _INST_SIMPLE_FIELDS = { "disk_template": ("Disk_template", QFT_TEXT, 0, "Instance disk template"), "hypervisor": ("Hypervisor", QFT_TEXT, 0, "Hypervisor name"), "name": ("Instance", QFT_TEXT, QFF_HOSTNAME, "Instance name"), # Depending on the hypervisor, the port can be None "network_port": ("Network_port", QFT_OTHER, 0, "Instance network port if available (e.g. for VNC console)"), "os": ("OS", QFT_TEXT, 0, "Operating system"), "serial_no": ("SerialNo", QFT_NUMBER, 0, _SERIAL_NO_DOC % "Instance"), "uuid": ("UUID", QFT_TEXT, 0, "Instance UUID"), } def _GetNodeName(ctx, default, node_uuid): """Gets node name of a node. @type ctx: L{InstanceQueryData} @param default: Default value @type node_uuid: string @param node_uuid: Node UUID """ try: node = ctx.nodes[node_uuid] except KeyError: return default else: return node.name def _GetInstNodeGroup(ctx, default, node_uuid): """Gets group UUID of an instance node. @type ctx: L{InstanceQueryData} @param default: Default value @type node_uuid: string @param node_uuid: Node UUID """ try: node = ctx.nodes[node_uuid] except KeyError: return default else: return node.group def _GetInstNodeGroupName(ctx, default, node_uuid): """Gets group name of an instance node. @type ctx: L{InstanceQueryData} @param default: Default value @type node_uuid: string @param node_uuid: Node UUID """ try: node = ctx.nodes[node_uuid] except KeyError: return default try: group = ctx.groups[node.group] except KeyError: return default return group.name def _BuildInstanceFields(): """Builds list of fields for instance queries. 
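
  Illustrative usage sketch (the function returns a prepared field
  dictionary suitable as the first argument to L{Query})::

    fielddefs = _BuildInstanceFields()
    query = Query(fielddefs, ["name", "status"], namefield="name")
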
""" fields = [ (_MakeField("pnode", "Primary_node", QFT_TEXT, "Primary node"), IQ_NODES, QFF_HOSTNAME, lambda ctx, inst: _GetNodeName(ctx, None, inst.primary_node)), (_MakeField("pnode.group", "PrimaryNodeGroup", QFT_TEXT, "Primary node's group"), IQ_NODES, 0, lambda ctx, inst: _GetInstNodeGroupName(ctx, _FS_UNAVAIL, inst.primary_node)), (_MakeField("pnode.group.uuid", "PrimaryNodeGroupUUID", QFT_TEXT, "Primary node's group UUID"), IQ_NODES, 0, lambda ctx, inst: _GetInstNodeGroup(ctx, _FS_UNAVAIL, inst.primary_node)), # TODO: Allow filtering by secondary node as hostname (_MakeField("snodes", "Secondary_Nodes", QFT_OTHER, "Secondary nodes; usually this will just be one node"), IQ_NODES, 0, lambda ctx, inst: map(compat.partial(_GetNodeName, ctx, None), inst.secondary_nodes)), (_MakeField("snodes.group", "SecondaryNodesGroups", QFT_OTHER, "Node groups of secondary nodes"), IQ_NODES, 0, lambda ctx, inst: map(compat.partial(_GetInstNodeGroupName, ctx, None), inst.secondary_nodes)), (_MakeField("snodes.group.uuid", "SecondaryNodesGroupsUUID", QFT_OTHER, "Node group UUIDs of secondary nodes"), IQ_NODES, 0, lambda ctx, inst: map(compat.partial(_GetInstNodeGroup, ctx, None), inst.secondary_nodes)), (_MakeField("admin_state", "InstanceState", QFT_TEXT, "Desired state of instance"), IQ_CONFIG, 0, _GetItemAttr("admin_state")), (_MakeField("admin_up", "Autostart", QFT_BOOL, "Desired state of instance"), IQ_CONFIG, 0, lambda ctx, inst: inst.admin_state == constants.ADMINST_UP), (_MakeField("disks_active", "DisksActive", QFT_BOOL, "Desired state of instance disks"), IQ_CONFIG, 0, _GetItemAttr("disks_active")), (_MakeField("tags", "Tags", QFT_OTHER, "Tags"), IQ_CONFIG, 0, lambda ctx, inst: list(inst.GetTags())), (_MakeField("console", "Console", QFT_OTHER, "Instance console information"), IQ_CONSOLE, 0, _GetInstanceConsole), ] # Add simple fields fields.extend([ (_MakeField(name, title, kind, doc), IQ_CONFIG, flags, _GetItemAttr(name)) for (name, (title, kind, flags, doc)) in _INST_SIMPLE_FIELDS.items()]) # Fields requiring talking to the node fields.extend([ (_MakeField("oper_state", "Running", QFT_BOOL, "Actual state of instance"), IQ_LIVE, 0, _GetInstOperState), (_MakeField("oper_ram", "Memory", QFT_UNIT, "Actual memory usage as seen by hypervisor"), IQ_LIVE, 0, _GetInstLiveData("memory")), (_MakeField("oper_vcpus", "VCPUs", QFT_NUMBER, "Actual number of VCPUs as seen by hypervisor"), IQ_LIVE, 0, _GetInstLiveData("vcpus")), ]) # Status field status_values = (constants.INSTST_RUNNING, constants.INSTST_ADMINDOWN, constants.INSTST_WRONGNODE, constants.INSTST_ERRORUP, constants.INSTST_ERRORDOWN, constants.INSTST_NODEDOWN, constants.INSTST_NODEOFFLINE, constants.INSTST_ADMINOFFLINE) status_doc = ("Instance status; \"%s\" if instance is set to be running" " and actually is, \"%s\" if instance is stopped and" " is not running, \"%s\" if instance running, but not on its" " designated primary node, \"%s\" if instance should be" " stopped, but is actually running, \"%s\" if instance should" " run, but doesn't, \"%s\" if instance's primary node is down," " \"%s\" if instance's primary node is marked offline," " \"%s\" if instance is offline and does not use dynamic" " resources" % status_values) fields.append((_MakeField("status", "Status", QFT_TEXT, status_doc), IQ_LIVE, 0, _GetInstStatus)) assert set(status_values) == constants.INSTST_ALL, \ "Status documentation mismatch" (network_fields, network_aliases) = _GetInstanceNetworkFields() fields.extend(network_fields) fields.extend(_GetInstanceParameterFields()) 
  fields.extend(_GetInstanceDiskFields())
  fields.extend(_GetItemTimestampFields(IQ_CONFIG))

  aliases = [
    ("vcpus", "be/vcpus"),
    ("be/memory", "be/maxmem"),
    ("sda_size", "disk.size/0"),
    ("sdb_size", "disk.size/1"),
    ] + network_aliases

  return _PrepareFieldList(fields, aliases)


class LockQueryData:
  """Data container for lock data queries.

  """
  def __init__(self, lockdata):
    """Initializes this class.

    """
    self.lockdata = lockdata

  def __iter__(self):
    """Iterate over all locks.

    """
    return iter(self.lockdata)


def _GetLockOwners(_, data):
  """Returns a sorted list of a lock's current owners.

  """
  (_, _, owners, _) = data

  if owners:
    owners = utils.NiceSort(owners)

  return owners


def _GetLockPending(_, data):
  """Returns a sorted list of a lock's pending acquires.

  """
  (_, _, _, pending) = data

  if pending:
    pending = [(mode, utils.NiceSort(names))
               for (mode, names) in pending]

  return pending


def _BuildLockFields():
  """Builds list of fields for lock queries.

  """
  return _PrepareFieldList([
    # TODO: Lock names are not always hostnames. Should QFF_HOSTNAME be used?
    (_MakeField("name", "Name", QFT_TEXT, "Lock name"), None, 0,
     lambda ctx, (name, mode, owners, pending): name),
    (_MakeField("mode", "Mode", QFT_OTHER,
                "Mode in which the lock is currently acquired"
                " (exclusive or shared)"), LQ_MODE, 0,
     lambda ctx, (name, mode, owners, pending): mode),
    (_MakeField("owner", "Owner", QFT_OTHER, "Current lock owner(s)"),
     LQ_OWNER, 0, _GetLockOwners),
    (_MakeField("pending", "Pending", QFT_OTHER,
                "Threads waiting for the lock"), LQ_PENDING, 0,
     _GetLockPending),
    ], [])


class GroupQueryData:
  """Data container for node group data queries.

  """
  def __init__(self, cluster, groups, group_to_nodes, group_to_instances,
               want_diskparams):
    """Initializes this class.

    @param cluster: Cluster object
    @param groups: List of node group objects
    @type group_to_nodes: dict; group UUID as key
    @param group_to_nodes: Per-group list of nodes
    @type group_to_instances: dict; group UUID as key
    @param group_to_instances: Per-group list of (primary) instances
    @type want_diskparams: bool
    @param want_diskparams: Whether disk parameters should be calculated

    """
    self.groups = groups
    self.group_to_nodes = group_to_nodes
    self.group_to_instances = group_to_instances
    self.cluster = cluster
    self.want_diskparams = want_diskparams

    # Used for individual rows
    self.group_ipolicy = None
    self.ndparams = None
    self.group_dp = None

  def __iter__(self):
    """Iterate over all node groups.

    This function has side-effects and only one instance of the resulting
    generator should be used at a time.

    """
    for group in self.groups:
      self.group_ipolicy = self.cluster.SimpleFillIPolicy(group.ipolicy)
      self.ndparams = self.cluster.SimpleFillND(group.ndparams)
      if self.want_diskparams:
        self.group_dp = self.cluster.SimpleFillDP(group.diskparams)
      else:
        self.group_dp = None
      yield group


_GROUP_SIMPLE_FIELDS = {
  "alloc_policy": ("AllocPolicy", QFT_TEXT, "Allocation policy for group"),
  "name": ("Group", QFT_TEXT, "Group name"),
  "serial_no": ("SerialNo", QFT_NUMBER, _SERIAL_NO_DOC % "Group"),
  "uuid": ("UUID", QFT_TEXT, "Group UUID"),
  }
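
# An illustrative sketch (not used by Ganeti itself) of how the side
# effects of L{GroupQueryData.__iter__} above are consumed: per-row getters
# ignore the group object and read the context attributes that were filled
# in immediately before each group was yielded.
#
#   data = GroupQueryData(cluster, groups, group_to_nodes,
#                         group_to_instances, want_diskparams=True)
#   for group in data:
#     # data.group_ipolicy, data.ndparams and data.group_dp now hold the
#     # merged parameters for exactly this group
#     print "%s: %s" % (group.name, data.ndparams)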
""" # Add simple fields fields = [(_MakeField(name, title, kind, doc), GQ_CONFIG, 0, _GetItemAttr(name)) for (name, (title, kind, doc)) in _GROUP_SIMPLE_FIELDS.items()] def _GetLength(getter): return lambda ctx, group: len(getter(ctx)[group.uuid]) def _GetSortedList(getter): return lambda ctx, group: utils.NiceSort(getter(ctx)[group.uuid]) group_to_nodes = operator.attrgetter("group_to_nodes") group_to_instances = operator.attrgetter("group_to_instances") # Add fields for nodes fields.extend([ (_MakeField("node_cnt", "Nodes", QFT_NUMBER, "Number of nodes"), GQ_NODE, 0, _GetLength(group_to_nodes)), (_MakeField("node_list", "NodeList", QFT_OTHER, "List of nodes"), GQ_NODE, 0, _GetSortedList(group_to_nodes)), ]) # Add fields for instances fields.extend([ (_MakeField("pinst_cnt", "Instances", QFT_NUMBER, "Number of primary instances"), GQ_INST, 0, _GetLength(group_to_instances)), (_MakeField("pinst_list", "InstanceList", QFT_OTHER, "List of primary instances"), GQ_INST, 0, _GetSortedList(group_to_instances)), ]) # Other fields fields.extend([ (_MakeField("tags", "Tags", QFT_OTHER, "Tags"), GQ_CONFIG, 0, lambda ctx, group: list(group.GetTags())), (_MakeField("ipolicy", "InstancePolicy", QFT_OTHER, "Instance policy limitations (merged)"), GQ_CONFIG, 0, lambda ctx, _: ctx.group_ipolicy), (_MakeField("custom_ipolicy", "CustomInstancePolicy", QFT_OTHER, "Custom instance policy limitations"), GQ_CONFIG, 0, _GetItemAttr("ipolicy")), (_MakeField("custom_ndparams", "CustomNDParams", QFT_OTHER, "Custom node parameters"), GQ_CONFIG, 0, _GetItemAttr("ndparams")), (_MakeField("ndparams", "NDParams", QFT_OTHER, "Node parameters"), GQ_CONFIG, 0, lambda ctx, _: ctx.ndparams), (_MakeField("diskparams", "DiskParameters", QFT_OTHER, "Disk parameters (merged)"), GQ_DISKPARAMS, 0, lambda ctx, _: ctx.group_dp), (_MakeField("custom_diskparams", "CustomDiskParameters", QFT_OTHER, "Custom disk parameters"), GQ_CONFIG, 0, _GetItemAttr("diskparams")), ]) # ND parameters fields.extend(_BuildNDFields(True)) fields.extend(_GetItemTimestampFields(GQ_CONFIG)) return _PrepareFieldList(fields, []) class OsInfo(objects.ConfigObject): __slots__ = [ "name", "valid", "hidden", "blacklisted", "variants", "api_versions", "parameters", "node_status", ] def _BuildOsFields(): """Builds list of fields for operating system queries. 
""" fields = [ (_MakeField("name", "Name", QFT_TEXT, "Operating system name"), None, 0, _GetItemAttr("name")), (_MakeField("valid", "Valid", QFT_BOOL, "Whether operating system definition is valid"), None, 0, _GetItemAttr("valid")), (_MakeField("hidden", "Hidden", QFT_BOOL, "Whether operating system is hidden"), None, 0, _GetItemAttr("hidden")), (_MakeField("blacklisted", "Blacklisted", QFT_BOOL, "Whether operating system is blacklisted"), None, 0, _GetItemAttr("blacklisted")), (_MakeField("variants", "Variants", QFT_OTHER, "Operating system variants"), None, 0, _ConvWrap(utils.NiceSort, _GetItemAttr("variants"))), (_MakeField("api_versions", "ApiVersions", QFT_OTHER, "Operating system API versions"), None, 0, _ConvWrap(sorted, _GetItemAttr("api_versions"))), (_MakeField("parameters", "Parameters", QFT_OTHER, "Operating system parameters"), None, 0, _ConvWrap(compat.partial(utils.NiceSort, key=compat.fst), _GetItemAttr("parameters"))), (_MakeField("node_status", "NodeStatus", QFT_OTHER, "Status from node"), None, 0, _GetItemAttr("node_status")), ] return _PrepareFieldList(fields, []) class ExtStorageInfo(objects.ConfigObject): __slots__ = [ "name", "node_status", "nodegroup_status", "parameters", ] def _BuildExtStorageFields(): """Builds list of fields for extstorage provider queries. """ fields = [ (_MakeField("name", "Name", QFT_TEXT, "ExtStorage provider name"), None, 0, _GetItemAttr("name")), (_MakeField("node_status", "NodeStatus", QFT_OTHER, "Status from node"), None, 0, _GetItemAttr("node_status")), (_MakeField("nodegroup_status", "NodegroupStatus", QFT_OTHER, "Overall Nodegroup status"), None, 0, _GetItemAttr("nodegroup_status")), (_MakeField("parameters", "Parameters", QFT_OTHER, "ExtStorage provider parameters"), None, 0, _GetItemAttr("parameters")), ] return _PrepareFieldList(fields, []) def _JobUnavailInner(fn, ctx, (job_id, job)): # pylint: disable=W0613 """Return L{_FS_UNAVAIL} if job is None. When listing specifc jobs (e.g. "gnt-job list 1 2 3"), a job may not be found, in which case this function converts it to L{_FS_UNAVAIL}. """ if job is None: return _FS_UNAVAIL else: return fn(job) def _JobUnavail(inner): """Wrapper for L{_JobUnavailInner}. """ return compat.partial(_JobUnavailInner, inner) def _PerJobOpInner(fn, job): """Executes a function per opcode in a job. """ return map(fn, job.ops) def _PerJobOp(fn): """Wrapper for L{_PerJobOpInner}. """ return _JobUnavail(compat.partial(_PerJobOpInner, fn)) def _JobTimestampInner(fn, job): """Converts unavailable timestamp to L{_FS_UNAVAIL}. """ timestamp = fn(job) if timestamp is None: return _FS_UNAVAIL else: return timestamp def _JobTimestamp(fn): """Wrapper for L{_JobTimestampInner}. """ return _JobUnavail(compat.partial(_JobTimestampInner, fn)) def _BuildJobFields(): """Builds list of fields for job queries. 
""" fields = [ (_MakeField("id", "ID", QFT_NUMBER, "Job ID"), None, QFF_JOB_ID, lambda _, (job_id, job): job_id), (_MakeField("status", "Status", QFT_TEXT, "Job status"), None, 0, _JobUnavail(lambda job: job.CalcStatus())), (_MakeField("priority", "Priority", QFT_NUMBER, ("Current job priority (%s to %s)" % (constants.OP_PRIO_LOWEST, constants.OP_PRIO_HIGHEST))), None, 0, _JobUnavail(lambda job: job.CalcPriority())), (_MakeField("archived", "Archived", QFT_BOOL, "Whether job is archived"), JQ_ARCHIVED, 0, lambda _, (job_id, job): job.archived), (_MakeField("ops", "OpCodes", QFT_OTHER, "List of all opcodes"), None, 0, _PerJobOp(lambda op: op.input.__getstate__())), (_MakeField("opresult", "OpCode_result", QFT_OTHER, "List of opcodes results"), None, 0, _PerJobOp(operator.attrgetter("result"))), (_MakeField("opstatus", "OpCode_status", QFT_OTHER, "List of opcodes status"), None, 0, _PerJobOp(operator.attrgetter("status"))), (_MakeField("oplog", "OpCode_log", QFT_OTHER, "List of opcode output logs"), None, 0, _PerJobOp(operator.attrgetter("log"))), (_MakeField("opstart", "OpCode_start", QFT_OTHER, "List of opcode start timestamps (before acquiring locks)"), None, 0, _PerJobOp(operator.attrgetter("start_timestamp"))), (_MakeField("opexec", "OpCode_exec", QFT_OTHER, "List of opcode execution start timestamps (after acquiring" " locks)"), None, 0, _PerJobOp(operator.attrgetter("exec_timestamp"))), (_MakeField("opend", "OpCode_end", QFT_OTHER, "List of opcode execution end timestamps"), None, 0, _PerJobOp(operator.attrgetter("end_timestamp"))), (_MakeField("oppriority", "OpCode_prio", QFT_OTHER, "List of opcode priorities"), None, 0, _PerJobOp(operator.attrgetter("priority"))), (_MakeField("summary", "Summary", QFT_OTHER, "List of per-opcode summaries"), None, 0, _PerJobOp(lambda op: op.input.Summary())), ] # Timestamp fields for (name, attr, title, desc) in [ ("received_ts", "received_timestamp", "Received", "Timestamp of when job was received"), ("start_ts", "start_timestamp", "Start", "Timestamp of job start"), ("end_ts", "end_timestamp", "End", "Timestamp of job end"), ]: getter = operator.attrgetter(attr) fields.extend([ (_MakeField(name, title, QFT_OTHER, "%s (tuple containing seconds and microseconds)" % desc), None, QFF_SPLIT_TIMESTAMP, _JobTimestamp(getter)), ]) return _PrepareFieldList(fields, []) def _GetExportName(_, (node_name, expname)): # pylint: disable=W0613 """Returns an export name if available. """ if expname is None: return _FS_NODATA else: return expname def _BuildExportFields(): """Builds list of fields for exports. 
""" fields = [ (_MakeField("node", "Node", QFT_TEXT, "Node name"), None, QFF_HOSTNAME, lambda _, (node_name, expname): node_name), (_MakeField("export", "Export", QFT_TEXT, "Export name"), None, 0, _GetExportName), ] return _PrepareFieldList(fields, []) _CLUSTER_VERSION_FIELDS = { "software_version": ("SoftwareVersion", QFT_TEXT, constants.RELEASE_VERSION, "Software version"), "protocol_version": ("ProtocolVersion", QFT_NUMBER, constants.PROTOCOL_VERSION, "RPC protocol version"), "config_version": ("ConfigVersion", QFT_NUMBER, constants.CONFIG_VERSION, "Configuration format version"), "os_api_version": ("OsApiVersion", QFT_NUMBER, max(constants.OS_API_VERSIONS), "API version for OS template scripts"), "export_version": ("ExportVersion", QFT_NUMBER, constants.EXPORT_VERSION, "Import/export file format version"), "vcs_version": ("VCSVersion", QFT_TEXT, constants.VCS_VERSION, "VCS version"), } _CLUSTER_SIMPLE_FIELDS = { "cluster_name": ("Name", QFT_TEXT, QFF_HOSTNAME, "Cluster name"), "volume_group_name": ("VgName", QFT_TEXT, 0, "LVM volume group name"), } class ClusterQueryData: def __init__(self, cluster, nodes, drain_flag, watcher_pause): """Initializes this class. @type cluster: L{objects.Cluster} @param cluster: Instance of cluster object @type nodes: dict; node UUID as key @param nodes: Node objects @type drain_flag: bool @param drain_flag: Whether job queue is drained @type watcher_pause: number @param watcher_pause: Until when watcher is paused (Unix timestamp) """ self._cluster = cluster self.nodes = nodes self.drain_flag = drain_flag self.watcher_pause = watcher_pause def __iter__(self): return iter([self._cluster]) def _ClusterWatcherPause(ctx, _): """Returns until when watcher is paused (if available). """ if ctx.watcher_pause is None: return _FS_UNAVAIL else: return ctx.watcher_pause def _BuildClusterFields(): """Builds list of fields for cluster information. """ fields = [ (_MakeField("tags", "Tags", QFT_OTHER, "Tags"), CQ_CONFIG, 0, lambda ctx, cluster: list(cluster.GetTags())), (_MakeField("architecture", "ArchInfo", QFT_OTHER, "Architecture information"), None, 0, lambda ctx, _: runtime.GetArchInfo()), (_MakeField("drain_flag", "QueueDrained", QFT_BOOL, "Flag whether job queue is drained"), CQ_QUEUE_DRAINED, 0, lambda ctx, _: ctx.drain_flag), (_MakeField("watcher_pause", "WatcherPause", QFT_TIMESTAMP, "Until when watcher is paused"), CQ_WATCHER_PAUSE, 0, _ClusterWatcherPause), (_MakeField("master_node", "Master", QFT_TEXT, "Master node name"), CQ_CONFIG, QFF_HOSTNAME, lambda ctx, cluster: _GetNodeName(ctx, None, cluster.master_node)), ] # Simple fields fields.extend([ (_MakeField(name, title, kind, doc), CQ_CONFIG, flags, _GetItemAttr(name)) for (name, (title, kind, flags, doc)) in _CLUSTER_SIMPLE_FIELDS.items() ],) # Version fields fields.extend([ (_MakeField(name, title, kind, doc), None, 0, _StaticValue(value)) for (name, (title, kind, value, doc)) in _CLUSTER_VERSION_FIELDS.items()]) # Add timestamps fields.extend(_GetItemTimestampFields(CQ_CONFIG)) return _PrepareFieldList(fields, [ ("name", "cluster_name")]) class NetworkQueryData: """Data container for network data queries. """ def __init__(self, networks, network_to_groups, network_to_instances, stats): """Initializes this class. 
@param networks: List of network objects @type network_to_groups: dict; network UUID as key @param network_to_groups: Per-network list of groups @type network_to_instances: dict; network UUID as key @param network_to_instances: Per-network list of instances @type stats: dict; network UUID as key @param stats: Per-network usage statistics """ self.networks = networks self.network_to_groups = network_to_groups self.network_to_instances = network_to_instances self.stats = stats def __iter__(self): """Iterate over all networks. """ for net in self.networks: if self.stats: self.curstats = self.stats.get(net.uuid, None) else: self.curstats = None yield net _NETWORK_SIMPLE_FIELDS = { "name": ("Network", QFT_TEXT, 0, "Name"), "network": ("Subnet", QFT_TEXT, 0, "IPv4 subnet"), "gateway": ("Gateway", QFT_OTHER, 0, "IPv4 gateway"), "network6": ("IPv6Subnet", QFT_OTHER, 0, "IPv6 subnet"), "gateway6": ("IPv6Gateway", QFT_OTHER, 0, "IPv6 gateway"), "mac_prefix": ("MacPrefix", QFT_OTHER, 0, "MAC address prefix"), "serial_no": ("SerialNo", QFT_NUMBER, 0, _SERIAL_NO_DOC % "Network"), "uuid": ("UUID", QFT_TEXT, 0, "Network UUID"), } _NETWORK_STATS_FIELDS = { "free_count": ("FreeCount", QFT_NUMBER, 0, "Number of available addresses"), "reserved_count": ("ReservedCount", QFT_NUMBER, 0, "Number of reserved addresses"), "map": ("Map", QFT_TEXT, 0, "Actual mapping"), "external_reservations": ("ExternalReservations", QFT_TEXT, 0, "External reservations"), } def _GetNetworkStatsField(field, kind, ctx, _): """Gets the value of a "stats" field from L{NetworkQueryData}. @param field: Field name @param kind: Data kind, one of L{constants.QFT_ALL} @type ctx: L{NetworkQueryData} """ return _GetStatsField(field, kind, ctx.curstats) def _BuildNetworkFields(): """Builds list of fields for network queries. 
""" fields = [ (_MakeField("tags", "Tags", QFT_OTHER, "Tags"), IQ_CONFIG, 0, lambda ctx, inst: list(inst.GetTags())), ] # Add simple fields fields.extend([ (_MakeField(name, title, kind, doc), NETQ_CONFIG, 0, _GetItemMaybeAttr(name)) for (name, (title, kind, _, doc)) in _NETWORK_SIMPLE_FIELDS.items()]) def _GetLength(getter): return lambda ctx, network: len(getter(ctx)[network.uuid]) def _GetSortedList(getter): return lambda ctx, network: utils.NiceSort(getter(ctx)[network.uuid]) network_to_groups = operator.attrgetter("network_to_groups") network_to_instances = operator.attrgetter("network_to_instances") # Add fields for node groups fields.extend([ (_MakeField("group_cnt", "NodeGroups", QFT_NUMBER, "Number of nodegroups"), NETQ_GROUP, 0, _GetLength(network_to_groups)), (_MakeField("group_list", "GroupList", QFT_OTHER, "List of nodegroups (group name, NIC mode, NIC link)"), NETQ_GROUP, 0, lambda ctx, network: network_to_groups(ctx)[network.uuid]), ]) # Add fields for instances fields.extend([ (_MakeField("inst_cnt", "Instances", QFT_NUMBER, "Number of instances"), NETQ_INST, 0, _GetLength(network_to_instances)), (_MakeField("inst_list", "InstanceList", QFT_OTHER, "List of instances"), NETQ_INST, 0, _GetSortedList(network_to_instances)), ]) # Add fields for usage statistics fields.extend([ (_MakeField(name, title, kind, doc), NETQ_STATS, 0, compat.partial(_GetNetworkStatsField, name, kind)) for (name, (title, kind, _, doc)) in _NETWORK_STATS_FIELDS.items()]) # Add timestamps fields.extend(_GetItemTimestampFields(IQ_NETWORKS)) return _PrepareFieldList(fields, []) #: Fields for cluster information CLUSTER_FIELDS = _BuildClusterFields() #: Fields available for node queries NODE_FIELDS = _BuildNodeFields() #: Fields available for instance queries INSTANCE_FIELDS = _BuildInstanceFields() #: Fields available for lock queries LOCK_FIELDS = _BuildLockFields() #: Fields available for node group queries GROUP_FIELDS = _BuildGroupFields() #: Fields available for operating system queries OS_FIELDS = _BuildOsFields() #: Fields available for extstorage provider queries EXTSTORAGE_FIELDS = _BuildExtStorageFields() #: Fields available for job queries JOB_FIELDS = _BuildJobFields() #: Fields available for exports EXPORT_FIELDS = _BuildExportFields() #: Fields available for network queries NETWORK_FIELDS = _BuildNetworkFields() #: All available resources ALL_FIELDS = { constants.QR_CLUSTER: CLUSTER_FIELDS, constants.QR_INSTANCE: INSTANCE_FIELDS, constants.QR_NODE: NODE_FIELDS, constants.QR_LOCK: LOCK_FIELDS, constants.QR_GROUP: GROUP_FIELDS, constants.QR_OS: OS_FIELDS, constants.QR_EXTSTORAGE: EXTSTORAGE_FIELDS, constants.QR_JOB: JOB_FIELDS, constants.QR_EXPORT: EXPORT_FIELDS, constants.QR_NETWORK: NETWORK_FIELDS, } #: All available field lists ALL_FIELD_LISTS = ALL_FIELDS.values() ganeti-2.9.3/lib/jqueue.py0000644000000000000000000023757612271422343015434 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2014 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the job queue handling. Locking: there's a single, large lock in the L{JobQueue} class. It's used by all other classes in this module. @var JOBQUEUE_THREADS: the number of worker threads we start for processing jobs """ import logging import errno import time import weakref import threading import itertools import operator try: # pylint: disable=E0611 from pyinotify import pyinotify except ImportError: import pyinotify from ganeti import asyncnotifier from ganeti import constants from ganeti import serializer from ganeti import workerpool from ganeti import locking from ganeti import opcodes from ganeti import errors from ganeti import mcpu from ganeti import utils from ganeti import jstore from ganeti import rpc from ganeti import runtime from ganeti import netutils from ganeti import compat from ganeti import ht from ganeti import query from ganeti import qlang from ganeti import pathutils from ganeti import vcluster JOBQUEUE_THREADS = 25 # member lock names to be passed to @ssynchronized decorator _LOCK = "_lock" _QUEUE = "_queue" #: Retrieves "id" attribute _GetIdAttr = operator.attrgetter("id") class CancelJob(Exception): """Special exception to cancel a job. """ class QueueShutdown(Exception): """Special exception to abort a job when the job queue is shutting down. """ def TimeStampNow(): """Returns the current timestamp. @rtype: tuple @return: the current time in the (seconds, microseconds) format """ return utils.SplitTime(time.time()) def _CallJqUpdate(runner, names, file_name, content): """Updates job queue file after virtualizing filename. """ virt_file_name = vcluster.MakeVirtualPath(file_name) return runner.call_jobqueue_update(names, virt_file_name, content) class _SimpleJobQuery: """Wrapper for job queries. Instance keeps list of fields cached, useful e.g. in L{_JobChangesChecker}. """ def __init__(self, fields): """Initializes this class. """ self._query = query.Query(query.JOB_FIELDS, fields) def __call__(self, job): """Executes a job query using cached field list. """ return self._query.OldStyleQuery([(job.id, job)], sort_by_name=False)[0] class _QueuedOpCode(object): """Encapsulates an opcode object. @ivar log: holds the execution log and consists of tuples of the form C{(log_serial, timestamp, level, message)} @ivar input: the OpCode we encapsulate @ivar status: the current status @ivar result: the result of the LU execution @ivar start_timestamp: timestamp for the start of the execution @ivar exec_timestamp: timestamp for the actual LU Exec() function invocation @ivar stop_timestamp: timestamp for the end of the execution """ __slots__ = ["input", "status", "result", "log", "priority", "start_timestamp", "exec_timestamp", "end_timestamp", "__weakref__"] def __init__(self, op): """Initializes instances of this class. @type op: L{opcodes.OpCode} @param op: the opcode we encapsulate """ self.input = op self.status = constants.OP_STATUS_QUEUED self.result = None self.log = [] self.start_timestamp = None self.exec_timestamp = None self.end_timestamp = None # Get initial priority (it might change during the lifetime of this opcode) self.priority = getattr(op, "priority", constants.OP_PRIO_DEFAULT) @classmethod def Restore(cls, state): """Restore the _QueuedOpCode from the serialized form. 

    @type state: dict
    @param state: the serialized state
    @rtype: _QueuedOpCode
    @return: a new _QueuedOpCode instance

    """
    obj = _QueuedOpCode.__new__(cls)
    obj.input = opcodes.OpCode.LoadOpCode(state["input"])
    obj.status = state["status"]
    obj.result = state["result"]
    obj.log = state["log"]
    obj.start_timestamp = state.get("start_timestamp", None)
    obj.exec_timestamp = state.get("exec_timestamp", None)
    obj.end_timestamp = state.get("end_timestamp", None)
    obj.priority = state.get("priority", constants.OP_PRIO_DEFAULT)
    return obj

  def Serialize(self):
    """Serializes this _QueuedOpCode.

    @rtype: dict
    @return: the dictionary holding the serialized state

    """
    return {
      "input": self.input.__getstate__(),
      "status": self.status,
      "result": self.result,
      "log": self.log,
      "start_timestamp": self.start_timestamp,
      "exec_timestamp": self.exec_timestamp,
      "end_timestamp": self.end_timestamp,
      "priority": self.priority,
      }
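
# Serialize() and Restore() above are meant to round-trip: restoring the
# serialized form yields an equivalent opcode wrapper. A minimal sketch,
# purely illustrative (any concrete opcode class from the opcodes module
# would do, e.g. opcodes.OpClusterQuery):
#
#   qop = _QueuedOpCode(opcodes.OpClusterQuery())
#   clone = _QueuedOpCode.Restore(qop.Serialize())
#   assert clone.status == constants.OP_STATUS_QUEUED
#   assert clone.priority == qop.priority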
""" obj.writable = writable obj.ops_iter = None obj.cur_opctx = None # Read-only jobs are not processed and therefore don't need a lock if writable: obj.processor_lock = threading.Lock() else: obj.processor_lock = None def __repr__(self): status = ["%s.%s" % (self.__class__.__module__, self.__class__.__name__), "id=%s" % self.id, "ops=%s" % ",".join([op.input.Summary() for op in self.ops])] return "<%s at %#x>" % (" ".join(status), id(self)) @classmethod def Restore(cls, queue, state, writable, archived): """Restore a _QueuedJob from serialized state: @type queue: L{JobQueue} @param queue: to which queue the restored job belongs @type state: dict @param state: the serialized state @type writable: bool @param writable: Whether job can be modified @type archived: bool @param archived: Whether job was already archived @rtype: _JobQueue @return: the restored _JobQueue instance """ obj = _QueuedJob.__new__(cls) obj.queue = queue obj.id = int(state["id"]) obj.received_timestamp = state.get("received_timestamp", None) obj.start_timestamp = state.get("start_timestamp", None) obj.end_timestamp = state.get("end_timestamp", None) obj.archived = archived obj.ops = [] obj.log_serial = 0 for op_state in state["ops"]: op = _QueuedOpCode.Restore(op_state) for log_entry in op.log: obj.log_serial = max(obj.log_serial, log_entry[0]) obj.ops.append(op) cls._InitInMemory(obj, writable) return obj def Serialize(self): """Serialize the _JobQueue instance. @rtype: dict @return: the serialized state """ return { "id": self.id, "ops": [op.Serialize() for op in self.ops], "start_timestamp": self.start_timestamp, "end_timestamp": self.end_timestamp, "received_timestamp": self.received_timestamp, } def CalcStatus(self): """Compute the status of this job. This function iterates over all the _QueuedOpCodes in the job and based on their status, computes the job status. The algorithm is: - if we find a cancelled, or finished with error, the job status will be the same - otherwise, the last opcode with the status one of: - waitlock - canceling - running will determine the job status - otherwise, it means either all opcodes are queued, or success, and the job status will be the same @return: the job status """ status = constants.JOB_STATUS_QUEUED all_success = True for op in self.ops: if op.status == constants.OP_STATUS_SUCCESS: continue all_success = False if op.status == constants.OP_STATUS_QUEUED: pass elif op.status == constants.OP_STATUS_WAITING: status = constants.JOB_STATUS_WAITING elif op.status == constants.OP_STATUS_RUNNING: status = constants.JOB_STATUS_RUNNING elif op.status == constants.OP_STATUS_CANCELING: status = constants.JOB_STATUS_CANCELING break elif op.status == constants.OP_STATUS_ERROR: status = constants.JOB_STATUS_ERROR # The whole job fails if one opcode failed break elif op.status == constants.OP_STATUS_CANCELED: status = constants.OP_STATUS_CANCELED break if all_success: status = constants.JOB_STATUS_SUCCESS return status def CalcPriority(self): """Gets the current priority for this job. Only unfinished opcodes are considered. When all are done, the default priority is used. @rtype: int """ priorities = [op.priority for op in self.ops if op.status not in constants.OPS_FINALIZED] if not priorities: # All opcodes are done, assume default priority return constants.OP_PRIO_DEFAULT return min(priorities) def GetLogEntries(self, newer_than): """Selectively returns the log entries. 
  def GetLogEntries(self, newer_than):
    """Selectively returns the log entries.

    @type newer_than: None or int
    @param newer_than: if this is None, return all log entries,
        otherwise return only the log entries with serial higher
        than this value
    @rtype: list
    @return: the list of the log entries selected

    """
    if newer_than is None:
      serial = -1
    else:
      serial = newer_than

    entries = []
    for op in self.ops:
      entries.extend(filter(lambda entry: entry[0] > serial, op.log))

    return entries

  def GetInfo(self, fields):
    """Returns information about a job.

    @type fields: list
    @param fields: names of fields to return
    @rtype: list
    @return: list with one element for each field
    @raise errors.OpExecError: when an invalid field
        has been passed

    """
    return _SimpleJobQuery(fields)(self)

  def MarkUnfinishedOps(self, status, result):
    """Mark unfinished opcodes with a given status and result.

    This is a utility function for marking all running or waiting to
    be run opcodes with a given status. Opcodes which are already
    finalized are not changed.

    @param status: a given opcode status
    @param result: the opcode result

    """
    not_marked = True
    for op in self.ops:
      if op.status in constants.OPS_FINALIZED:
        assert not_marked, "Finalized opcodes found after non-finalized ones"
        continue
      op.status = status
      op.result = result
      not_marked = False

  def Finalize(self):
    """Marks the job as finalized.

    """
    self.end_timestamp = TimeStampNow()

  def Cancel(self):
    """Marks job as canceled/-ing if possible.

    @rtype: tuple; (bool, string)
    @return: Boolean describing whether job was successfully canceled or
      marked as canceling and a text message

    """
    status = self.CalcStatus()

    if status == constants.JOB_STATUS_QUEUED:
      self.MarkUnfinishedOps(constants.OP_STATUS_CANCELED,
                             "Job canceled by request")
      self.Finalize()
      return (True, "Job %s canceled" % self.id)

    elif status == constants.JOB_STATUS_WAITING:
      # The worker will notice the new status and cancel the job
      self.MarkUnfinishedOps(constants.OP_STATUS_CANCELING, None)
      return (True, "Job %s will be canceled" % self.id)

    else:
      logging.debug("Job %s is no longer waiting in the queue", self.id)
      return (False, "Job %s is no longer waiting in the queue" % self.id)

  def ChangePriority(self, priority):
    """Changes the job priority.

    @type priority: int
    @param priority: New priority
    @rtype: tuple; (bool, string)
    @return: Boolean describing whether job's priority was successfully
      changed and a text message

    """
    status = self.CalcStatus()

    if status in constants.JOBS_FINALIZED:
      return (False, "Job %s is finished" % self.id)
    elif status == constants.JOB_STATUS_CANCELING:
      return (False, "Job %s is cancelling" % self.id)
    else:
      assert status in (constants.JOB_STATUS_QUEUED,
                        constants.JOB_STATUS_WAITING,
                        constants.JOB_STATUS_RUNNING)

      changed = False
      for op in self.ops:
        if (op.status == constants.OP_STATUS_RUNNING or
            op.status in constants.OPS_FINALIZED):
          assert not changed, \
            ("Found opcode for which priority should not be changed after"
             " priority has been changed for previous opcodes")
          continue

        assert op.status in (constants.OP_STATUS_QUEUED,
                             constants.OP_STATUS_WAITING)

        changed = True

        # Set new priority (doesn't modify opcode input)
        op.priority = priority

      if changed:
        return (True, ("Priorities of pending opcodes for job %s have been"
                       " changed to %s" % (self.id, priority)))
      else:
        return (False, "Job %s had no pending opcodes" % self.id)


class _OpExecCallbacks(mcpu.OpExecCbBase):
  def __init__(self, queue, job, op):
    """Initializes this class.
@type queue: L{JobQueue} @param queue: Job queue @type job: L{_QueuedJob} @param job: Job object @type op: L{_QueuedOpCode} @param op: OpCode """ assert queue, "Queue is missing" assert job, "Job is missing" assert op, "Opcode is missing" self._queue = queue self._job = job self._op = op def _CheckCancel(self): """Raises an exception to cancel the job if asked to. """ # Cancel here if we were asked to if self._op.status == constants.OP_STATUS_CANCELING: logging.debug("Canceling opcode") raise CancelJob() # See if queue is shutting down if not self._queue.AcceptingJobsUnlocked(): logging.debug("Queue is shutting down") raise QueueShutdown() @locking.ssynchronized(_QUEUE, shared=1) def NotifyStart(self): """Mark the opcode as running, not lock-waiting. This is called from the mcpu code as a notifier function, when the LU is finally about to start the Exec() method. Of course, to have end-user visible results, the opcode must be initially (before calling into Processor.ExecOpCode) set to OP_STATUS_WAITING. """ assert self._op in self._job.ops assert self._op.status in (constants.OP_STATUS_WAITING, constants.OP_STATUS_CANCELING) # Cancel here if we were asked to self._CheckCancel() logging.debug("Opcode is now running") self._op.status = constants.OP_STATUS_RUNNING self._op.exec_timestamp = TimeStampNow() # And finally replicate the job status self._queue.UpdateJobUnlocked(self._job) @locking.ssynchronized(_QUEUE, shared=1) def _AppendFeedback(self, timestamp, log_type, log_msg): """Internal feedback append function, with locks """ self._job.log_serial += 1 self._op.log.append((self._job.log_serial, timestamp, log_type, log_msg)) self._queue.UpdateJobUnlocked(self._job, replicate=False) def Feedback(self, *args): """Append a log entry. """ assert len(args) < 3 if len(args) == 1: log_type = constants.ELOG_MESSAGE log_msg = args[0] else: (log_type, log_msg) = args # The time is split to make serialization easier and not lose # precision. timestamp = utils.SplitTime(time.time()) self._AppendFeedback(timestamp, log_type, log_msg) def CurrentPriority(self): """Returns current priority for opcode. """ assert self._op.status in (constants.OP_STATUS_WAITING, constants.OP_STATUS_CANCELING) # Cancel here if we were asked to self._CheckCancel() return self._op.priority def SubmitManyJobs(self, jobs): """Submits jobs for processing. See L{JobQueue.SubmitManyJobs}. """ # Locking is done in job queue return self._queue.SubmitManyJobs(jobs) class _JobChangesChecker(object): def __init__(self, fields, prev_job_info, prev_log_serial): """Initializes this class. @type fields: list of strings @param fields: Fields requested by LUXI client @type prev_job_info: string @param prev_job_info: previous job info, as passed by the LUXI client @type prev_log_serial: string @param prev_log_serial: previous job serial, as passed by the LUXI client """ self._squery = _SimpleJobQuery(fields) self._prev_job_info = prev_job_info self._prev_log_serial = prev_log_serial def __call__(self, job): """Checks whether job has changed. @type job: L{_QueuedJob} @param job: Job object """ assert not job.writable, "Expected read-only job" status = job.CalcStatus() job_info = self._squery(job) log_entries = job.GetLogEntries(self._prev_log_serial) # Serializing and deserializing data can cause type changes (e.g. from # tuple to list) or precision loss. We're doing it here so that we get # the same modifications as the data received from the client. 
Without # this, the comparison afterwards might fail without the data being # significantly different. # TODO: we just deserialized from disk, investigate how to make sure that # the job info and log entries are compatible to avoid this further step. # TODO: Doing something like in testutils.py:UnifyValueType might be more # efficient, though floats will be tricky job_info = serializer.LoadJson(serializer.DumpJson(job_info)) log_entries = serializer.LoadJson(serializer.DumpJson(log_entries)) # Don't even try to wait if the job is no longer running, there will be # no changes. if (status not in (constants.JOB_STATUS_QUEUED, constants.JOB_STATUS_RUNNING, constants.JOB_STATUS_WAITING) or job_info != self._prev_job_info or (log_entries and self._prev_log_serial != log_entries[0][0])): logging.debug("Job %s changed", job.id) return (job_info, log_entries) return None class _JobFileChangesWaiter(object): def __init__(self, filename, _inotify_wm_cls=pyinotify.WatchManager): """Initializes this class. @type filename: string @param filename: Path to job file @raises errors.InotifyError: if the notifier cannot be setup """ self._wm = _inotify_wm_cls() self._inotify_handler = \ asyncnotifier.SingleFileEventHandler(self._wm, self._OnInotify, filename) self._notifier = \ pyinotify.Notifier(self._wm, default_proc_fun=self._inotify_handler) try: self._inotify_handler.enable() except Exception: # pyinotify doesn't close file descriptors automatically self._notifier.stop() raise def _OnInotify(self, notifier_enabled): """Callback for inotify. """ if not notifier_enabled: self._inotify_handler.enable() def Wait(self, timeout): """Waits for the job file to change. @type timeout: float @param timeout: Timeout in seconds @return: Whether there have been events """ assert timeout >= 0 have_events = self._notifier.check_events(timeout * 1000) if have_events: self._notifier.read_events() self._notifier.process_events() return have_events def Close(self): """Closes underlying notifier and its file descriptor. """ self._notifier.stop() class _JobChangesWaiter(object): def __init__(self, filename, _waiter_cls=_JobFileChangesWaiter): """Initializes this class. @type filename: string @param filename: Path to job file """ self._filewaiter = None self._filename = filename self._waiter_cls = _waiter_cls def Wait(self, timeout): """Waits for a job to change. @type timeout: float @param timeout: Timeout in seconds @return: Whether there have been events """ if self._filewaiter: return self._filewaiter.Wait(timeout) # Lazy setup: Avoid inotify setup cost when job file has already changed. # If this point is reached, return immediately and let caller check the job # file again in case there were changes since the last check. This avoids a # race condition. self._filewaiter = self._waiter_cls(self._filename) return True def Close(self): """Closes underlying waiter. """ if self._filewaiter: self._filewaiter.Close() class _WaitForJobChangesHelper(object): """Helper class using inotify to wait for changes in a job file. This class takes a previous job status and serial, and alerts the client when the current job status has changed. """ @staticmethod def _CheckForChanges(counter, job_load_fn, check_fn): if counter.next() > 0: # If this isn't the first check the job is given some more time to change # again. This gives better performance for jobs generating many # changes/messages. 
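      # (Illustrative note: combined with utils.Retry below, the effective
      # shape of the wait is
      #
      #   while not timed out:
      #     job = job_load_fn()
      #     result = check_fn(job)    # None means "nothing changed yet"
      #     if result is not None:
      #       return result
      #     waiter.Wait(remaining)    # blocks on inotify events
      #
      # and this 0.1s pause batches bursts of log messages into fewer
      # client responses.)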
time.sleep(0.1) job = job_load_fn() if not job: raise errors.JobLost() result = check_fn(job) if result is None: raise utils.RetryAgain() return result def __call__(self, filename, job_load_fn, fields, prev_job_info, prev_log_serial, timeout, _waiter_cls=_JobChangesWaiter): """Waits for changes on a job. @type filename: string @param filename: File on which to wait for changes @type job_load_fn: callable @param job_load_fn: Function to load job @type fields: list of strings @param fields: Which fields to check for changes @type prev_job_info: list or None @param prev_job_info: Last job information returned @type prev_log_serial: int @param prev_log_serial: Last job message serial number @type timeout: float @param timeout: maximum time to wait in seconds """ counter = itertools.count() try: check_fn = _JobChangesChecker(fields, prev_job_info, prev_log_serial) waiter = _waiter_cls(filename) try: return utils.Retry(compat.partial(self._CheckForChanges, counter, job_load_fn, check_fn), utils.RETRY_REMAINING_TIME, timeout, wait_fn=waiter.Wait) finally: waiter.Close() except errors.JobLost: return None except utils.RetryTimeout: return constants.JOB_NOTCHANGED def _EncodeOpError(err): """Encodes an error which occurred while processing an opcode. """ if isinstance(err, errors.GenericError): to_encode = err else: to_encode = errors.OpExecError(str(err)) return errors.EncodeException(to_encode) class _TimeoutStrategyWrapper: def __init__(self, fn): """Initializes this class. """ self._fn = fn self._next = None def _Advance(self): """Gets the next timeout if necessary. """ if self._next is None: self._next = self._fn() def Peek(self): """Returns the next timeout. """ self._Advance() return self._next def Next(self): """Returns the current timeout and advances the internal state. """ self._Advance() result = self._next self._next = None return result class _OpExecContext: def __init__(self, op, index, log_prefix, timeout_strategy_factory): """Initializes this class. """ self.op = op self.index = index self.log_prefix = log_prefix self.summary = op.input.Summary() # Create local copy to modify if getattr(op.input, opcodes.DEPEND_ATTR, None): self.jobdeps = op.input.depends[:] else: self.jobdeps = None self._timeout_strategy_factory = timeout_strategy_factory self._ResetTimeoutStrategy() def _ResetTimeoutStrategy(self): """Creates a new timeout strategy. """ self._timeout_strategy = \ _TimeoutStrategyWrapper(self._timeout_strategy_factory().NextAttempt) def CheckPriorityIncrease(self): """Checks whether priority can and should be increased. Called when locks couldn't be acquired. """ op = self.op # Exhausted all retries and next round should not use blocking acquire # for locks? if (self._timeout_strategy.Peek() is None and op.priority > constants.OP_PRIO_HIGHEST): logging.debug("Increasing priority") op.priority -= 1 self._ResetTimeoutStrategy() return True return False def GetNextLockTimeout(self): """Returns the next lock acquire timeout. """ return self._timeout_strategy.Next() class _JobProcessor(object): (DEFER, WAITDEP, FINISHED) = range(1, 4) def __init__(self, queue, opexec_fn, job, _timeout_strategy_factory=mcpu.LockAttemptTimeoutStrategy): """Initializes this class. """ self.queue = queue self.opexec_fn = opexec_fn self.job = job self._timeout_strategy_factory = _timeout_strategy_factory @staticmethod def _FindNextOpcode(job, timeout_strategy_factory): """Locates the next opcode to run. 
@type job: L{_QueuedJob} @param job: Job object @param timeout_strategy_factory: Callable to create new timeout strategy """ # Create some sort of a cache to speed up locating next opcode for future # lookups # TODO: Consider splitting _QueuedJob.ops into two separate lists, one for # pending and one for processed ops. if job.ops_iter is None: job.ops_iter = enumerate(job.ops) # Find next opcode to run while True: try: (idx, op) = job.ops_iter.next() except StopIteration: raise errors.ProgrammerError("Called for a finished job") if op.status == constants.OP_STATUS_RUNNING: # Found an opcode already marked as running raise errors.ProgrammerError("Called for job marked as running") opctx = _OpExecContext(op, idx, "Op %s/%s" % (idx + 1, len(job.ops)), timeout_strategy_factory) if op.status not in constants.OPS_FINALIZED: return opctx # This is a job that was partially completed before master daemon # shutdown, so it can be expected that some opcodes are already # completed successfully (if any did error out, then the whole job # should have been aborted and not resubmitted for processing). logging.info("%s: opcode %s already processed, skipping", opctx.log_prefix, opctx.summary) @staticmethod def _MarkWaitlock(job, op): """Marks an opcode as waiting for locks. The job's start timestamp is also set if necessary. @type job: L{_QueuedJob} @param job: Job object @type op: L{_QueuedOpCode} @param op: Opcode object """ assert op in job.ops assert op.status in (constants.OP_STATUS_QUEUED, constants.OP_STATUS_WAITING) update = False op.result = None if op.status == constants.OP_STATUS_QUEUED: op.status = constants.OP_STATUS_WAITING update = True if op.start_timestamp is None: op.start_timestamp = TimeStampNow() update = True if job.start_timestamp is None: job.start_timestamp = op.start_timestamp update = True assert op.status == constants.OP_STATUS_WAITING return update @staticmethod def _CheckDependencies(queue, job, opctx): """Checks if an opcode has dependencies and if so, processes them. @type queue: L{JobQueue} @param queue: Queue object @type job: L{_QueuedJob} @param job: Job object @type opctx: L{_OpExecContext} @param opctx: Opcode execution context @rtype: bool @return: Whether opcode will be re-scheduled by dependency tracker """ op = opctx.op result = False while opctx.jobdeps: (dep_job_id, dep_status) = opctx.jobdeps[0] (depresult, depmsg) = queue.depmgr.CheckAndRegister(job, dep_job_id, dep_status) assert ht.TNonEmptyString(depmsg), "No dependency message" logging.info("%s: %s", opctx.log_prefix, depmsg) if depresult == _JobDependencyManager.CONTINUE: # Remove dependency and continue opctx.jobdeps.pop(0) elif depresult == _JobDependencyManager.WAIT: # Need to wait for notification, dependency tracker will re-add job # to workerpool result = True break elif depresult == _JobDependencyManager.CANCEL: # Job was cancelled, cancel this job as well job.Cancel() assert op.status == constants.OP_STATUS_CANCELING break elif depresult in (_JobDependencyManager.WRONGSTATUS, _JobDependencyManager.ERROR): # Job failed or there was an error, this job must fail op.status = constants.OP_STATUS_ERROR op.result = _EncodeOpError(errors.OpExecError(depmsg)) break else: raise errors.ProgrammerError("Unknown dependency result '%s'" % depresult) return result def _ExecOpCodeUnlocked(self, opctx): """Processes one opcode and returns the result. 
""" op = opctx.op assert op.status == constants.OP_STATUS_WAITING timeout = opctx.GetNextLockTimeout() try: # Make sure not to hold queue lock while calling ExecOpCode result = self.opexec_fn(op.input, _OpExecCallbacks(self.queue, self.job, op), timeout=timeout) except mcpu.LockAcquireTimeout: assert timeout is not None, "Received timeout for blocking acquire" logging.debug("Couldn't acquire locks in %0.6fs", timeout) assert op.status in (constants.OP_STATUS_WAITING, constants.OP_STATUS_CANCELING) # Was job cancelled while we were waiting for the lock? if op.status == constants.OP_STATUS_CANCELING: return (constants.OP_STATUS_CANCELING, None) # Queue is shutting down, return to queued if not self.queue.AcceptingJobsUnlocked(): return (constants.OP_STATUS_QUEUED, None) # Stay in waitlock while trying to re-acquire lock return (constants.OP_STATUS_WAITING, None) except CancelJob: logging.exception("%s: Canceling job", opctx.log_prefix) assert op.status == constants.OP_STATUS_CANCELING return (constants.OP_STATUS_CANCELING, None) except QueueShutdown: logging.exception("%s: Queue is shutting down", opctx.log_prefix) assert op.status == constants.OP_STATUS_WAITING # Job hadn't been started yet, so it should return to the queue return (constants.OP_STATUS_QUEUED, None) except Exception, err: # pylint: disable=W0703 logging.exception("%s: Caught exception in %s", opctx.log_prefix, opctx.summary) return (constants.OP_STATUS_ERROR, _EncodeOpError(err)) else: logging.debug("%s: %s successful", opctx.log_prefix, opctx.summary) return (constants.OP_STATUS_SUCCESS, result) def __call__(self, _nextop_fn=None): """Continues execution of a job. @param _nextop_fn: Callback function for tests @return: C{FINISHED} if job is fully processed, C{DEFER} if the job should be deferred and C{WAITDEP} if the dependency manager (L{_JobDependencyManager}) will re-schedule the job when appropriate """ queue = self.queue job = self.job logging.debug("Processing job %s", job.id) queue.acquire(shared=1) try: opcount = len(job.ops) assert job.writable, "Expected writable job" # Don't do anything for finalized jobs if job.CalcStatus() in constants.JOBS_FINALIZED: return self.FINISHED # Is a previous opcode still pending? 
if job.cur_opctx: opctx = job.cur_opctx job.cur_opctx = None else: if __debug__ and _nextop_fn: _nextop_fn() opctx = self._FindNextOpcode(job, self._timeout_strategy_factory) op = opctx.op # Consistency check assert compat.all(i.status in (constants.OP_STATUS_QUEUED, constants.OP_STATUS_CANCELING) for i in job.ops[opctx.index + 1:]) assert op.status in (constants.OP_STATUS_QUEUED, constants.OP_STATUS_WAITING, constants.OP_STATUS_CANCELING) assert (op.priority <= constants.OP_PRIO_LOWEST and op.priority >= constants.OP_PRIO_HIGHEST) waitjob = None if op.status != constants.OP_STATUS_CANCELING: assert op.status in (constants.OP_STATUS_QUEUED, constants.OP_STATUS_WAITING) # Prepare to start opcode if self._MarkWaitlock(job, op): # Write to disk queue.UpdateJobUnlocked(job) assert op.status == constants.OP_STATUS_WAITING assert job.CalcStatus() == constants.JOB_STATUS_WAITING assert job.start_timestamp and op.start_timestamp assert waitjob is None # Check if waiting for a job is necessary waitjob = self._CheckDependencies(queue, job, opctx) assert op.status in (constants.OP_STATUS_WAITING, constants.OP_STATUS_CANCELING, constants.OP_STATUS_ERROR) if not (waitjob or op.status in (constants.OP_STATUS_CANCELING, constants.OP_STATUS_ERROR)): logging.info("%s: opcode %s waiting for locks", opctx.log_prefix, opctx.summary) assert not opctx.jobdeps, "Not all dependencies were removed" queue.release() try: (op_status, op_result) = self._ExecOpCodeUnlocked(opctx) finally: queue.acquire(shared=1) op.status = op_status op.result = op_result assert not waitjob if op.status in (constants.OP_STATUS_WAITING, constants.OP_STATUS_QUEUED): # waiting: Couldn't get locks in time # queued: Queue is shutting down assert not op.end_timestamp else: # Finalize opcode op.end_timestamp = TimeStampNow() if op.status == constants.OP_STATUS_CANCELING: assert not compat.any(i.status != constants.OP_STATUS_CANCELING for i in job.ops[opctx.index:]) else: assert op.status in constants.OPS_FINALIZED if op.status == constants.OP_STATUS_QUEUED: # Queue is shutting down assert not waitjob finalize = False # Reset context job.cur_opctx = None # In no case must the status be finalized here assert job.CalcStatus() == constants.JOB_STATUS_QUEUED elif op.status == constants.OP_STATUS_WAITING or waitjob: finalize = False if not waitjob and opctx.CheckPriorityIncrease(): # Priority was changed, need to update on-disk file queue.UpdateJobUnlocked(job) # Keep around for another round job.cur_opctx = opctx assert (op.priority <= constants.OP_PRIO_LOWEST and op.priority >= constants.OP_PRIO_HIGHEST) # In no case must the status be finalized here assert job.CalcStatus() == constants.JOB_STATUS_WAITING else: # Ensure all opcodes so far have been successful assert (opctx.index == 0 or compat.all(i.status == constants.OP_STATUS_SUCCESS for i in job.ops[:opctx.index])) # Reset context job.cur_opctx = None if op.status == constants.OP_STATUS_SUCCESS: finalize = False elif op.status == constants.OP_STATUS_ERROR: # Ensure failed opcode has an exception as its result assert errors.GetEncodedError(job.ops[opctx.index].result) to_encode = errors.OpExecError("Preceding opcode failed") job.MarkUnfinishedOps(constants.OP_STATUS_ERROR, _EncodeOpError(to_encode)) finalize = True # Consistency check assert compat.all(i.status == constants.OP_STATUS_ERROR and errors.GetEncodedError(i.result) for i in job.ops[opctx.index:]) elif op.status == constants.OP_STATUS_CANCELING: job.MarkUnfinishedOps(constants.OP_STATUS_CANCELED, "Job canceled by request") finalize = 
True else: raise errors.ProgrammerError("Unknown status '%s'" % op.status) if opctx.index == (opcount - 1): # Finalize on last opcode finalize = True if finalize: # All opcodes have been run, finalize job job.Finalize() # Write to disk. If the job status is final, this is the final write # allowed. Once the file has been written, it can be archived anytime. queue.UpdateJobUnlocked(job) assert not waitjob if finalize: logging.info("Finished job %s, status = %s", job.id, job.CalcStatus()) return self.FINISHED assert not waitjob or queue.depmgr.JobWaiting(job) if waitjob: return self.WAITDEP else: return self.DEFER finally: assert job.writable, "Job became read-only while being processed" queue.release() def _EvaluateJobProcessorResult(depmgr, job, result): """Looks at a result from L{_JobProcessor} for a job. To be used in a L{_JobQueueWorker}. """ if result == _JobProcessor.FINISHED: # Notify waiting jobs depmgr.NotifyWaiters(job.id) elif result == _JobProcessor.DEFER: # Schedule again raise workerpool.DeferTask(priority=job.CalcPriority()) elif result == _JobProcessor.WAITDEP: # No-op, dependency manager will re-schedule pass else: raise errors.ProgrammerError("Job processor returned unknown status %s" % (result, )) class _JobQueueWorker(workerpool.BaseWorker): """The actual job workers. """ def RunTask(self, job): # pylint: disable=W0221 """Job executor. @type job: L{_QueuedJob} @param job: the job to be processed """ assert job.writable, "Expected writable job" # Ensure only one worker is active on a single job. If a job registers for # a dependency job, and the other job notifies before the first worker is # done, the job can end up in the tasklist more than once. job.processor_lock.acquire() try: return self._RunTaskInner(job) finally: job.processor_lock.release() def _RunTaskInner(self, job): """Executes a job. Must be called with per-job lock acquired. """ queue = job.queue assert queue == self.pool.queue setname_fn = lambda op: self.SetTaskName(self._GetWorkerName(job, op)) setname_fn(None) proc = mcpu.Processor(queue.context, job.id) # Create wrapper for setting thread name wrap_execop_fn = compat.partial(self._WrapExecOpCode, setname_fn, proc.ExecOpCode) _EvaluateJobProcessorResult(queue.depmgr, job, _JobProcessor(queue, wrap_execop_fn, job)()) @staticmethod def _WrapExecOpCode(setname_fn, execop_fn, op, *args, **kwargs): """Updates the worker thread name to include a short summary of the opcode. @param setname_fn: Callable setting worker thread name @param execop_fn: Callable for executing opcode (usually L{mcpu.Processor.ExecOpCode}) """ setname_fn(op) try: return execop_fn(op, *args, **kwargs) finally: setname_fn(None) @staticmethod def _GetWorkerName(job, op): """Sets the worker thread name. @type job: L{_QueuedJob} @type op: L{opcodes.OpCode} """ parts = ["Job%s" % job.id] if op: parts.append(op.TinySummary()) return "/".join(parts) class _JobQueueWorkerPool(workerpool.WorkerPool): """Simple class implementing a job-processing workerpool. """ def __init__(self, queue): super(_JobQueueWorkerPool, self).__init__("Jq", JOBQUEUE_THREADS, _JobQueueWorker) self.queue = queue class _JobDependencyManager: """Keeps track of job dependencies. """ (WAIT, ERROR, CANCEL, CONTINUE, WRONGSTATUS) = range(1, 6) def __init__(self, getstatus_fn, enqueue_fn): """Initializes this class. 
""" self._getstatus_fn = getstatus_fn self._enqueue_fn = enqueue_fn self._waiters = {} self._lock = locking.SharedLock("JobDepMgr") @locking.ssynchronized(_LOCK, shared=1) def GetLockInfo(self, requested): # pylint: disable=W0613 """Retrieves information about waiting jobs. @type requested: set @param requested: Requested information, see C{query.LQ_*} """ # No need to sort here, that's being done by the lock manager and query # library. There are no priorities for notifying jobs, hence all show up as # one item under "pending". return [("job/%s" % job_id, None, None, [("job", [job.id for job in waiters])]) for job_id, waiters in self._waiters.items() if waiters] @locking.ssynchronized(_LOCK, shared=1) def JobWaiting(self, job): """Checks if a job is waiting. """ return compat.any(job in jobs for jobs in self._waiters.values()) @locking.ssynchronized(_LOCK) def CheckAndRegister(self, job, dep_job_id, dep_status): """Checks if a dependency job has the requested status. If the other job is not yet in a finalized status, the calling job will be notified (re-added to the workerpool) at a later point. @type job: L{_QueuedJob} @param job: Job object @type dep_job_id: int @param dep_job_id: ID of dependency job @type dep_status: list @param dep_status: Required status """ assert ht.TJobId(job.id) assert ht.TJobId(dep_job_id) assert ht.TListOf(ht.TElemOf(constants.JOBS_FINALIZED))(dep_status) if job.id == dep_job_id: return (self.ERROR, "Job can't depend on itself") # Get status of dependency job try: status = self._getstatus_fn(dep_job_id) except errors.JobLost, err: return (self.ERROR, "Dependency error: %s" % err) assert status in constants.JOB_STATUS_ALL job_id_waiters = self._waiters.setdefault(dep_job_id, set()) if status not in constants.JOBS_FINALIZED: # Register for notification and wait for job to finish job_id_waiters.add(job) return (self.WAIT, "Need to wait for job %s, wanted status '%s'" % (dep_job_id, dep_status)) # Remove from waiters list if job in job_id_waiters: job_id_waiters.remove(job) if (status == constants.JOB_STATUS_CANCELED and constants.JOB_STATUS_CANCELED not in dep_status): return (self.CANCEL, "Dependency job %s was cancelled" % dep_job_id) elif not dep_status or status in dep_status: return (self.CONTINUE, "Dependency job %s finished with status '%s'" % (dep_job_id, status)) else: return (self.WRONGSTATUS, "Dependency job %s finished with status '%s'," " not one of '%s' as required" % (dep_job_id, status, utils.CommaJoin(dep_status))) def _RemoveEmptyWaitersUnlocked(self): """Remove all jobs without actual waiters. """ for job_id in [job_id for (job_id, waiters) in self._waiters.items() if not waiters]: del self._waiters[job_id] def NotifyWaiters(self, job_id): """Notifies all jobs waiting for a certain job ID. @attention: Do not call until L{CheckAndRegister} returned a status other than C{WAITDEP} for C{job_id}, or behaviour is undefined @type job_id: int @param job_id: Job ID """ assert ht.TJobId(job_id) self._lock.acquire() try: self._RemoveEmptyWaitersUnlocked() jobs = self._waiters.pop(job_id, None) finally: self._lock.release() if jobs: # Re-add jobs to workerpool logging.debug("Re-adding %s jobs which were waiting for job %s", len(jobs), job_id) self._enqueue_fn(jobs) def _RequireOpenQueue(fn): """Decorator for "public" functions. This function should be used for all 'public' functions. That is, functions usually called from other classes. 
Note that this should be applied only to methods (not plain functions), since it expects that the decorated function is called with a first argument that has a '_queue_filelock' attribute. @warning: Use this decorator only after locking.ssynchronized Example:: @locking.ssynchronized(_LOCK) @_RequireOpenQueue def Example(self): pass """ def wrapper(self, *args, **kwargs): # pylint: disable=W0212 assert self._queue_filelock is not None, "Queue should be open" return fn(self, *args, **kwargs) return wrapper def _RequireNonDrainedQueue(fn): """Decorator checking for a non-drained queue. To be used with functions submitting new jobs. """ def wrapper(self, *args, **kwargs): """Wrapper function. @raise errors.JobQueueDrainError: if the job queue is marked for draining """ # Ok when sharing the big job queue lock, as the drain file is created when # the lock is exclusive. # Needs access to protected member, pylint: disable=W0212 if self._drained: raise errors.JobQueueDrainError("Job queue is drained, refusing job") if not self._accepting_jobs: raise errors.JobQueueError("Job queue is shutting down, refusing job") return fn(self, *args, **kwargs) return wrapper class JobQueue(object): """Queue used to manage the jobs. """ def __init__(self, context): """Constructor for JobQueue. The constructor will initialize the job queue object and then start loading the current jobs from disk, either for starting them (if they were queued) or for aborting them (if they were already running). @type context: GanetiContext @param context: the context object for access to the configuration data and other ganeti objects """ self.context = context self._memcache = weakref.WeakValueDictionary() self._my_hostname = netutils.Hostname.GetSysName() # The Big JobQueue lock. If a code block or method acquires it in shared # mode, it must guarantee concurrency with all the code acquiring it in # shared mode, including itself. In order not to acquire it at all, # concurrency must be guaranteed with all code acquiring it in shared mode # and all code acquiring it exclusively. self._lock = locking.SharedLock("JobQueue") self.acquire = self._lock.acquire self.release = self._lock.release # Accept jobs by default self._accepting_jobs = True # Initialize the queue, and acquire the filelock. # This ensures no other process is working on the job queue. self._queue_filelock = jstore.InitAndVerifyQueue(must_lock=True) # Read serial file self._last_serial = jstore.ReadSerial() assert self._last_serial is not None, ("Serial file was modified between" " check in jstore and here") # Get initial list of nodes self._nodes = dict((n.name, n.primary_ip) for n in self.context.cfg.GetAllNodesInfo().values() if n.master_candidate) # Remove master node self._nodes.pop(self._my_hostname, None) # TODO: Check consistency across nodes self._queue_size = None self._UpdateQueueSizeUnlocked() assert ht.TInt(self._queue_size) self._drained = jstore.CheckDrainFlag() # Job dependencies self.depmgr = _JobDependencyManager(self._GetJobStatusForDependencies, self._EnqueueJobs) self.context.glm.AddToLockMonitor(self.depmgr) # Setup worker pool self._wpool = _JobQueueWorkerPool(self) try: self._InspectQueue() except: self._wpool.TerminateWorkers() raise @locking.ssynchronized(_LOCK) @_RequireOpenQueue def _InspectQueue(self): """Loads the whole job queue and resumes unfinished jobs. This function needs the lock here because WorkerPool.AddTask() may start a job while we're still doing our work.
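# A toy demonstration (illustrative, not Ganeti code) of why the docstring
# above insists that _RequireOpenQueue be applied only after
# locking.ssynchronized: decorators apply bottom-up, so the guard written
# closest to the function runs innermost and its assertion executes with
# the lock already held, making the check race-free.
import threading

_toy_lock = threading.Lock()

def synchronized(fn):
    def wrapper(self, *args, **kwargs):
        with _toy_lock:                    # outermost wrapper: lock first
            return fn(self, *args, **kwargs)
    return wrapper

def require_open_queue(fn):
    def wrapper(self, *args, **kwargs):
        # innermost wrapper: runs only once the lock above is held
        assert self._queue_filelock is not None, "Queue should be open"
        return fn(self, *args, **kwargs)
    return wrapper

class ToyQueue(object):
    def __init__(self):
        self._queue_filelock = object()

    @synchronized
    @require_open_queue
    def example(self):
        return "ok"

assert ToyQueue().example() == "ok"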
""" logging.info("Inspecting job queue") restartjobs = [] all_job_ids = self._GetJobIDsUnlocked() jobs_count = len(all_job_ids) lastinfo = time.time() for idx, job_id in enumerate(all_job_ids): # Give an update every 1000 jobs or 10 seconds if (idx % 1000 == 0 or time.time() >= (lastinfo + 10.0) or idx == (jobs_count - 1)): logging.info("Job queue inspection: %d/%d (%0.1f %%)", idx, jobs_count - 1, 100.0 * (idx + 1) / jobs_count) lastinfo = time.time() job = self._LoadJobUnlocked(job_id) # a failure in loading the job can cause 'None' to be returned if job is None: continue status = job.CalcStatus() if status == constants.JOB_STATUS_QUEUED: restartjobs.append(job) elif status in (constants.JOB_STATUS_RUNNING, constants.JOB_STATUS_WAITING, constants.JOB_STATUS_CANCELING): logging.warning("Unfinished job %s found: %s", job.id, job) if status == constants.JOB_STATUS_WAITING: # Restart job job.MarkUnfinishedOps(constants.OP_STATUS_QUEUED, None) restartjobs.append(job) else: to_encode = errors.OpExecError("Unclean master daemon shutdown") job.MarkUnfinishedOps(constants.OP_STATUS_ERROR, _EncodeOpError(to_encode)) job.Finalize() self.UpdateJobUnlocked(job) if restartjobs: logging.info("Restarting %s jobs", len(restartjobs)) self._EnqueueJobsUnlocked(restartjobs) logging.info("Job queue inspection finished") def _GetRpc(self, address_list): """Gets RPC runner with context. """ return rpc.JobQueueRunner(self.context, address_list) @locking.ssynchronized(_LOCK) @_RequireOpenQueue def AddNode(self, node): """Register a new node with the queue. @type node: L{objects.Node} @param node: the node object to be added """ node_name = node.name assert node_name != self._my_hostname # Clean queue directory on added node result = self._GetRpc(None).call_jobqueue_purge(node_name) msg = result.fail_msg if msg: logging.warning("Cannot cleanup queue directory on node %s: %s", node_name, msg) if not node.master_candidate: # remove if existing, ignoring errors self._nodes.pop(node_name, None) # and skip the replication of the job ids return # Upload the whole queue excluding archived jobs files = [self._GetJobPath(job_id) for job_id in self._GetJobIDsUnlocked()] # Upload current serial file files.append(pathutils.JOB_QUEUE_SERIAL_FILE) # Static address list addrs = [node.primary_ip] for file_name in files: # Read file content content = utils.ReadFile(file_name) result = _CallJqUpdate(self._GetRpc(addrs), [node_name], file_name, content) msg = result[node_name].fail_msg if msg: logging.error("Failed to upload file %s to node %s: %s", file_name, node_name, msg) # Set queue drained flag result = \ self._GetRpc(addrs).call_jobqueue_set_drain_flag([node_name], self._drained) msg = result[node_name].fail_msg if msg: logging.error("Failed to set queue drained flag on node %s: %s", node_name, msg) self._nodes[node_name] = node.primary_ip @locking.ssynchronized(_LOCK) @_RequireOpenQueue def RemoveNode(self, node_name): """Callback called when removing nodes from the cluster. @type node_name: str @param node_name: the name of the node to remove """ self._nodes.pop(node_name, None) @staticmethod def _CheckRpcResult(result, nodes, failmsg): """Verifies the status of an RPC call. Since we aim to keep consistency should this node (the current master) fail, we will log errors if our rpc fail, and especially log the case when more than half of the nodes fails. 
@param result: the data as returned from the rpc call @type nodes: list @param nodes: the list of nodes we made the call to @type failmsg: str @param failmsg: the identifier to be used for logging """ failed = [] success = [] for node in nodes: msg = result[node].fail_msg if msg: failed.append(node) logging.error("RPC call %s (%s) failed on node %s: %s", result[node].call, failmsg, node, msg) else: success.append(node) # +1 for the master node if (len(success) + 1) < len(failed): # TODO: Handle failing nodes logging.error("More than half of the nodes failed") def _GetNodeIp(self): """Helper for returning the node name/ip list. @rtype: (list, list) @return: a tuple of two lists, the first one with the node names and the second one with the node addresses """ # TODO: Change to "tuple(map(list, zip(*self._nodes.items())))"? name_list = self._nodes.keys() addr_list = [self._nodes[name] for name in name_list] return name_list, addr_list def _UpdateJobQueueFile(self, file_name, data, replicate): """Writes a file locally and then replicates it to all nodes. This function will replace the contents of a file on the local node and then replicate it to all the other nodes we have. @type file_name: str @param file_name: the path of the file to be replicated @type data: str @param data: the new contents of the file @type replicate: boolean @param replicate: whether to spread the changes to the remote nodes """ getents = runtime.GetEnts() utils.WriteFile(file_name, data=data, uid=getents.masterd_uid, gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) if replicate: names, addrs = self._GetNodeIp() result = _CallJqUpdate(self._GetRpc(addrs), names, file_name, data) self._CheckRpcResult(result, self._nodes, "Updating %s" % file_name) def _RenameFilesUnlocked(self, rename): """Renames files locally and then replicates the change. This function will rename a file in the local queue directory and then replicate this rename to all the other nodes we have. @type rename: list of (old, new) @param rename: List containing tuples mapping old to new names """ # Rename them locally for old, new in rename: utils.RenameFile(old, new, mkdir=True) # ... and on all nodes names, addrs = self._GetNodeIp() result = self._GetRpc(addrs).call_jobqueue_rename(names, rename) self._CheckRpcResult(result, self._nodes, "Renaming files (%r)" % rename) def _NewSerialsUnlocked(self, count): """Generates new job identifiers. Job identifiers are unique during the lifetime of a cluster. @type count: integer @param count: how many serials to return @rtype: list of int @return: a list of job identifiers. """ assert ht.TNonNegativeInt(count) # New number serial = self._last_serial + count # Write to file self._UpdateJobQueueFile(pathutils.JOB_QUEUE_SERIAL_FILE, "%s\n" % serial, True) result = [jstore.FormatJobID(v) for v in range(self._last_serial + 1, serial + 1)] # Keep it only if we were able to write the file self._last_serial = serial assert len(result) == count return result @staticmethod def _GetJobPath(job_id): """Returns the job file for a given job id. @type job_id: str @param job_id: the job identifier @rtype: str @return: the path to the job file """ return utils.PathJoin(pathutils.QUEUE_DIR, "job-%s" % job_id) @staticmethod def _GetArchivedJobPath(job_id): """Returns the archived job file for a given job id.
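# A worked example for _NewSerialsUnlocked above (stand-alone
# re-implementation for illustration; the real method also replicates the
# serial file to all master candidates): with _last_serial == 41 and
# count == 3, the serial file is rewritten to contain "44" and the job IDs
# 42, 43 and 44 are handed out. The in-memory counter is only advanced
# after the file write, so a failed write reserves nothing.
def new_serials(last_serial, count):
    serial = last_serial + count
    # ... write "%s\n" % serial to the serial file and replicate it here ...
    return (serial, list(range(last_serial + 1, serial + 1)))

assert new_serials(41, 3) == (44, [42, 43, 44])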
@type job_id: str @param job_id: the job identifier @rtype: str @return: the path to the archived job file """ return utils.PathJoin(pathutils.JOB_QUEUE_ARCHIVE_DIR, jstore.GetArchiveDirectory(job_id), "job-%s" % job_id) @staticmethod def _DetermineJobDirectories(archived): """Build list of directories containing job files. @type archived: bool @param archived: Whether to include directories for archived jobs @rtype: list """ result = [pathutils.QUEUE_DIR] if archived: archive_path = pathutils.JOB_QUEUE_ARCHIVE_DIR result.extend(map(compat.partial(utils.PathJoin, archive_path), utils.ListVisibleFiles(archive_path))) return result @classmethod def _GetJobIDsUnlocked(cls, sort=True, archived=False): """Return all known job IDs. The method only looks at disk because it's a requirement that all jobs are present on disk (so in the _memcache we don't have any extra IDs). @type sort: boolean @param sort: perform sorting on the returned job ids @rtype: list @return: the list of job IDs """ jlist = [] for path in cls._DetermineJobDirectories(archived): for filename in utils.ListVisibleFiles(path): m = constants.JOB_FILE_RE.match(filename) if m: jlist.append(int(m.group(1))) if sort: jlist.sort() return jlist def _LoadJobUnlocked(self, job_id): """Loads a job from the disk or memory. Given a job id, this will return the cached job object if existing, or try to load the job from the disk. If loading from disk, it will also add the job to the cache. @type job_id: int @param job_id: the job id @rtype: L{_QueuedJob} or None @return: either None or the job object """ assert isinstance(job_id, int), "Job queue: Supplied job id is not an int!" job = self._memcache.get(job_id, None) if job: logging.debug("Found job %s in memcache", job_id) assert job.writable, "Found read-only job in memcache" return job try: job = self._LoadJobFromDisk(job_id, False) if job is None: return job except errors.JobFileCorrupted: old_path = self._GetJobPath(job_id) new_path = self._GetArchivedJobPath(job_id) if old_path == new_path: # job already archived (future case) logging.exception("Can't parse job %s", job_id) else: # non-archived case logging.exception("Can't parse job %s, will archive.", job_id) self._RenameFilesUnlocked([(old_path, new_path)]) return None assert job.writable, "Job just loaded is not writable" self._memcache[job_id] = job logging.debug("Added job %s to the cache", job_id) return job def _LoadJobFromDisk(self, job_id, try_archived, writable=None): """Load the given job file from disk. Given a job file, read, load and restore it in a _QueuedJob format. @type job_id: int @param job_id: job identifier @type try_archived: bool @param try_archived: Whether to try loading an archived job @rtype: L{_QueuedJob} or None @return: either None or the job object """ path_functions = [(self._GetJobPath, False)] if try_archived: path_functions.append((self._GetArchivedJobPath, True)) raw_data = None archived = None for (fn, archived) in path_functions: filepath = fn(job_id) logging.debug("Loading job from %s", filepath) try: raw_data = utils.ReadFile(filepath) except EnvironmentError, err: if err.errno != errno.ENOENT: raise else: break if not raw_data: return None if writable is None: writable = not archived try: data = serializer.LoadJson(raw_data) job = _QueuedJob.Restore(self, data, writable, archived) except Exception, err: # pylint: disable=W0703 raise errors.JobFileCorrupted(err) return job def SafeLoadJobFromDisk(self, job_id, try_archived, writable=None): """Load the given job file from disk. 
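# A short demonstration of the cache behaviour _LoadJobUnlocked above
# depends on: _memcache is a weakref.WeakValueDictionary, so a job stays
# cached only while something else (worker pool, waiters) still holds a
# strong reference to it; afterwards the entry disappears and the next
# lookup falls back to disk. Toy example, not Ganeti code; the immediate
# eviction after "del" assumes CPython's reference counting.
import weakref

class Job(object):
    def __init__(self, job_id):
        self.id = job_id

cache = weakref.WeakValueDictionary()
job = Job(7)
cache[7] = job
assert cache.get(7) is job     # cached while strongly referenced elsewhere
del job                        # drop the only strong reference
assert cache.get(7) is None    # entry evicted automatically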
Given a job file, read, load and restore it in a _QueuedJob format. In case of error reading the job, it gets returned as None, and the exception is logged. @type job_id: int @param job_id: job identifier @type try_archived: bool @param try_archived: Whether to try loading an archived job @rtype: L{_QueuedJob} or None @return: either None or the job object """ try: return self._LoadJobFromDisk(job_id, try_archived, writable=writable) except (errors.JobFileCorrupted, EnvironmentError): logging.exception("Can't load/parse job %s", job_id) return None def _UpdateQueueSizeUnlocked(self): """Update the queue size. """ self._queue_size = len(self._GetJobIDsUnlocked(sort=False)) @locking.ssynchronized(_LOCK) @_RequireOpenQueue def SetDrainFlag(self, drain_flag): """Sets the drain flag for the queue. @type drain_flag: boolean @param drain_flag: Whether to set or unset the drain flag """ # Change flag locally jstore.SetDrainFlag(drain_flag) self._drained = drain_flag # ... and on all nodes (names, addrs) = self._GetNodeIp() result = \ self._GetRpc(addrs).call_jobqueue_set_drain_flag(names, drain_flag) self._CheckRpcResult(result, self._nodes, "Setting queue drain flag to %s" % drain_flag) return True @_RequireOpenQueue def _SubmitJobUnlocked(self, job_id, ops): """Create and store a new job. This enters the job into our job queue and also puts it on the new queue, in order for it to be picked up by the queue processors. @type job_id: job ID @param job_id: the job ID for the new job @type ops: list @param ops: The list of OpCodes that will become the new job. @rtype: L{_QueuedJob} @return: the job object to be queued @raise errors.JobQueueFull: if the job queue has too many jobs in it @raise errors.GenericError: If an opcode is not valid """ if self._queue_size >= constants.JOB_QUEUE_SIZE_HARD_LIMIT: raise errors.JobQueueFull() job = _QueuedJob(self, job_id, ops, True) for idx, op in enumerate(job.ops): # Check priority if op.priority not in constants.OP_PRIO_SUBMIT_VALID: allowed = utils.CommaJoin(constants.OP_PRIO_SUBMIT_VALID) raise errors.GenericError("Opcode %s has invalid priority %s, allowed" " are %s" % (idx, op.priority, allowed)) # Check job dependencies dependencies = getattr(op.input, opcodes.DEPEND_ATTR, None) if not opcodes.TNoRelativeJobDependencies(dependencies): raise errors.GenericError("Opcode %s has invalid dependencies, must" " match %s: %s" % (idx, opcodes.TNoRelativeJobDependencies, dependencies)) # Write to disk self.UpdateJobUnlocked(job) self._queue_size += 1 logging.debug("Adding new job %s to the cache", job_id) self._memcache[job_id] = job return job @locking.ssynchronized(_LOCK) @_RequireOpenQueue @_RequireNonDrainedQueue def SubmitJob(self, ops): """Create and store a new job. @see: L{_SubmitJobUnlocked} """ (job_id, ) = self._NewSerialsUnlocked(1) self._EnqueueJobsUnlocked([self._SubmitJobUnlocked(job_id, ops)]) return job_id @locking.ssynchronized(_LOCK) @_RequireOpenQueue @_RequireNonDrainedQueue def SubmitManyJobs(self, jobs): """Create and store multiple jobs. @see: L{_SubmitJobUnlocked} """ all_job_ids = self._NewSerialsUnlocked(len(jobs)) (results, added_jobs) = \ self._SubmitManyJobsUnlocked(jobs, all_job_ids, []) self._EnqueueJobsUnlocked(added_jobs) return results @staticmethod def _FormatSubmitError(msg, ops): """Formats errors which occurred while submitting a job. """ return ("%s; opcodes %s" % (msg, utils.CommaJoin(op.Summary() for op in ops))) @staticmethod def _ResolveJobDependencies(resolve_fn, deps): """Resolves relative job IDs in dependencies. 
@type resolve_fn: callable @param resolve_fn: Function to resolve a relative job ID @type deps: list @param deps: Dependencies @rtype: tuple; (boolean, string or list) @return: If successful (first tuple item), the returned list contains resolved job IDs along with the requested status; if not successful, the second element is an error message """ result = [] for (dep_job_id, dep_status) in deps: if ht.TRelativeJobId(dep_job_id): assert ht.TInt(dep_job_id) and dep_job_id < 0 try: job_id = resolve_fn(dep_job_id) except IndexError: # Abort return (False, "Unable to resolve relative job ID %s" % dep_job_id) else: job_id = dep_job_id result.append((job_id, dep_status)) return (True, result) def _SubmitManyJobsUnlocked(self, jobs, job_ids, previous_job_ids): """Create and store multiple jobs. @see: L{_SubmitJobUnlocked} """ results = [] added_jobs = [] def resolve_fn(job_idx, reljobid): assert reljobid < 0 return (previous_job_ids + job_ids[:job_idx])[reljobid] for (idx, (job_id, ops)) in enumerate(zip(job_ids, jobs)): for op in ops: if getattr(op, opcodes.DEPEND_ATTR, None): (status, data) = \ self._ResolveJobDependencies(compat.partial(resolve_fn, idx), op.depends) if not status: # Abort resolving dependencies assert ht.TNonEmptyString(data), "No error message" break # Use resolved dependencies op.depends = data else: try: job = self._SubmitJobUnlocked(job_id, ops) except errors.GenericError, err: status = False data = self._FormatSubmitError(str(err), ops) else: status = True data = job_id added_jobs.append(job) results.append((status, data)) return (results, added_jobs) @locking.ssynchronized(_LOCK) def _EnqueueJobs(self, jobs): """Helper function to add jobs to worker pool's queue. @type jobs: list @param jobs: List of all jobs """ return self._EnqueueJobsUnlocked(jobs) def _EnqueueJobsUnlocked(self, jobs): """Helper function to add jobs to worker pool's queue. @type jobs: list @param jobs: List of all jobs """ assert self._lock.is_owned(shared=0), "Must own lock in exclusive mode" self._wpool.AddManyTasks([(job, ) for job in jobs], priority=[job.CalcPriority() for job in jobs], task_id=map(_GetIdAttr, jobs)) def _GetJobStatusForDependencies(self, job_id): """Gets the status of a job for dependencies. @type job_id: int @param job_id: Job ID @raise errors.JobLost: If job can't be found """ # Not using in-memory cache as doing so would require an exclusive lock # Try to load from disk job = self.SafeLoadJobFromDisk(job_id, True, writable=False) assert not job.writable, "Got writable job" # pylint: disable=E1101 if job: return job.CalcStatus() raise errors.JobLost("Job %s not found" % job_id) @_RequireOpenQueue def UpdateJobUnlocked(self, job, replicate=True): """Update a job's on disk storage. After a job has been modified, this function needs to be called in order to write the changes to disk and replicate them to the other nodes. 
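# A worked example of the resolve_fn closure defined in
# _SubmitManyJobsUnlocked above (the ID values are illustrative): relative
# job IDs are negative indexes into the list of job IDs submitted so far,
# with earlier submissions first.
previous_job_ids = [10, 11]    # jobs from earlier submission batches
job_ids = [12, 13, 14]         # IDs assigned to the current batch

def resolve_fn(job_idx, reljobid):
    assert reljobid < 0
    return (previous_job_ids + job_ids[:job_idx])[reljobid]

# From the third job of the batch (index 2), "-1" is the job submitted
# immediately before it and "-3" reaches back into the previous batch:
assert resolve_fn(2, -1) == 13
assert resolve_fn(2, -3) == 11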
@type job: L{_QueuedJob} @param job: the changed job @type replicate: boolean @param replicate: whether to replicate the change to remote nodes """ if __debug__: finalized = job.CalcStatus() in constants.JOBS_FINALIZED assert (finalized ^ (job.end_timestamp is None)) assert job.writable, "Can't update read-only job" assert not job.archived, "Can't update archived job" filename = self._GetJobPath(job.id) data = serializer.DumpJson(job.Serialize()) logging.debug("Writing job %s to %s", job.id, filename) self._UpdateJobQueueFile(filename, data, replicate) def WaitForJobChanges(self, job_id, fields, prev_job_info, prev_log_serial, timeout): """Waits for changes in a job. @type job_id: int @param job_id: Job identifier @type fields: list of strings @param fields: Which fields to check for changes @type prev_job_info: list or None @param prev_job_info: Last job information returned @type prev_log_serial: int @param prev_log_serial: Last job message serial number @type timeout: float @param timeout: maximum time to wait in seconds @rtype: tuple (job info, log entries) @return: a tuple of the job information as required via the fields parameter, and the log entries as a list if the job has not changed and the timeout has expired, we instead return a special value, L{constants.JOB_NOTCHANGED}, which should be interpreted as such by the clients """ load_fn = compat.partial(self.SafeLoadJobFromDisk, job_id, True, writable=False) helper = _WaitForJobChangesHelper() return helper(self._GetJobPath(job_id), load_fn, fields, prev_job_info, prev_log_serial, timeout) @locking.ssynchronized(_LOCK) @_RequireOpenQueue def CancelJob(self, job_id): """Cancels a job. This will only succeed if the job has not started yet. @type job_id: int @param job_id: job ID of job to be cancelled. """ logging.info("Cancelling job %s", job_id) return self._ModifyJobUnlocked(job_id, lambda job: job.Cancel()) @locking.ssynchronized(_LOCK) @_RequireOpenQueue def ChangeJobPriority(self, job_id, priority): """Changes a job's priority. @type job_id: int @param job_id: ID of the job whose priority should be changed @type priority: int @param priority: New priority """ logging.info("Changing priority of job %s to %s", job_id, priority) if priority not in constants.OP_PRIO_SUBMIT_VALID: allowed = utils.CommaJoin(constants.OP_PRIO_SUBMIT_VALID) raise errors.GenericError("Invalid priority %s, allowed are %s" % (priority, allowed)) def fn(job): (success, msg) = job.ChangePriority(priority) if success: try: self._wpool.ChangeTaskPriority(job.id, job.CalcPriority()) except workerpool.NoSuchTask: logging.debug("Job %s is not in workerpool at this time", job.id) return (success, msg) return self._ModifyJobUnlocked(job_id, fn) def _ModifyJobUnlocked(self, job_id, mod_fn): """Modifies a job. @type job_id: int @param job_id: Job ID @type mod_fn: callable @param mod_fn: Modifying function, receiving job object as parameter, returning tuple of (status boolean, message string) """ job = self._LoadJobUnlocked(job_id) if not job: logging.debug("Job %s not found", job_id) return (False, "Job %s not found" % job_id) assert job.writable, "Can't modify read-only job" assert not job.archived, "Can't modify archived job" (success, msg) = mod_fn(job) if success: # If the job was finalized (e.g. cancelled), this is the final write # allowed. The job can be archived anytime. self.UpdateJobUnlocked(job) return (success, msg) @_RequireOpenQueue def _ArchiveJobsUnlocked(self, jobs): """Archives jobs. 
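# A minimal sketch (hypothetical helper, not Ganeti code) of the
# load/modify/persist pattern shared by CancelJob and ChangeJobPriority
# above: load the job, apply a mod_fn returning (success, message), and
# write the job back to disk only when the modification succeeded.
def modify_job(load_fn, update_fn, job_id, mod_fn):
    job = load_fn(job_id)
    if not job:
        return (False, "Job %s not found" % job_id)
    (success, msg) = mod_fn(job)
    if success:
        update_fn(job)         # single write; job is archivable if finalized
    return (success, msg)

class ModJob(object):
    cancelled = False

store = {1: ModJob()}

def cancel(job):
    job.cancelled = True
    return (True, "canceled")

assert modify_job(store.get, lambda job: None, 1, cancel) == (True, "canceled")
assert modify_job(store.get, lambda job: None, 2, cancel)[0] is False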
@type jobs: list of L{_QueuedJob} @param jobs: Job objects @rtype: int @return: Number of archived jobs """ archive_jobs = [] rename_files = [] for job in jobs: assert job.writable, "Can't archive read-only job" assert not job.archived, "Can't cancel archived job" if job.CalcStatus() not in constants.JOBS_FINALIZED: logging.debug("Job %s is not yet done", job.id) continue archive_jobs.append(job) old = self._GetJobPath(job.id) new = self._GetArchivedJobPath(job.id) rename_files.append((old, new)) # TODO: What if 1..n files fail to rename? self._RenameFilesUnlocked(rename_files) logging.debug("Successfully archived job(s) %s", utils.CommaJoin(job.id for job in archive_jobs)) # Since we haven't quite checked, above, if we succeeded or failed renaming # the files, we update the cached queue size from the filesystem. When we # get around to fix the TODO: above, we can use the number of actually # archived jobs to fix this. self._UpdateQueueSizeUnlocked() return len(archive_jobs) @locking.ssynchronized(_LOCK) @_RequireOpenQueue def ArchiveJob(self, job_id): """Archives a job. This is just a wrapper over L{_ArchiveJobsUnlocked}. @type job_id: int @param job_id: Job ID of job to be archived. @rtype: bool @return: Whether job was archived """ logging.info("Archiving job %s", job_id) job = self._LoadJobUnlocked(job_id) if not job: logging.debug("Job %s not found", job_id) return False return self._ArchiveJobsUnlocked([job]) == 1 @locking.ssynchronized(_LOCK) @_RequireOpenQueue def AutoArchiveJobs(self, age, timeout): """Archives all jobs based on age. The method will archive all jobs which are older than the age parameter. For jobs that don't have an end timestamp, the start timestamp will be considered. The special '-1' age will cause archival of all jobs (that are not running or queued). @type age: int @param age: the minimum age in seconds """ logging.info("Archiving jobs with age more than %s seconds", age) now = time.time() end_time = now + timeout archived_count = 0 last_touched = 0 all_job_ids = self._GetJobIDsUnlocked() pending = [] for idx, job_id in enumerate(all_job_ids): last_touched = idx + 1 # Not optimal because jobs could be pending # TODO: Measure average duration for job archival and take number of # pending jobs into account. if time.time() > end_time: break # Returns None if the job failed to load job = self._LoadJobUnlocked(job_id) if job: if job.end_timestamp is None: if job.start_timestamp is None: job_age = job.received_timestamp else: job_age = job.start_timestamp else: job_age = job.end_timestamp if age == -1 or now - job_age[0] > age: pending.append(job) # Archive 10 jobs at a time if len(pending) >= 10: archived_count += self._ArchiveJobsUnlocked(pending) pending = [] if pending: archived_count += self._ArchiveJobsUnlocked(pending) return (archived_count, len(all_job_ids) - last_touched) def _Query(self, fields, qfilter): qobj = query.Query(query.JOB_FIELDS, fields, qfilter=qfilter, namefield="id") # Archived jobs are only looked at if the "archived" field is referenced # either as a requested field or in the filter. By default archived jobs # are ignored. include_archived = (query.JQ_ARCHIVED in qobj.RequestedData()) job_ids = qobj.RequestedNames() list_all = (job_ids is None) if list_all: # Since files are added to/removed from the queue atomically, there's no # risk of getting the job ids in an inconsistent state. 
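# A condensed version (hypothetical helper, not Ganeti code) of the age
# test in AutoArchiveJobs above: a job's age anchor is its end timestamp
# when set, else its start timestamp, else its received timestamp, and
# age == -1 archives every finalized job. Timestamps are (seconds,
# microseconds) pairs, hence the [0].
def job_is_archivable(job, age, now):
    ts = job.end_timestamp or job.start_timestamp or job.received_timestamp
    return age == -1 or now - ts[0] > age

class AgedJob(object):
    def __init__(self, end_ts):
        self.received_timestamp = (1000, 0)
        self.start_timestamp = (1010, 0)
        self.end_timestamp = end_ts

assert job_is_archivable(AgedJob((1500, 0)), 300, 2000)      # 500s old
assert not job_is_archivable(AgedJob((1900, 0)), 300, 2000)  # too recent
assert job_is_archivable(AgedJob(None), -1, 2000)            # archive all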
job_ids = self._GetJobIDsUnlocked(archived=include_archived) jobs = [] for job_id in job_ids: job = self.SafeLoadJobFromDisk(job_id, True, writable=False) if job is not None or not list_all: jobs.append((job_id, job)) return (qobj, jobs, list_all) def QueryJobs(self, fields, qfilter): """Returns a list of jobs in queue. @type fields: sequence @param fields: List of wanted fields @type qfilter: None or query2 filter (list) @param qfilter: Query filter """ (qobj, ctx, _) = self._Query(fields, qfilter) return query.GetQueryResponse(qobj, ctx, sort_by_name=False) def OldStyleQueryJobs(self, job_ids, fields): """Returns a list of jobs in queue. @type job_ids: list @param job_ids: sequence of job identifiers or None for all @type fields: list @param fields: names of fields to return @rtype: list @return: list, one element per job, each element being a list with the requested fields """ # backwards compat: job_ids = [int(jid) for jid in job_ids] qfilter = qlang.MakeSimpleFilter("id", job_ids) (qobj, ctx, _) = self._Query(fields, qfilter) return qobj.OldStyleQuery(ctx, sort_by_name=False) @locking.ssynchronized(_LOCK) def PrepareShutdown(self): """Prepare to stop the job queue. Disables execution of jobs in the workerpool and returns whether there are any jobs currently running. If the latter is the case, the job queue is not yet ready for shutdown. Once this function returns C{False}, L{Shutdown} can be called without interfering with any job. Queued and unfinished jobs will be resumed next time. Once this function has been called, no new job submissions will be accepted (see L{_RequireNonDrainedQueue}). @rtype: bool @return: Whether there are any running jobs """ if self._accepting_jobs: self._accepting_jobs = False # Tell worker pool to stop processing pending tasks self._wpool.SetActive(False) return self._wpool.HasRunningTasks() def AcceptingJobsUnlocked(self): """Returns whether jobs are accepted. Once L{PrepareShutdown} has been called, no new jobs are accepted and the queue is shutting down. @rtype: bool """ return self._accepting_jobs @locking.ssynchronized(_LOCK) @_RequireOpenQueue def Shutdown(self): """Stops the job queue. This shuts down all the worker threads and closes the queue. """ self._wpool.TerminateWorkers() self._queue_filelock.Close() self._queue_filelock = None ganeti-2.9.3/lib/hypervisor/0000755000000000000000000000000012271445544015762 5ustar00rootroot00000000000000ganeti-2.9.3/lib/hypervisor/hv_base.py0000644000000000000000000004476312271422343017751 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Base class for all hypervisors The syntax for the _CHECK variables and the contents of the PARAMETERS dict is the same, see the docstring for L{BaseHypervisor.PARAMETERS}.
@var _FILE_CHECK: stub for file checks, without the required flag @var _DIR_CHECK: stub for directory checks, without the required flag @var REQ_FILE_CHECK: mandatory file parameter @var OPT_FILE_CHECK: optional file parameter @var REQ_DIR_CHECK: mandatory directory parameter @var OPT_DIR_CHECK: optional directory parameter @var NO_CHECK: parameter without any checks at all @var REQUIRED_CHECK: parameter required to exist (and non-false), but without other checks; beware that this can't be used for boolean parameters, where you should use NO_CHECK or a custom checker """ import os import re import logging from ganeti import errors from ganeti import utils from ganeti import constants def _IsCpuMaskWellFormed(cpu_mask): """Verifies if the given single CPU mask is valid. The single CPU mask should be in the form "a,b,c,d", where each letter is a positive number or range. """ try: cpu_list = utils.ParseCpuMask(cpu_mask) except errors.ParseError, _: return False return isinstance(cpu_list, list) and len(cpu_list) > 0 def _IsMultiCpuMaskWellFormed(cpu_mask): """Verifies if the given multiple CPU mask is valid. A valid multiple CPU mask is in the form "a:b:c:d", where each letter is a single CPU mask. """ try: utils.ParseMultiCpuMask(cpu_mask) except errors.ParseError, _: return False return True # Read the BaseHypervisor.PARAMETERS docstring for the syntax of the # _CHECK values # must be a file _FILE_CHECK = (utils.IsNormAbsPath, "must be an absolute normalized path", os.path.isfile, "not found or not a file") # must be a directory _DIR_CHECK = (utils.IsNormAbsPath, "must be an absolute normalized path", os.path.isdir, "not found or not a directory") # CPU mask must be well-formed # TODO: implement node level check for the CPU mask _CPU_MASK_CHECK = (_IsCpuMaskWellFormed, "CPU mask definition is not well-formed", None, None) # Multiple CPU mask must be well-formed _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed, "Multiple CPU mask definition is not well-formed", None, None) # Check for validity of port number _NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number", None, None) # Check that an integer is non-negative _NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, None) # nice wrappers for users REQ_FILE_CHECK = (True, ) + _FILE_CHECK OPT_FILE_CHECK = (False, ) + _FILE_CHECK REQ_DIR_CHECK = (True, ) + _DIR_CHECK OPT_DIR_CHECK = (False, ) + _DIR_CHECK REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK OPT_MULTI_CPU_MASK_CHECK = (False, ) + _MULTI_CPU_MASK_CHECK REQ_NONNEGATIVE_INT_CHECK = (True, ) + _NONNEGATIVE_INT_CHECK OPT_NONNEGATIVE_INT_CHECK = (False, ) + _NONNEGATIVE_INT_CHECK # no checks at all NO_CHECK = (False, None, None, None, None) # required, but no other checks REQUIRED_CHECK = (True, None, None, None, None) # migration type MIGRATION_MODE_CHECK = (True, lambda x: x in constants.HT_MIGRATION_MODES, "invalid migration mode", None, None) def ParamInSet(required, my_set): """Builds parameter checker for set membership.
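# An example of how the five-element check tuples defined above are built
# and consumed (see BaseHypervisor.PARAMETERS for the layout: required
# flag, syntax check function, syntax error text, remote check function,
# remote error text). Simplified re-implementation for illustration only;
# "kernel_path" is just a sample parameter name.
import os.path

_MY_FILE_CHECK = (os.path.isabs, "must be an absolute path",
                  os.path.isfile, "not found or not a file")
MY_REQ_FILE_CHECK = (True, ) + _MY_FILE_CHECK

def check_syntax(name, value, check):
    (required, check_fn, errstr, _, _) = check
    if not required and not value:
        return                     # optional and unset: nothing to verify
    if check_fn is not None and not check_fn(value):
        raise ValueError("Parameter '%s' fails syntax check: %s" %
                         (name, errstr))

check_syntax("kernel_path", "/boot/vmlinuz", MY_REQ_FILE_CHECK)  # passes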
@type required: boolean @param required: whether this is a required parameter @type my_set: tuple, list or set @param my_set: allowed values set """ fn = lambda x: x in my_set err = ("The value must be one of: %s" % utils.CommaJoin(my_set)) return (required, fn, err, None, None) class BaseHypervisor(object): """Abstract virtualisation technology interface. The goal is that all aspects of the virtualisation technology are abstracted away from the rest of code. @cvar PARAMETERS: a dict of parameter name: check type; the check type is a five-tuple containing: - the required flag (boolean) - a function to check for syntax, that will be used in L{CheckParameterSyntax}, in the master daemon process - an error message for the above function - a function to check for parameter validity on the remote node, in the L{ValidateParameters} function - an error message for the above function @type CAN_MIGRATE: boolean @cvar CAN_MIGRATE: whether this hypervisor can do migration (either live or non-live) """ PARAMETERS = {} ANCILLARY_FILES = [] ANCILLARY_FILES_OPT = [] CAN_MIGRATE = False def StartInstance(self, instance, block_devices, startup_paused): """Start an instance.""" raise NotImplementedError def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. @type instance: L{objects.Instance} @param instance: instance to stop @type force: boolean @param force: whether to do a "hard" stop (destroy) @type retry: boolean @param retry: whether this is just a retry call @type name: string or None @param name: if this parameter is passed, the instance object should not be used (will be passed as None), and the shutdown must be done by name only """ raise NotImplementedError def CleanupInstance(self, instance_name): """Cleanup after a stopped instance. This is an optional method, used by hypervisors that need to clean up after an instance has been stopped. @type instance_name: string @param instance_name: instance name to clean up after """ pass def RebootInstance(self, instance): """Reboot an instance.""" raise NotImplementedError def ListInstances(self, hvparams=None): """Get the list of running instances.""" raise NotImplementedError def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. @type instance_name: string @param instance_name: the instance name @type hvparams: dict of strings @param hvparams: hvparams to be used with this instance @return: tuple (name, id, memory, vcpus, state, times) """ raise NotImplementedError def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. @type hvparams: dict of strings @param hvparams: hypervisor parameters @return: list of tuples (name, id, memory, vcpus, stat, times) """ raise NotImplementedError def GetNodeInfo(self, hvparams=None): """Return information about the node. @type hvparams: dict of strings @param hvparams: hypervisor parameters @return: a dict with at least the following keys (memory values in MiB): - memory_total: the total memory size on the node - memory_free: the available memory on the node for instances - memory_dom0: the memory used by the node itself, if available - cpu_total: total number of CPUs - cpu_dom0: number of CPUs used by the node OS - cpu_nodes: number of NUMA domains - cpu_sockets: number of physical CPU sockets """ raise NotImplementedError @classmethod def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return information for connecting to the console of an instance.
""" raise NotImplementedError @classmethod def GetAncillaryFiles(cls): """Return a list of ancillary files to be copied to all nodes as ancillary configuration files. @rtype: (list of absolute paths, list of absolute paths) @return: (all files, optional files) """ # By default we return a member variable, so that if an hypervisor has just # a static list of files it doesn't have to override this function. assert set(cls.ANCILLARY_FILES).issuperset(cls.ANCILLARY_FILES_OPT), \ "Optional ancillary files must be a subset of ancillary files" return (cls.ANCILLARY_FILES, cls.ANCILLARY_FILES_OPT) def Verify(self, hvparams=None): """Verify the hypervisor. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against @return: Problem description if something is wrong, C{None} otherwise """ raise NotImplementedError def MigrationInfo(self, instance): # pylint: disable=R0201,W0613 """Get instance information to perform a migration. By default assume no information is needed. @type instance: L{objects.Instance} @param instance: instance to be migrated @rtype: string/data (opaque) @return: instance migration information - serialized form """ return "" def AcceptInstance(self, instance, info, target): """Prepare to accept an instance. By default assume no preparation is needed. @type instance: L{objects.Instance} @param instance: instance to be accepted @type info: string/data (opaque) @param info: migration information, from the source node @type target: string @param target: target host (usually ip), on this node """ pass def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ raise NotImplementedError def FinalizeMigrationDst(self, instance, info, success): """Finalize the instance migration on the target node. Should finalize or revert any preparation done to accept the instance. Since by default we do no preparation, we also don't have anything to do @type instance: L{objects.Instance} @param instance: instance whose migration is being finalized @type info: string/data (opaque) @param info: migration information, from the source node @type success: boolean @param success: whether the migration was a success or a failure """ pass def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance. @type cluster_name: string @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: hostname (usually ip) of the target node @type live: boolean @param live: whether to do a live or non-live migration """ raise NotImplementedError def FinalizeMigrationSource(self, instance, success, live): """Finalize the instance migration on the source node. 
@type instance: L{objects.Instance} @param instance: the instance that was migrated @type success: bool @param success: whether the migration succeeded or not @type live: bool @param live: whether the user requested a live migration or not """ pass def GetMigrationStatus(self, instance): """Get the migration status @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ raise NotImplementedError def _InstanceStartupMemory(self, instance): """Get the correct startup memory for an instance This function calculates how much memory an instance should be started with, making sure it's a value between the minimum and the maximum memory, but also trying to use no more than the current free memory on the node. @type instance: L{objects.Instance} @param instance: the instance that is being started @rtype: integer @return: memory the instance should be started with """ free_memory = self.GetNodeInfo(hvparams=instance.hvparams)["memory_free"] max_start_mem = min(instance.beparams[constants.BE_MAXMEM], free_memory) start_mem = max(instance.beparams[constants.BE_MINMEM], max_start_mem) return start_mem @classmethod def CheckParameterSyntax(cls, hvparams): """Check the given parameters for validity. This should check the passed set of parameters for validity. Classes should extend, not replace, this function. @type hvparams: dict @param hvparams: dictionary with parameter names/value @raise errors.HypervisorError: when a parameter is not valid """ for key in hvparams: if key not in cls.PARAMETERS: raise errors.HypervisorError("Parameter '%s' is not supported" % key) # cheap tests that run on the master, should not access the world for name, (required, check_fn, errstr, _, _) in cls.PARAMETERS.items(): if name not in hvparams: raise errors.HypervisorError("Parameter '%s' is missing" % name) value = hvparams[name] if not required and not value: continue if not value: raise errors.HypervisorError("Parameter '%s' is required but" " is currently not defined" % (name, )) if check_fn is not None and not check_fn(value): raise errors.HypervisorError("Parameter '%s' fails syntax" " check: %s (current value: '%s')" % (name, errstr, value)) @classmethod def ValidateParameters(cls, hvparams): """Check the given parameters for validity. This should check the passed set of parameters for validity. Classes should extend, not replace, this function. @type hvparams: dict @param hvparams: dictionary with parameter names/value @raise errors.HypervisorError: when a parameter is not valid """ for name, (required, _, _, check_fn, errstr) in cls.PARAMETERS.items(): value = hvparams[name] if not required and not value: continue if check_fn is not None and not check_fn(value): raise errors.HypervisorError("Parameter '%s' fails" " validation: %s (current value: '%s')" % (name, errstr, value)) @classmethod def PowercycleNode(cls, hvparams=None): """Hard powercycle a node using hypervisor specific methods. This method should hard powercycle the node, using whatever methods the hypervisor provides. Note that this means that all instances running on the node must be stopped too. 
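# A worked example of the clamping performed by _InstanceStartupMemory
# above (stand-alone re-implementation for illustration; values in MiB):
# the startup memory is capped by the node's current free memory but never
# drops below the instance's configured minimum.
def startup_memory(minmem, maxmem, free_memory):
    max_start_mem = min(maxmem, free_memory)
    return max(minmem, max_start_mem)

assert startup_memory(512, 4096, 8192) == 4096  # plenty free: use maxmem
assert startup_memory(512, 4096, 1024) == 1024  # limited by free memory
assert startup_memory(512, 4096, 128) == 512    # minmem always wins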
@type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ raise NotImplementedError @staticmethod def GetLinuxNodeInfo(meminfo="/proc/meminfo", cpuinfo="/proc/cpuinfo"): """For Linux systems, return actual OS information. This is an abstraction for all non-hypervisor-based classes, where the node actually sees all the memory and CPUs via the /proc interface and standard commands. The other case is for example Xen, where you only see the hardware resources via xen-specific tools. @param meminfo: name of the file containing meminfo @type meminfo: string @param cpuinfo: name of the file containing cpuinfo @type cpuinfo: string @return: a dict with the following keys (values in MiB): - memory_total: the total memory size on the node - memory_free: the available memory on the node for instances - memory_dom0: the memory used by the node itself, if available - cpu_total: total number of CPUs - cpu_dom0: number of CPUs used by the node OS - cpu_nodes: number of NUMA domains - cpu_sockets: number of physical CPU sockets """ try: data = utils.ReadFile(meminfo).splitlines() except EnvironmentError, err: raise errors.HypervisorError("Failed to list node info: %s" % (err,)) result = {} sum_free = 0 try: for line in data: splitfields = line.split(":", 1) if len(splitfields) > 1: key = splitfields[0].strip() val = splitfields[1].strip() if key == "MemTotal": result["memory_total"] = int(val.split()[0]) / 1024 elif key in ("MemFree", "Buffers", "Cached"): sum_free += int(val.split()[0]) / 1024 elif key == "Active": result["memory_dom0"] = int(val.split()[0]) / 1024 except (ValueError, TypeError), err: raise errors.HypervisorError("Failed to compute memory usage: %s" % (err,)) result["memory_free"] = sum_free cpu_total = 0 try: fh = open(cpuinfo) try: cpu_total = len(re.findall(r"(?m)^processor\s*:\s*[0-9]+\s*$", fh.read())) finally: fh.close() except EnvironmentError, err: raise errors.HypervisorError("Failed to list node info: %s" % (err,)) result["cpu_total"] = cpu_total # We assume that the node OS can access all the CPUs result["cpu_dom0"] = cpu_total # FIXME: export correct data here result["cpu_nodes"] = 1 result["cpu_sockets"] = 1 return result @classmethod def LinuxPowercycle(cls): """Linux-specific powercycle method. """ try: fd = os.open("/proc/sysrq-trigger", os.O_WRONLY) try: os.write(fd, "b") finally: os.close(fd) except OSError: logging.exception("Can't open the sysrq-trigger file") result = utils.RunCmd(["reboot", "-n", "-f"]) if not result: logging.error("Can't run shutdown: %s", result.output) @staticmethod def _FormatVerifyResults(msgs): """Formats the verification results, given a list of errors. @param msgs: list of errors, possibly empty @return: overall problem description if something is wrong, C{None} otherwise """ if msgs: return "; ".join(msgs) else: return None ganeti-2.9.3/lib/hypervisor/hv_chroot.py0000644000000000000000000002523512271422343020326 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Chroot manager hypervisor """ import os import os.path import time import logging from ganeti import constants from ganeti import errors # pylint: disable=W0611 from ganeti import utils from ganeti import objects from ganeti import pathutils from ganeti.hypervisor import hv_base from ganeti.errors import HypervisorError class ChrootManager(hv_base.BaseHypervisor): """Chroot manager. This not-really hypervisor allows ganeti to manage chroots. It has special behaviour and requirements on the OS definition and the node environment: - the start and stop of the chroot environment are done via a script called ganeti-chroot located in the root directory of the first drive, which should be created by the OS definition - this script must accept the start and stop arguments and, on shutdown, it should cleanly shut down the daemons/processes using the chroot - the daemons running in the chroot should only bind to the instance IP (to which the OS create script has access via the instance name) - since some daemons in the node could be listening on the wildcard address, some ports might be unavailable - the instance listing will show no memory usage - on shutdown, the chroot manager will try to find all mountpoints under the root dir of the instance and unmount them - instance alive check is based on whether any process is using the chroot """ _ROOT_DIR = pathutils.RUN_DIR + "/chroot-hypervisor" PARAMETERS = { constants.HV_INIT_SCRIPT: (True, utils.IsNormAbsPath, "must be an absolute normalized path", None, None), } def __init__(self): hv_base.BaseHypervisor.__init__(self) utils.EnsureDirs([(self._ROOT_DIR, constants.RUN_DIRS_MODE)]) @staticmethod def _IsDirLive(path): """Check if a directory looks like a live chroot. """ if not os.path.ismount(path): return False result = utils.RunCmd(["fuser", "-m", path]) return not result.failed @staticmethod def _GetMountSubdirs(path): """Return the list of mountpoints under a given path. """ result = [] for _, mountpoint, _, _ in utils.GetMounts(): if (mountpoint.startswith(path) and mountpoint != path): result.append(mountpoint) result.sort(key=lambda x: x.count("/"), reverse=True) return result @classmethod def _InstanceDir(cls, instance_name): """Return the root directory for an instance. """ return utils.PathJoin(cls._ROOT_DIR, instance_name) def ListInstances(self, hvparams=None): """Get the list of running instances. """ return [name for name in os.listdir(self._ROOT_DIR) if self._IsDirLive(utils.PathJoin(self._ROOT_DIR, name))] def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. @type instance_name: string @param instance_name: the instance name @type hvparams: dict of strings @param hvparams: hvparams to be used with this instance @return: (name, id, memory, vcpus, stat, times) """ dir_name = self._InstanceDir(instance_name) if not self._IsDirLive(dir_name): raise HypervisorError("Instance %s is not running" % instance_name) return (instance_name, 0, 0, 0, 0, 0) def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. @type hvparams: dict of strings @param hvparams: hypervisor parameters @return: [(name, id, memory, vcpus, stat, times),...]
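# A self-contained sketch (illustrative, not Ganeti code) of the liveness
# test _IsDirLive above performs: a chroot instance counts as running when
# its root directory is a mountpoint and at least one process still has
# files open under that mount, which "fuser -m" reports via its exit
# status (0 means at least one user was found).
import os
import subprocess

def is_dir_live(path):
    if not os.path.ismount(path):
        return False
    with open(os.devnull, "w") as devnull:
        rc = subprocess.call(["fuser", "-m", path],
                             stdout=devnull, stderr=devnull)
    return rc == 0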
""" data = [] for file_name in os.listdir(self._ROOT_DIR): path = utils.PathJoin(self._ROOT_DIR, file_name) if self._IsDirLive(path): data.append((file_name, 0, 0, 0, 0, 0)) return data def StartInstance(self, instance, block_devices, startup_paused): """Start an instance. For the chroot manager, we try to mount the block device and execute '/ganeti-chroot start'. """ root_dir = self._InstanceDir(instance.name) if not os.path.exists(root_dir): try: os.mkdir(root_dir) except IOError, err: raise HypervisorError("Failed to start instance %s: %s" % (instance.name, err)) if not os.path.isdir(root_dir): raise HypervisorError("Needed path %s is not a directory" % root_dir) if not os.path.ismount(root_dir): if not block_devices: raise HypervisorError("The chroot manager needs at least one disk") sda_dev_path = block_devices[0][1] result = utils.RunCmd(["mount", sda_dev_path, root_dir]) if result.failed: raise HypervisorError("Can't mount the chroot dir: %s" % result.output) init_script = instance.hvparams[constants.HV_INIT_SCRIPT] result = utils.RunCmd(["chroot", root_dir, init_script, "start"]) if result.failed: raise HypervisorError("Can't run the chroot start script: %s" % result.output) def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. This method has complicated cleanup tests, as we must: - try to kill all leftover processes - try to unmount any additional sub-mountpoints - finally unmount the instance dir """ if name is None: name = instance.name root_dir = self._InstanceDir(name) if not os.path.exists(root_dir) or not self._IsDirLive(root_dir): return # Run the chroot stop script only once if not retry and not force: result = utils.RunCmd(["chroot", root_dir, "/ganeti-chroot", "stop"]) if result.failed: raise HypervisorError("Can't run the chroot stop script: %s" % result.output) if not force: utils.RunCmd(["fuser", "-k", "-TERM", "-m", root_dir]) else: utils.RunCmd(["fuser", "-k", "-KILL", "-m", root_dir]) # 2 seconds at most should be enough for KILL to take action time.sleep(2) if self._IsDirLive(root_dir): if force: raise HypervisorError("Can't stop the processes using the chroot") return def CleanupInstance(self, instance_name): """Cleanup after a stopped instance """ root_dir = self._InstanceDir(instance_name) if not os.path.exists(root_dir): return if self._IsDirLive(root_dir): raise HypervisorError("Processes are still using the chroot") for mpath in self._GetMountSubdirs(root_dir): utils.RunCmd(["umount", mpath]) result = utils.RunCmd(["umount", root_dir]) if result.failed: msg = ("Processes still alive in the chroot: %s" % utils.RunCmd("fuser -vm %s" % root_dir).output) logging.error(msg) raise HypervisorError("Can't umount the chroot dir: %s (%s)" % (result.output, msg)) def RebootInstance(self, instance): """Reboot an instance. This is not (yet) implemented for the chroot manager. """ raise HypervisorError("The chroot manager doesn't implement the" " reboot functionality") def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ # Currently chroots don't have memory limits pass def GetNodeInfo(self, hvparams=None): """Return information about the node. See L{BaseHypervisor.GetLinuxNodeInfo}. 
""" return self.GetLinuxNodeInfo() @classmethod def GetInstanceConsole(cls, instance, primary_node, # pylint: disable=W0221 hvparams, beparams, root_dir=None): """Return information for connecting to the console of an instance. """ if root_dir is None: root_dir = cls._InstanceDir(instance.name) if not os.path.ismount(root_dir): raise HypervisorError("Instance %s is not running" % instance.name) return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_SSH, host=primary_node.name, user=constants.SSH_CONSOLE_USER, command=["chroot", root_dir]) def Verify(self, hvparams=None): """Verify the hypervisor. For the chroot manager, it just checks the existence of the base dir. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against, not used in for chroot @return: Problem description if something is wrong, C{None} otherwise """ if os.path.exists(self._ROOT_DIR): return None else: return "The required directory '%s' does not exist" % self._ROOT_DIR @classmethod def PowercycleNode(cls, hvparams=None): """Chroot powercycle, just a wrapper over Linux powercycle. @type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ cls.LinuxPowercycle() def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance. @type cluster_name: string @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: hostname (usually ip) of the target node @type live: boolean @param live: whether to do a live or non-live migration """ raise HypervisorError("Migration not supported by the chroot hypervisor") def GetMigrationStatus(self, instance): """Get the migration status @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ raise HypervisorError("Migration not supported by the chroot hypervisor") ganeti-2.9.3/lib/hypervisor/hv_lxc.py0000644000000000000000000003717612271422343017625 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """LXC hypervisor """ import os import os.path import time import logging from ganeti import constants from ganeti import errors # pylint: disable=W0611 from ganeti import utils from ganeti import objects from ganeti import pathutils from ganeti.hypervisor import hv_base from ganeti.errors import HypervisorError class LXCHypervisor(hv_base.BaseHypervisor): """LXC-based virtualization. 
TODO: - move hardcoded parameters into hypervisor parameters, once we have the container-parameter support Problems/issues: - LXC is very temperamental; in daemon mode, it succeeds or fails in launching the instance silently, without any error indication, and when failing it can leave network interfaces around, and future successful startups will list the instance twice """ _ROOT_DIR = pathutils.RUN_DIR + "/lxc" _DEVS = [ "c 1:3", # /dev/null "c 1:5", # /dev/zero "c 1:7", # /dev/full "c 1:8", # /dev/random "c 1:9", # /dev/urandom "c 1:10", # /dev/aio "c 5:0", # /dev/tty "c 5:1", # /dev/console "c 5:2", # /dev/ptmx "c 136:*", # first block of Unix98 PTY slaves ] _DENIED_CAPABILITIES = [ "mac_override", # Allow MAC configuration or state changes # TODO: remove sys_admin too, for safety #"sys_admin", # Perform a range of system administration operations "sys_boot", # Use reboot(2) and kexec_load(2) "sys_module", # Load and unload kernel modules "sys_time", # Set system clock, set real-time (hardware) clock ] _DIR_MODE = 0755 PARAMETERS = { constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK, } def __init__(self): hv_base.BaseHypervisor.__init__(self) utils.EnsureDirs([(self._ROOT_DIR, self._DIR_MODE)]) @staticmethod def _GetMountSubdirs(path): """Return the list of mountpoints under a given path. """ result = [] for _, mountpoint, _, _ in utils.GetMounts(): if (mountpoint.startswith(path) and mountpoint != path): result.append(mountpoint) result.sort(key=lambda x: x.count("/"), reverse=True) return result @classmethod def _InstanceDir(cls, instance_name): """Return the root directory for an instance. """ return utils.PathJoin(cls._ROOT_DIR, instance_name) @classmethod def _InstanceConfFile(cls, instance_name): """Return the configuration file for an instance. """ return utils.PathJoin(cls._ROOT_DIR, instance_name + ".conf") @classmethod def _InstanceLogFile(cls, instance_name): """Return the log file for an instance. """ return utils.PathJoin(cls._ROOT_DIR, instance_name + ".log") @classmethod def _GetCgroupMountPoint(cls): for _, mountpoint, fstype, _ in utils.GetMounts(): if fstype == "cgroup": return mountpoint raise errors.HypervisorError("The cgroup filesystem is not mounted") @classmethod def _GetCgroupCpuList(cls, instance_name): """Return the list of CPU ids for an instance. """ cgroup = cls._GetCgroupMountPoint() try: cpus = utils.ReadFile(utils.PathJoin(cgroup, 'lxc', instance_name, "cpuset.cpus")) except EnvironmentError, err: raise errors.HypervisorError("Getting CPU list for instance" " %s failed: %s" % (instance_name, err)) return utils.ParseCpuMask(cpus) @classmethod def _GetCgroupMemoryLimit(cls, instance_name): """Return the memory limit for an instance """ cgroup = cls._GetCgroupMountPoint() try: memory = int(utils.ReadFile(utils.PathJoin(cgroup, 'lxc', instance_name, "memory.limit_in_bytes"))) except EnvironmentError: # memory resource controller may be disabled, ignore memory = 0 return memory def ListInstances(self, hvparams=None): """Get the list of running instances. """ return [iinfo[0] for iinfo in self.GetAllInstancesInfo()] def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. 
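# --- Illustrative sketch, not part of the original module ---
# _GetMountSubdirs above sorts mountpoints by the number of path separators,
# descending, so nested mounts are unmounted before their parents:
def deepest_first(mountpoints, root):
  under = [m for m in mountpoints if m.startswith(root) and m != root]
  return sorted(under, key=lambda m: m.count("/"), reverse=True)

# deepest_first(["/r", "/r/proc", "/r/dev/pts", "/r/dev"], "/r")
# -> ["/r/dev/pts", "/r/proc", "/r/dev"]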
@type instance_name: string @param instance_name: the instance name @type hvparams: dict of strings @param hvparams: hvparams to be used with this instance @rtype: tuple of strings @return: (name, id, memory, vcpus, stat, times) """ # TODO: read container info from the cgroup mountpoint result = utils.RunCmd(["lxc-info", "-s", "-n", instance_name]) if result.failed: raise errors.HypervisorError("Running lxc-info failed: %s" % result.output) # lxc-info output examples: # 'state: STOPPED # 'state: RUNNING _, state = result.stdout.rsplit(None, 1) if state != "RUNNING": return None cpu_list = self._GetCgroupCpuList(instance_name) memory = self._GetCgroupMemoryLimit(instance_name) / (1024 ** 2) return (instance_name, 0, memory, len(cpu_list), 0, 0) def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. @type hvparams: dict of strings @param hvparams: hypervisor parameter @return: [(name, id, memory, vcpus, stat, times),...] """ data = [] for name in os.listdir(self._ROOT_DIR): try: info = self.GetInstanceInfo(name) except errors.HypervisorError: continue if info: data.append(info) return data def _CreateConfigFile(self, instance, root_dir): """Create an lxc.conf file for an instance. """ out = [] # hostname out.append("lxc.utsname = %s" % instance.name) # separate pseudo-TTY instances out.append("lxc.pts = 255") # standard TTYs out.append("lxc.tty = 6") # console log file console_log = utils.PathJoin(self._ROOT_DIR, instance.name + ".console") try: utils.WriteFile(console_log, data="", mode=constants.SECURE_FILE_MODE) except EnvironmentError, err: raise errors.HypervisorError("Creating console log file %s for" " instance %s failed: %s" % (console_log, instance.name, err)) out.append("lxc.console = %s" % console_log) # root FS out.append("lxc.rootfs = %s" % root_dir) # TODO: additional mounts, if we disable CAP_SYS_ADMIN # CPUs if instance.hvparams[constants.HV_CPU_MASK]: cpu_list = utils.ParseCpuMask(instance.hvparams[constants.HV_CPU_MASK]) cpus_in_mask = len(cpu_list) if cpus_in_mask != instance.beparams["vcpus"]: raise errors.HypervisorError("Number of VCPUs (%d) doesn't match" " the number of CPUs in the" " cpu_mask (%d)" % (instance.beparams["vcpus"], cpus_in_mask)) out.append("lxc.cgroup.cpuset.cpus = %s" % instance.hvparams[constants.HV_CPU_MASK]) # Memory # Conditionally enable, memory resource controller might be disabled cgroup = self._GetCgroupMountPoint() if os.path.exists(utils.PathJoin(cgroup, 'memory.limit_in_bytes')): out.append("lxc.cgroup.memory.limit_in_bytes = %dM" % instance.beparams[constants.BE_MAXMEM]) if os.path.exists(utils.PathJoin(cgroup, 'memory.memsw.limit_in_bytes')): out.append("lxc.cgroup.memory.memsw.limit_in_bytes = %dM" % instance.beparams[constants.BE_MAXMEM]) # Device control # deny direct device access out.append("lxc.cgroup.devices.deny = a") for devinfo in self._DEVS: out.append("lxc.cgroup.devices.allow = %s rw" % devinfo) # Networking for idx, nic in enumerate(instance.nics): out.append("# NIC %d" % idx) mode = nic.nicparams[constants.NIC_MODE] link = nic.nicparams[constants.NIC_LINK] if mode == constants.NIC_MODE_BRIDGED: out.append("lxc.network.type = veth") out.append("lxc.network.link = %s" % link) else: raise errors.HypervisorError("LXC hypervisor only supports" " bridged mode (NIC %d has mode %s)" % (idx, mode)) out.append("lxc.network.hwaddr = %s" % nic.mac) out.append("lxc.network.flags = up") # Capabilities for cap in self._DENIED_CAPABILITIES: out.append("lxc.cap.drop = %s" % cap) return "\n".join(out) + "\n" def 
StartInstance(self, instance, block_devices, startup_paused): """Start an instance. For LXC, we try to mount the block device and execute 'lxc-start'. We use volatile containers. """ root_dir = self._InstanceDir(instance.name) try: utils.EnsureDirs([(root_dir, self._DIR_MODE)]) except errors.GenericError, err: raise HypervisorError("Creating instance directory failed: %s" % str(err)) conf_file = self._InstanceConfFile(instance.name) utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir)) log_file = self._InstanceLogFile(instance.name) if not os.path.exists(log_file): try: utils.WriteFile(log_file, data="", mode=constants.SECURE_FILE_MODE) except EnvironmentError, err: raise errors.HypervisorError("Creating hypervisor log file %s for" " instance %s failed: %s" % (log_file, instance.name, err)) if not os.path.ismount(root_dir): if not block_devices: raise HypervisorError("LXC needs at least one disk") sda_dev_path = block_devices[0][1] result = utils.RunCmd(["mount", sda_dev_path, root_dir]) if result.failed: raise HypervisorError("Mounting the root dir of LXC instance %s" " failed: %s" % (instance.name, result.output)) result = utils.RunCmd(["lxc-start", "-n", instance.name, "-o", log_file, "-l", "DEBUG", "-f", conf_file, "-d"]) if result.failed: raise HypervisorError("Running lxc-start failed: %s" % result.output) def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. This method has complicated cleanup tests, as we must: - try to kill all leftover processes - try to unmount any additional sub-mountpoints - finally unmount the instance dir """ if name is None: name = instance.name root_dir = self._InstanceDir(name) if not os.path.exists(root_dir): return if name in self.ListInstances(): # Signal init to shutdown; this is a hack if not retry and not force: result = utils.RunCmd(["chroot", root_dir, "poweroff"]) if result.failed: raise HypervisorError("Running 'poweroff' on the instance" " failed: %s" % result.output) time.sleep(2) result = utils.RunCmd(["lxc-stop", "-n", name]) if result.failed: logging.warning("Error while doing lxc-stop for %s: %s", name, result.output) if not os.path.ismount(root_dir): return for mpath in self._GetMountSubdirs(root_dir): result = utils.RunCmd(["umount", mpath]) if result.failed: logging.warning("Error while umounting subpath %s for instance %s: %s", mpath, name, result.output) result = utils.RunCmd(["umount", root_dir]) if result.failed and force: msg = ("Processes still alive in the chroot: %s" % utils.RunCmd("fuser -vm %s" % root_dir).output) logging.error(msg) raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" % (result.output, msg)) def RebootInstance(self, instance): """Reboot an instance. This is not (yet) implemented (in Ganeti) for the LXC hypervisor. """ # TODO: implement reboot raise HypervisorError("The LXC hypervisor doesn't implement the" " reboot functionality") def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ # Currently lxc instances don't have memory limits pass def GetNodeInfo(self, hvparams=None): """Return information about the node. See L{BaseHypervisor.GetLinuxNodeInfo}.
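# --- Illustrative sketch, not part of the original module ---
# StartInstance above reduces to: mount the first disk on the instance root,
# write an lxc.conf, then launch a daemonized container with debug logging
# (useful because LXC can fail silently). The command it assembles, with
# hypothetical paths:
name = "instance1.example.com"
cmd = ["lxc-start",
       "-n", name,                                 # container name
       "-o", "/var/run/ganeti/lxc/%s.log" % name,  # log file
       "-l", "DEBUG",                              # verbose log level
       "-f", "/var/run/ganeti/lxc/%s.conf" % name, # generated config
       "-d"]                                       # daemonize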
""" return self.GetLinuxNodeInfo() @classmethod def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return a command for connecting to the console of an instance. """ return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_SSH, host=primary_node.name, user=constants.SSH_CONSOLE_USER, command=["lxc-console", "-n", instance.name]) def Verify(self, hvparams=None): """Verify the hypervisor. For the LXC manager, it just checks the existence of the base dir. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against; not used here @return: Problem description if something is wrong, C{None} otherwise """ msgs = [] if not os.path.exists(self._ROOT_DIR): msgs.append("The required directory '%s' does not exist" % self._ROOT_DIR) try: self._GetCgroupMountPoint() except errors.HypervisorError, err: msgs.append(str(err)) return self._FormatVerifyResults(msgs) @classmethod def PowercycleNode(cls, hvparams=None): """LXC powercycle, just a wrapper over Linux powercycle. @type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ cls.LinuxPowercycle() def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance. @type cluster_name: string @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: hostname (usually ip) of the target node @type live: boolean @param live: whether to do a live or non-live migration """ raise HypervisorError("Migration is not supported by the LXC hypervisor") def GetMigrationStatus(self, instance): """Get the migration status @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ raise HypervisorError("Migration is not supported by the LXC hypervisor") ganeti-2.9.3/lib/hypervisor/hv_fake.py0000644000000000000000000002537512271422343017743 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Fake hypervisor """ import os import os.path import logging from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import objects from ganeti import pathutils from ganeti.hypervisor import hv_base class FakeHypervisor(hv_base.BaseHypervisor): """Fake hypervisor interface. This can be used for testing the ganeti code without having to have a real virtualisation software installed. 
""" PARAMETERS = { constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK, } CAN_MIGRATE = True _ROOT_DIR = pathutils.RUN_DIR + "/fake-hypervisor" def __init__(self): hv_base.BaseHypervisor.__init__(self) utils.EnsureDirs([(self._ROOT_DIR, constants.RUN_DIRS_MODE)]) def ListInstances(self, hvparams=None): """Get the list of running instances. """ return os.listdir(self._ROOT_DIR) def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. @type instance_name: string @param instance_name: the instance name @type hvparams: dict of strings @param hvparams: hvparams to be used with this instance @return: tuple of (name, id, memory, vcpus, stat, times) """ file_name = self._InstanceFile(instance_name) if not os.path.exists(file_name): return None try: fh = open(file_name, "r") try: inst_id = fh.readline().strip() memory = utils.TryConvert(int, fh.readline().strip()) vcpus = utils.TryConvert(int, fh.readline().strip()) stat = "---b-" times = "0" return (instance_name, inst_id, memory, vcpus, stat, times) finally: fh.close() except IOError, err: raise errors.HypervisorError("Failed to list instance %s: %s" % (instance_name, err)) def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. @type hvparams: dict of strings @param hvparams: hypervisor parameter @return: list of tuples (name, id, memory, vcpus, stat, times) """ data = [] for file_name in os.listdir(self._ROOT_DIR): try: fh = open(utils.PathJoin(self._ROOT_DIR, file_name), "r") inst_id = "-1" memory = 0 vcpus = 1 stat = "-----" times = "-1" try: inst_id = fh.readline().strip() memory = utils.TryConvert(int, fh.readline().strip()) vcpus = utils.TryConvert(int, fh.readline().strip()) stat = "---b-" times = "0" finally: fh.close() data.append((file_name, inst_id, memory, vcpus, stat, times)) except IOError, err: raise errors.HypervisorError("Failed to list instances: %s" % err) return data @classmethod def _InstanceFile(cls, instance_name): """Compute the instance file for an instance name. """ return utils.PathJoin(cls._ROOT_DIR, instance_name) def _IsAlive(self, instance_name): """Checks if an instance is alive. """ file_name = self._InstanceFile(instance_name) return os.path.exists(file_name) def _MarkUp(self, instance, memory): """Mark the instance as running. This does no checks, which should be done by its callers. """ file_name = self._InstanceFile(instance.name) fh = file(file_name, "w") try: fh.write("0\n%d\n%d\n" % (memory, instance.beparams[constants.BE_VCPUS])) finally: fh.close() def _MarkDown(self, instance_name): """Mark the instance as running. This does no checks, which should be done by its callers. """ file_name = self._InstanceFile(instance_name) utils.RemoveFile(file_name) def StartInstance(self, instance, block_devices, startup_paused): """Start an instance. For the fake hypervisor, it just creates a file in the base dir, creating an exception if it already exists. We don't actually handle race conditions properly, since these are *FAKE* instances. """ if self._IsAlive(instance.name): raise errors.HypervisorError("Failed to start instance %s: %s" % (instance.name, "already running")) try: self._MarkUp(instance, self._InstanceStartupMemory(instance)) except IOError, err: raise errors.HypervisorError("Failed to start instance %s: %s" % (instance.name, err)) def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. For the fake hypervisor, this just removes the file in the base dir, if it exist, otherwise we raise an exception. 
""" if name is None: name = instance.name if not self._IsAlive(name): raise errors.HypervisorError("Failed to stop instance %s: %s" % (name, "not running")) self._MarkDown(name) def RebootInstance(self, instance): """Reboot an instance. For the fake hypervisor, this does nothing. """ return def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ if not self._IsAlive(instance.name): raise errors.HypervisorError("Failed to balloon memory for %s: %s" % (instance.name, "not running")) try: self._MarkUp(instance, mem) except EnvironmentError, err: raise errors.HypervisorError("Failed to balloon memory for %s: %s" % (instance.name, utils.ErrnoOrStr(err))) def GetNodeInfo(self, hvparams=None): """Return information about the node. See L{BaseHypervisor.GetLinuxNodeInfo}. """ result = self.GetLinuxNodeInfo() # substract running instances all_instances = self.GetAllInstancesInfo() result["memory_free"] -= min(result["memory_free"], sum([row[2] for row in all_instances])) return result @classmethod def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return information for connecting to the console of an instance. """ return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_MESSAGE, message=("Console not available for fake" " hypervisor")) def Verify(self, hvparams=None): """Verify the hypervisor. For the fake hypervisor, it just checks the existence of the base dir. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against; not used for fake hypervisors @return: Problem description if something is wrong, C{None} otherwise """ if os.path.exists(self._ROOT_DIR): return None else: return "The required directory '%s' does not exist" % self._ROOT_DIR @classmethod def PowercycleNode(cls, hvparams=None): """Fake hypervisor powercycle, just a wrapper over Linux powercycle. @type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ cls.LinuxPowercycle() def AcceptInstance(self, instance, info, target): """Prepare to accept an instance. @type instance: L{objects.Instance} @param instance: instance to be accepted @type info: string @param info: instance info, not used @type target: string @param target: target host (usually ip), on this node """ if self._IsAlive(instance.name): raise errors.HypervisorError("Can't accept instance, already running") def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance. @type cluster_name: string @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: hostname (usually ip) of the target node @type live: boolean @param live: whether to do a live or non-live migration """ logging.debug("Fake hypervisor migrating %s to %s (live=%s)", instance, target, live) def FinalizeMigrationDst(self, instance, info, success): """Finalize the instance migration on the target node. For the fake hv, this just marks the instance up. 
@type instance: L{objects.Instance} @param instance: instance whose migration is being finalized @type info: string/data (opaque) @param info: migration information, from the source node @type success: boolean @param success: whether the migration was a success or a failure """ if success: self._MarkUp(instance, self._InstanceStartupMemory(instance)) else: # ensure it's down self._MarkDown(instance.name) def PostMigrationCleanup(self, instance): """Clean-up after a migration. To be executed on the source node. @type instance: L{objects.Instance} @param instance: the instance that was migrated """ pass def FinalizeMigrationSource(self, instance, success, live): """Finalize the instance migration on the source node. @type instance: L{objects.Instance} @param instance: the instance that was migrated @type success: bool @param success: whether the migration succeeded or not @type live: bool @param live: whether the user requested a live migration or not """ # pylint: disable=W0613 if success: self._MarkDown(instance.name) def GetMigrationStatus(self, instance): """Get the migration status The fake hypervisor migration always succeeds. @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ return objects.MigrationStatus(status=constants.HV_MIGRATION_COMPLETED) ganeti-2.9.3/lib/hypervisor/hv_xen.py0000644000000000000000000012360212271422343017617 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Xen hypervisors """ import logging import errno import string # pylint: disable=W0402 import shutil from cStringIO import StringIO from ganeti import constants from ganeti import errors from ganeti import utils from ganeti.hypervisor import hv_base from ganeti import netutils from ganeti import objects from ganeti import pathutils XEND_CONFIG_FILE = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "xend-config.sxp") XL_CONFIG_FILE = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "xen/xl.conf") VIF_BRIDGE_SCRIPT = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "scripts/vif-bridge") _DOM0_NAME = "Domain-0" _DISK_LETTERS = string.ascii_lowercase _FILE_DRIVER_MAP = { constants.FD_LOOP: "file", constants.FD_BLKTAP: "tap:aio", constants.FD_BLKTAP2: "tap2:tapdisk:aio", } def _CreateConfigCpus(cpu_mask): """Create a CPU config string for Xen's config file. 
""" # Convert the string CPU mask to a list of list of int's cpu_list = utils.ParseMultiCpuMask(cpu_mask) if len(cpu_list) == 1: all_cpu_mapping = cpu_list[0] if all_cpu_mapping == constants.CPU_PINNING_OFF: # If CPU pinning has 1 entry that's "all", then remove the # parameter from the config file return None else: # If CPU pinning has one non-all entry, mapping all vCPUS (the entire # VM) to one physical CPU, using format 'cpu = "C"' return "cpu = \"%s\"" % ",".join(map(str, all_cpu_mapping)) else: def _GetCPUMap(vcpu): if vcpu[0] == constants.CPU_PINNING_ALL_VAL: cpu_map = constants.CPU_PINNING_ALL_XEN else: cpu_map = ",".join(map(str, vcpu)) return "\"%s\"" % cpu_map # build the result string in format 'cpus = [ "c", "c", "c" ]', # where each c is a physical CPU number, a range, a list, or any # combination return "cpus = [ %s ]" % ", ".join(map(_GetCPUMap, cpu_list)) def _RunInstanceList(fn, instance_list_errors): """Helper function for L{_GetInstanceList} to retrieve the list of instances from xen. @type fn: callable @param fn: Function to query xen for the list of instances @type instance_list_errors: list @param instance_list_errors: Error list @rtype: list """ result = fn() if result.failed: logging.error("Retrieving the instance list from xen failed (%s): %s", result.fail_reason, result.output) instance_list_errors.append(result) raise utils.RetryAgain() # skip over the heading return result.stdout.splitlines() def _ParseInstanceList(lines, include_node): """Parses the output of listing instances by xen. @type lines: list @param lines: Result of retrieving the instance list from xen @type include_node: boolean @param include_node: If True, return information for Dom0 @return: list of tuple containing (name, id, memory, vcpus, state, time spent) """ result = [] # Iterate through all lines while ignoring header for line in lines[1:]: # The format of lines is: # Name ID Mem(MiB) VCPUs State Time(s) # Domain-0 0 3418 4 r----- 266.2 data = line.split() if len(data) != 6: raise errors.HypervisorError("Can't parse instance list," " line: %s" % line) try: data[1] = int(data[1]) data[2] = int(data[2]) data[3] = int(data[3]) data[5] = float(data[5]) except (TypeError, ValueError), err: raise errors.HypervisorError("Can't parse instance list," " line: %s, error: %s" % (line, err)) # skip the Domain-0 (optional) if include_node or data[0] != _DOM0_NAME: result.append(data) return result def _GetInstanceList(fn, include_node, _timeout=5): """Return the list of running instances. See L{_RunInstanceList} and L{_ParseInstanceList} for parameter details. """ instance_list_errors = [] try: lines = utils.Retry(_RunInstanceList, (0.3, 1.5, 1.0), _timeout, args=(fn, instance_list_errors)) except utils.RetryTimeout: if instance_list_errors: instance_list_result = instance_list_errors.pop() errmsg = ("listing instances failed, timeout exceeded (%s): %s" % (instance_list_result.fail_reason, instance_list_result.output)) else: errmsg = "listing instances failed" raise errors.HypervisorError(errmsg) return _ParseInstanceList(lines, include_node) def _IsInstanceRunning(instance_info): return instance_info == "r-----" \ or instance_info == "-b----" def _IsInstanceShutdown(instance_info): return instance_info == "---s--" def _ParseNodeInfo(info): """Return information about the node. 
@return: a dict with the following keys (memory values in MiB): - memory_total: the total memory size on the node - memory_free: the available memory on the node for instances - nr_cpus: total number of CPUs - nr_nodes: in a NUMA system, the number of domains - nr_sockets: the number of physical CPU sockets in the node - hv_version: the hypervisor version in the form (major, minor) """ result = {} cores_per_socket = threads_per_core = nr_cpus = None xen_major, xen_minor = None, None memory_total = None memory_free = None for line in info.splitlines(): fields = line.split(":", 1) if len(fields) < 2: continue (key, val) = map(lambda s: s.strip(), fields) # Note: in Xen 3, memory has changed to total_memory if key in ("memory", "total_memory"): memory_total = int(val) elif key == "free_memory": memory_free = int(val) elif key == "nr_cpus": nr_cpus = result["cpu_total"] = int(val) elif key == "nr_nodes": result["cpu_nodes"] = int(val) elif key == "cores_per_socket": cores_per_socket = int(val) elif key == "threads_per_core": threads_per_core = int(val) elif key == "xen_major": xen_major = int(val) elif key == "xen_minor": xen_minor = int(val) if None not in [cores_per_socket, threads_per_core, nr_cpus]: result["cpu_sockets"] = nr_cpus / (cores_per_socket * threads_per_core) if memory_free is not None: result["memory_free"] = memory_free if memory_total is not None: result["memory_total"] = memory_total if not (xen_major is None or xen_minor is None): result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor) return result def _MergeInstanceInfo(info, instance_list): """Updates node information from L{_ParseNodeInfo} with instance info. @type info: dict @param info: Result from L{_ParseNodeInfo} @type instance_list: list of tuples @param instance_list: list of instance information; one tuple per instance @rtype: dict """ total_instmem = 0 for (name, _, mem, vcpus, _, _) in instance_list: if name == _DOM0_NAME: info["memory_dom0"] = mem info["cpu_dom0"] = vcpus # Include Dom0 in total memory usage total_instmem += mem memory_free = info.get("memory_free") memory_total = info.get("memory_total") # Calculate memory used by hypervisor if None not in [memory_total, memory_free, total_instmem]: info["memory_hv"] = memory_total - memory_free - total_instmem return info def _GetNodeInfo(info, instance_list): """Combines L{_MergeInstanceInfo} and L{_ParseNodeInfo}. @type instance_list: list of tuples @param instance_list: list of instance information; one tuple per instance """ return _MergeInstanceInfo(_ParseNodeInfo(info), instance_list) def _GetConfigFileDiskData(block_devices, blockdev_prefix, _letters=_DISK_LETTERS): """Get disk directives for Xen config file. This method builds the xen config disk directive according to the given disk_template and block_devices. @param block_devices: list of tuples (cfdev, rldev): - cfdev: dict containing ganeti config disk part - rldev: ganeti.block.bdev.BlockDev object @param blockdev_prefix: a string containing blockdevice prefix, e.g. 
"sd" for /dev/sda @return: string containing disk directive for xen instance config file """ if len(block_devices) > len(_letters): raise errors.HypervisorError("Too many disks") disk_data = [] for sd_suffix, (cfdev, dev_path) in zip(_letters, block_devices): sd_name = blockdev_prefix + sd_suffix if cfdev.mode == constants.DISK_RDWR: mode = "w" else: mode = "r" if cfdev.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]: driver = _FILE_DRIVER_MAP[cfdev.physical_id[0]] else: driver = "phy" disk_data.append("'%s:%s,%s,%s'" % (driver, dev_path, sd_name, mode)) return disk_data class XenHypervisor(hv_base.BaseHypervisor): """Xen generic hypervisor interface This is the Xen base class used for both Xen PVM and HVM. It contains all the functionality that is identical for both. """ CAN_MIGRATE = True REBOOT_RETRY_COUNT = 60 REBOOT_RETRY_INTERVAL = 10 _ROOT_DIR = pathutils.RUN_DIR + "/xen-hypervisor" _NICS_DIR = _ROOT_DIR + "/nic" # contains NICs' info _DIRS = [_ROOT_DIR, _NICS_DIR] ANCILLARY_FILES = [ XEND_CONFIG_FILE, XL_CONFIG_FILE, VIF_BRIDGE_SCRIPT, ] ANCILLARY_FILES_OPT = [ XL_CONFIG_FILE, ] def __init__(self, _cfgdir=None, _run_cmd_fn=None, _cmd=None): hv_base.BaseHypervisor.__init__(self) if _cfgdir is None: self._cfgdir = pathutils.XEN_CONFIG_DIR else: self._cfgdir = _cfgdir if _run_cmd_fn is None: self._run_cmd_fn = utils.RunCmd else: self._run_cmd_fn = _run_cmd_fn self._cmd = _cmd @staticmethod def _GetCommandFromHvparams(hvparams): """Returns the Xen command extracted from the given hvparams. @type hvparams: dict of strings @param hvparams: hypervisor parameters """ if hvparams is None or constants.HV_XEN_CMD not in hvparams: raise errors.HypervisorError("Cannot determine xen command.") else: return hvparams[constants.HV_XEN_CMD] def _GetCommand(self, hvparams): """Returns Xen command to use. @type hvparams: dict of strings @param hvparams: hypervisor parameters """ if self._cmd is None: cmd = XenHypervisor._GetCommandFromHvparams(hvparams) else: cmd = self._cmd if cmd not in constants.KNOWN_XEN_COMMANDS: raise errors.ProgrammerError("Unknown Xen command '%s'" % cmd) return cmd def _RunXen(self, args, hvparams): """Wrapper around L{utils.process.RunCmd} to run Xen command. @type hvparams: dict of strings @param hvparams: dictionary of hypervisor params @see: L{utils.process.RunCmd} """ cmd = [self._GetCommand(hvparams)] cmd.extend(args) return self._run_cmd_fn(cmd) def _ConfigFileName(self, instance_name): """Get the config file name for an instance. @param instance_name: instance name @type instance_name: str @return: fully qualified path to instance config file @rtype: str """ return utils.PathJoin(self._cfgdir, instance_name) @classmethod def _WriteNICInfoFile(cls, instance_name, idx, nic): """Write the Xen config file for the instance. This version of the function just writes the config file from static data. 
""" dirs = [(dname, constants.RUN_DIRS_MODE) for dname in cls._DIRS + [cls._InstanceNICDir(instance_name)]] utils.EnsureDirs(dirs) cfg_file = cls._InstanceNICFile(instance_name, idx) data = StringIO() if nic.netinfo: netinfo = objects.Network.FromDict(nic.netinfo) data.write("NETWORK_NAME=%s\n" % netinfo.name) if netinfo.network: data.write("NETWORK_SUBNET=%s\n" % netinfo.network) if netinfo.gateway: data.write("NETWORK_GATEWAY=%s\n" % netinfo.gateway) if netinfo.network6: data.write("NETWORK_SUBNET6=%s\n" % netinfo.network6) if netinfo.gateway6: data.write("NETWORK_GATEWAY6=%s\n" % netinfo.gateway6) if netinfo.mac_prefix: data.write("NETWORK_MAC_PREFIX=%s\n" % netinfo.mac_prefix) if netinfo.tags: data.write("NETWORK_TAGS=%s\n" % r"\ ".join(netinfo.tags)) data.write("MAC=%s\n" % nic.mac) data.write("IP=%s\n" % nic.ip) data.write("MODE=%s\n" % nic.nicparams[constants.NIC_MODE]) data.write("LINK=%s\n" % nic.nicparams[constants.NIC_LINK]) try: utils.WriteFile(cfg_file, data=data.getvalue()) except EnvironmentError, err: raise errors.HypervisorError("Cannot write Xen instance configuration" " file %s: %s" % (cfg_file, err)) @classmethod def _InstanceNICDir(cls, instance_name): """Returns the directory holding the tap device files for a given instance. """ return utils.PathJoin(cls._NICS_DIR, instance_name) @classmethod def _InstanceNICFile(cls, instance_name, seq): """Returns the name of the file containing the tap device for a given NIC """ return utils.PathJoin(cls._InstanceNICDir(instance_name), str(seq)) @classmethod def _GetConfig(cls, instance, startup_memory, block_devices): """Build Xen configuration for an instance. """ raise NotImplementedError def _WriteConfigFile(self, instance_name, data): """Write the Xen config file for the instance. This version of the function just writes the config file from static data. """ # just in case it exists utils.RemoveFile(utils.PathJoin(self._cfgdir, "auto", instance_name)) cfg_file = self._ConfigFileName(instance_name) try: utils.WriteFile(cfg_file, data=data) except EnvironmentError, err: raise errors.HypervisorError("Cannot write Xen instance configuration" " file %s: %s" % (cfg_file, err)) def _ReadConfigFile(self, instance_name): """Returns the contents of the instance config file. """ filename = self._ConfigFileName(instance_name) try: file_content = utils.ReadFile(filename) except EnvironmentError, err: raise errors.HypervisorError("Failed to load Xen config file: %s" % err) return file_content def _RemoveConfigFile(self, instance_name): """Remove the xen configuration file. """ utils.RemoveFile(self._ConfigFileName(instance_name)) try: shutil.rmtree(self._InstanceNICDir(instance_name)) except OSError, err: if err.errno != errno.ENOENT: raise def _StashConfigFile(self, instance_name): """Move the Xen config file to the log directory and return its new path. """ old_filename = self._ConfigFileName(instance_name) base = ("%s-%s" % (instance_name, utils.TimestampForFilename())) new_filename = utils.PathJoin(pathutils.LOG_XEN_DIR, base) utils.RenameFile(old_filename, new_filename) return new_filename def _GetInstanceList(self, include_node, hvparams): """Wrapper around module level L{_GetInstanceList}. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be used on this node """ return _GetInstanceList(lambda: self._RunXen(["list"], hvparams), include_node) def ListInstances(self, hvparams=None): """Get the list of running instances. 
""" instance_list = self._GetInstanceList(False, hvparams) names = [info[0] for info in instance_list] return names def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. @type instance_name: string @param instance_name: the instance name @type hvparams: dict of strings @param hvparams: the instance's hypervisor params @return: tuple (name, id, memory, vcpus, stat, times) """ instance_list = self._GetInstanceList(instance_name == _DOM0_NAME, hvparams) result = None for data in instance_list: if data[0] == instance_name: result = data break return result def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. @type hvparams: dict of strings @param hvparams: hypervisor parameters @return: list of tuples (name, id, memory, vcpus, stat, times) """ return self._GetInstanceList(False, hvparams) def _MakeConfigFile(self, instance, startup_memory, block_devices): """Gather configuration details and write to disk. See L{_GetConfig} for arguments. """ buf = StringIO() buf.write("# Automatically generated by Ganeti. Do not edit!\n") buf.write("\n") buf.write(self._GetConfig(instance, startup_memory, block_devices)) buf.write("\n") self._WriteConfigFile(instance.name, buf.getvalue()) def StartInstance(self, instance, block_devices, startup_paused): """Start an instance. """ startup_memory = self._InstanceStartupMemory(instance) self._MakeConfigFile(instance, startup_memory, block_devices) cmd = ["create"] if startup_paused: cmd.append("-p") cmd.append(self._ConfigFileName(instance.name)) result = self._RunXen(cmd, instance.hvparams) if result.failed: # Move the Xen configuration file to the log directory to avoid # leaving a stale config file behind. stashed_config = self._StashConfigFile(instance.name) raise errors.HypervisorError("Failed to start instance %s: %s (%s). Moved" " config file to %s" % (instance.name, result.fail_reason, result.output, stashed_config)) def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. """ if name is None: name = instance.name return self._StopInstance(name, force, instance.hvparams) def _ShutdownInstance(self, name, hvparams): """Shutdown an instance if the instance is running. @type name: string @param name: name of the instance to stop @type hvparams: dict of string @param hvparams: hypervisor parameters of the instance The '-w' flag waits for shutdown to complete which avoids the need to poll in the case where we want to destroy the domain immediately after shutdown. """ instance_info = self.GetInstanceInfo(name, hvparams=hvparams) if instance_info is None or _IsInstanceShutdown(instance_info[4]): logging.info("Failed to shutdown instance %s, not running", name) return None return self._RunXen(["shutdown", "-w", name], hvparams) def _DestroyInstance(self, name, hvparams): """Destroy an instance if the instance if the instance exists. @type name: string @param name: name of the instance to destroy @type hvparams: dict of string @param hvparams: hypervisor parameters of the instance """ instance_info = self.GetInstanceInfo(name, hvparams=hvparams) if instance_info is None: logging.info("Failed to destroy instance %s, does not exist", name) return None return self._RunXen(["destroy", name], hvparams) def _StopInstance(self, name, force, hvparams): """Stop an instance. 
@type name: string @param name: name of the instance to destroy @type force: boolean @param force: whether to do a "hard" stop (destroy) @type hvparams: dict of string @param hvparams: hypervisor parameters of the instance """ if force: result = self._DestroyInstance(name, hvparams) else: self._ShutdownInstance(name, hvparams) result = self._DestroyInstance(name, hvparams) if result is not None and result.failed and \ self.GetInstanceInfo(name, hvparams=hvparams) is not None: raise errors.HypervisorError("Failed to stop instance %s: %s, %s" % (name, result.fail_reason, result.output)) # Remove configuration file if stopping/starting instance was successful self._RemoveConfigFile(name) def RebootInstance(self, instance): """Reboot an instance. """ ini_info = self.GetInstanceInfo(instance.name, hvparams=instance.hvparams) if ini_info is None: raise errors.HypervisorError("Failed to reboot instance %s," " not running" % instance.name) result = self._RunXen(["reboot", instance.name], instance.hvparams) if result.failed: raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" % (instance.name, result.fail_reason, result.output)) def _CheckInstance(): new_info = self.GetInstanceInfo(instance.name, hvparams=instance.hvparams) # check if the domain ID has changed or the run time has decreased if (new_info is not None and (new_info[1] != ini_info[1] or new_info[5] < ini_info[5])): return raise utils.RetryAgain() try: utils.Retry(_CheckInstance, self.REBOOT_RETRY_INTERVAL, self.REBOOT_RETRY_INTERVAL * self.REBOOT_RETRY_COUNT) except utils.RetryTimeout: raise errors.HypervisorError("Failed to reboot instance %s: instance" " did not reboot in the expected interval" % (instance.name, )) def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ result = self._RunXen(["mem-set", instance.name, mem], instance.hvparams) if result.failed: raise errors.HypervisorError("Failed to balloon instance %s: %s (%s)" % (instance.name, result.fail_reason, result.output)) # Update configuration file cmd = ["sed", "-ie", "s/^memory.*$/memory = %s/" % mem] cmd.append(self._ConfigFileName(instance.name)) result = utils.RunCmd(cmd) if result.failed: raise errors.HypervisorError("Failed to update memory for %s: %s (%s)" % (instance.name, result.fail_reason, result.output)) def GetNodeInfo(self, hvparams=None): """Return information about the node. @see: L{_GetNodeInfo} and L{_ParseNodeInfo} """ result = self._RunXen(["info"], hvparams) if result.failed: logging.error("Can't retrieve xen hypervisor information (%s): %s", result.fail_reason, result.output) return None instance_list = self._GetInstanceList(True, hvparams) return _GetNodeInfo(result.stdout, instance_list) @classmethod def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return a command for connecting to the console of an instance. """ xen_cmd = XenHypervisor._GetCommandFromHvparams(hvparams) return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_SSH, host=primary_node.name, user=constants.SSH_CONSOLE_USER, command=[pathutils.XEN_CONSOLE_WRAPPER, xen_cmd, instance.name]) def Verify(self, hvparams=None): """Verify the hypervisor. For Xen, this verifies that the xend process is running. 
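# --- Illustrative sketch, not part of the original module ---
# RebootInstance above considers the domain rebooted once its domain ID has
# changed or its accumulated run time has gone backwards. The predicate in
# isolation (info tuples are (name, id, memory, vcpus, state, time)):
def has_rebooted(ini_info, new_info):
  if new_info is None:
    return False  # domain momentarily gone; the caller keeps retrying
  return new_info[1] != ini_info[1] or new_info[5] < ini_info[5]

# has_rebooted(("inst1", 7, 512, 1, "r-----", 120.0),
#              ("inst1", 8, 512, 1, "r-----", 3.0))  -> True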
@type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against @return: Problem description if something is wrong, C{None} otherwise """ if hvparams is None: return "Could not verify the hypervisor, because no hvparams were" \ " provided." if constants.HV_XEN_CMD in hvparams: xen_cmd = hvparams[constants.HV_XEN_CMD] try: self._CheckToolstack(xen_cmd) except errors.HypervisorError: return "The configured xen toolstack '%s' is not available on this" \ " node." % xen_cmd result = self._RunXen(["info"], hvparams) if result.failed: return "Retrieving information from xen failed: %s, %s" % \ (result.fail_reason, result.output) return None def MigrationInfo(self, instance): """Get instance information to perform a migration. @type instance: L{objects.Instance} @param instance: instance to be migrated @rtype: string @return: content of the xen config file """ return self._ReadConfigFile(instance.name) def AcceptInstance(self, instance, info, target): """Prepare to accept an instance. @type instance: L{objects.Instance} @param instance: instance to be accepted @type info: string @param info: content of the xen config file on the source node @type target: string @param target: target host (usually ip), on this node """ pass def FinalizeMigrationDst(self, instance, info, success): """Finalize an instance migration. After a successful migration we write the xen config file. We do nothing on a failure, as we did not change anything at accept time. @type instance: L{objects.Instance} @param instance: instance whose migration is being finalized @type info: string @param info: content of the xen config file on the source node @type success: boolean @param success: whether the migration was a success or a failure """ if success: self._WriteConfigFile(instance.name, info) def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance to a target node. The migration will not be attempted if the instance is not currently running. @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: ip address of the target node @type live: boolean @param live: perform a live migration """ port = instance.hvparams[constants.HV_MIGRATION_PORT] return self._MigrateInstance(cluster_name, instance.name, target, port, live, instance.hvparams) def _MigrateInstance(self, cluster_name, instance_name, target, port, live, hvparams, _ping_fn=netutils.TcpPing): """Migrate an instance to a target node. 
@see: L{MigrateInstance} for details """ if hvparams is None: raise errors.HypervisorError("No hvparams provided.") if self.GetInstanceInfo(instance_name, hvparams=hvparams) is None: raise errors.HypervisorError("Instance not running, cannot migrate") cmd = self._GetCommand(hvparams) if (cmd == constants.XEN_CMD_XM and not _ping_fn(target, port, live_port_needed=True)): raise errors.HypervisorError("Remote host %s not listening on port" " %s, cannot migrate" % (target, port)) args = ["migrate"] if cmd == constants.XEN_CMD_XM: args.extend(["-p", "%d" % port]) if live: args.append("-l") elif cmd == constants.XEN_CMD_XL: args.extend([ "-s", constants.XL_SSH_CMD % cluster_name, "-C", self._ConfigFileName(instance_name), ]) else: raise errors.HypervisorError("Unsupported Xen command: %s" % self._cmd) args.extend([instance_name, target]) result = self._RunXen(args, hvparams) if result.failed: raise errors.HypervisorError("Failed to migrate instance %s: %s" % (instance_name, result.output)) def FinalizeMigrationSource(self, instance, success, live): """Finalize the instance migration on the source node. @type instance: L{objects.Instance} @param instance: the instance that was migrated @type success: bool @param success: whether the migration succeeded or not @type live: bool @param live: whether the user requested a live migration or not """ # pylint: disable=W0613 if success: # remove old xen file after migration succeeded try: self._RemoveConfigFile(instance.name) except EnvironmentError: logging.exception("Failure while removing instance config file") def GetMigrationStatus(self, instance): """Get the migration status As MigrateInstance for Xen is still blocking, if this method is called it means that MigrateInstance has completed successfully. So we can safely assume that the migration was successful and notify this fact to the client. @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ return objects.MigrationStatus(status=constants.HV_MIGRATION_COMPLETED) def PowercycleNode(self, hvparams=None): """Xen-specific powercycle. This first does a Linux reboot (which automatically triggers a Xen reboot), and if that fails it tries to do a Xen reboot. The reason we don't try a Xen reboot first is that the xen reboot launches an external command which connects to the Xen hypervisor, and that won't work in case the root filesystem is broken and/or the xend daemon is not working. @type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ try: self.LinuxPowercycle() finally: xen_cmd = self._GetCommand(hvparams) utils.RunCmd([xen_cmd, "debug", "R"]) def _CheckToolstack(self, xen_cmd): """Check whether the given toolstack is available on the node. @type xen_cmd: string @param xen_cmd: xen command (e.g. 'xm' or 'xl') """ binary_found = self._CheckToolstackBinary(xen_cmd) if not binary_found: raise errors.HypervisorError("No '%s' binary found on node." % xen_cmd) elif xen_cmd == constants.XEN_CMD_XL: if not self._CheckToolstackXlConfigured(): raise errors.HypervisorError("Toolstack '%s' is not enabled on this" " node." % xen_cmd) def _CheckToolstackBinary(self, xen_cmd): """Checks whether the xen command's binary is found on the machine.
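# --- Illustrative sketch, not part of the original module ---
# The migration argument lists assembled above differ per toolstack;
# schematically:
#   xm migrate -p <port> [-l] <instance> <target>
#   xl migrate -s "<XL_SSH_CMD for the cluster>" -C <config file> <instance> <target>
# xm needs the target's migration port reachable beforehand (hence the
# TcpPing check); xl instead tunnels over ssh and re-sends the config file.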
""" if xen_cmd not in constants.KNOWN_XEN_COMMANDS: raise errors.HypervisorError("Unknown xen command '%s'." % xen_cmd) result = self._run_cmd_fn(["which", xen_cmd]) return not result.failed def _CheckToolstackXlConfigured(self): """Checks whether xl is enabled on an xl-capable node. @rtype: bool @returns: C{True} if 'xl' is enabled, C{False} otherwise """ result = self._run_cmd_fn([constants.XEN_CMD_XL, "help"]) if not result.failed: return True elif result.failed: if "toolstack" in result.stderr: return False # xl fails for some other reason than the toolstack else: raise errors.HypervisorError("Cannot run xen ('%s'). Error: %s." % (constants.XEN_CMD_XL, result.stderr)) class XenPvmHypervisor(XenHypervisor): """Xen PVM hypervisor interface""" PARAMETERS = { constants.HV_USE_BOOTLOADER: hv_base.NO_CHECK, constants.HV_BOOTLOADER_PATH: hv_base.OPT_FILE_CHECK, constants.HV_BOOTLOADER_ARGS: hv_base.NO_CHECK, constants.HV_KERNEL_PATH: hv_base.REQ_FILE_CHECK, constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK, constants.HV_ROOT_PATH: hv_base.NO_CHECK, constants.HV_KERNEL_ARGS: hv_base.NO_CHECK, constants.HV_MIGRATION_PORT: hv_base.REQ_NET_PORT_CHECK, constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK, # TODO: Add a check for the blockdev prefix (matching [a-z:] or similar). constants.HV_BLOCKDEV_PREFIX: hv_base.NO_CHECK, constants.HV_REBOOT_BEHAVIOR: hv_base.ParamInSet(True, constants.REBOOT_BEHAVIORS), constants.HV_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK, constants.HV_CPU_CAP: hv_base.OPT_NONNEGATIVE_INT_CHECK, constants.HV_CPU_WEIGHT: (False, lambda x: 0 < x < 65536, "invalid weight", None, None), constants.HV_VIF_SCRIPT: hv_base.OPT_FILE_CHECK, constants.HV_XEN_CMD: hv_base.ParamInSet(True, constants.KNOWN_XEN_COMMANDS), } def _GetConfig(self, instance, startup_memory, block_devices): """Write the Xen config file for the instance. """ hvp = instance.hvparams config = StringIO() config.write("# this is autogenerated by Ganeti, please do not edit\n#\n") # if bootloader is True, use bootloader instead of kernel and ramdisk # parameters. 
if hvp[constants.HV_USE_BOOTLOADER]: # bootloader handling bootloader_path = hvp[constants.HV_BOOTLOADER_PATH] if bootloader_path: config.write("bootloader = '%s'\n" % bootloader_path) else: raise errors.HypervisorError("Bootloader enabled, but missing" " bootloader path") bootloader_args = hvp[constants.HV_BOOTLOADER_ARGS] if bootloader_args: config.write("bootargs = '%s'\n" % bootloader_args) else: # kernel handling kpath = hvp[constants.HV_KERNEL_PATH] config.write("kernel = '%s'\n" % kpath) # initrd handling initrd_path = hvp[constants.HV_INITRD_PATH] if initrd_path: config.write("ramdisk = '%s'\n" % initrd_path) # rest of the settings config.write("memory = %d\n" % startup_memory) config.write("maxmem = %d\n" % instance.beparams[constants.BE_MAXMEM]) config.write("vcpus = %d\n" % instance.beparams[constants.BE_VCPUS]) cpu_pinning = _CreateConfigCpus(hvp[constants.HV_CPU_MASK]) if cpu_pinning: config.write("%s\n" % cpu_pinning) cpu_cap = hvp[constants.HV_CPU_CAP] if cpu_cap: config.write("cpu_cap=%d\n" % cpu_cap) cpu_weight = hvp[constants.HV_CPU_WEIGHT] if cpu_weight: config.write("cpu_weight=%d\n" % cpu_weight) config.write("name = '%s'\n" % instance.name) vif_data = [] for idx, nic in enumerate(instance.nics): nic_str = "mac=%s" % (nic.mac) ip = getattr(nic, "ip", None) if ip is not None: nic_str += ", ip=%s" % ip if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK] if hvp[constants.HV_VIF_SCRIPT]: nic_str += ", script=%s" % hvp[constants.HV_VIF_SCRIPT] vif_data.append("'%s'" % nic_str) self._WriteNICInfoFile(instance.name, idx, nic) disk_data = \ _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX]) config.write("vif = [%s]\n" % ",".join(vif_data)) config.write("disk = [%s]\n" % ",".join(disk_data)) if hvp[constants.HV_ROOT_PATH]: config.write("root = '%s'\n" % hvp[constants.HV_ROOT_PATH]) config.write("on_poweroff = 'destroy'\n") if hvp[constants.HV_REBOOT_BEHAVIOR] == constants.INSTANCE_REBOOT_ALLOWED: config.write("on_reboot = 'restart'\n") else: config.write("on_reboot = 'destroy'\n") config.write("on_crash = 'restart'\n") config.write("extra = '%s'\n" % hvp[constants.HV_KERNEL_ARGS]) return config.getvalue() class XenHvmHypervisor(XenHypervisor): """Xen HVM hypervisor interface""" ANCILLARY_FILES = XenHypervisor.ANCILLARY_FILES + [ pathutils.VNC_PASSWORD_FILE, ] ANCILLARY_FILES_OPT = XenHypervisor.ANCILLARY_FILES_OPT + [ pathutils.VNC_PASSWORD_FILE, ] PARAMETERS = { constants.HV_ACPI: hv_base.NO_CHECK, constants.HV_BOOT_ORDER: (True, ) + (lambda x: x and len(x.strip("acdn")) == 0, "Invalid boot order specified, must be one or more of [acdn]", None, None), constants.HV_CDROM_IMAGE_PATH: hv_base.OPT_FILE_CHECK, constants.HV_DISK_TYPE: hv_base.ParamInSet(True, constants.HT_HVM_VALID_DISK_TYPES), constants.HV_NIC_TYPE: hv_base.ParamInSet(True, constants.HT_HVM_VALID_NIC_TYPES), constants.HV_PAE: hv_base.NO_CHECK, constants.HV_VNC_BIND_ADDRESS: (False, netutils.IP4Address.IsValid, "VNC bind address is not a valid IP address", None, None), constants.HV_KERNEL_PATH: hv_base.REQ_FILE_CHECK, constants.HV_DEVICE_MODEL: hv_base.REQ_FILE_CHECK, constants.HV_VNC_PASSWORD_FILE: hv_base.REQ_FILE_CHECK, constants.HV_MIGRATION_PORT: hv_base.REQ_NET_PORT_CHECK, constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK, constants.HV_USE_LOCALTIME: hv_base.NO_CHECK, # TODO: Add a check for the blockdev prefix (matching [a-z:] or similar). 
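# --- Illustrative sketch, not part of the original module ---
# A PVM configuration emitted by _GetConfig above looks roughly like this
# (all values hypothetical):
#   kernel = '/boot/vmlinuz-3-xenU'
#   ramdisk = '/boot/initrd-3-xenU'
#   memory = 512
#   maxmem = 512
#   vcpus = 1
#   name = 'instance1.example.com'
#   vif = ['mac=aa:00:00:fa:3a:3c, bridge=xen-br0']
#   disk = ['phy:/dev/drbd0,sda,w']
#   root = '/dev/sda1 ro'
#   on_poweroff = 'destroy'
#   on_reboot = 'restart'
#   on_crash = 'restart'
#   extra = 'ro'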
constants.HV_BLOCKDEV_PREFIX: hv_base.NO_CHECK, # Add PCI passthrough constants.HV_PASSTHROUGH: hv_base.NO_CHECK, constants.HV_REBOOT_BEHAVIOR: hv_base.ParamInSet(True, constants.REBOOT_BEHAVIORS), constants.HV_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK, constants.HV_CPU_CAP: hv_base.NO_CHECK, constants.HV_CPU_WEIGHT: (False, lambda x: 0 < x < 65535, "invalid weight", None, None), constants.HV_VIF_TYPE: hv_base.ParamInSet(False, constants.HT_HVM_VALID_VIF_TYPES), constants.HV_VIF_SCRIPT: hv_base.OPT_FILE_CHECK, constants.HV_VIRIDIAN: hv_base.NO_CHECK, constants.HV_XEN_CMD: hv_base.ParamInSet(True, constants.KNOWN_XEN_COMMANDS), } def _GetConfig(self, instance, startup_memory, block_devices): """Create a Xen 3.1 HVM config file. """ hvp = instance.hvparams config = StringIO() # kernel handling kpath = hvp[constants.HV_KERNEL_PATH] config.write("kernel = '%s'\n" % kpath) config.write("builder = 'hvm'\n") config.write("memory = %d\n" % startup_memory) config.write("maxmem = %d\n" % instance.beparams[constants.BE_MAXMEM]) config.write("vcpus = %d\n" % instance.beparams[constants.BE_VCPUS]) cpu_pinning = _CreateConfigCpus(hvp[constants.HV_CPU_MASK]) if cpu_pinning: config.write("%s\n" % cpu_pinning) cpu_cap = hvp[constants.HV_CPU_CAP] if cpu_cap: config.write("cpu_cap=%d\n" % cpu_cap) cpu_weight = hvp[constants.HV_CPU_WEIGHT] if cpu_weight: config.write("cpu_weight=%d\n" % cpu_weight) config.write("name = '%s'\n" % instance.name) if hvp[constants.HV_PAE]: config.write("pae = 1\n") else: config.write("pae = 0\n") if hvp[constants.HV_ACPI]: config.write("acpi = 1\n") else: config.write("acpi = 0\n") if hvp[constants.HV_VIRIDIAN]: config.write("viridian = 1\n") else: config.write("viridian = 0\n") config.write("apic = 1\n") config.write("device_model = '%s'\n" % hvp[constants.HV_DEVICE_MODEL]) config.write("boot = '%s'\n" % hvp[constants.HV_BOOT_ORDER]) config.write("sdl = 0\n") config.write("usb = 1\n") config.write("usbdevice = 'tablet'\n") config.write("vnc = 1\n") if hvp[constants.HV_VNC_BIND_ADDRESS] is None: config.write("vnclisten = '%s'\n" % constants.VNC_DEFAULT_BIND_ADDRESS) else: config.write("vnclisten = '%s'\n" % hvp[constants.HV_VNC_BIND_ADDRESS]) if instance.network_port > constants.VNC_BASE_PORT: display = instance.network_port - constants.VNC_BASE_PORT config.write("vncdisplay = %s\n" % display) config.write("vncunused = 0\n") else: config.write("# vncdisplay = 1\n") config.write("vncunused = 1\n") vnc_pwd_file = hvp[constants.HV_VNC_PASSWORD_FILE] try: password = utils.ReadFile(vnc_pwd_file) except EnvironmentError, err: raise errors.HypervisorError("Failed to open VNC password file %s: %s" % (vnc_pwd_file, err)) config.write("vncpasswd = '%s'\n" % password.rstrip()) config.write("serial = 'pty'\n") if hvp[constants.HV_USE_LOCALTIME]: config.write("localtime = 1\n") vif_data = [] # Note: what is called 'nic_type' here, is used as value for the xen nic # vif config parameter 'model'. For the xen nic vif parameter 'type', we use # the 'vif_type' to avoid a clash of notation. 
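    # For illustration (hypothetical MAC/bridge values): with nic_type
    # 'rtl8139' the loop below yields vif entries like
    #   'mac=aa:00:00:11:22:33, model=rtl8139, type=ioemu, bridge=xen-br0'
    # whereas nic_type 'paravirtual' yields
    #   'mac=aa:00:00:11:22:33, type=paravirtualized, bridge=xen-br0'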
nic_type = hvp[constants.HV_NIC_TYPE] if nic_type is None: vif_type_str = "" if hvp[constants.HV_VIF_TYPE]: vif_type_str = ", type=%s" % hvp[constants.HV_VIF_TYPE] # ensure old instances don't change nic_type_str = vif_type_str elif nic_type == constants.HT_NIC_PARAVIRTUAL: nic_type_str = ", type=paravirtualized" else: # parameter 'model' is only valid with type 'ioemu' nic_type_str = ", model=%s, type=%s" % \ (nic_type, constants.HT_HVM_VIF_IOEMU) for idx, nic in enumerate(instance.nics): nic_str = "mac=%s%s" % (nic.mac, nic_type_str) ip = getattr(nic, "ip", None) if ip is not None: nic_str += ", ip=%s" % ip if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK] if hvp[constants.HV_VIF_SCRIPT]: nic_str += ", script=%s" % hvp[constants.HV_VIF_SCRIPT] vif_data.append("'%s'" % nic_str) self._WriteNICInfoFile(instance.name, idx, nic) config.write("vif = [%s]\n" % ",".join(vif_data)) disk_data = \ _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX]) iso_path = hvp[constants.HV_CDROM_IMAGE_PATH] if iso_path: iso = "'file:%s,hdc:cdrom,r'" % iso_path disk_data.append(iso) config.write("disk = [%s]\n" % (",".join(disk_data))) # Add PCI passthrough pci_pass_arr = [] pci_pass = hvp[constants.HV_PASSTHROUGH] if pci_pass: pci_pass_arr = pci_pass.split(";") config.write("pci = %s\n" % pci_pass_arr) config.write("on_poweroff = 'destroy'\n") if hvp[constants.HV_REBOOT_BEHAVIOR] == constants.INSTANCE_REBOOT_ALLOWED: config.write("on_reboot = 'restart'\n") else: config.write("on_reboot = 'destroy'\n") config.write("on_crash = 'restart'\n") return config.getvalue() ganeti-2.9.3/lib/hypervisor/__init__.py0000644000000000000000000000376612230001635020067 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Virtualization interface abstraction """ from ganeti import constants from ganeti import errors from ganeti.hypervisor import hv_fake from ganeti.hypervisor import hv_xen from ganeti.hypervisor import hv_kvm from ganeti.hypervisor import hv_chroot from ganeti.hypervisor import hv_lxc _HYPERVISOR_MAP = { constants.HT_XEN_PVM: hv_xen.XenPvmHypervisor, constants.HT_XEN_HVM: hv_xen.XenHvmHypervisor, constants.HT_FAKE: hv_fake.FakeHypervisor, constants.HT_KVM: hv_kvm.KVMHypervisor, constants.HT_CHROOT: hv_chroot.ChrootManager, constants.HT_LXC: hv_lxc.LXCHypervisor, } def GetHypervisorClass(ht_kind): """Return a Hypervisor class. This function returns the hypervisor class corresponding to the given hypervisor name. @type ht_kind: string @param ht_kind: The requested hypervisor type """ if ht_kind not in _HYPERVISOR_MAP: raise errors.HypervisorError("Unknown hypervisor type '%s'" % ht_kind) cls = _HYPERVISOR_MAP[ht_kind] return cls def GetHypervisor(ht_kind): """Return a Hypervisor instance. 
This is a wrapper over L{GetHypervisorClass} which returns an instance of the class. @type ht_kind: string @param ht_kind: The requested hypervisor type """ cls = GetHypervisorClass(ht_kind) return cls() ganeti-2.9.3/lib/hypervisor/hv_kvm.py0000644000000000000000000024260712271422343017631 0ustar00rootroot00000000000000# # # Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """KVM hypervisor """ import errno import os import os.path import re import tempfile import time import logging import pwd import struct import fcntl import shutil import socket import stat import StringIO try: import affinity # pylint: disable=F0401 except ImportError: affinity = None from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import serializer from ganeti import objects from ganeti import uidpool from ganeti import ssconf from ganeti import netutils from ganeti import pathutils from ganeti.hypervisor import hv_base from ganeti.utils import wrapper as utils_wrapper _KVM_NETWORK_SCRIPT = pathutils.CONF_DIR + "/kvm-vif-bridge" _KVM_START_PAUSED_FLAG = "-S" # TUN/TAP driver constants, taken from # They are architecture-independent and already hardcoded in qemu-kvm source, # so we can safely include them here. TUNSETIFF = 0x400454ca TUNGETIFF = 0x800454d2 TUNGETFEATURES = 0x800454cf IFF_TAP = 0x0002 IFF_NO_PI = 0x1000 IFF_VNET_HDR = 0x4000 #: SPICE parameters which depend on L{constants.HV_KVM_SPICE_BIND} _SPICE_ADDITIONAL_PARAMS = frozenset([ constants.HV_KVM_SPICE_IP_VERSION, constants.HV_KVM_SPICE_PASSWORD_FILE, constants.HV_KVM_SPICE_LOSSLESS_IMG_COMPR, constants.HV_KVM_SPICE_JPEG_IMG_COMPR, constants.HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR, constants.HV_KVM_SPICE_STREAMING_VIDEO_DETECTION, constants.HV_KVM_SPICE_USE_TLS, ]) def _GetTunFeatures(fd, _ioctl=fcntl.ioctl): """Retrieves supported TUN features from file descriptor. @see: L{_ProbeTapVnetHdr} """ req = struct.pack("I", 0) try: buf = _ioctl(fd, TUNGETFEATURES, req) except EnvironmentError, err: logging.warning("ioctl(TUNGETFEATURES) failed: %s", err) return None else: (flags, ) = struct.unpack("I", buf) return flags def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures): """Check whether to enable the IFF_VNET_HDR flag. To do this, _all_ of the following conditions must be met: 1. TUNGETFEATURES ioctl() *must* be implemented 2. TUNGETFEATURES ioctl() result *must* contain the IFF_VNET_HDR flag 3. TUNGETIFF ioctl() *must* be implemented; reading the kernel code in drivers/net/tun.c there is no way to test this until after the tap device has been created using TUNSETIFF, and there is no way to change the IFF_VNET_HDR flag after creating the interface, catch-22! However both TUNGETIFF and TUNGETFEATURES were introduced in kernel version 2.6.27, thus we can expect TUNGETIFF to be present if TUNGETFEATURES is. 
@type fd: int @param fd: the file descriptor of /dev/net/tun """ flags = _features_fn(fd) if flags is None: # Not supported return False result = bool(flags & IFF_VNET_HDR) if not result: logging.warning("Kernel does not support IFF_VNET_HDR, not enabling") return result def _OpenTap(vnet_hdr=True): """Open a new tap device and return its file descriptor. This is intended to be used by a qemu-type hypervisor together with the -net tap,fd= command line parameter. @type vnet_hdr: boolean @param vnet_hdr: Enable the VNET Header @return: (ifname, tapfd) @rtype: tuple """ try: tapfd = os.open("/dev/net/tun", os.O_RDWR) except EnvironmentError: raise errors.HypervisorError("Failed to open /dev/net/tun") flags = IFF_TAP | IFF_NO_PI if vnet_hdr and _ProbeTapVnetHdr(tapfd): flags |= IFF_VNET_HDR # The struct ifreq ioctl request (see netdevice(7)) ifr = struct.pack("16sh", "", flags) try: res = fcntl.ioctl(tapfd, TUNSETIFF, ifr) except EnvironmentError, err: raise errors.HypervisorError("Failed to allocate a new TAP device: %s" % err) # Get the interface name from the ioctl ifname = struct.unpack("16sh", res)[0].strip("\x00") return (ifname, tapfd) class QmpMessage: """QEMU Messaging Protocol (QMP) message. """ def __init__(self, data): """Creates a new QMP message based on the passed data. """ if not isinstance(data, dict): raise TypeError("QmpMessage must be initialized with a dict") self.data = data def __getitem__(self, field_name): """Get the value of the required field if present, or None. Overrides the [] operator to provide access to the message data, returning None if the required item is not in the message @return: the value of the field_name field, or None if field_name is not contained in the message """ return self.data.get(field_name, None) def __setitem__(self, field_name, field_value): """Set the value of the required field_name to field_value. """ self.data[field_name] = field_value def __len__(self): """Return the number of fields stored in this QmpMessage. """ return len(self.data) def __delitem__(self, key): """Delete the specified element from the QmpMessage. """ del(self.data[key]) @staticmethod def BuildFromJsonString(json_string): """Build a QmpMessage from a JSON encoded string. @type json_string: str @param json_string: JSON string representing the message @rtype: L{QmpMessage} @return: a L{QmpMessage} built from json_string """ # Parse the string data = serializer.LoadJson(json_string) return QmpMessage(data) def __str__(self): # The protocol expects the JSON object to be sent as a single line. return serializer.DumpJson(self.data) def __eq__(self, other): # When comparing two QmpMessages, we are interested in comparing # their internal representation of the message data return self.data == other.data class QmpConnection: """Connection to the QEMU Monitor using the QEMU Monitor Protocol (QMP). """ _FIRST_MESSAGE_KEY = "QMP" _EVENT_KEY = "event" _ERROR_KEY = "error" _RETURN_KEY = RETURN_KEY = "return" _ACTUAL_KEY = ACTUAL_KEY = "actual" _ERROR_CLASS_KEY = "class" _ERROR_DESC_KEY = "desc" _EXECUTE_KEY = "execute" _ARGUMENTS_KEY = "arguments" _CAPABILITIES_COMMAND = "qmp_capabilities" _MESSAGE_END_TOKEN = "\r\n" _SOCKET_TIMEOUT = 5 def __init__(self, monitor_filename): """Instantiates the QmpConnection object. 
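    Example (hypothetical usage; the socket path is illustrative only):

      conn = QmpConnection("/var/run/ganeti/kvm-hypervisor/ctrl/inst1.qmp")
      conn.connect()
      response = conn.Execute("query-status")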
    @type monitor_filename: string
    @param monitor_filename: the filename of the UNIX raw socket on which the
      QMP monitor is listening

    """
    self.monitor_filename = monitor_filename
    self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    # We want to fail if the server doesn't send a complete message
    # in a reasonable amount of time
    self.sock.settimeout(self._SOCKET_TIMEOUT)
    self._connected = False
    self._buf = ""

  def _check_socket(self):
    sock_stat = None
    try:
      sock_stat = os.stat(self.monitor_filename)
    except EnvironmentError, err:
      if err.errno == errno.ENOENT:
        raise errors.HypervisorError("No qmp socket found")
      else:
        raise errors.HypervisorError("Error checking qmp socket: %s" %
                                     utils.ErrnoOrStr(err))
    if not stat.S_ISSOCK(sock_stat.st_mode):
      raise errors.HypervisorError("Qmp socket is not a socket")

  def _check_connection(self):
    """Make sure that the connection is established.

    """
    if not self._connected:
      raise errors.ProgrammerError("To use a QmpConnection you need to first"
                                   " invoke connect() on it")

  def connect(self):
    """Connects to the QMP monitor.

    Connects to the UNIX socket and makes sure that we can actually send and
    receive data to the kvm instance via QMP.

    @raise errors.HypervisorError: when there are communication errors
    @raise errors.ProgrammerError: when there are data serialization errors

    """
    if self._connected:
      raise errors.ProgrammerError("Cannot connect twice")

    self._check_socket()

    # Check file existence/stuff
    try:
      self.sock.connect(self.monitor_filename)
    except EnvironmentError:
      raise errors.HypervisorError("Can't connect to qmp socket")
    self._connected = True

    # Check if we receive a correct greeting message from the server
    # (As per the QEMU Protocol Specification 0.1 - section 2.2)
    greeting = self._Recv()
    if not greeting[self._FIRST_MESSAGE_KEY]:
      self._connected = False
      raise errors.HypervisorError("kvm: QMP communication error (wrong"
                                   " server greeting)")

    # Let's put the monitor in command mode using the qmp_capabilities
    # command, or else no command will be executable.
    # (As per the QEMU Protocol Specification 0.1 - section 4)
    self.Execute(self._CAPABILITIES_COMMAND)

  def _ParseMessage(self, buf):
    """Extract and parse a QMP message from the given buffer.

    Seeks for a QMP message in the given buf. If found, it parses it and
    returns it together with the rest of the characters in the buf. If no
    message is found, returns None and the whole buffer.

    @raise errors.ProgrammerError: when there are data serialization errors

    """
    message = None
    # Check if we got the message end token (CRLF, as per the QEMU Protocol
    # Specification 0.1 - Section 2.1.1)
    pos = buf.find(self._MESSAGE_END_TOKEN)
    if pos >= 0:
      try:
        message = QmpMessage.BuildFromJsonString(buf[:pos + 1])
      except Exception, err:
        raise errors.ProgrammerError("QMP data serialization error: %s" % err)
      buf = buf[pos + 1:]

    return (message, buf)

  def _Recv(self):
    """Receives a message from QMP and decodes the received JSON object.
    @rtype: QmpMessage
    @return: the received message
    @raise errors.HypervisorError: when there are communication errors
    @raise errors.ProgrammerError: when there are data serialization errors

    """
    self._check_connection()

    # Check if there is already a message in the buffer
    (message, self._buf) = self._ParseMessage(self._buf)
    if message:
      return message

    recv_buffer = StringIO.StringIO(self._buf)
    recv_buffer.seek(len(self._buf))

    try:
      while True:
        data = self.sock.recv(4096)
        if not data:
          break
        recv_buffer.write(data)

        (message, self._buf) = self._ParseMessage(recv_buffer.getvalue())
        if message:
          return message

    except socket.timeout, err:
      raise errors.HypervisorError("Timeout while receiving a QMP message: "
                                   "%s" % (err))
    except socket.error, err:
      raise errors.HypervisorError("Unable to receive data from KVM using the"
                                   " QMP protocol: %s" % err)

  def _Send(self, message):
    """Encodes and sends a message to KVM using QMP.

    @type message: QmpMessage
    @param message: message to send to KVM
    @raise errors.HypervisorError: when there are communication errors
    @raise errors.ProgrammerError: when there are data serialization errors

    """
    self._check_connection()
    try:
      message_str = str(message)
    except Exception, err:
      raise errors.ProgrammerError("QMP data serialization error: %s" % err)

    try:
      self.sock.sendall(message_str)
    except socket.timeout, err:
      raise errors.HypervisorError("Timeout while sending a QMP message: "
                                   "%s" % err)
    except socket.error, err:
      raise errors.HypervisorError("Unable to send data from KVM using the"
                                   " QMP protocol: %s" % err)

  def Execute(self, command, arguments=None):
    """Executes a QMP command and returns the response of the server.

    @type command: str
    @param command: the command to execute
    @type arguments: dict
    @param arguments: dictionary of arguments to be passed to the command
    @rtype: dict
    @return: dictionary representing the received JSON object
    @raise errors.HypervisorError: when there are communication errors
    @raise errors.ProgrammerError: when there are data serialization errors

    """
    self._check_connection()
    message = QmpMessage({self._EXECUTE_KEY: command})
    if arguments:
      message[self._ARGUMENTS_KEY] = arguments
    self._Send(message)

    # Events can occur between the sending of the command and the reception
    # of the response, so we need to filter out messages with the event key.
    while True:
      response = self._Recv()
      err = response[self._ERROR_KEY]
      if err:
        raise errors.HypervisorError("kvm: error executing the %s"
                                     " command: %s (%s):" %
                                     (command,
                                      err[self._ERROR_DESC_KEY],
                                      err[self._ERROR_CLASS_KEY]))
      elif not response[self._EVENT_KEY]:
        return response


class KVMHypervisor(hv_base.BaseHypervisor):
  """KVM hypervisor interface

  """
  CAN_MIGRATE = True

  _ROOT_DIR = pathutils.RUN_DIR + "/kvm-hypervisor"
  _PIDS_DIR = _ROOT_DIR + "/pid"  # contains live instances pids
  _UIDS_DIR = _ROOT_DIR + "/uid"  # contains instances reserved uids
  _CTRL_DIR = _ROOT_DIR + "/ctrl"  # contains instances control sockets
  _CONF_DIR = _ROOT_DIR + "/conf"  # contains instances startup data
  _NICS_DIR = _ROOT_DIR + "/nic"  # contains instances nic <-> tap associations
  _KEYMAP_DIR = _ROOT_DIR + "/keymap"  # contains instances keymaps
  # KVM instances with chroot enabled are started in empty chroot directories.
  _CHROOT_DIR = _ROOT_DIR + "/chroot"  # for empty chroot directories
  # After an instance is stopped, its chroot directory is removed.
  # If the chroot directory is not empty, it can't be removed.
  # A non-empty chroot directory indicates a possible security incident.
# To support forensics, the non-empty chroot directory is quarantined in # a separate directory, called 'chroot-quarantine'. _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine" _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR, _NICS_DIR, _CHROOT_DIR, _CHROOT_QUARANTINE_DIR, _KEYMAP_DIR] PARAMETERS = { constants.HV_KVM_PATH: hv_base.REQ_FILE_CHECK, constants.HV_KERNEL_PATH: hv_base.OPT_FILE_CHECK, constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK, constants.HV_ROOT_PATH: hv_base.NO_CHECK, constants.HV_KERNEL_ARGS: hv_base.NO_CHECK, constants.HV_ACPI: hv_base.NO_CHECK, constants.HV_SERIAL_CONSOLE: hv_base.NO_CHECK, constants.HV_SERIAL_SPEED: hv_base.NO_CHECK, constants.HV_VNC_BIND_ADDRESS: hv_base.NO_CHECK, # will be checked later constants.HV_VNC_TLS: hv_base.NO_CHECK, constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK, constants.HV_VNC_X509_VERIFY: hv_base.NO_CHECK, constants.HV_VNC_PASSWORD_FILE: hv_base.OPT_FILE_CHECK, constants.HV_KVM_SPICE_BIND: hv_base.NO_CHECK, # will be checked later constants.HV_KVM_SPICE_IP_VERSION: (False, lambda x: (x == constants.IFACE_NO_IP_VERSION_SPECIFIED or x in constants.VALID_IP_VERSIONS), "The SPICE IP version should be 4 or 6", None, None), constants.HV_KVM_SPICE_PASSWORD_FILE: hv_base.OPT_FILE_CHECK, constants.HV_KVM_SPICE_LOSSLESS_IMG_COMPR: hv_base.ParamInSet( False, constants.HT_KVM_SPICE_VALID_LOSSLESS_IMG_COMPR_OPTIONS), constants.HV_KVM_SPICE_JPEG_IMG_COMPR: hv_base.ParamInSet( False, constants.HT_KVM_SPICE_VALID_LOSSY_IMG_COMPR_OPTIONS), constants.HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR: hv_base.ParamInSet( False, constants.HT_KVM_SPICE_VALID_LOSSY_IMG_COMPR_OPTIONS), constants.HV_KVM_SPICE_STREAMING_VIDEO_DETECTION: hv_base.ParamInSet( False, constants.HT_KVM_SPICE_VALID_VIDEO_STREAM_DETECTION_OPTIONS), constants.HV_KVM_SPICE_AUDIO_COMPR: hv_base.NO_CHECK, constants.HV_KVM_SPICE_USE_TLS: hv_base.NO_CHECK, constants.HV_KVM_SPICE_TLS_CIPHERS: hv_base.NO_CHECK, constants.HV_KVM_SPICE_USE_VDAGENT: hv_base.NO_CHECK, constants.HV_KVM_FLOPPY_IMAGE_PATH: hv_base.OPT_FILE_CHECK, constants.HV_CDROM_IMAGE_PATH: hv_base.OPT_FILE_CHECK, constants.HV_KVM_CDROM2_IMAGE_PATH: hv_base.OPT_FILE_CHECK, constants.HV_BOOT_ORDER: hv_base.ParamInSet(True, constants.HT_KVM_VALID_BO_TYPES), constants.HV_NIC_TYPE: hv_base.ParamInSet(True, constants.HT_KVM_VALID_NIC_TYPES), constants.HV_DISK_TYPE: hv_base.ParamInSet(True, constants.HT_KVM_VALID_DISK_TYPES), constants.HV_KVM_CDROM_DISK_TYPE: hv_base.ParamInSet(False, constants.HT_KVM_VALID_DISK_TYPES), constants.HV_USB_MOUSE: hv_base.ParamInSet(False, constants.HT_KVM_VALID_MOUSE_TYPES), constants.HV_KEYMAP: hv_base.NO_CHECK, constants.HV_MIGRATION_PORT: hv_base.REQ_NET_PORT_CHECK, constants.HV_MIGRATION_BANDWIDTH: hv_base.REQ_NONNEGATIVE_INT_CHECK, constants.HV_MIGRATION_DOWNTIME: hv_base.REQ_NONNEGATIVE_INT_CHECK, constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK, constants.HV_USE_LOCALTIME: hv_base.NO_CHECK, constants.HV_DISK_CACHE: hv_base.ParamInSet(True, constants.HT_VALID_CACHE_TYPES), constants.HV_SECURITY_MODEL: hv_base.ParamInSet(True, constants.HT_KVM_VALID_SM_TYPES), constants.HV_SECURITY_DOMAIN: hv_base.NO_CHECK, constants.HV_KVM_FLAG: hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES), constants.HV_VHOST_NET: hv_base.NO_CHECK, constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK, constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK, constants.HV_REBOOT_BEHAVIOR: hv_base.ParamInSet(True, constants.REBOOT_BEHAVIORS), constants.HV_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK, constants.HV_CPU_TYPE: 
hv_base.NO_CHECK, constants.HV_CPU_CORES: hv_base.OPT_NONNEGATIVE_INT_CHECK, constants.HV_CPU_THREADS: hv_base.OPT_NONNEGATIVE_INT_CHECK, constants.HV_CPU_SOCKETS: hv_base.OPT_NONNEGATIVE_INT_CHECK, constants.HV_SOUNDHW: hv_base.NO_CHECK, constants.HV_USB_DEVICES: hv_base.NO_CHECK, constants.HV_VGA: hv_base.NO_CHECK, constants.HV_KVM_EXTRA: hv_base.NO_CHECK, constants.HV_KVM_MACHINE_VERSION: hv_base.NO_CHECK, constants.HV_VNET_HDR: hv_base.NO_CHECK, } _VIRTIO = "virtio" _VIRTIO_NET_PCI = "virtio-net-pci" _MIGRATION_STATUS_RE = re.compile(r"Migration\s+status:\s+(\w+)", re.M | re.I) _MIGRATION_PROGRESS_RE = \ re.compile(r"\s*transferred\s+ram:\s+(?P\d+)\s+kbytes\s*\n" r"\s*remaining\s+ram:\s+(?P\d+)\s+kbytes\s*\n" r"\s*total\s+ram:\s+(?P\d+)\s+kbytes\s*\n", re.I) _MIGRATION_INFO_MAX_BAD_ANSWERS = 5 _MIGRATION_INFO_RETRY_DELAY = 2 _VERSION_RE = re.compile(r"\b(\d+)\.(\d+)(\.(\d+))?\b") _CPU_INFO_RE = re.compile(r"cpu\s+\#(\d+).*thread_id\s*=\s*(\d+)", re.I) _CPU_INFO_CMD = "info cpus" _CONT_CMD = "cont" _DEFAULT_MACHINE_VERSION_RE = re.compile(r"^(\S+).*\(default\)", re.M) _CHECK_MACHINE_VERSION_RE = \ staticmethod(lambda x: re.compile(r"^(%s)[ ]+.*PC" % x, re.M)) _QMP_RE = re.compile(r"^-qmp\s", re.M) _SPICE_RE = re.compile(r"^-spice\s", re.M) _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M) _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M) _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M) _NETDEV_RE = re.compile(r"^-netdev\s", re.M) _DISPLAY_RE = re.compile(r"^-display\s", re.M) _MACHINE_RE = re.compile(r"^-machine\s", re.M) _NEW_VIRTIO_RE = re.compile(r"^name \"%s\"" % _VIRTIO_NET_PCI, re.M) # match -drive.*boot=on|off on different lines, but in between accept only # dashes not preceeded by a new line (which would mean another option # different than -drive is starting) _BOOT_RE = re.compile(r"^-drive\s([^-]|(? constants.VNC_BASE_PORT: display = instance.network_port - constants.VNC_BASE_PORT if vnc_bind_address == constants.IP4_ADDRESS_ANY: vnc_arg = ":%d" % (display) else: vnc_arg = "%s:%d" % (vnc_bind_address, display) else: logging.error("Network port is not a valid VNC display (%d < %d)," " not starting VNC", instance.network_port, constants.VNC_BASE_PORT) vnc_arg = "none" # Only allow tls and other option when not binding to a file, for now. # kvm/qemu gets confused otherwise about the filename to use. vnc_append = "" if hvp[constants.HV_VNC_TLS]: vnc_append = "%s,tls" % vnc_append if hvp[constants.HV_VNC_X509_VERIFY]: vnc_append = "%s,x509verify=%s" % (vnc_append, hvp[constants.HV_VNC_X509]) elif hvp[constants.HV_VNC_X509]: vnc_append = "%s,x509=%s" % (vnc_append, hvp[constants.HV_VNC_X509]) if hvp[constants.HV_VNC_PASSWORD_FILE]: vnc_append = "%s,password" % vnc_append vnc_arg = "%s%s" % (vnc_arg, vnc_append) else: vnc_arg = "unix:%s/%s.vnc" % (vnc_bind_address, instance.name) kvm_cmd.extend(["-vnc", vnc_arg]) elif spice_bind: # FIXME: this is wrong here; the iface ip address differs # between systems, so it should be done in _ExecuteKVMRuntime if netutils.IsValidInterface(spice_bind): # The user specified a network interface, we have to figure out the IP # address. 
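        # (Illustrative: GetInterfaceIpAddresses returns a dict keyed by IP
        # version, e.g. {4: ["192.0.2.10"], 6: []} with example values only;
        # the lookups below index it with constants.IP4_VERSION and
        # constants.IP6_VERSION.)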
addresses = netutils.GetInterfaceIpAddresses(spice_bind) spice_ip_version = hvp[constants.HV_KVM_SPICE_IP_VERSION] # if the user specified an IP version and the interface does not # have that kind of IP addresses, throw an exception if spice_ip_version != constants.IFACE_NO_IP_VERSION_SPECIFIED: if not addresses[spice_ip_version]: raise errors.HypervisorError("SPICE: Unable to get an IPv%s address" " for %s" % (spice_ip_version, spice_bind)) # the user did not specify an IP version, we have to figure it out elif (addresses[constants.IP4_VERSION] and addresses[constants.IP6_VERSION]): # we have both ipv4 and ipv6, let's use the cluster default IP # version cluster_family = ssconf.SimpleStore().GetPrimaryIPFamily() spice_ip_version = \ netutils.IPAddress.GetVersionFromAddressFamily(cluster_family) elif addresses[constants.IP4_VERSION]: spice_ip_version = constants.IP4_VERSION elif addresses[constants.IP6_VERSION]: spice_ip_version = constants.IP6_VERSION else: raise errors.HypervisorError("SPICE: Unable to get an IP address" " for %s" % (spice_bind)) spice_address = addresses[spice_ip_version][0] else: # spice_bind is known to be a valid IP address, because # ValidateParameters checked it. spice_address = spice_bind spice_arg = "addr=%s" % spice_address if hvp[constants.HV_KVM_SPICE_USE_TLS]: spice_arg = ("%s,tls-port=%s,x509-cacert-file=%s" % (spice_arg, instance.network_port, pathutils.SPICE_CACERT_FILE)) spice_arg = ("%s,x509-key-file=%s,x509-cert-file=%s" % (spice_arg, pathutils.SPICE_CERT_FILE, pathutils.SPICE_CERT_FILE)) tls_ciphers = hvp[constants.HV_KVM_SPICE_TLS_CIPHERS] if tls_ciphers: spice_arg = "%s,tls-ciphers=%s" % (spice_arg, tls_ciphers) else: spice_arg = "%s,port=%s" % (spice_arg, instance.network_port) if not hvp[constants.HV_KVM_SPICE_PASSWORD_FILE]: spice_arg = "%s,disable-ticketing" % spice_arg if spice_ip_version: spice_arg = "%s,ipv%s" % (spice_arg, spice_ip_version) # Image compression options img_lossless = hvp[constants.HV_KVM_SPICE_LOSSLESS_IMG_COMPR] img_jpeg = hvp[constants.HV_KVM_SPICE_JPEG_IMG_COMPR] img_zlib_glz = hvp[constants.HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR] if img_lossless: spice_arg = "%s,image-compression=%s" % (spice_arg, img_lossless) if img_jpeg: spice_arg = "%s,jpeg-wan-compression=%s" % (spice_arg, img_jpeg) if img_zlib_glz: spice_arg = "%s,zlib-glz-wan-compression=%s" % (spice_arg, img_zlib_glz) # Video stream detection video_streaming = hvp[constants.HV_KVM_SPICE_STREAMING_VIDEO_DETECTION] if video_streaming: spice_arg = "%s,streaming-video=%s" % (spice_arg, video_streaming) # Audio compression, by default in qemu-kvm it is on if not hvp[constants.HV_KVM_SPICE_AUDIO_COMPR]: spice_arg = "%s,playback-compression=off" % spice_arg if not hvp[constants.HV_KVM_SPICE_USE_VDAGENT]: spice_arg = "%s,agent-mouse=off" % spice_arg else: # Enable the spice agent communication channel between the host and the # agent. kvm_cmd.extend(["-device", "virtio-serial-pci"]) kvm_cmd.extend([ "-device", "virtserialport,chardev=spicechannel0,name=com.redhat.spice.0", ]) kvm_cmd.extend(["-chardev", "spicevmc,id=spicechannel0,name=vdagent"]) logging.info("KVM: SPICE will listen on port %s", instance.network_port) kvm_cmd.extend(["-spice", spice_arg]) else: # From qemu 1.4 -nographic is incompatible with -daemonize. 
The new way # also works in earlier versions though (tested with 1.1 and 1.3) if self._DISPLAY_RE.search(kvmhelp): kvm_cmd.extend(["-display", "none"]) else: kvm_cmd.extend(["-nographic"]) if hvp[constants.HV_USE_LOCALTIME]: kvm_cmd.extend(["-localtime"]) if hvp[constants.HV_KVM_USE_CHROOT]: kvm_cmd.extend(["-chroot", self._InstanceChrootDir(instance.name)]) # Add qemu-KVM -cpu param if hvp[constants.HV_CPU_TYPE]: kvm_cmd.extend(["-cpu", hvp[constants.HV_CPU_TYPE]]) # As requested by music lovers if hvp[constants.HV_SOUNDHW]: kvm_cmd.extend(["-soundhw", hvp[constants.HV_SOUNDHW]]) # Pass a -vga option if requested, or if spice is used, for backwards # compatibility. if hvp[constants.HV_VGA]: kvm_cmd.extend(["-vga", hvp[constants.HV_VGA]]) elif spice_bind: kvm_cmd.extend(["-vga", "qxl"]) # Various types of usb devices, comma separated if hvp[constants.HV_USB_DEVICES]: for dev in hvp[constants.HV_USB_DEVICES].split(","): kvm_cmd.extend(["-usbdevice", dev]) # Set system UUID to instance UUID if self._UUID_RE.search(kvmhelp): kvm_cmd.extend(["-uuid", instance.uuid]) if hvp[constants.HV_KVM_EXTRA]: kvm_cmd.extend(hvp[constants.HV_KVM_EXTRA].split(" ")) # Save the current instance nics, but defer their expansion as parameters, # as we'll need to generate executable temp files for them. kvm_nics = instance.nics hvparams = hvp return (kvm_cmd, kvm_nics, hvparams) def _WriteKVMRuntime(self, instance_name, data): """Write an instance's KVM runtime """ try: utils.WriteFile(self._InstanceKVMRuntime(instance_name), data=data) except EnvironmentError, err: raise errors.HypervisorError("Failed to save KVM runtime file: %s" % err) def _ReadKVMRuntime(self, instance_name): """Read an instance's KVM runtime """ try: file_content = utils.ReadFile(self._InstanceKVMRuntime(instance_name)) except EnvironmentError, err: raise errors.HypervisorError("Failed to load KVM runtime file: %s" % err) return file_content def _SaveKVMRuntime(self, instance, kvm_runtime): """Save an instance's KVM runtime """ kvm_cmd, kvm_nics, hvparams = kvm_runtime serialized_nics = [nic.ToDict() for nic in kvm_nics] serialized_form = serializer.Dump((kvm_cmd, serialized_nics, hvparams)) self._WriteKVMRuntime(instance.name, serialized_form) def _LoadKVMRuntime(self, instance, serialized_runtime=None): """Load an instance's KVM runtime """ if not serialized_runtime: serialized_runtime = self._ReadKVMRuntime(instance.name) loaded_runtime = serializer.Load(serialized_runtime) kvm_cmd, serialized_nics, hvparams = loaded_runtime kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics] return (kvm_cmd, kvm_nics, hvparams) def _RunKVMCmd(self, name, kvm_cmd, tap_fds=None): """Run the KVM cmd and check for errors @type name: string @param name: instance name @type kvm_cmd: list of strings @param kvm_cmd: runcmd input for kvm @type tap_fds: list of int @param tap_fds: fds of tap devices opened by Ganeti """ try: result = utils.RunCmd(kvm_cmd, noclose_fds=tap_fds) finally: for fd in tap_fds: utils_wrapper.CloseFdNoError(fd) if result.failed: raise errors.HypervisorError("Failed to start instance %s: %s (%s)" % (name, result.fail_reason, result.output)) if not self._InstancePidAlive(name)[2]: raise errors.HypervisorError("Failed to start instance %s" % name) def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, incoming=None): """Execute a KVM cmd, after completing it with some last minute data. 
@type incoming: tuple of strings @param incoming: (target_host_ip, port) @type kvmhelp: string @param kvmhelp: output of kvm --help """ # Small _ExecuteKVMRuntime hv parameters programming howto: # - conf_hvp contains the parameters as configured on ganeti. they might # have changed since the instance started; only use them if the change # won't affect the inside of the instance (which hasn't been rebooted). # - up_hvp contains the parameters as they were when the instance was # started, plus any new parameter which has been added between ganeti # versions: it is paramount that those default to a value which won't # affect the inside of the instance as well. conf_hvp = instance.hvparams name = instance.name self._CheckDown(name) temp_files = [] kvm_cmd, kvm_nics, up_hvp = kvm_runtime # the first element of kvm_cmd is always the path to the kvm binary kvm_path = kvm_cmd[0] up_hvp = objects.FillDict(conf_hvp, up_hvp) # We know it's safe to run as a different user upon migration, so we'll use # the latest conf, from conf_hvp. security_model = conf_hvp[constants.HV_SECURITY_MODEL] if security_model == constants.HT_SM_USER: kvm_cmd.extend(["-runas", conf_hvp[constants.HV_SECURITY_DOMAIN]]) keymap = conf_hvp[constants.HV_KEYMAP] if keymap: keymap_path = self._InstanceKeymapFile(name) # If a keymap file is specified, KVM won't use its internal defaults. By # first including the "en-us" layout, an error on loading the actual # layout (e.g. because it can't be found) won't lead to a non-functional # keyboard. A keyboard with incorrect keys is still better than none. utils.WriteFile(keymap_path, data="include en-us\ninclude %s\n" % keymap) kvm_cmd.extend(["-k", keymap_path]) # We have reasons to believe changing something like the nic driver/type # upon migration won't exactly fly with the instance kernel, so for nic # related parameters we'll use up_hvp tapfds = [] taps = [] if not kvm_nics: kvm_cmd.extend(["-net", "none"]) else: vnet_hdr = False tap_extra = "" nic_type = up_hvp[constants.HV_NIC_TYPE] if nic_type == constants.HT_NIC_PARAVIRTUAL: nic_model = self._VIRTIO try: devlist = self._GetKVMOutput(kvm_path, self._KVMOPT_DEVICELIST) if self._NEW_VIRTIO_RE.search(devlist): nic_model = self._VIRTIO_NET_PCI vnet_hdr = up_hvp[constants.HV_VNET_HDR] except errors.HypervisorError, _: # Older versions of kvm don't support DEVICE_LIST, but they don't # have new virtio syntax either. pass if up_hvp[constants.HV_VHOST_NET]: # check for vhost_net support if self._VHOST_RE.search(kvmhelp): tap_extra = ",vhost=on" else: raise errors.HypervisorError("vhost_net is configured" " but it is not available") else: nic_model = nic_type kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp) for nic_seq, nic in enumerate(kvm_nics): tapname, tapfd = _OpenTap(vnet_hdr=vnet_hdr) tapfds.append(tapfd) taps.append(tapname) if kvm_supports_netdev: nic_val = "%s,mac=%s,netdev=netdev%s" % (nic_model, nic.mac, nic_seq) tap_val = "type=tap,id=netdev%s,fd=%d%s" % (nic_seq, tapfd, tap_extra) kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val]) else: nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq, nic.mac, nic_model) tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd) kvm_cmd.extend(["-net", tap_val, "-net", nic_val]) if incoming: target, port = incoming kvm_cmd.extend(["-incoming", "tcp:%s:%s" % (target, port)]) # Changing the vnc password doesn't bother the guest that much. At most it # will surprise people who connect to it. Whether positively or negatively # it's debatable. 
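    # (The password read here is applied later in this method, once the
    # process is running, via the monitor command "change vnc password
    # <pwd>".)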
vnc_pwd_file = conf_hvp[constants.HV_VNC_PASSWORD_FILE] vnc_pwd = None if vnc_pwd_file: try: vnc_pwd = utils.ReadOneLineFile(vnc_pwd_file, strict=True) except EnvironmentError, err: raise errors.HypervisorError("Failed to open VNC password file %s: %s" % (vnc_pwd_file, err)) if conf_hvp[constants.HV_KVM_USE_CHROOT]: utils.EnsureDirs([(self._InstanceChrootDir(name), constants.SECURE_DIR_MODE)]) # Automatically enable QMP if version is >= 0.14 if self._QMP_RE.search(kvmhelp): logging.debug("Enabling QMP") kvm_cmd.extend(["-qmp", "unix:%s,server,nowait" % self._InstanceQmpMonitor(instance.name)]) # Configure the network now for starting instances and bridged interfaces, # during FinalizeMigration for incoming instances' routed interfaces for nic_seq, nic in enumerate(kvm_nics): if (incoming and nic.nicparams[constants.NIC_MODE] != constants.NIC_MODE_BRIDGED): continue self._ConfigureNIC(instance, nic_seq, nic, taps[nic_seq]) # CPU affinity requires kvm to start paused, so we set this flag if the # instance is not already paused and if we are not going to accept a # migrating instance. In the latter case, pausing is not needed. start_kvm_paused = not (_KVM_START_PAUSED_FLAG in kvm_cmd) and not incoming if start_kvm_paused: kvm_cmd.extend([_KVM_START_PAUSED_FLAG]) # Note: CPU pinning is using up_hvp since changes take effect # during instance startup anyway, and to avoid problems when soft # rebooting the instance. cpu_pinning = False if up_hvp.get(constants.HV_CPU_MASK, None): cpu_pinning = True if security_model == constants.HT_SM_POOL: ss = ssconf.SimpleStore() uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n") all_uids = set(uidpool.ExpandUidPool(uid_pool)) uid = uidpool.RequestUnusedUid(all_uids) try: username = pwd.getpwuid(uid.GetUid()).pw_name kvm_cmd.extend(["-runas", username]) self._RunKVMCmd(name, kvm_cmd, tapfds) except: uidpool.ReleaseUid(uid) raise else: uid.Unlock() utils.WriteFile(self._InstanceUidFile(name), data=uid.AsStr()) else: self._RunKVMCmd(name, kvm_cmd, tapfds) utils.EnsureDirs([(self._InstanceNICDir(instance.name), constants.RUN_DIRS_MODE)]) for nic_seq, tap in enumerate(taps): utils.WriteFile(self._InstanceNICFile(instance.name, nic_seq), data=tap) if vnc_pwd: change_cmd = "change vnc password %s" % vnc_pwd self._CallMonitorCommand(instance.name, change_cmd) # Setting SPICE password. We are not vulnerable to malicious passwordless # connection attempts because SPICE by default does not allow connections # if neither a password nor the "disable_ticketing" options are specified. # As soon as we send the password via QMP, that password is a valid ticket # for connection. 
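    # On the wire, the QMP exchange built below corresponds to sending
    # (password value hypothetical):
    #   {"execute": "set_password",
    #    "arguments": {"protocol": "spice", "password": "<secret>"}}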
spice_password_file = conf_hvp[constants.HV_KVM_SPICE_PASSWORD_FILE] if spice_password_file: spice_pwd = "" try: spice_pwd = utils.ReadOneLineFile(spice_password_file, strict=True) except EnvironmentError, err: raise errors.HypervisorError("Failed to open SPICE password file %s: %s" % (spice_password_file, err)) qmp = QmpConnection(self._InstanceQmpMonitor(instance.name)) qmp.connect() arguments = { "protocol": "spice", "password": spice_pwd, } qmp.Execute("set_password", arguments) for filename in temp_files: utils.RemoveFile(filename) # If requested, set CPU affinity and resume instance execution if cpu_pinning: self._ExecuteCpuAffinity(instance.name, up_hvp[constants.HV_CPU_MASK]) start_memory = self._InstanceStartupMemory(instance) if start_memory < instance.beparams[constants.BE_MAXMEM]: self.BalloonInstanceMemory(instance, start_memory) if start_kvm_paused: # To control CPU pinning, ballooning, and vnc/spice passwords # the VM was started in a frozen state. If freezing was not # explicitly requested resume the vm status. self._CallMonitorCommand(instance.name, self._CONT_CMD) def StartInstance(self, instance, block_devices, startup_paused): """Start an instance. """ self._CheckDown(instance.name) kvmpath = instance.hvparams[constants.HV_KVM_PATH] kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) kvm_runtime = self._GenerateKVMRuntime(instance, block_devices, startup_paused, kvmhelp) self._SaveKVMRuntime(instance, kvm_runtime) self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp) def _CallMonitorCommand(self, instance_name, command): """Invoke a command on the instance monitor. """ # TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum # version. The monitor protocol is designed for human consumption, whereas # QMP is made for programmatic usage. In the worst case QMP can also # execute monitor commands. As it is, all calls to socat take at least # 500ms and likely more: socat can't detect the end of the reply and waits # for 500ms of no data received before exiting (500 ms is the default for # the "-t" parameter). socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" % (utils.ShellQuote(command), constants.SOCAT_PATH, utils.ShellQuote(self._InstanceMonitor(instance_name)))) result = utils.RunCmd(socat) if result.failed: msg = ("Failed to send command '%s' to instance '%s', reason '%s'," " output: %s" % (command, instance_name, result.fail_reason, result.output)) raise errors.HypervisorError(msg) return result @classmethod def _ParseKVMVersion(cls, text): """Parse the KVM version from the --help output. 
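    Example: a (hypothetical) first help line such as
    "QEMU emulator version 1.1.2 (qemu-kvm-1.1.2), ..." yields
    ("1.1.2", 1, 1, 2).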
@type text: string @param text: output of kvm --help @return: (version, v_maj, v_min, v_rev) @raise errors.HypervisorError: when the KVM version cannot be retrieved """ match = cls._VERSION_RE.search(text.splitlines()[0]) if not match: raise errors.HypervisorError("Unable to get KVM version") v_all = match.group(0) v_maj = int(match.group(1)) v_min = int(match.group(2)) if match.group(4): v_rev = int(match.group(4)) else: v_rev = 0 return (v_all, v_maj, v_min, v_rev) @classmethod def _GetKVMOutput(cls, kvm_path, option): """Return the output of a kvm invocation @type kvm_path: string @param kvm_path: path to the kvm executable @type option: a key of _KVMOPTS_CMDS @param option: kvm option to fetch the output from @return: output a supported kvm invocation @raise errors.HypervisorError: when the KVM help output cannot be retrieved """ assert option in cls._KVMOPTS_CMDS, "Invalid output option" optlist, can_fail = cls._KVMOPTS_CMDS[option] result = utils.RunCmd([kvm_path] + optlist) if result.failed and not can_fail: raise errors.HypervisorError("Unable to get KVM %s output" % " ".join(optlist)) return result.output @classmethod def _GetKVMVersion(cls, kvm_path): """Return the installed KVM version. @return: (version, v_maj, v_min, v_rev) @raise errors.HypervisorError: when the KVM version cannot be retrieved """ return cls._ParseKVMVersion(cls._GetKVMOutput(kvm_path, cls._KVMOPT_HELP)) @classmethod def _GetDefaultMachineVersion(cls, kvm_path): """Return the default hardware revision (e.g. pc-1.1) """ output = cls._GetKVMOutput(kvm_path, cls._KVMOPT_MLIST) match = cls._DEFAULT_MACHINE_VERSION_RE.search(output) if match: return match.group(1) else: return "pc" def StopInstance(self, instance, force=False, retry=False, name=None): """Stop an instance. """ if name is not None and not force: raise errors.HypervisorError("Cannot shutdown cleanly by name only") if name is None: name = instance.name acpi = instance.hvparams[constants.HV_ACPI] else: acpi = False _, pid, alive = self._InstancePidAlive(name) if pid > 0 and alive: if force or not acpi: utils.KillProcess(pid) else: self._CallMonitorCommand(name, "system_powerdown") def CleanupInstance(self, instance_name): """Cleanup after a stopped instance """ pidfile, pid, alive = self._InstancePidAlive(instance_name) if pid > 0 and alive: raise errors.HypervisorError("Cannot cleanup a live instance") self._RemoveInstanceRuntimeFiles(pidfile, instance_name) def RebootInstance(self, instance): """Reboot an instance. """ # For some reason if we do a 'send-key ctrl-alt-delete' to the control # socket the instance will stop, but now power up again. So we'll resort # to shutdown and restart. _, _, alive = self._InstancePidAlive(instance.name) if not alive: raise errors.HypervisorError("Failed to reboot instance %s:" " not running" % instance.name) # StopInstance will delete the saved KVM runtime so: # ...first load it... kvm_runtime = self._LoadKVMRuntime(instance) # ...now we can safely call StopInstance... if not self.StopInstance(instance): self.StopInstance(instance, force=True) # ...and finally we can save it again, and execute it... self._SaveKVMRuntime(instance, kvm_runtime) kvmpath = instance.hvparams[constants.HV_KVM_PATH] kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp) def MigrationInfo(self, instance): """Get instance information to perform a migration. 
@type instance: L{objects.Instance} @param instance: instance to be migrated @rtype: string @return: content of the KVM runtime file """ return self._ReadKVMRuntime(instance.name) def AcceptInstance(self, instance, info, target): """Prepare to accept an instance. @type instance: L{objects.Instance} @param instance: instance to be accepted @type info: string @param info: content of the KVM runtime file on the source node @type target: string @param target: target host (usually ip), on this node """ kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info) incoming_address = (target, instance.hvparams[constants.HV_MIGRATION_PORT]) kvmpath = instance.hvparams[constants.HV_KVM_PATH] kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP) self._ExecuteKVMRuntime(instance, kvm_runtime, kvmhelp, incoming=incoming_address) def FinalizeMigrationDst(self, instance, info, success): """Finalize the instance migration on the target node. Stop the incoming mode KVM. @type instance: L{objects.Instance} @param instance: instance whose migration is being finalized """ if success: kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info) kvm_nics = kvm_runtime[1] for nic_seq, nic in enumerate(kvm_nics): if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: # Bridged interfaces have already been configured continue try: tap = utils.ReadFile(self._InstanceNICFile(instance.name, nic_seq)) except EnvironmentError, err: logging.warning("Failed to find host interface for %s NIC #%d: %s", instance.name, nic_seq, str(err)) continue try: self._ConfigureNIC(instance, nic_seq, nic, tap) except errors.HypervisorError, err: logging.warning(str(err)) self._WriteKVMRuntime(instance.name, info) else: self.StopInstance(instance, force=True) def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance to a target node. The migration will not be attempted if the instance is not currently running. @type cluster_name: string @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @param target: ip address of the target node @type live: boolean @param live: perform a live migration """ instance_name = instance.name port = instance.hvparams[constants.HV_MIGRATION_PORT] _, _, alive = self._InstancePidAlive(instance_name) if not alive: raise errors.HypervisorError("Instance not running, cannot migrate") if not live: self._CallMonitorCommand(instance_name, "stop") migrate_command = ("migrate_set_speed %dm" % instance.hvparams[constants.HV_MIGRATION_BANDWIDTH]) self._CallMonitorCommand(instance_name, migrate_command) migrate_command = ("migrate_set_downtime %dms" % instance.hvparams[constants.HV_MIGRATION_DOWNTIME]) self._CallMonitorCommand(instance_name, migrate_command) migrate_command = "migrate -d tcp:%s:%s" % (target, port) self._CallMonitorCommand(instance_name, migrate_command) def FinalizeMigrationSource(self, instance, success, live): """Finalize the instance migration on the source node. 
@type instance: L{objects.Instance} @param instance: the instance that was migrated @type success: bool @param success: whether the migration succeeded or not @type live: bool @param live: whether the user requested a live migration or not """ if success: pidfile, pid, _ = self._InstancePidAlive(instance.name) utils.KillProcess(pid) self._RemoveInstanceRuntimeFiles(pidfile, instance.name) elif live: self._CallMonitorCommand(instance.name, self._CONT_CMD) def GetMigrationStatus(self, instance): """Get the migration status @type instance: L{objects.Instance} @param instance: the instance that is being migrated @rtype: L{objects.MigrationStatus} @return: the status of the current migration (one of L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional progress info that can be retrieved from the hypervisor """ info_command = "info migrate" for _ in range(self._MIGRATION_INFO_MAX_BAD_ANSWERS): result = self._CallMonitorCommand(instance.name, info_command) match = self._MIGRATION_STATUS_RE.search(result.stdout) if not match: if not result.stdout: logging.info("KVM: empty 'info migrate' result") else: logging.warning("KVM: unknown 'info migrate' result: %s", result.stdout) else: status = match.group(1) if status in constants.HV_KVM_MIGRATION_VALID_STATUSES: migration_status = objects.MigrationStatus(status=status) match = self._MIGRATION_PROGRESS_RE.search(result.stdout) if match: migration_status.transferred_ram = match.group("transferred") migration_status.total_ram = match.group("total") return migration_status logging.warning("KVM: unknown migration status '%s'", status) time.sleep(self._MIGRATION_INFO_RETRY_DELAY) return objects.MigrationStatus(status=constants.HV_MIGRATION_FAILED) def BalloonInstanceMemory(self, instance, mem): """Balloon an instance memory to a certain value. @type instance: L{objects.Instance} @param instance: instance to be accepted @type mem: int @param mem: actual memory size to use for instance runtime """ self._CallMonitorCommand(instance.name, "balloon %d" % mem) def GetNodeInfo(self, hvparams=None): """Return information about the node. @type hvparams: dict of strings @param hvparams: hypervisor parameters, not used in this class @return: a dict as returned by L{BaseHypervisor.GetLinuxNodeInfo} plus the following keys: - hv_version: the hypervisor version in the form (major, minor, revision) """ result = self.GetLinuxNodeInfo() kvmpath = constants.KVM_PATH if hvparams is not None: kvmpath = hvparams.get(constants.HV_KVM_PATH, constants.KVM_PATH) _, v_major, v_min, v_rev = self._GetKVMVersion(kvmpath) result[constants.HV_NODEINFO_KEY_VERSION] = (v_major, v_min, v_rev) return result @classmethod def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return a command for connecting to the console of an instance. 
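    Depending on the hypervisor parameters, the returned
    L{objects.InstanceConsole} is of kind CONS_SSH (serial console through
    socat), CONS_VNC, CONS_SPICE or, as a fallback, CONS_MESSAGE.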
""" if hvparams[constants.HV_SERIAL_CONSOLE]: cmd = [pathutils.KVM_CONSOLE_WRAPPER, constants.SOCAT_PATH, utils.ShellQuote(instance.name), utils.ShellQuote(cls._InstanceMonitor(instance.name)), "STDIO,%s" % cls._SocatUnixConsoleParams(), "UNIX-CONNECT:%s" % cls._InstanceSerial(instance.name)] return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_SSH, host=primary_node.name, user=constants.SSH_CONSOLE_USER, command=cmd) vnc_bind_address = hvparams[constants.HV_VNC_BIND_ADDRESS] if vnc_bind_address and instance.network_port > constants.VNC_BASE_PORT: display = instance.network_port - constants.VNC_BASE_PORT return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_VNC, host=vnc_bind_address, port=instance.network_port, display=display) spice_bind = hvparams[constants.HV_KVM_SPICE_BIND] if spice_bind: return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_SPICE, host=spice_bind, port=instance.network_port) return objects.InstanceConsole(instance=instance.name, kind=constants.CONS_MESSAGE, message=("No serial shell for instance %s" % instance.name)) def Verify(self, hvparams=None): """Verify the hypervisor. Check that the required binaries exist. @type hvparams: dict of strings @param hvparams: hypervisor parameters to be verified against, not used here @return: Problem description if something is wrong, C{None} otherwise """ msgs = [] kvmpath = constants.KVM_PATH if hvparams is not None: kvmpath = hvparams.get(constants.HV_KVM_PATH, constants.KVM_PATH) if not os.path.exists(kvmpath): msgs.append("The KVM binary ('%s') does not exist" % kvmpath) if not os.path.exists(constants.SOCAT_PATH): msgs.append("The socat binary ('%s') does not exist" % constants.SOCAT_PATH) return self._FormatVerifyResults(msgs) @classmethod def CheckParameterSyntax(cls, hvparams): """Check the given parameters for validity. 
@type hvparams: dict @param hvparams: dictionary with parameter names/value @raise errors.HypervisorError: when a parameter is not valid """ super(KVMHypervisor, cls).CheckParameterSyntax(hvparams) kernel_path = hvparams[constants.HV_KERNEL_PATH] if kernel_path: if not hvparams[constants.HV_ROOT_PATH]: raise errors.HypervisorError("Need a root partition for the instance," " if a kernel is defined") if (hvparams[constants.HV_VNC_X509_VERIFY] and not hvparams[constants.HV_VNC_X509]): raise errors.HypervisorError("%s must be defined, if %s is" % (constants.HV_VNC_X509, constants.HV_VNC_X509_VERIFY)) if hvparams[constants.HV_SERIAL_CONSOLE]: serial_speed = hvparams[constants.HV_SERIAL_SPEED] valid_speeds = constants.VALID_SERIAL_SPEEDS if not serial_speed or serial_speed not in valid_speeds: raise errors.HypervisorError("Invalid serial console speed, must be" " one of: %s" % utils.CommaJoin(valid_speeds)) boot_order = hvparams[constants.HV_BOOT_ORDER] if (boot_order == constants.HT_BO_CDROM and not hvparams[constants.HV_CDROM_IMAGE_PATH]): raise errors.HypervisorError("Cannot boot from cdrom without an" " ISO path") security_model = hvparams[constants.HV_SECURITY_MODEL] if security_model == constants.HT_SM_USER: if not hvparams[constants.HV_SECURITY_DOMAIN]: raise errors.HypervisorError("A security domain (user to run kvm as)" " must be specified") elif (security_model == constants.HT_SM_NONE or security_model == constants.HT_SM_POOL): if hvparams[constants.HV_SECURITY_DOMAIN]: raise errors.HypervisorError("Cannot have a security domain when the" " security model is 'none' or 'pool'") spice_bind = hvparams[constants.HV_KVM_SPICE_BIND] spice_ip_version = hvparams[constants.HV_KVM_SPICE_IP_VERSION] if spice_bind: if spice_ip_version != constants.IFACE_NO_IP_VERSION_SPECIFIED: # if an IP version is specified, the spice_bind parameter must be an # IP of that family if (netutils.IP4Address.IsValid(spice_bind) and spice_ip_version != constants.IP4_VERSION): raise errors.HypervisorError("SPICE: Got an IPv4 address (%s), but" " the specified IP version is %s" % (spice_bind, spice_ip_version)) if (netutils.IP6Address.IsValid(spice_bind) and spice_ip_version != constants.IP6_VERSION): raise errors.HypervisorError("SPICE: Got an IPv6 address (%s), but" " the specified IP version is %s" % (spice_bind, spice_ip_version)) else: # All the other SPICE parameters depend on spice_bind being set. Raise an # error if any of them is set without it. for param in _SPICE_ADDITIONAL_PARAMS: if hvparams[param]: raise errors.HypervisorError("SPICE: %s requires %s to be set" % (param, constants.HV_KVM_SPICE_BIND)) @classmethod def ValidateParameters(cls, hvparams): """Check the given parameters for validity. 
@type hvparams: dict @param hvparams: dictionary with parameter names/value @raise errors.HypervisorError: when a parameter is not valid """ super(KVMHypervisor, cls).ValidateParameters(hvparams) kvm_path = hvparams[constants.HV_KVM_PATH] security_model = hvparams[constants.HV_SECURITY_MODEL] if security_model == constants.HT_SM_USER: username = hvparams[constants.HV_SECURITY_DOMAIN] try: pwd.getpwnam(username) except KeyError: raise errors.HypervisorError("Unknown security domain user %s" % username) vnc_bind_address = hvparams[constants.HV_VNC_BIND_ADDRESS] if vnc_bind_address: bound_to_addr = netutils.IP4Address.IsValid(vnc_bind_address) is_interface = netutils.IsValidInterface(vnc_bind_address) is_path = utils.IsNormAbsPath(vnc_bind_address) if not bound_to_addr and not is_interface and not is_path: raise errors.HypervisorError("VNC: The %s parameter must be either" " a valid IP address, an interface name," " or an absolute path" % constants.HV_KVM_SPICE_BIND) spice_bind = hvparams[constants.HV_KVM_SPICE_BIND] if spice_bind: # only one of VNC and SPICE can be used currently. if hvparams[constants.HV_VNC_BIND_ADDRESS]: raise errors.HypervisorError("Both SPICE and VNC are configured, but" " only one of them can be used at a" " given time") # check that KVM supports SPICE kvmhelp = cls._GetKVMOutput(kvm_path, cls._KVMOPT_HELP) if not cls._SPICE_RE.search(kvmhelp): raise errors.HypervisorError("SPICE is configured, but it is not" " supported according to 'kvm --help'") # if spice_bind is not an IP address, it must be a valid interface bound_to_addr = (netutils.IP4Address.IsValid(spice_bind) or netutils.IP6Address.IsValid(spice_bind)) if not bound_to_addr and not netutils.IsValidInterface(spice_bind): raise errors.HypervisorError("SPICE: The %s parameter must be either" " a valid IP address or interface name" % constants.HV_KVM_SPICE_BIND) machine_version = hvparams[constants.HV_KVM_MACHINE_VERSION] if machine_version: output = cls._GetKVMOutput(kvm_path, cls._KVMOPT_MLIST) if not cls._CHECK_MACHINE_VERSION_RE(machine_version).search(output): raise errors.HypervisorError("Unsupported machine version: %s" % machine_version) @classmethod def PowercycleNode(cls, hvparams=None): """KVM powercycle, just a wrapper over Linux powercycle. @type hvparams: dict of strings @param hvparams: hypervisor params to be used on this node """ cls.LinuxPowercycle() ganeti-2.9.3/lib/objects.py0000644000000000000000000020305512271422343015550 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Transportable objects for Ganeti. This module provides small, mostly data-only objects which are safe to pass to and from external parties. 
""" # pylint: disable=E0203,W0201,R0902 # E0203: Access to member %r before its definition, since we use # objects.py which doesn't explicitly initialise its members # W0201: Attribute '%s' defined outside __init__ # R0902: Allow instances of these objects to have more than 20 attributes import ConfigParser import re import copy import logging import time from cStringIO import StringIO from ganeti import errors from ganeti import constants from ganeti import netutils from ganeti import outils from ganeti import utils from socket import AF_INET __all__ = ["ConfigObject", "ConfigData", "NIC", "Disk", "Instance", "OS", "Node", "NodeGroup", "Cluster", "FillDict", "Network"] _TIMESTAMPS = ["ctime", "mtime"] _UUID = ["uuid"] def FillDict(defaults_dict, custom_dict, skip_keys=None): """Basic function to apply settings on top a default dict. @type defaults_dict: dict @param defaults_dict: dictionary holding the default values @type custom_dict: dict @param custom_dict: dictionary holding customized value @type skip_keys: list @param skip_keys: which keys not to fill @rtype: dict @return: dict with the 'full' values """ ret_dict = copy.deepcopy(defaults_dict) ret_dict.update(custom_dict) if skip_keys: for k in skip_keys: try: del ret_dict[k] except KeyError: pass return ret_dict def FillIPolicy(default_ipolicy, custom_ipolicy): """Fills an instance policy with defaults. """ assert frozenset(default_ipolicy.keys()) == constants.IPOLICY_ALL_KEYS ret_dict = copy.deepcopy(custom_ipolicy) for key in default_ipolicy: if key not in ret_dict: ret_dict[key] = copy.deepcopy(default_ipolicy[key]) elif key == constants.ISPECS_STD: ret_dict[key] = FillDict(default_ipolicy[key], ret_dict[key]) return ret_dict def FillDiskParams(default_dparams, custom_dparams, skip_keys=None): """Fills the disk parameter defaults. @see: L{FillDict} for parameters and return value """ assert frozenset(default_dparams.keys()) == constants.DISK_TEMPLATES return dict((dt, FillDict(default_dparams[dt], custom_dparams.get(dt, {}), skip_keys=skip_keys)) for dt in constants.DISK_TEMPLATES) def UpgradeGroupedParams(target, defaults): """Update all groups for the target parameter. @type target: dict of dicts @param target: {group: {parameter: value}} @type defaults: dict @param defaults: default parameter values """ if target is None: target = {constants.PP_DEFAULT: defaults} else: for group in target: target[group] = FillDict(defaults, target[group]) return target def UpgradeBeParams(target): """Update the be parameters dict to the new format. @type target: dict @param target: "be" parameters dict """ if constants.BE_MEMORY in target: memory = target[constants.BE_MEMORY] target[constants.BE_MAXMEM] = memory target[constants.BE_MINMEM] = memory del target[constants.BE_MEMORY] def UpgradeDiskParams(diskparams): """Upgrade the disk parameters. @type diskparams: dict @param diskparams: disk parameters to upgrade @rtype: dict @return: the upgraded disk parameters dict """ if not diskparams: result = {} else: result = FillDiskParams(constants.DISK_DT_DEFAULTS, diskparams) return result def UpgradeNDParams(ndparams): """Upgrade ndparams structure. 
  @type ndparams: dict
  @param ndparams: node parameters to upgrade
  @rtype: dict
  @return: the upgraded node parameters dict

  """
  if ndparams is None:
    ndparams = {}

  if (constants.ND_OOB_PROGRAM in ndparams and
      ndparams[constants.ND_OOB_PROGRAM] is None):
    # will be reset by the line below
    del ndparams[constants.ND_OOB_PROGRAM]
  return FillDict(constants.NDC_DEFAULTS, ndparams)


def MakeEmptyIPolicy():
  """Create empty IPolicy dictionary.

  """
  return {}


class ConfigObject(outils.ValidatedSlots):
  """A generic config object.

  It has the following properties:

    - provides somewhat safe recursive unpickling and pickling for its
      classes
    - unset attributes which are defined in slots are always returned
      as None instead of raising an error

  Classes derived from this must always declare __slots__ (we use many
  config objects and the memory reduction is useful)

  """
  __slots__ = []

  def __getattr__(self, name):
    if name not in self.GetAllSlots():
      raise AttributeError("Invalid object attribute %s.%s" %
                           (type(self).__name__, name))
    return None

  def __setstate__(self, state):
    slots = self.GetAllSlots()
    for name in state:
      if name in slots:
        setattr(self, name, state[name])

  def Validate(self):
    """Validates the slots.

    """

  def ToDict(self):
    """Convert to a dict holding only standard python types.

    The generic routine just dumps all of this object's attributes in
    a dict. It does not work if the class has children which are
    ConfigObjects themselves (e.g. the nics list in an Instance), in
    which case the object should subclass the function in order to
    make sure all objects returned are only standard python types.

    """
    result = {}
    for name in self.GetAllSlots():
      value = getattr(self, name, None)
      if value is not None:
        result[name] = value
    return result

  __getstate__ = ToDict

  @classmethod
  def FromDict(cls, val):
    """Create an object from a dictionary.

    This generic routine takes a dict, instantiates a new instance of
    the given class, and sets attributes based on the dict content.

    As for `ToDict`, this does not work if the class has children
    which are ConfigObjects themselves (e.g. the nics list in an
    Instance), in which case the object should subclass the function
    and alter the objects.

    """
    if not isinstance(val, dict):
      raise errors.ConfigurationError("Invalid object passed to FromDict:"
                                      " expected dict, got %s" % type(val))
    val_str = dict([(str(k), v) for k, v in val.iteritems()])
    obj = cls(**val_str) # pylint: disable=W0142
    return obj

  def Copy(self):
    """Makes a deep copy of the current object and its children.

    """
    dict_form = self.ToDict()
    clone_obj = self.__class__.FromDict(dict_form)
    return clone_obj

  def __repr__(self):
    """Implement __repr__ for ConfigObjects."""
    return repr(self.ToDict())

  def UpgradeConfig(self):
    """Fill defaults for missing configuration values.

    This method will be called at configuration load time, and its
    implementation will be object dependent.

    """
    pass


class TaggableObject(ConfigObject):
  """A generic class supporting tags.

  """
  __slots__ = ["tags"]
  VALID_TAG_RE = re.compile(r"^[\w.+*/:@-]+$")

  @classmethod
  def ValidateTag(cls, tag):
    """Check if a tag is valid.

    If the tag is invalid, an errors.TagError will be raised. The
    function has no return value.
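
    Example (illustrative tags)::

      TaggableObject.ValidateTag("web:prod")   # passes silently
      TaggableObject.ValidateTag("bad tag")    # raises errors.TagError
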
""" if not isinstance(tag, basestring): raise errors.TagError("Invalid tag type (not a string)") if len(tag) > constants.MAX_TAG_LEN: raise errors.TagError("Tag too long (>%d characters)" % constants.MAX_TAG_LEN) if not tag: raise errors.TagError("Tags cannot be empty") if not cls.VALID_TAG_RE.match(tag): raise errors.TagError("Tag contains invalid characters") def GetTags(self): """Return the tags list. """ tags = getattr(self, "tags", None) if tags is None: tags = self.tags = set() return tags def AddTag(self, tag): """Add a new tag. """ self.ValidateTag(tag) tags = self.GetTags() if len(tags) >= constants.MAX_TAGS_PER_OBJ: raise errors.TagError("Too many tags") self.GetTags().add(tag) def RemoveTag(self, tag): """Remove a tag. """ self.ValidateTag(tag) tags = self.GetTags() try: tags.remove(tag) except KeyError: raise errors.TagError("Tag not found") def ToDict(self): """Taggable-object-specific conversion to standard python types. This replaces the tags set with a list. """ bo = super(TaggableObject, self).ToDict() tags = bo.get("tags", None) if isinstance(tags, set): bo["tags"] = list(tags) return bo @classmethod def FromDict(cls, val): """Custom function for instances. """ obj = super(TaggableObject, cls).FromDict(val) if hasattr(obj, "tags") and isinstance(obj.tags, list): obj.tags = set(obj.tags) return obj class MasterNetworkParameters(ConfigObject): """Network configuration parameters for the master @ivar uuid: master nodes UUID @ivar ip: master IP @ivar netmask: master netmask @ivar netdev: master network device @ivar ip_family: master IP family """ __slots__ = [ "uuid", "ip", "netmask", "netdev", "ip_family", ] class ConfigData(ConfigObject): """Top-level config object.""" __slots__ = [ "version", "cluster", "nodes", "nodegroups", "instances", "networks", "serial_no", ] + _TIMESTAMPS def ToDict(self): """Custom function for top-level config data. This just replaces the list of instances, nodes and the cluster with standard python types. """ mydict = super(ConfigData, self).ToDict() mydict["cluster"] = mydict["cluster"].ToDict() for key in "nodes", "instances", "nodegroups", "networks": mydict[key] = outils.ContainerToDicts(mydict[key]) return mydict @classmethod def FromDict(cls, val): """Custom function for top-level config data """ obj = super(ConfigData, cls).FromDict(val) obj.cluster = Cluster.FromDict(obj.cluster) obj.nodes = outils.ContainerFromDicts(obj.nodes, dict, Node) obj.instances = \ outils.ContainerFromDicts(obj.instances, dict, Instance) obj.nodegroups = \ outils.ContainerFromDicts(obj.nodegroups, dict, NodeGroup) obj.networks = outils.ContainerFromDicts(obj.networks, dict, Network) return obj def HasAnyDiskOfType(self, dev_type): """Check if in there is at disk of the given type in the configuration. @type dev_type: L{constants.DTS_BLOCK} @param dev_type: the type to look for @rtype: boolean @return: boolean indicating if a disk of the given type was found or not """ for instance in self.instances.values(): for disk in instance.disks: if disk.IsBasedOnDiskType(dev_type): return True return False def UpgradeConfig(self): """Fill defaults for missing configuration values. 
""" self.cluster.UpgradeConfig() for node in self.nodes.values(): node.UpgradeConfig() for instance in self.instances.values(): instance.UpgradeConfig() self._UpgradeEnabledDiskTemplates() if self.nodegroups is None: self.nodegroups = {} for nodegroup in self.nodegroups.values(): nodegroup.UpgradeConfig() InstancePolicy.UpgradeDiskTemplates( nodegroup.ipolicy, self.cluster.enabled_disk_templates) if self.cluster.drbd_usermode_helper is None: # To decide if we set an helper let's check if at least one instance has # a DRBD disk. This does not cover all the possible scenarios but it # gives a good approximation. if self.HasAnyDiskOfType(constants.DT_DRBD8): self.cluster.drbd_usermode_helper = constants.DEFAULT_DRBD_HELPER if self.networks is None: self.networks = {} for network in self.networks.values(): network.UpgradeConfig() def _UpgradeEnabledDiskTemplates(self): """Upgrade the cluster's enabled disk templates by inspecting the currently enabled and/or used disk templates. """ if not self.cluster.enabled_disk_templates: template_set = \ set([inst.disk_template for inst in self.instances.values()]) # Add drbd and plain, if lvm is enabled (by specifying a volume group) if self.cluster.volume_group_name: template_set.add(constants.DT_DRBD8) template_set.add(constants.DT_PLAIN) # Set enabled_disk_templates to the inferred disk templates. Order them # according to a preference list that is based on Ganeti's history of # supported disk templates. self.cluster.enabled_disk_templates = [] for preferred_template in constants.DISK_TEMPLATE_PREFERENCE: if preferred_template in template_set: self.cluster.enabled_disk_templates.append(preferred_template) template_set.remove(preferred_template) self.cluster.enabled_disk_templates.extend(list(template_set)) InstancePolicy.UpgradeDiskTemplates( self.cluster.ipolicy, self.cluster.enabled_disk_templates) class NIC(ConfigObject): """Config object representing a network card.""" __slots__ = ["name", "mac", "ip", "network", "nicparams", "netinfo"] + _UUID @classmethod def CheckParameterSyntax(cls, nicparams): """Check the given parameters for validity. @type nicparams: dict @param nicparams: dictionary with parameter names/value @raise errors.ConfigurationError: when a parameter is not valid """ mode = nicparams[constants.NIC_MODE] if (mode not in constants.NIC_VALID_MODES and mode != constants.VALUE_AUTO): raise errors.ConfigurationError("Invalid NIC mode '%s'" % mode) if (mode == constants.NIC_MODE_BRIDGED and not nicparams[constants.NIC_LINK]): raise errors.ConfigurationError("Missing bridged NIC link") class Disk(ConfigObject): """Config object representing a block device.""" __slots__ = (["name", "dev_type", "logical_id", "physical_id", "children", "iv_name", "size", "mode", "params", "spindles"] + _UUID) def CreateOnSecondary(self): """Test if this device needs to be created on a secondary node.""" return self.dev_type in (constants.DT_DRBD8, constants.DT_PLAIN) def AssembleOnSecondary(self): """Test if this device needs to be assembled on a secondary node.""" return self.dev_type in (constants.DT_DRBD8, constants.DT_PLAIN) def OpenOnSecondary(self): """Test if this device needs to be opened on a secondary node.""" return self.dev_type in (constants.DT_PLAIN,) def StaticDevPath(self): """Return the device path if this device type has a static one. Some devices (LVM for example) live always at the same /dev/ path, irrespective of their status. For such devices, we return this path, for others we return None. 
    @warning: The path returned is not a normalized pathname; callers
        should check that it is a valid path.

    """
    if self.dev_type == constants.DT_PLAIN:
      return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1])
    elif self.dev_type == constants.DT_BLOCK:
      return self.logical_id[1]
    elif self.dev_type == constants.DT_RBD:
      return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1])
    return None

  def ChildrenNeeded(self):
    """Compute the needed number of children for activation.

    This method will return either -1 (all children) or a positive
    number denoting the minimum number of children needed for
    activation (only mirrored devices will usually return >=0).

    Currently, only DRBD8 supports diskless activation (therefore we
    return 0), for all others we keep the previous semantics and
    return -1.

    """
    if self.dev_type == constants.DT_DRBD8:
      return 0
    return -1

  def IsBasedOnDiskType(self, dev_type):
    """Check if the disk or its children are based on the given type.

    @type dev_type: L{constants.DTS_BLOCK}
    @param dev_type: the type to look for
    @rtype: boolean
    @return: boolean indicating if a device of the given type was found or not

    """
    if self.children:
      for child in self.children:
        if child.IsBasedOnDiskType(dev_type):
          return True
    return self.dev_type == dev_type

  def GetNodes(self, node_uuid):
    """This function returns the nodes this device lives on.

    Given the node on which the parent of the device lives (or, in
    case of a top-level device, the primary node of the device's
    instance), this function will return a list of nodes on which this
    device needs to (or can) be assembled.

    """
    if self.dev_type in [constants.DT_PLAIN, constants.DT_FILE,
                         constants.DT_BLOCK, constants.DT_RBD,
                         constants.DT_EXT, constants.DT_SHARED_FILE]:
      result = [node_uuid]
    elif self.dev_type in constants.DTS_DRBD:
      result = [self.logical_id[0], self.logical_id[1]]
      if node_uuid not in result:
        raise errors.ConfigurationError("DRBD device passed unknown node")
    else:
      raise errors.ProgrammerError("Unhandled device type %s" %
                                   self.dev_type)
    return result

  def ComputeNodeTree(self, parent_node_uuid):
    """Compute the node/disk tree for this disk and its children.

    This method, given the node on which the parent disk lives, will
    return the list of all (node UUID, disk) pairs which describe the
    disk tree in the most compact way. For example, a drbd/lvm stack
    will be returned as (primary_node, drbd) and (secondary_node, drbd)
    which represents all the top-level devices on the nodes.

    """
    my_nodes = self.GetNodes(parent_node_uuid)
    result = [(node, self) for node in my_nodes]
    if not self.children:
      # leaf device
      return result
    for node in my_nodes:
      for child in self.children:
        child_result = child.ComputeNodeTree(node)
        if len(child_result) == 1:
          # child (and all its descendants) is simple, doesn't split
          # over multiple hosts, so we don't need to describe it, our
          # own entry for this node describes it completely
          continue
        else:
          # check if child nodes differ from my nodes; note that
          # subdisk can differ from the child itself, and be instead
          # one of its descendants
          for subnode, subdisk in child_result:
            if subnode not in my_nodes:
              result.append((subnode, subdisk))
            # otherwise child is under our own node, so we ignore this
            # entry (but probably the other results in the list will
            # be different)
    return result

  def ComputeGrowth(self, amount):
    """Compute the per-VG growth requirements.

    This only works for VG-based disks.
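
    Example (the logical ID is made up for illustration)::

      disk = Disk(dev_type=constants.DT_PLAIN,
                  logical_id=("xenvg", "lv1"), size=1024)
      disk.ComputeGrowth(512)  # -> {'xenvg': 512}
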
    @type amount: integer
    @param amount: the desired increase in (user-visible) disk space
    @rtype: dict
    @return: a dictionary of volume-groups and the required size

    """
    if self.dev_type == constants.DT_PLAIN:
      return {self.logical_id[0]: amount}
    elif self.dev_type == constants.DT_DRBD8:
      if self.children:
        return self.children[0].ComputeGrowth(amount)
      else:
        return {}
    else:
      # Other disk types do not require VG space
      return {}

  def RecordGrow(self, amount):
    """Update the size of this disk after growth.

    This method recurses over the disk's children and updates their
    size correspondingly. The method needs to be kept in sync with the
    actual algorithms from bdev.

    """
    if self.dev_type in (constants.DT_PLAIN, constants.DT_FILE,
                         constants.DT_RBD, constants.DT_EXT,
                         constants.DT_SHARED_FILE):
      self.size += amount
    elif self.dev_type == constants.DT_DRBD8:
      if self.children:
        self.children[0].RecordGrow(amount)
      self.size += amount
    else:
      raise errors.ProgrammerError("Disk.RecordGrow called for unsupported"
                                   " disk type %s" % self.dev_type)

  def Update(self, size=None, mode=None, spindles=None):
    """Apply changes to size, spindles and mode.

    """
    if self.dev_type == constants.DT_DRBD8:
      if self.children:
        self.children[0].Update(size=size, mode=mode)
    else:
      assert not self.children

    if size is not None:
      self.size = size
    if mode is not None:
      self.mode = mode
    if spindles is not None:
      self.spindles = spindles

  def UnsetSize(self):
    """Sets recursively the size to zero for the disk and its children.

    """
    if self.children:
      for child in self.children:
        child.UnsetSize()
    self.size = 0

  def SetPhysicalID(self, target_node_uuid, nodes_ip):
    """Convert the logical ID to the physical ID.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    Arguments:
      - target_node_uuid: the node UUID we wish to configure for
      - nodes_ip: a mapping of node name to ip

    The target_node must exist in nodes_ip, and must be one of the
    nodes in the logical ID for each of the DRBD devices encountered
    in the disk tree.

    """
    if self.children:
      for child in self.children:
        child.SetPhysicalID(target_node_uuid, nodes_ip)

    if self.logical_id is None and self.physical_id is not None:
      return
    if self.dev_type in constants.DTS_DRBD:
      pnode_uuid, snode_uuid, port, pminor, sminor, secret = self.logical_id
      if target_node_uuid not in (pnode_uuid, snode_uuid):
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                        target_node_uuid)
      pnode_ip = nodes_ip.get(pnode_uuid, None)
      snode_ip = nodes_ip.get(snode_uuid, None)
      if pnode_ip is None or snode_ip is None:
        raise errors.ConfigurationError("Can't find primary or secondary"
                                        " node for %s" % str(self))
      p_data = (pnode_ip, port)
      s_data = (snode_ip, port)
      if pnode_uuid == target_node_uuid:
        self.physical_id = p_data + s_data + (pminor, secret)
      else: # it must be secondary, we tested above
        self.physical_id = s_data + p_data + (sminor, secret)
    else:
      self.physical_id = self.logical_id
    return

  def ToDict(self):
    """Disk-specific conversion to standard python types.

    This replaces the children lists of objects with lists of standard
    python types.
""" bo = super(Disk, self).ToDict() for attr in ("children",): alist = bo.get(attr, None) if alist: bo[attr] = outils.ContainerToDicts(alist) return bo @classmethod def FromDict(cls, val): """Custom function for Disks """ obj = super(Disk, cls).FromDict(val) if obj.children: obj.children = outils.ContainerFromDicts(obj.children, list, Disk) if obj.logical_id and isinstance(obj.logical_id, list): obj.logical_id = tuple(obj.logical_id) if obj.physical_id and isinstance(obj.physical_id, list): obj.physical_id = tuple(obj.physical_id) if obj.dev_type in constants.DTS_DRBD: # we need a tuple of length six here if len(obj.logical_id) < 6: obj.logical_id += (None,) * (6 - len(obj.logical_id)) return obj def __str__(self): """Custom str() formatter for disks. """ if self.dev_type == constants.DT_PLAIN: val = " parameters @rtype: list(dict) @return: a list of dicts, one for each node of the disk hierarchy. Each dict contains the LD parameters of the node. The tree is flattened in-order. """ if disk_template not in constants.DISK_TEMPLATES: raise errors.ProgrammerError("Unknown disk template %s" % disk_template) assert disk_template in disk_params result = list() dt_params = disk_params[disk_template] if disk_template == constants.DT_DRBD8: result.append(FillDict(constants.DISK_LD_DEFAULTS[constants.DT_DRBD8], { constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE], constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS], constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS], constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG], constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM], constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM], constants.LDP_PROTOCOL: dt_params[constants.DRBD_PROTOCOL], constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC], constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD], constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET], constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET], constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE], constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE], })) # data LV result.append(FillDict(constants.DISK_LD_DEFAULTS[constants.DT_PLAIN], { constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES], })) # metadata LV result.append(FillDict(constants.DISK_LD_DEFAULTS[constants.DT_PLAIN], { constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES], })) elif disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE): result.append(constants.DISK_LD_DEFAULTS[disk_template]) elif disk_template == constants.DT_PLAIN: result.append(FillDict(constants.DISK_LD_DEFAULTS[constants.DT_PLAIN], { constants.LDP_STRIPES: dt_params[constants.LV_STRIPES], })) elif disk_template == constants.DT_BLOCK: result.append(constants.DISK_LD_DEFAULTS[constants.DT_BLOCK]) elif disk_template == constants.DT_RBD: result.append(FillDict(constants.DISK_LD_DEFAULTS[constants.DT_RBD], { constants.LDP_POOL: dt_params[constants.RBD_POOL], })) elif disk_template == constants.DT_EXT: result.append(constants.DISK_LD_DEFAULTS[constants.DT_EXT]) return result class InstancePolicy(ConfigObject): """Config object representing instance policy limits dictionary. Note that this object is not actually used in the config, it's just used as a placeholder for a few functions. 
""" @classmethod def UpgradeDiskTemplates(cls, ipolicy, enabled_disk_templates): """Upgrades the ipolicy configuration.""" if constants.IPOLICY_DTS in ipolicy: if not set(ipolicy[constants.IPOLICY_DTS]).issubset( set(enabled_disk_templates)): ipolicy[constants.IPOLICY_DTS] = list( set(ipolicy[constants.IPOLICY_DTS]) & set(enabled_disk_templates)) @classmethod def CheckParameterSyntax(cls, ipolicy, check_std): """ Check the instance policy for validity. @type ipolicy: dict @param ipolicy: dictionary with min/max/std specs and policies @type check_std: bool @param check_std: Whether to check std value or just assume compliance @raise errors.ConfigurationError: when the policy is not legal """ InstancePolicy.CheckISpecSyntax(ipolicy, check_std) if constants.IPOLICY_DTS in ipolicy: InstancePolicy.CheckDiskTemplates(ipolicy[constants.IPOLICY_DTS]) for key in constants.IPOLICY_PARAMETERS: if key in ipolicy: InstancePolicy.CheckParameter(key, ipolicy[key]) wrong_keys = frozenset(ipolicy.keys()) - constants.IPOLICY_ALL_KEYS if wrong_keys: raise errors.ConfigurationError("Invalid keys in ipolicy: %s" % utils.CommaJoin(wrong_keys)) @classmethod def _CheckIncompleteSpec(cls, spec, keyname): missing_params = constants.ISPECS_PARAMETERS - frozenset(spec.keys()) if missing_params: msg = ("Missing instance specs parameters for %s: %s" % (keyname, utils.CommaJoin(missing_params))) raise errors.ConfigurationError(msg) @classmethod def CheckISpecSyntax(cls, ipolicy, check_std): """Check the instance policy specs for validity. @type ipolicy: dict @param ipolicy: dictionary with min/max/std specs @type check_std: bool @param check_std: Whether to check std value or just assume compliance @raise errors.ConfigurationError: when specs are not valid """ if constants.ISPECS_MINMAX not in ipolicy: # Nothing to check return if check_std and constants.ISPECS_STD not in ipolicy: msg = "Missing key in ipolicy: %s" % constants.ISPECS_STD raise errors.ConfigurationError(msg) stdspec = ipolicy.get(constants.ISPECS_STD) if check_std: InstancePolicy._CheckIncompleteSpec(stdspec, constants.ISPECS_STD) if not ipolicy[constants.ISPECS_MINMAX]: raise errors.ConfigurationError("Empty minmax specifications") std_is_good = False for minmaxspecs in ipolicy[constants.ISPECS_MINMAX]: missing = constants.ISPECS_MINMAX_KEYS - frozenset(minmaxspecs.keys()) if missing: msg = "Missing instance specification: %s" % utils.CommaJoin(missing) raise errors.ConfigurationError(msg) for (key, spec) in minmaxspecs.items(): InstancePolicy._CheckIncompleteSpec(spec, key) spec_std_ok = True for param in constants.ISPECS_PARAMETERS: par_std_ok = InstancePolicy._CheckISpecParamSyntax(minmaxspecs, stdspec, param, check_std) spec_std_ok = spec_std_ok and par_std_ok std_is_good = std_is_good or spec_std_ok if not std_is_good: raise errors.ConfigurationError("Invalid std specifications") @classmethod def _CheckISpecParamSyntax(cls, minmaxspecs, stdspec, name, check_std): """Check the instance policy specs for validity on a given key. We check if the instance specs makes sense for a given key, that is if minmaxspecs[min][name] <= stdspec[name] <= minmaxspec[max][name]. 
@type minmaxspecs: dict @param minmaxspecs: dictionary with min and max instance spec @type stdspec: dict @param stdspec: dictionary with standard instance spec @type name: string @param name: what are the limits for @type check_std: bool @param check_std: Whether to check std value or just assume compliance @rtype: bool @return: C{True} when specs are valid, C{False} when standard spec for the given name is not valid @raise errors.ConfigurationError: when min/max specs for the given name are not valid """ minspec = minmaxspecs[constants.ISPECS_MIN] maxspec = minmaxspecs[constants.ISPECS_MAX] min_v = minspec[name] max_v = maxspec[name] if min_v > max_v: err = ("Invalid specification of min/max values for %s: %s/%s" % (name, min_v, max_v)) raise errors.ConfigurationError(err) elif check_std: std_v = stdspec.get(name, min_v) return std_v >= min_v and std_v <= max_v else: return True @classmethod def CheckDiskTemplates(cls, disk_templates): """Checks the disk templates for validity. """ if not disk_templates: raise errors.ConfigurationError("Instance policy must contain" + " at least one disk template") wrong = frozenset(disk_templates).difference(constants.DISK_TEMPLATES) if wrong: raise errors.ConfigurationError("Invalid disk template(s) %s" % utils.CommaJoin(wrong)) @classmethod def CheckParameter(cls, key, value): """Checks a parameter. Currently we expect all parameters to be float values. """ try: float(value) except (TypeError, ValueError), err: raise errors.ConfigurationError("Invalid value for key" " '%s':" " '%s', error: %s" % (key, value, err)) class Instance(TaggableObject): """Config object representing an instance.""" __slots__ = [ "name", "primary_node", "os", "hypervisor", "hvparams", "beparams", "osparams", "admin_state", "nics", "disks", "disk_template", "disks_active", "network_port", "serial_no", ] + _TIMESTAMPS + _UUID def _ComputeSecondaryNodes(self): """Compute the list of secondary nodes. This is a simple wrapper over _ComputeAllNodes. """ all_nodes = set(self._ComputeAllNodes()) all_nodes.discard(self.primary_node) return tuple(all_nodes) secondary_nodes = property(_ComputeSecondaryNodes, None, None, "List of names of secondary nodes") def _ComputeAllNodes(self): """Compute the list of all nodes. Since the data is already there (in the drbd disks), keeping it as a separate normal attribute is redundant and if not properly synchronised can cause problems. Thus it's better to compute it dynamically. """ def _Helper(nodes, device): """Recursively computes nodes given a top device.""" if device.dev_type in constants.DTS_DRBD: nodea, nodeb = device.logical_id[:2] nodes.add(nodea) nodes.add(nodeb) if device.children: for child in device.children: _Helper(nodes, child) all_nodes = set() all_nodes.add(self.primary_node) for device in self.disks: _Helper(all_nodes, device) return tuple(all_nodes) all_nodes = property(_ComputeAllNodes, None, None, "List of names of all the nodes of the instance") def MapLVsByNode(self, lvmap=None, devs=None, node_uuid=None): """Provide a mapping of nodes to LVs this instance owns. This function figures out what logical volumes should belong on which nodes, recursing through a device tree. @type lvmap: dict @param lvmap: optional dictionary to receive the 'node' : ['lv', ...] data. @type devs: list of L{Disk} @param devs: disks to get the LV name for. If None, all disk of this instance are used. @type node_uuid: string @param node_uuid: UUID of the node to get the LV names for. If None, the primary node of this instance is used. 
    @return: None if lvmap arg is given, otherwise, a dictionary of
        the form { 'node_uuid' : ['volume1', 'volume2', ...], ... };
        volumeN is of the form "vg_name/lv_name", compatible with
        GetVolumeList()

    """
    if node_uuid is None:
      node_uuid = self.primary_node

    if lvmap is None:
      lvmap = {
        node_uuid: [],
        }
      ret = lvmap
    else:
      if not node_uuid in lvmap:
        lvmap[node_uuid] = []
      ret = None

    if not devs:
      devs = self.disks

    for dev in devs:
      if dev.dev_type == constants.DT_PLAIN:
        lvmap[node_uuid].append(dev.logical_id[0] + "/" + dev.logical_id[1])

      elif dev.dev_type in constants.DTS_DRBD:
        if dev.children:
          self.MapLVsByNode(lvmap, dev.children, dev.logical_id[0])
          self.MapLVsByNode(lvmap, dev.children, dev.logical_id[1])

      elif dev.children:
        self.MapLVsByNode(lvmap, dev.children, node_uuid)

    return ret

  def FindDisk(self, idx):
    """Find a disk given a specified index.

    This is just a wrapper that does validation of the index.

    @type idx: int
    @param idx: the disk index
    @rtype: L{Disk}
    @return: the corresponding disk
    @raise errors.OpPrereqError: when the given index is not valid

    """
    try:
      idx = int(idx)
      return self.disks[idx]
    except (TypeError, ValueError), err:
      raise errors.OpPrereqError("Invalid disk index: '%s'" % str(err),
                                 errors.ECODE_INVAL)
    except IndexError:
      raise errors.OpPrereqError("Invalid disk index: %d (instance has"
                                 " disks 0 to %d)" %
                                 (idx, len(self.disks) - 1),
                                 errors.ECODE_INVAL)

  def ToDict(self):
    """Instance-specific conversion to standard python types.

    This replaces the children lists of objects with lists of standard
    python types.

    """
    bo = super(Instance, self).ToDict()

    for attr in "nics", "disks":
      alist = bo.get(attr, None)
      if alist:
        nlist = outils.ContainerToDicts(alist)
      else:
        nlist = []
      bo[attr] = nlist
    return bo

  @classmethod
  def FromDict(cls, val):
    """Custom function for instances.

    """
    if "admin_state" not in val:
      if val.get("admin_up", False):
        val["admin_state"] = constants.ADMINST_UP
      else:
        val["admin_state"] = constants.ADMINST_DOWN
    if "admin_up" in val:
      del val["admin_up"]
    obj = super(Instance, cls).FromDict(val)
    obj.nics = outils.ContainerFromDicts(obj.nics, list, NIC)
    obj.disks = outils.ContainerFromDicts(obj.disks, list, Disk)
    return obj

  def UpgradeConfig(self):
    """Fill defaults for missing configuration values.

    """
    for nic in self.nics:
      nic.UpgradeConfig()
    for disk in self.disks:
      disk.UpgradeConfig()
    if self.hvparams:
      for key in constants.HVC_GLOBALS:
        try:
          del self.hvparams[key]
        except KeyError:
          pass
    if self.osparams is None:
      self.osparams = {}
    UpgradeBeParams(self.beparams)
    if self.disks_active is None:
      self.disks_active = self.admin_state == constants.ADMINST_UP


class OS(ConfigObject):
  """Config object representing an operating system.

  @type supported_parameters: list
  @ivar supported_parameters: a list of tuples, name and description,
      containing the supported parameters by this OS

  @type VARIANT_DELIM: string
  @cvar VARIANT_DELIM: the variant delimiter

  """
  __slots__ = [
    "name",
    "path",
    "api_versions",
    "create_script",
    "export_script",
    "import_script",
    "rename_script",
    "verify_script",
    "supported_variants",
    "supported_parameters",
    ]

  VARIANT_DELIM = "+"

  @classmethod
  def SplitNameVariant(cls, name):
    """Splits the name into the proper name and variant.

    @param name: the OS (unprocessed) name
    @rtype: list
    @return: a list of two elements; if the original name didn't
        contain a variant, it's returned as an empty string

    """
    nv = name.split(cls.VARIANT_DELIM, 1)
    if len(nv) == 1:
      nv.append("")
    return nv

  @classmethod
  def GetName(cls, name):
    """Returns the proper name of the os (without the variant).
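
    Example (the OS names are made up for illustration)::

      OS.GetName("debootstrap+default")  # -> "debootstrap"
      OS.GetName("debootstrap")          # -> "debootstrap"
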
    @param name: the OS (unprocessed) name

    """
    return cls.SplitNameVariant(name)[0]

  @classmethod
  def GetVariant(cls, name):
    """Returns the variant of the os (without the base name).

    @param name: the OS (unprocessed) name

    """
    return cls.SplitNameVariant(name)[1]


class ExtStorage(ConfigObject):
  """Config object representing an External Storage Provider.

  """
  __slots__ = [
    "name",
    "path",
    "create_script",
    "remove_script",
    "grow_script",
    "attach_script",
    "detach_script",
    "setinfo_script",
    "verify_script",
    "supported_parameters",
    ]


class NodeHvState(ConfigObject):
  """Hypervisor state on a node.

  @ivar mem_total: Total amount of memory
  @ivar mem_node: Memory used by, or reserved for, the node itself
      (not always available)
  @ivar mem_hv: Memory used by hypervisor or lost due to instance
      allocation rounding
  @ivar mem_inst: Memory used by instances living on node
  @ivar cpu_total: Total node CPU core count
  @ivar cpu_node: Number of CPU cores reserved for the node itself

  """
  __slots__ = [
    "mem_total",
    "mem_node",
    "mem_hv",
    "mem_inst",
    "cpu_total",
    "cpu_node",
    ] + _TIMESTAMPS


class NodeDiskState(ConfigObject):
  """Disk state on a node.

  """
  __slots__ = [
    "total",
    "reserved",
    "overhead",
    ] + _TIMESTAMPS


class Node(TaggableObject):
  """Config object representing a node.

  @ivar hv_state: Hypervisor state (e.g. number of CPUs)
  @ivar hv_state_static: Hypervisor state overridden by user
  @ivar disk_state: Disk state (e.g. free space)
  @ivar disk_state_static: Disk state overridden by user

  """
  __slots__ = [
    "name",
    "primary_ip",
    "secondary_ip",
    "serial_no",
    "master_candidate",
    "offline",
    "drained",
    "group",
    "master_capable",
    "vm_capable",
    "ndparams",
    "powered",
    "hv_state",
    "hv_state_static",
    "disk_state",
    "disk_state_static",
    ] + _TIMESTAMPS + _UUID

  def UpgradeConfig(self):
    """Fill defaults for missing configuration values.

    """
    # pylint: disable=E0203
    # because these are "defined" via slots, not manually
    if self.master_capable is None:
      self.master_capable = True

    if self.vm_capable is None:
      self.vm_capable = True

    if self.ndparams is None:
      self.ndparams = {}
    # And remove any global parameter
    for key in constants.NDC_GLOBALS:
      if key in self.ndparams:
        logging.warning("Ignoring %s node parameter for node %s",
                        key, self.name)
        del self.ndparams[key]

    if self.powered is None:
      self.powered = True

  def ToDict(self):
    """Custom function for serializing.

    """
    data = super(Node, self).ToDict()

    hv_state = data.get("hv_state", None)
    if hv_state is not None:
      data["hv_state"] = outils.ContainerToDicts(hv_state)

    disk_state = data.get("disk_state", None)
    if disk_state is not None:
      data["disk_state"] = \
        dict((key, outils.ContainerToDicts(value))
             for (key, value) in disk_state.items())

    return data

  @classmethod
  def FromDict(cls, val):
    """Custom function for deserializing.

    """
    obj = super(Node, cls).FromDict(val)

    if obj.hv_state is not None:
      obj.hv_state = \
        outils.ContainerFromDicts(obj.hv_state, dict, NodeHvState)

    if obj.disk_state is not None:
      obj.disk_state = \
        dict((key, outils.ContainerFromDicts(value, dict, NodeDiskState))
             for (key, value) in obj.disk_state.items())

    return obj


class NodeGroup(TaggableObject):
  """Config object representing a node group."""
  __slots__ = [
    "name",
    "members",
    "ndparams",
    "diskparams",
    "ipolicy",
    "serial_no",
    "hv_state_static",
    "disk_state_static",
    "alloc_policy",
    "networks",
    ] + _TIMESTAMPS + _UUID

  def ToDict(self):
    """Custom function for nodegroup.

    This discards the members object, which gets recalculated and is
    only kept in memory.
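
    Example (the group name and member UUID are made up)::

      group = NodeGroup(name="default", members=["some-node-uuid"])
      "members" in group.ToDict()  # -> False, rebuilt at load time
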
""" mydict = super(NodeGroup, self).ToDict() del mydict["members"] return mydict @classmethod def FromDict(cls, val): """Custom function for nodegroup. The members slot is initialized to an empty list, upon deserialization. """ obj = super(NodeGroup, cls).FromDict(val) obj.members = [] return obj def UpgradeConfig(self): """Fill defaults for missing configuration values. """ if self.ndparams is None: self.ndparams = {} if self.serial_no is None: self.serial_no = 1 if self.alloc_policy is None: self.alloc_policy = constants.ALLOC_POLICY_PREFERRED # We only update mtime, and not ctime, since we would not be able # to provide a correct value for creation time. if self.mtime is None: self.mtime = time.time() if self.diskparams is None: self.diskparams = {} if self.ipolicy is None: self.ipolicy = MakeEmptyIPolicy() if self.networks is None: self.networks = {} def FillND(self, node): """Return filled out ndparams for L{objects.Node} @type node: L{objects.Node} @param node: A Node object to fill @return a copy of the node's ndparams with defaults filled """ return self.SimpleFillND(node.ndparams) def SimpleFillND(self, ndparams): """Fill a given ndparams dict with defaults. @type ndparams: dict @param ndparams: the dict to fill @rtype: dict @return: a copy of the passed in ndparams with missing keys filled from the node group defaults """ return FillDict(self.ndparams, ndparams) class Cluster(TaggableObject): """Config object representing the cluster.""" __slots__ = [ "serial_no", "rsahostkeypub", "dsahostkeypub", "highest_used_port", "tcpudp_port_pool", "mac_prefix", "volume_group_name", "reserved_lvs", "drbd_usermode_helper", "default_bridge", "default_hypervisor", "master_node", "master_ip", "master_netdev", "master_netmask", "use_external_mip_script", "cluster_name", "file_storage_dir", "shared_file_storage_dir", "enabled_hypervisors", "hvparams", "ipolicy", "os_hvp", "beparams", "osparams", "nicparams", "ndparams", "diskparams", "candidate_pool_size", "modify_etc_hosts", "modify_ssh_setup", "maintain_node_health", "uid_pool", "default_iallocator", "hidden_os", "blacklisted_os", "primary_ip_family", "prealloc_wipe_disks", "hv_state_static", "disk_state_static", "enabled_disk_templates", ] + _TIMESTAMPS + _UUID def UpgradeConfig(self): """Fill defaults for missing configuration values. """ # pylint: disable=E0203 # because these are "defined" via slots, not manually if self.hvparams is None: self.hvparams = constants.HVC_DEFAULTS else: for hypervisor in constants.HYPER_TYPES: try: existing_params = self.hvparams[hypervisor] except KeyError: existing_params = {} self.hvparams[hypervisor] = FillDict( constants.HVC_DEFAULTS[hypervisor], existing_params) if self.os_hvp is None: self.os_hvp = {} # osparams added before 2.2 if self.osparams is None: self.osparams = {} self.ndparams = UpgradeNDParams(self.ndparams) self.beparams = UpgradeGroupedParams(self.beparams, constants.BEC_DEFAULTS) for beparams_group in self.beparams: UpgradeBeParams(self.beparams[beparams_group]) migrate_default_bridge = not self.nicparams self.nicparams = UpgradeGroupedParams(self.nicparams, constants.NICC_DEFAULTS) if migrate_default_bridge: self.nicparams[constants.PP_DEFAULT][constants.NIC_LINK] = \ self.default_bridge if self.modify_etc_hosts is None: self.modify_etc_hosts = True if self.modify_ssh_setup is None: self.modify_ssh_setup = True # default_bridge is no longer used in 2.1. The slot is left there to # support auto-upgrading. It can be removed once we decide to deprecate # upgrading straight from 2.0. 
if self.default_bridge is not None: self.default_bridge = None # default_hypervisor is just the first enabled one in 2.1. This slot and # code can be removed once upgrading straight from 2.0 is deprecated. if self.default_hypervisor is not None: self.enabled_hypervisors = ([self.default_hypervisor] + [hvname for hvname in self.enabled_hypervisors if hvname != self.default_hypervisor]) self.default_hypervisor = None # maintain_node_health added after 2.1.1 if self.maintain_node_health is None: self.maintain_node_health = False if self.uid_pool is None: self.uid_pool = [] if self.default_iallocator is None: self.default_iallocator = "" # reserved_lvs added before 2.2 if self.reserved_lvs is None: self.reserved_lvs = [] # hidden and blacklisted operating systems added before 2.2.1 if self.hidden_os is None: self.hidden_os = [] if self.blacklisted_os is None: self.blacklisted_os = [] # primary_ip_family added before 2.3 if self.primary_ip_family is None: self.primary_ip_family = AF_INET if self.master_netmask is None: ipcls = netutils.IPAddress.GetClassFromIpFamily(self.primary_ip_family) self.master_netmask = ipcls.iplen if self.prealloc_wipe_disks is None: self.prealloc_wipe_disks = False # shared_file_storage_dir added before 2.5 if self.shared_file_storage_dir is None: self.shared_file_storage_dir = "" if self.use_external_mip_script is None: self.use_external_mip_script = False if self.diskparams: self.diskparams = UpgradeDiskParams(self.diskparams) else: self.diskparams = constants.DISK_DT_DEFAULTS.copy() # instance policy added before 2.6 if self.ipolicy is None: self.ipolicy = FillIPolicy(constants.IPOLICY_DEFAULTS, {}) else: # we can either make sure to upgrade the ipolicy always, or only # do it in some corner cases (e.g. missing keys); note that this # will break any removal of keys from the ipolicy dict wrongkeys = frozenset(self.ipolicy.keys()) - constants.IPOLICY_ALL_KEYS if wrongkeys: # These keys would be silently removed by FillIPolicy() msg = ("Cluster instance policy contains spurious keys: %s" % utils.CommaJoin(wrongkeys)) raise errors.ConfigurationError(msg) self.ipolicy = FillIPolicy(constants.IPOLICY_DEFAULTS, self.ipolicy) @property def primary_hypervisor(self): """The first hypervisor is the primary. Useful, for example, for L{Node}'s hv/disk state. """ return self.enabled_hypervisors[0] def ToDict(self): """Custom function for cluster. """ mydict = super(Cluster, self).ToDict() if self.tcpudp_port_pool is None: tcpudp_port_pool = [] else: tcpudp_port_pool = list(self.tcpudp_port_pool) mydict["tcpudp_port_pool"] = tcpudp_port_pool return mydict @classmethod def FromDict(cls, val): """Custom function for cluster. """ obj = super(Cluster, cls).FromDict(val) if obj.tcpudp_port_pool is None: obj.tcpudp_port_pool = set() elif not isinstance(obj.tcpudp_port_pool, set): obj.tcpudp_port_pool = set(obj.tcpudp_port_pool) return obj def SimpleFillDP(self, diskparams): """Fill a given diskparams dict with cluster defaults. @param diskparams: The diskparams @return: The defaults dict """ return FillDiskParams(self.diskparams, diskparams) def GetHVDefaults(self, hypervisor, os_name=None, skip_keys=None): """Get the default hypervisor parameters for the cluster. 
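
    Example of the two-level stacking performed below (the cluster
    object and the "debian" OS name are hypothetical)::

      # cluster-wide KVM defaults first, then the "debian" entry
      # from os_hvp layered on top
      cluster.GetHVDefaults(constants.HT_KVM, os_name="debian")
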
@param hypervisor: the hypervisor name @param os_name: if specified, we'll also update the defaults for this OS @param skip_keys: if passed, list of keys not to use @return: the defaults dict """ if skip_keys is None: skip_keys = [] fill_stack = [self.hvparams.get(hypervisor, {})] if os_name is not None: os_hvp = self.os_hvp.get(os_name, {}).get(hypervisor, {}) fill_stack.append(os_hvp) ret_dict = {} for o_dict in fill_stack: ret_dict = FillDict(ret_dict, o_dict, skip_keys=skip_keys) return ret_dict def SimpleFillHV(self, hv_name, os_name, hvparams, skip_globals=False): """Fill a given hvparams dict with cluster defaults. @type hv_name: string @param hv_name: the hypervisor to use @type os_name: string @param os_name: the OS to use for overriding the hypervisor defaults @type skip_globals: boolean @param skip_globals: if True, the global hypervisor parameters will not be filled @rtype: dict @return: a copy of the given hvparams with missing keys filled from the cluster defaults """ if skip_globals: skip_keys = constants.HVC_GLOBALS else: skip_keys = [] def_dict = self.GetHVDefaults(hv_name, os_name, skip_keys=skip_keys) return FillDict(def_dict, hvparams, skip_keys=skip_keys) def FillHV(self, instance, skip_globals=False): """Fill an instance's hvparams dict with cluster defaults. @type instance: L{objects.Instance} @param instance: the instance parameter to fill @type skip_globals: boolean @param skip_globals: if True, the global hypervisor parameters will not be filled @rtype: dict @return: a copy of the instance's hvparams with missing keys filled from the cluster defaults """ return self.SimpleFillHV(instance.hypervisor, instance.os, instance.hvparams, skip_globals) def SimpleFillBE(self, beparams): """Fill a given beparams dict with cluster defaults. @type beparams: dict @param beparams: the dict to fill @rtype: dict @return: a copy of the passed in beparams with missing keys filled from the cluster defaults """ return FillDict(self.beparams.get(constants.PP_DEFAULT, {}), beparams) def FillBE(self, instance): """Fill an instance's beparams dict with cluster defaults. @type instance: L{objects.Instance} @param instance: the instance parameter to fill @rtype: dict @return: a copy of the instance's beparams with missing keys filled from the cluster defaults """ return self.SimpleFillBE(instance.beparams) def SimpleFillNIC(self, nicparams): """Fill a given nicparams dict with cluster defaults. @type nicparams: dict @param nicparams: the dict to fill @rtype: dict @return: a copy of the passed in nicparams with missing keys filled from the cluster defaults """ return FillDict(self.nicparams.get(constants.PP_DEFAULT, {}), nicparams) def SimpleFillOS(self, os_name, os_params): """Fill an instance's osparams dict with cluster defaults. @type os_name: string @param os_name: the OS name to use @type os_params: dict @param os_params: the dict to fill with default values @rtype: dict @return: a copy of the instance's osparams with missing keys filled from the cluster defaults """ name_only = os_name.split("+", 1)[0] # base OS result = self.osparams.get(name_only, {}) # OS with variant result = FillDict(result, self.osparams.get(os_name, {})) # specified params return FillDict(result, os_params) @staticmethod def SimpleFillHvState(hv_state): """Fill an hv_state sub dict with cluster defaults. """ return FillDict(constants.HVST_DEFAULTS, hv_state) @staticmethod def SimpleFillDiskState(disk_state): """Fill an disk_state sub dict with cluster defaults. 
""" return FillDict(constants.DS_DEFAULTS, disk_state) def FillND(self, node, nodegroup): """Return filled out ndparams for L{objects.NodeGroup} and L{objects.Node} @type node: L{objects.Node} @param node: A Node object to fill @type nodegroup: L{objects.NodeGroup} @param nodegroup: A Node object to fill @return a copy of the node's ndparams with defaults filled """ return self.SimpleFillND(nodegroup.FillND(node)) def SimpleFillND(self, ndparams): """Fill a given ndparams dict with defaults. @type ndparams: dict @param ndparams: the dict to fill @rtype: dict @return: a copy of the passed in ndparams with missing keys filled from the cluster defaults """ return FillDict(self.ndparams, ndparams) def SimpleFillIPolicy(self, ipolicy): """ Fill instance policy dict with defaults. @type ipolicy: dict @param ipolicy: the dict to fill @rtype: dict @return: a copy of passed ipolicy with missing keys filled from the cluster defaults """ return FillIPolicy(self.ipolicy, ipolicy) def IsDiskTemplateEnabled(self, disk_template): """Checks if a particular disk template is enabled. """ return utils.storage.IsDiskTemplateEnabled( disk_template, self.enabled_disk_templates) def IsFileStorageEnabled(self): """Checks if file storage is enabled. """ return utils.storage.IsFileStorageEnabled(self.enabled_disk_templates) def IsSharedFileStorageEnabled(self): """Checks if shared file storage is enabled. """ return utils.storage.IsSharedFileStorageEnabled( self.enabled_disk_templates) class BlockDevStatus(ConfigObject): """Config object representing the status of a block device.""" __slots__ = [ "dev_path", "major", "minor", "sync_percent", "estimated_time", "is_degraded", "ldisk_status", ] class ImportExportStatus(ConfigObject): """Config object representing the status of an import or export.""" __slots__ = [ "recent_output", "listen_port", "connected", "progress_mbytes", "progress_throughput", "progress_eta", "progress_percent", "exit_status", "error_message", ] + _TIMESTAMPS class ImportExportOptions(ConfigObject): """Options for import/export daemon @ivar key_name: X509 key name (None for cluster certificate) @ivar ca_pem: Remote peer CA in PEM format (None for cluster certificate) @ivar compress: Compression method (one of L{constants.IEC_ALL}) @ivar magic: Used to ensure the connection goes to the right disk @ivar ipv6: Whether to use IPv6 @ivar connect_timeout: Number of seconds for establishing connection """ __slots__ = [ "key_name", "ca_pem", "compress", "magic", "ipv6", "connect_timeout", ] class ConfdRequest(ConfigObject): """Object holding a confd request. @ivar protocol: confd protocol version @ivar type: confd query type @ivar query: query request @ivar rsalt: requested reply salt """ __slots__ = [ "protocol", "type", "query", "rsalt", ] class ConfdReply(ConfigObject): """Object holding a confd reply. @ivar protocol: confd protocol version @ivar status: reply status code (ok, error) @ivar answer: confd query reply @ivar serial: configuration serial number """ __slots__ = [ "protocol", "status", "answer", "serial", ] class QueryFieldDefinition(ConfigObject): """Object holding a query field definition. @ivar name: Field name @ivar title: Human-readable title @ivar kind: Field type @ivar doc: Human-readable description """ __slots__ = [ "name", "title", "kind", "doc", ] class _QueryResponseBase(ConfigObject): __slots__ = [ "fields", ] def ToDict(self): """Custom function for serializing. 
""" mydict = super(_QueryResponseBase, self).ToDict() mydict["fields"] = outils.ContainerToDicts(mydict["fields"]) return mydict @classmethod def FromDict(cls, val): """Custom function for de-serializing. """ obj = super(_QueryResponseBase, cls).FromDict(val) obj.fields = \ outils.ContainerFromDicts(obj.fields, list, QueryFieldDefinition) return obj class QueryResponse(_QueryResponseBase): """Object holding the response to a query. @ivar fields: List of L{QueryFieldDefinition} objects @ivar data: Requested data """ __slots__ = [ "data", ] class QueryFieldsRequest(ConfigObject): """Object holding a request for querying available fields. """ __slots__ = [ "what", "fields", ] class QueryFieldsResponse(_QueryResponseBase): """Object holding the response to a query for fields. @ivar fields: List of L{QueryFieldDefinition} objects """ __slots__ = [] class MigrationStatus(ConfigObject): """Object holding the status of a migration. """ __slots__ = [ "status", "transferred_ram", "total_ram", ] class InstanceConsole(ConfigObject): """Object describing how to access the console of an instance. """ __slots__ = [ "instance", "kind", "message", "host", "port", "user", "command", "display", ] def Validate(self): """Validates contents of this object. """ assert self.kind in constants.CONS_ALL, "Unknown console type" assert self.instance, "Missing instance name" assert self.message or self.kind in [constants.CONS_SSH, constants.CONS_SPICE, constants.CONS_VNC] assert self.host or self.kind == constants.CONS_MESSAGE assert self.port or self.kind in [constants.CONS_MESSAGE, constants.CONS_SSH] assert self.user or self.kind in [constants.CONS_MESSAGE, constants.CONS_SPICE, constants.CONS_VNC] assert self.command or self.kind in [constants.CONS_MESSAGE, constants.CONS_SPICE, constants.CONS_VNC] assert self.display or self.kind in [constants.CONS_MESSAGE, constants.CONS_SPICE, constants.CONS_SSH] return True class Network(TaggableObject): """Object representing a network definition for ganeti. """ __slots__ = [ "name", "serial_no", "mac_prefix", "network", "network6", "gateway", "gateway6", "reservations", "ext_reservations", ] + _TIMESTAMPS + _UUID def HooksDict(self, prefix=""): """Export a dictionary used by hooks with a network's information. @type prefix: String @param prefix: Prefix to prepend to the dict entries """ result = { "%sNETWORK_NAME" % prefix: self.name, "%sNETWORK_UUID" % prefix: self.uuid, "%sNETWORK_TAGS" % prefix: " ".join(self.GetTags()), } if self.network: result["%sNETWORK_SUBNET" % prefix] = self.network if self.gateway: result["%sNETWORK_GATEWAY" % prefix] = self.gateway if self.network6: result["%sNETWORK_SUBNET6" % prefix] = self.network6 if self.gateway6: result["%sNETWORK_GATEWAY6" % prefix] = self.gateway6 if self.mac_prefix: result["%sNETWORK_MAC_PREFIX" % prefix] = self.mac_prefix return result @classmethod def FromDict(cls, val): """Custom function for networks. Remove deprecated network_type and family. """ if "network_type" in val: del val["network_type"] if "family" in val: del val["family"] obj = super(Network, cls).FromDict(val) return obj class SerializableConfigParser(ConfigParser.SafeConfigParser): """Simple wrapper over ConfigParse that allows serialization. This class is basically ConfigParser.SafeConfigParser with two additional methods that allow it to serialize/unserialize to/from a buffer. 
""" def Dumps(self): """Dump this instance and return the string representation.""" buf = StringIO() self.write(buf) return buf.getvalue() @classmethod def Loads(cls, data): """Load data from a string.""" buf = StringIO(data) cfp = cls() cfp.readfp(buf) return cfp class LvmPvInfo(ConfigObject): """Information about an LVM physical volume (PV). @type name: string @ivar name: name of the PV @type vg_name: string @ivar vg_name: name of the volume group containing the PV @type size: float @ivar size: size of the PV in MiB @type free: float @ivar free: free space in the PV, in MiB @type attributes: string @ivar attributes: PV attributes @type lv_list: list of strings @ivar lv_list: names of the LVs hosted on the PV """ __slots__ = [ "name", "vg_name", "size", "free", "attributes", "lv_list" ] def IsEmpty(self): """Is this PV empty? """ return self.size <= (self.free + 1) def IsAllocatable(self): """Is this PV allocatable? """ return ("a" in self.attributes) ganeti-2.9.3/lib/errors.py0000644000000000000000000002627412271422343015441 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Ganeti exception handling. """ from ganeti import compat # OpPrereqError failure types #: Resolver errors ECODE_RESOLVER = "resolver_error" #: Not enough resources (iallocator failure, disk space, memory, etc.) ECODE_NORES = "insufficient_resources" #: Temporarily out of resources; operation can be tried again ECODE_TEMP_NORES = "temp_insufficient_resources" #: Wrong arguments (at syntax level) ECODE_INVAL = "wrong_input" #: Wrong entity state ECODE_STATE = "wrong_state" #: Entity not found ECODE_NOENT = "unknown_entity" #: Entity already exists ECODE_EXISTS = "already_exists" #: Resource not unique (e.g. MAC or IP duplication) ECODE_NOTUNIQUE = "resource_not_unique" #: Internal cluster error ECODE_FAULT = "internal_error" #: Environment error (e.g. node disk error) ECODE_ENVIRON = "environment_error" #: List of all failure types ECODE_ALL = compat.UniqueFrozenset([ ECODE_RESOLVER, ECODE_NORES, ECODE_TEMP_NORES, ECODE_INVAL, ECODE_STATE, ECODE_NOENT, ECODE_EXISTS, ECODE_NOTUNIQUE, ECODE_FAULT, ECODE_ENVIRON, ]) class GenericError(Exception): """Base exception for Ganeti. """ class LockError(GenericError): """Lock error exception. This signifies problems in the locking subsystem. """ class PidFileLockError(LockError): """PID file is already locked by another process. """ class HypervisorError(GenericError): """Hypervisor-related exception. This is raised in case we can't communicate with the hypervisor properly. """ class ProgrammerError(GenericError): """Programming-related error. This is raised in cases we determine that the calling conventions have been violated, meaning we got some desynchronisation between parts of our code. It signifies a real programming bug. 
""" class BlockDeviceError(GenericError): """Block-device related exception. This is raised in case we can't setup the instance's block devices properly. """ class ConfigurationError(GenericError): """Configuration related exception. Things like having an instance with a primary node that doesn't exist in the config or such raise this exception. """ class ConfigVersionMismatch(ConfigurationError): """Version mismatch in the configuration file. The error has two arguments: the expected and the actual found version. """ class AddressPoolError(GenericError): """Errors related to IP address pools. """ class ReservationError(GenericError): """Errors reserving a resource. """ class RemoteError(GenericError): """Programming-related error on remote call. This is raised when an unhandled error occurs in a call to a remote node. It usually signifies a real programming bug. """ class SignatureError(GenericError): """Error authenticating a remote message. This is raised when the hmac signature on a message doesn't verify correctly to the message itself. It can happen because of network unreliability or because of spurious traffic. """ class ParameterError(GenericError): """A passed parameter to a command is invalid. This is raised when the parameter passed to a request function is invalid. Correct code should have verified this before passing the request structure. The argument to this exception should be the parameter name. """ class ResultValidationError(GenericError): """The iallocation results fails validation. """ class OpPrereqError(GenericError): """Prerequisites for the OpCode are not fulfilled. This exception has two arguments: an error message, and one of the ECODE_* codes. """ class OpExecError(GenericError): """Error during OpCode execution. """ class OpResultError(GenericError): """Issue with OpCode result. """ class DeviceCreationError(GenericError): """Error during the creation of a device. This exception should contain the list of the devices actually created up to now, in the form of pairs (node, device) """ def __init__(self, message, created_devices): GenericError.__init__(self) self.message = message self.created_devices = created_devices def __str__(self): return self.message class OpCodeUnknown(GenericError): """Unknown opcode submitted. This signifies a mismatch between the definitions on the client and server side. """ class JobLost(GenericError): """Submitted job lost. The job was submitted but it cannot be found in the current job list. """ class JobFileCorrupted(GenericError): """Job file could not be properly decoded/restored. """ class ResolverError(GenericError): """Host name cannot be resolved. This is not a normal situation for Ganeti, as we rely on having a working resolver. The non-resolvable hostname is available as the first element of the args tuple; the other two elements of the tuple are the first two args of the socket.gaierror exception (error code and description). """ class HooksFailure(GenericError): """A generic hook failure. This signifies usually a setup misconfiguration. """ class HooksAbort(HooksFailure): """A required hook has failed. This caused an abort of the operation in the initial phase. This exception always has an attribute args which is a list of tuples of: - node: the source node on which this hooks has failed - script: the name of the script which aborted the run """ class UnitParseError(GenericError): """Unable to parse size unit. """ class ParseError(GenericError): """Generic parse error. Raised when unable to parse user input. 
""" class TypeEnforcementError(GenericError): """Unable to enforce data type. """ class X509CertError(GenericError): """Invalid X509 certificate. This error has two arguments: the certificate filename and the error cause. """ class TagError(GenericError): """Generic tag error. The argument to this exception will show the exact error. """ class CommandError(GenericError): """External command error. """ class StorageError(GenericError): """Storage-related exception. """ class InotifyError(GenericError): """Error raised when there is a failure setting up an inotify watcher. """ class QuitGanetiException(Exception): """Signal Ganeti that it must quit. This is not necessarily an error (and thus not a subclass of GenericError), but it's an exceptional circumstance and it is thus treated. This exception should be instantiated with two values. The first one will specify the return code to the caller, and the second one will be the returned result (either as an error or as a normal result). Usually only the leave cluster rpc call should return status True (as there it's expected we quit), every other call will return status False (as a critical error was encountered). Examples:: # Return a result of "True" to the caller, but quit ganeti afterwards raise QuitGanetiException(True, None) # Send an error to the caller, and quit ganeti raise QuitGanetiException(False, "Fatal safety violation, shutting down") """ class JobQueueError(GenericError): """Job queue error. """ class JobQueueDrainError(JobQueueError): """Job queue is marked for drain error. This is raised when a job submission attempt is made but the queue is marked for drain. """ class JobQueueFull(JobQueueError): """Job queue full error. Raised when job queue size reached its hard limit. """ class ConfdMagicError(GenericError): """A magic fourcc error in Ganeti confd. Errors processing the fourcc in ganeti confd datagrams. """ class ConfdClientError(GenericError): """A magic fourcc error in Ganeti confd. Errors in the confd client library. """ class UdpDataSizeError(GenericError): """UDP payload too big. """ class NoCtypesError(GenericError): """python ctypes module is not found in the system. """ class IPAddressError(GenericError): """Generic IP address error. """ class LuxiError(GenericError): """LUXI error. """ class QueryFilterParseError(ParseError): """Error while parsing query filter. This exception must be instantiated with two values. The first one is a string with an error description, the second one is an instance of a subclass of C{pyparsing.ParseBaseException} (used to display the exact error location). """ def GetDetails(self): """Returns a list of strings with details about the error. """ try: (_, inner) = self.args except IndexError: return None return [str(inner.line), (" " * (inner.column - 1)) + "^", str(inner)] class RapiTestResult(GenericError): """Exception containing results from RAPI test utilities. """ class FileStoragePathError(GenericError): """Error from file storage path validation. """ # errors should be added above def GetErrorClass(name): """Return the class of an exception. Given the class name, return the class itself. @type name: str @param name: the exception name @rtype: class @return: the actual class, or None if not found """ item = globals().get(name, None) if item is not None: if not (isinstance(item, type(Exception)) and issubclass(item, GenericError)): item = None return item def EncodeException(err): """Encodes an exception into a format that L{MaybeRaise} will recognise. 
The passed L{err} argument will be formatted as a tuple (exception name, arguments) that the MaybeRaise function will recognise. @type err: GenericError child @param err: usually a child of GenericError (but any exception will be accepted) @rtype: tuple @return: tuple of (exception name, exception arguments) """ return (err.__class__.__name__, err.args) def GetEncodedError(result): """If this looks like an encoded Ganeti exception, return it. This function tries to parse the passed argument and if it looks like an encoding done by EncodeException, it will return the class object and arguments. """ tlt = (tuple, list) if (isinstance(result, tlt) and len(result) == 2 and isinstance(result[1], tlt)): # custom ganeti errors errcls = GetErrorClass(result[0]) if errcls: return (errcls, tuple(result[1])) return None def MaybeRaise(result): """If this looks like an encoded Ganeti exception, raise it. This function tries to parse the passed argument and if it looks like an encoding done by EncodeException, it will re-raise it. """ error = GetEncodedError(result) if error: (errcls, args) = error # pylint: disable=W0142 raise errcls(*args) ganeti-2.9.3/lib/ovf.py0000644000000000000000000020404512267470014014714 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
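# Usage sketch for the error-encoding helpers defined in lib/errors.py above
# (illustrative, not part of the sources): an exception raised on one side of
# a remote call is encoded to a plain tuple and re-raised by the peer.
#
#   from ganeti import errors
#   try:
#     raise errors.OpPrereqError("disk not found", errors.ECODE_NOENT)
#   except errors.GenericError, err:
#     wire = errors.EncodeException(err)
#   # wire == ("OpPrereqError", ("disk not found", "unknown_entity"))
#   errors.MaybeRaise(wire)  # re-raises OpPrereqError with the same args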
"""Converter tools between ovf and ganeti config file """ # pylint: disable=F0401, E1101 # F0401 because ElementTree is not default for python 2.4 # E1101 makes no sense - pylint assumes that ElementTree object is a tuple import ConfigParser import errno import logging import os import os.path import re import shutil import tarfile import tempfile import xml.dom.minidom import xml.parsers.expat try: import xml.etree.ElementTree as ET except ImportError: import elementtree.ElementTree as ET try: ParseError = ET.ParseError # pylint: disable=E1103 except AttributeError: ParseError = None from ganeti import constants from ganeti import errors from ganeti import utils from ganeti import pathutils # Schemas used in OVF format GANETI_SCHEMA = "http://ganeti" OVF_SCHEMA = "http://schemas.dmtf.org/ovf/envelope/1" RASD_SCHEMA = ("http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/" "CIM_ResourceAllocationSettingData") VSSD_SCHEMA = ("http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/" "CIM_VirtualSystemSettingData") XML_SCHEMA = "http://www.w3.org/2001/XMLSchema-instance" # File extensions in OVF package OVA_EXT = ".ova" OVF_EXT = ".ovf" MF_EXT = ".mf" CERT_EXT = ".cert" COMPRESSION_EXT = ".gz" FILE_EXTENSIONS = [ OVF_EXT, MF_EXT, CERT_EXT, ] COMPRESSION_TYPE = "gzip" NO_COMPRESSION = [None, "identity"] COMPRESS = "compression" DECOMPRESS = "decompression" ALLOWED_ACTIONS = [COMPRESS, DECOMPRESS] VMDK = "vmdk" RAW = "raw" COW = "cow" ALLOWED_FORMATS = [RAW, COW, VMDK] # ResourceType values RASD_TYPE = { "vcpus": "3", "memory": "4", "scsi-controller": "6", "ethernet-adapter": "10", "disk": "17", } SCSI_SUBTYPE = "lsilogic" VS_TYPE = { "ganeti": "ganeti-ovf", "external": "vmx-04", } # AllocationUnits values and conversion ALLOCATION_UNITS = { "b": ["bytes", "b"], "kb": ["kilobytes", "kb", "byte * 2^10", "kibibytes", "kib"], "mb": ["megabytes", "mb", "byte * 2^20", "mebibytes", "mib"], "gb": ["gigabytes", "gb", "byte * 2^30", "gibibytes", "gib"], } CONVERT_UNITS_TO_MB = { "b": lambda x: x / (1024 * 1024), "kb": lambda x: x / 1024, "mb": lambda x: x, "gb": lambda x: x * 1024, } # Names of the config fields NAME = "name" OS = "os" HYPERV = "hypervisor" VCPUS = "vcpus" MEMORY = "memory" AUTO_BALANCE = "auto_balance" DISK_TEMPLATE = "disk_template" TAGS = "tags" VERSION = "version" # Instance IDs of System and SCSI controller INSTANCE_ID = { "system": 0, "vcpus": 1, "memory": 2, "scsi": 3, } # Disk format descriptions DISK_FORMAT = { RAW: "http://en.wikipedia.org/wiki/Byte", VMDK: "http://www.vmware.com/interfaces/specifications/vmdk.html" "#monolithicSparse", COW: "http://www.gnome.org/~markmc/qcow-image-format.html", } def CheckQemuImg(): """ Make sure that qemu-img is present before performing operations. @raise errors.OpPrereqError: when qemu-img was not found in the system """ if not constants.QEMUIMG_PATH: raise errors.OpPrereqError("qemu-img not found at build time, unable" " to continue", errors.ECODE_STATE) def LinkFile(old_path, prefix=None, suffix=None, directory=None): """Create link with a given prefix and suffix. This is a wrapper over os.link. It tries to create a hard link for given file, but instead of rising error when file exists, the function changes the name a little bit. 
@type old_path: string @param old_path: path to the file that is to be linked @type prefix: string @param prefix: prefix of the filename for the link @type suffix: string @param suffix: suffix of the filename for the link @type directory: string @param directory: directory of the link @raise errors.OpPrereqError: when the error on linking is different from "File exists" """ assert(prefix is not None or suffix is not None) if directory is None: directory = os.getcwd() new_path = utils.PathJoin(directory, "%s%s" % (prefix, suffix)) counter = 1 while True: try: os.link(old_path, new_path) break except OSError, err: if err.errno == errno.EEXIST: new_path = utils.PathJoin(directory, "%s_%s%s" % (prefix, counter, suffix)) counter += 1 else: raise errors.OpPrereqError("Error moving the file %s to %s location:" " %s" % (old_path, new_path, err), errors.ECODE_ENVIRON) return new_path class OVFReader(object): """Reader class for OVF files. @type files_list: list @ivar files_list: list of files in the OVF package @type tree: ET.ElementTree @ivar tree: XML tree of the .ovf file @type schema_name: string @ivar schema_name: name of the .ovf file @type input_dir: string @ivar input_dir: directory in which the .ovf file resides """ def __init__(self, input_path): """Initialize the reader - load the .ovf file into the XML parser. It is assumed that the names of the manifest (.mf), certificate (.cert) and ovf files are the same. In order to account for any other files as part of the ovf package, they have to be explicitly mentioned in the Resources section of the .ovf file. @type input_path: string @param input_path: absolute path to the .ovf file @raise errors.OpPrereqError: when the .ovf file is not a proper XML file or some of the files mentioned in the Resources section do not exist """ self.tree = ET.ElementTree() try: self.tree.parse(input_path) except (ParseError, xml.parsers.expat.ExpatError), err: raise errors.OpPrereqError("Error while reading %s file: %s" % (OVF_EXT, err), errors.ECODE_ENVIRON) # Create a list of all files in the OVF package (input_dir, input_file) = os.path.split(input_path) (input_name, _) = os.path.splitext(input_file) files_directory = utils.ListVisibleFiles(input_dir) files_list = [] for file_name in files_directory: (name, extension) = os.path.splitext(file_name) if extension in FILE_EXTENSIONS and name == input_name: files_list.append(file_name) files_list += self._GetAttributes("{%s}References/{%s}File" % (OVF_SCHEMA, OVF_SCHEMA), "{%s}href" % OVF_SCHEMA) for file_name in files_list: file_path = utils.PathJoin(input_dir, file_name) if not os.path.exists(file_path): raise errors.OpPrereqError("File does not exist: %s" % file_path, errors.ECODE_ENVIRON) logging.info("Files in the OVF package: %s", " ".join(files_list)) self.files_list = files_list self.input_dir = input_dir self.schema_name = input_name def _GetAttributes(self, path, attribute): """Get the specified attribute from all nodes accessible using the given path. The function follows the path from the root node to the desired tags, then reads the appropriate attribute values.
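Example::

  # Sketch (reader is an OVFReader instance): collect the href of every
  # file referenced by the envelope, mirroring the call in __init__.
  hrefs = reader._GetAttributes("{%s}References/{%s}File" %
    (OVF_SCHEMA, OVF_SCHEMA), "{%s}href" % OVF_SCHEMA)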
@type path: string @param path: path of nodes to visit @type attribute: string @param attribute: attribute for which we gather the information @rtype: list @return: for each accessible tag with the attribute value set, value of the attribute """ current_list = self.tree.findall(path) results = [x.get(attribute) for x in current_list] return filter(None, results) def _GetElementMatchingAttr(self, path, match_attr): """Searches for element on a path that matches certain attribute value. Function follows the path from root node to the desired tags using path, then searches for the first one matching the attribute value. @type path: string @param path: path of nodes to visit @type match_attr: tuple @param match_attr: pair (attribute, value) for which we search @rtype: ET.ElementTree or None @return: first element matching match_attr or None if nothing matches """ potential_elements = self.tree.findall(path) (attr, val) = match_attr for elem in potential_elements: if elem.get(attr) == val: return elem return None def _GetElementMatchingText(self, path, match_text): """Searches for element on a path that matches certain text value. Function follows the path from root node to the desired tags using path, then searches for the first one matching the text value. @type path: string @param path: path of nodes to visit @type match_text: tuple @param match_text: pair (node, text) for which we search @rtype: ET.ElementTree or None @return: first element matching match_text or None if nothing matches """ potential_elements = self.tree.findall(path) (node, text) = match_text for elem in potential_elements: if elem.findtext(node) == text: return elem return None @staticmethod def _GetDictParameters(root, schema): """Reads text in all children and creates the dictionary from the contents. @type root: ET.ElementTree or None @param root: father of the nodes we want to collect data about @type schema: string @param schema: schema name to be removed from the tag @rtype: dict @return: dictionary containing tags and their text contents, tags have their schema fragment removed or empty dictionary, when root is None """ if root is None: return {} results = {} for element in list(root): pref_len = len("{%s}" % schema) assert(schema in element.tag) tag = element.tag[pref_len:] results[tag] = element.text return results def VerifyManifest(self): """Verifies manifest for the OVF package, if one is given. @raise errors.OpPrereqError: if SHA1 checksums do not match """ if "%s%s" % (self.schema_name, MF_EXT) in self.files_list: logging.warning("Verifying SHA1 checksums, this may take a while") manifest_filename = "%s%s" % (self.schema_name, MF_EXT) manifest_path = utils.PathJoin(self.input_dir, manifest_filename) manifest_content = utils.ReadFile(manifest_path).splitlines() manifest_files = {} regexp = r"SHA1\((\S+)\)= (\S+)" for line in manifest_content: match = re.match(regexp, line) if match: file_name = match.group(1) sha1_sum = match.group(2) manifest_files[file_name] = sha1_sum files_with_paths = [utils.PathJoin(self.input_dir, file_name) for file_name in self.files_list] sha1_sums = utils.FingerprintFiles(files_with_paths) for file_name, value in manifest_files.iteritems(): if sha1_sums.get(utils.PathJoin(self.input_dir, file_name)) != value: raise errors.OpPrereqError("SHA1 checksum of %s does not match the" " value in manifest file" % file_name, errors.ECODE_ENVIRON) logging.info("SHA1 checksums verified") def GetInstanceName(self): """Provides information about instance name. 
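Example::

  # For an envelope containing
  #   <VirtualSystem ovf:id="..."><Name>inst1.example.com</Name>...
  # this returns "inst1.example.com".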
@rtype: string @return: instance name string """ find_name = "{%s}VirtualSystem/{%s}Name" % (OVF_SCHEMA, OVF_SCHEMA) return self.tree.findtext(find_name) def GetDiskTemplate(self): """Returns disk template from .ovf file @rtype: string or None @return: name of the template """ find_template = ("{%s}GanetiSection/{%s}DiskTemplate" % (GANETI_SCHEMA, GANETI_SCHEMA)) return self.tree.findtext(find_template) def GetHypervisorData(self): """Provides hypervisor information - hypervisor name and options. @rtype: dict @return: dictionary containing name of the used hypervisor and all the specified options """ hypervisor_search = ("{%s}GanetiSection/{%s}Hypervisor" % (GANETI_SCHEMA, GANETI_SCHEMA)) hypervisor_data = self.tree.find(hypervisor_search) if hypervisor_data is None: return {"hypervisor_name": constants.VALUE_AUTO} results = { "hypervisor_name": hypervisor_data.findtext("{%s}Name" % GANETI_SCHEMA, default=constants.VALUE_AUTO), } parameters = hypervisor_data.find("{%s}Parameters" % GANETI_SCHEMA) results.update(self._GetDictParameters(parameters, GANETI_SCHEMA)) return results def GetOSData(self): """ Provides operating system information - os name and options. @rtype: dict @return: dictionary containing name and options for the chosen OS """ results = {} os_search = ("{%s}GanetiSection/{%s}OperatingSystem" % (GANETI_SCHEMA, GANETI_SCHEMA)) os_data = self.tree.find(os_search) if os_data is not None: results["os_name"] = os_data.findtext("{%s}Name" % GANETI_SCHEMA) parameters = os_data.find("{%s}Parameters" % GANETI_SCHEMA) results.update(self._GetDictParameters(parameters, GANETI_SCHEMA)) return results def GetBackendData(self): """ Provides backend information - vcpus, memory, auto balancing options. @rtype: dict @return: dictionary containing options for vcpus, memory and auto balance settings """ results = {} find_vcpus = ("{%s}VirtualSystem/{%s}VirtualHardwareSection/{%s}Item" % (OVF_SCHEMA, OVF_SCHEMA, OVF_SCHEMA)) match_vcpus = ("{%s}ResourceType" % RASD_SCHEMA, RASD_TYPE["vcpus"]) vcpus = self._GetElementMatchingText(find_vcpus, match_vcpus) if vcpus is not None: vcpus_count = vcpus.findtext("{%s}VirtualQuantity" % RASD_SCHEMA, default=constants.VALUE_AUTO) else: vcpus_count = constants.VALUE_AUTO results["vcpus"] = str(vcpus_count) find_memory = find_vcpus match_memory = ("{%s}ResourceType" % RASD_SCHEMA, RASD_TYPE["memory"]) memory = self._GetElementMatchingText(find_memory, match_memory) memory_raw = None if memory is not None: alloc_units = memory.findtext("{%s}AllocationUnits" % RASD_SCHEMA) matching_units = [units for units, variants in ALLOCATION_UNITS.items() if alloc_units.lower() in variants] if matching_units == []: raise errors.OpPrereqError("Unit %s for RAM memory unknown" % alloc_units, errors.ECODE_INVAL) units = matching_units[0] memory_raw = int(memory.findtext("{%s}VirtualQuantity" % RASD_SCHEMA, default=constants.VALUE_AUTO)) memory_count = CONVERT_UNITS_TO_MB[units](memory_raw) else: memory_count = constants.VALUE_AUTO results["memory"] = str(memory_count) find_balance = ("{%s}GanetiSection/{%s}AutoBalance" % (GANETI_SCHEMA, GANETI_SCHEMA)) balance = self.tree.findtext(find_balance, default=constants.VALUE_AUTO) results["auto_balance"] = balance return results def GetTagsData(self): """Provides tags information for instance. 
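Example::

  # With <gnt:Tags>tag1,tag2</gnt:Tags> in the GanetiSection this
  # returns "tag1,tag2"; if the element is missing, None is returned.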
@rtype: string or None @return: string of comma-separated tags for the instance """ find_tags = "{%s}GanetiSection/{%s}Tags" % (GANETI_SCHEMA, GANETI_SCHEMA) results = self.tree.findtext(find_tags) if results: return results else: return None def GetVersionData(self): """Provides version number read from .ovf file @rtype: string @return: string containing the version number """ find_version = ("{%s}GanetiSection/{%s}Version" % (GANETI_SCHEMA, GANETI_SCHEMA)) return self.tree.findtext(find_version) def GetNetworkData(self): """Provides data about the network in the OVF instance. The method gathers the data about networks used by OVF instance. It assumes that 'name' tag means something - in essence, if it contains one of the words 'bridged' or 'routed' then that will be the mode of this network in Ganeti. The information about the network can be either in GanetiSection or VirtualHardwareSection. @rtype: dict @return: dictionary containing all the network information """ results = {} networks_search = ("{%s}NetworkSection/{%s}Network" % (OVF_SCHEMA, OVF_SCHEMA)) network_names = self._GetAttributes(networks_search, "{%s}name" % OVF_SCHEMA) required = ["ip", "mac", "link", "mode", "network"] for (counter, network_name) in enumerate(network_names): network_search = ("{%s}VirtualSystem/{%s}VirtualHardwareSection/{%s}Item" % (OVF_SCHEMA, OVF_SCHEMA, OVF_SCHEMA)) ganeti_search = ("{%s}GanetiSection/{%s}Network/{%s}Nic" % (GANETI_SCHEMA, GANETI_SCHEMA, GANETI_SCHEMA)) network_match = ("{%s}Connection" % RASD_SCHEMA, network_name) ganeti_match = ("{%s}name" % OVF_SCHEMA, network_name) network_data = self._GetElementMatchingText(network_search, network_match) network_ganeti_data = self._GetElementMatchingAttr(ganeti_search, ganeti_match) ganeti_data = {} if network_ganeti_data is not None: ganeti_data["mode"] = network_ganeti_data.findtext("{%s}Mode" % GANETI_SCHEMA) ganeti_data["mac"] = network_ganeti_data.findtext("{%s}MACAddress" % GANETI_SCHEMA) ganeti_data["ip"] = network_ganeti_data.findtext("{%s}IPAddress" % GANETI_SCHEMA) ganeti_data["link"] = network_ganeti_data.findtext("{%s}Link" % GANETI_SCHEMA) ganeti_data["network"] = network_ganeti_data.findtext("{%s}Net" % GANETI_SCHEMA) mac_data = None if network_data is not None: mac_data = network_data.findtext("{%s}Address" % RASD_SCHEMA) network_name = network_name.lower() # First, some not Ganeti-specific information is collected if constants.NIC_MODE_BRIDGED in network_name: results["nic%s_mode" % counter] = "bridged" elif constants.NIC_MODE_ROUTED in network_name: results["nic%s_mode" % counter] = "routed" results["nic%s_mac" % counter] = mac_data # GanetiSection data overrides 'manually' collected data for name, value in ganeti_data.iteritems(): results["nic%s_%s" % (counter, name)] = value # Bridged network has no IP - unless specifically stated otherwise if (results.get("nic%s_mode" % counter) == "bridged" and not results.get("nic%s_ip" % counter)): results["nic%s_ip" % counter] = constants.VALUE_NONE for option in required: if not results.get("nic%s_%s" % (counter, option)): results["nic%s_%s" % (counter, option)] = constants.VALUE_AUTO if network_names: results["nic_count"] = str(len(network_names)) return results def GetDisksNames(self): """Provides list of file names for the disks used by the instance. 
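Example::

  # Sketch of a possible return value (illustrative names), pairing each
  # disk file with its declared compression, None when uncompressed:
  #   [("new_disk.raw", None), ("second_disk.raw.gz", "gzip")]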
@rtype: list @return: list of file names, as referenced in .ovf file """ results = [] disks_search = "{%s}DiskSection/{%s}Disk" % (OVF_SCHEMA, OVF_SCHEMA) disk_ids = self._GetAttributes(disks_search, "{%s}fileRef" % OVF_SCHEMA) for disk in disk_ids: disk_search = "{%s}References/{%s}File" % (OVF_SCHEMA, OVF_SCHEMA) disk_match = ("{%s}id" % OVF_SCHEMA, disk) disk_elem = self._GetElementMatchingAttr(disk_search, disk_match) if disk_elem is None: raise errors.OpPrereqError("%s file corrupted - disk %s not found in" " references" % (OVF_EXT, disk), errors.ECODE_ENVIRON) disk_name = disk_elem.get("{%s}href" % OVF_SCHEMA) disk_compression = disk_elem.get("{%s}compression" % OVF_SCHEMA) results.append((disk_name, disk_compression)) return results def SubElementText(parent, tag, text, attrib={}, **extra): # pylint: disable=W0102 """This is just a wrapper on ET.SubElement that always has text content. """ if text is None: return None elem = ET.SubElement(parent, tag, attrib=attrib, **extra) elem.text = str(text) return elem class OVFWriter(object): """Writer class for OVF files. @type tree: ET.ElementTree @ivar tree: XML tree that we are constructing @type virtual_system_type: string @ivar virtual_system_type: value of vssd:VirtualSystemType, for external usage in VMWare this requires to be vmx @type hardware_list: list @ivar hardware_list: list of items prepared for VirtualHardwareSection @type next_instance_id: int @ivar next_instance_id: next instance id to be used when creating elements on hardware_list """ def __init__(self, has_gnt_section): """Initialize the writer - set the top element. @type has_gnt_section: bool @param has_gnt_section: if the Ganeti schema should be added - i.e. this means that Ganeti section will be present """ env_attribs = { "xmlns:xsi": XML_SCHEMA, "xmlns:vssd": VSSD_SCHEMA, "xmlns:rasd": RASD_SCHEMA, "xmlns:ovf": OVF_SCHEMA, "xmlns": OVF_SCHEMA, "xml:lang": "en-US", } if has_gnt_section: env_attribs["xmlns:gnt"] = GANETI_SCHEMA self.virtual_system_type = VS_TYPE["ganeti"] else: self.virtual_system_type = VS_TYPE["external"] self.tree = ET.Element("Envelope", attrib=env_attribs) self.hardware_list = [] # INSTANCE_ID contains statically assigned IDs, starting from 0 self.next_instance_id = len(INSTANCE_ID) # FIXME: hackish def SaveDisksData(self, disks): """Convert disk information to certain OVF sections. 
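Example::

  # Sketch of a single disks entry as read from config.ini; the keys are
  # the ones used in the body below, the values are illustrative:
  #   {"path": "new_disk.raw", "real-size": 536870912,
  #    "virt-size": 1073741824, "format": "raw", "compression": "gzip"}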
@type disks: list @param disks: list of dictionaries of disk options from config.ini """ references = ET.SubElement(self.tree, "References") disk_section = ET.SubElement(self.tree, "DiskSection") SubElementText(disk_section, "Info", "Virtual disk information") for counter, disk in enumerate(disks): file_id = "file%s" % counter disk_id = "disk%s" % counter file_attribs = { "ovf:href": disk["path"], "ovf:size": str(disk["real-size"]), "ovf:id": file_id, } disk_attribs = { "ovf:capacity": str(disk["virt-size"]), "ovf:diskId": disk_id, "ovf:fileRef": file_id, "ovf:format": DISK_FORMAT.get(disk["format"], disk["format"]), } if "compression" in disk: file_attribs["ovf:compression"] = disk["compression"] ET.SubElement(references, "File", attrib=file_attribs) ET.SubElement(disk_section, "Disk", attrib=disk_attribs) # Item in VirtualHardwareSection creation disk_item = ET.Element("Item") SubElementText(disk_item, "rasd:ElementName", disk_id) SubElementText(disk_item, "rasd:HostResource", "ovf:/disk/%s" % disk_id) SubElementText(disk_item, "rasd:InstanceID", self.next_instance_id) SubElementText(disk_item, "rasd:Parent", INSTANCE_ID["scsi"]) SubElementText(disk_item, "rasd:ResourceType", RASD_TYPE["disk"]) self.hardware_list.append(disk_item) self.next_instance_id += 1 def SaveNetworksData(self, networks): """Convert network information to NetworkSection. @type networks: list @param networks: list of dictionaries of network options from config.ini """ network_section = ET.SubElement(self.tree, "NetworkSection") SubElementText(network_section, "Info", "List of logical networks") for counter, network in enumerate(networks): network_name = "%s%s" % (network["mode"], counter) network_attrib = {"ovf:name": network_name} ET.SubElement(network_section, "Network", attrib=network_attrib) # Item in VirtualHardwareSection creation network_item = ET.Element("Item") SubElementText(network_item, "rasd:Address", network["mac"]) SubElementText(network_item, "rasd:Connection", network_name) SubElementText(network_item, "rasd:ElementName", network_name) SubElementText(network_item, "rasd:InstanceID", self.next_instance_id) SubElementText(network_item, "rasd:ResourceType", RASD_TYPE["ethernet-adapter"]) self.hardware_list.append(network_item) self.next_instance_id += 1 @staticmethod def _SaveNameAndParams(root, data): """Save name and parameters information under root using data. @type root: ET.Element @param root: root element for the Name and Parameters @type data: dict @param data: data from which we gather the values """ assert(data.get("name")) name = SubElementText(root, "gnt:Name", data["name"]) params = ET.SubElement(root, "gnt:Parameters") for name, value in data.iteritems(): if name != "name": SubElementText(params, "gnt:%s" % name, value) def SaveGanetiData(self, ganeti, networks): """Convert Ganeti-specific information to GanetiSection.
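Example::

  # Sketch of the expected ganeti dict (illustrative values); "os" and
  # "hypervisor" must each carry at least a "name" key, as asserted in
  # _SaveNameAndParams:
  #   {"version": "2.9.3", "disk_template": "plain",
  #    "auto_balance": "True", "tags": "tag1,tag2",
  #    "os": {"name": "debootstrap"},
  #    "hypervisor": {"name": "kvm"}}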
@type ganeti: dict @param ganeti: dictionary of Ganeti-specific options from config.ini @type networks: list @param networks: list of dictionaries of network options from config.ini """ ganeti_section = ET.SubElement(self.tree, "gnt:GanetiSection") SubElementText(ganeti_section, "gnt:Version", ganeti.get("version")) SubElementText(ganeti_section, "gnt:DiskTemplate", ganeti.get("disk_template")) SubElementText(ganeti_section, "gnt:AutoBalance", ganeti.get("auto_balance")) SubElementText(ganeti_section, "gnt:Tags", ganeti.get("tags")) osys = ET.SubElement(ganeti_section, "gnt:OperatingSystem") self._SaveNameAndParams(osys, ganeti["os"]) hypervisor = ET.SubElement(ganeti_section, "gnt:Hypervisor") self._SaveNameAndParams(hypervisor, ganeti["hypervisor"]) network_section = ET.SubElement(ganeti_section, "gnt:Network") for counter, network in enumerate(networks): network_name = "%s%s" % (network["mode"], counter) nic_attrib = {"ovf:name": network_name} nic = ET.SubElement(network_section, "gnt:Nic", attrib=nic_attrib) SubElementText(nic, "gnt:Mode", network["mode"]) SubElementText(nic, "gnt:MACAddress", network["mac"]) SubElementText(nic, "gnt:IPAddress", network["ip"]) SubElementText(nic, "gnt:Link", network["link"]) SubElementText(nic, "gnt:Net", network["network"]) def SaveVirtualSystemData(self, name, vcpus, memory): """Convert virtual system information to OVF sections. @type name: string @param name: name of the instance @type vcpus: int @param vcpus: number of VCPUs @type memory: int @param memory: RAM memory in MB """ assert(vcpus > 0) assert(memory > 0) vs_attrib = {"ovf:id": name} virtual_system = ET.SubElement(self.tree, "VirtualSystem", attrib=vs_attrib) SubElementText(virtual_system, "Info", "A virtual machine") name_section = ET.SubElement(virtual_system, "Name") name_section.text = name os_attrib = {"ovf:id": "0"} os_section = ET.SubElement(virtual_system, "OperatingSystemSection", attrib=os_attrib) SubElementText(os_section, "Info", "Installed guest operating system") hardware_section = ET.SubElement(virtual_system, "VirtualHardwareSection") SubElementText(hardware_section, "Info", "Virtual hardware requirements") # System description system = ET.SubElement(hardware_section, "System") SubElementText(system, "vssd:ElementName", "Virtual Hardware Family") SubElementText(system, "vssd:InstanceID", INSTANCE_ID["system"]) SubElementText(system, "vssd:VirtualSystemIdentifier", name) SubElementText(system, "vssd:VirtualSystemType", self.virtual_system_type) # Item for vcpus vcpus_item = ET.SubElement(hardware_section, "Item") SubElementText(vcpus_item, "rasd:ElementName", "%s virtual CPU(s)" % vcpus) SubElementText(vcpus_item, "rasd:InstanceID", INSTANCE_ID["vcpus"]) SubElementText(vcpus_item, "rasd:ResourceType", RASD_TYPE["vcpus"]) SubElementText(vcpus_item, "rasd:VirtualQuantity", vcpus) # Item for memory memory_item = ET.SubElement(hardware_section, "Item") SubElementText(memory_item, "rasd:AllocationUnits", "byte * 2^20") SubElementText(memory_item, "rasd:ElementName", "%sMB of memory" % memory) SubElementText(memory_item, "rasd:InstanceID", INSTANCE_ID["memory"]) SubElementText(memory_item, "rasd:ResourceType", RASD_TYPE["memory"]) SubElementText(memory_item, "rasd:VirtualQuantity", memory) # Item for scsi controller scsi_item = ET.SubElement(hardware_section, "Item") SubElementText(scsi_item, "rasd:Address", INSTANCE_ID["system"]) SubElementText(scsi_item, "rasd:ElementName", "scsi_controller0") SubElementText(scsi_item, "rasd:InstanceID", INSTANCE_ID["scsi"])
SubElementText(scsi_item, "rasd:ResourceSubType", SCSI_SUBTYPE) SubElementText(scsi_item, "rasd:ResourceType", RASD_TYPE["scsi-controller"]) # Other items - from self.hardware_list for item in self.hardware_list: hardware_section.append(item) def PrettyXmlDump(self): """Formatter of the XML file. @rtype: string @return: XML tree in the form of nicely-formatted string """ raw_string = ET.tostring(self.tree) parsed_xml = xml.dom.minidom.parseString(raw_string) xml_string = parsed_xml.toprettyxml(indent=" ") text_re = re.compile(r">\n\s+([^<>\s].*?)\n\s+\g<1> IPV4_NETWORK_MAX_NUM_HOSTS: raise errors.AddressPoolError("A big network with %s host(s) is currently" " not supported. please specify at most a" " /%s network" % (str(self.network.numhosts), IPV4_NETWORK_MAX_SIZE)) if self.network.numhosts < IPV4_NETWORK_MIN_NUM_HOSTS: raise errors.AddressPoolError("A network with only %s host(s) is too" " small, please specify at least a /%s" " network" % (str(self.network.numhosts), IPV4_NETWORK_MIN_SIZE)) if self.net.gateway: self.gateway = ipaddr.IPAddress(self.net.gateway) if self.net.network6: self.network6 = ipaddr.IPv6Network(self.net.network6) if self.net.gateway6: self.gateway6 = ipaddr.IPv6Address(self.net.gateway6) if self.net.reservations: self.reservations = bitarray(self.net.reservations) else: self.reservations = bitarray(self.network.numhosts) # pylint: disable=E1103 self.reservations.setall(False) if self.net.ext_reservations: self.ext_reservations = bitarray(self.net.ext_reservations) else: self.ext_reservations = bitarray(self.network.numhosts) # pylint: disable=E1103 self.ext_reservations.setall(False) assert len(self.reservations) == self.network.numhosts assert len(self.ext_reservations) == self.network.numhosts def Contains(self, address): if address is None: return False addr = ipaddr.IPAddress(address) return addr in self.network def _GetAddrIndex(self, address): addr = ipaddr.IPAddress(address) if not addr in self.network: raise errors.AddressPoolError("%s does not contain %s" % (self.network, addr)) return int(addr) - int(self.network.network) def Update(self): """Write address pools back to the network object. """ # pylint: disable=E1103 self.net.ext_reservations = self.ext_reservations.to01() self.net.reservations = self.reservations.to01() def _Mark(self, address, value=True, external=False): idx = self._GetAddrIndex(address) if external: self.ext_reservations[idx] = value else: self.reservations[idx] = value self.Update() def _GetSize(self): return 2 ** (32 - self.network.prefixlen) @property def all_reservations(self): """Return a combined map of internal and external reservations. """ return (self.reservations | self.ext_reservations) def Validate(self): assert len(self.reservations) == self._GetSize() assert len(self.ext_reservations) == self._GetSize() all_res = self.reservations & self.ext_reservations assert not all_res.any() if self.gateway is not None: assert self.gateway in self.network if self.network6 and self.gateway6: assert self.gateway6 in self.network6 or self.gateway6.is_link_local return True def IsFull(self): """Check whether the network is full. """ return self.all_reservations.all() def GetReservedCount(self): """Get the count of reserved addresses. """ return self.all_reservations.count(True) def GetFreeCount(self): """Get the count of unused addresses. """ return self.all_reservations.count(False) def GetMap(self): """Return a textual representation of the network's occupation status. 
""" return self.all_reservations.to01().replace("1", "X").replace("0", ".") def IsReserved(self, address): """Checks if the given IP is reserved. """ idx = self._GetAddrIndex(address) return self.all_reservations[idx] def Reserve(self, address, external=False): """Mark an address as used. """ if self.IsReserved(address): raise errors.AddressPoolError("%s is already reserved" % address) self._Mark(address, external=external) def Release(self, address, external=False): """Release a given address reservation. """ self._Mark(address, value=False, external=external) def GetFreeAddress(self): """Returns the first available address. """ if self.IsFull(): raise errors.AddressPoolError("%s is full" % self.network) idx = self.all_reservations.index(False) address = str(self.network[idx]) self.Reserve(address) return address def GenerateFree(self): """Returns the first free address of the network. @raise errors.AddressPoolError: Pool is full """ idx = self.all_reservations.search(self.FREE, 1) if idx: return str(self.network[idx[0]]) else: raise errors.AddressPoolError("%s is full" % self.network) def GetExternalReservations(self): """Returns a list of all externally reserved addresses. """ # pylint: disable=E1103 idxs = self.ext_reservations.search(self.RESERVED) return [str(self.network[idx]) for idx in idxs] @classmethod def InitializeNetwork(cls, net): """Initialize an L{objects.Network} object. Reserve the network, broadcast and gateway IP addresses. """ obj = cls(net) obj.Update() for ip in [obj.network[0], obj.network[-1]]: obj.Reserve(ip, external=True) if obj.net.gateway is not None: obj.Reserve(obj.net.gateway, external=True) obj.Validate() return obj ganeti-2.9.3/lib/constants.py0000644000000000000000000020326512271422343016136 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module holding different constants.""" import re import socket from ganeti import _autoconf from ganeti import _vcsversion from ganeti import compat from ganeti import pathutils # various versions RELEASE_VERSION = _autoconf.PACKAGE_VERSION OS_API_V10 = 10 OS_API_V15 = 15 OS_API_V20 = 20 OS_API_VERSIONS = compat.UniqueFrozenset([ OS_API_V10, OS_API_V15, OS_API_V20, ]) VCS_VERSION = _vcsversion.VCS_VERSION EXPORT_VERSION = 0 RAPI_VERSION = 2 # Format for CONFIG_VERSION: # 01 03 0123 = 01030123 # ^^ ^^ ^^^^ # | | + Configuration version/revision # | + Minor version # + Major version # # It is stored as an integer. Make sure not to write an octal number. # BuildVersion and SplitVersion must be in here because we can't import other # modules. The cfgupgrade tool must be able to read and write version numbers # and thus requires these functions. To avoid code duplication, they're kept in # here. 
def BuildVersion(major, minor, revision): """Calculates int version number from major, minor and revision numbers. Returns: int representing version number """ assert isinstance(major, int) assert isinstance(minor, int) assert isinstance(revision, int) return (1000000 * major + 10000 * minor + 1 * revision) def SplitVersion(version): """Splits version number stored in an int. Returns: tuple; (major, minor, revision) """ assert isinstance(version, int) (major, remainder) = divmod(version, 1000000) (minor, revision) = divmod(remainder, 10000) return (major, minor, revision) CONFIG_MAJOR = int(_autoconf.VERSION_MAJOR) CONFIG_MINOR = int(_autoconf.VERSION_MINOR) CONFIG_REVISION = 0 CONFIG_VERSION = BuildVersion(CONFIG_MAJOR, CONFIG_MINOR, CONFIG_REVISION) #: RPC protocol version PROTOCOL_VERSION = BuildVersion(CONFIG_MAJOR, CONFIG_MINOR, 0) # user separation DAEMONS_GROUP = _autoconf.DAEMONS_GROUP ADMIN_GROUP = _autoconf.ADMIN_GROUP MASTERD_USER = _autoconf.MASTERD_USER MASTERD_GROUP = _autoconf.MASTERD_GROUP RAPI_USER = _autoconf.RAPI_USER RAPI_GROUP = _autoconf.RAPI_GROUP CONFD_USER = _autoconf.CONFD_USER CONFD_GROUP = _autoconf.CONFD_GROUP LUXID_USER = _autoconf.LUXID_USER LUXID_GROUP = _autoconf.LUXID_GROUP NODED_USER = _autoconf.NODED_USER NODED_GROUP = _autoconf.NODED_GROUP MOND_USER = _autoconf.MOND_USER MOND_GROUP = _autoconf.MOND_GROUP SSH_LOGIN_USER = _autoconf.SSH_LOGIN_USER SSH_CONSOLE_USER = _autoconf.SSH_CONSOLE_USER # cpu pinning separators and constants CPU_PINNING_SEP = ":" CPU_PINNING_ALL = "all" # internal representation of "all" CPU_PINNING_ALL_VAL = -1 # one "all" entry in a CPU list means CPU pinning is off CPU_PINNING_OFF = [CPU_PINNING_ALL_VAL] # A Xen-specific implementation detail - there is no way to actually say # "use any cpu for pinning" in a Xen configuration file, as opposed to the # command line, where you can say "xm vcpu-pin all". # The workaround used in Xen is "0-63" (see source code function # xm_vcpu_pin in /tools/python/xen/xm/main.py). # To support future changes, the following constant is treated as a # blackbox string that simply means use-any-cpu-for-pinning-under-xen. CPU_PINNING_ALL_XEN = "0-63" # A KVM-specific implementation detail - the following value is used # to set CPU affinity to all processors (#0 through #31), per taskset # man page. 
# FIXME: This only works for machines with up to 32 CPU cores CPU_PINNING_ALL_KVM = 0xFFFFFFFF # Wipe DD_CMD = "dd" MAX_WIPE_CHUNK = 1024 # 1GB MIN_WIPE_CHUNK_PERCENT = 10 RUN_DIRS_MODE = 0775 SECURE_DIR_MODE = 0700 SECURE_FILE_MODE = 0600 ADOPTABLE_BLOCKDEV_ROOT = "/dev/disk/" ENABLE_CONFD = _autoconf.ENABLE_CONFD ENABLE_MOND = _autoconf.ENABLE_MOND ENABLE_SPLIT_QUERY = _autoconf.ENABLE_SPLIT_QUERY ENABLE_RESTRICTED_COMMANDS = _autoconf.ENABLE_RESTRICTED_COMMANDS # SSH constants SSH = "ssh" SCP = "scp" NODED = "ganeti-noded" CONFD = "ganeti-confd" LUXID = "ganeti-luxid" RAPI = "ganeti-rapi" MASTERD = "ganeti-masterd" MOND = "ganeti-mond" DAEMONS = compat.UniqueFrozenset([ NODED, CONFD, LUXID, RAPI, MASTERD, MOND, ]) DAEMONS_PORTS = { # daemon-name: ("proto", "default-port") NODED: ("tcp", 1811), CONFD: ("udp", 1814), MOND: ("tcp", 1815), RAPI: ("tcp", 5080), SSH: ("tcp", 22), } DEFAULT_NODED_PORT = DAEMONS_PORTS[NODED][1] DEFAULT_CONFD_PORT = DAEMONS_PORTS[CONFD][1] DEFAULT_MOND_PORT = DAEMONS_PORTS[MOND][1] DEFAULT_RAPI_PORT = DAEMONS_PORTS[RAPI][1] FIRST_DRBD_PORT = 11000 LAST_DRBD_PORT = 14999 DAEMONS_LOGBASE = { NODED: "node-daemon", CONFD: "conf-daemon", LUXID: "luxi-daemon", RAPI: "rapi-daemon", MASTERD: "master-daemon", MOND: "monitoring-daemon", } DAEMONS_LOGFILES = \ dict((daemon, pathutils.GetLogFilename(DAEMONS_LOGBASE[daemon])) for daemon in DAEMONS_LOGBASE) # Some daemons might require more than one logfile. # Specifically, right now only the Haskell http library "snap", used by the # monitoring daemon, requires multiple log files. # These are the only valid reasons for having an extra logfile EXTRA_LOGREASON_ACCESS = "access" EXTRA_LOGREASON_ERROR = "error" VALID_EXTRA_LOGREASONS = compat.UniqueFrozenset([ EXTRA_LOGREASON_ACCESS, EXTRA_LOGREASON_ERROR, ]) # These are the extra logfiles, grouped by daemon DAEMONS_EXTRA_LOGBASE = { MOND: { EXTRA_LOGREASON_ACCESS: "monitoring-daemon-access", EXTRA_LOGREASON_ERROR: "monitoring-daemon-error", } } DAEMONS_EXTRA_LOGFILES = \ dict((daemon, dict((extra, pathutils.GetLogFilename(DAEMONS_EXTRA_LOGBASE[daemon][extra])) for extra in DAEMONS_EXTRA_LOGBASE[daemon])) for daemon in DAEMONS_EXTRA_LOGBASE) DEV_CONSOLE = "/dev/console" PROC_MOUNTS = "/proc/mounts" # Local UniX Interface related constants LUXI_EOM = chr(3) LUXI_VERSION = CONFIG_VERSION #: Environment variable for the luxi override socket LUXI_OVERRIDE = "FORCE_LUXI_SOCKET" LUXI_OVERRIDE_MASTER = "master" LUXI_OVERRIDE_QUERY = "query" LUXI_SOCKET_PERMS = 0660 # one of "no", "yes", "only" SYSLOG_USAGE = _autoconf.SYSLOG_USAGE SYSLOG_NO = "no" SYSLOG_YES = "yes" SYSLOG_ONLY = "only" SYSLOG_SOCKET = "/dev/log" EXPORT_CONF_FILE = "config.ini" XEN_BOOTLOADER = _autoconf.XEN_BOOTLOADER XEN_KERNEL = _autoconf.XEN_KERNEL XEN_INITRD = _autoconf.XEN_INITRD XEN_CMD_XM = "xm" XEN_CMD_XL = "xl" KNOWN_XEN_COMMANDS = compat.UniqueFrozenset([ XEN_CMD_XM, XEN_CMD_XL, ]) # When the Xen toolstack used is "xl", live migration requires the source host # to connect to the target host via ssh (xl runs this command). We need to pass # the command xl runs some extra info so that it can use Ganeti's key # verification and not fail. Note that this string is incomplete: it must be # filled with the cluster name before being used. 
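# For example, with the default SSH_LOGIN_USER "root" and a cluster named
# "cluster.example.com", XL_SSH_CMD % "cluster.example.com" expands to
# (known-hosts path abbreviated):
#   ssh -l root -oGlobalKnownHostsFile=... -oUserKnownHostsFile=/dev/null
#   -oCheckHostIp=no -oStrictHostKeyChecking=yes
#   -oHostKeyAlias=cluster.example.com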
XL_SSH_CMD = ("ssh -l %s -oGlobalKnownHostsFile=%s" " -oUserKnownHostsFile=/dev/null" " -oCheckHostIp=no -oStrictHostKeyChecking=yes" " -oHostKeyAlias=%%s") % (SSH_LOGIN_USER, pathutils.SSH_KNOWN_HOSTS_FILE) KVM_PATH = _autoconf.KVM_PATH KVM_KERNEL = _autoconf.KVM_KERNEL SOCAT_PATH = _autoconf.SOCAT_PATH SOCAT_USE_ESCAPE = _autoconf.SOCAT_USE_ESCAPE SOCAT_USE_COMPRESS = _autoconf.SOCAT_USE_COMPRESS SOCAT_ESCAPE_CODE = "0x1d" #: Console as SSH command CONS_SSH = "ssh" #: Console as VNC server CONS_VNC = "vnc" #: Console as SPICE server CONS_SPICE = "spice" #: Display a message for console access CONS_MESSAGE = "msg" #: All console types CONS_ALL = compat.UniqueFrozenset([ CONS_SSH, CONS_VNC, CONS_SPICE, CONS_MESSAGE, ]) # For RSA keys more bits are better, but they also make operations more # expensive. NIST SP 800-131 recommends a minimum of 2048 bits from the year # 2010 on. RSA_KEY_BITS = 2048 # Ciphers allowed for SSL connections. For the format, see ciphers(1). A better # way to disable ciphers would be to use the exclamation mark (!), but socat # versions below 1.5 can't parse exclamation marks in options properly. When # modifying the ciphers, ensure not to accidentially add something after it's # been removed. Use the "openssl" utility to check the allowed ciphers, e.g. # "openssl ciphers -v HIGH:-DES". OPENSSL_CIPHERS = "HIGH:-DES:-3DES:-EXPORT:-ADH" # Digest used to sign certificates ("openssl x509" uses SHA1 by default) X509_CERT_SIGN_DIGEST = "SHA1" # Default validity of certificates in days X509_CERT_DEFAULT_VALIDITY = 365 * 5 # commonName (CN) used in certificates X509_CERT_CN = "ganeti.example.com" X509_CERT_SIGNATURE_HEADER = "X-Ganeti-Signature" # Import/export daemon mode IEM_IMPORT = "import" IEM_EXPORT = "export" # Import/export transport compression IEC_NONE = "none" IEC_GZIP = "gzip" IEC_ALL = compat.UniqueFrozenset([ IEC_NONE, IEC_GZIP, ]) IE_CUSTOM_SIZE = "fd" IE_MAGIC_RE = re.compile(r"^[-_.a-zA-Z0-9]{5,100}$") # Import/export I/O # Direct file I/O, equivalent to a shell's I/O redirection using '<' or '>' IEIO_FILE = "file" # Raw block device I/O using "dd" IEIO_RAW_DISK = "raw" # OS definition import/export script IEIO_SCRIPT = "script" VALUE_DEFAULT = "default" VALUE_AUTO = "auto" VALUE_GENERATE = "generate" VALUE_NONE = "none" VALUE_TRUE = "true" VALUE_FALSE = "false" # External script validation mask EXT_PLUGIN_MASK = re.compile("^[a-zA-Z0-9_-]+$") # hooks-related constants HOOKS_PHASE_PRE = "pre" HOOKS_PHASE_POST = "post" HOOKS_NAME_CFGUPDATE = "config-update" HOOKS_NAME_WATCHER = "watcher" HOOKS_VERSION = 2 HOOKS_PATH = "/sbin:/bin:/usr/sbin:/usr/bin" # hooks subject type (what object type does the LU deal with) HTYPE_CLUSTER = "CLUSTER" HTYPE_NODE = "NODE" HTYPE_GROUP = "GROUP" HTYPE_INSTANCE = "INSTANCE" HTYPE_NETWORK = "NETWORK" HKR_SKIP = 0 HKR_FAIL = 1 HKR_SUCCESS = 2 # Storage types ST_BLOCK = "blockdev" ST_DISKLESS = "diskless" ST_EXT = "ext" ST_FILE = "file" ST_LVM_PV = "lvm-pv" ST_LVM_VG = "lvm-vg" ST_RADOS = "rados" STORAGE_TYPES = compat.UniqueFrozenset([ ST_BLOCK, ST_DISKLESS, ST_EXT, ST_FILE, ST_LVM_PV, ST_LVM_VG, ST_RADOS, ]) # the set of storage types for which storage reporting is available # FIXME: Remove this, once storage reporting is available for all types. 
STS_REPORT = compat.UniqueFrozenset([ST_FILE, ST_LVM_PV, ST_LVM_VG]) # Storage fields # first two are valid in LU context only, not passed to backend SF_NODE = "node" SF_TYPE = "type" # and the rest are valid in backend SF_NAME = "name" SF_SIZE = "size" SF_FREE = "free" SF_USED = "used" SF_ALLOCATABLE = "allocatable" # Storage operations SO_FIX_CONSISTENCY = "fix-consistency" # Available fields per storage type VALID_STORAGE_FIELDS = compat.UniqueFrozenset([ SF_NAME, SF_TYPE, SF_SIZE, SF_USED, SF_FREE, SF_ALLOCATABLE, ]) MODIFIABLE_STORAGE_FIELDS = { ST_LVM_PV: frozenset([SF_ALLOCATABLE]), } VALID_STORAGE_OPERATIONS = { ST_LVM_VG: frozenset([SO_FIX_CONSISTENCY]), } # Local disk status # Note: Code depends on LDS_OKAY < LDS_UNKNOWN < LDS_FAULTY (LDS_OKAY, LDS_UNKNOWN, LDS_FAULTY) = range(1, 4) LDS_NAMES = { LDS_OKAY: "ok", LDS_UNKNOWN: "unknown", LDS_FAULTY: "faulty", } # disk template types DT_BLOCK = "blockdev" DT_DISKLESS = "diskless" DT_DRBD8 = "drbd" DT_EXT = "ext" DT_FILE = "file" DT_PLAIN = "plain" DT_RBD = "rbd" DT_SHARED_FILE = "sharedfile" # This ordering is used to determine the default disk template when the list # of enabled disk templates is inferred from the current state of the cluster. # This only happens on an upgrade from a version of Ganeti that did not # support the 'enabled_disk_templates' so far. DISK_TEMPLATE_PREFERENCE = [ DT_BLOCK, DT_DISKLESS, DT_DRBD8, DT_EXT, DT_FILE, DT_PLAIN, DT_RBD, DT_SHARED_FILE, ] DISK_TEMPLATES = compat.UniqueFrozenset([ DT_DISKLESS, DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE, DT_BLOCK, DT_RBD, DT_EXT ]) # disk templates that are enabled by default DEFAULT_ENABLED_DISK_TEMPLATES = [ DT_DRBD8, DT_PLAIN, ] # mapping of disk templates to storage types MAP_DISK_TEMPLATE_STORAGE_TYPE = { DT_BLOCK: ST_BLOCK, DT_DISKLESS: ST_DISKLESS, DT_DRBD8: ST_LVM_VG, DT_EXT: ST_EXT, DT_FILE: ST_FILE, DT_PLAIN: ST_LVM_VG, DT_RBD: ST_RADOS, DT_SHARED_FILE: ST_FILE, } # the set of network-mirrored disk templates DTS_INT_MIRROR = compat.UniqueFrozenset([DT_DRBD8]) # the set of externally-mirrored disk templates (e.g. SAN, NAS) DTS_EXT_MIRROR = compat.UniqueFrozenset([ DT_DISKLESS, # 'trivially' externally mirrored DT_SHARED_FILE, DT_BLOCK, DT_RBD, DT_EXT, ]) # the set of non-lvm-based disk templates DTS_NOT_LVM = compat.UniqueFrozenset([ DT_DISKLESS, DT_FILE, DT_SHARED_FILE, DT_BLOCK, DT_RBD, DT_EXT, ]) # the set of disk templates which can be grown DTS_GROWABLE = compat.UniqueFrozenset([ DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE, DT_RBD, DT_EXT, ]) # the set of disk templates that allow adoption DTS_MAY_ADOPT = compat.UniqueFrozenset([ DT_PLAIN, DT_BLOCK, ]) # the set of disk templates that *must* use adoption DTS_MUST_ADOPT = compat.UniqueFrozenset([DT_BLOCK]) # the set of disk templates that allow migrations DTS_MIRRORED = frozenset.union(DTS_INT_MIRROR, DTS_EXT_MIRROR) # the set of file based disk templates DTS_FILEBASED = compat.UniqueFrozenset([ DT_FILE, DT_SHARED_FILE, ]) # the set of disk templates that can be moved by copying # Note: a requirement is that they're not accessed externally or shared between # nodes; in particular, sharedfile is not suitable.
DTS_COPYABLE = compat.UniqueFrozenset([ DT_FILE, DT_PLAIN, ]) # the set of disk templates that are supported by exclusive_storage DTS_EXCL_STORAGE = compat.UniqueFrozenset([DT_PLAIN]) # templates for which we don't perform checks on free space DTS_NO_FREE_SPACE_CHECK = compat.UniqueFrozenset([ DT_FILE, DT_SHARED_FILE, DT_RBD, DT_EXT, ]) DTS_BLOCK = compat.UniqueFrozenset([ DT_PLAIN, DT_DRBD8, DT_BLOCK, DT_RBD, DT_EXT, ]) # the set of drbd-like disk types DTS_DRBD = compat.UniqueFrozenset([DT_DRBD8]) # drbd constants DRBD_HMAC_ALG = "md5" DRBD_DEFAULT_NET_PROTOCOL = "C" DRBD_MIGRATION_NET_PROTOCOL = "C" DRBD_STATUS_FILE = "/proc/drbd" #: Size of DRBD meta block device DRBD_META_SIZE = 128 # drbd barrier types DRBD_B_NONE = "n" DRBD_B_DISK_BARRIERS = "b" DRBD_B_DISK_DRAIN = "d" DRBD_B_DISK_FLUSH = "f" # Valid barrier combinations: "n" or any non-null subset of "bfd" DRBD_VALID_BARRIER_OPT = compat.UniqueFrozenset([ frozenset([DRBD_B_NONE]), frozenset([DRBD_B_DISK_BARRIERS]), frozenset([DRBD_B_DISK_DRAIN]), frozenset([DRBD_B_DISK_FLUSH]), frozenset([DRBD_B_DISK_DRAIN, DRBD_B_DISK_FLUSH]), frozenset([DRBD_B_DISK_BARRIERS, DRBD_B_DISK_DRAIN]), frozenset([DRBD_B_DISK_BARRIERS, DRBD_B_DISK_FLUSH]), frozenset([DRBD_B_DISK_BARRIERS, DRBD_B_DISK_FLUSH, DRBD_B_DISK_DRAIN]), ]) # rbd tool command RBD_CMD = "rbd" # file backend driver FD_LOOP = "loop" FD_BLKTAP = "blktap" FD_BLKTAP2 = "blktap2" FD_DEFAULT = FD_LOOP # disk access mode DISK_RDONLY = "ro" DISK_RDWR = "rw" DISK_ACCESS_SET = compat.UniqueFrozenset([DISK_RDONLY, DISK_RDWR]) # disk replacement mode REPLACE_DISK_PRI = "replace_on_primary" # replace disks on primary REPLACE_DISK_SEC = "replace_on_secondary" # replace disks on secondary REPLACE_DISK_CHG = "replace_new_secondary" # change secondary node REPLACE_DISK_AUTO = "replace_auto" REPLACE_MODES = compat.UniqueFrozenset([ REPLACE_DISK_PRI, REPLACE_DISK_SEC, REPLACE_DISK_CHG, REPLACE_DISK_AUTO, ]) # Instance export mode EXPORT_MODE_LOCAL = "local" EXPORT_MODE_REMOTE = "remote" EXPORT_MODES = compat.UniqueFrozenset([ EXPORT_MODE_LOCAL, EXPORT_MODE_REMOTE, ]) # instance creation modes INSTANCE_CREATE = "create" INSTANCE_IMPORT = "import" INSTANCE_REMOTE_IMPORT = "remote-import" INSTANCE_CREATE_MODES = compat.UniqueFrozenset([ INSTANCE_CREATE, INSTANCE_IMPORT, INSTANCE_REMOTE_IMPORT, ]) # Remote import/export handshake message and version RIE_VERSION = 0 RIE_HANDSHAKE = "Hi, I'm Ganeti" # Remote import/export certificate validity in seconds RIE_CERT_VALIDITY = 24 * 60 * 60 # Overall timeout for establishing connection RIE_CONNECT_TIMEOUT = 180 # Export only: how long to wait per connection attempt (seconds) RIE_CONNECT_ATTEMPT_TIMEOUT = 20 # Export only: number of attempts to connect RIE_CONNECT_RETRIES = 10 #: Give child process up to 5 seconds to exit after sending a signal CHILD_LINGER_TIMEOUT = 5.0 FILE_DRIVER = compat.UniqueFrozenset([FD_LOOP, FD_BLKTAP, FD_BLKTAP2]) # import/export config options INISECT_EXP = "export" INISECT_INS = "instance" INISECT_HYP = "hypervisor" INISECT_BEP = "backend" INISECT_OSP = "os" # dynamic device modification DDM_ADD = "add" DDM_MODIFY = "modify" DDM_REMOVE = "remove" DDMS_VALUES = compat.UniqueFrozenset([DDM_ADD, DDM_REMOVE]) DDMS_VALUES_WITH_MODIFY = (DDMS_VALUES | frozenset([ DDM_MODIFY, ])) # TODO: DDM_SWAP, DDM_MOVE? 
# common exit codes EXIT_SUCCESS = 0 EXIT_FAILURE = 1 EXIT_NOTCLUSTER = 5 EXIT_NOTMASTER = 11 EXIT_NODESETUP_ERROR = 12 EXIT_CONFIRMATION = 13 # need user confirmation #: Exit code for query operations with unknown fields EXIT_UNKNOWN_FIELD = 14 # tags TAG_CLUSTER = "cluster" TAG_NODEGROUP = "nodegroup" TAG_NODE = "node" TAG_INSTANCE = "instance" TAG_NETWORK = "network" VALID_TAG_TYPES = compat.UniqueFrozenset([ TAG_CLUSTER, TAG_NODEGROUP, TAG_NODE, TAG_INSTANCE, TAG_NETWORK, ]) MAX_TAG_LEN = 128 MAX_TAGS_PER_OBJ = 4096 # others DEFAULT_BRIDGE = "xen-br0" CLASSIC_DRBD_SYNC_SPEED = 60 * 1024 # 60 MiB, expressed in KiB IP4_ADDRESS_LOCALHOST = "127.0.0.1" IP4_ADDRESS_ANY = "0.0.0.0" IP6_ADDRESS_LOCALHOST = "::1" IP6_ADDRESS_ANY = "::" IP4_VERSION = 4 IP6_VERSION = 6 VALID_IP_VERSIONS = compat.UniqueFrozenset([IP4_VERSION, IP6_VERSION]) # for export to htools IP4_FAMILY = socket.AF_INET IP6_FAMILY = socket.AF_INET6 TCP_PING_TIMEOUT = 10 DEFAULT_VG = "xenvg" DEFAULT_DRBD_HELPER = "/bin/true" MIN_VG_SIZE = 20480 DEFAULT_MAC_PREFIX = "aa:00:00" # default maximum instance wait time, in seconds. DEFAULT_SHUTDOWN_TIMEOUT = 120 NODE_MAX_CLOCK_SKEW = 150 # Time for an intra-cluster disk transfer to wait for a connection DISK_TRANSFER_CONNECT_TIMEOUT = 60 # Disk index separator DISK_SEPARATOR = _autoconf.DISK_SEPARATOR IP_COMMAND_PATH = _autoconf.IP_PATH #: Key for job IDs in opcode result JOB_IDS_KEY = "jobs" # runparts results (RUNPARTS_SKIP, RUNPARTS_RUN, RUNPARTS_ERR) = range(3) RUNPARTS_STATUS = compat.UniqueFrozenset([ RUNPARTS_SKIP, RUNPARTS_RUN, RUNPARTS_ERR, ]) # RPC constants (RPC_ENCODING_NONE, RPC_ENCODING_ZLIB_BASE64) = range(2) # Various time constants for the timeout table RPC_TMO_URGENT = 60 # one minute RPC_TMO_FAST = 5 * 60 # five minutes RPC_TMO_NORMAL = 15 * 60 # 15 minutes RPC_TMO_SLOW = 3600 # one hour RPC_TMO_4HRS = 4 * 3600 RPC_TMO_1DAY = 86400 # Timeout for connecting to nodes (seconds) RPC_CONNECT_TIMEOUT = 5 # os related constants OS_SCRIPT_CREATE = "create" OS_SCRIPT_IMPORT = "import" OS_SCRIPT_EXPORT = "export" OS_SCRIPT_RENAME = "rename" OS_SCRIPT_VERIFY = "verify" OS_SCRIPTS = compat.UniqueFrozenset([ OS_SCRIPT_CREATE, OS_SCRIPT_IMPORT, OS_SCRIPT_EXPORT, OS_SCRIPT_RENAME, OS_SCRIPT_VERIFY, ]) OS_API_FILE = "ganeti_api_version" OS_VARIANTS_FILE = "variants.list" OS_PARAMETERS_FILE = "parameters.list" OS_VALIDATE_PARAMETERS = "parameters" OS_VALIDATE_CALLS = compat.UniqueFrozenset([OS_VALIDATE_PARAMETERS]) # External Storage (ES) related constants ES_ACTION_CREATE = "create" ES_ACTION_REMOVE = "remove" ES_ACTION_GROW = "grow" ES_ACTION_ATTACH = "attach" ES_ACTION_DETACH = "detach" ES_ACTION_SETINFO = "setinfo" ES_ACTION_VERIFY = "verify" ES_SCRIPT_CREATE = ES_ACTION_CREATE ES_SCRIPT_REMOVE = ES_ACTION_REMOVE ES_SCRIPT_GROW = ES_ACTION_GROW ES_SCRIPT_ATTACH = ES_ACTION_ATTACH ES_SCRIPT_DETACH = ES_ACTION_DETACH ES_SCRIPT_SETINFO = ES_ACTION_SETINFO ES_SCRIPT_VERIFY = ES_ACTION_VERIFY ES_SCRIPTS = frozenset([ ES_SCRIPT_CREATE, ES_SCRIPT_REMOVE, ES_SCRIPT_GROW, ES_SCRIPT_ATTACH, ES_SCRIPT_DETACH, ES_SCRIPT_SETINFO, ES_SCRIPT_VERIFY ]) ES_PARAMETERS_FILE = "parameters.list" # reboot types INSTANCE_REBOOT_SOFT = "soft" INSTANCE_REBOOT_HARD = "hard" INSTANCE_REBOOT_FULL = "full" REBOOT_TYPES = compat.UniqueFrozenset([ INSTANCE_REBOOT_SOFT, INSTANCE_REBOOT_HARD, INSTANCE_REBOOT_FULL, ]) # instance reboot behaviors INSTANCE_REBOOT_ALLOWED = "reboot" INSTANCE_REBOOT_EXIT = "exit" REBOOT_BEHAVIORS = compat.UniqueFrozenset([ INSTANCE_REBOOT_ALLOWED, INSTANCE_REBOOT_EXIT, ]) 
VTYPE_STRING = "string" VTYPE_MAYBE_STRING = "maybe-string" VTYPE_BOOL = "bool" VTYPE_SIZE = "size" # size, in MiBs VTYPE_INT = "int" ENFORCEABLE_TYPES = compat.UniqueFrozenset([ VTYPE_STRING, VTYPE_MAYBE_STRING, VTYPE_BOOL, VTYPE_SIZE, VTYPE_INT, ]) # Constant representing that the user does not specify any IP version IFACE_NO_IP_VERSION_SPECIFIED = 0 VALID_SERIAL_SPEEDS = compat.UniqueFrozenset([ 75, 110, 300, 600, 1200, 1800, 2400, 4800, 9600, 14400, 19200, 28800, 38400, 57600, 115200, 230400, 345600, 460800, ]) # HV parameter names (global namespace) HV_BOOT_ORDER = "boot_order" HV_CDROM_IMAGE_PATH = "cdrom_image_path" HV_KVM_CDROM2_IMAGE_PATH = "cdrom2_image_path" HV_KVM_FLOPPY_IMAGE_PATH = "floppy_image_path" HV_NIC_TYPE = "nic_type" HV_DISK_TYPE = "disk_type" HV_KVM_CDROM_DISK_TYPE = "cdrom_disk_type" HV_VNC_BIND_ADDRESS = "vnc_bind_address" HV_VNC_PASSWORD_FILE = "vnc_password_file" HV_VNC_TLS = "vnc_tls" HV_VNC_X509 = "vnc_x509_path" HV_VNC_X509_VERIFY = "vnc_x509_verify" HV_KVM_SPICE_BIND = "spice_bind" HV_KVM_SPICE_IP_VERSION = "spice_ip_version" HV_KVM_SPICE_PASSWORD_FILE = "spice_password_file" HV_KVM_SPICE_LOSSLESS_IMG_COMPR = "spice_image_compression" HV_KVM_SPICE_JPEG_IMG_COMPR = "spice_jpeg_wan_compression" HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR = "spice_zlib_glz_wan_compression" HV_KVM_SPICE_STREAMING_VIDEO_DETECTION = "spice_streaming_video" HV_KVM_SPICE_AUDIO_COMPR = "spice_playback_compression" HV_KVM_SPICE_USE_TLS = "spice_use_tls" HV_KVM_SPICE_TLS_CIPHERS = "spice_tls_ciphers" HV_KVM_SPICE_USE_VDAGENT = "spice_use_vdagent" HV_ACPI = "acpi" HV_PAE = "pae" HV_USE_BOOTLOADER = "use_bootloader" HV_BOOTLOADER_ARGS = "bootloader_args" HV_BOOTLOADER_PATH = "bootloader_path" HV_KERNEL_ARGS = "kernel_args" HV_KERNEL_PATH = "kernel_path" HV_INITRD_PATH = "initrd_path" HV_ROOT_PATH = "root_path" HV_SERIAL_CONSOLE = "serial_console" HV_SERIAL_SPEED = "serial_speed" HV_USB_MOUSE = "usb_mouse" HV_KEYMAP = "keymap" HV_DEVICE_MODEL = "device_model" HV_INIT_SCRIPT = "init_script" HV_MIGRATION_PORT = "migration_port" HV_MIGRATION_BANDWIDTH = "migration_bandwidth" HV_MIGRATION_DOWNTIME = "migration_downtime" HV_MIGRATION_MODE = "migration_mode" HV_USE_LOCALTIME = "use_localtime" HV_DISK_CACHE = "disk_cache" HV_SECURITY_MODEL = "security_model" HV_SECURITY_DOMAIN = "security_domain" HV_KVM_FLAG = "kvm_flag" HV_VHOST_NET = "vhost_net" HV_KVM_USE_CHROOT = "use_chroot" HV_CPU_MASK = "cpu_mask" HV_MEM_PATH = "mem_path" HV_PASSTHROUGH = "pci_pass" HV_BLOCKDEV_PREFIX = "blockdev_prefix" HV_REBOOT_BEHAVIOR = "reboot_behavior" HV_CPU_TYPE = "cpu_type" HV_CPU_CAP = "cpu_cap" HV_CPU_WEIGHT = "cpu_weight" HV_CPU_CORES = "cpu_cores" HV_CPU_THREADS = "cpu_threads" HV_CPU_SOCKETS = "cpu_sockets" HV_SOUNDHW = "soundhw" HV_USB_DEVICES = "usb_devices" HV_VGA = "vga" HV_KVM_EXTRA = "kvm_extra" HV_KVM_MACHINE_VERSION = "machine_version" HV_KVM_PATH = "kvm_path" HV_VIF_TYPE = "vif_type" HV_VIF_SCRIPT = "vif_script" HV_XEN_CMD = "xen_cmd" HV_VNET_HDR = "vnet_hdr" HV_VIRIDIAN = "viridian" HVS_PARAMETER_TYPES = { HV_KVM_PATH: VTYPE_STRING, HV_BOOT_ORDER: VTYPE_STRING, HV_KVM_FLOPPY_IMAGE_PATH: VTYPE_STRING, HV_CDROM_IMAGE_PATH: VTYPE_STRING, HV_KVM_CDROM2_IMAGE_PATH: VTYPE_STRING, HV_NIC_TYPE: VTYPE_STRING, HV_DISK_TYPE: VTYPE_STRING, HV_KVM_CDROM_DISK_TYPE: VTYPE_STRING, HV_VNC_PASSWORD_FILE: VTYPE_STRING, HV_VNC_BIND_ADDRESS: VTYPE_STRING, HV_VNC_TLS: VTYPE_BOOL, HV_VNC_X509: VTYPE_STRING, HV_VNC_X509_VERIFY: VTYPE_BOOL, HV_KVM_SPICE_BIND: VTYPE_STRING, HV_KVM_SPICE_IP_VERSION: VTYPE_INT, 
HV_KVM_SPICE_PASSWORD_FILE: VTYPE_STRING, HV_KVM_SPICE_LOSSLESS_IMG_COMPR: VTYPE_STRING, HV_KVM_SPICE_JPEG_IMG_COMPR: VTYPE_STRING, HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR: VTYPE_STRING, HV_KVM_SPICE_STREAMING_VIDEO_DETECTION: VTYPE_STRING, HV_KVM_SPICE_AUDIO_COMPR: VTYPE_BOOL, HV_KVM_SPICE_USE_TLS: VTYPE_BOOL, HV_KVM_SPICE_TLS_CIPHERS: VTYPE_STRING, HV_KVM_SPICE_USE_VDAGENT: VTYPE_BOOL, HV_ACPI: VTYPE_BOOL, HV_PAE: VTYPE_BOOL, HV_USE_BOOTLOADER: VTYPE_BOOL, HV_BOOTLOADER_PATH: VTYPE_STRING, HV_BOOTLOADER_ARGS: VTYPE_STRING, HV_KERNEL_PATH: VTYPE_STRING, HV_KERNEL_ARGS: VTYPE_STRING, HV_INITRD_PATH: VTYPE_STRING, HV_ROOT_PATH: VTYPE_MAYBE_STRING, HV_SERIAL_CONSOLE: VTYPE_BOOL, HV_SERIAL_SPEED: VTYPE_INT, HV_USB_MOUSE: VTYPE_STRING, HV_KEYMAP: VTYPE_STRING, HV_DEVICE_MODEL: VTYPE_STRING, HV_INIT_SCRIPT: VTYPE_STRING, HV_MIGRATION_PORT: VTYPE_INT, HV_MIGRATION_BANDWIDTH: VTYPE_INT, HV_MIGRATION_DOWNTIME: VTYPE_INT, HV_MIGRATION_MODE: VTYPE_STRING, HV_USE_LOCALTIME: VTYPE_BOOL, HV_DISK_CACHE: VTYPE_STRING, HV_SECURITY_MODEL: VTYPE_STRING, HV_SECURITY_DOMAIN: VTYPE_STRING, HV_KVM_FLAG: VTYPE_STRING, HV_VHOST_NET: VTYPE_BOOL, HV_KVM_USE_CHROOT: VTYPE_BOOL, HV_CPU_MASK: VTYPE_STRING, HV_MEM_PATH: VTYPE_STRING, HV_PASSTHROUGH: VTYPE_STRING, HV_BLOCKDEV_PREFIX: VTYPE_STRING, HV_REBOOT_BEHAVIOR: VTYPE_STRING, HV_CPU_TYPE: VTYPE_STRING, HV_CPU_CAP: VTYPE_INT, HV_CPU_WEIGHT: VTYPE_INT, HV_CPU_CORES: VTYPE_INT, HV_CPU_THREADS: VTYPE_INT, HV_CPU_SOCKETS: VTYPE_INT, HV_SOUNDHW: VTYPE_STRING, HV_USB_DEVICES: VTYPE_STRING, HV_VGA: VTYPE_STRING, HV_KVM_EXTRA: VTYPE_STRING, HV_KVM_MACHINE_VERSION: VTYPE_STRING, HV_VIF_TYPE: VTYPE_STRING, HV_VIF_SCRIPT: VTYPE_STRING, HV_XEN_CMD: VTYPE_STRING, HV_VNET_HDR: VTYPE_BOOL, HV_VIRIDIAN: VTYPE_BOOL, } HVS_PARAMETERS = frozenset(HVS_PARAMETER_TYPES.keys()) HVS_PARAMETER_TITLES = { HV_ACPI: "ACPI", HV_BOOT_ORDER: "Boot_order", HV_CDROM_IMAGE_PATH: "CDROM_image_path", HV_DISK_TYPE: "Disk_type", HV_INITRD_PATH: "Initrd_path", HV_KERNEL_PATH: "Kernel_path", HV_NIC_TYPE: "NIC_type", HV_PAE: "PAE", HV_VNC_BIND_ADDRESS: "VNC_bind_address", HV_PASSTHROUGH: "pci_pass", HV_CPU_TYPE: "cpu_type", } # Migration statuses HV_MIGRATION_COMPLETED = "completed" HV_MIGRATION_ACTIVE = "active" HV_MIGRATION_FAILED = "failed" HV_MIGRATION_CANCELLED = "cancelled" HV_MIGRATION_VALID_STATUSES = compat.UniqueFrozenset([ HV_MIGRATION_COMPLETED, HV_MIGRATION_ACTIVE, HV_MIGRATION_FAILED, HV_MIGRATION_CANCELLED, ]) HV_MIGRATION_FAILED_STATUSES = compat.UniqueFrozenset([ HV_MIGRATION_FAILED, HV_MIGRATION_CANCELLED, ]) # KVM-specific statuses HV_KVM_MIGRATION_VALID_STATUSES = HV_MIGRATION_VALID_STATUSES # Node info keys HV_NODEINFO_KEY_VERSION = "hv_version" # Hypervisor state HVST_MEMORY_TOTAL = "mem_total" HVST_MEMORY_NODE = "mem_node" HVST_MEMORY_HV = "mem_hv" HVST_CPU_TOTAL = "cpu_total" HVST_CPU_NODE = "cpu_node" HVST_DEFAULTS = { HVST_MEMORY_TOTAL: 0, HVST_MEMORY_NODE: 0, HVST_MEMORY_HV: 0, HVST_CPU_TOTAL: 1, HVST_CPU_NODE: 1, } HVSTS_PARAMETER_TYPES = { HVST_MEMORY_TOTAL: VTYPE_INT, HVST_MEMORY_NODE: VTYPE_INT, HVST_MEMORY_HV: VTYPE_INT, HVST_CPU_TOTAL: VTYPE_INT, HVST_CPU_NODE: VTYPE_INT, } HVSTS_PARAMETERS = frozenset(HVSTS_PARAMETER_TYPES.keys()) # Disk state DS_DISK_TOTAL = "disk_total" DS_DISK_RESERVED = "disk_reserved" DS_DISK_OVERHEAD = "disk_overhead" DS_DEFAULTS = { DS_DISK_TOTAL: 0, DS_DISK_RESERVED: 0, DS_DISK_OVERHEAD: 0, } DSS_PARAMETER_TYPES = { DS_DISK_TOTAL: VTYPE_INT, DS_DISK_RESERVED: VTYPE_INT, DS_DISK_OVERHEAD: VTYPE_INT, } DSS_PARAMETERS = frozenset(DSS_PARAMETER_TYPES.keys()) 
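# Hedged sketch, not part of the module: one way the VTYPE_* markers could be
# mapped to concrete Python checks when validating a parameter dictionary
# such as HVS_PARAMETER_TYPES. Both _VTYPE_CHECKS and _CheckParameter are
# hypothetical names introduced for illustration only.
_VTYPE_CHECKS = {
  VTYPE_STRING: lambda v: isinstance(v, basestring),
  VTYPE_MAYBE_STRING: lambda v: v is None or isinstance(v, basestring),
  VTYPE_BOOL: lambda v: isinstance(v, bool),
  VTYPE_SIZE: lambda v: isinstance(v, (int, long)) and v >= 0,  # size in MiB
  VTYPE_INT: lambda v: isinstance(v, (int, long)) and not isinstance(v, bool),
}

def _CheckParameter(types, name, value):
  """Returns True if value matches the VTYPE declared for name in types."""
  return _VTYPE_CHECKS[types[name]](value)

# For example, _CheckParameter(HVS_PARAMETER_TYPES, HV_ACPI, True) is True,
# while _CheckParameter(HVS_PARAMETER_TYPES, HV_MIGRATION_PORT, "8102") is not.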
DS_VALID_TYPES = compat.UniqueFrozenset([DT_PLAIN]) # Backend parameter names BE_MEMORY = "memory" # deprecated and replaced by max and min mem BE_MAXMEM = "maxmem" BE_MINMEM = "minmem" BE_VCPUS = "vcpus" BE_AUTO_BALANCE = "auto_balance" BE_ALWAYS_FAILOVER = "always_failover" BE_SPINDLE_USE = "spindle_use" BES_PARAMETER_TYPES = { BE_MAXMEM: VTYPE_SIZE, BE_MINMEM: VTYPE_SIZE, BE_VCPUS: VTYPE_INT, BE_AUTO_BALANCE: VTYPE_BOOL, BE_ALWAYS_FAILOVER: VTYPE_BOOL, BE_SPINDLE_USE: VTYPE_INT, } BES_PARAMETER_TITLES = { BE_AUTO_BALANCE: "Auto_balance", BE_MAXMEM: "ConfigMaxMem", BE_MINMEM: "ConfigMinMem", BE_VCPUS: "ConfigVCPUs", } BES_PARAMETER_COMPAT = { BE_MEMORY: VTYPE_SIZE, } BES_PARAMETER_COMPAT.update(BES_PARAMETER_TYPES) BES_PARAMETERS = frozenset(BES_PARAMETER_TYPES.keys()) # instance specs ISPEC_MEM_SIZE = "memory-size" ISPEC_CPU_COUNT = "cpu-count" ISPEC_DISK_COUNT = "disk-count" ISPEC_DISK_SIZE = "disk-size" ISPEC_NIC_COUNT = "nic-count" ISPEC_SPINDLE_USE = "spindle-use" ISPECS_PARAMETER_TYPES = { ISPEC_MEM_SIZE: VTYPE_INT, ISPEC_CPU_COUNT: VTYPE_INT, ISPEC_DISK_COUNT: VTYPE_INT, ISPEC_DISK_SIZE: VTYPE_INT, ISPEC_NIC_COUNT: VTYPE_INT, ISPEC_SPINDLE_USE: VTYPE_INT, } ISPECS_PARAMETERS = frozenset(ISPECS_PARAMETER_TYPES.keys()) ISPECS_MINMAX = "minmax" ISPECS_MIN = "min" ISPECS_MAX = "max" ISPECS_STD = "std" IPOLICY_DTS = "disk-templates" IPOLICY_VCPU_RATIO = "vcpu-ratio" IPOLICY_SPINDLE_RATIO = "spindle-ratio" ISPECS_MINMAX_KEYS = compat.UniqueFrozenset([ ISPECS_MIN, ISPECS_MAX, ]) IPOLICY_PARAMETERS = compat.UniqueFrozenset([ IPOLICY_VCPU_RATIO, IPOLICY_SPINDLE_RATIO, ]) IPOLICY_ALL_KEYS = (IPOLICY_PARAMETERS | frozenset([ISPECS_MINMAX, ISPECS_STD, IPOLICY_DTS])) # Node parameter names ND_OOB_PROGRAM = "oob_program" ND_SPINDLE_COUNT = "spindle_count" ND_EXCLUSIVE_STORAGE = "exclusive_storage" NDS_PARAMETER_TYPES = { ND_OOB_PROGRAM: VTYPE_STRING, ND_SPINDLE_COUNT: VTYPE_INT, ND_EXCLUSIVE_STORAGE: VTYPE_BOOL, } NDS_PARAMETERS = frozenset(NDS_PARAMETER_TYPES.keys()) NDS_PARAMETER_TITLES = { ND_OOB_PROGRAM: "OutOfBandProgram", ND_SPINDLE_COUNT: "SpindleCount", ND_EXCLUSIVE_STORAGE: "ExclusiveStorage", } # Logical Disks parameters LDP_RESYNC_RATE = "resync-rate" LDP_STRIPES = "stripes" LDP_BARRIERS = "disabled-barriers" LDP_NO_META_FLUSH = "disable-meta-flush" LDP_DEFAULT_METAVG = "default-metavg" LDP_DISK_CUSTOM = "disk-custom" LDP_NET_CUSTOM = "net-custom" LDP_PROTOCOL = "protocol" LDP_DYNAMIC_RESYNC = "dynamic-resync" LDP_PLAN_AHEAD = "c-plan-ahead" LDP_FILL_TARGET = "c-fill-target" LDP_DELAY_TARGET = "c-delay-target" LDP_MAX_RATE = "c-max-rate" LDP_MIN_RATE = "c-min-rate" LDP_POOL = "pool" DISK_LD_TYPES = { LDP_RESYNC_RATE: VTYPE_INT, LDP_STRIPES: VTYPE_INT, LDP_BARRIERS: VTYPE_STRING, LDP_NO_META_FLUSH: VTYPE_BOOL, LDP_DEFAULT_METAVG: VTYPE_STRING, LDP_DISK_CUSTOM: VTYPE_STRING, LDP_NET_CUSTOM: VTYPE_STRING, LDP_PROTOCOL: VTYPE_STRING, LDP_DYNAMIC_RESYNC: VTYPE_BOOL, LDP_PLAN_AHEAD: VTYPE_INT, LDP_FILL_TARGET: VTYPE_INT, LDP_DELAY_TARGET: VTYPE_INT, LDP_MAX_RATE: VTYPE_INT, LDP_MIN_RATE: VTYPE_INT, LDP_POOL: VTYPE_STRING, } DISK_LD_PARAMETERS = frozenset(DISK_LD_TYPES.keys()) # Disk template parameters (can be set/changed by the user via gnt-cluster and # gnt-group) DRBD_RESYNC_RATE = "resync-rate" DRBD_DATA_STRIPES = "data-stripes" DRBD_META_STRIPES = "meta-stripes" DRBD_DISK_BARRIERS = "disk-barriers" DRBD_META_BARRIERS = "meta-barriers" DRBD_DEFAULT_METAVG = "metavg" DRBD_DISK_CUSTOM = "disk-custom" DRBD_NET_CUSTOM = "net-custom" DRBD_PROTOCOL = "protocol" DRBD_DYNAMIC_RESYNC = 
"dynamic-resync" DRBD_PLAN_AHEAD = "c-plan-ahead" DRBD_FILL_TARGET = "c-fill-target" DRBD_DELAY_TARGET = "c-delay-target" DRBD_MAX_RATE = "c-max-rate" DRBD_MIN_RATE = "c-min-rate" LV_STRIPES = "stripes" RBD_POOL = "pool" DISK_DT_TYPES = { DRBD_RESYNC_RATE: VTYPE_INT, DRBD_DATA_STRIPES: VTYPE_INT, DRBD_META_STRIPES: VTYPE_INT, DRBD_DISK_BARRIERS: VTYPE_STRING, DRBD_META_BARRIERS: VTYPE_BOOL, DRBD_DEFAULT_METAVG: VTYPE_STRING, DRBD_DISK_CUSTOM: VTYPE_STRING, DRBD_NET_CUSTOM: VTYPE_STRING, DRBD_PROTOCOL: VTYPE_STRING, DRBD_DYNAMIC_RESYNC: VTYPE_BOOL, DRBD_PLAN_AHEAD: VTYPE_INT, DRBD_FILL_TARGET: VTYPE_INT, DRBD_DELAY_TARGET: VTYPE_INT, DRBD_MAX_RATE: VTYPE_INT, DRBD_MIN_RATE: VTYPE_INT, LV_STRIPES: VTYPE_INT, RBD_POOL: VTYPE_STRING, } DISK_DT_PARAMETERS = frozenset(DISK_DT_TYPES.keys()) # OOB supported commands OOB_POWER_ON = "power-on" OOB_POWER_OFF = "power-off" OOB_POWER_CYCLE = "power-cycle" OOB_POWER_STATUS = "power-status" OOB_HEALTH = "health" OOB_COMMANDS = compat.UniqueFrozenset([ OOB_POWER_ON, OOB_POWER_OFF, OOB_POWER_CYCLE, OOB_POWER_STATUS, OOB_HEALTH, ]) OOB_POWER_STATUS_POWERED = "powered" OOB_TIMEOUT = 60 # 60 seconds OOB_POWER_DELAY = 2.0 # 2 seconds OOB_STATUS_OK = "OK" OOB_STATUS_WARNING = "WARNING" OOB_STATUS_CRITICAL = "CRITICAL" OOB_STATUS_UNKNOWN = "UNKNOWN" OOB_STATUSES = compat.UniqueFrozenset([ OOB_STATUS_OK, OOB_STATUS_WARNING, OOB_STATUS_CRITICAL, OOB_STATUS_UNKNOWN, ]) # Instance Parameters Profile PP_DEFAULT = "default" # NIC_* constants are used inside the ganeti config NIC_MODE = "mode" NIC_LINK = "link" NIC_MODE_BRIDGED = "bridged" NIC_MODE_ROUTED = "routed" NIC_MODE_OVS = "openvswitch" NIC_IP_POOL = "pool" NIC_VALID_MODES = compat.UniqueFrozenset([ NIC_MODE_BRIDGED, NIC_MODE_ROUTED, NIC_MODE_OVS, ]) RESERVE_ACTION = "reserve" RELEASE_ACTION = "release" NICS_PARAMETER_TYPES = { NIC_MODE: VTYPE_STRING, NIC_LINK: VTYPE_STRING, } NICS_PARAMETERS = frozenset(NICS_PARAMETER_TYPES.keys()) # IDISK_* constants are used in opcodes, to create/change disks IDISK_SIZE = "size" IDISK_SPINDLES = "spindles" IDISK_MODE = "mode" IDISK_ADOPT = "adopt" IDISK_VG = "vg" IDISK_METAVG = "metavg" IDISK_PROVIDER = "provider" IDISK_NAME = "name" IDISK_PARAMS_TYPES = { IDISK_SIZE: VTYPE_SIZE, IDISK_SPINDLES: VTYPE_INT, IDISK_MODE: VTYPE_STRING, IDISK_ADOPT: VTYPE_STRING, IDISK_VG: VTYPE_STRING, IDISK_METAVG: VTYPE_STRING, IDISK_PROVIDER: VTYPE_STRING, IDISK_NAME: VTYPE_MAYBE_STRING, } IDISK_PARAMS = frozenset(IDISK_PARAMS_TYPES.keys()) MODIFIABLE_IDISK_PARAMS_TYPES = { IDISK_MODE: VTYPE_STRING, IDISK_NAME: VTYPE_STRING, } MODIFIABLE_IDISK_PARAMS = frozenset(MODIFIABLE_IDISK_PARAMS_TYPES.keys()) # INIC_* constants are used in opcodes, to create/change nics INIC_MAC = "mac" INIC_IP = "ip" INIC_MODE = "mode" INIC_LINK = "link" INIC_NETWORK = "network" INIC_NAME = "name" INIC_PARAMS_TYPES = { INIC_IP: VTYPE_MAYBE_STRING, INIC_LINK: VTYPE_STRING, INIC_MAC: VTYPE_STRING, INIC_MODE: VTYPE_STRING, INIC_NETWORK: VTYPE_MAYBE_STRING, INIC_NAME: VTYPE_MAYBE_STRING, } INIC_PARAMS = frozenset(INIC_PARAMS_TYPES.keys()) # Hypervisor constants HT_XEN_PVM = "xen-pvm" HT_FAKE = "fake" HT_XEN_HVM = "xen-hvm" HT_KVM = "kvm" HT_CHROOT = "chroot" HT_LXC = "lxc" HYPER_TYPES = compat.UniqueFrozenset([ HT_XEN_PVM, HT_FAKE, HT_XEN_HVM, HT_KVM, HT_CHROOT, HT_LXC, ]) HTS_REQ_PORT = compat.UniqueFrozenset([HT_XEN_HVM, HT_KVM]) VNC_BASE_PORT = 5900 VNC_DEFAULT_BIND_ADDRESS = IP4_ADDRESS_ANY # NIC types HT_NIC_RTL8139 = "rtl8139" HT_NIC_NE2K_PCI = "ne2k_pci" HT_NIC_NE2K_ISA = "ne2k_isa" HT_NIC_I82551 = "i82551" 
HT_NIC_I85557B = "i82557b" HT_NIC_I8259ER = "i82559er" HT_NIC_PCNET = "pcnet" HT_NIC_E1000 = "e1000" HT_NIC_PARAVIRTUAL = HT_DISK_PARAVIRTUAL = "paravirtual" HT_HVM_VALID_NIC_TYPES = compat.UniqueFrozenset([ HT_NIC_RTL8139, HT_NIC_NE2K_PCI, HT_NIC_E1000, HT_NIC_NE2K_ISA, HT_NIC_PARAVIRTUAL, ]) HT_KVM_VALID_NIC_TYPES = compat.UniqueFrozenset([ HT_NIC_RTL8139, HT_NIC_NE2K_PCI, HT_NIC_NE2K_ISA, HT_NIC_I82551, HT_NIC_I85557B, HT_NIC_I8259ER, HT_NIC_PCNET, HT_NIC_E1000, HT_NIC_PARAVIRTUAL, ]) # Vif types # default vif type in xen-hvm HT_HVM_VIF_IOEMU = "ioemu" HT_HVM_VIF_VIF = "vif" HT_HVM_VALID_VIF_TYPES = compat.UniqueFrozenset([ HT_HVM_VIF_IOEMU, HT_HVM_VIF_VIF, ]) # Disk types HT_DISK_IOEMU = "ioemu" HT_DISK_IDE = "ide" HT_DISK_SCSI = "scsi" HT_DISK_SD = "sd" HT_DISK_MTD = "mtd" HT_DISK_PFLASH = "pflash" HT_CACHE_DEFAULT = "default" HT_CACHE_NONE = "none" HT_CACHE_WTHROUGH = "writethrough" HT_CACHE_WBACK = "writeback" HT_VALID_CACHE_TYPES = compat.UniqueFrozenset([ HT_CACHE_DEFAULT, HT_CACHE_NONE, HT_CACHE_WTHROUGH, HT_CACHE_WBACK, ]) HT_HVM_VALID_DISK_TYPES = compat.UniqueFrozenset([ HT_DISK_PARAVIRTUAL, HT_DISK_IOEMU, ]) HT_KVM_VALID_DISK_TYPES = compat.UniqueFrozenset([ HT_DISK_PARAVIRTUAL, HT_DISK_IDE, HT_DISK_SCSI, HT_DISK_SD, HT_DISK_MTD, HT_DISK_PFLASH, ]) # Mouse types: HT_MOUSE_MOUSE = "mouse" HT_MOUSE_TABLET = "tablet" HT_KVM_VALID_MOUSE_TYPES = compat.UniqueFrozenset([ HT_MOUSE_MOUSE, HT_MOUSE_TABLET, ]) # Boot order HT_BO_FLOPPY = "floppy" HT_BO_CDROM = "cdrom" HT_BO_DISK = "disk" HT_BO_NETWORK = "network" HT_KVM_VALID_BO_TYPES = compat.UniqueFrozenset([ HT_BO_FLOPPY, HT_BO_CDROM, HT_BO_DISK, HT_BO_NETWORK, ]) # SPICE lossless image compression options HT_KVM_SPICE_LOSSLESS_IMG_COMPR_AUTO_GLZ = "auto_glz" HT_KVM_SPICE_LOSSLESS_IMG_COMPR_AUTO_LZ = "auto_lz" HT_KVM_SPICE_LOSSLESS_IMG_COMPR_QUIC = "quic" HT_KVM_SPICE_LOSSLESS_IMG_COMPR_GLZ = "glz" HT_KVM_SPICE_LOSSLESS_IMG_COMPR_LZ = "lz" HT_KVM_SPICE_LOSSLESS_IMG_COMPR_OFF = "off" HT_KVM_SPICE_VALID_LOSSLESS_IMG_COMPR_OPTIONS = compat.UniqueFrozenset([ HT_KVM_SPICE_LOSSLESS_IMG_COMPR_AUTO_GLZ, HT_KVM_SPICE_LOSSLESS_IMG_COMPR_AUTO_LZ, HT_KVM_SPICE_LOSSLESS_IMG_COMPR_QUIC, HT_KVM_SPICE_LOSSLESS_IMG_COMPR_GLZ, HT_KVM_SPICE_LOSSLESS_IMG_COMPR_LZ, HT_KVM_SPICE_LOSSLESS_IMG_COMPR_OFF, ]) # SPICE lossy image compression options (valid for both jpeg and zlib-glz) HT_KVM_SPICE_LOSSY_IMG_COMPR_AUTO = "auto" HT_KVM_SPICE_LOSSY_IMG_COMPR_NEVER = "never" HT_KVM_SPICE_LOSSY_IMG_COMPR_ALWAYS = "always" HT_KVM_SPICE_VALID_LOSSY_IMG_COMPR_OPTIONS = compat.UniqueFrozenset([ HT_KVM_SPICE_LOSSY_IMG_COMPR_AUTO, HT_KVM_SPICE_LOSSY_IMG_COMPR_NEVER, HT_KVM_SPICE_LOSSY_IMG_COMPR_ALWAYS, ]) # SPICE video stream detection HT_KVM_SPICE_VIDEO_STREAM_DETECTION_OFF = "off" HT_KVM_SPICE_VIDEO_STREAM_DETECTION_ALL = "all" HT_KVM_SPICE_VIDEO_STREAM_DETECTION_FILTER = "filter" HT_KVM_SPICE_VALID_VIDEO_STREAM_DETECTION_OPTIONS = compat.UniqueFrozenset([ HT_KVM_SPICE_VIDEO_STREAM_DETECTION_OFF, HT_KVM_SPICE_VIDEO_STREAM_DETECTION_ALL, HT_KVM_SPICE_VIDEO_STREAM_DETECTION_FILTER, ]) # Security models HT_SM_NONE = "none" HT_SM_USER = "user" HT_SM_POOL = "pool" HT_KVM_VALID_SM_TYPES = compat.UniqueFrozenset([ HT_SM_NONE, HT_SM_USER, HT_SM_POOL, ]) # Kvm flag values HT_KVM_ENABLED = "enabled" HT_KVM_DISABLED = "disabled" HT_KVM_FLAG_VALUES = compat.UniqueFrozenset([HT_KVM_ENABLED, HT_KVM_DISABLED]) # Migration type HT_MIGRATION_LIVE = "live" HT_MIGRATION_NONLIVE = "non-live" HT_MIGRATION_MODES = compat.UniqueFrozenset([ HT_MIGRATION_LIVE, HT_MIGRATION_NONLIVE, ]) # 
Cluster Verify steps VERIFY_NPLUSONE_MEM = "nplusone_mem" VERIFY_OPTIONAL_CHECKS = compat.UniqueFrozenset([VERIFY_NPLUSONE_MEM]) # Cluster Verify error classes CV_TCLUSTER = "cluster" CV_TGROUP = "group" CV_TNODE = "node" CV_TINSTANCE = "instance" # Cluster Verify error codes and documentation CV_ECLUSTERCFG = \ (CV_TCLUSTER, "ECLUSTERCFG", "Cluster configuration verification failure") CV_ECLUSTERCERT = \ (CV_TCLUSTER, "ECLUSTERCERT", "Cluster certificate files verification failure") CV_ECLUSTERFILECHECK = \ (CV_TCLUSTER, "ECLUSTERFILECHECK", "Cluster configuration verification failure") CV_ECLUSTERDANGLINGNODES = \ (CV_TNODE, "ECLUSTERDANGLINGNODES", "Some nodes belong to non-existing groups") CV_ECLUSTERDANGLINGINST = \ (CV_TNODE, "ECLUSTERDANGLINGINST", "Some instances have a non-existing primary node") CV_EGROUPDIFFERENTPVSIZE = \ (CV_TGROUP, "EGROUPDIFFERENTPVSIZE", "PVs in the group have different sizes") CV_EINSTANCEBADNODE = \ (CV_TINSTANCE, "EINSTANCEBADNODE", "Instance marked as running lives on an offline node") CV_EINSTANCEDOWN = \ (CV_TINSTANCE, "EINSTANCEDOWN", "Instance not running on its primary node") CV_EINSTANCELAYOUT = \ (CV_TINSTANCE, "EINSTANCELAYOUT", "Instance has multiple secondary nodes") CV_EINSTANCEMISSINGDISK = \ (CV_TINSTANCE, "EINSTANCEMISSINGDISK", "Missing volume on an instance") CV_EINSTANCEFAULTYDISK = \ (CV_TINSTANCE, "EINSTANCEFAULTYDISK", "Impossible to retrieve status for a disk") CV_EINSTANCEWRONGNODE = \ (CV_TINSTANCE, "EINSTANCEWRONGNODE", "Instance running on the wrong node") CV_EINSTANCESPLITGROUPS = \ (CV_TINSTANCE, "EINSTANCESPLITGROUPS", "Instance with primary and secondary nodes in different groups") CV_EINSTANCEPOLICY = \ (CV_TINSTANCE, "EINSTANCEPOLICY", "Instance does not meet policy") CV_EINSTANCEUNSUITABLENODE = \ (CV_TINSTANCE, "EINSTANCEUNSUITABLENODE", "Instance running on nodes that are not suitable for it") CV_EINSTANCEMISSINGCFGPARAMETER = \ (CV_TINSTANCE, "EINSTANCEMISSINGCFGPARAMETER", "A configuration parameter for an instance is missing") CV_ENODEDRBD = \ (CV_TNODE, "ENODEDRBD", "Error parsing the DRBD status file") CV_ENODEDRBDVERSION = \ (CV_TNODE, "ENODEDRBDVERSION", "DRBD version mismatch within a node group") CV_ENODEDRBDHELPER = \ (CV_TNODE, "ENODEDRBDHELPER", "Error caused by the DRBD helper") CV_ENODEFILECHECK = \ (CV_TNODE, "ENODEFILECHECK", "Error retrieving the checksum of the node files") CV_ENODEHOOKS = \ (CV_TNODE, "ENODEHOOKS", "Communication failure in hooks execution") CV_ENODEHV = \ (CV_TNODE, "ENODEHV", "Hypervisor parameters verification failure") CV_ENODELVM = \ (CV_TNODE, "ENODELVM", "LVM-related node error") CV_ENODEN1 = \ (CV_TNODE, "ENODEN1", "Not enough memory to accommodate instance failovers") CV_ENODENET = \ (CV_TNODE, "ENODENET", "Network-related node error") CV_ENODEOS = \ (CV_TNODE, "ENODEOS", "OS-related node error") CV_ENODEORPHANINSTANCE = \ (CV_TNODE, "ENODEORPHANINSTANCE", "Unknown instance running on a node") CV_ENODEORPHANLV = \ (CV_TNODE, "ENODEORPHANLV", "Unknown LVM logical volume") CV_ENODERPC = \ (CV_TNODE, "ENODERPC", "Error during connection to the primary node of an instance") CV_ENODESSH = \ (CV_TNODE, "ENODESSH", "SSH-related node error") CV_ENODEVERSION = \ (CV_TNODE, "ENODEVERSION", "Protocol version mismatch or Ganeti version mismatch") CV_ENODESETUP = \ (CV_TNODE, "ENODESETUP", "Node setup error") CV_ENODETIME = \ (CV_TNODE, "ENODETIME", "Node returned invalid time") CV_ENODEOOBPATH = \ (CV_TNODE, "ENODEOOBPATH", "Invalid Out Of Band path") CV_ENODEUSERSCRIPTS = \ (CV_TNODE, 
"ENODEUSERSCRIPTS", "User scripts not present or not executable") CV_ENODEFILESTORAGEPATHS = \ (CV_TNODE, "ENODEFILESTORAGEPATHS", "Detected bad file storage paths") CV_ENODEFILESTORAGEPATHUNUSABLE = \ (CV_TNODE, "ENODEFILESTORAGEPATHUNUSABLE", "File storage path unusable") CV_ENODESHAREDFILESTORAGEPATHUNUSABLE = \ (CV_TNODE, "ENODESHAREDFILESTORAGEPATHUNUSABLE", "Shared file storage path unusable") CV_ALL_ECODES = compat.UniqueFrozenset([ CV_ECLUSTERCFG, CV_ECLUSTERCERT, CV_ECLUSTERFILECHECK, CV_ECLUSTERDANGLINGNODES, CV_ECLUSTERDANGLINGINST, CV_EINSTANCEBADNODE, CV_EINSTANCEDOWN, CV_EINSTANCELAYOUT, CV_EINSTANCEMISSINGDISK, CV_EINSTANCEFAULTYDISK, CV_EINSTANCEWRONGNODE, CV_EINSTANCESPLITGROUPS, CV_EINSTANCEPOLICY, CV_ENODEDRBD, CV_ENODEDRBDHELPER, CV_ENODEFILECHECK, CV_ENODEHOOKS, CV_ENODEHV, CV_ENODELVM, CV_ENODEN1, CV_ENODENET, CV_ENODEOS, CV_ENODEORPHANINSTANCE, CV_ENODEORPHANLV, CV_ENODERPC, CV_ENODESSH, CV_ENODEVERSION, CV_ENODESETUP, CV_ENODETIME, CV_ENODEOOBPATH, CV_ENODEUSERSCRIPTS, CV_ENODEFILESTORAGEPATHS, CV_ENODEFILESTORAGEPATHUNUSABLE, CV_ENODESHAREDFILESTORAGEPATHUNUSABLE, ]) CV_ALL_ECODES_STRINGS = \ compat.UniqueFrozenset(estr for (_, estr, _) in CV_ALL_ECODES) # Node verify constants NV_BRIDGES = "bridges" NV_DRBDHELPER = "drbd-helper" NV_DRBDVERSION = "drbd-version" NV_DRBDLIST = "drbd-list" NV_EXCLUSIVEPVS = "exclusive-pvs" NV_FILELIST = "filelist" NV_ACCEPTED_STORAGE_PATHS = "allowed-file-storage-paths" NV_FILE_STORAGE_PATH = "file-storage-path" NV_SHARED_FILE_STORAGE_PATH = "shared-file-storage-path" NV_HVINFO = "hvinfo" NV_HVPARAMS = "hvparms" NV_HYPERVISOR = "hypervisor" NV_INSTANCELIST = "instancelist" NV_LVLIST = "lvlist" NV_MASTERIP = "master-ip" NV_NODELIST = "nodelist" NV_NODENETTEST = "node-net-test" NV_NODESETUP = "nodesetup" NV_OOB_PATHS = "oob-paths" NV_OSLIST = "oslist" NV_PVLIST = "pvlist" NV_TIME = "time" NV_USERSCRIPTS = "user-scripts" NV_VERSION = "version" NV_VGLIST = "vglist" NV_VMNODES = "vmnodes" # Instance status INSTST_RUNNING = "running" INSTST_ADMINDOWN = "ADMIN_down" INSTST_ADMINOFFLINE = "ADMIN_offline" INSTST_NODEOFFLINE = "ERROR_nodeoffline" INSTST_NODEDOWN = "ERROR_nodedown" INSTST_WRONGNODE = "ERROR_wrongnode" INSTST_ERRORUP = "ERROR_up" INSTST_ERRORDOWN = "ERROR_down" INSTST_ALL = compat.UniqueFrozenset([ INSTST_RUNNING, INSTST_ADMINDOWN, INSTST_ADMINOFFLINE, INSTST_NODEOFFLINE, INSTST_NODEDOWN, INSTST_WRONGNODE, INSTST_ERRORUP, INSTST_ERRORDOWN, ]) # Admin states ADMINST_UP = "up" ADMINST_DOWN = "down" ADMINST_OFFLINE = "offline" ADMINST_ALL = compat.UniqueFrozenset([ ADMINST_UP, ADMINST_DOWN, ADMINST_OFFLINE, ]) # Node roles NR_REGULAR = "R" NR_MASTER = "M" NR_MCANDIDATE = "C" NR_DRAINED = "D" NR_OFFLINE = "O" NR_ALL = compat.UniqueFrozenset([ NR_REGULAR, NR_MASTER, NR_MCANDIDATE, NR_DRAINED, NR_OFFLINE, ]) # SSL certificate check constants (in days) SSL_CERT_EXPIRATION_WARN = 30 SSL_CERT_EXPIRATION_ERROR = 7 # Allocator framework constants IALLOCATOR_VERSION = 2 IALLOCATOR_DIR_IN = "in" IALLOCATOR_DIR_OUT = "out" VALID_IALLOCATOR_DIRECTIONS = compat.UniqueFrozenset([ IALLOCATOR_DIR_IN, IALLOCATOR_DIR_OUT, ]) IALLOCATOR_MODE_ALLOC = "allocate" IALLOCATOR_MODE_RELOC = "relocate" IALLOCATOR_MODE_CHG_GROUP = "change-group" IALLOCATOR_MODE_NODE_EVAC = "node-evacuate" IALLOCATOR_MODE_MULTI_ALLOC = "multi-allocate" VALID_IALLOCATOR_MODES = compat.UniqueFrozenset([ IALLOCATOR_MODE_ALLOC, IALLOCATOR_MODE_RELOC, IALLOCATOR_MODE_CHG_GROUP, IALLOCATOR_MODE_NODE_EVAC, IALLOCATOR_MODE_MULTI_ALLOC, ]) IALLOCATOR_SEARCH_PATH = 
_autoconf.IALLOCATOR_SEARCH_PATH DEFAULT_IALLOCATOR_SHORTCUT = "." IALLOCATOR_NEVAC_PRI = "primary-only" IALLOCATOR_NEVAC_SEC = "secondary-only" IALLOCATOR_NEVAC_ALL = "all" IALLOCATOR_NEVAC_MODES = compat.UniqueFrozenset([ IALLOCATOR_NEVAC_PRI, IALLOCATOR_NEVAC_SEC, IALLOCATOR_NEVAC_ALL, ]) # Node evacuation NODE_EVAC_PRI = "primary-only" NODE_EVAC_SEC = "secondary-only" NODE_EVAC_ALL = "all" NODE_EVAC_MODES = compat.UniqueFrozenset([ NODE_EVAC_PRI, NODE_EVAC_SEC, NODE_EVAC_ALL, ]) # Job queue JOB_QUEUE_VERSION = 1 JOB_QUEUE_SIZE_HARD_LIMIT = 5000 JOB_QUEUE_FILES_PERMS = 0640 JOB_ID_TEMPLATE = r"\d+" JOB_FILE_RE = re.compile(r"^job-(%s)$" % JOB_ID_TEMPLATE) # unchanged job return JOB_NOTCHANGED = "nochange" # Job status JOB_STATUS_QUEUED = "queued" JOB_STATUS_WAITING = "waiting" JOB_STATUS_CANCELING = "canceling" JOB_STATUS_RUNNING = "running" JOB_STATUS_CANCELED = "canceled" JOB_STATUS_SUCCESS = "success" JOB_STATUS_ERROR = "error" JOBS_PENDING = compat.UniqueFrozenset([ JOB_STATUS_QUEUED, JOB_STATUS_WAITING, JOB_STATUS_CANCELING, ]) JOBS_FINALIZED = compat.UniqueFrozenset([ JOB_STATUS_CANCELED, JOB_STATUS_SUCCESS, JOB_STATUS_ERROR, ]) JOB_STATUS_ALL = compat.UniqueFrozenset([ JOB_STATUS_RUNNING, ]) | JOBS_PENDING | JOBS_FINALIZED # OpCode status # not yet finalized OP_STATUS_QUEUED = "queued" OP_STATUS_WAITING = "waiting" OP_STATUS_CANCELING = "canceling" OP_STATUS_RUNNING = "running" # finalized OP_STATUS_CANCELED = "canceled" OP_STATUS_SUCCESS = "success" OP_STATUS_ERROR = "error" OPS_FINALIZED = compat.UniqueFrozenset([ OP_STATUS_CANCELED, OP_STATUS_SUCCESS, OP_STATUS_ERROR, ]) # OpCode priority OP_PRIO_LOWEST = +19 OP_PRIO_HIGHEST = -20 OP_PRIO_LOW = +10 OP_PRIO_NORMAL = 0 OP_PRIO_HIGH = -10 OP_PRIO_SUBMIT_VALID = compat.UniqueFrozenset([ OP_PRIO_LOW, OP_PRIO_NORMAL, OP_PRIO_HIGH, ]) OP_PRIO_DEFAULT = OP_PRIO_NORMAL # Lock recalculate mode LOCKS_REPLACE = "replace" LOCKS_APPEND = "append" # Lock timeout (sum) before we should go into blocking acquire (still # can be reset by priority change); computed as max time (10 hours) # before we should actually go into blocking acquire given that we # start from default priority level; in seconds # TODO LOCK_ATTEMPTS_TIMEOUT = 10 * 3600 / (OP_PRIO_DEFAULT - OP_PRIO_HIGHEST) LOCK_ATTEMPTS_MAXWAIT = 15.0 LOCK_ATTEMPTS_MINWAIT = 1.0 # Execution log types ELOG_MESSAGE = "message" ELOG_REMOTE_IMPORT = "remote-import" ELOG_JQUEUE_TEST = "jqueue-test" # /etc/hosts modification ETC_HOSTS_ADD = "add" ETC_HOSTS_REMOVE = "remove" # Job queue test JQT_MSGPREFIX = "TESTMSG=" JQT_EXPANDNAMES = "expandnames" JQT_EXEC = "exec" JQT_LOGMSG = "logmsg" JQT_STARTMSG = "startmsg" JQT_ALL = compat.UniqueFrozenset([ JQT_EXPANDNAMES, JQT_EXEC, JQT_LOGMSG, JQT_STARTMSG, ]) # Query resources QR_CLUSTER = "cluster" QR_INSTANCE = "instance" QR_NODE = "node" QR_LOCK = "lock" QR_GROUP = "group" QR_OS = "os" QR_JOB = "job" QR_EXPORT = "export" QR_NETWORK = "network" QR_EXTSTORAGE = "extstorage" #: List of resources which can be queried using L{opcodes.OpQuery} QR_VIA_OP = compat.UniqueFrozenset([ QR_CLUSTER, QR_INSTANCE, QR_NODE, QR_GROUP, QR_OS, QR_EXPORT, QR_NETWORK, QR_EXTSTORAGE, ]) #: List of resources which can be queried using Local UniX Interface QR_VIA_LUXI = QR_VIA_OP.union([ QR_LOCK, QR_JOB, ]) #: List of resources which can be queried using RAPI QR_VIA_RAPI = QR_VIA_LUXI # Query field types QFT_UNKNOWN = "unknown" QFT_TEXT = "text" QFT_BOOL = "bool" QFT_NUMBER = "number" QFT_UNIT = "unit" QFT_TIMESTAMP = "timestamp" QFT_OTHER = "other" #: All query field types 
QFT_ALL = compat.UniqueFrozenset([ QFT_UNKNOWN, QFT_TEXT, QFT_BOOL, QFT_NUMBER, QFT_UNIT, QFT_TIMESTAMP, QFT_OTHER, ]) # Query result field status (don't change or reuse values as they're used by # clients) #: Normal field status RS_NORMAL = 0 #: Unknown field RS_UNKNOWN = 1 #: No data (e.g. RPC error), can be used instead of L{RS_OFFLINE} RS_NODATA = 2 #: Value unavailable/unsupported for item; if this field is supported #: but we cannot get the data for the moment, RS_NODATA or #: RS_OFFLINE should be used RS_UNAVAIL = 3 #: Resource marked offline RS_OFFLINE = 4 RS_ALL = compat.UniqueFrozenset([ RS_NORMAL, RS_UNKNOWN, RS_NODATA, RS_UNAVAIL, RS_OFFLINE, ]) #: Dictionary with special field cases and their verbose/terse formatting RSS_DESCRIPTION = { RS_UNKNOWN: ("(unknown)", "??"), RS_NODATA: ("(nodata)", "?"), RS_OFFLINE: ("(offline)", "*"), RS_UNAVAIL: ("(unavail)", "-"), } # max dynamic devices MAX_NICS = 8 MAX_DISKS = 16 # SSCONF file prefix SSCONF_FILEPREFIX = "ssconf_" # SSCONF keys SS_CLUSTER_NAME = "cluster_name" SS_CLUSTER_TAGS = "cluster_tags" SS_FILE_STORAGE_DIR = "file_storage_dir" SS_SHARED_FILE_STORAGE_DIR = "shared_file_storage_dir" SS_MASTER_CANDIDATES = "master_candidates" SS_MASTER_CANDIDATES_IPS = "master_candidates_ips" SS_MASTER_IP = "master_ip" SS_MASTER_NETDEV = "master_netdev" SS_MASTER_NETMASK = "master_netmask" SS_MASTER_NODE = "master_node" SS_NODE_LIST = "node_list" SS_NODE_PRIMARY_IPS = "node_primary_ips" SS_NODE_SECONDARY_IPS = "node_secondary_ips" SS_OFFLINE_NODES = "offline_nodes" SS_ONLINE_NODES = "online_nodes" SS_PRIMARY_IP_FAMILY = "primary_ip_family" SS_INSTANCE_LIST = "instance_list" SS_RELEASE_VERSION = "release_version" SS_HYPERVISOR_LIST = "hypervisor_list" SS_MAINTAIN_NODE_HEALTH = "maintain_node_health" SS_UID_POOL = "uid_pool" SS_NODEGROUPS = "nodegroups" SS_NETWORKS = "networks" # This is not a complete SSCONF key, but the prefix for the hypervisor keys SS_HVPARAMS_PREF = "hvparams_" # Hvparams keys: SS_HVPARAMS_XEN_PVM = SS_HVPARAMS_PREF + HT_XEN_PVM SS_HVPARAMS_XEN_FAKE = SS_HVPARAMS_PREF + HT_FAKE SS_HVPARAMS_XEN_HVM = SS_HVPARAMS_PREF + HT_XEN_HVM SS_HVPARAMS_XEN_KVM = SS_HVPARAMS_PREF + HT_KVM SS_HVPARAMS_XEN_CHROOT = SS_HVPARAMS_PREF + HT_CHROOT SS_HVPARAMS_XEN_LXC = SS_HVPARAMS_PREF + HT_LXC VALID_SS_HVPARAMS_KEYS = compat.UniqueFrozenset([ SS_HVPARAMS_XEN_PVM, SS_HVPARAMS_XEN_FAKE, SS_HVPARAMS_XEN_HVM, SS_HVPARAMS_XEN_KVM, SS_HVPARAMS_XEN_CHROOT, SS_HVPARAMS_XEN_LXC, ]) SS_FILE_PERMS = 0444 # cluster wide default parameters DEFAULT_ENABLED_HYPERVISOR = HT_XEN_PVM HVC_DEFAULTS = { HT_XEN_PVM: { HV_USE_BOOTLOADER: False, HV_BOOTLOADER_PATH: XEN_BOOTLOADER, HV_BOOTLOADER_ARGS: "", HV_KERNEL_PATH: XEN_KERNEL, HV_INITRD_PATH: "", HV_ROOT_PATH: "/dev/xvda1", HV_KERNEL_ARGS: "ro", HV_MIGRATION_PORT: 8002, HV_MIGRATION_MODE: HT_MIGRATION_LIVE, HV_BLOCKDEV_PREFIX: "sd", HV_REBOOT_BEHAVIOR: INSTANCE_REBOOT_ALLOWED, HV_CPU_MASK: CPU_PINNING_ALL, HV_CPU_CAP: 0, HV_CPU_WEIGHT: 256, HV_VIF_SCRIPT: "", HV_XEN_CMD: XEN_CMD_XM, }, HT_XEN_HVM: { HV_BOOT_ORDER: "cd", HV_CDROM_IMAGE_PATH: "", HV_NIC_TYPE: HT_NIC_RTL8139, HV_DISK_TYPE: HT_DISK_PARAVIRTUAL, HV_VNC_BIND_ADDRESS: IP4_ADDRESS_ANY, HV_VNC_PASSWORD_FILE: pathutils.VNC_PASSWORD_FILE, HV_ACPI: True, HV_PAE: True, HV_KERNEL_PATH: "/usr/lib/xen/boot/hvmloader", HV_DEVICE_MODEL: "/usr/lib/xen/bin/qemu-dm", HV_MIGRATION_PORT: 8002, HV_MIGRATION_MODE: HT_MIGRATION_NONLIVE, HV_USE_LOCALTIME: False, HV_BLOCKDEV_PREFIX: "hd", HV_PASSTHROUGH: "", HV_REBOOT_BEHAVIOR: INSTANCE_REBOOT_ALLOWED, HV_CPU_MASK: 
CPU_PINNING_ALL, HV_CPU_CAP: 0, HV_CPU_WEIGHT: 256, HV_VIF_TYPE: HT_HVM_VIF_IOEMU, HV_VIF_SCRIPT: "", HV_VIRIDIAN: False, HV_XEN_CMD: XEN_CMD_XM, }, HT_KVM: { HV_KVM_PATH: KVM_PATH, HV_KERNEL_PATH: KVM_KERNEL, HV_INITRD_PATH: "", HV_KERNEL_ARGS: "ro", HV_ROOT_PATH: "/dev/vda1", HV_ACPI: True, HV_SERIAL_CONSOLE: True, HV_SERIAL_SPEED: 38400, HV_VNC_BIND_ADDRESS: "", HV_VNC_TLS: False, HV_VNC_X509: "", HV_VNC_X509_VERIFY: False, HV_VNC_PASSWORD_FILE: "", HV_KVM_SPICE_BIND: "", HV_KVM_SPICE_IP_VERSION: IFACE_NO_IP_VERSION_SPECIFIED, HV_KVM_SPICE_PASSWORD_FILE: "", HV_KVM_SPICE_LOSSLESS_IMG_COMPR: "", HV_KVM_SPICE_JPEG_IMG_COMPR: "", HV_KVM_SPICE_ZLIB_GLZ_IMG_COMPR: "", HV_KVM_SPICE_STREAMING_VIDEO_DETECTION: "", HV_KVM_SPICE_AUDIO_COMPR: True, HV_KVM_SPICE_USE_TLS: False, HV_KVM_SPICE_TLS_CIPHERS: OPENSSL_CIPHERS, HV_KVM_SPICE_USE_VDAGENT: True, HV_KVM_FLOPPY_IMAGE_PATH: "", HV_CDROM_IMAGE_PATH: "", HV_KVM_CDROM2_IMAGE_PATH: "", HV_BOOT_ORDER: HT_BO_DISK, HV_NIC_TYPE: HT_NIC_PARAVIRTUAL, HV_DISK_TYPE: HT_DISK_PARAVIRTUAL, HV_KVM_CDROM_DISK_TYPE: "", HV_USB_MOUSE: "", HV_KEYMAP: "", HV_MIGRATION_PORT: 8102, HV_MIGRATION_BANDWIDTH: 32, # MiB/s HV_MIGRATION_DOWNTIME: 30, # ms HV_MIGRATION_MODE: HT_MIGRATION_LIVE, HV_USE_LOCALTIME: False, HV_DISK_CACHE: HT_CACHE_DEFAULT, HV_SECURITY_MODEL: HT_SM_NONE, HV_SECURITY_DOMAIN: "", HV_KVM_FLAG: "", HV_VHOST_NET: False, HV_KVM_USE_CHROOT: False, HV_MEM_PATH: "", HV_REBOOT_BEHAVIOR: INSTANCE_REBOOT_ALLOWED, HV_CPU_MASK: CPU_PINNING_ALL, HV_CPU_TYPE: "", HV_CPU_CORES: 0, HV_CPU_THREADS: 0, HV_CPU_SOCKETS: 0, HV_SOUNDHW: "", HV_USB_DEVICES: "", HV_VGA: "", HV_KVM_EXTRA: "", HV_KVM_MACHINE_VERSION: "", HV_VNET_HDR: True, }, HT_FAKE: { HV_MIGRATION_MODE: HT_MIGRATION_LIVE, }, HT_CHROOT: { HV_INIT_SCRIPT: "/ganeti-chroot", }, HT_LXC: { HV_CPU_MASK: "", }, } HVC_GLOBALS = compat.UniqueFrozenset([ HV_MIGRATION_PORT, HV_MIGRATION_BANDWIDTH, HV_MIGRATION_MODE, HV_XEN_CMD, ]) BEC_DEFAULTS = { BE_MINMEM: 128, BE_MAXMEM: 128, BE_VCPUS: 1, BE_AUTO_BALANCE: True, BE_ALWAYS_FAILOVER: False, BE_SPINDLE_USE: 1, } NDC_DEFAULTS = { ND_OOB_PROGRAM: "", ND_SPINDLE_COUNT: 1, ND_EXCLUSIVE_STORAGE: False, } NDC_GLOBALS = compat.UniqueFrozenset([ ND_EXCLUSIVE_STORAGE, ]) DISK_LD_DEFAULTS = { DT_DRBD8: { LDP_RESYNC_RATE: CLASSIC_DRBD_SYNC_SPEED, LDP_BARRIERS: _autoconf.DRBD_BARRIERS, LDP_NO_META_FLUSH: _autoconf.DRBD_NO_META_FLUSH, LDP_DEFAULT_METAVG: DEFAULT_VG, LDP_DISK_CUSTOM: "", LDP_NET_CUSTOM: "", LDP_PROTOCOL: DRBD_DEFAULT_NET_PROTOCOL, LDP_DYNAMIC_RESYNC: False, # The default values for the DRBD dynamic resync speed algorithm # are taken from the drbdsetup 8.3.11 man page, except for # c-plan-ahead (that we don't need to set to 0, because we have a # separate option to enable it) and for c-max-rate, that we cap to # the default value for the static resync rate. 
LDP_PLAN_AHEAD: 20, # ds LDP_FILL_TARGET: 0, # sectors LDP_DELAY_TARGET: 1, # ds LDP_MAX_RATE: CLASSIC_DRBD_SYNC_SPEED, # KiB/s LDP_MIN_RATE: 4 * 1024, # KiB/s }, DT_PLAIN: { LDP_STRIPES: _autoconf.LVM_STRIPECOUNT }, DT_FILE: {}, DT_SHARED_FILE: {}, DT_BLOCK: {}, DT_RBD: { LDP_POOL: "rbd" }, DT_EXT: {}, } # readability shortcuts _LV_DEFAULTS = DISK_LD_DEFAULTS[DT_PLAIN] _DRBD_DEFAULTS = DISK_LD_DEFAULTS[DT_DRBD8] DISK_DT_DEFAULTS = { DT_PLAIN: { LV_STRIPES: DISK_LD_DEFAULTS[DT_PLAIN][LDP_STRIPES], }, DT_DRBD8: { DRBD_RESYNC_RATE: _DRBD_DEFAULTS[LDP_RESYNC_RATE], DRBD_DATA_STRIPES: _LV_DEFAULTS[LDP_STRIPES], DRBD_META_STRIPES: _LV_DEFAULTS[LDP_STRIPES], DRBD_DISK_BARRIERS: _DRBD_DEFAULTS[LDP_BARRIERS], DRBD_META_BARRIERS: _DRBD_DEFAULTS[LDP_NO_META_FLUSH], DRBD_DEFAULT_METAVG: _DRBD_DEFAULTS[LDP_DEFAULT_METAVG], DRBD_DISK_CUSTOM: _DRBD_DEFAULTS[LDP_DISK_CUSTOM], DRBD_NET_CUSTOM: _DRBD_DEFAULTS[LDP_NET_CUSTOM], DRBD_PROTOCOL: _DRBD_DEFAULTS[LDP_PROTOCOL], DRBD_DYNAMIC_RESYNC: _DRBD_DEFAULTS[LDP_DYNAMIC_RESYNC], DRBD_PLAN_AHEAD: _DRBD_DEFAULTS[LDP_PLAN_AHEAD], DRBD_FILL_TARGET: _DRBD_DEFAULTS[LDP_FILL_TARGET], DRBD_DELAY_TARGET: _DRBD_DEFAULTS[LDP_DELAY_TARGET], DRBD_MAX_RATE: _DRBD_DEFAULTS[LDP_MAX_RATE], DRBD_MIN_RATE: _DRBD_DEFAULTS[LDP_MIN_RATE], }, DT_DISKLESS: {}, DT_FILE: {}, DT_SHARED_FILE: {}, DT_BLOCK: {}, DT_RBD: { RBD_POOL: DISK_LD_DEFAULTS[DT_RBD][LDP_POOL] }, DT_EXT: {}, } # we don't want to export the shortcuts del _LV_DEFAULTS, _DRBD_DEFAULTS NICC_DEFAULTS = { NIC_MODE: NIC_MODE_BRIDGED, NIC_LINK: DEFAULT_BRIDGE, } # All of the following values are quite arbitrary - there are no # "good" defaults, these must be customised per-site ISPECS_MINMAX_DEFAULTS = { ISPECS_MIN: { ISPEC_MEM_SIZE: 128, ISPEC_CPU_COUNT: 1, ISPEC_DISK_COUNT: 1, ISPEC_DISK_SIZE: 1024, ISPEC_NIC_COUNT: 1, ISPEC_SPINDLE_USE: 1, }, ISPECS_MAX: { ISPEC_MEM_SIZE: 32768, ISPEC_CPU_COUNT: 8, ISPEC_DISK_COUNT: MAX_DISKS, ISPEC_DISK_SIZE: 1024 * 1024, ISPEC_NIC_COUNT: MAX_NICS, ISPEC_SPINDLE_USE: 12, }, } IPOLICY_DEFAULTS = { ISPECS_MINMAX: [ISPECS_MINMAX_DEFAULTS], ISPECS_STD: { ISPEC_MEM_SIZE: 128, ISPEC_CPU_COUNT: 1, ISPEC_DISK_COUNT: 1, ISPEC_DISK_SIZE: 1024, ISPEC_NIC_COUNT: 1, ISPEC_SPINDLE_USE: 1, }, IPOLICY_DTS: list(DISK_TEMPLATES), IPOLICY_VCPU_RATIO: 4.0, IPOLICY_SPINDLE_RATIO: 32.0, } MASTER_POOL_SIZE_DEFAULT = 10 # Exclusive storage: # Error margin used to compare physical disks PART_MARGIN = .01 # Space reserved when creating instance disks PART_RESERVED = .02 CONFD_PROTOCOL_VERSION = 1 CONFD_REQ_PING = 0 CONFD_REQ_NODE_ROLE_BYNAME = 1 CONFD_REQ_NODE_PIP_BY_INSTANCE_IP = 2 CONFD_REQ_CLUSTER_MASTER = 3 CONFD_REQ_NODE_PIP_LIST = 4 CONFD_REQ_MC_PIP_LIST = 5 CONFD_REQ_INSTANCES_IPS_LIST = 6 CONFD_REQ_NODE_DRBD = 7 CONFD_REQ_NODE_INSTANCES = 8 # Confd request query fields. These are used to narrow down queries. # These must be strings rather than integers, because json-encoding # converts them to strings anyway, as they're used as dict-keys. 
CONFD_REQQ_LINK = "0" CONFD_REQQ_IP = "1" CONFD_REQQ_IPLIST = "2" CONFD_REQQ_FIELDS = "3" CONFD_REQFIELD_NAME = "0" CONFD_REQFIELD_IP = "1" CONFD_REQFIELD_MNODE_PIP = "2" CONFD_REQS = compat.UniqueFrozenset([ CONFD_REQ_PING, CONFD_REQ_NODE_ROLE_BYNAME, CONFD_REQ_NODE_PIP_BY_INSTANCE_IP, CONFD_REQ_CLUSTER_MASTER, CONFD_REQ_NODE_PIP_LIST, CONFD_REQ_MC_PIP_LIST, CONFD_REQ_INSTANCES_IPS_LIST, CONFD_REQ_NODE_DRBD, ]) CONFD_REPL_STATUS_OK = 0 CONFD_REPL_STATUS_ERROR = 1 CONFD_REPL_STATUS_NOTIMPLEMENTED = 2 CONFD_REPL_STATUSES = compat.UniqueFrozenset([ CONFD_REPL_STATUS_OK, CONFD_REPL_STATUS_ERROR, CONFD_REPL_STATUS_NOTIMPLEMENTED, ]) (CONFD_NODE_ROLE_MASTER, CONFD_NODE_ROLE_CANDIDATE, CONFD_NODE_ROLE_OFFLINE, CONFD_NODE_ROLE_DRAINED, CONFD_NODE_ROLE_REGULAR, ) = range(5) # A few common errors for confd CONFD_ERROR_UNKNOWN_ENTRY = 1 CONFD_ERROR_INTERNAL = 2 CONFD_ERROR_ARGUMENT = 3 # Each request is "salted" by the current timestamp. # This constant decides how many seconds of skew to accept. # TODO: make this a default and allow the value to be more configurable CONFD_MAX_CLOCK_SKEW = 2 * NODE_MAX_CLOCK_SKEW # When we haven't reloaded the config for more than this amount of # seconds, we force a test to see if inotify is betraying us. Using a # prime number to ensure we get less chance of 'same wakeup' with # other processes. CONFD_CONFIG_RELOAD_TIMEOUT = 17 # If we receive more than one update within this amount of microseconds, # we move to polling every RATELIMIT seconds, rather than relying on # inotify, to be able to serve more requests. CONFD_CONFIG_RELOAD_RATELIMIT = 250000 # Magic number prepended to all confd queries. # This allows us to distinguish different types of confd protocols and handle # them. For example by changing this we can move the whole payload to be # compressed, or move away from json. CONFD_MAGIC_FOURCC = "plj0" # By default a confd request is sent to the minimum between this number and # the number of all MCs. 6 was chosen because even in the case of a disastrous 50% response rate, # we should have enough answers to be able to compare more than one. CONFD_DEFAULT_REQ_COVERAGE = 6 # Timeout in seconds to expire pending query requests in the confd client # library. We don't actually expect any answer more than 10 seconds after we # sent a request. CONFD_CLIENT_EXPIRE_TIMEOUT = 10 # Maximum UDP datagram size. # On IPv4: 64K - 20 (ip header size) - 8 (udp header size) = 65507 # On IPv6: 64K - 40 (ip6 header size) - 8 (udp header size) = 65487 # (assuming we can't use jumbo frames) # We just set this to 60K, which should be enough MAX_UDP_DATA_SIZE = 61440 # User-id pool minimum/maximum acceptable user-ids. 
UIDPOOL_UID_MIN = 0 UIDPOOL_UID_MAX = 2 ** 32 - 1 # Assuming 32 bit user-ids # Name or path of the pgrep command PGREP = "pgrep" # Name of the node group that gets created at cluster init or upgrade INITIAL_NODE_GROUP_NAME = "default" # Possible values for NodeGroup.alloc_policy ALLOC_POLICY_PREFERRED = "preferred" ALLOC_POLICY_LAST_RESORT = "last_resort" ALLOC_POLICY_UNALLOCABLE = "unallocable" VALID_ALLOC_POLICIES = [ ALLOC_POLICY_PREFERRED, ALLOC_POLICY_LAST_RESORT, ALLOC_POLICY_UNALLOCABLE, ] # Temporary external/shared storage parameters BLOCKDEV_DRIVER_MANUAL = "manual" # qemu-img path, required for ovfconverter QEMUIMG_PATH = _autoconf.QEMUIMG_PATH # Whether htools was enabled at compilation time HTOOLS = _autoconf.HTOOLS # The hail iallocator IALLOC_HAIL = "hail" # Fake opcodes for functions that have hooks attached to them via # backend.RunLocalHooks FAKE_OP_MASTER_TURNUP = "OP_CLUSTER_IP_TURNUP" FAKE_OP_MASTER_TURNDOWN = "OP_CLUSTER_IP_TURNDOWN" # SSH key types SSHK_RSA = "rsa" SSHK_DSA = "dsa" SSHK_ALL = compat.UniqueFrozenset([SSHK_RSA, SSHK_DSA]) # SSH authorized key types SSHAK_RSA = "ssh-rsa" SSHAK_DSS = "ssh-dss" SSHAK_ALL = compat.UniqueFrozenset([SSHAK_RSA, SSHAK_DSS]) # SSH setup SSHS_CLUSTER_NAME = "cluster_name" SSHS_SSH_HOST_KEY = "ssh_host_key" SSHS_SSH_ROOT_KEY = "ssh_root_key" SSHS_NODE_DAEMON_CERTIFICATE = "node_daemon_certificate" #: Key files for SSH daemon SSH_DAEMON_KEYFILES = { SSHK_RSA: (pathutils.SSH_HOST_RSA_PRIV, pathutils.SSH_HOST_RSA_PUB), SSHK_DSA: (pathutils.SSH_HOST_DSA_PRIV, pathutils.SSH_HOST_DSA_PUB), } # Node daemon setup NDS_CLUSTER_NAME = "cluster_name" NDS_NODE_DAEMON_CERTIFICATE = "node_daemon_certificate" NDS_SSCONF = "ssconf" NDS_START_NODE_DAEMON = "start_node_daemon" # Path generating random UUID RANDOM_UUID_FILE = "/proc/sys/kernel/random/uuid" # Regex string for verifying a UUID UUID_REGEX = "^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$" # Auto-repair tag prefixes AUTO_REPAIR_TAG_PREFIX = "ganeti:watcher:autorepair:" AUTO_REPAIR_TAG_ENABLED = AUTO_REPAIR_TAG_PREFIX AUTO_REPAIR_TAG_SUSPENDED = AUTO_REPAIR_TAG_ENABLED + "suspend:" AUTO_REPAIR_TAG_PENDING = AUTO_REPAIR_TAG_PREFIX + "pending:" AUTO_REPAIR_TAG_RESULT = AUTO_REPAIR_TAG_PREFIX + "result:" # Auto-repair levels AUTO_REPAIR_FIX_STORAGE = "fix-storage" AUTO_REPAIR_MIGRATE = "migrate" AUTO_REPAIR_FAILOVER = "failover" AUTO_REPAIR_REINSTALL = "reinstall" AUTO_REPAIR_ALL_TYPES = [ AUTO_REPAIR_FIX_STORAGE, AUTO_REPAIR_MIGRATE, AUTO_REPAIR_FAILOVER, AUTO_REPAIR_REINSTALL, ] # Auto-repair results AUTO_REPAIR_SUCCESS = "success" AUTO_REPAIR_FAILURE = "failure" AUTO_REPAIR_ENOPERM = "enoperm" AUTO_REPAIR_ALL_RESULTS = frozenset([ AUTO_REPAIR_SUCCESS, AUTO_REPAIR_FAILURE, AUTO_REPAIR_ENOPERM, ]) # The version identifier for builtin data collectors BUILTIN_DATA_COLLECTOR_VERSION = "B" # The reason trail opcode parameter name OPCODE_REASON = "reason" # The source reasons for the execution of an OpCode OPCODE_REASON_SRC_CLIENT = "gnt:client" OPCODE_REASON_SRC_NODED = "gnt:daemon:noded" OPCODE_REASON_SRC_OPCODE = "gnt:opcode" OPCODE_REASON_SRC_RLIB2 = "gnt:library:rlib2" OPCODE_REASON_SRC_USER = "gnt:user" OPCODE_REASON_SOURCES = compat.UniqueFrozenset([ OPCODE_REASON_SRC_CLIENT, OPCODE_REASON_SRC_NODED, OPCODE_REASON_SRC_OPCODE, OPCODE_REASON_SRC_RLIB2, OPCODE_REASON_SRC_USER, ]) DISKSTATS_FILE = "/proc/diskstats" # Do not re-export imported modules del re, _vcsversion, _autoconf, socket, pathutils, compat 
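# Illustrative usage sketch, not part of the tree: reading a fresh UUID from
# RANDOM_UUID_FILE and checking it against UUID_REGEX, as a client module
# might do (constants.py itself deletes its imports just above, so this is
# written as a stand-alone fragment). The helper name _NewUUID is
# hypothetical.
import re
from ganeti import constants

def _NewUUID():
  """Returns a random UUID string as provided by the kernel."""
  uuid = open(constants.RANDOM_UUID_FILE).read().strip()
  if not re.match(constants.UUID_REGEX, uuid):
    raise ValueError("Kernel returned a malformed UUID: %r" % uuid)
  return uuid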
ganeti-2.9.3/lib/ssconf.py0000644000000000000000000003112712271422343015411 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Global Configuration data for Ganeti. This module provides the interface to a special case of cluster configuration data, which is mostly static and available to all nodes. """ import sys import errno import logging from ganeti import compat from ganeti import errors from ganeti import constants from ganeti import utils from ganeti import netutils from ganeti import pathutils SSCONF_LOCK_TIMEOUT = 10 #: Valid ssconf keys _VALID_KEYS = compat.UniqueFrozenset([ constants.SS_CLUSTER_NAME, constants.SS_CLUSTER_TAGS, constants.SS_FILE_STORAGE_DIR, constants.SS_SHARED_FILE_STORAGE_DIR, constants.SS_MASTER_CANDIDATES, constants.SS_MASTER_CANDIDATES_IPS, constants.SS_MASTER_IP, constants.SS_MASTER_NETDEV, constants.SS_MASTER_NETMASK, constants.SS_MASTER_NODE, constants.SS_NODE_LIST, constants.SS_NODE_PRIMARY_IPS, constants.SS_NODE_SECONDARY_IPS, constants.SS_OFFLINE_NODES, constants.SS_ONLINE_NODES, constants.SS_PRIMARY_IP_FAMILY, constants.SS_INSTANCE_LIST, constants.SS_RELEASE_VERSION, constants.SS_HYPERVISOR_LIST, constants.SS_MAINTAIN_NODE_HEALTH, constants.SS_UID_POOL, constants.SS_NODEGROUPS, constants.SS_NETWORKS, constants.SS_HVPARAMS_XEN_PVM, constants.SS_HVPARAMS_XEN_FAKE, constants.SS_HVPARAMS_XEN_HVM, constants.SS_HVPARAMS_XEN_KVM, constants.SS_HVPARAMS_XEN_CHROOT, constants.SS_HVPARAMS_XEN_LXC, ]) #: Maximum size for ssconf files _MAX_SIZE = 128 * 1024 def ReadSsconfFile(filename): """Reads an ssconf file and verifies its size. @type filename: string @param filename: Path to file @rtype: string @return: File contents without newlines at the end @raise RuntimeError: When the file size exceeds L{_MAX_SIZE} """ statcb = utils.FileStatHelper() data = utils.ReadFile(filename, size=_MAX_SIZE, preread=statcb) if statcb.st.st_size > _MAX_SIZE: msg = ("File '%s' has a size of %s bytes (up to %s allowed)" % (filename, statcb.st.st_size, _MAX_SIZE)) raise RuntimeError(msg) return data.rstrip("\n") class SimpleStore(object): """Interface to static cluster data. This is different from the config.ConfigWriter and SimpleConfigReader classes in that it holds data that will always be present, even on nodes which don't have all the cluster data. Other particularities of the datastore: - keys are restricted to predefined values """ def __init__(self, cfg_location=None, _lockfile=pathutils.SSCONF_LOCK_FILE): if cfg_location is None: self._cfg_dir = pathutils.DATA_DIR else: self._cfg_dir = cfg_location self._lockfile = _lockfile def KeyToFilename(self, key): """Convert a given key into a filename. 
""" if key not in _VALID_KEYS: raise errors.ProgrammerError("Invalid key requested from SSConf: '%s'" % str(key)) filename = self._cfg_dir + "/" + constants.SSCONF_FILEPREFIX + key return filename def _ReadFile(self, key, default=None): """Generic routine to read keys. This will read the file which holds the value requested. Errors will be changed into ConfigurationErrors. """ filename = self.KeyToFilename(key) try: return ReadSsconfFile(filename) except EnvironmentError, err: if err.errno == errno.ENOENT and default is not None: return default raise errors.ConfigurationError("Can't read ssconf file %s: %s" % (filename, str(err))) def ReadAll(self): """Reads all keys and returns their values. @rtype: dict @return: Dictionary, ssconf key as key, value as value """ result = [] for key in _VALID_KEYS: try: value = self._ReadFile(key) except errors.ConfigurationError: # Ignore non-existing files pass else: result.append((key, value)) return dict(result) def WriteFiles(self, values, dry_run=False): """Writes ssconf files used by external scripts. @type values: dict @param values: Dictionary of (name, value) @type dry_run boolean @param dry_run: Whether to perform a dry run """ ssconf_lock = utils.FileLock.Open(self._lockfile) # Get lock while writing files ssconf_lock.Exclusive(blocking=True, timeout=SSCONF_LOCK_TIMEOUT) try: for name, value in values.iteritems(): if value and not value.endswith("\n"): value += "\n" if len(value) > _MAX_SIZE: msg = ("Value '%s' has a length of %s bytes, but only up to %s are" " allowed" % (name, len(value), _MAX_SIZE)) raise errors.ConfigurationError(msg) utils.WriteFile(self.KeyToFilename(name), data=value, mode=constants.SS_FILE_PERMS, dry_run=dry_run) finally: ssconf_lock.Unlock() def GetFileList(self): """Return the list of all config files. This is used for computing node replication data. """ return [self.KeyToFilename(key) for key in _VALID_KEYS] def GetClusterName(self): """Get the cluster name. """ return self._ReadFile(constants.SS_CLUSTER_NAME) def GetFileStorageDir(self): """Get the file storage dir. """ return self._ReadFile(constants.SS_FILE_STORAGE_DIR) def GetSharedFileStorageDir(self): """Get the shared file storage dir. """ return self._ReadFile(constants.SS_SHARED_FILE_STORAGE_DIR) def GetMasterCandidates(self): """Return the list of master candidates. """ data = self._ReadFile(constants.SS_MASTER_CANDIDATES) nl = data.splitlines(False) return nl def GetMasterCandidatesIPList(self): """Return the list of master candidates' primary IP. """ data = self._ReadFile(constants.SS_MASTER_CANDIDATES_IPS) nl = data.splitlines(False) return nl def GetMasterIP(self): """Get the IP of the master node for this cluster. """ return self._ReadFile(constants.SS_MASTER_IP) def GetMasterNetdev(self): """Get the netdev to which we'll add the master ip. """ return self._ReadFile(constants.SS_MASTER_NETDEV) def GetMasterNetmask(self): """Get the master netmask. """ try: return self._ReadFile(constants.SS_MASTER_NETMASK) except errors.ConfigurationError: family = self.GetPrimaryIPFamily() ipcls = netutils.IPAddress.GetClassFromIpFamily(family) return ipcls.iplen def GetMasterNode(self): """Get the hostname of the master node for this cluster. """ return self._ReadFile(constants.SS_MASTER_NODE) def GetNodeList(self): """Return the list of cluster nodes. """ data = self._ReadFile(constants.SS_NODE_LIST) nl = data.splitlines(False) return nl def GetNodePrimaryIPList(self): """Return the list of cluster nodes' primary IP. 
""" data = self._ReadFile(constants.SS_NODE_PRIMARY_IPS) nl = data.splitlines(False) return nl def GetNodeSecondaryIPList(self): """Return the list of cluster nodes' secondary IP. """ data = self._ReadFile(constants.SS_NODE_SECONDARY_IPS) nl = data.splitlines(False) return nl def GetNodegroupList(self): """Return the list of nodegroups. """ data = self._ReadFile(constants.SS_NODEGROUPS) nl = data.splitlines(False) return nl def GetNetworkList(self): """Return the list of networks. """ data = self._ReadFile(constants.SS_NETWORKS) nl = data.splitlines(False) return nl def GetClusterTags(self): """Return the cluster tags. """ data = self._ReadFile(constants.SS_CLUSTER_TAGS) nl = data.splitlines(False) return nl def GetHypervisorList(self): """Return the list of enabled hypervisors. """ data = self._ReadFile(constants.SS_HYPERVISOR_LIST) nl = data.splitlines(False) return nl def GetHvparamsForHypervisor(self, hvname): """Return the hypervisor parameters of the given hypervisor. @type hvname: string @param hvname: name of the hypervisor, must be in C{constants.HYPER_TYPES} @rtype: dict of strings @returns: dictionary with hypervisor parameters """ data = self._ReadFile(constants.SS_HVPARAMS_PREF + hvname) lines = data.splitlines(False) hvparams = {} for line in lines: (key, value) = line.split("=") hvparams[key] = value return hvparams def GetHvparams(self): """Return the hypervisor parameters of all hypervisors. @rtype: dict of dict of strings @returns: dictionary mapping hypervisor names to hvparams """ all_hvparams = {} for hv in constants.HYPER_TYPES: all_hvparams[hv] = self.GetHvparamsForHypervisor(hv) return all_hvparams def GetMaintainNodeHealth(self): """Return the value of the maintain_node_health option. """ data = self._ReadFile(constants.SS_MAINTAIN_NODE_HEALTH) # we rely on the bool serialization here return data == "True" def GetUidPool(self): """Return the user-id pool definition string. The separator character is a newline. The return value can be parsed using uidpool.ParseUidPool():: ss = ssconf.SimpleStore() uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\\n") """ data = self._ReadFile(constants.SS_UID_POOL) return data def GetPrimaryIPFamily(self): """Return the cluster-wide primary address family. """ try: return int(self._ReadFile(constants.SS_PRIMARY_IP_FAMILY, default=netutils.IP4Address.family)) except (ValueError, TypeError), err: raise errors.ConfigurationError("Error while trying to parse primary IP" " family: %s" % err) def WriteSsconfFiles(values, dry_run=False): """Update all ssconf files. Wrapper around L{SimpleStore.WriteFiles}. """ SimpleStore().WriteFiles(values, dry_run=dry_run) def GetMasterAndMyself(ss=None): """Get the master node and my own hostname. This can be either used for a 'soft' check (compared to CheckMaster, which exits) or just for computing both at the same time. The function does not handle any errors, these should be handled in the caller (errors.ConfigurationError, errors.ResolverError). @param ss: either a sstore.SimpleConfigReader or a sstore.SimpleStore instance @rtype: tuple @return: a tuple (master node name, my own name) """ if ss is None: ss = SimpleStore() return ss.GetMasterNode(), netutils.Hostname.GetSysName() def CheckMaster(debug, ss=None): """Checks the node setup. If this is the master, the function will return. Otherwise it will exit with an exit code based on the node status. 
""" try: master_name, myself = GetMasterAndMyself(ss) except errors.ConfigurationError, err: print "Cluster configuration incomplete: '%s'" % str(err) sys.exit(constants.EXIT_NODESETUP_ERROR) except errors.ResolverError, err: sys.stderr.write("Cannot resolve my own name (%s)\n" % err.args[0]) sys.exit(constants.EXIT_NODESETUP_ERROR) if myself != master_name: if debug: sys.stderr.write("Not master, exiting.\n") sys.exit(constants.EXIT_NOTMASTER) def VerifyClusterName(name, _cfg_location=None): """Verifies cluster name against a local cluster name. @type name: string @param name: Cluster name """ sstore = SimpleStore(cfg_location=_cfg_location) try: local_name = sstore.GetClusterName() except errors.ConfigurationError, err: logging.debug("Can't get local cluster name: %s", err) else: if name != local_name: raise errors.GenericError("Current cluster name is '%s'" % local_name) def VerifyKeys(keys): """Raises an exception if unknown ssconf keys are given. @type keys: sequence @param keys: Key names to verify @raise errors.GenericError: When invalid keys were found """ invalid = frozenset(keys) - _VALID_KEYS if invalid: raise errors.GenericError("Invalid ssconf keys: %s" % utils.CommaJoin(sorted(invalid))) ganeti-2.9.3/lib/ht.py0000644000000000000000000002534012271422343014531 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the parameter types code.""" import re import operator from ganeti import compat from ganeti import utils from ganeti import constants _PAREN_RE = re.compile("^[a-zA-Z0-9_-]+$") def Parens(text): """Enclose text in parens if necessary. @param text: Text """ text = str(text) if _PAREN_RE.match(text): return text else: return "(%s)" % text class _WrapperBase(object): __slots__ = [ "_fn", "_text", ] def __init__(self, text, fn): """Initializes this class. @param text: Description @param fn: Wrapped function """ assert text.strip() self._text = text self._fn = fn def __call__(self, *args): return self._fn(*args) class _DescWrapper(_WrapperBase): """Wrapper class for description text. """ def __str__(self): return self._text class _CommentWrapper(_WrapperBase): """Wrapper class for comment. """ def __str__(self): return "%s [%s]" % (self._fn, self._text) def WithDesc(text): """Builds wrapper class with description text. @type text: string @param text: Description text @return: Callable class """ assert text[0] == text[0].upper() return compat.partial(_DescWrapper, text) def Comment(text): """Builds wrapper for adding comment to description text. @type text: string @param text: Comment text @return: Callable class """ assert not frozenset(text).intersection("[]") return compat.partial(_CommentWrapper, text) def CombinationDesc(op, args, fn): """Build description for combinating operator. 
@type op: string @param op: Operator as text (e.g. "and") @type args: list @param args: Operator arguments @type fn: callable @param fn: Wrapped function """ # Some type descriptions are rather long. If "None" is listed at the # end or somewhere in between it is easily missed. Therefore it should # be at the beginning, e.g. "None or (long description)". if __debug__ and TNone in args and args.index(TNone) > 0: raise Exception("TNone must be listed first") if len(args) == 1: descr = str(args[0]) else: descr = (" %s " % op).join(Parens(i) for i in args) return WithDesc(descr)(fn) # Modifiable default values; need to define these here before the # actual LUs @WithDesc(str([])) def EmptyList(): """Returns an empty list. """ return [] @WithDesc(str({})) def EmptyDict(): """Returns an empty dict. """ return {} #: The without-default default value NoDefault = object() #: The no-type (value too complex to check it in the type system) NoType = object() # Some basic types @WithDesc("Anything") def TAny(_): """Accepts any value. """ return True @WithDesc("NotNone") def TNotNone(val): """Checks if the given value is not None. """ return val is not None @WithDesc("None") def TNone(val): """Checks if the given value is None. """ return val is None @WithDesc("ValueNone") def TValueNone(val): """Checks if the given value is L{constants.VALUE_NONE}. """ return val == constants.VALUE_NONE @WithDesc("Boolean") def TBool(val): """Checks if the given value is a boolean. """ return isinstance(val, bool) @WithDesc("Integer") def TInt(val): """Checks if the given value is an integer. """ # For backwards compatibility with older Python versions, boolean values are # also integers and should be excluded in this test. # # >>> (isinstance(False, int), isinstance(True, int)) # (True, True) return isinstance(val, (int, long)) and not isinstance(val, bool) @WithDesc("Float") def TFloat(val): """Checks if the given value is a float. """ return isinstance(val, float) @WithDesc("String") def TString(val): """Checks if the given value is a string. """ return isinstance(val, basestring) @WithDesc("EvalToTrue") def TTrue(val): """Checks if a given value evaluates to a boolean True value. """ return bool(val) def TElemOf(target_list): """Builds a function that checks if a given value is a member of a list. """ def fn(val): return val in target_list return WithDesc("OneOf %s" % (utils.CommaJoin(target_list), ))(fn) # Container types @WithDesc("List") def TList(val): """Checks if the given value is a list. """ return isinstance(val, list) @WithDesc("Tuple") def TTuple(val): """Checks if the given value is a tuple. """ return isinstance(val, tuple) @WithDesc("Dictionary") def TDict(val): """Checks if the given value is a dictionary. """ return isinstance(val, dict) def TIsLength(size): """Checks if the given container is of the given size. """ def fn(container): return len(container) == size return WithDesc("Length %s" % (size, ))(fn) # Combinator types def TAnd(*args): """Combine multiple functions using an AND operation. """ def fn(val): return compat.all(t(val) for t in args) return CombinationDesc("and", args, fn) def TOr(*args): """Combine multiple functions using an OR operation. """ def fn(val): return compat.any(t(val) for t in args) return CombinationDesc("or", args, fn) def TMap(fn, test): """Checks that a modified version of the argument passes the given test.
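Illustrative example, using the L{TNonEmptyString} alias defined below::

    check = TMap(lambda s: s.strip(), TNonEmptyString)
    check("  abc  ")  # True
    check("   ")      # False, stripping leaves an empty string
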
""" return WithDesc("Result of %s must be %s" % (Parens(fn), Parens(test)))(lambda val: test(fn(val))) def TRegex(pobj): """Checks whether a string matches a specific regular expression. @param pobj: Compiled regular expression as returned by C{re.compile} """ desc = WithDesc("String matching regex \"%s\"" % pobj.pattern.encode("string_escape")) return desc(TAnd(TString, pobj.match)) def TMaybe(test): """Wrap a test in a TOr(TNone, test). This makes it easier to define TMaybe* types. """ return TOr(TNone, test) def TMaybeValueNone(test): """Used for unsetting values. """ return TMaybe(TOr(TValueNone, test)) # Type aliases #: a non-empty string TNonEmptyString = WithDesc("NonEmptyString")(TAnd(TString, TTrue)) #: a maybe non-empty string TMaybeString = TMaybe(TNonEmptyString) #: a maybe boolean (bool or none) TMaybeBool = TMaybe(TBool) #: Maybe a dictionary (dict or None) TMaybeDict = TMaybe(TDict) #: Maybe a list (list or None) TMaybeList = TMaybe(TList) #: a non-negative integer (value >= 0) TNonNegativeInt = \ TAnd(TInt, WithDesc("EqualOrGreaterThanZero")(lambda v: v >= 0)) #: a positive integer (value > 0) TPositiveInt = \ TAnd(TInt, WithDesc("GreaterThanZero")(lambda v: v > 0)) #: a maybe positive integer (positive integer or None) TMaybePositiveInt = TMaybe(TPositiveInt) #: a negative integer (value < 0) TNegativeInt = \ TAnd(TInt, WithDesc("LessThanZero")(compat.partial(operator.gt, 0))) #: a positive float TNonNegativeFloat = \ TAnd(TFloat, WithDesc("EqualOrGreaterThanZero")(lambda v: v >= 0.0)) #: Job ID TJobId = WithDesc("JobId")(TOr(TNonNegativeInt, TRegex(re.compile("^%s$" % constants.JOB_ID_TEMPLATE)))) #: Number TNumber = TOr(TInt, TFloat) #: Relative job ID TRelativeJobId = WithDesc("RelativeJobId")(TNegativeInt) def TInstanceOf(cls): """Checks if a given value is an instance of C{cls}. @type cls: class @param cls: Class object """ name = "%s.%s" % (cls.__module__, cls.__name__) desc = WithDesc("Instance of %s" % (Parens(name), )) return desc(lambda val: isinstance(val, cls)) def TListOf(my_type): """Checks if a given value is a list with all elements of the same type. """ desc = WithDesc("List of %s" % (Parens(my_type), )) return desc(TAnd(TList, lambda lst: compat.all(my_type(v) for v in lst))) TMaybeListOf = lambda item_type: TMaybe(TListOf(item_type)) def TDictOf(key_type, val_type): """Checks a dict type for the type of its key/values. """ desc = WithDesc("Dictionary with keys of %s and values of %s" % (Parens(key_type), Parens(val_type))) def fn(container): return (compat.all(key_type(v) for v in container.keys()) and compat.all(val_type(v) for v in container.values())) return desc(TAnd(TDict, fn)) def _TStrictDictCheck(require_all, exclusive, items, val): """Helper function for L{TStrictDict}. """ notfound_fn = lambda _: not exclusive if require_all and not frozenset(val.keys()).issuperset(items.keys()): # Requires items not found in value return False return compat.all(items.get(key, notfound_fn)(value) for (key, value) in val.items()) def TStrictDict(require_all, exclusive, items): """Strict dictionary check with specific keys. 
@type require_all: boolean @param require_all: Whether all keys in L{items} are required @type exclusive: boolean @param exclusive: Whether only keys listed in L{items} should be accepted @type items: dictionary @param items: Mapping from key (string) to verification function """ descparts = ["Dictionary containing"] if exclusive: descparts.append(" none but the") if require_all: descparts.append(" required") if len(items) == 1: descparts.append(" key ") else: descparts.append(" keys ") descparts.append(utils.CommaJoin("\"%s\" (value %s)" % (key, value) for (key, value) in items.items())) desc = WithDesc("".join(descparts)) return desc(TAnd(TDict, compat.partial(_TStrictDictCheck, require_all, exclusive, items))) def TItems(items): """Checks individual items of a container. If the verified value and the list of expected items differ in length, this check considers only as many items as are contained in the shorter list. Use L{TIsLength} to enforce a certain length. @type items: list @param items: List of checks """ assert items, "Need items" text = ["Item", "item"] desc = WithDesc(utils.CommaJoin("%s %s is %s" % (text[int(idx > 0)], idx, Parens(check)) for (idx, check) in enumerate(items))) return desc(lambda value: compat.all(check(i) for (check, i) in zip(items, value))) ganeti-2.9.3/lib/uidpool.py0000644000000000000000000002724612244641676015614 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """User-id pool related functions. The user-id pool is cluster-wide configuration option. It is stored as a list of user-id ranges. This module contains functions used for manipulating the user-id pool parameter and for requesting/returning user-ids from the pool. """ import errno import logging import os import random from ganeti import errors from ganeti import constants from ganeti import utils from ganeti import pathutils def ParseUidPool(value, separator=None): """Parse a user-id pool definition. @param value: string representation of the user-id pool. The accepted input format is a list of integer ranges. The boundaries are inclusive. Example: '1000-5000,8000,9000-9010'. @param separator: the separator character between the uids/uid-ranges. Defaults to a comma. @return: a list of integer pairs (lower, higher range boundaries) """ if separator is None: separator = "," ranges = [] for range_def in value.split(separator): if not range_def: # Skip empty strings continue boundaries = range_def.split("-") n_elements = len(boundaries) if n_elements > 2: raise errors.OpPrereqError( "Invalid user-id range definition. 
Only one hyphen allowed: %s" % boundaries, errors.ECODE_INVAL) try: lower = int(boundaries[0]) except (ValueError, TypeError), err: raise errors.OpPrereqError("Invalid user-id value for lower boundary of" " user-id range: %s" % str(err), errors.ECODE_INVAL) try: higher = int(boundaries[n_elements - 1]) except (ValueError, TypeError), err: raise errors.OpPrereqError("Invalid user-id value for higher boundary of" " user-id range: %s" % str(err), errors.ECODE_INVAL) ranges.append((lower, higher)) ranges.sort() return ranges def AddToUidPool(uid_pool, add_uids): """Add a list of user-ids/user-id ranges to a user-id pool. @param uid_pool: a user-id pool (list of integer tuples) @param add_uids: user-id ranges to be added to the pool (list of integer tuples) """ for uid_range in add_uids: if uid_range not in uid_pool: uid_pool.append(uid_range) uid_pool.sort() def RemoveFromUidPool(uid_pool, remove_uids): """Remove a list of user-ids/user-id ranges from a user-id pool. @param uid_pool: a user-id pool (list of integer tuples) @param remove_uids: user-id ranges to be removed from the pool (list of integer tuples) """ for uid_range in remove_uids: if uid_range not in uid_pool: raise errors.OpPrereqError( "User-id range to be removed is not found in the current" " user-id pool: %s" % str(uid_range), errors.ECODE_INVAL) uid_pool.remove(uid_range) def _FormatUidRange(lower, higher): """Convert a user-id range definition into a string. """ if lower == higher: return str(lower) return "%s-%s" % (lower, higher) def FormatUidPool(uid_pool, separator=None): """Convert the internal representation of the user-id pool into a string. The output format is also accepted by ParseUidPool() @param uid_pool: a list of integer pairs representing UID ranges @param separator: the separator character between the uids/uid-ranges. Defaults to ", ". @return: a string with the formatted results """ if separator is None: separator = ", " return separator.join([_FormatUidRange(lower, higher) for lower, higher in uid_pool]) def CheckUidPool(uid_pool): """Sanity check user-id pool range definition values. @param uid_pool: a list of integer pairs (lower, higher range boundaries) """ for lower, higher in uid_pool: if lower > higher: raise errors.OpPrereqError( "Lower user-id range boundary value (%s)" " is larger than higher boundary value (%s)" % (lower, higher), errors.ECODE_INVAL) if lower < constants.UIDPOOL_UID_MIN: raise errors.OpPrereqError( "Lower user-id range boundary value (%s)" " is smaller than UIDPOOL_UID_MIN (%s)." % (lower, constants.UIDPOOL_UID_MIN), errors.ECODE_INVAL) if higher > constants.UIDPOOL_UID_MAX: raise errors.OpPrereqError( "Higher user-id boundary value (%s)" " is larger than UIDPOOL_UID_MAX (%s)." % (higher, constants.UIDPOOL_UID_MAX), errors.ECODE_INVAL) def ExpandUidPool(uid_pool): """Expands a uid-pool definition to a list of uids. @param uid_pool: a list of integer pairs (lower, higher range boundaries) @return: a list of integers """ uids = set() for lower, higher in uid_pool: uids.update(range(lower, higher + 1)) return list(uids) def _IsUidUsed(uid): """Check if there is any process in the system running with the given user-id @type uid: integer @param uid: the user-id to be checked. """ pgrep_command = [constants.PGREP, "-u", uid] result = utils.RunCmd(pgrep_command) if result.exit_code == 0: return True elif result.exit_code == 1: return False else: raise errors.CommandError("Running pgrep failed. 
exit code: %s" % result.exit_code) class LockedUid(object): """Class representing a locked user-id in the uid-pool. This binds together a userid and a lock. """ def __init__(self, uid, lock): """Constructor @param uid: a user-id @param lock: a utils.FileLock object """ self._uid = uid self._lock = lock def Unlock(self): # Release the exclusive lock and close the filedescriptor self._lock.Close() def GetUid(self): return self._uid def AsStr(self): return "%s" % self._uid def RequestUnusedUid(all_uids): """Tries to find an unused uid from the uid-pool, locks it and returns it. Usage pattern ============= 1. When starting a process:: from ganeti import ssconf from ganeti import uidpool # Get list of all user-ids in the uid-pool from ssconf ss = ssconf.SimpleStore() uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\\n") all_uids = set(uidpool.ExpandUidPool(uid_pool)) uid = uidpool.RequestUnusedUid(all_uids) try: # Once the process is started, we can release the file lock uid.Unlock() except ..., err: # Return the UID to the pool uidpool.ReleaseUid(uid) 2. Stopping a process:: from ganeti import uidpool uid = uidpool.ReleaseUid(uid) @type all_uids: set of integers @param all_uids: a set containing all the user-ids in the user-id pool @return: a LockedUid object representing the unused uid. It's the caller's responsibility to unlock the uid once an instance is started with this uid. """ # Create the lock dir if it's not yet present try: utils.EnsureDirs([(pathutils.UIDPOOL_LOCKDIR, 0755)]) except errors.GenericError, err: raise errors.LockError("Failed to create user-id pool lock dir: %s" % err) # Get list of currently used uids from the filesystem try: taken_uids = set() for taken_uid in os.listdir(pathutils.UIDPOOL_LOCKDIR): try: taken_uid = int(taken_uid) except ValueError, err: # Skip directory entries that can't be converted into an integer continue taken_uids.add(taken_uid) except OSError, err: raise errors.LockError("Failed to get list of used user-ids: %s" % err) # Filter out spurious entries from the directory listing taken_uids = all_uids.intersection(taken_uids) # Remove the list of used uids from the list of all uids unused_uids = list(all_uids - taken_uids) if not unused_uids: logging.info("All user-ids in the uid-pool are marked 'taken'") # Randomize the order of the unused user-id list random.shuffle(unused_uids) # Randomize the order of the unused user-id list taken_uids = list(taken_uids) random.shuffle(taken_uids) for uid in (unused_uids + taken_uids): try: # Create the lock file # Note: we don't care if it exists. Only the fact that we can # (or can't) lock it later is what matters. 
uid_path = utils.PathJoin(pathutils.UIDPOOL_LOCKDIR, str(uid)) lock = utils.FileLock.Open(uid_path) except OSError, err: raise errors.LockError("Failed to create lockfile for user-id %s: %s" % (uid, err)) try: # Try acquiring an exclusive lock on the lock file lock.Exclusive() # Check if there is any process running with this user-id if _IsUidUsed(uid): logging.debug("There is already a process running under" " user-id %s", uid) lock.Unlock() continue return LockedUid(uid, lock) except IOError, err: if err.errno == errno.EAGAIN: # The file is already locked, let's skip it and try another unused uid logging.debug("Lockfile for user-id %s is already locked: %s", uid, err) continue except errors.LockError, err: # There was an unexpected error while trying to lock the file logging.error("Failed to lock the lockfile for user-id %s: %s", uid, err) raise raise errors.LockError("Failed to find an unused user-id") def ReleaseUid(uid): """This should be called when the given user-id is no longer in use. @type uid: LockedUid or integer @param uid: the uid to release back to the pool """ if isinstance(uid, LockedUid): # Make sure we release the exclusive lock, if there is any uid.Unlock() uid_filename = uid.AsStr() else: uid_filename = str(uid) try: uid_path = utils.PathJoin(pathutils.UIDPOOL_LOCKDIR, uid_filename) os.remove(uid_path) except OSError, err: raise errors.LockError("Failed to remove user-id lockfile" " for user-id %s: %s" % (uid_filename, err)) def ExecWithUnusedUid(fn, all_uids, *args, **kwargs): """Execute a callable and provide an unused user-id in its kwargs. This wrapper function provides a simple way to handle the requesting, unlocking and releasing a user-id. "fn" is called by passing a "uid" keyword argument that contains an unused user-id (as an integer) selected from the set of user-ids passed in all_uids. If there is an error while executing "fn", the user-id is returned to the pool. @param fn: a callable that accepts a keyword argument called "uid" @type all_uids: a set of integers @param all_uids: a set containing all user-ids in the user-id pool """ uid = RequestUnusedUid(all_uids) kwargs["uid"] = uid.GetUid() try: return_value = fn(*args, **kwargs) except: # The failure of "fn" means that starting a process with the uid # failed, so let's put the uid back into the pool. ReleaseUid(uid) raise uid.Unlock() return return_value ganeti-2.9.3/lib/runtime.py0000644000000000000000000001542212271422343015601 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing configuration details at runtime.
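The main entry point is L{GetEnts}, which lazily creates and caches a
L{GetentResolver}, e.g. (illustrative)::

  getents = runtime.GetEnts()
  masterd_uid = getents.masterd_uid
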
""" import grp import pwd import threading import platform from ganeti import constants from ganeti import errors from ganeti import utils _priv = None _priv_lock = threading.Lock() #: Architecture information _arch = None def GetUid(user, _getpwnam): """Retrieve the uid from the database. @type user: string @param user: The username to retrieve @return: The resolved uid """ try: return _getpwnam(user).pw_uid except KeyError, err: raise errors.ConfigurationError("User '%s' not found (%s)" % (user, err)) def GetGid(group, _getgrnam): """Retrieve the gid from the database. @type group: string @param group: The group name to retrieve @return: The resolved gid """ try: return _getgrnam(group).gr_gid except KeyError, err: raise errors.ConfigurationError("Group '%s' not found (%s)" % (group, err)) class GetentResolver: """Resolves Ganeti uids and gids by name. @ivar masterd_uid: The resolved uid of the masterd user @ivar masterd_gid: The resolved gid of the masterd group @ivar confd_uid: The resolved uid of the confd user @ivar confd_gid: The resolved gid of the confd group @ivar luxid_uid: The resolved uid of the luxid user @ivar luxid_gid: The resolved gid of the luxid group @ivar rapi_uid: The resolved uid of the rapi user @ivar rapi_gid: The resolved gid of the rapi group @ivar noded_uid: The resolved uid of the noded user @ivar daemons_gid: The resolved gid of the daemons group @ivar admin_gid: The resolved gid of the admin group """ def __init__(self, _getpwnam=pwd.getpwnam, _getgrnam=grp.getgrnam): """Initialize the resolver. """ # Daemon pairs self.masterd_uid = GetUid(constants.MASTERD_USER, _getpwnam) self.masterd_gid = GetGid(constants.MASTERD_GROUP, _getgrnam) self.confd_uid = GetUid(constants.CONFD_USER, _getpwnam) self.confd_gid = GetGid(constants.CONFD_GROUP, _getgrnam) self.luxid_uid = GetUid(constants.LUXID_USER, _getpwnam) self.luxid_gid = GetGid(constants.LUXID_GROUP, _getgrnam) self.rapi_uid = GetUid(constants.RAPI_USER, _getpwnam) self.rapi_gid = GetGid(constants.RAPI_GROUP, _getgrnam) self.noded_uid = GetUid(constants.NODED_USER, _getpwnam) self.noded_gid = GetGid(constants.NODED_GROUP, _getgrnam) self.mond_uid = GetUid(constants.MOND_USER, _getpwnam) self.mond_gid = GetGid(constants.MOND_GROUP, _getgrnam) # Misc Ganeti groups self.daemons_gid = GetGid(constants.DAEMONS_GROUP, _getgrnam) self.admin_gid = GetGid(constants.ADMIN_GROUP, _getgrnam) self._uid2user = { self.masterd_uid: constants.MASTERD_USER, self.confd_uid: constants.CONFD_USER, self.luxid_uid: constants.LUXID_USER, self.rapi_uid: constants.RAPI_USER, self.noded_uid: constants.NODED_USER, self.mond_uid: constants.MOND_USER, } self._gid2group = { self.masterd_gid: constants.MASTERD_GROUP, self.confd_gid: constants.CONFD_GROUP, self.luxid_gid: constants.LUXID_GROUP, self.rapi_gid: constants.RAPI_GROUP, self.noded_gid: constants.NODED_GROUP, self.mond_gid: constants.MOND_GROUP, self.daemons_gid: constants.DAEMONS_GROUP, self.admin_gid: constants.ADMIN_GROUP, } self._user2uid = utils.InvertDict(self._uid2user) self._group2gid = utils.InvertDict(self._gid2group) def LookupUid(self, uid): """Looks which Ganeti user belongs to this uid. @param uid: The uid to lookup @returns The user name associated with that uid """ try: return self._uid2user[uid] except KeyError: raise errors.ConfigurationError("Unknown Ganeti uid '%d'" % uid) def LookupGid(self, gid): """Looks which Ganeti group belongs to this gid. 
@param gid: The gid to lookup @returns The group name associated with that gid """ try: return self._gid2group[gid] except KeyError: raise errors.ConfigurationError("Unknown Ganeti gid '%d'" % gid) def LookupUser(self, name): """Looks which uid belongs to this name. @param name: The name to lookup @returns The uid associated with that user name """ try: return self._user2uid[name] except KeyError: raise errors.ConfigurationError("Unknown Ganeti user '%s'" % name) def LookupGroup(self, name): """Looks which gid belongs to this name. @param name: The name to lookup @returns The gid associated with that group name """ try: return self._group2gid[name] except KeyError: raise errors.ConfigurationError("Unknown Ganeti group '%s'" % name) def GetEnts(resolver=GetentResolver): """Singleton wrapper around resolver instance. As this method is accessed by multiple threads at the same time we need to handle thread-safety carefully. """ # We need to use the global keyword here global _priv # pylint: disable=W0603 if not _priv: _priv_lock.acquire() try: if not _priv: # W0621: Redefine '_priv' from outer scope (used for singleton) _priv = resolver() # pylint: disable=W0621 finally: _priv_lock.release() return _priv def InitArchInfo(): """Initialize architecture information. We can assume this information never changes during the lifetime of a process, therefore the information can easily be cached. @note: This function uses C{platform.architecture} to retrieve the Python binary architecture and does so by forking to run C{file} (see Python documentation for more information). Therefore it must not be used in a multi-threaded environment. """ global _arch # pylint: disable=W0603 if _arch is not None: raise errors.ProgrammerError("Architecture information can only be" " initialized once") _arch = (platform.architecture()[0], platform.machine()) def GetArchInfo(): """Returns previously initialized architecture information. """ if _arch is None: raise errors.ProgrammerError("Architecture information hasn't been" " initialized") return _arch ganeti-2.9.3/lib/masterd/0000755000000000000000000000000012271445544015207 5ustar00rootroot00000000000000ganeti-2.9.3/lib/masterd/instance.py0000644000000000000000000013463012271422343017364 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Instance-related functions and classes for masterd. """ import logging import time import OpenSSL from ganeti import constants from ganeti import errors from ganeti import compat from ganeti import utils from ganeti import objects from ganeti import netutils from ganeti import pathutils class _ImportExportError(Exception): """Local exception to report import/export errors.
""" class ImportExportTimeouts(object): #: Time until daemon starts writing status file DEFAULT_READY_TIMEOUT = 10 #: Length of time until errors cause hard failure DEFAULT_ERROR_TIMEOUT = 10 #: Time after which daemon must be listening DEFAULT_LISTEN_TIMEOUT = 10 #: Progress update interval DEFAULT_PROGRESS_INTERVAL = 60 __slots__ = [ "error", "ready", "listen", "connect", "progress", ] def __init__(self, connect, listen=DEFAULT_LISTEN_TIMEOUT, error=DEFAULT_ERROR_TIMEOUT, ready=DEFAULT_READY_TIMEOUT, progress=DEFAULT_PROGRESS_INTERVAL): """Initializes this class. @type connect: number @param connect: Timeout for establishing connection @type listen: number @param listen: Timeout for starting to listen for connections @type error: number @param error: Length of time until errors cause hard failure @type ready: number @param ready: Timeout for daemon to become ready @type progress: number @param progress: Progress update interval """ self.error = error self.ready = ready self.listen = listen self.connect = connect self.progress = progress class ImportExportCbBase(object): """Callbacks for disk import/export. """ def ReportListening(self, ie, private, component): """Called when daemon started listening. @type ie: Subclass of L{_DiskImportExportBase} @param ie: Import/export object @param private: Private data passed to import/export object @param component: transfer component name """ def ReportConnected(self, ie, private): """Called when a connection has been established. @type ie: Subclass of L{_DiskImportExportBase} @param ie: Import/export object @param private: Private data passed to import/export object """ def ReportProgress(self, ie, private): """Called when new progress information should be reported. @type ie: Subclass of L{_DiskImportExportBase} @param ie: Import/export object @param private: Private data passed to import/export object """ def ReportFinished(self, ie, private): """Called when a transfer has finished. @type ie: Subclass of L{_DiskImportExportBase} @param ie: Import/export object @param private: Private data passed to import/export object """ class _DiskImportExportBase(object): MODE_TEXT = None def __init__(self, lu, node_uuid, opts, instance, component, timeouts, cbs, private=None): """Initializes this class. 
@param lu: Logical unit instance @type node_uuid: string @param node_uuid: Node UUID for import @type opts: L{objects.ImportExportOptions} @param opts: Import/export daemon options @type instance: L{objects.Instance} @param instance: Instance object @type component: string @param component: which part of the instance is being imported @type timeouts: L{ImportExportTimeouts} @param timeouts: Timeouts for this import @type cbs: L{ImportExportCbBase} @param cbs: Callbacks @param private: Private data for callback functions """ assert self.MODE_TEXT self._lu = lu self.node_uuid = node_uuid self.node_name = lu.cfg.GetNodeName(node_uuid) self._opts = opts.Copy() self._instance = instance self._component = component self._timeouts = timeouts self._cbs = cbs self._private = private # Set master daemon's timeout in options for import/export daemon assert self._opts.connect_timeout is None self._opts.connect_timeout = timeouts.connect # Parent loop self._loop = None # Timestamps self._ts_begin = None self._ts_connected = None self._ts_finished = None self._ts_cleanup = None self._ts_last_progress = None self._ts_last_error = None # Transfer status self.success = None self.final_message = None # Daemon status self._daemon_name = None self._daemon = None @property def recent_output(self): """Returns the most recent output from the daemon. """ if self._daemon: return "\n".join(self._daemon.recent_output) return None @property def progress(self): """Returns transfer progress information. """ if not self._daemon: return None return (self._daemon.progress_mbytes, self._daemon.progress_throughput, self._daemon.progress_percent, self._daemon.progress_eta) @property def magic(self): """Returns the magic value for this import/export. """ return self._opts.magic @property def active(self): """Determines whether this transport is still active. """ return self.success is None @property def loop(self): """Returns parent loop. @rtype: L{ImportExportLoop} """ return self._loop def SetLoop(self, loop): """Sets the parent loop. @type loop: L{ImportExportLoop} """ if self._loop: raise errors.ProgrammerError("Loop can only be set once") self._loop = loop def _StartDaemon(self): """Starts the import/export daemon. """ raise NotImplementedError() def CheckDaemon(self): """Checks whether daemon has been started and if not, starts it. @rtype: string @return: Daemon name """ assert self._ts_cleanup is None if self._daemon_name is None: assert self._ts_begin is None result = self._StartDaemon() if result.fail_msg: raise _ImportExportError("Failed to start %s on %s: %s" % (self.MODE_TEXT, self.node_name, result.fail_msg)) daemon_name = result.payload logging.info("Started %s '%s' on %s", self.MODE_TEXT, daemon_name, self.node_name) self._ts_begin = time.time() self._daemon_name = daemon_name return self._daemon_name def GetDaemonName(self): """Returns the daemon name. """ assert self._daemon_name, "Daemon has not been started" assert self._ts_cleanup is None return self._daemon_name def Abort(self): """Sends SIGTERM to import/export daemon (if still active). """ if self._daemon_name: self._lu.LogWarning("Aborting %s '%s' on %s", self.MODE_TEXT, self._daemon_name, self.node_uuid) result = self._lu.rpc.call_impexp_abort(self.node_uuid, self._daemon_name) if result.fail_msg: self._lu.LogWarning("Failed to abort %s '%s' on %s: %s", self.MODE_TEXT, self._daemon_name, self.node_uuid, result.fail_msg) return False return True def _SetDaemonData(self, data): """Internal function for updating status daemon data. 
@type data: L{objects.ImportExportStatus} @param data: Daemon status data """ assert self._ts_begin is not None if not data: if utils.TimeoutExpired(self._ts_begin, self._timeouts.ready): raise _ImportExportError("Didn't become ready after %s seconds" % self._timeouts.ready) return False self._daemon = data return True def SetDaemonData(self, success, data): """Updates daemon status data. @type success: bool @param success: Whether fetching data was successful or not @type data: L{objects.ImportExportStatus} @param data: Daemon status data """ if not success: if self._ts_last_error is None: self._ts_last_error = time.time() elif utils.TimeoutExpired(self._ts_last_error, self._timeouts.error): raise _ImportExportError("Too many errors while updating data") return False self._ts_last_error = None return self._SetDaemonData(data) def CheckListening(self): """Checks whether the daemon is listening. """ raise NotImplementedError() def _GetConnectedCheckEpoch(self): """Returns timeout to calculate connect timeout. """ raise NotImplementedError() def CheckConnected(self): """Checks whether the daemon is connected. @rtype: bool @return: Whether the daemon is connected """ assert self._daemon, "Daemon status missing" if self._ts_connected is not None: return True if self._daemon.connected: self._ts_connected = time.time() # TODO: Log remote peer logging.debug("%s '%s' on %s is now connected", self.MODE_TEXT, self._daemon_name, self.node_uuid) self._cbs.ReportConnected(self, self._private) return True if utils.TimeoutExpired(self._GetConnectedCheckEpoch(), self._timeouts.connect): raise _ImportExportError("Not connected after %s seconds" % self._timeouts.connect) return False def _CheckProgress(self): """Checks whether a progress update should be reported. """ if ((self._ts_last_progress is None or utils.TimeoutExpired(self._ts_last_progress, self._timeouts.progress)) and self._daemon and self._daemon.progress_mbytes is not None and self._daemon.progress_throughput is not None): self._cbs.ReportProgress(self, self._private) self._ts_last_progress = time.time() def CheckFinished(self): """Checks whether the daemon exited. @rtype: bool @return: Whether the transfer is finished """ assert self._daemon, "Daemon status missing" if self._ts_finished: return True if self._daemon.exit_status is None: # TODO: Adjust delay for ETA expiring soon self._CheckProgress() return False self._ts_finished = time.time() self._ReportFinished(self._daemon.exit_status == 0, self._daemon.error_message) return True def _ReportFinished(self, success, message): """Transfer is finished or daemon exited. @type success: bool @param success: Whether the transfer was successful @type message: string @param message: Error message """ assert self.success is None self.success = success self.final_message = message if success: logging.info("%s '%s' on %s succeeded", self.MODE_TEXT, self._daemon_name, self.node_uuid) elif self._daemon_name: self._lu.LogWarning("%s '%s' on %s failed: %s", self.MODE_TEXT, self._daemon_name, self._lu.cfg.GetNodeName(self.node_uuid), message) else: self._lu.LogWarning("%s on %s failed: %s", self.MODE_TEXT, self._lu.cfg.GetNodeName(self.node_uuid), message) self._cbs.ReportFinished(self, self._private) def _Finalize(self): """Makes the RPC call to finalize this import/export. """ return self._lu.rpc.call_impexp_cleanup(self.node_uuid, self._daemon_name) def Finalize(self, error=None): """Finalizes this import/export. 
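If the daemon is still registered, this makes the cleanup RPC on its node;
when C{error} is given, the transfer is additionally reported as failed.
Returns C{False} if the finalization RPC failed.
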
""" if self._daemon_name: logging.info("Finalizing %s '%s' on %s", self.MODE_TEXT, self._daemon_name, self.node_uuid) result = self._Finalize() if result.fail_msg: self._lu.LogWarning("Failed to finalize %s '%s' on %s: %s", self.MODE_TEXT, self._daemon_name, self.node_uuid, result.fail_msg) return False # Daemon is no longer running self._daemon_name = None self._ts_cleanup = time.time() if error: self._ReportFinished(False, error) return True class DiskImport(_DiskImportExportBase): MODE_TEXT = "import" def __init__(self, lu, node_uuid, opts, instance, component, dest, dest_args, timeouts, cbs, private=None): """Initializes this class. @param lu: Logical unit instance @type node_uuid: string @param node_uuid: Node name for import @type opts: L{objects.ImportExportOptions} @param opts: Import/export daemon options @type instance: L{objects.Instance} @param instance: Instance object @type component: string @param component: which part of the instance is being imported @param dest: I/O destination @param dest_args: I/O arguments @type timeouts: L{ImportExportTimeouts} @param timeouts: Timeouts for this import @type cbs: L{ImportExportCbBase} @param cbs: Callbacks @param private: Private data for callback functions """ _DiskImportExportBase.__init__(self, lu, node_uuid, opts, instance, component, timeouts, cbs, private) self._dest = dest self._dest_args = dest_args # Timestamps self._ts_listening = None @property def listen_port(self): """Returns the port the daemon is listening on. """ if self._daemon: return self._daemon.listen_port return None def _StartDaemon(self): """Starts the import daemon. """ return self._lu.rpc.call_import_start(self.node_uuid, self._opts, self._instance, self._component, (self._dest, self._dest_args)) def CheckListening(self): """Checks whether the daemon is listening. @rtype: bool @return: Whether the daemon is listening """ assert self._daemon, "Daemon status missing" if self._ts_listening is not None: return True port = self._daemon.listen_port if port is not None: self._ts_listening = time.time() logging.debug("Import '%s' on %s is now listening on port %s", self._daemon_name, self.node_uuid, port) self._cbs.ReportListening(self, self._private, self._component) return True if utils.TimeoutExpired(self._ts_begin, self._timeouts.listen): raise _ImportExportError("Not listening after %s seconds" % self._timeouts.listen) return False def _GetConnectedCheckEpoch(self): """Returns the time since we started listening. """ assert self._ts_listening is not None, \ ("Checking whether an import is connected is only useful" " once it's been listening") return self._ts_listening class DiskExport(_DiskImportExportBase): MODE_TEXT = "export" def __init__(self, lu, node_uuid, opts, dest_host, dest_port, instance, component, source, source_args, timeouts, cbs, private=None): """Initializes this class. 
@param lu: Logical unit instance @type node_uuid: string @param node_uuid: Node UUID for export @type opts: L{objects.ImportExportOptions} @param opts: Import/export daemon options @type dest_host: string @param dest_host: Destination host name or IP address @type dest_port: number @param dest_port: Destination port number @type instance: L{objects.Instance} @param instance: Instance object @type component: string @param component: which part of the instance is being exported @param source: I/O source @param source_args: I/O source arguments @type timeouts: L{ImportExportTimeouts} @param timeouts: Timeouts for this export @type cbs: L{ImportExportCbBase} @param cbs: Callbacks @param private: Private data for callback functions """ _DiskImportExportBase.__init__(self, lu, node_uuid, opts, instance, component, timeouts, cbs, private) self._dest_host = dest_host self._dest_port = dest_port self._source = source self._source_args = source_args def _StartDaemon(self): """Starts the export daemon. """ return self._lu.rpc.call_export_start(self.node_uuid, self._opts, self._dest_host, self._dest_port, self._instance, self._component, (self._source, self._source_args)) def CheckListening(self): """Checks whether the daemon is listening. """ # Only an import can be listening return True def _GetConnectedCheckEpoch(self): """Returns the time since the daemon started. """ assert self._ts_begin is not None return self._ts_begin def FormatProgress(progress): """Formats progress information for user consumption. """ (mbytes, throughput, percent, eta) = progress parts = [ utils.FormatUnit(mbytes, "h"), # Not using FormatUnit as it doesn't support kilobytes "%0.1f MiB/s" % throughput, ] if percent is not None: parts.append("%d%%" % percent) if eta is not None: parts.append("ETA %s" % utils.FormatSeconds(eta)) return utils.CommaJoin(parts) class ImportExportLoop: MIN_DELAY = 1.0 MAX_DELAY = 20.0 def __init__(self, lu): """Initializes this class. """ self._lu = lu self._queue = [] self._pending_add = [] def Add(self, diskie): """Adds an import/export object to the loop. @type diskie: Subclass of L{_DiskImportExportBase} @param diskie: Import/export object """ assert diskie not in self._pending_add assert diskie.loop is None diskie.SetLoop(self) # Adding new objects to a staging list is necessary, otherwise the main # loop gets confused if callbacks modify the queue while the main loop is # iterating over it. self._pending_add.append(diskie) @staticmethod def _CollectDaemonStatus(lu, daemons): """Collects the status for all import/export daemons. """ daemon_status = {} for node_name, names in daemons.iteritems(): result = lu.rpc.call_impexp_status(node_name, names) if result.fail_msg: lu.LogWarning("Failed to get daemon status on %s: %s", node_name, result.fail_msg) continue assert len(names) == len(result.payload) daemon_status[node_name] = dict(zip(names, result.payload)) return daemon_status @staticmethod def _GetActiveDaemonNames(queue): """Gets the names of all active daemons.
""" result = {} for diskie in queue: if not diskie.active: continue try: # Start daemon if necessary daemon_name = diskie.CheckDaemon() except _ImportExportError, err: logging.exception("%s failed", diskie.MODE_TEXT) diskie.Finalize(error=str(err)) continue result.setdefault(diskie.node_name, []).append(daemon_name) assert len(queue) >= len(result) assert len(queue) >= sum([len(names) for names in result.itervalues()]) logging.debug("daemons=%r", result) return result def _AddPendingToQueue(self): """Adds all pending import/export objects to the internal queue. """ assert compat.all(diskie not in self._queue and diskie.loop == self for diskie in self._pending_add) self._queue.extend(self._pending_add) del self._pending_add[:] def Run(self): """Utility main loop. """ while True: self._AddPendingToQueue() # Collect all active daemon names daemons = self._GetActiveDaemonNames(self._queue) if not daemons: break # Collection daemon status data data = self._CollectDaemonStatus(self._lu, daemons) # Use data delay = self.MAX_DELAY for diskie in self._queue: if not diskie.active: continue try: try: all_daemon_data = data[diskie.node_name] except KeyError: result = diskie.SetDaemonData(False, None) else: result = \ diskie.SetDaemonData(True, all_daemon_data[diskie.GetDaemonName()]) if not result: # Daemon not yet ready, retry soon delay = min(3.0, delay) continue if diskie.CheckFinished(): # Transfer finished diskie.Finalize() continue # Normal case: check again in 5 seconds delay = min(5.0, delay) if not diskie.CheckListening(): # Not yet listening, retry soon delay = min(1.0, delay) continue if not diskie.CheckConnected(): # Not yet connected, retry soon delay = min(1.0, delay) continue except _ImportExportError, err: logging.exception("%s failed", diskie.MODE_TEXT) diskie.Finalize(error=str(err)) if not compat.any(diskie.active for diskie in self._queue): break # Wait a bit delay = min(self.MAX_DELAY, max(self.MIN_DELAY, delay)) logging.debug("Waiting for %ss", delay) time.sleep(delay) def FinalizeAll(self): """Finalizes all pending transfers. """ success = True for diskie in self._queue: success = diskie.Finalize() and success return success class _TransferInstCbBase(ImportExportCbBase): def __init__(self, lu, feedback_fn, instance, timeouts, src_node_uuid, src_cbs, dest_node_uuid, dest_ip): """Initializes this class. """ ImportExportCbBase.__init__(self) self.lu = lu self.feedback_fn = feedback_fn self.instance = instance self.timeouts = timeouts self.src_node_uuid = src_node_uuid self.src_cbs = src_cbs self.dest_node_uuid = dest_node_uuid self.dest_ip = dest_ip class _TransferInstSourceCb(_TransferInstCbBase): def ReportConnected(self, ie, dtp): """Called when a connection has been established. """ assert self.src_cbs is None assert dtp.src_export == ie assert dtp.dest_import self.feedback_fn("%s is sending data on %s" % (dtp.data.name, ie.node_name)) def ReportProgress(self, ie, dtp): """Called when new progress information should be reported. """ progress = ie.progress if not progress: return self.feedback_fn("%s sent %s" % (dtp.data.name, FormatProgress(progress))) def ReportFinished(self, ie, dtp): """Called when a transfer has finished. 
""" assert self.src_cbs is None assert dtp.src_export == ie assert dtp.dest_import if ie.success: self.feedback_fn("%s finished sending data" % dtp.data.name) else: self.feedback_fn("%s failed to send data: %s (recent output: %s)" % (dtp.data.name, ie.final_message, ie.recent_output)) dtp.RecordResult(ie.success) cb = dtp.data.finished_fn if cb: cb() # TODO: Check whether sending SIGTERM right away is okay, maybe we should # give the daemon a moment to sort things out if dtp.dest_import and not ie.success: dtp.dest_import.Abort() class _TransferInstDestCb(_TransferInstCbBase): def ReportListening(self, ie, dtp, component): """Called when daemon started listening. """ assert self.src_cbs assert dtp.src_export is None assert dtp.dest_import assert dtp.export_opts self.feedback_fn("%s is now listening, starting export" % dtp.data.name) # Start export on source node de = DiskExport(self.lu, self.src_node_uuid, dtp.export_opts, self.dest_ip, ie.listen_port, self.instance, component, dtp.data.src_io, dtp.data.src_ioargs, self.timeouts, self.src_cbs, private=dtp) ie.loop.Add(de) dtp.src_export = de def ReportConnected(self, ie, dtp): """Called when a connection has been established. """ self.feedback_fn("%s is receiving data on %s" % (dtp.data.name, self.lu.cfg.GetNodeName(self.dest_node_uuid))) def ReportFinished(self, ie, dtp): """Called when a transfer has finished. """ if ie.success: self.feedback_fn("%s finished receiving data" % dtp.data.name) else: self.feedback_fn("%s failed to receive data: %s (recent output: %s)" % (dtp.data.name, ie.final_message, ie.recent_output)) dtp.RecordResult(ie.success) # TODO: Check whether sending SIGTERM right away is okay, maybe we should # give the daemon a moment to sort things out if dtp.src_export and not ie.success: dtp.src_export.Abort() class DiskTransfer(object): def __init__(self, name, src_io, src_ioargs, dest_io, dest_ioargs, finished_fn): """Initializes this class. @type name: string @param name: User-visible name for this transfer (e.g. "disk/0") @param src_io: Source I/O type @param src_ioargs: Source I/O arguments @param dest_io: Destination I/O type @param dest_ioargs: Destination I/O arguments @type finished_fn: callable @param finished_fn: Function called once transfer has finished """ self.name = name self.src_io = src_io self.src_ioargs = src_ioargs self.dest_io = dest_io self.dest_ioargs = dest_ioargs self.finished_fn = finished_fn class _DiskTransferPrivate(object): def __init__(self, data, success, export_opts): """Initializes this class. @type data: L{DiskTransfer} @type success: bool """ self.data = data self.success = success self.export_opts = export_opts self.src_export = None self.dest_import = None def RecordResult(self, success): """Updates the status. One failed part will cause the whole transfer to fail. """ self.success = self.success and success def _GetInstDiskMagic(base, instance_name, index): """Computes the magic value for a disk export or import. @type base: string @param base: Random seed value (can be the same for all disks of a transfer) @type instance_name: string @param instance_name: Name of instance @type index: number @param index: Disk index """ h = compat.sha1_hash() h.update(str(constants.RIE_VERSION)) h.update(base) h.update(instance_name) h.update(str(index)) return h.hexdigest() def TransferInstanceData(lu, feedback_fn, src_node_uuid, dest_node_uuid, dest_ip, instance, all_transfers): """Transfers an instance's data from one node to another. 
@param lu: Logical unit instance @param feedback_fn: Feedback function @type src_node_uuid: string @param src_node_uuid: Source node UUID @type dest_node_uuid: string @param dest_node_uuid: Destination node UUID @type dest_ip: string @param dest_ip: IP address of destination node @type instance: L{objects.Instance} @param instance: Instance object @type all_transfers: list of L{DiskTransfer} instances @param all_transfers: List of all disk transfers to be made @rtype: list @return: List with a boolean (True=successful, False=failed) for success for each transfer """ # Disable compression for all moves as these are all within the same cluster compress = constants.IEC_NONE src_node_name = lu.cfg.GetNodeName(src_node_uuid) dest_node_name = lu.cfg.GetNodeName(dest_node_uuid) logging.debug("Source node %s, destination node %s, compression '%s'", src_node_name, dest_node_name, compress) timeouts = ImportExportTimeouts(constants.DISK_TRANSFER_CONNECT_TIMEOUT) src_cbs = _TransferInstSourceCb(lu, feedback_fn, instance, timeouts, src_node_uuid, None, dest_node_uuid, dest_ip) dest_cbs = _TransferInstDestCb(lu, feedback_fn, instance, timeouts, src_node_uuid, src_cbs, dest_node_uuid, dest_ip) all_dtp = [] base_magic = utils.GenerateSecret(6) ieloop = ImportExportLoop(lu) try: for idx, transfer in enumerate(all_transfers): if transfer: feedback_fn("Exporting %s from %s to %s" % (transfer.name, src_node_name, dest_node_name)) magic = _GetInstDiskMagic(base_magic, instance.name, idx) opts = objects.ImportExportOptions(key_name=None, ca_pem=None, compress=compress, magic=magic) dtp = _DiskTransferPrivate(transfer, True, opts) di = DiskImport(lu, dest_node_uuid, opts, instance, "disk%d" % idx, transfer.dest_io, transfer.dest_ioargs, timeouts, dest_cbs, private=dtp) ieloop.Add(di) dtp.dest_import = di else: dtp = _DiskTransferPrivate(None, False, None) all_dtp.append(dtp) ieloop.Run() finally: ieloop.FinalizeAll() assert len(all_dtp) == len(all_transfers) assert compat.all((dtp.src_export is None or dtp.src_export.success is not None) and (dtp.dest_import is None or dtp.dest_import.success is not None) for dtp in all_dtp), \ "Not all imports/exports are finalized" return [bool(dtp.success) for dtp in all_dtp] class _RemoteExportCb(ImportExportCbBase): def __init__(self, feedback_fn, disk_count): """Initializes this class. """ ImportExportCbBase.__init__(self) self._feedback_fn = feedback_fn self._dresults = [None] * disk_count @property def disk_results(self): """Returns per-disk results. """ return self._dresults def ReportConnected(self, ie, private): """Called when a connection has been established. """ (idx, _) = private self._feedback_fn("Disk %s is now sending data" % idx) def ReportProgress(self, ie, private): """Called when new progress information should be reported. """ (idx, _) = private progress = ie.progress if not progress: return self._feedback_fn("Disk %s sent %s" % (idx, FormatProgress(progress))) def ReportFinished(self, ie, private): """Called when a transfer has finished. """ (idx, finished_fn) = private if ie.success: self._feedback_fn("Disk %s finished sending data" % idx) else: self._feedback_fn("Disk %s failed to send data: %s (recent output: %s)" % (idx, ie.final_message, ie.recent_output)) self._dresults[idx] = bool(ie.success) if finished_fn: finished_fn() class ExportInstanceHelper: def __init__(self, lu, feedback_fn, instance): """Initializes this class. 
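The expected calling sequence is roughly (illustrative)::

    helper = ExportInstanceHelper(lu, feedback_fn, instance)
    helper.CreateSnapshots()
    try:
      (fin_resu, dresults) = helper.LocalExport(dest_node)
    finally:
      helper.Cleanup()
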
@param lu: Logical unit instance @param feedback_fn: Feedback function @type instance: L{objects.Instance} @param instance: Instance object """ self._lu = lu self._feedback_fn = feedback_fn self._instance = instance self._snap_disks = [] self._removed_snaps = [False] * len(instance.disks) def CreateSnapshots(self): """Creates an LVM snapshot for every disk of the instance. """ assert not self._snap_disks instance = self._instance src_node = instance.primary_node for idx, disk in enumerate(instance.disks): self._feedback_fn("Creating a snapshot of disk/%s on node %s" % (idx, src_node)) # result.payload will be a snapshot of an lvm leaf of the one we # passed result = self._lu.rpc.call_blockdev_snapshot(src_node, (disk, instance)) new_dev = False msg = result.fail_msg if msg: self._lu.LogWarning("Could not snapshot disk/%s on node %s: %s", idx, src_node, msg) elif (not isinstance(result.payload, (tuple, list)) or len(result.payload) != 2): self._lu.LogWarning("Could not snapshot disk/%s on node %s: invalid" " result '%s'", idx, src_node, result.payload) else: disk_id = tuple(result.payload) disk_params = constants.DISK_LD_DEFAULTS[constants.DT_PLAIN].copy() new_dev = objects.Disk(dev_type=constants.DT_PLAIN, size=disk.size, logical_id=disk_id, physical_id=disk_id, iv_name=disk.iv_name, params=disk_params) self._snap_disks.append(new_dev) assert len(self._snap_disks) == len(instance.disks) assert len(self._removed_snaps) == len(instance.disks) def _RemoveSnapshot(self, disk_index): """Removes an LVM snapshot. @type disk_index: number @param disk_index: Index of the snapshot to be removed """ disk = self._snap_disks[disk_index] if disk and not self._removed_snaps[disk_index]: src_node = self._instance.primary_node self._feedback_fn("Removing snapshot of disk/%s on node %s" % (disk_index, src_node)) result = self._lu.rpc.call_blockdev_remove(src_node, disk) if result.fail_msg: self._lu.LogWarning("Could not remove snapshot for disk/%d from node" " %s: %s", disk_index, src_node, result.fail_msg) else: self._removed_snaps[disk_index] = True def LocalExport(self, dest_node): """Intra-cluster instance export. @type dest_node: L{objects.Node} @param dest_node: Destination node """ instance = self._instance src_node_uuid = instance.primary_node assert len(self._snap_disks) == len(instance.disks) transfers = [] for idx, dev in enumerate(self._snap_disks): if not dev: transfers.append(None) continue path = utils.PathJoin(pathutils.EXPORT_DIR, "%s.new" % instance.name, dev.physical_id[1]) finished_fn = compat.partial(self._TransferFinished, idx) # FIXME: pass debug option from opcode to backend dt = DiskTransfer("snapshot/%s" % idx, constants.IEIO_SCRIPT, (dev, idx), constants.IEIO_FILE, (path, ), finished_fn) transfers.append(dt) # Actually export data dresults = TransferInstanceData(self._lu, self._feedback_fn, src_node_uuid, dest_node.uuid, dest_node.secondary_ip, instance, transfers) assert len(dresults) == len(instance.disks) self._feedback_fn("Finalizing export on %s" % dest_node.name) result = self._lu.rpc.call_finalize_export(dest_node.uuid, instance, self._snap_disks) msg = result.fail_msg fin_resu = not msg if msg: self._lu.LogWarning("Could not finalize export for instance %s" " on node %s: %s", instance.name, dest_node.name, msg) return (fin_resu, dresults) def RemoteExport(self, disk_info, key_name, dest_ca_pem, timeouts): """Inter-cluster instance export. 
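Each element of C{disk_info} is unpacked as a C{(host, port, magic)} tuple,
i.e. the per-disk destination information verified by
L{CheckRemoteExportDiskInfo}.
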
@type disk_info: list @param disk_info: Per-disk destination information @type key_name: string @param key_name: Name of X509 key to use @type dest_ca_pem: string @param dest_ca_pem: Destination X509 CA in PEM format @type timeouts: L{ImportExportTimeouts} @param timeouts: Timeouts for this import """ instance = self._instance assert len(disk_info) == len(instance.disks) cbs = _RemoteExportCb(self._feedback_fn, len(instance.disks)) ieloop = ImportExportLoop(self._lu) try: for idx, (dev, (host, port, magic)) in enumerate(zip(instance.disks, disk_info)): # Decide whether to use IPv6 ipv6 = netutils.IP6Address.IsValid(host) opts = objects.ImportExportOptions(key_name=key_name, ca_pem=dest_ca_pem, magic=magic, ipv6=ipv6) self._feedback_fn("Sending disk %s to %s:%s" % (idx, host, port)) finished_fn = compat.partial(self._TransferFinished, idx) ieloop.Add(DiskExport(self._lu, instance.primary_node, opts, host, port, instance, "disk%d" % idx, constants.IEIO_SCRIPT, (dev, idx), timeouts, cbs, private=(idx, finished_fn))) ieloop.Run() finally: ieloop.FinalizeAll() return (True, cbs.disk_results) def _TransferFinished(self, idx): """Called once a transfer has finished. @type idx: number @param idx: Disk index """ logging.debug("Transfer %s finished", idx) self._RemoveSnapshot(idx) def Cleanup(self): """Remove all snapshots. """ assert len(self._removed_snaps) == len(self._instance.disks) for idx in range(len(self._instance.disks)): self._RemoveSnapshot(idx) class _RemoteImportCb(ImportExportCbBase): def __init__(self, feedback_fn, cds, x509_cert_pem, disk_count, external_address): """Initializes this class. @type cds: string @param cds: Cluster domain secret @type x509_cert_pem: string @param x509_cert_pem: CA used for signing import key @type disk_count: number @param disk_count: Number of disks @type external_address: string @param external_address: External address of destination node """ ImportExportCbBase.__init__(self) self._feedback_fn = feedback_fn self._cds = cds self._x509_cert_pem = x509_cert_pem self._disk_count = disk_count self._external_address = external_address self._dresults = [None] * disk_count self._daemon_port = [None] * disk_count self._salt = utils.GenerateSecret(8) @property def disk_results(self): """Returns per-disk results. """ return self._dresults def _CheckAllListening(self): """Checks whether all daemons are listening. If all daemons are listening, the information is sent to the client. """ if not compat.all(dp is not None for dp in self._daemon_port): return host = self._external_address disks = [] for idx, (port, magic) in enumerate(self._daemon_port): disks.append(ComputeRemoteImportDiskInfo(self._cds, self._salt, idx, host, port, magic)) assert len(disks) == self._disk_count self._feedback_fn(constants.ELOG_REMOTE_IMPORT, { "disks": disks, "x509_ca": self._x509_cert_pem, }) def ReportListening(self, ie, private, _): """Called when daemon started listening. """ (idx, ) = private self._feedback_fn("Disk %s is now listening" % idx) assert self._daemon_port[idx] is None self._daemon_port[idx] = (ie.listen_port, ie.magic) self._CheckAllListening() def ReportConnected(self, ie, private): """Called when a connection has been established. """ (idx, ) = private self._feedback_fn("Disk %s is now receiving data" % idx) def ReportFinished(self, ie, private): """Called when a transfer has finished. 
""" (idx, ) = private # Daemon is certainly no longer listening self._daemon_port[idx] = None if ie.success: self._feedback_fn("Disk %s finished receiving data" % idx) else: self._feedback_fn(("Disk %s failed to receive data: %s" " (recent output: %s)") % (idx, ie.final_message, ie.recent_output)) self._dresults[idx] = bool(ie.success) def RemoteImport(lu, feedback_fn, instance, pnode, source_x509_ca, cds, timeouts): """Imports an instance from another cluster. @param lu: Logical unit instance @param feedback_fn: Feedback function @type instance: L{objects.Instance} @param instance: Instance object @type pnode: L{objects.Node} @param pnode: Primary node of instance as an object @type source_x509_ca: OpenSSL.crypto.X509 @param source_x509_ca: Import source's X509 CA @type cds: string @param cds: Cluster domain secret @type timeouts: L{ImportExportTimeouts} @param timeouts: Timeouts for this import """ source_ca_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, source_x509_ca) magic_base = utils.GenerateSecret(6) # Decide whether to use IPv6 ipv6 = netutils.IP6Address.IsValid(pnode.primary_ip) # Create crypto key result = lu.rpc.call_x509_cert_create(instance.primary_node, constants.RIE_CERT_VALIDITY) result.Raise("Can't create X509 key and certificate on %s" % result.node) (x509_key_name, x509_cert_pem) = result.payload try: # Load certificate x509_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, x509_cert_pem) # Sign certificate signed_x509_cert_pem = \ utils.SignX509Certificate(x509_cert, cds, utils.GenerateSecret(8)) cbs = _RemoteImportCb(feedback_fn, cds, signed_x509_cert_pem, len(instance.disks), pnode.primary_ip) ieloop = ImportExportLoop(lu) try: for idx, dev in enumerate(instance.disks): magic = _GetInstDiskMagic(magic_base, instance.name, idx) # Import daemon options opts = objects.ImportExportOptions(key_name=x509_key_name, ca_pem=source_ca_pem, magic=magic, ipv6=ipv6) ieloop.Add(DiskImport(lu, instance.primary_node, opts, instance, "disk%d" % idx, constants.IEIO_SCRIPT, (dev, idx), timeouts, cbs, private=(idx, ))) ieloop.Run() finally: ieloop.FinalizeAll() finally: # Remove crypto key and certificate result = lu.rpc.call_x509_cert_remove(instance.primary_node, x509_key_name) result.Raise("Can't remove X509 key and certificate on %s" % result.node) return cbs.disk_results def _GetImportExportHandshakeMessage(version): """Returns the handshake message for a RIE protocol version. @type version: number """ return "%s:%s" % (version, constants.RIE_HANDSHAKE) def ComputeRemoteExportHandshake(cds): """Computes the remote import/export handshake. @type cds: string @param cds: Cluster domain secret """ salt = utils.GenerateSecret(8) msg = _GetImportExportHandshakeMessage(constants.RIE_VERSION) return (constants.RIE_VERSION, utils.Sha1Hmac(cds, msg, salt=salt), salt) def CheckRemoteExportHandshake(cds, handshake): """Checks the handshake of a remote import/export. 
@type cds: string @param cds: Cluster domain secret @type handshake: sequence @param handshake: Handshake sent by remote peer """ try: (version, hmac_digest, hmac_salt) = handshake except (TypeError, ValueError), err: return "Invalid data: %s" % err if not utils.VerifySha1Hmac(cds, _GetImportExportHandshakeMessage(version), hmac_digest, salt=hmac_salt): return "Hash didn't match, clusters don't share the same domain secret" if version != constants.RIE_VERSION: return ("Clusters don't have the same remote import/export protocol" " (local=%s, remote=%s)" % (constants.RIE_VERSION, version)) return None def _GetRieDiskInfoMessage(disk_index, host, port, magic): """Returns the hashed text for import/export disk information. @type disk_index: number @param disk_index: Index of disk (included in hash) @type host: string @param host: Hostname @type port: number @param port: Daemon port @type magic: string @param magic: Magic value """ return "%s:%s:%s:%s" % (disk_index, host, port, magic) def CheckRemoteExportDiskInfo(cds, disk_index, disk_info): """Verifies received disk information for an export. @type cds: string @param cds: Cluster domain secret @type disk_index: number @param disk_index: Index of disk (included in hash) @type disk_info: sequence @param disk_info: Disk information sent by remote peer """ try: (host, port, magic, hmac_digest, hmac_salt) = disk_info except (TypeError, ValueError), err: raise errors.GenericError("Invalid data: %s" % err) if not (host and port and magic): raise errors.GenericError("Missing destination host, port or magic") msg = _GetRieDiskInfoMessage(disk_index, host, port, magic) if not utils.VerifySha1Hmac(cds, msg, hmac_digest, salt=hmac_salt): raise errors.GenericError("HMAC is wrong") if netutils.IP6Address.IsValid(host) or netutils.IP4Address.IsValid(host): destination = host else: destination = netutils.Hostname.GetNormalizedName(host) return (destination, utils.ValidateServiceName(port), magic) def ComputeRemoteImportDiskInfo(cds, salt, disk_index, host, port, magic): """Computes the signed disk information for a remote import. @type cds: string @param cds: Cluster domain secret @type salt: string @param salt: HMAC salt @type disk_index: number @param disk_index: Index of disk (included in hash) @type host: string @param host: Hostname @type port: number @param port: Daemon port @type magic: string @param magic: Magic value """ msg = _GetRieDiskInfoMessage(disk_index, host, port, magic) hmac_digest = utils.Sha1Hmac(cds, msg, salt=salt) return (host, port, magic, hmac_digest, salt) def CalculateGroupIPolicy(cluster, group): """Calculate instance policy for group. 
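# A minimal runnable sketch of how the RIE helpers above fit together when
# both clusters share the same cluster domain secret. Everything here is
# illustrative only: _ExampleRieRoundTrip is not part of the module, the
# secret and magic values are made up, and 192.0.2.10 is a documentation
# address.
def _ExampleRieRoundTrip():
  cds = "example-domain-secret"
  # Handshake: the (version, hmac, salt) tuple is accepted only by a peer
  # that knows the same secret.
  hs = ComputeRemoteExportHandshake(cds)
  assert CheckRemoteExportHandshake(cds, hs) is None
  assert CheckRemoteExportHandshake("other-secret", hs) is not None
  # Per-disk connection details survive the same signed round trip.
  info = ComputeRemoteImportDiskInfo(cds, utils.GenerateSecret(8), 0,
                                     "192.0.2.10", 1234, "magic0")
  assert CheckRemoteExportDiskInfo(cds, 0, info) == ("192.0.2.10", 1234,
                                                     "magic0")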
""" return cluster.SimpleFillIPolicy(group.ipolicy) def ComputeDiskSize(disk_template, disks): """Compute disk size requirements according to disk template """ # Required free disk space as a function of disk and swap space req_size_dict = { constants.DT_DISKLESS: 0, constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks), # 128 MB are added for drbd metadata for each disk constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + constants.DRBD_META_SIZE for d in disks), constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks), constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks), constants.DT_BLOCK: 0, constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks), constants.DT_EXT: sum(d[constants.IDISK_SIZE] for d in disks), } if disk_template not in req_size_dict: raise errors.ProgrammerError("Disk template '%s' size requirement" " is unknown" % disk_template) return req_size_dict[disk_template] ganeti-2.9.3/lib/masterd/__init__.py0000644000000000000000000000150112230001635017275 0ustar00rootroot00000000000000# # # Copyright (C) 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # empty file for package definition """Masterd-related classes and functions. """ ganeti-2.9.3/lib/masterd/iallocator.py0000644000000000000000000006224012271422343017706 0ustar00rootroot00000000000000# # # Copyright (C) 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the iallocator code.""" from ganeti import compat from ganeti import constants from ganeti import errors from ganeti import ht from ganeti import outils from ganeti import opcodes from ganeti import rpc from ganeti import serializer from ganeti import utils import ganeti.masterd.instance as gmi _STRING_LIST = ht.TListOf(ht.TString) _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, { # pylint: disable=E1101 # Class '...' 
has no 'OP_ID' member "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID, opcodes.OpInstanceMigrate.OP_ID, opcodes.OpInstanceReplaceDisks.OP_ID]), }))) _NEVAC_MOVED = \ ht.TListOf(ht.TAnd(ht.TIsLength(3), ht.TItems([ht.TNonEmptyString, ht.TNonEmptyString, ht.TListOf(ht.TNonEmptyString), ]))) _NEVAC_FAILED = \ ht.TListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ht.TNonEmptyString, ht.TMaybeString, ]))) _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3), ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST])) _INST_NAME = ("name", ht.TNonEmptyString) _INST_UUID = ("inst_uuid", ht.TNonEmptyString) class _AutoReqParam(outils.AutoSlots): """Meta class for request definitions. """ @classmethod def _GetSlots(mcs, attrs): """Extract the slots out of REQ_PARAMS. """ params = attrs.setdefault("REQ_PARAMS", []) return [slot for (slot, _) in params] class IARequestBase(outils.ValidatedSlots): """A generic IAllocator request object. """ __metaclass__ = _AutoReqParam MODE = NotImplemented REQ_PARAMS = [] REQ_RESULT = NotImplemented def __init__(self, **kwargs): """Constructor for IARequestBase. The constructor takes only keyword arguments and will set attributes on this object based on the passed arguments. As such, it means that you should not pass arguments which are not in the REQ_PARAMS attribute for this class. """ outils.ValidatedSlots.__init__(self, **kwargs) self.Validate() def Validate(self): """Validates all parameters of the request. """ assert self.MODE in constants.VALID_IALLOCATOR_MODES for (param, validator) in self.REQ_PARAMS: if not hasattr(self, param): raise errors.OpPrereqError("Request is missing '%s' parameter" % param, errors.ECODE_INVAL) value = getattr(self, param) if not validator(value): raise errors.OpPrereqError(("Request parameter '%s' has invalid" " type %s/value %s") % (param, type(value), value), errors.ECODE_INVAL) def GetRequest(self, cfg): """Gets the request data dict. @param cfg: The configuration instance """ raise NotImplementedError def ValidateResult(self, ia, result): """Validates the result of a request. @param ia: The IAllocator instance @param result: The IAllocator run result @raises ResultValidationError: If validation fails """ if ia.success and not self.REQ_RESULT(result): raise errors.ResultValidationError("iallocator returned invalid result," " expected %s, got %s" % (self.REQ_RESULT, result)) class IAReqInstanceAlloc(IARequestBase): """An instance allocation request. """ # pylint: disable=E1101 MODE = constants.IALLOCATOR_MODE_ALLOC REQ_PARAMS = [ _INST_NAME, ("memory", ht.TNonNegativeInt), ("spindle_use", ht.TNonNegativeInt), ("disks", ht.TListOf(ht.TDict)), ("disk_template", ht.TString), ("os", ht.TString), ("tags", _STRING_LIST), ("nics", ht.TListOf(ht.TDict)), ("vcpus", ht.TInt), ("hypervisor", ht.TString), ("node_whitelist", ht.TMaybeListOf(ht.TNonEmptyString)), ] REQ_RESULT = ht.TList def RequiredNodes(self): """Calculates the required nodes based on the disk_template. """ if self.disk_template in constants.DTS_INT_MIRROR: return 2 else: return 1 def GetRequest(self, cfg): """Requests a new instance. The checks for the completeness of the opcode must have already been done.
""" disk_space = gmi.ComputeDiskSize(self.disk_template, self.disks) return { "name": self.name, "disk_template": self.disk_template, "tags": self.tags, "os": self.os, "vcpus": self.vcpus, "memory": self.memory, "spindle_use": self.spindle_use, "disks": self.disks, "disk_space_total": disk_space, "nics": self.nics, "required_nodes": self.RequiredNodes(), "hypervisor": self.hypervisor, } def ValidateResult(self, ia, result): """Validates an single instance allocation request. """ IARequestBase.ValidateResult(self, ia, result) if ia.success and len(result) != self.RequiredNodes(): raise errors.ResultValidationError("iallocator returned invalid number" " of nodes (%s), required %s" % (len(result), self.RequiredNodes())) class IAReqMultiInstanceAlloc(IARequestBase): """An multi instance allocation request. """ # pylint: disable=E1101 MODE = constants.IALLOCATOR_MODE_MULTI_ALLOC REQ_PARAMS = [ ("instances", ht.TListOf(ht.TInstanceOf(IAReqInstanceAlloc))), ] _MASUCCESS = \ ht.TListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ht.TNonEmptyString, ht.TListOf(ht.TNonEmptyString), ]))) _MAFAILED = ht.TListOf(ht.TNonEmptyString) REQ_RESULT = ht.TAnd(ht.TList, ht.TIsLength(2), ht.TItems([_MASUCCESS, _MAFAILED])) def GetRequest(self, cfg): return { "instances": [iareq.GetRequest(cfg) for iareq in self.instances], } class IAReqRelocate(IARequestBase): """A relocation request. """ # pylint: disable=E1101 MODE = constants.IALLOCATOR_MODE_RELOC REQ_PARAMS = [ _INST_UUID, ("relocate_from_node_uuids", _STRING_LIST), ] REQ_RESULT = ht.TList def GetRequest(self, cfg): """Request an relocation of an instance The checks for the completeness of the opcode must have already been done. """ instance = cfg.GetInstanceInfo(self.inst_uuid) if instance is None: raise errors.ProgrammerError("Unknown instance '%s' passed to" " IAllocator" % self.inst_uuid) if instance.disk_template not in constants.DTS_MIRRORED: raise errors.OpPrereqError("Can't relocate non-mirrored instances", errors.ECODE_INVAL) if (instance.disk_template in constants.DTS_INT_MIRROR and len(instance.secondary_nodes) != 1): raise errors.OpPrereqError("Instance has not exactly one secondary node", errors.ECODE_STATE) disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks] disk_space = gmi.ComputeDiskSize(instance.disk_template, disk_sizes) return { "name": instance.name, "disk_space_total": disk_space, "required_nodes": 1, "relocate_from": cfg.GetNodeNames(self.relocate_from_node_uuids), } def ValidateResult(self, ia, result): """Validates the result of an relocation request. """ IARequestBase.ValidateResult(self, ia, result) node2group = dict((name, ndata["group"]) for (name, ndata) in ia.in_data["nodes"].items()) fn = compat.partial(self._NodesToGroups, node2group, ia.in_data["nodegroups"]) instance = ia.cfg.GetInstanceInfo(self.inst_uuid) request_groups = fn(ia.cfg.GetNodeNames(self.relocate_from_node_uuids) + ia.cfg.GetNodeNames([instance.primary_node])) result_groups = fn(result + ia.cfg.GetNodeNames([instance.primary_node])) if ia.success and not set(result_groups).issubset(request_groups): raise errors.ResultValidationError("Groups of nodes returned by" " iallocator (%s) differ from original" " groups (%s)" % (utils.CommaJoin(result_groups), utils.CommaJoin(request_groups))) @staticmethod def _NodesToGroups(node2group, groups, nodes): """Returns a list of unique group names for a list of nodes. 
@type node2group: dict @param node2group: Map from node name to group UUID @type groups: dict @param groups: Group information @type nodes: list @param nodes: Node names """ result = set() for node in nodes: try: group_uuid = node2group[node] except KeyError: # Ignore unknown node pass else: try: group = groups[group_uuid] except KeyError: # Can't find group, let's use UUID group_name = group_uuid else: group_name = group["name"] result.add(group_name) return sorted(result) class IAReqNodeEvac(IARequestBase): """A node evacuation request. """ # pylint: disable=E1101 MODE = constants.IALLOCATOR_MODE_NODE_EVAC REQ_PARAMS = [ ("instances", _STRING_LIST), ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)), ] REQ_RESULT = _NEVAC_RESULT def GetRequest(self, cfg): """Get data for node-evacuate requests. """ return { "instances": self.instances, "evac_mode": self.evac_mode, } class IAReqGroupChange(IARequestBase): """A group change request. """ # pylint: disable=E1101 MODE = constants.IALLOCATOR_MODE_CHG_GROUP REQ_PARAMS = [ ("instances", _STRING_LIST), ("target_groups", _STRING_LIST), ] REQ_RESULT = _NEVAC_RESULT def GetRequest(self, cfg): """Get data for group change requests. """ return { "instances": self.instances, "target_groups": self.target_groups, } class IAllocator(object): """IAllocator framework. An IAllocator instance has four sets of attributes: - cfg that is needed to query the cluster - input data (all members of the _KEYS class attribute are required) - four buffer attributes (in|out_data|text), that represent the input (to the external script) in text and data structure format, and the output from it, again in two formats - the result variables from the script (success, info, nodes) for easy usage """ # pylint: disable=R0902 # lots of instance attributes def __init__(self, cfg, rpc_runner, req): self.cfg = cfg self.rpc = rpc_runner self.req = req # init buffer variables self.in_text = self.out_text = self.in_data = self.out_data = None # init result fields self.success = self.info = self.result = None self._BuildInputData(req) def _ComputeClusterDataNodeInfo(self, node_list, cluster_info, hypervisor_name): """Prepare and execute node info call. @type node_list: list of strings @param node_list: list of nodes' UUIDs @type cluster_info: L{objects.Cluster} @param cluster_info: the cluster's information from the config @type hypervisor_name: string @param hypervisor_name: the hypervisor name @rtype: same as the result of the node info RPC call @return: the result of the node info RPC call """ storage_units_raw = utils.storage.GetStorageUnitsOfCluster( self.cfg, include_spindles=True) storage_units = rpc.PrepareStorageUnitsForNodes(self.cfg, storage_units_raw, node_list) hvspecs = [(hypervisor_name, cluster_info.hvparams[hypervisor_name])] return self.rpc.call_node_info(node_list, storage_units, hvspecs) def _ComputeClusterData(self): """Compute the generic allocator input data. This is the data that is independent of the actual operation.
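# Schematically (keys from the code that follows, values elided), the input
# document handed to the external allocator script looks like:
#
#   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
#    "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": ..., ...}}
#
# The "request" key is added later by _BuildInputData.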
""" cluster_info = self.cfg.GetClusterInfo() # cluster data data = { "version": constants.IALLOCATOR_VERSION, "cluster_name": self.cfg.GetClusterName(), "cluster_tags": list(cluster_info.GetTags()), "enabled_hypervisors": list(cluster_info.enabled_hypervisors), "ipolicy": cluster_info.ipolicy, } ninfo = self.cfg.GetAllNodesInfo() iinfo = self.cfg.GetAllInstancesInfo().values() i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] # node data node_list = [n.uuid for n in ninfo.values() if n.vm_capable] if isinstance(self.req, IAReqInstanceAlloc): hypervisor_name = self.req.hypervisor node_whitelist = self.req.node_whitelist elif isinstance(self.req, IAReqRelocate): hypervisor_name = self.cfg.GetInstanceInfo(self.req.inst_uuid).hypervisor node_whitelist = None else: hypervisor_name = cluster_info.primary_hypervisor node_whitelist = None has_lvm = utils.storage.IsLvmEnabled(cluster_info.enabled_disk_templates) node_data = self._ComputeClusterDataNodeInfo(node_list, cluster_info, hypervisor_name) node_iinfo = \ self.rpc.call_all_instances_info(node_list, cluster_info.enabled_hypervisors, cluster_info.hvparams) data["nodegroups"] = self._ComputeNodeGroupData(self.cfg) config_ndata = self._ComputeBasicNodeData(self.cfg, ninfo, node_whitelist) data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo, i_list, config_ndata, has_lvm) assert len(data["nodes"]) == len(ninfo), \ "Incomplete node data computed" data["instances"] = self._ComputeInstanceData(self.cfg, cluster_info, i_list) self.in_data = data @staticmethod def _ComputeNodeGroupData(cfg): """Compute node groups data. """ cluster = cfg.GetClusterInfo() ng = dict((guuid, { "name": gdata.name, "alloc_policy": gdata.alloc_policy, "networks": [net_uuid for net_uuid, _ in gdata.networks.items()], "ipolicy": gmi.CalculateGroupIPolicy(cluster, gdata), "tags": list(gdata.GetTags()), }) for guuid, gdata in cfg.GetAllNodeGroupsInfo().items()) return ng @staticmethod def _ComputeBasicNodeData(cfg, node_cfg, node_whitelist): """Compute global node data. @rtype: dict @returns: a dict of name: (node dict, node config) """ # fill in static (config-based) values node_results = dict((ninfo.name, { "tags": list(ninfo.GetTags()), "primary_ip": ninfo.primary_ip, "secondary_ip": ninfo.secondary_ip, "offline": (ninfo.offline or not (node_whitelist is None or ninfo.name in node_whitelist)), "drained": ninfo.drained, "master_candidate": ninfo.master_candidate, "group": ninfo.group, "master_capable": ninfo.master_capable, "vm_capable": ninfo.vm_capable, "ndparams": cfg.GetNdParams(ninfo), }) for ninfo in node_cfg.values()) return node_results @staticmethod def _GetAttributeFromHypervisorNodeData(hv_info, node_name, attr): """Extract an attribute from the hypervisor's node information. This is a helper function to extract data from the hypervisor's information about the node, as part of the result of a node_info query. 
@type hv_info: dict of strings @param hv_info: dictionary of node information from the hypervisor @type node_name: string @param node_name: name of the node @type attr: string @param attr: key of the attribute in the hv_info dictionary @rtype: integer @return: the value of the attribute @raises errors.OpExecError: if key not in dictionary or value not integer """ if attr not in hv_info: raise errors.OpExecError("Node '%s' didn't return attribute" " '%s'" % (node_name, attr)) value = hv_info[attr] if not isinstance(value, int): raise errors.OpExecError("Node '%s' returned invalid value" " for '%s': %s" % (node_name, attr, value)) return value @staticmethod def _ComputeStorageDataFromSpaceInfo(space_info, node_name, has_lvm): """Extract storage data from node info. @type space_info: see result of the RPC call node info @param space_info: the storage reporting part of the result of the RPC call node info @type node_name: string @param node_name: the node's name @type has_lvm: boolean @param has_lvm: whether or not LVM storage information is requested @rtype: 4-tuple of integers @return: tuple of storage info (total_disk, free_disk, total_spindles, free_spindles) """ # TODO: replace this with proper storage reporting if has_lvm: lvm_vg_info = utils.storage.LookupSpaceInfoByStorageType( space_info, constants.ST_LVM_VG) if not lvm_vg_info: raise errors.OpExecError("Node '%s' didn't return LVM vg space info." % (node_name)) total_disk = lvm_vg_info["storage_size"] free_disk = lvm_vg_info["storage_free"] lvm_pv_info = utils.storage.LookupSpaceInfoByStorageType( space_info, constants.ST_LVM_PV) if not lvm_pv_info: raise errors.OpExecError("Node '%s' didn't return LVM pv space info." % (node_name)) total_spindles = lvm_pv_info["storage_size"] free_spindles = lvm_pv_info["storage_free"] else: # we didn't even ask the node for VG status, so use zeros total_disk = free_disk = 0 total_spindles = free_spindles = 0 return (total_disk, free_disk, total_spindles, free_spindles) @staticmethod def _ComputeInstanceMemory(instance_list, node_instances_info, node_uuid, input_mem_free): """Compute memory used by primary instances. @rtype: tuple (int, int, int) @returns: A tuple of three integers: 1. the sum of memory used by primary instances on the node (including the ones that are currently down), 2. the sum of memory used by primary instances of the node that are up, 3. the amount of memory that is free on the node considering the current usage of the instances. """ i_p_mem = i_p_up_mem = 0 mem_free = input_mem_free for iinfo, beinfo in instance_list: if iinfo.primary_node == node_uuid: i_p_mem += beinfo[constants.BE_MAXMEM] if iinfo.name not in node_instances_info[node_uuid].payload: i_used_mem = 0 else: i_used_mem = int(node_instances_info[node_uuid] .payload[iinfo.name]["memory"]) i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem mem_free -= max(0, i_mem_diff) if iinfo.admin_state == constants.ADMINST_UP: i_p_up_mem += beinfo[constants.BE_MAXMEM] return (i_p_mem, i_p_up_mem, mem_free) def _ComputeDynamicNodeData(self, node_cfg, node_data, node_iinfo, i_list, node_results, has_lvm): """Compute global node data.
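# A worked example (numbers made up) for _ComputeInstanceMemory above:
# node N reports input_mem_free=4096 and has two primary instances, A
# (BE_MAXMEM=1024, hypervisor reports 512 in use, admin state "up") and B
# (BE_MAXMEM=512, not running, hence absent from the node's payload):
#
#   i_p_mem    = 1024 + 512 = 1536   (all primaries, up or down)
#   i_p_up_mem = 1024                (only A is ADMINST_UP)
#   mem_free   = 4096 - (1024 - 512) - (512 - 0) = 3072
#
# i.e. free memory is reduced by whatever each instance could still claim
# up to its BE_MAXMEM.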
@param node_results: the basic node structures as filled from the config """ #TODO(dynmem): compute the right data on MAX and MIN memory # make a copy of the current dict node_results = dict(node_results) for nuuid, nresult in node_data.items(): ninfo = node_cfg[nuuid] assert ninfo.name in node_results, "Missing basic data for node %s" % \ ninfo.name if not ninfo.offline: nresult.Raise("Can't get data for node %s" % ninfo.name) node_iinfo[nuuid].Raise("Can't get node instance info from node %s" % ninfo.name) (_, space_info, (hv_info, )) = nresult.payload mem_free = self._GetAttributeFromHypervisorNodeData(hv_info, ninfo.name, "memory_free") (i_p_mem, i_p_up_mem, mem_free) = self._ComputeInstanceMemory( i_list, node_iinfo, nuuid, mem_free) (total_disk, free_disk, total_spindles, free_spindles) = \ self._ComputeStorageDataFromSpaceInfo(space_info, ninfo.name, has_lvm) # compute memory used by instances pnr_dyn = { "total_memory": self._GetAttributeFromHypervisorNodeData( hv_info, ninfo.name, "memory_total"), "reserved_memory": self._GetAttributeFromHypervisorNodeData( hv_info, ninfo.name, "memory_dom0"), "free_memory": mem_free, "total_disk": total_disk, "free_disk": free_disk, "total_spindles": total_spindles, "free_spindles": free_spindles, "total_cpus": self._GetAttributeFromHypervisorNodeData( hv_info, ninfo.name, "cpu_total"), "reserved_cpus": self._GetAttributeFromHypervisorNodeData( hv_info, ninfo.name, "cpu_dom0"), "i_pri_memory": i_p_mem, "i_pri_up_memory": i_p_up_mem, } pnr_dyn.update(node_results[ninfo.name]) node_results[ninfo.name] = pnr_dyn return node_results @staticmethod def _ComputeInstanceData(cfg, cluster_info, i_list): """Compute global instance data. """ instance_data = {} for iinfo, beinfo in i_list: nic_data = [] for nic in iinfo.nics: filled_params = cluster_info.SimpleFillNIC(nic.nicparams) nic_dict = { "mac": nic.mac, "ip": nic.ip, "mode": filled_params[constants.NIC_MODE], "link": filled_params[constants.NIC_LINK], } if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: nic_dict["bridge"] = filled_params[constants.NIC_LINK] nic_data.append(nic_dict) pir = { "tags": list(iinfo.GetTags()), "admin_state": iinfo.admin_state, "vcpus": beinfo[constants.BE_VCPUS], "memory": beinfo[constants.BE_MAXMEM], "spindle_use": beinfo[constants.BE_SPINDLE_USE], "os": iinfo.os, "nodes": [cfg.GetNodeName(iinfo.primary_node)] + cfg.GetNodeNames(iinfo.secondary_nodes), "nics": nic_data, "disks": [{constants.IDISK_SIZE: dsk.size, constants.IDISK_MODE: dsk.mode, constants.IDISK_SPINDLES: dsk.spindles} for dsk in iinfo.disks], "disk_template": iinfo.disk_template, "disks_active": iinfo.disks_active, "hypervisor": iinfo.hypervisor, } pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template, pir["disks"]) instance_data[iinfo.name] = pir return instance_data def _BuildInputData(self, req): """Build input data structures. """ self._ComputeClusterData() request = req.GetRequest(self.cfg) request["type"] = req.MODE self.in_data["request"] = request self.in_text = serializer.Dump(self.in_data) def Run(self, name, validate=True, call_fn=None): """Run an instance allocator and return the results. """ if call_fn is None: call_fn = self.rpc.call_iallocator_runner result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) result.Raise("Failure while running the iallocator script") self.out_text = result.payload if validate: self._ValidateResult() def _ValidateResult(self): """Process the allocator results. 
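# For reference, a sketch (made-up values) of the JSON document the
# validator below accepts from an external allocator:
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# The legacy key "nodes" is still accepted in place of "result" and is
# renamed before validation.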
This will process the results and, if successful, save them in self.out_data and the other parameters. """ try: rdict = serializer.Load(self.out_text) except Exception, err: raise errors.OpExecError("Can't parse iallocator results: %s" % str(err)) if not isinstance(rdict, dict): raise errors.OpExecError("Can't parse iallocator results: not a dict") # TODO: remove backwards compatibility in later versions if "nodes" in rdict and "result" not in rdict: rdict["result"] = rdict["nodes"] del rdict["nodes"] for key in "success", "info", "result": if key not in rdict: raise errors.OpExecError("Can't parse iallocator results:" " missing key '%s'" % key) setattr(self, key, rdict[key]) self.req.ValidateResult(self, self.result) self.out_data = rdict ganeti-2.9.3/lib/bootstrap.py0000644000000000000000000012131312271422343016130 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Functions to bootstrap a new cluster. """ import os import os.path import re import logging import time import tempfile from ganeti.cmdlib import cluster from ganeti import rpc from ganeti import ssh from ganeti import utils from ganeti import errors from ganeti import config from ganeti import constants from ganeti import objects from ganeti import ssconf from ganeti import serializer from ganeti import hypervisor from ganeti.storage import drbd from ganeti.storage import filestorage from ganeti import netutils from ganeti import luxi from ganeti import jstore from ganeti import pathutils # ec_id for InitConfig's temporary reservation manager _INITCONF_ECID = "initconfig-ecid" #: After how many seconds daemon must be responsive _DAEMON_READY_TIMEOUT = 10.0 def _InitSSHSetup(): """Setup the SSH configuration for the cluster. This generates a dsa keypair for root, adds the pub key to the permitted hosts and adds the hostkey to its own known hosts. """ priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) for name in priv_key, pub_key: if os.path.exists(name): utils.CreateBackup(name) utils.RemoveFile(name) result = utils.RunCmd(["ssh-keygen", "-t", "dsa", "-f", priv_key, "-q", "-N", ""]) if result.failed: raise errors.OpExecError("Could not generate ssh keypair, error %s" % result.output) utils.AddAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) def GenerateHmacKey(file_name): """Writes a new HMAC key.
@type file_name: str @param file_name: Path to output file """ utils.WriteFile(file_name, data="%s\n" % utils.GenerateSecret(), mode=0400, backup=True) def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_spice_cert, new_confd_hmac_key, new_cds, rapi_cert_pem=None, spice_cert_pem=None, spice_cacert_pem=None, cds=None, nodecert_file=pathutils.NODED_CERT_FILE, rapicert_file=pathutils.RAPI_CERT_FILE, spicecert_file=pathutils.SPICE_CERT_FILE, spicecacert_file=pathutils.SPICE_CACERT_FILE, hmackey_file=pathutils.CONFD_HMAC_KEY, cds_file=pathutils.CLUSTER_DOMAIN_SECRET_FILE): """Updates the cluster certificates, keys and secrets. @type new_cluster_cert: bool @param new_cluster_cert: Whether to generate a new cluster certificate @type new_rapi_cert: bool @param new_rapi_cert: Whether to generate a new RAPI certificate @type new_spice_cert: bool @param new_spice_cert: Whether to generate a new SPICE certificate @type new_confd_hmac_key: bool @param new_confd_hmac_key: Whether to generate a new HMAC key @type new_cds: bool @param new_cds: Whether to generate a new cluster domain secret @type rapi_cert_pem: string @param rapi_cert_pem: New RAPI certificate in PEM format @type spice_cert_pem: string @param spice_cert_pem: New SPICE certificate in PEM format @type spice_cacert_pem: string @param spice_cacert_pem: Certificate of the CA that signed the SPICE certificate, in PEM format @type cds: string @param cds: New cluster domain secret @type nodecert_file: string @param nodecert_file: optional override of the node cert file path @type rapicert_file: string @param rapicert_file: optional override of the rapi cert file path @type spicecert_file: string @param spicecert_file: optional override of the spice cert file path @type spicecacert_file: string @param spicecacert_file: optional override of the spice CA cert file path @type hmackey_file: string @param hmackey_file: optional override of the hmac key file path """ # noded SSL certificate cluster_cert_exists = os.path.exists(nodecert_file) if new_cluster_cert or not cluster_cert_exists: if cluster_cert_exists: utils.CreateBackup(nodecert_file) logging.debug("Generating new cluster certificate at %s", nodecert_file) utils.GenerateSelfSignedSslCert(nodecert_file) # confd HMAC key if new_confd_hmac_key or not os.path.exists(hmackey_file): logging.debug("Writing new confd HMAC key to %s", hmackey_file) GenerateHmacKey(hmackey_file) # RAPI rapi_cert_exists = os.path.exists(rapicert_file) if rapi_cert_pem: # Assume rapi_pem contains a valid PEM-formatted certificate and key logging.debug("Writing RAPI certificate at %s", rapicert_file) utils.WriteFile(rapicert_file, data=rapi_cert_pem, backup=True) elif new_rapi_cert or not rapi_cert_exists: if rapi_cert_exists: utils.CreateBackup(rapicert_file) logging.debug("Generating new RAPI certificate at %s", rapicert_file) utils.GenerateSelfSignedSslCert(rapicert_file) # SPICE spice_cert_exists = os.path.exists(spicecert_file) spice_cacert_exists = os.path.exists(spicecacert_file) if spice_cert_pem: # spice_cert_pem implies also spice_cacert_pem logging.debug("Writing SPICE certificate at %s", spicecert_file) utils.WriteFile(spicecert_file, data=spice_cert_pem, backup=True) logging.debug("Writing SPICE CA certificate at %s", spicecacert_file) utils.WriteFile(spicecacert_file, data=spice_cacert_pem, backup=True) elif new_spice_cert or not spice_cert_exists: if spice_cert_exists: utils.CreateBackup(spicecert_file) if spice_cacert_exists: utils.CreateBackup(spicecacert_file) logging.debug("Generating new 
self-signed SPICE certificate at %s", spicecert_file) (_, cert_pem) = utils.GenerateSelfSignedSslCert(spicecert_file) # Self-signed certificate -> the public certificate is also the CA public # certificate logging.debug("Writing the public certificate to %s", spicecert_file) utils.io.WriteFile(spicecacert_file, mode=0400, data=cert_pem) # Cluster domain secret if cds: logging.debug("Writing cluster domain secret to %s", cds_file) utils.WriteFile(cds_file, data=cds, backup=True) elif new_cds or not os.path.exists(cds_file): logging.debug("Generating new cluster domain secret at %s", cds_file) GenerateHmacKey(cds_file) def _InitGanetiServerSetup(master_name): """Setup the necessary configuration for the initial node daemon. This creates the nodepass file containing the shared password for the cluster, generates the SSL certificate and starts the node daemon. @type master_name: str @param master_name: Name of the master node """ # Generate cluster secrets GenerateClusterCrypto(True, False, False, False, False) result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.NODED]) if result.failed: raise errors.OpExecError("Could not start the node daemon, command %s" " had exitcode %s and error %s" % (result.cmd, result.exit_code, result.output)) _WaitForNodeDaemon(master_name) def _WaitForNodeDaemon(node_name): """Wait for node daemon to become responsive. """ def _CheckNodeDaemon(): # Pylint bug # pylint: disable=E1101 result = rpc.BootstrapRunner().call_version([node_name])[node_name] if result.fail_msg: raise utils.RetryAgain() try: utils.Retry(_CheckNodeDaemon, 1.0, _DAEMON_READY_TIMEOUT) except utils.RetryTimeout: raise errors.OpExecError("Node daemon on %s didn't answer queries within" " %s seconds" % (node_name, _DAEMON_READY_TIMEOUT)) def _WaitForMasterDaemon(): """Wait for master daemon to become responsive. """ def _CheckMasterDaemon(): try: cl = luxi.Client() (cluster_name, ) = cl.QueryConfigValues(["cluster_name"]) except Exception: raise utils.RetryAgain() logging.debug("Received cluster name %s from master", cluster_name) try: utils.Retry(_CheckMasterDaemon, 1.0, _DAEMON_READY_TIMEOUT) except utils.RetryTimeout: raise errors.OpExecError("Master daemon didn't answer queries within" " %s seconds" % _DAEMON_READY_TIMEOUT) def _WaitForSshDaemon(hostname, port, family): """Wait for SSH daemon to become responsive. """ hostip = netutils.GetHostname(name=hostname, family=family).ip def _CheckSshDaemon(): if netutils.TcpPing(hostip, port, timeout=1.0, live_port_needed=True): logging.debug("SSH daemon on %s:%s (IP address %s) has become" " responsive", hostname, port, hostip) else: raise utils.RetryAgain() try: utils.Retry(_CheckSshDaemon, 1.0, _DAEMON_READY_TIMEOUT) except utils.RetryTimeout: raise errors.OpExecError("SSH daemon on %s:%s (IP address %s) didn't" " become responsive within %s seconds" % (hostname, port, hostip, _DAEMON_READY_TIMEOUT)) def RunNodeSetupCmd(cluster_name, node, basecmd, debug, verbose, use_cluster_key, ask_key, strict_host_check, data): """Runs a command to configure something on a remote machine. 
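# The _WaitFor* helpers above share one polling idiom: probe, raise
# utils.RetryAgain() while not ready, and turn utils.RetryTimeout into
# errors.OpExecError. A minimal sketch of the same pattern for a
# hypothetical readiness probe (_ExampleIsReady is not a real function):
#
#   def _WaitForExample():
#     def _Check():
#       if not _ExampleIsReady():
#         raise utils.RetryAgain()
#     try:
#       utils.Retry(_Check, 1.0, _DAEMON_READY_TIMEOUT)
#     except utils.RetryTimeout:
#       raise errors.OpExecError("example service didn't become ready")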
@type cluster_name: string @param cluster_name: Cluster name @type node: string @param node: Node name @type basecmd: string @param basecmd: Base command (path on the remote machine) @type debug: bool @param debug: Enable debug output @type verbose: bool @param verbose: Enable verbose output @type use_cluster_key: bool @param use_cluster_key: See L{ssh.SshRunner.BuildCmd} @type ask_key: bool @param ask_key: See L{ssh.SshRunner.BuildCmd} @type strict_host_check: bool @param strict_host_check: See L{ssh.SshRunner.BuildCmd} @param data: JSON-serializable input data for script (passed to stdin) """ cmd = [basecmd] # Pass --debug/--verbose to the external script if set on our invocation if debug: cmd.append("--debug") if verbose: cmd.append("--verbose") family = ssconf.SimpleStore().GetPrimaryIPFamily() srun = ssh.SshRunner(cluster_name, ipv6=(family == netutils.IP6Address.family)) scmd = srun.BuildCmd(node, constants.SSH_LOGIN_USER, utils.ShellQuoteArgs(cmd), batch=False, ask_key=ask_key, quiet=False, strict_host_check=strict_host_check, use_cluster_key=use_cluster_key) tempfh = tempfile.TemporaryFile() try: tempfh.write(serializer.DumpJson(data)) tempfh.seek(0) result = utils.RunCmd(scmd, interactive=True, input_fd=tempfh) finally: tempfh.close() if result.failed: raise errors.OpExecError("Command '%s' failed: %s" % (result.cmd, result.fail_reason)) _WaitForSshDaemon(node, netutils.GetDaemonPort(constants.SSH), family) def _InitFileStorageDir(file_storage_dir): """Initialize the file storage directory, if needed. @param file_storage_dir: the user-supplied value @return: either empty string (if file storage was disabled at build time) or the normalized path to the storage directory """ file_storage_dir = os.path.normpath(file_storage_dir) if not os.path.isabs(file_storage_dir): raise errors.OpPrereqError("File storage directory '%s' is not an absolute" " path" % file_storage_dir, errors.ECODE_INVAL) if not os.path.exists(file_storage_dir): try: os.makedirs(file_storage_dir, 0750) except OSError, err: raise errors.OpPrereqError("Cannot create file storage directory" " '%s': %s" % (file_storage_dir, err), errors.ECODE_ENVIRON) if not os.path.isdir(file_storage_dir): raise errors.OpPrereqError("The file storage directory '%s' is not" " a directory." % file_storage_dir, errors.ECODE_ENVIRON) return file_storage_dir def _PrepareFileBasedStorage( enabled_disk_templates, file_storage_dir, default_dir, file_disk_template, init_fn=_InitFileStorageDir, acceptance_fn=None): """Checks if a file-based storage type is enabled and inits the dir.
@type enabled_disk_templates: list of string @param enabled_disk_templates: list of enabled disk templates @type file_storage_dir: string @param file_storage_dir: the file storage directory @type default_dir: string @param default_dir: default file storage directory when C{file_storage_dir} is 'None' @type file_disk_template: string @param file_disk_template: a disk template whose storage type is 'ST_FILE' @rtype: string @returns: the name of the actual file storage directory """ assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE)) if file_storage_dir is None: file_storage_dir = default_dir if not acceptance_fn: acceptance_fn = \ lambda path: filestorage.CheckFileStoragePathAcceptance( path, exact_match_ok=True) cluster.CheckFileStoragePathVsEnabledDiskTemplates( logging.warning, file_storage_dir, enabled_disk_templates) file_storage_enabled = file_disk_template in enabled_disk_templates if file_storage_enabled: try: acceptance_fn(file_storage_dir) except errors.FileStoragePathError as e: raise errors.OpPrereqError(str(e)) result_file_storage_dir = init_fn(file_storage_dir) else: result_file_storage_dir = file_storage_dir return result_file_storage_dir def _PrepareFileStorage( enabled_disk_templates, file_storage_dir, init_fn=_InitFileStorageDir, acceptance_fn=None): """Checks if file storage is enabled and inits the dir. @see: C{_PrepareFileBasedStorage} """ return _PrepareFileBasedStorage( enabled_disk_templates, file_storage_dir, pathutils.DEFAULT_FILE_STORAGE_DIR, constants.DT_FILE, init_fn=init_fn, acceptance_fn=acceptance_fn) def _PrepareSharedFileStorage( enabled_disk_templates, file_storage_dir, init_fn=_InitFileStorageDir, acceptance_fn=None): """Checks if shared file storage is enabled and inits the dir. @see: C{_PrepareFileBasedStorage} """ return _PrepareFileBasedStorage( enabled_disk_templates, file_storage_dir, pathutils.DEFAULT_SHARED_FILE_STORAGE_DIR, constants.DT_SHARED_FILE, init_fn=init_fn, acceptance_fn=acceptance_fn) def _InitCheckEnabledDiskTemplates(enabled_disk_templates): """Checks the sanity of the enabled disk templates. """ if not enabled_disk_templates: raise errors.OpPrereqError("Enabled disk templates list must contain at" " least one member", errors.ECODE_INVAL) invalid_disk_templates = \ set(enabled_disk_templates) - constants.DISK_TEMPLATES if invalid_disk_templates: raise errors.OpPrereqError("Enabled disk templates list contains invalid" " entries: %s" % invalid_disk_templates, errors.ECODE_INVAL) def _RestrictIpolicyToEnabledDiskTemplates(ipolicy, enabled_disk_templates): """Restricts the ipolicy's disk templates to the enabled ones. This function removes from the ipolicy's list of allowed disk templates those that are not enabled by the cluster.
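# An illustrative sketch of the narrowing performed by this helper;
# _ExampleRestrictIpolicy is not part of the module and the template
# selection is made up:
def _ExampleRestrictIpolicy():
  ipolicy = {constants.IPOLICY_DTS: ["drbd", "plain", "rbd"]}
  _RestrictIpolicyToEnabledDiskTemplates(ipolicy, ["plain", "drbd"])
  # The policy is updated in place; order is arbitrary because the
  # intersection goes through a set.
  assert set(ipolicy[constants.IPOLICY_DTS]) == set(["drbd", "plain"])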
@type ipolicy: dict @param ipolicy: the instance policy @type enabled_disk_templates: list of string @param enabled_disk_templates: the list of cluster-wide enabled disk templates """ assert constants.IPOLICY_DTS in ipolicy allowed_disk_templates = ipolicy[constants.IPOLICY_DTS] restricted_disk_templates = list(set(allowed_disk_templates) .intersection(set(enabled_disk_templates))) ipolicy[constants.IPOLICY_DTS] = restricted_disk_templates def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914 master_netmask, master_netdev, file_storage_dir, shared_file_storage_dir, candidate_pool_size, secondary_ip=None, vg_name=None, beparams=None, nicparams=None, ndparams=None, hvparams=None, diskparams=None, enabled_hypervisors=None, modify_etc_hosts=True, modify_ssh_setup=True, maintain_node_health=False, drbd_helper=None, uid_pool=None, default_iallocator=None, primary_ip_version=None, ipolicy=None, prealloc_wipe_disks=False, use_external_mip_script=False, hv_state=None, disk_state=None, enabled_disk_templates=None): """Initialise the cluster. @type candidate_pool_size: int @param candidate_pool_size: master candidate pool size @type enabled_disk_templates: list of string @param enabled_disk_templates: list of disk_templates to be used in this cluster """ # TODO: complete the docstring if config.ConfigWriter.IsCluster(): raise errors.OpPrereqError("Cluster is already initialised", errors.ECODE_STATE) if not enabled_hypervisors: raise errors.OpPrereqError("Enabled hypervisors list must contain at" " least one member", errors.ECODE_INVAL) invalid_hvs = set(enabled_hypervisors) - constants.HYPER_TYPES if invalid_hvs: raise errors.OpPrereqError("Enabled hypervisors contains invalid" " entries: %s" % invalid_hvs, errors.ECODE_INVAL) _InitCheckEnabledDiskTemplates(enabled_disk_templates) try: ipcls = netutils.IPAddress.GetClassFromIpVersion(primary_ip_version) except errors.ProgrammerError: raise errors.OpPrereqError("Invalid primary ip version: %d." % primary_ip_version, errors.ECODE_INVAL) hostname = netutils.GetHostname(family=ipcls.family) if not ipcls.IsValid(hostname.ip): raise errors.OpPrereqError("This host's IP (%s) is not a valid IPv%d" " address." % (hostname.ip, primary_ip_version), errors.ECODE_INVAL) if ipcls.IsLoopback(hostname.ip): raise errors.OpPrereqError("This host's IP (%s) resolves to a loopback" " address. Please fix DNS or %s." % (hostname.ip, pathutils.ETC_HOSTS), errors.ECODE_ENVIRON) if not ipcls.Own(hostname.ip): raise errors.OpPrereqError("Inconsistency: this host's name resolves" " to %s,\nbut this ip address does not" " belong to this host" % hostname.ip, errors.ECODE_ENVIRON) clustername = netutils.GetHostname(name=cluster_name, family=ipcls.family) if netutils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT, timeout=5): raise errors.OpPrereqError("Cluster IP already active", errors.ECODE_NOTUNIQUE) if not secondary_ip: if primary_ip_version == constants.IP6_VERSION: raise errors.OpPrereqError("When using an IPv6 primary address, a valid" " IPv4 address must be given as secondary", errors.ECODE_INVAL) secondary_ip = hostname.ip if not netutils.IP4Address.IsValid(secondary_ip): raise errors.OpPrereqError("Secondary IP address (%s) has to be a valid" " IPv4 address." % secondary_ip, errors.ECODE_INVAL) if not netutils.IP4Address.Own(secondary_ip): raise errors.OpPrereqError("You gave %s as secondary IP," " but it does not belong to this host."
% secondary_ip, errors.ECODE_ENVIRON) if master_netmask is not None: if not ipcls.ValidateNetmask(master_netmask): raise errors.OpPrereqError("CIDR netmask (%s) not valid for IPv%s " % (master_netmask, primary_ip_version), errors.ECODE_INVAL) else: master_netmask = ipcls.iplen if vg_name: # Check if volume group is valid vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name, constants.MIN_VG_SIZE) if vgstatus: raise errors.OpPrereqError("Error: %s" % vgstatus, errors.ECODE_INVAL) if drbd_helper is not None: try: curr_helper = drbd.DRBD8.GetUsermodeHelper() except errors.BlockDeviceError, err: raise errors.OpPrereqError("Error while checking drbd helper" " (specify --no-drbd-storage if you are not" " using drbd): %s" % str(err), errors.ECODE_ENVIRON) if drbd_helper != curr_helper: raise errors.OpPrereqError("Error: requiring %s as drbd helper but %s" " is the current helper" % (drbd_helper, curr_helper), errors.ECODE_INVAL) logging.debug("Stopping daemons (if any are running)") result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-all"]) if result.failed: raise errors.OpExecError("Could not stop daemons, command %s" " had exitcode %s and error '%s'" % (result.cmd, result.exit_code, result.output)) file_storage_dir = _PrepareFileStorage(enabled_disk_templates, file_storage_dir) shared_file_storage_dir = _PrepareSharedFileStorage(enabled_disk_templates, shared_file_storage_dir) if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix): raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix, errors.ECODE_INVAL) result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev]) if result.failed: raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" % (master_netdev, result.output.strip()), errors.ECODE_INVAL) dirs = [(pathutils.RUN_DIR, constants.RUN_DIRS_MODE)] utils.EnsureDirs(dirs) objects.UpgradeBeParams(beparams) utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES) utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) objects.NIC.CheckParameterSyntax(nicparams) full_ipolicy = objects.FillIPolicy(constants.IPOLICY_DEFAULTS, ipolicy) _RestrictIpolicyToEnabledDiskTemplates(full_ipolicy, enabled_disk_templates) if ndparams is not None: utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES) else: ndparams = dict(constants.NDC_DEFAULTS) # This is ugly, as we modify the dict itself # FIXME: Make utils.ForceDictType pure functional or write a wrapper # around it if hv_state: for hvname, hvs_data in hv_state.items(): utils.ForceDictType(hvs_data, constants.HVSTS_PARAMETER_TYPES) hv_state[hvname] = objects.Cluster.SimpleFillHvState(hvs_data) else: hv_state = dict((hvname, constants.HVST_DEFAULTS) for hvname in enabled_hypervisors) # FIXME: disk_state has no default values yet if disk_state: for storage, ds_data in disk_state.items(): if storage not in constants.DS_VALID_TYPES: raise errors.OpPrereqError("Invalid storage type in disk state: %s" % storage, errors.ECODE_INVAL) for ds_name, state in ds_data.items(): utils.ForceDictType(state, constants.DSS_PARAMETER_TYPES) ds_data[ds_name] = objects.Cluster.SimpleFillDiskState(state) # hvparams is a mapping of hypervisor->hvparams dict for hv_name, hv_params in hvparams.iteritems(): utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) hv_class = hypervisor.GetHypervisor(hv_name) hv_class.CheckParameterSyntax(hv_params) # diskparams is a mapping of disk-template->diskparams dict for template, dt_params in diskparams.items(): param_keys = set(dt_params.keys()) 
default_param_keys = set(constants.DISK_DT_DEFAULTS[template].keys()) if not (param_keys <= default_param_keys): unknown_params = param_keys - default_param_keys raise errors.OpPrereqError("Invalid parameters for disk template %s:" " %s" % (template, utils.CommaJoin(unknown_params)), errors.ECODE_INVAL) utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) if template == constants.DT_DRBD8 and vg_name is not None: # The default METAVG value is equal to the VG name set at init time, # if provided dt_params[constants.DRBD_DEFAULT_METAVG] = vg_name try: utils.VerifyDictOptions(diskparams, constants.DISK_DT_DEFAULTS) except errors.OpPrereqError, err: raise errors.OpPrereqError("While verifying diskparam options: %s" % err, errors.ECODE_INVAL) # set up ssh config and /etc/hosts rsa_sshkey = "" dsa_sshkey = "" if os.path.isfile(pathutils.SSH_HOST_RSA_PUB): sshline = utils.ReadFile(pathutils.SSH_HOST_RSA_PUB) rsa_sshkey = sshline.split(" ")[1] if os.path.isfile(pathutils.SSH_HOST_DSA_PUB): sshline = utils.ReadFile(pathutils.SSH_HOST_DSA_PUB) dsa_sshkey = sshline.split(" ")[1] if not rsa_sshkey and not dsa_sshkey: raise errors.OpPrereqError("Failed to find SSH public keys", errors.ECODE_ENVIRON) if modify_etc_hosts: utils.AddHostToEtcHosts(hostname.name, hostname.ip) if modify_ssh_setup: _InitSSHSetup() if default_iallocator is not None: alloc_script = utils.FindFile(default_iallocator, constants.IALLOCATOR_SEARCH_PATH, os.path.isfile) if alloc_script is None: raise errors.OpPrereqError("Invalid default iallocator script '%s'" " specified" % default_iallocator, errors.ECODE_INVAL) elif constants.HTOOLS: # htools was enabled at build-time, we default to it if utils.FindFile(constants.IALLOC_HAIL, constants.IALLOCATOR_SEARCH_PATH, os.path.isfile): default_iallocator = constants.IALLOC_HAIL now = time.time() # init of cluster config file cluster_config = objects.Cluster( serial_no=1, rsahostkeypub=rsa_sshkey, dsahostkeypub=dsa_sshkey, highest_used_port=(constants.FIRST_DRBD_PORT - 1), mac_prefix=mac_prefix, volume_group_name=vg_name, tcpudp_port_pool=set(), master_ip=clustername.ip, master_netmask=master_netmask, master_netdev=master_netdev, cluster_name=clustername.name, file_storage_dir=file_storage_dir, shared_file_storage_dir=shared_file_storage_dir, enabled_hypervisors=enabled_hypervisors, beparams={constants.PP_DEFAULT: beparams}, nicparams={constants.PP_DEFAULT: nicparams}, ndparams=ndparams, hvparams=hvparams, diskparams=diskparams, candidate_pool_size=candidate_pool_size, modify_etc_hosts=modify_etc_hosts, modify_ssh_setup=modify_ssh_setup, uid_pool=uid_pool, ctime=now, mtime=now, maintain_node_health=maintain_node_health, drbd_usermode_helper=drbd_helper, default_iallocator=default_iallocator, primary_ip_family=ipcls.family, prealloc_wipe_disks=prealloc_wipe_disks, use_external_mip_script=use_external_mip_script, ipolicy=full_ipolicy, hv_state_static=hv_state, disk_state_static=disk_state, enabled_disk_templates=enabled_disk_templates, ) master_node_config = objects.Node(name=hostname.name, primary_ip=hostname.ip, secondary_ip=secondary_ip, serial_no=1, master_candidate=True, offline=False, drained=False, ctime=now, mtime=now, ) InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config) cfg = config.ConfigWriter(offline=True) ssh.WriteKnownHostsFile(cfg, pathutils.SSH_KNOWN_HOSTS_FILE) cfg.Update(cfg.GetClusterInfo(), logging.error) ssconf.WriteSsconfFiles(cfg.GetSsconfValues()) # set up the inter-node password and certificate _InitGanetiServerSetup(hostname.name)
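# At this point the new configuration exists only on this node and was
# written with an offline ConfigWriter; starting the daemons below (and
# waiting for the master daemon) is what makes the cluster usable.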
logging.debug("Starting daemons") result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"]) if result.failed: raise errors.OpExecError("Could not start daemons, command %s" " had exitcode %s and error %s" % (result.cmd, result.exit_code, result.output)) _WaitForMasterDaemon() def InitConfig(version, cluster_config, master_node_config, cfg_file=pathutils.CLUSTER_CONF_FILE): """Create the initial cluster configuration. It will contain the current node, which will also be the master node, and no instances. @type version: int @param version: configuration version @type cluster_config: L{objects.Cluster} @param cluster_config: cluster configuration @type master_node_config: L{objects.Node} @param master_node_config: master node configuration @type cfg_file: string @param cfg_file: configuration file path """ uuid_generator = config.TemporaryReservationManager() cluster_config.uuid = uuid_generator.Generate([], utils.NewUUID, _INITCONF_ECID) master_node_config.uuid = uuid_generator.Generate([], utils.NewUUID, _INITCONF_ECID) cluster_config.master_node = master_node_config.uuid nodes = { master_node_config.uuid: master_node_config, } default_nodegroup = objects.NodeGroup( uuid=uuid_generator.Generate([], utils.NewUUID, _INITCONF_ECID), name=constants.INITIAL_NODE_GROUP_NAME, members=[master_node_config.uuid], diskparams={}, ) nodegroups = { default_nodegroup.uuid: default_nodegroup, } now = time.time() config_data = objects.ConfigData(version=version, cluster=cluster_config, nodegroups=nodegroups, nodes=nodes, instances={}, networks={}, serial_no=1, ctime=now, mtime=now) utils.WriteFile(cfg_file, data=serializer.Dump(config_data.ToDict()), mode=0600) def FinalizeClusterDestroy(master_uuid): """Execute the last steps of cluster destroy This function shuts down all the daemons, completing the destroy begun in cmdlib.LUDestroyOpcode. """ cfg = config.ConfigWriter() modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup runner = rpc.BootstrapRunner() master_name = cfg.GetNodeName(master_uuid) master_params = cfg.GetMasterNetworkParameters() master_params.uuid = master_uuid ems = cfg.GetUseExternalMipScript() result = runner.call_node_deactivate_master_ip(master_name, master_params, ems) msg = result.fail_msg if msg: logging.warning("Could not disable the master IP: %s", msg) result = runner.call_node_stop_master(master_name) msg = result.fail_msg if msg: logging.warning("Could not disable the master role: %s", msg) result = runner.call_node_leave_cluster(master_name, modify_ssh_setup) msg = result.fail_msg if msg: logging.warning("Could not shutdown the node daemon and cleanup" " the node: %s", msg) def SetupNodeDaemon(opts, cluster_name, node): """Add a node to the cluster. This function must be called before the actual opcode, and will ssh to the remote node, copy the needed files, and start ganeti-noded, allowing the master to do the rest via normal rpc calls. @param cluster_name: the cluster name @param node: the name of the new node """ data = { constants.NDS_CLUSTER_NAME: cluster_name, constants.NDS_NODE_DAEMON_CERTIFICATE: utils.ReadFile(pathutils.NODED_CERT_FILE), constants.NDS_SSCONF: ssconf.SimpleStore().ReadAll(), constants.NDS_START_NODE_DAEMON: True, } RunNodeSetupCmd(cluster_name, node, pathutils.NODE_DAEMON_SETUP, opts.debug, opts.verbose, True, opts.ssh_key_check, opts.ssh_key_check, data) _WaitForNodeDaemon(node) def MasterFailover(no_voting=False): """Failover the master node. 
This checks that we are not already the master, and will cause the current master to cease being master, and the non-master to become the new master. @type no_voting: boolean @param no_voting: force the operation without remote nodes' agreement (dangerous) """ sstore = ssconf.SimpleStore() old_master, new_master = ssconf.GetMasterAndMyself(sstore) node_names = sstore.GetNodeList() mc_list = sstore.GetMasterCandidates() if old_master == new_master: raise errors.OpPrereqError("This command must be run on the node" " where you want the new master to be." " %s is already the master" % old_master, errors.ECODE_INVAL) if new_master not in mc_list: mc_no_master = [name for name in mc_list if name != old_master] raise errors.OpPrereqError("This node is not among the nodes marked" " as master candidates. Only these nodes" " can become masters. Current list of" " master candidates is:\n" "%s" % ("\n".join(mc_no_master)), errors.ECODE_STATE) if not no_voting: vote_list = GatherMasterVotes(node_names) if vote_list: voted_master = vote_list[0][0] if voted_master is None: raise errors.OpPrereqError("Cluster is inconsistent, most nodes did" " not respond.", errors.ECODE_ENVIRON) elif voted_master != old_master: raise errors.OpPrereqError("I have a wrong configuration, I believe" " the master is %s but the other nodes" " voted %s. Please resync the configuration" " of this node." % (old_master, voted_master), errors.ECODE_STATE) # end checks rcode = 0 logging.info("Setting master to %s, old master: %s", new_master, old_master) try: # instantiate a real config writer, as we now know we have the # configuration data cfg = config.ConfigWriter(accept_foreign=True) old_master_node = cfg.GetNodeInfoByName(old_master) if old_master_node is None: raise errors.OpPrereqError("Could not find old master node '%s' in" " cluster configuration." % old_master, errors.ECODE_NOENT) cluster_info = cfg.GetClusterInfo() new_master_node = cfg.GetNodeInfoByName(new_master) if new_master_node is None: raise errors.OpPrereqError("Could not find new master node '%s' in" " cluster configuration."
% new_master, errors.ECODE_NOENT) cluster_info.master_node = new_master_node.uuid # this will also regenerate the ssconf files, since we updated the # cluster info cfg.Update(cluster_info, logging.error) except errors.ConfigurationError, err: logging.error("Error while trying to set the new master: %s", str(err)) return 1 # if cfg.Update worked, then it means the old master daemon won't be # able to write its own config file anymore (we rely on locking in both # backend.UploadFile() and ConfigWriter._Write()); hence the next # step is to kill the old master logging.info("Stopping the master daemon on node %s", old_master) runner = rpc.BootstrapRunner() master_params = cfg.GetMasterNetworkParameters() master_params.uuid = old_master_node.uuid ems = cfg.GetUseExternalMipScript() result = runner.call_node_deactivate_master_ip(old_master, master_params, ems) msg = result.fail_msg if msg: logging.warning("Could not disable the master IP: %s", msg) result = runner.call_node_stop_master(old_master) msg = result.fail_msg if msg: logging.error("Could not disable the master role on the old master" " %s, please disable manually: %s", old_master, msg) logging.info("Checking master IP non-reachability...") master_ip = sstore.GetMasterIP() total_timeout = 30 # Here we have a phase where no master should be running def _check_ip(): if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT): raise utils.RetryAgain() try: utils.Retry(_check_ip, (1, 1.5, 5), total_timeout) except utils.RetryTimeout: logging.warning("The master IP is still reachable after %s seconds," " continuing but activating the master on the current" " node will probably fail", total_timeout) if jstore.CheckDrainFlag(): logging.info("Undraining job queue") jstore.SetDrainFlag(False) logging.info("Starting the master daemons on the new master") result = rpc.BootstrapRunner().call_node_start_master_daemons(new_master, no_voting) msg = result.fail_msg if msg: logging.error("Could not start the master role on the new master" " %s, please check: %s", new_master, msg) rcode = 1 logging.info("Master failed over from %s to %s", old_master, new_master) return rcode def GetMaster(): """Returns the current master node. This is a separate function in bootstrap since it's needed by gnt-cluster, and instead of importing ssconf directly, it's better to abstract it in bootstrap, where we do use ssconf in other functions too. """ sstore = ssconf.SimpleStore() old_master, _ = ssconf.GetMasterAndMyself(sstore) return old_master def GatherMasterVotes(node_names): """Check the agreement on who is the master. This function will return a list of (node, number of votes), ordered by the number of votes. Errors will be denoted by the key 'None'. Note that the sum of votes is the number of nodes this machine knows, whereas the number of entries in the list could be different (if some nodes vote for another master). We remove ourselves from the list since we know that (bugs aside) we use the same source for configuration information for both backend and bootstrap, so we'll always vote for ourselves.
@type node_names: list @param node_names: the list of nodes to query for master info; the current node will be removed if it is in the list @rtype: list @return: list of (node, votes) """ myself = netutils.Hostname.GetSysName() try: node_names.remove(myself) except ValueError: pass if not node_names: # no nodes left (eventually after removing myself) return [] results = rpc.BootstrapRunner().call_master_info(node_names) if not isinstance(results, dict): # this should not happen (unless internal error in rpc) logging.critical("Can't complete rpc call, aborting master startup") return [(None, len(node_names))] votes = {} for node_name in results: nres = results[node_name] data = nres.payload msg = nres.fail_msg fail = False if msg: logging.warning("Error contacting node %s: %s", node_name, msg) fail = True # for now we accept both length 3, 4 and 5 (data[3] is primary ip version # and data[4] is the master netmask) elif not isinstance(data, (tuple, list)) or len(data) < 3: logging.warning("Invalid data received from node %s: %s", node_name, data) fail = True if fail: if None not in votes: votes[None] = 0 votes[None] += 1 continue master_node = data[2] if master_node not in votes: votes[master_node] = 0 votes[master_node] += 1 vote_list = [v for v in votes.items()] # sort first on number of votes then on name, since we want None # sorted later if we have the half of the nodes not responding, and # half voting all for the same master vote_list.sort(key=lambda x: (x[1], x[0]), reverse=True) return vote_list ganeti-2.9.3/lib/workerpool.py0000644000000000000000000004440612244641676016341 0ustar00rootroot00000000000000# # # Copyright (C) 2008, 2009, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Base classes for worker pools. """ import logging import threading import heapq import itertools from ganeti import compat from ganeti import errors _TERMINATE = object() _DEFAULT_PRIORITY = 0 class DeferTask(Exception): """Special exception class to defer a task. This class can be raised by L{BaseWorker.RunTask} to defer the execution of a task. Optionally, the priority of the task can be changed. """ def __init__(self, priority=None): """Initializes this class. @type priority: number @param priority: New task priority (None means no change) """ Exception.__init__(self) self.priority = priority class NoSuchTask(Exception): """Exception raised when a task can't be found. """ class BaseWorker(threading.Thread, object): """Base worker class for worker pools. Users of a worker pool must override RunTask in a subclass. """ # pylint: disable=W0212 def __init__(self, pool, worker_id): """Constructor for BaseWorker thread. 
@param pool: the parent worker pool @param worker_id: identifier for this worker """ super(BaseWorker, self).__init__(name=worker_id) self.pool = pool self._worker_id = worker_id self._current_task = None assert self.getName() == worker_id def ShouldTerminate(self): """Returns whether this worker should terminate. Should only be called from within L{RunTask}. """ self.pool._lock.acquire() try: assert self._HasRunningTaskUnlocked() return self.pool._ShouldWorkerTerminateUnlocked(self) finally: self.pool._lock.release() def GetCurrentPriority(self): """Returns the priority of the current task. Should only be called from within L{RunTask}. """ self.pool._lock.acquire() try: assert self._HasRunningTaskUnlocked() (priority, _, _, _) = self._current_task return priority finally: self.pool._lock.release() def SetTaskName(self, taskname): """Sets the name of the current task. Should only be called from within L{RunTask}. @type taskname: string @param taskname: Task's name """ if taskname: name = "%s/%s" % (self._worker_id, taskname) else: name = self._worker_id # Set thread name self.setName(name) def _HasRunningTaskUnlocked(self): """Returns whether this worker is currently running a task. """ return (self._current_task is not None) def _GetCurrentOrderAndTaskId(self): """Returns the order and task ID of the current task. Should only be called from within L{RunTask}. """ self.pool._lock.acquire() try: assert self._HasRunningTaskUnlocked() (_, order_id, task_id, _) = self._current_task return (order_id, task_id) finally: self.pool._lock.release() def run(self): """Main thread function. Waits for new tasks to show up in the queue. """ pool = self.pool while True: assert self._current_task is None defer = None try: # Wait on lock to be told either to terminate or to do a task pool._lock.acquire() try: task = pool._WaitForTaskUnlocked(self) if task is _TERMINATE: # Told to terminate break if task is None: # Spurious notification, ignore continue self._current_task = task # No longer needed, dispose of reference del task assert self._HasRunningTaskUnlocked() finally: pool._lock.release() (priority, _, _, args) = self._current_task try: # Run the actual task assert defer is None logging.debug("Starting task %r, priority %s", args, priority) assert self.getName() == self._worker_id try: self.RunTask(*args) # pylint: disable=W0142 finally: self.SetTaskName(None) logging.debug("Done with task %r, priority %s", args, priority) except DeferTask, err: defer = err if defer.priority is None: # Use same priority defer.priority = priority logging.debug("Deferring task %r, new priority %s", args, defer.priority) assert self._HasRunningTaskUnlocked() except: # pylint: disable=W0702 logging.exception("Caught unhandled exception") assert self._HasRunningTaskUnlocked() finally: # Notify pool pool._lock.acquire() try: if defer: assert self._current_task # Schedule again for later run (_, _, task_id, args) = self._current_task pool._AddTaskUnlocked(args, defer.priority, task_id) if self._current_task: self._current_task = None pool._worker_to_pool.notifyAll() finally: pool._lock.release() assert not self._HasRunningTaskUnlocked() logging.debug("Terminates") def RunTask(self, *args): """Function called to start a task. This needs to be implemented by child classes. """ raise NotImplementedError() class WorkerPool(object): """Worker pool with a queue. This class is thread-safe. Tasks are guaranteed to be started in the order in which they're added to the pool. 
Due to the nature of threading, they're not guaranteed to finish in the same order. @type _tasks: list of tuples @ivar _tasks: Each tuple has the format (priority, order ID, task ID, arguments). Priority and order ID are numeric and essentially control the sort order. The order ID is an increasing number denoting the order in which tasks are added to the queue. The task ID is controlled by user of workerpool, see L{AddTask} for details. The task arguments are C{None} for abandoned tasks, otherwise a sequence of arguments to be passed to L{BaseWorker.RunTask}). The list must fulfill the heap property (for use by the C{heapq} module). @type _taskdata: dict; (task IDs as keys, tuples as values) @ivar _taskdata: Mapping from task IDs to entries in L{_tasks} """ def __init__(self, name, num_workers, worker_class): """Constructor for worker pool. @param num_workers: number of workers to be started (dynamic resizing is not yet implemented) @param worker_class: the class to be instantiated for workers; should derive from L{BaseWorker} """ # Some of these variables are accessed by BaseWorker self._lock = threading.Lock() self._pool_to_pool = threading.Condition(self._lock) self._pool_to_worker = threading.Condition(self._lock) self._worker_to_pool = threading.Condition(self._lock) self._worker_class = worker_class self._name = name self._last_worker_id = 0 self._workers = [] self._quiescing = False self._active = True # Terminating workers self._termworkers = [] # Queued tasks self._counter = itertools.count() self._tasks = [] self._taskdata = {} # Start workers self.Resize(num_workers) # TODO: Implement dynamic resizing? def _WaitWhileQuiescingUnlocked(self): """Wait until the worker pool has finished quiescing. """ while self._quiescing: self._pool_to_pool.wait() def _AddTaskUnlocked(self, args, priority, task_id): """Adds a task to the internal queue. @type args: sequence @param args: Arguments passed to L{BaseWorker.RunTask} @type priority: number @param priority: Task priority @param task_id: Task ID """ assert isinstance(args, (tuple, list)), "Arguments must be a sequence" assert isinstance(priority, (int, long)), "Priority must be numeric" assert task_id is None or isinstance(task_id, (int, long)), \ "Task ID must be numeric or None" task = [priority, self._counter.next(), task_id, args] if task_id is not None: assert task_id not in self._taskdata # Keep a reference to change priority later if necessary self._taskdata[task_id] = task # A counter is used to ensure elements are processed in their incoming # order. For processing they're sorted by priority and then counter. heapq.heappush(self._tasks, task) # Notify a waiting worker self._pool_to_worker.notify() def AddTask(self, args, priority=_DEFAULT_PRIORITY, task_id=None): """Adds a task to the queue. @type args: sequence @param args: arguments passed to L{BaseWorker.RunTask} @type priority: number @param priority: Task priority @param task_id: Task ID @note: The task ID can be essentially anything that can be used as a dictionary key. Callers, however, must ensure a task ID is unique while a task is in the pool or while it might return to the pool due to deferring using L{DeferTask}. """ self._lock.acquire() try: self._WaitWhileQuiescingUnlocked() self._AddTaskUnlocked(args, priority, task_id) finally: self._lock.release() def AddManyTasks(self, tasks, priority=_DEFAULT_PRIORITY, task_id=None): """Add a list of tasks to the queue. 
@type tasks: list of tuples @param tasks: list of args passed to L{BaseWorker.RunTask} @type priority: number or list of numbers @param priority: Priority for all added tasks or a list with the priority for each task @type task_id: list @param task_id: List with the ID for each task @note: See L{AddTask} for a note on task IDs. """ assert compat.all(isinstance(task, (tuple, list)) for task in tasks), \ "Each task must be a sequence" assert (isinstance(priority, (int, long)) or compat.all(isinstance(prio, (int, long)) for prio in priority)), \ "Priority must be numeric or be a list of numeric values" assert task_id is None or isinstance(task_id, (tuple, list)), \ "Task IDs must be in a sequence" if isinstance(priority, (int, long)): priority = [priority] * len(tasks) elif len(priority) != len(tasks): raise errors.ProgrammerError("Number of priorities (%s) doesn't match" " number of tasks (%s)" % (len(priority), len(tasks))) if task_id is None: task_id = [None] * len(tasks) elif len(task_id) != len(tasks): raise errors.ProgrammerError("Number of task IDs (%s) doesn't match" " number of tasks (%s)" % (len(task_id), len(tasks))) self._lock.acquire() try: self._WaitWhileQuiescingUnlocked() assert compat.all(isinstance(prio, (int, long)) for prio in priority) assert len(tasks) == len(priority) assert len(tasks) == len(task_id) for (args, prio, tid) in zip(tasks, priority, task_id): self._AddTaskUnlocked(args, prio, tid) finally: self._lock.release() def ChangeTaskPriority(self, task_id, priority): """Changes a task's priority. @param task_id: Task ID @type priority: number @param priority: New task priority @raise NoSuchTask: When the task referred by C{task_id} can not be found (it may never have existed, may have already been processed, or is currently running) """ assert isinstance(priority, (int, long)), "Priority must be numeric" self._lock.acquire() try: logging.debug("About to change priority of task %s to %s", task_id, priority) # Find old task oldtask = self._taskdata.get(task_id, None) if oldtask is None: msg = "Task '%s' was not found" % task_id logging.debug(msg) raise NoSuchTask(msg) # Prepare new task newtask = [priority] + oldtask[1:] # Mark old entry as abandoned (this doesn't change the sort order and # therefore doesn't invalidate the heap property of L{self._tasks}). # See also . oldtask[-1] = None # Change reference to new task entry and forget the old one assert task_id is not None self._taskdata[task_id] = newtask # Add a new task with the old number and arguments heapq.heappush(self._tasks, newtask) # Notify a waiting worker self._pool_to_worker.notify() finally: self._lock.release() def SetActive(self, active): """Enable/disable processing of tasks. This is different from L{Quiesce} in the sense that this function just changes an internal flag and doesn't wait for the queue to be empty. Tasks already being processed continue normally, but no new tasks will be started. New tasks can still be added. @type active: bool @param active: Whether tasks should be processed """ self._lock.acquire() try: self._active = active if active: # Tell all workers to continue processing self._pool_to_worker.notifyAll() finally: self._lock.release() def _WaitForTaskUnlocked(self, worker): """Waits for a task for a worker. 
@type worker: L{BaseWorker} @param worker: Worker thread """ while True: if self._ShouldWorkerTerminateUnlocked(worker): return _TERMINATE # If there's a pending task, return it immediately if self._active and self._tasks: # Get task from queue and tell pool about it try: task = heapq.heappop(self._tasks) finally: self._worker_to_pool.notifyAll() (_, _, task_id, args) = task # If the priority was changed, "args" is None if args is None: # Try again logging.debug("Found abandoned task (%r)", task) continue # Delete reference if task_id is not None: del self._taskdata[task_id] return task logging.debug("Waiting for tasks") # wait() releases the lock and sleeps until notified self._pool_to_worker.wait() logging.debug("Notified while waiting") def _ShouldWorkerTerminateUnlocked(self, worker): """Returns whether a worker should terminate. """ return (worker in self._termworkers) def _HasRunningTasksUnlocked(self): """Checks whether there's a task running in a worker. """ for worker in self._workers + self._termworkers: if worker._HasRunningTaskUnlocked(): # pylint: disable=W0212 return True return False def HasRunningTasks(self): """Checks whether there's at least one task running. """ self._lock.acquire() try: return self._HasRunningTasksUnlocked() finally: self._lock.release() def Quiesce(self): """Waits until the task queue is empty. """ self._lock.acquire() try: self._quiescing = True # Wait while there are tasks pending or running while self._tasks or self._HasRunningTasksUnlocked(): self._worker_to_pool.wait() finally: self._quiescing = False # Make sure AddTasks continues in case it was waiting self._pool_to_pool.notifyAll() self._lock.release() def _NewWorkerIdUnlocked(self): """Return an identifier for a new worker. """ self._last_worker_id += 1 return "%s%d" % (self._name, self._last_worker_id) def _ResizeUnlocked(self, num_workers): """Changes the number of workers. """ assert num_workers >= 0, "num_workers must be >= 0" logging.debug("Resizing to %s workers", num_workers) current_count = len(self._workers) if current_count == num_workers: # Nothing to do pass elif current_count > num_workers: if num_workers == 0: # Create copy of list to iterate over while lock isn't held. termworkers = self._workers[:] del self._workers[:] else: # TODO: Implement partial downsizing raise NotImplementedError() #termworkers = ... self._termworkers += termworkers # Notify workers that something has changed self._pool_to_worker.notifyAll() # Join all terminating workers self._lock.release() try: for worker in termworkers: logging.debug("Waiting for thread %s", worker.getName()) worker.join() finally: self._lock.acquire() # Remove terminated threads. This could be done in a more efficient way # (del self._termworkers[:]), but checking worker.isAlive() makes sure we # don't leave zombie threads around. for worker in termworkers: assert worker in self._termworkers, ("Worker not in list of" " terminating workers") if not worker.isAlive(): self._termworkers.remove(worker) assert not self._termworkers, "Zombie worker detected" elif current_count < num_workers: # Create (num_workers - current_count) new workers for _ in range(num_workers - current_count): worker = self._worker_class(self, self._NewWorkerIdUnlocked()) self._workers.append(worker) worker.start() def Resize(self, num_workers): """Changes the number of workers in the pool. 
@param num_workers: the new number of workers """ self._lock.acquire() try: return self._ResizeUnlocked(num_workers) finally: self._lock.release() def TerminateWorkers(self): """Terminate all worker threads. Unstarted tasks will be ignored. """ logging.debug("Terminating all workers") self._lock.acquire() try: self._ResizeUnlocked(0) if self._tasks: logging.debug("There are %s tasks left", len(self._tasks)) finally: self._lock.release() logging.debug("All workers terminated") ganeti-2.9.3/lib/daemon.py0000644000000000000000000006720012271422343015362 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2008, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module with helper classes and functions for daemons""" import asyncore import asynchat import collections import os import signal import logging import sched import time import socket import select import sys from ganeti import utils from ganeti import constants from ganeti import errors from ganeti import netutils from ganeti import ssconf from ganeti import runtime from ganeti import compat class SchedulerBreakout(Exception): """Exception used to get out of the scheduler loop """ def AsyncoreDelayFunction(timeout): """Asyncore-compatible scheduler delay function. This is a delay function for sched that, rather than actually sleeping, executes asyncore events happening in the meantime. After an event has occurred, rather than returning, it raises a SchedulerBreakout exception, which will force the current scheduler.run() invocation to terminate, so that we can also check for signals. The main loop will then call the scheduler run again, which will allow it to actually process any due events. This is needed because scheduler.run() doesn't support a count=..., as asyncore loop, and the scheduler module documents throwing exceptions from inside the delay function as an allowed usage model. """ asyncore.loop(timeout=timeout, count=1, use_poll=True) raise SchedulerBreakout() class AsyncoreScheduler(sched.scheduler): """Event scheduler integrated with asyncore """ def __init__(self, timefunc): """Initializes this class. """ sched.scheduler.__init__(self, timefunc, self._LimitedDelay) self._max_delay = None def run(self, max_delay=None): # pylint: disable=W0221 """Run any pending events. @type max_delay: None or number @param max_delay: Maximum delay (useful if caller has timeouts running) """ assert self._max_delay is None # The delay function used by the scheduler can't be different on each run, # hence an instance variable must be used. if max_delay is None: self._max_delay = None else: self._max_delay = utils.RunningTimeout(max_delay, False) try: return sched.scheduler.run(self) finally: self._max_delay = None def _LimitedDelay(self, duration): """Custom delay function for C{sched.scheduler}. 
""" if self._max_delay is None: timeout = duration else: timeout = min(duration, self._max_delay.Remaining()) return AsyncoreDelayFunction(timeout) class GanetiBaseAsyncoreDispatcher(asyncore.dispatcher): """Base Ganeti Asyncore Dispacher """ # this method is overriding an asyncore.dispatcher method def handle_error(self): """Log an error in handling any request, and proceed. """ logging.exception("Error while handling asyncore request") # this method is overriding an asyncore.dispatcher method def writable(self): """Most of the time we don't want to check for writability. """ return False class AsyncStreamServer(GanetiBaseAsyncoreDispatcher): """A stream server to use with asyncore. Each request is accepted, and then dispatched to a separate asyncore dispatcher to handle. """ _REQUEST_QUEUE_SIZE = 5 def __init__(self, family, address): """Constructor for AsyncUnixStreamSocket @type family: integer @param family: socket family (one of socket.AF_*) @type address: address family dependent @param address: address to bind the socket to """ GanetiBaseAsyncoreDispatcher.__init__(self) self.family = family self.create_socket(self.family, socket.SOCK_STREAM) self.set_reuse_addr() self.bind(address) self.listen(self._REQUEST_QUEUE_SIZE) # this method is overriding an asyncore.dispatcher method def handle_accept(self): """Accept a new client connection. Creates a new instance of the handler class, which will use asyncore to serve the client. """ accept_result = utils.IgnoreSignals(self.accept) if accept_result is not None: connected_socket, client_address = accept_result if self.family == socket.AF_UNIX: # override the client address, as for unix sockets nothing meaningful # is passed in from accept anyway client_address = netutils.GetSocketCredentials(connected_socket) logging.info("Accepted connection from %s", netutils.FormatAddress(client_address, family=self.family)) self.handle_connection(connected_socket, client_address) def handle_connection(self, connected_socket, client_address): """Handle an already accepted connection. """ raise NotImplementedError class AsyncTerminatedMessageStream(asynchat.async_chat): """A terminator separated message stream asyncore module. Handles a stream connection receiving messages terminated by a defined separator. For each complete message handle_message is called. """ def __init__(self, connected_socket, peer_address, terminator, family, unhandled_limit): """AsyncTerminatedMessageStream constructor. @type connected_socket: socket.socket @param connected_socket: connected stream socket to receive messages from @param peer_address: family-specific peer address @type terminator: string @param terminator: terminator separating messages in the stream @type family: integer @param family: socket family @type unhandled_limit: integer or None @param unhandled_limit: maximum unanswered messages """ # python 2.4/2.5 uses conn=... while 2.6 has sock=... we have to cheat by # using a positional argument rather than a keyword one. 
asynchat.async_chat.__init__(self, connected_socket) self.connected_socket = connected_socket # on python 2.4 there is no "family" attribute for the socket class # FIXME: when we move to python 2.5 or above remove the family parameter #self.family = self.connected_socket.family self.family = family self.peer_address = peer_address self.terminator = terminator self.unhandled_limit = unhandled_limit self.set_terminator(terminator) self.ibuffer = [] self.receive_count = 0 self.send_count = 0 self.oqueue = collections.deque() self.iqueue = collections.deque() # this method is overriding an asynchat.async_chat method def collect_incoming_data(self, data): self.ibuffer.append(data) def _can_handle_message(self): return (self.unhandled_limit is None or (self.receive_count < self.send_count + self.unhandled_limit) and not self.iqueue) # this method is overriding an asynchat.async_chat method def found_terminator(self): message = "".join(self.ibuffer) self.ibuffer = [] message_id = self.receive_count # We need to increase the receive_count after checking if the message can # be handled, but before calling handle_message can_handle = self._can_handle_message() self.receive_count += 1 if can_handle: self.handle_message(message, message_id) else: self.iqueue.append((message, message_id)) def handle_message(self, message, message_id): """Handle a terminated message. @type message: string @param message: message to handle @type message_id: integer @param message_id: stream's message sequence number """ pass # TODO: move this method to raise NotImplementedError # raise NotImplementedError def send_message(self, message): """Send a message to the remote peer. This function is thread-safe. @type message: string @param message: message to send, without the terminator @warning: If calling this function from a thread different than the one performing the main asyncore loop, remember that you have to wake that one up. """ # If we just append the message we received to the output queue, this # function can be safely called by multiple threads at the same time, and # we don't need locking, since deques are thread safe. handle_write in the # asyncore thread will handle the next input message if there are any # enqueued. self.oqueue.append(message) # this method is overriding an asyncore.dispatcher method def readable(self): # read from the socket if we can handle the next requests return self._can_handle_message() and asynchat.async_chat.readable(self) # this method is overriding an asyncore.dispatcher method def writable(self): # the output queue may become full just after we called writable. This only # works if we know we'll have something else waking us up from the select, # in such case, anyway. return asynchat.async_chat.writable(self) or self.oqueue # this method is overriding an asyncore.dispatcher method def handle_write(self): if self.oqueue: # if we have data in the output queue, then send_message was called. # this means we can process one more message from the input queue, if # there are any. 
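# Illustrative aside: this branch is where the unhandled_limit window
# advances. With, say, unhandled_limit=2, send_count=5 and receive_count=7,
# _can_handle_message() is False (7 < 5 + 2 fails), so readable() stops the
# socket from being read; pushing the reply below increments send_count to 6,
# which re-opens the window, and a message parked in iqueue (if any) is then
# handed to handle_message right here.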
data = self.oqueue.popleft() self.push(data + self.terminator) self.send_count += 1 if self.iqueue: self.handle_message(*self.iqueue.popleft()) self.initiate_send() def close_log(self): logging.info("Closing connection from %s", netutils.FormatAddress(self.peer_address, family=self.family)) self.close() # this method is overriding an asyncore.dispatcher method def handle_expt(self): self.close_log() # this method is overriding an asyncore.dispatcher method def handle_error(self): """Log an error in handling any request, and proceed. """ logging.exception("Error while handling asyncore request") self.close_log() class AsyncUDPSocket(GanetiBaseAsyncoreDispatcher): """An improved asyncore udp socket. """ def __init__(self, family): """Constructor for AsyncUDPSocket """ GanetiBaseAsyncoreDispatcher.__init__(self) self._out_queue = [] self._family = family self.create_socket(family, socket.SOCK_DGRAM) # this method is overriding an asyncore.dispatcher method def handle_connect(self): # Python thinks that the first udp message from a source qualifies as a # "connect" and further ones are part of the same connection. We beg to # differ and treat all messages equally. pass # this method is overriding an asyncore.dispatcher method def handle_read(self): recv_result = utils.IgnoreSignals(self.recvfrom, constants.MAX_UDP_DATA_SIZE) if recv_result is not None: payload, address = recv_result if self._family == socket.AF_INET6: # we ignore 'flow info' and 'scope id' as we don't need them ip, port, _, _ = address else: ip, port = address self.handle_datagram(payload, ip, port) def handle_datagram(self, payload, ip, port): """Handle an already read udp datagram """ raise NotImplementedError # this method is overriding an asyncore.dispatcher method def writable(self): # We should check whether we can write to the socket only if we have # something scheduled to be written return bool(self._out_queue) # this method is overriding an asyncore.dispatcher method def handle_write(self): if not self._out_queue: logging.error("handle_write called with empty output queue") return (ip, port, payload) = self._out_queue[0] utils.IgnoreSignals(self.sendto, payload, 0, (ip, port)) self._out_queue.pop(0) def enqueue_send(self, ip, port, payload): """Enqueue a datagram to be sent when possible """ if len(payload) > constants.MAX_UDP_DATA_SIZE: raise errors.UdpDataSizeError("Packet too big: %s > %s" % (len(payload), constants.MAX_UDP_DATA_SIZE)) self._out_queue.append((ip, port, payload)) def process_next_packet(self, timeout=0): """Process the next datagram, waiting for it if necessary. @type timeout: float @param timeout: how long to wait for data @rtype: boolean @return: True if some data has been handled, False otherwise """ result = utils.WaitForFdCondition(self, select.POLLIN, timeout) if result is not None and result & select.POLLIN: self.handle_read() return True else: return False class AsyncAwaker(GanetiBaseAsyncoreDispatcher): """A way to notify the asyncore loop that something is going on. If an asyncore daemon is multithreaded when a thread tries to push some data to a socket, the main loop handling asynchronous requests might be sleeping waiting on a select(). To avoid this it can create an instance of the AsyncAwaker, which other threads can use to wake it up. 
""" def __init__(self, signal_fn=None): """Constructor for AsyncAwaker @type signal_fn: function @param signal_fn: function to call when awaken """ GanetiBaseAsyncoreDispatcher.__init__(self) assert signal_fn is None or callable(signal_fn) (self.in_socket, self.out_socket) = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) self.in_socket.setblocking(0) self.in_socket.shutdown(socket.SHUT_WR) self.out_socket.shutdown(socket.SHUT_RD) self.set_socket(self.in_socket) self.need_signal = True self.signal_fn = signal_fn self.connected = True # this method is overriding an asyncore.dispatcher method def handle_read(self): utils.IgnoreSignals(self.recv, 4096) if self.signal_fn: self.signal_fn() self.need_signal = True # this method is overriding an asyncore.dispatcher method def close(self): asyncore.dispatcher.close(self) self.out_socket.close() def signal(self): """Signal the asyncore main loop. Any data we send here will be ignored, but it will cause the select() call to return. """ # Yes, there is a race condition here. No, we don't care, at worst we're # sending more than one wakeup token, which doesn't harm at all. if self.need_signal: self.need_signal = False self.out_socket.send(chr(0)) class _ShutdownCheck: """Logic for L{Mainloop} shutdown. """ def __init__(self, fn): """Initializes this class. @type fn: callable @param fn: Function returning C{None} if mainloop can be stopped or a duration in seconds after which the function should be called again @see: L{Mainloop.Run} """ assert callable(fn) self._fn = fn self._defer = None def CanShutdown(self): """Checks whether mainloop can be stopped. @rtype: bool """ if self._defer and self._defer.Remaining() > 0: # A deferred check has already been scheduled return False # Ask mainloop driver whether we can stop or should check again timeout = self._fn() if timeout is None: # Yes, can stop mainloop return True # Schedule another check in the future self._defer = utils.RunningTimeout(timeout, True) return False class Mainloop(object): """Generic mainloop for daemons @ivar scheduler: A sched.scheduler object, which can be used to register timed events """ _SHUTDOWN_TIMEOUT_PRIORITY = -(sys.maxint - 1) def __init__(self): """Constructs a new Mainloop instance. """ self._signal_wait = [] self.scheduler = AsyncoreScheduler(time.time) # Resolve uid/gids used runtime.GetEnts() @utils.SignalHandled([signal.SIGCHLD]) @utils.SignalHandled([signal.SIGTERM]) @utils.SignalHandled([signal.SIGINT]) def Run(self, shutdown_wait_fn=None, signal_handlers=None): """Runs the mainloop. 
@type shutdown_wait_fn: callable @param shutdown_wait_fn: Function to check whether loop can be terminated; B{important}: function must be idempotent and must return either None for shutting down or a timeout for another call @type signal_handlers: dict @param signal_handlers: signal->L{utils.SignalHandler} passed by decorator """ assert isinstance(signal_handlers, dict) and \ len(signal_handlers) > 0, \ "Broken SignalHandled decorator" # Counter for received signals shutdown_signals = 0 # Logic to wait for shutdown shutdown_waiter = None # Start actual main loop while True: if shutdown_signals == 1 and shutdown_wait_fn is not None: if shutdown_waiter is None: shutdown_waiter = _ShutdownCheck(shutdown_wait_fn) # Let mainloop driver decide if we can already abort if shutdown_waiter.CanShutdown(): break # Re-evaluate in a second timeout = 1.0 elif shutdown_signals >= 1: # Abort loop if more than one signal has been sent or no callback has # been given break else: # Wait forever on I/O events timeout = None if self.scheduler.empty(): asyncore.loop(count=1, timeout=timeout, use_poll=True) else: try: self.scheduler.run(max_delay=timeout) except SchedulerBreakout: pass # Check whether a signal was raised for (sig, handler) in signal_handlers.items(): if handler.called: self._CallSignalWaiters(sig) if sig in (signal.SIGTERM, signal.SIGINT): logging.info("Received signal %s asking for shutdown", sig) shutdown_signals += 1 handler.Clear() def _CallSignalWaiters(self, signum): """Calls all signal waiters for a certain signal. @type signum: int @param signum: Signal number """ for owner in self._signal_wait: owner.OnSignal(signum) def RegisterSignal(self, owner): """Registers a receiver for signal notifications The receiver must support a "OnSignal(self, signum)" function. @type owner: instance @param owner: Receiver """ self._signal_wait.append(owner) def _VerifyDaemonUser(daemon_name): """Verifies the process uid matches the configured uid. This method verifies that a daemon is started as the user it is intended to be run @param daemon_name: The name of daemon to be started @return: A tuple with the first item indicating success or not, the second item current uid and third with expected uid """ getents = runtime.GetEnts() running_uid = os.getuid() daemon_uids = { constants.MASTERD: getents.masterd_uid, constants.RAPI: getents.rapi_uid, constants.NODED: getents.noded_uid, constants.CONFD: getents.confd_uid, } assert daemon_name in daemon_uids, "Invalid daemon %s" % daemon_name return (daemon_uids[daemon_name] == running_uid, running_uid, daemon_uids[daemon_name]) def _BeautifyError(err): """Try to format an error better. Since we're dealing with daemon startup errors, in many cases this will be due to socket error and such, so we try to format these cases better. @param err: an exception object @rtype: string @return: the formatted error description """ try: if isinstance(err, socket.error): return "Socket-related error: %s (errno=%s)" % (err.args[1], err.args[0]) elif isinstance(err, EnvironmentError): if err.filename is None: return "%s (errno=%s)" % (err.strerror, err.errno) else: return "%s (file %s) (errno=%s)" % (err.strerror, err.filename, err.errno) else: return str(err) except Exception: # pylint: disable=W0703 logging.exception("Error while handling existing error %s", err) return "%s" % str(err) def _HandleSigHup(reopen_fn, signum, frame): # pylint: disable=W0613 """Handler for SIGHUP. 
@param reopen_fn: List of callback functions for reopening log files """ logging.info("Reopening log files after receiving SIGHUP") for fn in reopen_fn: if fn: fn() def GenericMain(daemon_name, optionparser, check_fn, prepare_fn, exec_fn, multithreaded=False, console_logging=False, default_ssl_cert=None, default_ssl_key=None): """Shared main function for daemons. @type daemon_name: string @param daemon_name: daemon name @type optionparser: optparse.OptionParser @param optionparser: initialized optionparser with daemon-specific options (common -f -d options will be handled by this module) @type check_fn: function which accepts (options, args) @param check_fn: function that checks start conditions and exits if they're not met @type prepare_fn: function which accepts (options, args) @param prepare_fn: function that is run before forking, or None; its result will be passed as the third parameter to exec_fn, or if None was passed in, we will just pass None to exec_fn @type exec_fn: function which accepts (options, args, prepare_results) @param exec_fn: function that's executed with the daemon's pid file held, and runs the daemon itself. @type multithreaded: bool @param multithreaded: Whether the daemon uses threads @type console_logging: boolean @param console_logging: if True, the daemon will fall back to the system console if logging fails @type default_ssl_cert: string @param default_ssl_cert: Default SSL certificate path @type default_ssl_key: string @param default_ssl_key: Default SSL key path """ optionparser.add_option("-f", "--foreground", dest="fork", help="Don't detach from the current terminal", default=True, action="store_false") optionparser.add_option("-d", "--debug", dest="debug", help="Enable some debug messages", default=False, action="store_true") optionparser.add_option("--syslog", dest="syslog", help="Enable logging to syslog (except debug" " messages); one of 'no', 'yes' or 'only' [%s]" % constants.SYSLOG_USAGE, default=constants.SYSLOG_USAGE, choices=["no", "yes", "only"]) family = ssconf.SimpleStore().GetPrimaryIPFamily() # family will default to AF_INET if there is no ssconf file (e.g. when # upgrading a cluster from 2.2 -> 2.3.
This is intended, as Ganeti clusters # <= 2.2 cannot be AF_INET6) if daemon_name in constants.DAEMONS_PORTS: default_bind_address = constants.IP4_ADDRESS_ANY if family == netutils.IP6Address.family: default_bind_address = constants.IP6_ADDRESS_ANY default_port = netutils.GetDaemonPort(daemon_name) # For networked daemons we allow choosing the port and bind address optionparser.add_option("-p", "--port", dest="port", help="Network port (default: %s)" % default_port, default=default_port, type="int") optionparser.add_option("-b", "--bind", dest="bind_address", help=("Bind address (default: '%s')" % default_bind_address), default=default_bind_address, metavar="ADDRESS") optionparser.add_option("-i", "--interface", dest="bind_interface", help=("Bind interface"), metavar="INTERFACE") if default_ssl_key is not None and default_ssl_cert is not None: optionparser.add_option("--no-ssl", dest="ssl", help="Do not secure HTTP protocol with SSL", default=True, action="store_false") optionparser.add_option("-K", "--ssl-key", dest="ssl_key", help=("SSL key path (default: %s)" % default_ssl_key), default=default_ssl_key, type="string", metavar="SSL_KEY_PATH") optionparser.add_option("-C", "--ssl-cert", dest="ssl_cert", help=("SSL certificate path (default: %s)" % default_ssl_cert), default=default_ssl_cert, type="string", metavar="SSL_CERT_PATH") # Disable the use of fork(2) if the daemon uses threads if multithreaded: utils.DisableFork() options, args = optionparser.parse_args() if getattr(options, "bind_interface", None) is not None: if options.bind_address != default_bind_address: msg = ("Can't specify both bind address (%s) and bind interface (%s)" % (options.bind_address, options.bind_interface)) print >> sys.stderr, msg sys.exit(constants.EXIT_FAILURE) interface_ip_addresses = \ netutils.GetInterfaceIpAddresses(options.bind_interface) if family == netutils.IP6Address.family: if_addresses = interface_ip_addresses[constants.IP6_VERSION] else: if_addresses = interface_ip_addresses[constants.IP4_VERSION] if len(if_addresses) < 1: msg = "Failed to find IP for interface %s" % options.bind_interface print >> sys.stderr, msg sys.exit(constants.EXIT_FAILURE) options.bind_address = if_addresses[0] if getattr(options, "ssl", False): ssl_paths = { "certificate": options.ssl_cert, "key": options.ssl_key, } for name, path in ssl_paths.iteritems(): if not os.path.isfile(path): print >> sys.stderr, "SSL %s file '%s' was not found" % (name, path) sys.exit(constants.EXIT_FAILURE) # TODO: By initiating http.HttpSslParams here we would only read the files # once and have a proper validation (isfile returns False on directories) # at the same time.
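# Illustrative aside (hypothetical code, not a real Ganeti daemon): a caller
# of GenericMain is expected to wire it up roughly as follows, with exec_fn
# running the actual service once daemonization and logging are set up:
#
#   parser = optparse.OptionParser(description="example daemon")
#
#   def _Exec(options, args, prep_results):
#     Mainloop().Run()
#
#   GenericMain("example-daemon", parser, None, None, _Exec)
#
# The daemon name must be known to the constants module (ports, log files,
# uids), which is why the real daemons pass constants.NODED and friends
# rather than free-form strings.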
result, running_uid, expected_uid = _VerifyDaemonUser(daemon_name) if not result: msg = ("%s started using wrong user ID (%d), expected %d" % (daemon_name, running_uid, expected_uid)) print >> sys.stderr, msg sys.exit(constants.EXIT_FAILURE) if check_fn is not None: check_fn(options, args) log_filename = constants.DAEMONS_LOGFILES[daemon_name] if options.fork: utils.CloseFDs() (wpipe, stdio_reopen_fn) = utils.Daemonize(logfile=log_filename) else: (wpipe, stdio_reopen_fn) = (None, None) log_reopen_fn = \ utils.SetupLogging(log_filename, daemon_name, debug=options.debug, stderr_logging=not options.fork, multithreaded=multithreaded, syslog=options.syslog, console_logging=console_logging) # Reopen log file(s) on SIGHUP signal.signal(signal.SIGHUP, compat.partial(_HandleSigHup, [log_reopen_fn, stdio_reopen_fn])) try: utils.WritePidFile(utils.DaemonPidFileName(daemon_name)) except errors.PidFileLockError, err: print >> sys.stderr, "Error while locking PID file:\n%s" % err sys.exit(constants.EXIT_FAILURE) try: try: logging.info("%s daemon startup", daemon_name) if callable(prepare_fn): prep_results = prepare_fn(options, args) else: prep_results = None except Exception, err: utils.WriteErrorToFD(wpipe, _BeautifyError(err)) raise if wpipe is not None: # we're done with the preparation phase, we close the pipe to # let the parent know it's safe to exit os.close(wpipe) exec_fn(options, args, prep_results) finally: utils.RemoveFile(utils.DaemonPidFileName(daemon_name)) ganeti-2.9.3/lib/qlang.py0000644000000000000000000002313012271422343015213 0ustar00rootroot00000000000000# # # Copyright (C) 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module for a simple query language A query filter is always a list. The first item in the list is the operator (e.g. C{[OP_AND, ...]}), while the other items depend on the operator. For logic operators (e.g. L{OP_AND}, L{OP_OR}), they are subfilters whose results are combined. Unary operators take exactly one other item (e.g. a subfilter for L{OP_NOT} and a field name for L{OP_TRUE}). Binary operators take exactly two operands, usually a field name and a value to compare against. Filters are converted to callable functions by L{query._CompileFilter}. """ import re import string # pylint: disable=W0402 import logging import pyparsing as pyp from ganeti import errors from ganeti import utils from ganeti import compat # Logic operators with one or more operands, each of which is a filter on its # own OP_OR = "|" OP_AND = "&" # Unary operators with exactly one operand OP_NOT = "!" OP_TRUE = "?" 
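# Illustrative aside (sketch only, not part of the public qlang API): with
# the operators above, a filter such as name == "inst1" or name == "inst2"
# is just a nested list; MakeSimpleFilter below produces exactly this shape.
# A by-hand equivalent (OP_EQUAL is defined a few lines further down):
def _ExampleNameFilter(names):
  """Return [OP_OR, [OP_EQUAL, "name", n], ...] for the given names."""
  return [OP_OR] + [[OP_EQUAL, "name", n] for n in names]
# _ExampleNameFilter(["inst1", "inst2"]) evaluates to
# ["|", ["=", "name", "inst1"], ["=", "name", "inst2"]]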
# Binary operators with exactly two operands, the field name and an # operator-specific value OP_EQUAL = "=" OP_NOT_EQUAL = "!=" OP_LT = "<" OP_LE = "<=" OP_GT = ">" OP_GE = ">=" OP_REGEXP = "=~" OP_CONTAINS = "=[]" #: Characters used for detecting user-written filters (see L{_CheckFilter}) FILTER_DETECTION_CHARS = frozenset("()=/!~'\"\\<>" + string.whitespace) #: Characters used to detect globbing filters (see L{_CheckGlobbing}) GLOB_DETECTION_CHARS = frozenset("*?") def MakeSimpleFilter(namefield, values): """Builds a simple filter. @param namefield: Name of field containing item name @param values: List of names """ if values: return [OP_OR] + [[OP_EQUAL, namefield, i] for i in values] return None def _ConvertLogicOp(op): """Creates parsing action function for logic operator. @type op: string @param op: Operator for data structure, e.g. L{OP_AND} """ def fn(toks): """Converts parser tokens to query operator structure. @rtype: list @return: Query operator structure, e.g. C{[OP_AND, ["=", "foo", "bar"]]} """ operands = toks[0] if len(operands) == 1: return operands[0] # Build query operator structure return [[op] + operands.asList()] return fn _KNOWN_REGEXP_DELIM = "/#^|" _KNOWN_REGEXP_FLAGS = frozenset("si") def _ConvertRegexpValue(_, loc, toks): """Regular expression value for condition. """ (regexp, flags) = toks[0] # Ensure only whitelisted flags are used unknown_flags = (frozenset(flags) - _KNOWN_REGEXP_FLAGS) if unknown_flags: raise pyp.ParseFatalException("Unknown regular expression flags: '%s'" % "".join(unknown_flags), loc) if flags: re_flags = "(?%s)" % "".join(sorted(flags)) else: re_flags = "" re_cond = re_flags + regexp # Test if valid try: re.compile(re_cond) except re.error, err: raise pyp.ParseFatalException("Invalid regular expression (%s)" % err, loc) return [re_cond] def BuildFilterParser(): """Builds a parser for query filter strings. @rtype: pyparsing.ParserElement """ field_name = pyp.Word(pyp.alphas, pyp.alphanums + "_/.") # Integer num_sign = pyp.Word("-+", exact=1) number = pyp.Combine(pyp.Optional(num_sign) + pyp.Word(pyp.nums)) number.setParseAction(lambda toks: int(toks[0])) quoted_string = pyp.quotedString.copy().setParseAction(pyp.removeQuotes) # Right-hand-side value rval = (number | quoted_string) # Boolean condition bool_cond = field_name.copy() bool_cond.setParseAction(lambda (fname, ): [[OP_TRUE, fname]]) # Simple binary conditions binopstbl = { "==": OP_EQUAL, "!=": OP_NOT_EQUAL, "<": OP_LT, "<=": OP_LE, ">": OP_GT, ">=": OP_GE, } binary_cond = (field_name + pyp.oneOf(binopstbl.keys()) + rval) binary_cond.setParseAction(lambda (lhs, op, rhs): [[binopstbl[op], lhs, rhs]]) # "in" condition in_cond = (rval + pyp.Suppress("in") + field_name) in_cond.setParseAction(lambda (value, field): [[OP_CONTAINS, field, value]]) # "not in" condition not_in_cond = (rval + pyp.Suppress("not") + pyp.Suppress("in") + field_name) not_in_cond.setParseAction(lambda (value, field): [[OP_NOT, [OP_CONTAINS, field, value]]]) # Regular expression, e.g.
m/foobar/i regexp_val = pyp.Group(pyp.Optional("m").suppress() + pyp.MatchFirst([pyp.QuotedString(i, escChar="\\") for i in _KNOWN_REGEXP_DELIM]) + pyp.Optional(pyp.Word(pyp.alphas), default="")) regexp_val.setParseAction(_ConvertRegexpValue) regexp_cond = (field_name + pyp.Suppress("=~") + regexp_val) regexp_cond.setParseAction(lambda (field, value): [[OP_REGEXP, field, value]]) not_regexp_cond = (field_name + pyp.Suppress("!~") + regexp_val) not_regexp_cond.setParseAction(lambda (field, value): [[OP_NOT, [OP_REGEXP, field, value]]]) # Globbing, e.g. name =* "*.site" glob_cond = (field_name + pyp.Suppress("=*") + quoted_string) glob_cond.setParseAction(lambda (field, value): [[OP_REGEXP, field, utils.DnsNameGlobPattern(value)]]) not_glob_cond = (field_name + pyp.Suppress("!*") + quoted_string) not_glob_cond.setParseAction(lambda (field, value): [[OP_NOT, [OP_REGEXP, field, utils.DnsNameGlobPattern(value)]]]) # All possible conditions condition = (binary_cond ^ bool_cond ^ in_cond ^ not_in_cond ^ regexp_cond ^ not_regexp_cond ^ glob_cond ^ not_glob_cond) # Associativity operators filter_expr = pyp.operatorPrecedence(condition, [ (pyp.Keyword("not").suppress(), 1, pyp.opAssoc.RIGHT, lambda toks: [[OP_NOT, toks[0][0]]]), (pyp.Keyword("and").suppress(), 2, pyp.opAssoc.LEFT, _ConvertLogicOp(OP_AND)), (pyp.Keyword("or").suppress(), 2, pyp.opAssoc.LEFT, _ConvertLogicOp(OP_OR)), ]) parser = pyp.StringStart() + filter_expr + pyp.StringEnd() parser.parseWithTabs() # Originally C{parser.validate} was called here, but there seems to be some # issue causing it to fail whenever the "not" operator is included above. return parser def ParseFilter(text, parser=None): """Parses a query filter. @type text: string @param text: Query filter @type parser: pyparsing.ParserElement @param parser: Pyparsing object @rtype: list """ logging.debug("Parsing as query filter: %s", text) if parser is None: parser = BuildFilterParser() try: return parser.parseString(text)[0] except pyp.ParseBaseException, err: raise errors.QueryFilterParseError("Failed to parse query filter" " '%s': %s" % (text, err), err) def _CheckFilter(text): """Checks if a string could be a filter. @rtype: bool """ return bool(frozenset(text) & FILTER_DETECTION_CHARS) def _CheckGlobbing(text): """Checks if a string could be a globbing pattern. @rtype: bool """ return bool(frozenset(text) & GLOB_DETECTION_CHARS) def _MakeFilterPart(namefield, text, isnumeric=False): """Generates filter for one argument. """ if isnumeric: try: number = int(text) except (TypeError, ValueError), err: raise errors.OpPrereqError("Invalid job ID passed: %s" % str(err), errors.ECODE_INVAL) return [OP_EQUAL, namefield, number] elif _CheckGlobbing(text): return [OP_REGEXP, namefield, utils.DnsNameGlobPattern(text)] else: return [OP_EQUAL, namefield, text] def MakeFilter(args, force_filter, namefield=None, isnumeric=False): """Try to make a filter from arguments to a command. If the name could be a filter it is parsed as such. If it's just a globbing pattern, e.g. "*.site", such a filter is constructed. As a last resort the names are treated just as a plain name filter.
@type args: list of string @param args: Arguments to command @type force_filter: bool @param force_filter: Whether to force treatment as a full-fledged filter @type namefield: string @param namefield: Name of field to use for simple filters (use L{None} for a default of "name") @type isnumeric: bool @param isnumeric: Whether the namefield type is numeric, as opposed to the default string type; this influences how the filter is built @rtype: list @return: Query filter """ if namefield is None: namefield = "name" if (force_filter or (args and len(args) == 1 and _CheckFilter(args[0]))): try: (filter_text, ) = args except (TypeError, ValueError): raise errors.OpPrereqError("Exactly one argument must be given as a" " filter", errors.ECODE_INVAL) result = ParseFilter(filter_text) elif args: result = [OP_OR] + map(compat.partial(_MakeFilterPart, namefield, isnumeric=isnumeric), args) else: result = None return result ganeti-2.9.3/lib/mcpu.py0000644000000000000000000004365012271422343015066 0ustar00rootroot00000000000000# # # Copyright (C) 2006, 2007, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Module implementing the logic behind the cluster operations This module implements the logic for doing operations in the cluster. There are two kinds of classes defined: - logical units, which know how to deal with their specific opcode only - the processor, which dispatches the opcodes to their logical units """ import sys import logging import random import time import itertools import traceback from ganeti import opcodes from ganeti import constants from ganeti import errors from ganeti import hooksmaster from ganeti import cmdlib from ganeti import locking from ganeti import utils from ganeti import compat _OP_PREFIX = "Op" _LU_PREFIX = "LU" #: LU classes which don't need to acquire the node allocation lock #: (L{locking.NAL}) when they acquire all node or node resource locks _NODE_ALLOC_WHITELIST = frozenset([]) #: LU classes which don't need to acquire the node allocation lock #: (L{locking.NAL}) in the same mode (shared/exclusive) as the node #: or node resource locks _NODE_ALLOC_MODE_WHITELIST = compat.UniqueFrozenset([ cmdlib.LUBackupExport, cmdlib.LUBackupRemove, cmdlib.LUOobCommand, ]) class LockAcquireTimeout(Exception): """Exception to report timeouts on acquiring locks. """ def _CalculateLockAttemptTimeouts(): """Calculate timeouts for lock attempts. """ result = [constants.LOCK_ATTEMPTS_MINWAIT] running_sum = result[0] # Wait for a total of at least LOCK_ATTEMPTS_TIMEOUT before doing a # blocking acquire while running_sum < constants.LOCK_ATTEMPTS_TIMEOUT: timeout = (result[-1] * 1.05) ** 1.25 # Cap max timeout. This gives other jobs a chance to run even if # we're still trying to get our locks, before finally moving to a # blocking acquire. 
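# Worked example, assuming the shipped defaults (LOCK_ATTEMPTS_MINWAIT of
# 1.0 second): the growth step above yields roughly 1.06, 1.15, 1.26, 1.42,
# 1.65, ... seconds, a gently accelerating back-off; the two caps below then
# clamp every value into [LOCK_ATTEMPTS_MINWAIT, LOCK_ATTEMPTS_MAXWAIT].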
timeout = min(timeout, constants.LOCK_ATTEMPTS_MAXWAIT) # And also cap the lower boundary for safety timeout = max(timeout, constants.LOCK_ATTEMPTS_MINWAIT) result.append(timeout) running_sum += timeout return result class LockAttemptTimeoutStrategy(object): """Class with lock acquire timeout strategy. """ __slots__ = [ "_timeouts", "_random_fn", "_time_fn", ] _TIMEOUT_PER_ATTEMPT = _CalculateLockAttemptTimeouts() def __init__(self, _time_fn=time.time, _random_fn=random.random): """Initializes this class. @param _time_fn: Time function for unittests @param _random_fn: Random number generator for unittests """ object.__init__(self) self._timeouts = iter(self._TIMEOUT_PER_ATTEMPT) self._time_fn = _time_fn self._random_fn = _random_fn def NextAttempt(self): """Returns the timeout for the next attempt. """ try: timeout = self._timeouts.next() except StopIteration: # No more timeouts, do blocking acquire timeout = None if timeout is not None: # Add a small variation (-/+ 5%) to timeout. This helps in situations # where two or more jobs are fighting for the same lock(s). variation_range = timeout * 0.1 timeout += ((self._random_fn() * variation_range) - (variation_range * 0.5)) return timeout class OpExecCbBase: # pylint: disable=W0232 """Base class for OpCode execution callbacks. """ def NotifyStart(self): """Called when we are about to execute the LU. This function is called when we're about to start the lu's Exec() method, that is, after we have acquired all locks. """ def Feedback(self, *args): """Sends feedback from the LU code to the end-user. """ def CurrentPriority(self): # pylint: disable=R0201 """Returns current priority or C{None}. """ return None def SubmitManyJobs(self, jobs): """Submits jobs for processing. See L{jqueue.JobQueue.SubmitManyJobs}. """ raise NotImplementedError def _LUNameForOpName(opname): """Computes the LU name for a given OpCode name. """ assert opname.startswith(_OP_PREFIX), \ "Invalid OpCode name, doesn't start with %s: %s" % (_OP_PREFIX, opname) return _LU_PREFIX + opname[len(_OP_PREFIX):] def _ComputeDispatchTable(): """Computes the opcode-to-lu dispatch table. """ return dict((op, getattr(cmdlib, _LUNameForOpName(op.__name__))) for op in opcodes.OP_MAPPING.values() if op.WITH_LU) def _SetBaseOpParams(src, defcomment, dst): """Copies basic opcode parameters. @type src: L{opcodes.OpCode} @param src: Source opcode @type defcomment: string @param defcomment: Comment to specify if not already given @type dst: L{opcodes.OpCode} @param dst: Destination opcode """ if hasattr(src, "debug_level"): dst.debug_level = src.debug_level if (getattr(dst, "priority", None) is None and hasattr(src, "priority")): dst.priority = src.priority if not getattr(dst, opcodes.COMMENT_ATTR, None): dst.comment = defcomment def _ProcessResult(submit_fn, op, result): """Examines opcode result. If necessary, additional processing on the result is done. """ if isinstance(result, cmdlib.ResultWithJobs): # Copy basic parameters (e.g. priority) map(compat.partial(_SetBaseOpParams, op, "Submitted by %s" % op.OP_ID), itertools.chain(*result.jobs)) # Submit jobs job_submission = submit_fn(result.jobs) # Build dictionary result = result.other assert constants.JOB_IDS_KEY not in result, \ "Key '%s' found in additional return values" % constants.JOB_IDS_KEY result[constants.JOB_IDS_KEY] = job_submission return result def _FailingSubmitManyJobs(_): """Implementation of L{OpExecCbBase.SubmitManyJobs} to raise an exception. """ raise errors.ProgrammerError("Opcodes processed without callbacks (e.g." 
" queries) can not submit jobs") def _VerifyLocks(lu, glm, _mode_whitelist=_NODE_ALLOC_MODE_WHITELIST, _nal_whitelist=_NODE_ALLOC_WHITELIST): """Performs consistency checks on locks acquired by a logical unit. @type lu: L{cmdlib.LogicalUnit} @param lu: Logical unit instance @type glm: L{locking.GanetiLockManager} @param glm: Lock manager """ if not __debug__: return have_nal = glm.check_owned(locking.LEVEL_NODE_ALLOC, locking.NAL) for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: # TODO: Verify using actual lock mode, not using LU variables if level in lu.needed_locks: share_node_alloc = lu.share_locks[locking.LEVEL_NODE_ALLOC] share_level = lu.share_locks[level] if lu.__class__ in _mode_whitelist: assert share_node_alloc != share_level, \ "LU is whitelisted to use different modes for node allocation lock" else: assert bool(share_node_alloc) == bool(share_level), \ ("Node allocation lock must be acquired using the same mode as nodes" " and node resources") if lu.__class__ in _nal_whitelist: assert not have_nal, \ "LU is whitelisted for not acquiring the node allocation lock" elif lu.needed_locks[level] == locking.ALL_SET or glm.owning_all(level): assert have_nal, \ ("Node allocation lock must be used if an LU acquires all nodes" " or node resources") class Processor(object): """Object which runs OpCodes""" DISPATCH_TABLE = _ComputeDispatchTable() def __init__(self, context, ec_id, enable_locks=True): """Constructor for Processor @type context: GanetiContext @param context: global Ganeti context @type ec_id: string @param ec_id: execution context identifier """ self.context = context self._ec_id = ec_id self._cbs = None self.rpc = context.rpc self.hmclass = hooksmaster.HooksMaster self._enable_locks = enable_locks def _CheckLocksEnabled(self): """Checks if locking is enabled. @raise errors.ProgrammerError: In case locking is not enabled """ if not self._enable_locks: raise errors.ProgrammerError("Attempted to use disabled locks") def _AcquireLocks(self, level, names, shared, opportunistic, timeout): """Acquires locks via the Ganeti lock manager. @type level: int @param level: Lock level @type names: list or string @param names: Lock names @type shared: bool @param shared: Whether the locks should be acquired in shared mode @type opportunistic: bool @param opportunistic: Whether to acquire opportunistically @type timeout: None or float @param timeout: Timeout for acquiring the locks @raise LockAcquireTimeout: In case locks couldn't be acquired in specified amount of time """ self._CheckLocksEnabled() if self._cbs: priority = self._cbs.CurrentPriority() else: priority = None acquired = self.context.glm.acquire(level, names, shared=shared, timeout=timeout, priority=priority, opportunistic=opportunistic) if acquired is None: raise LockAcquireTimeout() return acquired def _ExecLU(self, lu): """Logical Unit execution sequence. 
""" write_count = self.context.cfg.write_count lu.CheckPrereq() hm = self.BuildHooksManager(lu) h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE) lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results, self.Log, None) if getattr(lu.op, "dry_run", False): # in this mode, no post-hooks are run, and the config is not # written (as it might have been modified by another LU, and we # shouldn't do writeout on behalf of other threads self.LogInfo("dry-run mode requested, not actually executing" " the operation") return lu.dry_run_result if self._cbs: submit_mj_fn = self._cbs.SubmitManyJobs else: submit_mj_fn = _FailingSubmitManyJobs try: result = _ProcessResult(submit_mj_fn, lu.op, lu.Exec(self.Log)) h_results = hm.RunPhase(constants.HOOKS_PHASE_POST) result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, h_results, self.Log, result) finally: # FIXME: This needs locks if not lu_class.REQ_BGL if write_count != self.context.cfg.write_count: hm.RunConfigUpdate() return result def BuildHooksManager(self, lu): return self.hmclass.BuildFromLu(lu.rpc.call_hooks_runner, lu) def _LockAndExecLU(self, lu, level, calc_timeout): """Execute a Logical Unit, with the needed locks. This is a recursive function that starts locking the given level, and proceeds up, till there are no more locks to acquire. Then it executes the given LU and its opcodes. """ glm = self.context.glm adding_locks = level in lu.add_locks acquiring_locks = level in lu.needed_locks if level not in locking.LEVELS: _VerifyLocks(lu, glm) if self._cbs: self._cbs.NotifyStart() try: result = self._ExecLU(lu) except AssertionError, err: # this is a bit ugly, as we don't know from which phase # (prereq, exec) this comes; but it's better than an exception # with no information (_, _, tb) = sys.exc_info() err_info = traceback.format_tb(tb) del tb logging.exception("Detected AssertionError") raise errors.OpExecError("Internal assertion error: please report" " this as a bug.\nError message: '%s';" " location:\n%s" % (str(err), err_info[-1])) elif adding_locks and acquiring_locks: # We could both acquire and add locks at the same level, but for now we # don't need this, so we'll avoid the complicated code needed. raise NotImplementedError("Can't declare locks to acquire when adding" " others") elif adding_locks or acquiring_locks: self._CheckLocksEnabled() lu.DeclareLocks(level) share = lu.share_locks[level] opportunistic = lu.opportunistic_locks[level] try: assert adding_locks ^ acquiring_locks, \ "Locks must be either added or acquired" if acquiring_locks: # Acquiring locks needed_locks = lu.needed_locks[level] self._AcquireLocks(level, needed_locks, share, opportunistic, calc_timeout()) else: # Adding locks add_locks = lu.add_locks[level] lu.remove_locks[level] = add_locks try: glm.add(level, add_locks, acquired=1, shared=share) except errors.LockError: logging.exception("Detected lock error in level %s for locks" " %s, shared=%s", level, add_locks, share) raise errors.OpPrereqError( "Couldn't add locks (%s), most likely because of another" " job who added them first" % add_locks, errors.ECODE_NOTUNIQUE) try: result = self._LockAndExecLU(lu, level + 1, calc_timeout) finally: if level in lu.remove_locks: glm.remove(level, lu.remove_locks[level]) finally: if glm.is_owned(level): glm.release(level) else: result = self._LockAndExecLU(lu, level + 1, calc_timeout) return result def ExecOpCode(self, op, cbs, timeout=None): """Execute an opcode. 
@type op: an OpCode instance @param op: the opcode to be executed @type cbs: L{OpExecCbBase} @param cbs: Runtime callbacks @type timeout: float or None @param timeout: Maximum time to acquire all locks, None for no timeout @raise LockAcquireTimeout: In case locks couldn't be acquired in specified amount of time """ if not isinstance(op, opcodes.OpCode): raise errors.ProgrammerError("Non-opcode instance passed" " to ExecOpCode (%s)" % type(op)) lu_class = self.DISPATCH_TABLE.get(op.__class__, None) if lu_class is None: raise errors.OpCodeUnknown("Unknown opcode") if timeout is None: calc_timeout = lambda: None else: calc_timeout = utils.RunningTimeout(timeout, False).Remaining self._cbs = cbs try: if self._enable_locks: # Acquire the Big Ganeti Lock exclusively if this LU requires it, # and in a shared fashion otherwise (to prevent concurrent run with # an exclusive LU) self._AcquireLocks(locking.LEVEL_CLUSTER, locking.BGL, not lu_class.REQ_BGL, False, calc_timeout()) elif lu_class.REQ_BGL: raise errors.ProgrammerError("Opcode '%s' requires BGL, but locks are" " disabled" % op.OP_ID) try: lu = lu_class(self, op, self.context, self.rpc) lu.ExpandNames() assert lu.needed_locks is not None, "needed_locks not set by LU" try: result = self._LockAndExecLU(lu, locking.LEVEL_CLUSTER + 1, calc_timeout) finally: if self._ec_id: self.context.cfg.DropECReservations(self._ec_id) finally: # Release BGL if owned if self.context.glm.is_owned(locking.LEVEL_CLUSTER): assert self._enable_locks self.context.glm.release(locking.LEVEL_CLUSTER) finally: self._cbs = None resultcheck_fn = op.OP_RESULT if not (resultcheck_fn is None or resultcheck_fn(result)): logging.error("Expected opcode result matching %s, got %s", resultcheck_fn, result) if not getattr(op, "dry_run", False): # FIXME: LUs should still behave in dry_run mode, or # alternately we should have OP_DRYRUN_RESULT; in the # meantime, we simply skip the OP_RESULT check in dry-run mode raise errors.OpResultError("Opcode result does not match %s: %s" % (resultcheck_fn, utils.Truncate(result, 80))) return result def Log(self, *args): """Forward call to feedback callback function. """ if self._cbs: self._cbs.Feedback(*args) def LogStep(self, current, total, message): """Log a change in LU execution progress. """ logging.debug("Step %d/%d %s", current, total, message) self.Log("STEP %d/%d %s" % (current, total, message)) def LogWarning(self, message, *args, **kwargs): """Log a warning to the logs and the user. The optional keyword argument is 'hint' and can be used to show a hint to the user (presumably related to the warning). If the message is empty, it will not be printed at all, allowing one to show only a hint. """ assert not kwargs or (len(kwargs) == 1 and "hint" in kwargs), \ "Invalid keyword arguments for LogWarning (%s)" % str(kwargs) if args: message = message % tuple(args) if message: logging.warning(message) self.Log(" - WARNING: %s" % message) if "hint" in kwargs: self.Log(" Hint: %s" % kwargs["hint"]) def LogInfo(self, message, *args): """Log an informational message to the logs and the user. """ if args: message = message % tuple(args) logging.info(message) self.Log(" - INFO: %s" % message) def GetECId(self): """Returns the current execution context ID.
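# Illustrative example, not part of the original module: the dispatch
# table used by ExecOpCode above is keyed purely on the Op/LU naming
# convention enforced by _LUNameForOpName, so an opcode class maps to its
# logical unit by name alone.
def _ExampleDispatchName():
  # "OpInstanceCreate" becomes "LUInstanceCreate"
  return _LUNameForOpName("OpInstanceCreate")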
""" if not self._ec_id: raise errors.ProgrammerError("Tried to use execution context id when" " not set") return self._ec_id ganeti-2.9.3/lib/tools/0000755000000000000000000000000012271445544014710 5ustar00rootroot00000000000000ganeti-2.9.3/lib/tools/burnin.py0000744000000000000000000012323312271422343016554 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Burnin program """ import sys import optparse import time import socket import urllib from itertools import izip, islice, cycle from cStringIO import StringIO from ganeti import opcodes from ganeti import constants from ganeti import cli from ganeti import errors from ganeti import utils from ganeti import hypervisor from ganeti import compat from ganeti import pathutils from ganeti.confd import client as confd_client USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...") MAX_RETRIES = 3 LOG_HEADERS = { 0: "- ", 1: "* ", 2: "", } #: Disk templates supporting a single node _SINGLE_NODE_DISK_TEMPLATES = compat.UniqueFrozenset([ constants.DT_DISKLESS, constants.DT_PLAIN, constants.DT_FILE, constants.DT_SHARED_FILE, constants.DT_EXT, constants.DT_RBD, ]) _SUPPORTED_DISK_TEMPLATES = compat.UniqueFrozenset([ constants.DT_DISKLESS, constants.DT_DRBD8, constants.DT_EXT, constants.DT_FILE, constants.DT_PLAIN, constants.DT_RBD, constants.DT_SHARED_FILE, ]) #: Disk templates for which import/export is tested _IMPEXP_DISK_TEMPLATES = (_SUPPORTED_DISK_TEMPLATES - frozenset([ constants.DT_DISKLESS, constants.DT_FILE, constants.DT_SHARED_FILE, ])) class InstanceDown(Exception): """The checked instance was not up""" class BurninFailure(Exception): """Failure detected during burning""" def Usage(): """Shows program usage information and exits the program.""" print >> sys.stderr, "Usage:" print >> sys.stderr, USAGE sys.exit(2) def Log(msg, *args, **kwargs): """Simple function that prints out its argument. """ if args: msg = msg % args indent = kwargs.get("indent", 0) sys.stdout.write("%*s%s%s\n" % (2 * indent, "", LOG_HEADERS.get(indent, " "), msg)) sys.stdout.flush() def Err(msg, exit_code=1): """Simple error logging that prints to stderr. 
""" sys.stderr.write(msg + "\n") sys.stderr.flush() sys.exit(exit_code) class SimpleOpener(urllib.FancyURLopener): """A simple url opener""" # pylint: disable=W0221 def prompt_user_passwd(self, host, realm, clear_cache=0): """No-interaction version of prompt_user_passwd.""" # we follow parent class' API # pylint: disable=W0613 return None, None def http_error_default(self, url, fp, errcode, errmsg, headers): """Custom error handling""" # make sure sockets are not left in CLOSE_WAIT, this is similar # but with a different exception to the BasicURLOpener class _ = fp.read() # throw away data fp.close() raise InstanceDown("HTTP error returned: code %s, msg %s" % (errcode, errmsg)) OPTIONS = [ cli.cli_option("-o", "--os", dest="os", default=None, help="OS to use during burnin", metavar="", completion_suggest=cli.OPT_COMPL_ONE_OS), cli.HYPERVISOR_OPT, cli.OSPARAMS_OPT, cli.cli_option("--disk-size", dest="disk_size", help="Disk size (determines disk count)", default="128m", type="string", metavar="", completion_suggest=("128M 512M 1G 4G 1G,256M" " 4G,1G,1G 10G").split()), cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth", default="128m", type="string", metavar=""), cli.cli_option("--mem-size", dest="mem_size", help="Memory size", default=None, type="unit", metavar="", completion_suggest=("128M 256M 512M 1G 4G 8G" " 12G 16G").split()), cli.cli_option("--maxmem-size", dest="maxmem_size", help="Max Memory size", default=256, type="unit", metavar="", completion_suggest=("128M 256M 512M 1G 4G 8G" " 12G 16G").split()), cli.cli_option("--minmem-size", dest="minmem_size", help="Min Memory size", default=128, type="unit", metavar="", completion_suggest=("128M 256M 512M 1G 4G 8G" " 12G 16G").split()), cli.cli_option("--vcpu-count", dest="vcpu_count", help="VCPU count", default=3, type="unit", metavar="", completion_suggest=("1 2 3 4").split()), cli.DEBUG_OPT, cli.VERBOSE_OPT, cli.NOIPCHECK_OPT, cli.NONAMECHECK_OPT, cli.EARLY_RELEASE_OPT, cli.cli_option("--no-replace1", dest="do_replace1", help="Skip disk replacement with the same secondary", action="store_false", default=True), cli.cli_option("--no-replace2", dest="do_replace2", help="Skip disk replacement with a different secondary", action="store_false", default=True), cli.cli_option("--no-failover", dest="do_failover", help="Skip instance failovers", action="store_false", default=True), cli.cli_option("--no-migrate", dest="do_migrate", help="Skip instance live migration", action="store_false", default=True), cli.cli_option("--no-move", dest="do_move", help="Skip instance moves", action="store_false", default=True), cli.cli_option("--no-importexport", dest="do_importexport", help="Skip instance export/import", action="store_false", default=True), cli.cli_option("--no-startstop", dest="do_startstop", help="Skip instance stop/start", action="store_false", default=True), cli.cli_option("--no-reinstall", dest="do_reinstall", help="Skip instance reinstall", action="store_false", default=True), cli.cli_option("--no-reboot", dest="do_reboot", help="Skip instance reboot", action="store_false", default=True), cli.cli_option("--no-renamesame", dest="do_renamesame", help="Skip instance rename to same name", action="store_false", default=True), cli.cli_option("--reboot-types", dest="reboot_types", help="Specify the reboot types", default=None), cli.cli_option("--no-activate-disks", dest="do_activate_disks", help="Skip disk activation/deactivation", action="store_false", default=True), cli.cli_option("--no-add-disks", dest="do_addremove_disks", 
help="Skip disk addition/removal", action="store_false", default=True), cli.cli_option("--no-add-nics", dest="do_addremove_nics", help="Skip NIC addition/removal", action="store_false", default=True), cli.cli_option("--no-nics", dest="nics", help="No network interfaces", action="store_const", const=[], default=[{}]), cli.cli_option("--no-confd", dest="do_confd_tests", help="Skip confd queries", action="store_false", default=constants.ENABLE_CONFD), cli.cli_option("--rename", dest="rename", default=None, help=("Give one unused instance name which is taken" " to start the renaming sequence"), metavar=""), cli.cli_option("-t", "--disk-template", dest="disk_template", choices=list(_SUPPORTED_DISK_TEMPLATES), default=constants.DT_DRBD8, help=("Disk template (default %s, otherwise one of %s)" % (constants.DT_DRBD8, utils.CommaJoin(_SUPPORTED_DISK_TEMPLATES)))), cli.cli_option("-n", "--nodes", dest="nodes", default="", help=("Comma separated list of nodes to perform" " the burnin on (defaults to all nodes)"), completion_suggest=cli.OPT_COMPL_MANY_NODES), cli.cli_option("-I", "--iallocator", dest="iallocator", default=None, type="string", help=("Perform the allocation using an iallocator" " instead of fixed node spread (node restrictions no" " longer apply, therefore -n/--nodes must not be" " used"), completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR), cli.cli_option("-p", "--parallel", default=False, action="store_true", dest="parallel", help=("Enable parallelization of some operations in" " order to speed burnin or to test granular locking")), cli.cli_option("--net-timeout", default=15, type="int", dest="net_timeout", help=("The instance check network timeout in seconds" " (defaults to 15 seconds)"), completion_suggest="15 60 300 900".split()), cli.cli_option("-C", "--http-check", default=False, action="store_true", dest="http_check", help=("Enable checking of instance status via http," " looking for /hostname.txt that should contain the" " name of the instance")), cli.cli_option("-K", "--keep-instances", default=False, action="store_true", dest="keep_instances", help=("Leave instances on the cluster after burnin," " for investigation in case of errors or simply" " to use them")), cli.REASON_OPT, ] # Mainly used for bash completion ARGUMENTS = [cli.ArgInstance(min=1)] def _DoCheckInstances(fn): """Decorator for checking instances. """ def wrapper(self, *args, **kwargs): val = fn(self, *args, **kwargs) for instance in self.instances: self._CheckInstanceAlive(instance) # pylint: disable=W0212 return val return wrapper def _DoBatch(retry): """Decorator for possible batch operations. Must come after the _DoCheckInstances decorator (if any). 
@param retry: whether this is a retryable batch, will be passed to StartBatch """ def wrap(fn): def batched(self, *args, **kwargs): self.StartBatch(retry) val = fn(self, *args, **kwargs) self.CommitQueue() return val return batched return wrap class Burner(object): """Burner class.""" def __init__(self): """Constructor.""" self.url_opener = SimpleOpener() self._feed_buf = StringIO() self.nodes = [] self.instances = [] self.to_rem = [] self.queued_ops = [] self.opts = None self.queue_retry = False self.disk_count = self.disk_growth = self.disk_size = None self.hvp = self.bep = None self.ParseOptions() self.cl = cli.GetClient() self.GetState() def ClearFeedbackBuf(self): """Clear the feedback buffer.""" self._feed_buf.truncate(0) def GetFeedbackBuf(self): """Return the contents of the buffer.""" return self._feed_buf.getvalue() def Feedback(self, msg): """Accumulate feedback in our buffer.""" formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2]) self._feed_buf.write(formatted_msg + "\n") if self.opts.verbose: Log(formatted_msg, indent=3) def MaybeRetry(self, retry_count, msg, fn, *args): """Possibly retry a given function execution. @type retry_count: int @param retry_count: retry counter: - 0: non-retryable action - 1: last retry for a retryable action - MAX_RETRIES: original try for a retryable action @type msg: str @param msg: the kind of the operation @type fn: callable @param fn: the function to be called """ try: val = fn(*args) if retry_count > 0 and retry_count < MAX_RETRIES: Log("Idempotent %s succeeded after %d retries", msg, MAX_RETRIES - retry_count) return val except Exception, err: # pylint: disable=W0703 if retry_count == 0: Log("Non-idempotent %s failed, aborting", msg) raise elif retry_count == 1: Log("Idempotent %s repeated failure, aborting", msg) raise else: Log("Idempotent %s failed, retry #%d/%d: %s", msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err) return self.MaybeRetry(retry_count - 1, msg, fn, *args) def _ExecOp(self, *ops): """Execute one or more opcodes and manage the exec buffer. @return: if only one opcode has been passed, we return its result; otherwise we return the list of results """ job_id = cli.SendJob(ops, cl=self.cl) results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback) if len(ops) == 1: return results[0] else: return results def ExecOp(self, retry, *ops): """Execute one or more opcodes and manage the exec buffer. @return: if only one opcode has been passed, we return its result; otherwise we return the list of results """ if retry: rval = MAX_RETRIES else: rval = 0 cli.SetGenericOpcodeOpts(ops, self.opts) return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops) def ExecOrQueue(self, name, ops, post_process=None): """Execute an opcode and manage the exec buffer.""" if self.opts.parallel: cli.SetGenericOpcodeOpts(ops, self.opts) self.queued_ops.append((ops, name, post_process)) else: val = self.ExecOp(self.queue_retry, *ops) # pylint: disable=W0142 if post_process is not None: post_process() return val def StartBatch(self, retry): """Start a new batch of jobs.
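# Illustrative example, not part of the original tool, of the retry ladder
# in MaybeRetry above: a retryable call starts at MAX_RETRIES and counts
# down to 1, so a transiently failing function runs at most MAX_RETRIES
# times.
def _ExampleMaybeRetry(burner):
  state = {"calls": 0}
  def _Flaky():
    state["calls"] += 1
    if state["calls"] < MAX_RETRIES:
      raise errors.GenericError("simulated transient failure")
    return "ok"
  # succeeds on the final attempt; passing 0 would make it non-retryable
  return burner.MaybeRetry(MAX_RETRIES, "example operation", _Flaky)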
@param retry: whether this is a retryable batch """ self.queued_ops = [] self.queue_retry = retry def CommitQueue(self): """Execute all submitted opcodes in case of parallel burnin""" if not self.opts.parallel or not self.queued_ops: return if self.queue_retry: rval = MAX_RETRIES else: rval = 0 try: results = self.MaybeRetry(rval, "jobset", self.ExecJobSet, self.queued_ops) finally: self.queued_ops = [] return results def ExecJobSet(self, jobs): """Execute a set of jobs and return once all are done. The method will return the list of results, if all jobs are successful. Otherwise, OpExecError will be raised from within cli.py. """ self.ClearFeedbackBuf() jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback) for ops, name, _ in jobs: jex.QueueJob(name, *ops) # pylint: disable=W0142 try: results = jex.GetResults() except Exception, err: # pylint: disable=W0703 Log("Jobs failed: %s", err) raise BurninFailure() fail = False val = [] for (_, name, post_process), (success, result) in zip(jobs, results): if success: if post_process: try: post_process() except Exception, err: # pylint: disable=W0703 Log("Post process call for job %s failed: %s", name, err) fail = True val.append(result) else: fail = True if fail: raise BurninFailure() return val def ParseOptions(self): """Parses the command line options. In case of command line errors, it will show the usage and exit the program. """ parser = optparse.OptionParser(usage="\n%s" % USAGE, version=("%%prog (ganeti) %s" % constants.RELEASE_VERSION), option_list=OPTIONS) options, args = parser.parse_args() if len(args) < 1 or options.os is None: Usage() if options.mem_size: options.maxmem_size = options.mem_size options.minmem_size = options.mem_size elif options.minmem_size > options.maxmem_size: Err("Maximum memory lower than minimum memory") if options.disk_template not in _SUPPORTED_DISK_TEMPLATES: Err("Unknown or unsupported disk template '%s'" % options.disk_template) if options.disk_template == constants.DT_DISKLESS: disk_size = disk_growth = [] options.do_addremove_disks = False else: disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")] disk_growth = [utils.ParseUnit(v) for v in options.disk_growth.split(",")] if len(disk_growth) != len(disk_size): Err("Wrong disk sizes/growth combination") if ((disk_size and options.disk_template == constants.DT_DISKLESS) or (not disk_size and options.disk_template != constants.DT_DISKLESS)): Err("Wrong disk count/disk template combination") self.disk_size = disk_size self.disk_growth = disk_growth self.disk_count = len(disk_size) if options.nodes and options.iallocator: Err("Give either the nodes option or the iallocator option, not both") if options.http_check and not options.name_check: Err("Can't enable HTTP checks without name checks") self.opts = options self.instances = args self.bep = { constants.BE_MINMEM: options.minmem_size, constants.BE_MAXMEM: options.maxmem_size, constants.BE_VCPUS: options.vcpu_count, } self.hypervisor = None self.hvp = {} if options.hypervisor: self.hypervisor, self.hvp = options.hypervisor if options.reboot_types is None: options.reboot_types = constants.REBOOT_TYPES else: options.reboot_types = options.reboot_types.split(",") rt_diff = set(options.reboot_types).difference(constants.REBOOT_TYPES) if rt_diff: Err("Invalid reboot types specified: %s" % utils.CommaJoin(rt_diff)) socket.setdefaulttimeout(options.net_timeout) def GetState(self): """Read the cluster state from the master daemon.""" if self.opts.nodes: names = self.opts.nodes.split(",") else: 
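# Illustrative example, not part of the original tool, of the disk
# size/growth handling in ParseOptions above; the option values are
# invented. "--disk-size 1G,256M --disk-growth 2G,1G" yields two disks
# with matching growth amounts, converted to mebibytes by utils.ParseUnit:
def _ExampleDiskLists():
  disk_size = [utils.ParseUnit(v) for v in "1G,256M".split(",")]
  disk_growth = [utils.ParseUnit(v) for v in "2G,1G".split(",")]
  assert len(disk_growth) == len(disk_size)  # enforced by ParseOptions
  return disk_size, disk_growth  # ([1024, 256], [2048, 1024])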
names = [] try: op = opcodes.OpNodeQuery(output_fields=["name", "offline", "drained"], names=names, use_locking=True) result = self.ExecOp(True, op) except errors.GenericError, err: err_code, msg = cli.FormatError(err) Err(msg, exit_code=err_code) self.nodes = [data[0] for data in result if not (data[1] or data[2])] op_diagnose = opcodes.OpOsDiagnose(output_fields=["name", "variants", "hidden"], names=[]) result = self.ExecOp(True, op_diagnose) if not result: Err("Can't get the OS list") found = False for (name, variants, _) in result: if self.opts.os in cli.CalculateOSNames(name, variants): found = True break if not found: Err("OS '%s' not found" % self.opts.os) cluster_info = self.cl.QueryClusterInfo() self.cluster_info = cluster_info if not self.cluster_info: Err("Can't get cluster info") default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT] self.cluster_default_nicparams = default_nic_params if self.hypervisor is None: self.hypervisor = self.cluster_info["default_hypervisor"] self.hv_can_migrate = \ hypervisor.GetHypervisorClass(self.hypervisor).CAN_MIGRATE @_DoCheckInstances @_DoBatch(False) def BurnCreateInstances(self): """Create the given instances. """ self.to_rem = [] mytor = izip(cycle(self.nodes), islice(cycle(self.nodes), 1, None), self.instances) Log("Creating instances") for pnode, snode, instance in mytor: Log("instance %s", instance, indent=1) if self.opts.iallocator: pnode = snode = None msg = "with iallocator %s" % self.opts.iallocator elif self.opts.disk_template not in constants.DTS_INT_MIRROR: snode = None msg = "on %s" % pnode else: msg = "on %s, %s" % (pnode, snode) Log(msg, indent=2) op = opcodes.OpInstanceCreate(instance_name=instance, disks=[{"size": size} for size in self.disk_size], disk_template=self.opts.disk_template, nics=self.opts.nics, mode=constants.INSTANCE_CREATE, os_type=self.opts.os, pnode=pnode, snode=snode, start=True, ip_check=self.opts.ip_check, name_check=self.opts.name_check, wait_for_sync=True, file_driver="loop", file_storage_dir=None, iallocator=self.opts.iallocator, beparams=self.bep, hvparams=self.hvp, hypervisor=self.hypervisor, osparams=self.opts.osparams, ) remove_instance = lambda name: lambda: self.to_rem.append(name) self.ExecOrQueue(instance, [op], post_process=remove_instance(instance)) @_DoBatch(False) def BurnModifyRuntimeMemory(self): """Alter the runtime memory.""" Log("Setting instance runtime memory") for instance in self.instances: Log("instance %s", instance, indent=1) tgt_mem = self.bep[constants.BE_MINMEM] op = opcodes.OpInstanceSetParams(instance_name=instance, runtime_mem=tgt_mem) Log("Set memory to %s MB", tgt_mem, indent=2) self.ExecOrQueue(instance, [op]) @_DoBatch(False) def BurnGrowDisks(self): """Grow both the os and the swap disks by the requested amount, if any.""" Log("Growing disks") for instance in self.instances: Log("instance %s", instance, indent=1) for idx, growth in enumerate(self.disk_growth): if growth > 0: op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=idx, amount=growth, wait_for_sync=True) Log("increase disk/%s by %s MB", idx, growth, indent=2) self.ExecOrQueue(instance, [op]) @_DoBatch(True) def BurnReplaceDisks1D8(self): """Replace disks on primary and secondary for drbd8.""" Log("Replacing disks on the same nodes") early_release = self.opts.early_release for instance in self.instances: Log("instance %s", instance, indent=1) ops = [] for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI: op = opcodes.OpInstanceReplaceDisks(instance_name=instance, 
mode=mode, disks=list(range(self.disk_count)), early_release=early_release) Log("run %s", mode, indent=2) ops.append(op) self.ExecOrQueue(instance, ops) @_DoBatch(True) def BurnReplaceDisks2(self): """Replace secondary node.""" Log("Changing the secondary node") mode = constants.REPLACE_DISK_CHG mytor = izip(islice(cycle(self.nodes), 2, None), self.instances) for tnode, instance in mytor: Log("instance %s", instance, indent=1) if self.opts.iallocator: tnode = None msg = "with iallocator %s" % self.opts.iallocator else: msg = tnode op = opcodes.OpInstanceReplaceDisks(instance_name=instance, mode=mode, remote_node=tnode, iallocator=self.opts.iallocator, disks=[], early_release=self.opts.early_release) Log("run %s %s", mode, msg, indent=2) self.ExecOrQueue(instance, [op]) @_DoCheckInstances @_DoBatch(False) def BurnFailover(self): """Failover the instances.""" Log("Failing over instances") for instance in self.instances: Log("instance %s", instance, indent=1) op = opcodes.OpInstanceFailover(instance_name=instance, ignore_consistency=False) self.ExecOrQueue(instance, [op]) @_DoCheckInstances @_DoBatch(False) def BurnMove(self): """Move the instances.""" Log("Moving instances") mytor = izip(islice(cycle(self.nodes), 1, None), self.instances) for tnode, instance in mytor: Log("instance %s", instance, indent=1) op = opcodes.OpInstanceMove(instance_name=instance, target_node=tnode) self.ExecOrQueue(instance, [op]) @_DoBatch(False) def BurnMigrate(self): """Migrate the instances.""" Log("Migrating instances") for instance in self.instances: Log("instance %s", instance, indent=1) op1 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None, cleanup=False) op2 = opcodes.OpInstanceMigrate(instance_name=instance, mode=None, cleanup=True) Log("migration and migration cleanup", indent=2) self.ExecOrQueue(instance, [op1, op2]) @_DoCheckInstances @_DoBatch(False) def BurnImportExport(self): """Export the instance, delete it, and import it back. 
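# Illustrative example, not part of the original tool, of the round-robin
# node assignment used by several Burn* methods; node and instance names
# are invented. cycle/islice pair every instance with a primary node and
# the next node in the ring as secondary:
def _ExampleNodePairing():
  nodes = ["node1", "node2", "node3"]
  instances = ["inst1", "inst2", "inst3", "inst4"]
  return list(izip(cycle(nodes), islice(cycle(nodes), 1, None), instances))
  # [("node1", "node2", "inst1"), ("node2", "node3", "inst2"),
  #  ("node3", "node1", "inst3"), ("node1", "node2", "inst4")]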
""" Log("Exporting and re-importing instances") mytor = izip(cycle(self.nodes), islice(cycle(self.nodes), 1, None), islice(cycle(self.nodes), 2, None), self.instances) for pnode, snode, enode, instance in mytor: Log("instance %s", instance, indent=1) # read the full name of the instance nam_op = opcodes.OpInstanceQuery(output_fields=["name"], names=[instance], use_locking=True) full_name = self.ExecOp(False, nam_op)[0][0] if self.opts.iallocator: pnode = snode = None import_log_msg = ("import from %s" " with iallocator %s" % (enode, self.opts.iallocator)) elif self.opts.disk_template not in constants.DTS_INT_MIRROR: snode = None import_log_msg = ("import from %s to %s" % (enode, pnode)) else: import_log_msg = ("import from %s to %s, %s" % (enode, pnode, snode)) exp_op = opcodes.OpBackupExport(instance_name=instance, target_node=enode, mode=constants.EXPORT_MODE_LOCAL, shutdown=True) rem_op = opcodes.OpInstanceRemove(instance_name=instance, ignore_failures=True) imp_dir = utils.PathJoin(pathutils.EXPORT_DIR, full_name) imp_op = opcodes.OpInstanceCreate(instance_name=instance, disks=[{"size": size} for size in self.disk_size], disk_template=self.opts.disk_template, nics=self.opts.nics, mode=constants.INSTANCE_IMPORT, src_node=enode, src_path=imp_dir, pnode=pnode, snode=snode, start=True, ip_check=self.opts.ip_check, name_check=self.opts.name_check, wait_for_sync=True, file_storage_dir=None, file_driver="loop", iallocator=self.opts.iallocator, beparams=self.bep, hvparams=self.hvp, osparams=self.opts.osparams, ) erem_op = opcodes.OpBackupRemove(instance_name=instance) Log("export to node %s", enode, indent=2) Log("remove instance", indent=2) Log(import_log_msg, indent=2) Log("remove export", indent=2) self.ExecOrQueue(instance, [exp_op, rem_op, imp_op, erem_op]) @staticmethod def StopInstanceOp(instance): """Stop given instance.""" return opcodes.OpInstanceShutdown(instance_name=instance) @staticmethod def StartInstanceOp(instance): """Start given instance.""" return opcodes.OpInstanceStartup(instance_name=instance, force=False) @staticmethod def RenameInstanceOp(instance, instance_new): """Rename instance.""" return opcodes.OpInstanceRename(instance_name=instance, new_name=instance_new) @_DoCheckInstances @_DoBatch(True) def BurnStopStart(self): """Stop/start the instances.""" Log("Stopping and starting instances") for instance in self.instances: Log("instance %s", instance, indent=1) op1 = self.StopInstanceOp(instance) op2 = self.StartInstanceOp(instance) self.ExecOrQueue(instance, [op1, op2]) @_DoBatch(False) def BurnRemove(self): """Remove the instances.""" Log("Removing instances") for instance in self.to_rem: Log("instance %s", instance, indent=1) op = opcodes.OpInstanceRemove(instance_name=instance, ignore_failures=True) self.ExecOrQueue(instance, [op]) def BurnRename(self): """Rename the instances. Note that this function will not execute in parallel, since we only have one target for rename. 
""" Log("Renaming instances") rename = self.opts.rename for instance in self.instances: Log("instance %s", instance, indent=1) op_stop1 = self.StopInstanceOp(instance) op_stop2 = self.StopInstanceOp(rename) op_rename1 = self.RenameInstanceOp(instance, rename) op_rename2 = self.RenameInstanceOp(rename, instance) op_start1 = self.StartInstanceOp(rename) op_start2 = self.StartInstanceOp(instance) self.ExecOp(False, op_stop1, op_rename1, op_start1) self._CheckInstanceAlive(rename) self.ExecOp(False, op_stop2, op_rename2, op_start2) self._CheckInstanceAlive(instance) @_DoCheckInstances @_DoBatch(True) def BurnReinstall(self): """Reinstall the instances.""" Log("Reinstalling instances") for instance in self.instances: Log("instance %s", instance, indent=1) op1 = self.StopInstanceOp(instance) op2 = opcodes.OpInstanceReinstall(instance_name=instance) Log("reinstall without passing the OS", indent=2) op3 = opcodes.OpInstanceReinstall(instance_name=instance, os_type=self.opts.os) Log("reinstall specifying the OS", indent=2) op4 = self.StartInstanceOp(instance) self.ExecOrQueue(instance, [op1, op2, op3, op4]) @_DoCheckInstances @_DoBatch(True) def BurnReboot(self): """Reboot the instances.""" Log("Rebooting instances") for instance in self.instances: Log("instance %s", instance, indent=1) ops = [] for reboot_type in self.opts.reboot_types: op = opcodes.OpInstanceReboot(instance_name=instance, reboot_type=reboot_type, ignore_secondaries=False) Log("reboot with type '%s'", reboot_type, indent=2) ops.append(op) self.ExecOrQueue(instance, ops) @_DoCheckInstances @_DoBatch(True) def BurnRenameSame(self): """Rename the instances to their own name.""" Log("Renaming the instances to their own name") for instance in self.instances: Log("instance %s", instance, indent=1) op1 = self.StopInstanceOp(instance) op2 = self.RenameInstanceOp(instance, instance) Log("rename to the same name", indent=2) op4 = self.StartInstanceOp(instance) self.ExecOrQueue(instance, [op1, op2, op4]) @_DoCheckInstances @_DoBatch(True) def BurnActivateDisks(self): """Activate and deactivate disks of the instances.""" Log("Activating/deactivating disks") for instance in self.instances: Log("instance %s", instance, indent=1) op_start = self.StartInstanceOp(instance) op_act = opcodes.OpInstanceActivateDisks(instance_name=instance) op_deact = opcodes.OpInstanceDeactivateDisks(instance_name=instance) op_stop = self.StopInstanceOp(instance) Log("activate disks when online", indent=2) Log("activate disks when offline", indent=2) Log("deactivate disks (when offline)", indent=2) self.ExecOrQueue(instance, [op_act, op_stop, op_act, op_deact, op_start]) @_DoCheckInstances @_DoBatch(False) def BurnAddRemoveDisks(self): """Add and remove an extra disk for the instances.""" Log("Adding and removing disks") for instance in self.instances: Log("instance %s", instance, indent=1) op_add = opcodes.OpInstanceSetParams( instance_name=instance, disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})]) op_rem = opcodes.OpInstanceSetParams( instance_name=instance, disks=[(constants.DDM_REMOVE, {})]) op_stop = self.StopInstanceOp(instance) op_start = self.StartInstanceOp(instance) Log("adding a disk", indent=2) Log("removing last disk", indent=2) self.ExecOrQueue(instance, [op_add, op_stop, op_rem, op_start]) @_DoBatch(False) def BurnAddRemoveNICs(self): """Add, change and remove an extra NIC for the instances.""" Log("Adding and removing NICs") for instance in self.instances: Log("instance %s", instance, indent=1) op_add = opcodes.OpInstanceSetParams( 
instance_name=instance, nics=[(constants.DDM_ADD, {})]) op_chg = opcodes.OpInstanceSetParams( instance_name=instance, nics=[(constants.DDM_MODIFY, -1, {"mac": constants.VALUE_GENERATE})]) op_rem = opcodes.OpInstanceSetParams( instance_name=instance, nics=[(constants.DDM_REMOVE, {})]) Log("adding a NIC", indent=2) Log("changing a NIC", indent=2) Log("removing last NIC", indent=2) self.ExecOrQueue(instance, [op_add, op_chg, op_rem]) def ConfdCallback(self, reply): """Callback for confd queries""" if reply.type == confd_client.UPCALL_REPLY: if reply.server_reply.status != constants.CONFD_REPL_STATUS_OK: Err("Query %s gave non-ok status %s: %s" % (reply.orig_request, reply.server_reply.status, reply.server_reply)) if reply.orig_request.type == constants.CONFD_REQ_PING: Log("Ping: OK", indent=1) elif reply.orig_request.type == constants.CONFD_REQ_CLUSTER_MASTER: if reply.server_reply.answer == self.cluster_info["master"]: Log("Master: OK", indent=1) else: Err("Master: wrong: %s" % reply.server_reply.answer) elif reply.orig_request.type == constants.CONFD_REQ_NODE_ROLE_BYNAME: if reply.server_reply.answer == constants.CONFD_NODE_ROLE_MASTER: Log("Node role for master: OK", indent=1) else: Err("Node role for master: wrong: %s" % reply.server_reply.answer) def DoConfdRequestReply(self, req): self.confd_counting_callback.RegisterQuery(req.rsalt) self.confd_client.SendRequest(req, async=False) while not self.confd_counting_callback.AllAnswered(): if not self.confd_client.ReceiveReply(): Err("Did not receive all expected confd replies") break def BurnConfd(self): """Run confd queries for our instances. The following confd queries are tested: - CONFD_REQ_PING: simple ping - CONFD_REQ_CLUSTER_MASTER: cluster master - CONFD_REQ_NODE_ROLE_BYNAME: node role, for the master """ Log("Checking confd results") filter_callback = confd_client.ConfdFilterCallback(self.ConfdCallback) counting_callback = confd_client.ConfdCountingCallback(filter_callback) self.confd_counting_callback = counting_callback self.confd_client = confd_client.GetConfdClient(counting_callback) req = confd_client.ConfdClientRequest(type=constants.CONFD_REQ_PING) self.DoConfdRequestReply(req) req = confd_client.ConfdClientRequest( type=constants.CONFD_REQ_CLUSTER_MASTER) self.DoConfdRequestReply(req) req = confd_client.ConfdClientRequest( type=constants.CONFD_REQ_NODE_ROLE_BYNAME, query=self.cluster_info["master"]) self.DoConfdRequestReply(req) def _CheckInstanceAlive(self, instance): """Check if an instance is alive by doing http checks. This will try to retrieve the url on the instance /hostname.txt and check that it contains the hostname of the instance. In case we get ECONNREFUSED, we retry up to the net timeout seconds, for any other error we abort. """ if not self.opts.http_check: return end_time = time.time() + self.opts.net_timeout url = None while time.time() < end_time and url is None: try: url = self.url_opener.open("http://%s/hostname.txt" % instance) except IOError: # here we can have connection refused, no route to host, etc. time.sleep(1) if url is None: raise InstanceDown(instance, "Cannot contact instance") hostname = url.read().strip() url.close() if hostname != instance: raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" % (instance, hostname))) def BurninCluster(self): """Test a cluster intensively. This will create instances and then start/stop/failover them. It is safe for existing instances but could impact performance. 
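# Minimal model, not part of the original tool, of the deadline pattern
# used by _CheckInstanceAlive above: retry a transient operation once per
# second until it succeeds or the timeout expires.
def _ExampleRetryUntilDeadline(fn, timeout):
  end_time = time.time() + timeout
  result = None
  while time.time() < end_time and result is None:
    try:
      result = fn()
    except IOError:
      # connection refused, no route to host, ... - retry until deadline
      time.sleep(1)
  return result  # None means the deadline expired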
""" Log("Testing global parameters") if (len(self.nodes) == 1 and self.opts.disk_template not in _SINGLE_NODE_DISK_TEMPLATES): Err("When one node is available/selected the disk template must" " be one of %s" % utils.CommaJoin(_SINGLE_NODE_DISK_TEMPLATES)) if self.opts.do_confd_tests and not constants.ENABLE_CONFD: Err("You selected confd tests but confd was disabled at configure time") has_err = True try: self.BurnCreateInstances() if self.bep[constants.BE_MINMEM] < self.bep[constants.BE_MAXMEM]: self.BurnModifyRuntimeMemory() if self.opts.do_replace1 and \ self.opts.disk_template in constants.DTS_INT_MIRROR: self.BurnReplaceDisks1D8() if (self.opts.do_replace2 and len(self.nodes) > 2 and self.opts.disk_template in constants.DTS_INT_MIRROR): self.BurnReplaceDisks2() if (self.opts.disk_template in constants.DTS_GROWABLE and compat.any(n > 0 for n in self.disk_growth)): self.BurnGrowDisks() if self.opts.do_failover and \ self.opts.disk_template in constants.DTS_MIRRORED: self.BurnFailover() if self.opts.do_migrate: if self.opts.disk_template not in constants.DTS_MIRRORED: Log("Skipping migration (disk template %s does not support it)", self.opts.disk_template) elif not self.hv_can_migrate: Log("Skipping migration (hypervisor %s does not support it)", self.hypervisor) else: self.BurnMigrate() if (self.opts.do_move and len(self.nodes) > 1 and self.opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]): self.BurnMove() if (self.opts.do_importexport and self.opts.disk_template in _IMPEXP_DISK_TEMPLATES): self.BurnImportExport() if self.opts.do_reinstall: self.BurnReinstall() if self.opts.do_reboot: self.BurnReboot() if self.opts.do_renamesame: self.BurnRenameSame() if self.opts.do_addremove_disks: self.BurnAddRemoveDisks() default_nic_mode = self.cluster_default_nicparams[constants.NIC_MODE] # Don't add/remove nics in routed mode, as we would need an ip to add # them with if self.opts.do_addremove_nics: if default_nic_mode == constants.NIC_MODE_BRIDGED: self.BurnAddRemoveNICs() else: Log("Skipping nic add/remove as the cluster is not in bridged mode") if self.opts.do_activate_disks: self.BurnActivateDisks() if self.opts.rename: self.BurnRename() if self.opts.do_confd_tests: self.BurnConfd() if self.opts.do_startstop: self.BurnStopStart() has_err = False finally: if has_err: Log("Error detected: opcode buffer follows:\n\n") Log(self.GetFeedbackBuf()) Log("\n\n") if not self.opts.keep_instances: try: self.BurnRemove() except Exception, err: # pylint: disable=W0703 if has_err: # already detected errors, so errors in removal # are quite expected Log("Note: error detected during instance remove: %s", err) else: # non-expected error raise return constants.EXIT_SUCCESS def Main(): """Main function. """ utils.SetupLogging(pathutils.LOG_BURNIN, sys.argv[0], debug=False, stderr_logging=True) return Burner().BurninCluster() ganeti-2.9.3/lib/tools/node_daemon_setup.py0000644000000000000000000001471612244641676020767 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to configure the node daemon. """ import os import os.path import optparse import sys import logging import OpenSSL from cStringIO import StringIO from ganeti import cli from ganeti import constants from ganeti import errors from ganeti import pathutils from ganeti import utils from ganeti import serializer from ganeti import runtime from ganeti import ht from ganeti import ssconf _DATA_CHECK = ht.TStrictDict(False, True, { constants.NDS_CLUSTER_NAME: ht.TNonEmptyString, constants.NDS_NODE_DAEMON_CERTIFICATE: ht.TNonEmptyString, constants.NDS_SSCONF: ht.TDictOf(ht.TNonEmptyString, ht.TString), constants.NDS_START_NODE_DAEMON: ht.TBool, }) class SetupError(errors.GenericError): """Local class for reporting errors. """ def ParseOptions(): """Parses the options passed to the program. @return: Options and arguments """ parser = optparse.OptionParser(usage="%prog [--dry-run]", prog=os.path.basename(sys.argv[0])) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.VERBOSE_OPT) parser.add_option(cli.DRY_RUN_OPT) (opts, args) = parser.parse_args() return VerifyOptions(parser, opts, args) def VerifyOptions(parser, opts, args): """Verifies options and arguments for correctness. """ if args: parser.error("No arguments are expected") return opts def _VerifyCertificate(cert_pem, _check_fn=utils.CheckNodeCertificate): """Verifies a certificate against the local node daemon certificate. @type cert_pem: string @param cert_pem: Certificate and key in PEM format @rtype: string @return: Formatted key and certificate """ try: cert = \ OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) except Exception, err: raise errors.X509CertError("(stdin)", "Unable to load certificate: %s" % err) try: key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, cert_pem) except OpenSSL.crypto.Error, err: raise errors.X509CertError("(stdin)", "Unable to load private key: %s" % err) # Check certificate with given key; this detects cases where the key given on # stdin doesn't match the certificate also given on stdin x509_check_fn = utils.PrepareX509CertKeyCheck(cert, key) try: x509_check_fn() except OpenSSL.SSL.Error: raise errors.X509CertError("(stdin)", "Certificate is not signed with given key") # Standard checks, including check against an existing local certificate # (no-op if that doesn't exist) _check_fn(cert) # Format for storing on disk buf = StringIO() buf.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key)) buf.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)) return buf.getvalue() def VerifyCertificate(data, _verify_fn=_VerifyCertificate): """Verifies cluster certificate. @type data: dict @rtype: string @return: Formatted key and certificate """ cert = data.get(constants.NDS_NODE_DAEMON_CERTIFICATE) if not cert: raise SetupError("Node daemon certificate must be specified") return _verify_fn(cert) def VerifyClusterName(data, _verify_fn=ssconf.VerifyClusterName): """Verifies cluster name. @type data: dict @rtype: string @return: Cluster name """ name = data.get(constants.NDS_CLUSTER_NAME) if not name: raise SetupError("Cluster name must be specified") _verify_fn(name) return name def VerifySsconf(data, cluster_name, _verify_fn=ssconf.VerifyKeys): """Verifies ssconf names. 
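# Illustrative sketch, not part of the original tool, of the JSON payload
# the setup script reads from stdin. The cluster name is invented and the
# certificate content is elided; the keys mirror the constants checked by
# _DATA_CHECK above.
def _ExampleSetupPayload():
  return serializer.DumpJson({
    constants.NDS_CLUSTER_NAME: "cluster.example.com",
    constants.NDS_NODE_DAEMON_CERTIFICATE: "-----BEGIN CERTIFICATE-----...",
    constants.NDS_SSCONF: {
      constants.SS_CLUSTER_NAME: "cluster.example.com",
    },
    constants.NDS_START_NODE_DAEMON: True,
  })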
@type data: dict """ items = data.get(constants.NDS_SSCONF) if not items: raise SetupError("Ssconf values must be specified") # TODO: Should all keys be required? Right now any subset of valid keys is # accepted. _verify_fn(items.keys()) if items.get(constants.SS_CLUSTER_NAME) != cluster_name: raise SetupError("Cluster name in ssconf does not match") return items def LoadData(raw): """Parses and verifies input data. @rtype: dict """ return serializer.LoadAndVerifyJson(raw, _DATA_CHECK) def Main(): """Main routine. """ opts = ParseOptions() utils.SetupToolLogging(opts.debug, opts.verbose) try: getent = runtime.GetEnts() data = LoadData(sys.stdin.read()) cluster_name = VerifyClusterName(data) cert_pem = VerifyCertificate(data) ssdata = VerifySsconf(data, cluster_name) logging.info("Writing ssconf files ...") ssconf.WriteSsconfFiles(ssdata, dry_run=opts.dry_run) logging.info("Writing node daemon certificate ...") utils.WriteFile(pathutils.NODED_CERT_FILE, data=cert_pem, mode=pathutils.NODED_CERT_MODE, uid=getent.masterd_uid, gid=getent.masterd_gid, dry_run=opts.dry_run) if (data.get(constants.NDS_START_NODE_DAEMON) and # pylint: disable=E1103 not opts.dry_run): logging.info("Restarting node daemon ...") stop_cmd = "%s stop-all" % pathutils.DAEMON_UTIL noded_cmd = "%s start %s" % (pathutils.DAEMON_UTIL, constants.NODED) mond_cmd = "" if constants.ENABLE_MOND: mond_cmd = "%s start %s" % (pathutils.DAEMON_UTIL, constants.MOND) cmd = "; ".join([stop_cmd, noded_cmd, mond_cmd]) result = utils.RunCmd(cmd, interactive=True) if result.failed: raise SetupError("Could not start the node daemons, command '%s'" " failed: %s" % (result.cmd, result.fail_reason)) logging.info("Node daemon successfully configured") except Exception, err: # pylint: disable=W0703 logging.debug("Caught unhandled exception", exc_info=True) (retcode, message) = cli.FormatError(err) logging.error(message) return retcode else: return constants.EXIT_SUCCESS ganeti-2.9.3/lib/tools/ensure_dirs.py0000644000000000000000000002203112244641676017606 0ustar00rootroot00000000000000# # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to ensure permissions on files/dirs are accurate. """ import os import os.path import optparse import sys import logging from ganeti import constants from ganeti import errors from ganeti import runtime from ganeti import ssconf from ganeti import utils from ganeti import cli from ganeti import pathutils from ganeti import compat (DIR, FILE, QUEUE_DIR) = range(1, 4) ALL_TYPES = compat.UniqueFrozenset([ DIR, FILE, QUEUE_DIR, ]) def RecursiveEnsure(path, uid, gid, dir_perm, file_perm): """Ensures permissions recursively down a directory. This function walks the path and sets permissions accordingly.
@param path: The absolute path to walk @param uid: The uid used as owner @param gid: The gid used as group @param dir_perm: The permission bits set for directories @param file_perm: The permission bits set for files """ assert os.path.isabs(path), "Path %s is not absolute" % path assert os.path.isdir(path), "Path %s is not a dir" % path logging.debug("Recursively processing %s", path) for root, dirs, files in os.walk(path): for subdir in dirs: utils.EnforcePermission(os.path.join(root, subdir), dir_perm, uid=uid, gid=gid) for filename in files: utils.EnforcePermission(os.path.join(root, filename), file_perm, uid=uid, gid=gid) def EnsureQueueDir(path, mode, uid, gid): """Sets the correct permissions on all job files in the queue. @param path: Directory path @param mode: Wanted file mode @param uid: Wanted user ID @param gid: Wanted group ID """ for filename in utils.ListVisibleFiles(path): if constants.JOB_FILE_RE.match(filename): utils.EnforcePermission(utils.PathJoin(path, filename), mode, uid=uid, gid=gid) def ProcessPath(path): """Processes a path component. @param path: A tuple of the path component to process """ (pathname, pathtype, mode, uid, gid) = path[0:5] assert pathtype in ALL_TYPES if pathtype in (DIR, QUEUE_DIR): # No additional parameters assert len(path) == 5 if pathtype == DIR: utils.MakeDirWithPerm(pathname, mode, uid, gid) elif pathtype == QUEUE_DIR: EnsureQueueDir(pathname, mode, uid, gid) elif pathtype == FILE: (must_exist, ) = path[5:] utils.EnforcePermission(pathname, mode, uid=uid, gid=gid, must_exist=must_exist) def GetPaths(): """Returns a tuple of path objects to process. """ getent = runtime.GetEnts() masterd_log = constants.DAEMONS_LOGFILES[constants.MASTERD] noded_log = constants.DAEMONS_LOGFILES[constants.NODED] confd_log = constants.DAEMONS_LOGFILES[constants.CONFD] luxid_log = constants.DAEMONS_LOGFILES[constants.LUXID] rapi_log = constants.DAEMONS_LOGFILES[constants.RAPI] mond_log = constants.DAEMONS_LOGFILES[constants.MOND] rapi_dir = os.path.join(pathutils.DATA_DIR, "rapi") cleaner_log_dir = os.path.join(pathutils.LOG_DIR, "cleaner") master_cleaner_log_dir = os.path.join(pathutils.LOG_DIR, "master-cleaner") # A note on the ordering: The parent directory (type C{DIR}) must always be # listed before files (type C{FILE}) in that directory. Once the directory is # set, only files directly in that directory can be listed. 
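# Illustrative example, not part of the original tool, of the tuple shapes
# consumed by ProcessPath above; the paths, modes and ownership below are
# invented:
def _ExamplePathTuples(getent):
  return [
    # directories and queue dirs are 5-tuples: (path, type, mode, uid, gid)
    ("/var/lib/example", DIR, 0755, getent.masterd_uid, getent.masterd_gid),
    # files carry a sixth element: must_exist
    ("/var/lib/example/config", FILE, 0640,
     getent.masterd_uid, getent.masterd_gid, False),
  ]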
paths = [ (pathutils.DATA_DIR, DIR, 0755, getent.masterd_uid, getent.masterd_gid), (pathutils.CLUSTER_DOMAIN_SECRET_FILE, FILE, 0640, getent.masterd_uid, getent.masterd_gid, False), (pathutils.CLUSTER_CONF_FILE, FILE, 0640, getent.masterd_uid, getent.confd_gid, False), (pathutils.CONFD_HMAC_KEY, FILE, 0440, getent.confd_uid, getent.masterd_gid, False), (pathutils.SSH_KNOWN_HOSTS_FILE, FILE, 0644, getent.masterd_uid, getent.masterd_gid, False), (pathutils.RAPI_CERT_FILE, FILE, 0440, getent.rapi_uid, getent.masterd_gid, False), (pathutils.SPICE_CERT_FILE, FILE, 0440, getent.noded_uid, getent.masterd_gid, False), (pathutils.SPICE_CACERT_FILE, FILE, 0440, getent.noded_uid, getent.masterd_gid, False), (pathutils.NODED_CERT_FILE, FILE, pathutils.NODED_CERT_MODE, getent.masterd_uid, getent.masterd_gid, False), (pathutils.WATCHER_PAUSEFILE, FILE, 0644, getent.masterd_uid, getent.masterd_gid, False), ] ss = ssconf.SimpleStore() for ss_path in ss.GetFileList(): paths.append((ss_path, FILE, constants.SS_FILE_PERMS, getent.noded_uid, getent.noded_gid, False)) paths.extend([ (pathutils.QUEUE_DIR, DIR, 0750, getent.masterd_uid, getent.daemons_gid), (pathutils.QUEUE_DIR, QUEUE_DIR, constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid, getent.daemons_gid), (pathutils.JOB_QUEUE_DRAIN_FILE, FILE, 0644, getent.masterd_uid, getent.daemons_gid, False), (pathutils.JOB_QUEUE_LOCK_FILE, FILE, constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid, getent.daemons_gid, False), (pathutils.JOB_QUEUE_SERIAL_FILE, FILE, constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid, getent.daemons_gid, False), (pathutils.JOB_QUEUE_VERSION_FILE, FILE, constants.JOB_QUEUE_FILES_PERMS, getent.masterd_uid, getent.daemons_gid, False), (pathutils.JOB_QUEUE_ARCHIVE_DIR, DIR, 0750, getent.masterd_uid, getent.daemons_gid), (rapi_dir, DIR, 0750, getent.rapi_uid, getent.masterd_gid), (pathutils.RAPI_USERS_FILE, FILE, 0640, getent.rapi_uid, getent.masterd_gid, False), (pathutils.RUN_DIR, DIR, 0775, getent.masterd_uid, getent.daemons_gid), (pathutils.SOCKET_DIR, DIR, 0770, getent.masterd_uid, getent.daemons_gid), (pathutils.MASTER_SOCKET, FILE, 0660, getent.masterd_uid, getent.daemons_gid, False), (pathutils.QUERY_SOCKET, FILE, 0660, getent.luxid_uid, getent.daemons_gid, False), (pathutils.BDEV_CACHE_DIR, DIR, 0755, getent.noded_uid, getent.masterd_gid), (pathutils.UIDPOOL_LOCKDIR, DIR, 0750, getent.noded_uid, getent.masterd_gid), (pathutils.DISK_LINKS_DIR, DIR, 0755, getent.noded_uid, getent.masterd_gid), (pathutils.CRYPTO_KEYS_DIR, DIR, 0700, getent.noded_uid, getent.masterd_gid), (pathutils.IMPORT_EXPORT_DIR, DIR, 0755, getent.noded_uid, getent.masterd_gid), (pathutils.LOG_DIR, DIR, 0770, getent.masterd_uid, getent.daemons_gid), (masterd_log, FILE, 0600, getent.masterd_uid, getent.masterd_gid, False), (confd_log, FILE, 0600, getent.confd_uid, getent.masterd_gid, False), (luxid_log, FILE, 0600, getent.luxid_uid, getent.masterd_gid, False), (noded_log, FILE, 0600, getent.noded_uid, getent.masterd_gid, False), (rapi_log, FILE, 0600, getent.rapi_uid, getent.masterd_gid, False), (mond_log, FILE, 0600, getent.mond_uid, getent.masterd_gid, False), (pathutils.LOG_OS_DIR, DIR, 0750, getent.noded_uid, getent.daemons_gid), (pathutils.LOG_XEN_DIR, DIR, 0750, getent.noded_uid, getent.daemons_gid), (cleaner_log_dir, DIR, 0750, getent.noded_uid, getent.noded_gid), (master_cleaner_log_dir, DIR, 0750, getent.masterd_uid, getent.masterd_gid), (pathutils.INSTANCE_REASON_DIR, DIR, 0755, getent.noded_uid, getent.noded_gid), ]) return paths def ParseOptions(): 
"""Parses the options passed to the program. @return: Options and arguments """ program = os.path.basename(sys.argv[0]) parser = optparse.OptionParser(usage="%prog [--full-run]", prog=program) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.VERBOSE_OPT) parser.add_option("--full-run", "-f", dest="full_run", action="store_true", default=False, help=("Make a full run and set permissions" " on archived jobs (time consuming)")) return parser.parse_args() def Main(): """Main routine. """ (opts, args) = ParseOptions() utils.SetupToolLogging(opts.debug, opts.verbose) if args: logging.error("No arguments are expected") return constants.EXIT_FAILURE if opts.full_run: logging.info("Running in full mode") getent = runtime.GetEnts() try: for path in GetPaths(): ProcessPath(path) if opts.full_run: RecursiveEnsure(pathutils.JOB_QUEUE_ARCHIVE_DIR, getent.masterd_uid, getent.daemons_gid, 0750, constants.JOB_QUEUE_FILES_PERMS) except errors.GenericError, err: logging.error("An error occurred while setting permissions: %s", err) return constants.EXIT_FAILURE return constants.EXIT_SUCCESS ganeti-2.9.3/lib/tools/node_cleanup.py0000644000000000000000000000667512244641676017740 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to configure the node daemon. """ import os import os.path import optparse import sys import logging from ganeti import cli from ganeti import constants from ganeti import pathutils from ganeti import ssconf from ganeti import utils def ParseOptions(): """Parses the options passed to the program. @return: Options and arguments """ parser = optparse.OptionParser(usage="%prog [--no-backup]", prog=os.path.basename(sys.argv[0])) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.VERBOSE_OPT) parser.add_option(cli.YES_DOIT_OPT) parser.add_option("--no-backup", dest="backup", default=True, action="store_false", help="Whether to create backup copies of deleted files") (opts, args) = parser.parse_args() return VerifyOptions(parser, opts, args) def VerifyOptions(parser, opts, args): """Verifies options and arguments for correctness. """ if args: parser.error("No arguments are expected") return opts def Main(): """Main routine. """ opts = ParseOptions() utils.SetupToolLogging(opts.debug, opts.verbose) try: # List of files to delete. Contains tuples consisting of the absolute path # and a boolean denoting whether a backup copy should be created before # deleting. clean_files = [ (pathutils.CONFD_HMAC_KEY, True), (pathutils.CLUSTER_CONF_FILE, True), (pathutils.CLUSTER_DOMAIN_SECRET_FILE, True), ] clean_files.extend(map(lambda s: (s, True), pathutils.ALL_CERT_FILES)) clean_files.extend(map(lambda s: (s, False), ssconf.SimpleStore().GetFileList())) if not opts.yes_do_it: cli.ToStderr("Cleaning a node is irreversible. 
If you really want to" " clean this node, supply the --yes-do-it option.") return constants.EXIT_FAILURE logging.info("Stopping daemons") result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-all"], interactive=True) if result.failed: raise Exception("Could not stop daemons, command '%s' failed: %s" % (result.cmd, result.fail_reason)) for (filename, backup) in clean_files: if os.path.exists(filename): if opts.backup and backup: logging.info("Backing up %s", filename) utils.CreateBackup(filename) logging.info("Removing %s", filename) utils.RemoveFile(filename) logging.info("Node successfully cleaned") except Exception, err: # pylint: disable=W0703 logging.debug("Caught unhandled exception", exc_info=True) (retcode, message) = cli.FormatError(err) logging.error(message) return retcode else: return constants.EXIT_SUCCESS ganeti-2.9.3/lib/tools/__init__.py0000644000000000000000000000141212230001635016777 0ustar00rootroot00000000000000# # # Copyright (C) 2011 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Common tools modules. """ ganeti-2.9.3/lib/tools/prepare_node_join.py0000644000000000000000000001557712244641676020767 0ustar00rootroot00000000000000# # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Script to prepare a node for joining a cluster. """ import os import os.path import optparse import sys import logging import OpenSSL from ganeti import cli from ganeti import constants from ganeti import errors from ganeti import pathutils from ganeti import utils from ganeti import serializer from ganeti import ht from ganeti import ssh from ganeti import ssconf _SSH_KEY_LIST_ITEM = \ ht.TAnd(ht.TIsLength(3), ht.TItems([ ht.TElemOf(constants.SSHK_ALL), ht.Comment("public")(ht.TNonEmptyString), ht.Comment("private")(ht.TNonEmptyString), ])) _SSH_KEY_LIST = ht.TListOf(_SSH_KEY_LIST_ITEM) _DATA_CHECK = ht.TStrictDict(False, True, { constants.SSHS_CLUSTER_NAME: ht.TNonEmptyString, constants.SSHS_NODE_DAEMON_CERTIFICATE: ht.TNonEmptyString, constants.SSHS_SSH_HOST_KEY: _SSH_KEY_LIST, constants.SSHS_SSH_ROOT_KEY: _SSH_KEY_LIST, }) class JoinError(errors.GenericError): """Local class for reporting errors. 
""" def ParseOptions(): """Parses the options passed to the program. @return: Options and arguments """ program = os.path.basename(sys.argv[0]) parser = optparse.OptionParser(usage="%prog [--dry-run]", prog=program) parser.add_option(cli.DEBUG_OPT) parser.add_option(cli.VERBOSE_OPT) parser.add_option(cli.DRY_RUN_OPT) (opts, args) = parser.parse_args() return VerifyOptions(parser, opts, args) def VerifyOptions(parser, opts, args): """Verifies options and arguments for correctness. """ if args: parser.error("No arguments are expected") return opts def _VerifyCertificate(cert_pem, _check_fn=utils.CheckNodeCertificate): """Verifies a certificate against the local node daemon certificate. @type cert_pem: string @param cert_pem: Certificate in PEM format (no key) """ try: OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, cert_pem) except OpenSSL.crypto.Error, err: pass else: raise JoinError("No private key may be given") try: cert = \ OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert_pem) except Exception, err: raise errors.X509CertError("(stdin)", "Unable to load certificate: %s" % err) _check_fn(cert) def VerifyCertificate(data, _verify_fn=_VerifyCertificate): """Verifies cluster certificate. @type data: dict """ cert = data.get(constants.SSHS_NODE_DAEMON_CERTIFICATE) if cert: _verify_fn(cert) def VerifyClusterName(data, _verify_fn=ssconf.VerifyClusterName): """Verifies cluster name. @type data: dict """ name = data.get(constants.SSHS_CLUSTER_NAME) if name: _verify_fn(name) else: raise JoinError("Cluster name must be specified") def _UpdateKeyFiles(keys, dry_run, keyfiles): """Updates SSH key files. @type keys: sequence of tuple; (string, string, string) @param keys: Keys to write, tuples consist of key type (L{constants.SSHK_ALL}), public and private key @type dry_run: boolean @param dry_run: Whether to perform a dry run @type keyfiles: dict; (string as key, tuple with (string, string) as values) @param keyfiles: Mapping from key types (L{constants.SSHK_ALL}) to file names; value tuples consist of public key filename and private key filename """ assert set(keyfiles) == constants.SSHK_ALL for (kind, private_key, public_key) in keys: (private_file, public_file) = keyfiles[kind] logging.debug("Writing %s ...", private_file) utils.WriteFile(private_file, data=private_key, mode=0600, backup=True, dry_run=dry_run) logging.debug("Writing %s ...", public_file) utils.WriteFile(public_file, data=public_key, mode=0644, backup=True, dry_run=dry_run) def UpdateSshDaemon(data, dry_run, _runcmd_fn=utils.RunCmd, _keyfiles=None): """Updates SSH daemon's keys. Unless C{dry_run} is set, the daemon is restarted at the end. @type data: dict @param data: Input data @type dry_run: boolean @param dry_run: Whether to perform a dry run """ keys = data.get(constants.SSHS_SSH_HOST_KEY) if not keys: return if _keyfiles is None: _keyfiles = constants.SSH_DAEMON_KEYFILES logging.info("Updating SSH daemon key files") _UpdateKeyFiles(keys, dry_run, _keyfiles) if dry_run: logging.info("This is a dry run, not restarting SSH daemon") else: result = _runcmd_fn([pathutils.DAEMON_UTIL, "reload-ssh-keys"], interactive=True) if result.failed: raise JoinError("Could not reload SSH keys, command '%s'" " had exitcode %s and error %s" % (result.cmd, result.exit_code, result.output)) def UpdateSshRoot(data, dry_run, _homedir_fn=None): """Updates root's SSH keys. Root's C{authorized_keys} file is also updated with new public keys. 
@type data: dict @param data: Input data @type dry_run: boolean @param dry_run: Whether to perform a dry run """ keys = data.get(constants.SSHS_SSH_ROOT_KEY) if not keys: return (auth_keys_file, keyfiles) = \ ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=True, _homedir_fn=_homedir_fn) _UpdateKeyFiles(keys, dry_run, keyfiles) if dry_run: logging.info("This is a dry run, not modifying %s", auth_keys_file) else: for (_, _, public_key) in keys: utils.AddAuthorizedKey(auth_keys_file, public_key) def LoadData(raw): """Parses and verifies input data. @rtype: dict """ return serializer.LoadAndVerifyJson(raw, _DATA_CHECK) def Main(): """Main routine. """ opts = ParseOptions() utils.SetupToolLogging(opts.debug, opts.verbose) try: data = LoadData(sys.stdin.read()) # Check if input data is correct VerifyClusterName(data) VerifyCertificate(data) # Update SSH files UpdateSshDaemon(data, opts.dry_run) UpdateSshRoot(data, opts.dry_run) logging.info("Setup finished successfully") except Exception, err: # pylint: disable=W0703 logging.debug("Caught unhandled exception", exc_info=True) (retcode, message) = cli.FormatError(err) logging.error(message) return retcode else: return constants.EXIT_SUCCESS ganeti-2.9.3/lib/serializer.py0000644000000000000000000001077212244641676016306 0ustar00rootroot00000000000000# # # Copyright (C) 2007, 2008 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Serializer abstraction module This module introduces a simple abstraction over the serialization backend (currently json). """ # pylint: disable=C0103 # C0103: Invalid name, since pylint doesn't see that Dump points to a # function and not a constant import re # Python 2.6 and above contain a JSON module based on simplejson. Unfortunately # the standard library version is significantly slower than the external # module. While it should be better from at least Python 3.2 on (see Python # issue 7451), for now Ganeti needs to work well with older Python versions # too. import simplejson from ganeti import errors from ganeti import utils _RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE) def DumpJson(data): """Serialize a given object. @param data: the data to serialize @return: the string representation of data """ encoded = simplejson.dumps(data) txt = _RE_EOLSP.sub("", encoded) if not txt.endswith("\n"): txt += "\n" return txt def LoadJson(txt): """Unserialize data from a string. @param txt: the json-encoded form @return: the original data """ return simplejson.loads(txt) def DumpSignedJson(data, key, salt=None, key_selector=None): """Serialize a given object and authenticate it. @param data: the data to serialize @param key: shared hmac key @param key_selector: name/id that identifies the key (in case there are multiple keys in use, e.g. 
in a multi-cluster environment) @return: the string representation of data signed by the hmac key """ txt = DumpJson(data) if salt is None: salt = "" signed_dict = { "msg": txt, "salt": salt, } if key_selector: signed_dict["key_selector"] = key_selector else: key_selector = "" signed_dict["hmac"] = utils.Sha1Hmac(key, txt, salt=salt + key_selector) return DumpJson(signed_dict) def LoadSignedJson(txt, key): """Verify that a given message was signed with the given key, and load it. @param txt: json-encoded hmac-signed message @param key: the shared hmac key or a callable taking one argument (the key selector), which returns the hmac key belonging to the key selector. Typical usage is to pass a reference to the get method of a dict. @rtype: tuple of original data, string @return: original data, salt @raises errors.SignatureError: if the message signature doesn't verify """ signed_dict = LoadJson(txt) if not isinstance(signed_dict, dict): raise errors.SignatureError("Invalid external message") try: msg = signed_dict["msg"] salt = signed_dict["salt"] hmac_sign = signed_dict["hmac"] except KeyError: raise errors.SignatureError("Invalid external message") if callable(key): # pylint: disable=E1103 key_selector = signed_dict.get("key_selector", None) hmac_key = key(key_selector) if not hmac_key: raise errors.SignatureError("No key with key selector '%s' found" % key_selector) else: key_selector = "" hmac_key = key if not utils.VerifySha1Hmac(hmac_key, msg, hmac_sign, salt=salt + key_selector): raise errors.SignatureError("Invalid Signature") return LoadJson(msg), salt def LoadAndVerifyJson(raw, verify_fn): """Parses and verifies JSON data. @type raw: string @param raw: Input data in JSON format @type verify_fn: callable @param verify_fn: Verification function, usually from L{ht} @return: De-serialized data """ try: data = LoadJson(raw) except Exception, err: raise errors.ParseError("Can't parse input data: %s" % err) if not verify_fn(data): raise errors.ParseError("Data does not match expected format: %s" % verify_fn) return data Dump = DumpJson Load = LoadJson DumpSigned = DumpSignedJson LoadSigned = LoadSignedJson ganeti-2.9.3/NEWS0000644000000000000000000035572312271422343013510 0ustar00rootroot00000000000000News ==== Version 2.9.3 ------------- *(Released Mon, 27 Jan 2014)* - Ensure that all the hypervisors exist in the config file (Issue 640) - Correctly recognise the role as master node (Issue 687) - configure: allow detection of Sphinx 1.2+ (Issue 502) - gnt-instance now honors the KVM path correctly (Issue 691) Inherited from the 2.8 branch: - Change the list separator for the usb_devices parameter from comma to space. Commas could not work because they are already the hypervisor option separator (Issue 649) - Add support for blktap2 file-driver (Issue 638) - Add network tag definitions to the haskell codebase (Issue 641) - Fix RAPI network tag handling - Add the network tags to the tags searched by gnt-cluster search-tags - Fix caching bug preventing jobs from being cancelled - Start-master/stop-master was always failing if ConfD was disabled. 
  (Issue 685)


Version 2.9.2
-------------

*(Released Fri, 13 Dec 2013)*

- use custom KVM path if set for version checking
- SingleNotifyPipeCondition: don't share pollers

Inherited from the 2.8 branch:

- Fixed Luxi daemon socket permissions after master-failover
- Improve IP version detection code directly checking for colons rather than passing the family from the cluster object
- Fix NODE/NODE_RES locking in LUInstanceCreate by not acquiring NODE_RES locks opportunistically anymore (Issue 622)
- Allow link local IPv6 gateways (Issue 624)
- Fix error printing (Issue 616)
- Fix a bug in InstanceSetParams concerning names: in case no name is passed in disk modifications, keep the old one. If name=none then set disk name to None.
- Update build_chroot script to work with the latest hackage packages
- Add a packet number limit to "fping" in master-ip-setup (Issue 630)
- Fix evacuation out of drained node (Issue 615)
- Add default file_driver if missing (Issue 571)
- Fix job error message after unclean master shutdown (Issue 618)
- Lock group(s) when creating instances (Issue 621)
- SetDiskID() before accepting an instance (Issue 633)
- Allow the ext template disks to receive arbitrary parameters, both at creation time and while being modified
- Xen handle domain shutdown (future proofing cherry-pick)
- Refactor reading live data in htools (future proofing cherry-pick)


Version 2.9.1
-------------

*(Released Wed, 13 Nov 2013)*

- fix bug that kept nodes offline when re-adding
- when verifying DRBD versions, ignore unavailable nodes
- fix bug that made the console unavailable on kvm in split-user setup (issue 608)
- DRBD: ensure peers are UpToDate for dual-primary (inherited 2.8.2)


Version 2.9.0
-------------

*(Released Tue, 5 Nov 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- hroller now also plans for capacity to move non-redundant instances off any node to be rebooted; the old behavior of completely ignoring any non-redundant instances can be restored by adding the --ignore-non-redundant option.
- The cluster option '--no-lvm-storage' was removed in favor of the new option '--enabled-disk-templates'.
- On instance creation, disk templates no longer need to be specified with '-t'. The default disk template will be taken from the list of enabled disk templates.
- The monitoring daemon is now running as root, in order to be able to collect information only available to root (such as the state of Xen instances).
- The ConfD client is now IPv6 compatible.
- File and shared file storage is no longer dis/enabled at configure time, but using the option '--enabled-disk-templates' at cluster initialization and modification.
- The default directories for file and shared file storage are no longer specified at configure time, but taken from the cluster's configuration. They can be set at cluster initialization and modification with '--file-storage-dir' and '--shared-file-storage-dir'.
- Cluster verification now includes stricter checks regarding the default file and shared file storage directories. It now checks that the directories are explicitly allowed in the 'file-storage-paths' file and that the directories exist on all nodes.
- The list of allowed disk templates in the instance policy and the list of cluster-wide enabled disk templates is now checked for consistency on cluster or group modification. On cluster initialization, the ipolicy disk templates are ensured to be a subset of the cluster-wide enabled disk templates.

New features
~~~~~~~~~~~~

- DRBD 8.4 support.
  Depending on the installed DRBD version, Ganeti now uses the correct command syntax. It is possible to use different DRBD versions on different nodes as long as they are compatible with each other. This enables rolling upgrades of DRBD with no downtime. As permanent operation of different DRBD versions within a node group is discouraged, ``gnt-cluster verify`` will emit a warning if it detects such a situation.
- New "inst-status-xen" data collector for the monitoring daemon, providing information about the state of the xen instances on the nodes.
- New "lv" data collector for the monitoring daemon, collecting data about the logical volumes on the nodes, and pairing them with the name of the instances they belong to.
- New "diskstats" data collector, collecting the data from /proc/diskstats and presenting them over the monitoring daemon interface.
- The ConfD client is now IPv6 compatible.

New dependencies
~~~~~~~~~~~~~~~~

The following new dependencies have been added.

Python

- ``python-mock`` (http://www.voidspace.org.uk/python/mock/) is now required for the unit tests (and only used for testing).

Haskell

- ``hslogger`` (http://software.complete.org/hslogger) is now always required, even if confd is not enabled.

Since 2.9.0 rc3
~~~~~~~~~~~~~~~

- Correctly start/stop luxid during gnt-cluster master-failover (inherited from stable-2.8)
- Improved error messages (inherited from stable-2.8)


Version 2.9.0 rc3
-----------------

*(Released Tue, 15 Oct 2013)*

The third release candidate in the 2.9 series. Since 2.9.0 rc2:

- in implicit configuration upgrade, match ipolicy with enabled disk templates
- improved harep documentation (inherited from stable-2.8)


Version 2.9.0 rc2
-----------------

*(Released Wed, 9 Oct 2013)*

The second release candidate in the 2.9 series. Since 2.9.0 rc1:

- Fix bug in cfgupgrade that led to failure when upgrading from 2.8 with at least one DRBD instance.
- Fix bug in cfgupgrade that led to an invalid 2.8 configuration after downgrading.


Version 2.9.0 rc1
-----------------

*(Released Tue, 1 Oct 2013)*

The first release candidate in the 2.9 series. Since 2.9.0 beta1:

- various bug fixes
- update of the documentation, in particular installation instructions
- merging of LD_* constants into DT_* constants
- python style changes to be compatible with newer versions of pylint


Version 2.9.0 beta1
-------------------

*(Released Thu, 29 Aug 2013)*

This was the first beta release of the 2.9 series. All important changes are listed in the latest 2.9 entry.


Version 2.8.4
-------------

*(Released Thu, 23 Jan 2014)*

- Change the list separator for the usb_devices parameter from comma to space. Commas could not work because they are already the hypervisor option separator (Issue 649)
- Add support for blktap2 file-driver (Issue 638)
- Add network tag definitions to the haskell codebase (Issue 641)
- Fix RAPI network tag handling
- Add the network tags to the tags searched by gnt-cluster search-tags
- Fix caching bug preventing jobs from being cancelled
- Start-master/stop-master was always failing if ConfD was disabled.
  (Issue 685)


Version 2.8.3
-------------

*(Released Thu, 12 Dec 2013)*

- Fixed Luxi daemon socket permissions after master-failover
- Improve IP version detection code directly checking for colons rather than passing the family from the cluster object
- Fix NODE/NODE_RES locking in LUInstanceCreate by not acquiring NODE_RES locks opportunistically anymore (Issue 622)
- Allow link local IPv6 gateways (Issue 624)
- Fix error printing (Issue 616)
- Fix a bug in InstanceSetParams concerning names: in case no name is passed in disk modifications, keep the old one. If name=none then set disk name to None.
- Update build_chroot script to work with the latest hackage packages
- Add a packet number limit to "fping" in master-ip-setup (Issue 630)
- Fix evacuation out of drained node (Issue 615)
- Add default file_driver if missing (Issue 571)
- Fix job error message after unclean master shutdown (Issue 618)
- Lock group(s) when creating instances (Issue 621)
- SetDiskID() before accepting an instance (Issue 633)
- Allow the ext template disks to receive arbitrary parameters, both at creation time and while being modified
- Xen handle domain shutdown (future proofing cherry-pick)
- Refactor reading live data in htools (future proofing cherry-pick)


Version 2.8.2
-------------

*(Released Thu, 07 Nov 2013)*

- DRBD: ensure peers are UpToDate for dual-primary
- Improve error message for replace-disks
- More dependency checks at configure time
- Placate warnings on ganeti.outils_unittest.py


Version 2.8.1
-------------

*(Released Thu, 17 Oct 2013)*

- Correctly start/stop luxid during gnt-cluster master-failover
- Don't attempt IPv6 ssh in case of IPv4 cluster (Issue 595)
- Fix path for the job queue serial file
- Improved harep man page
- Minor documentation improvements


Version 2.8.0
-------------

*(Released Mon, 30 Sep 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Instance policy can contain multiple instance specs, as described in the “Constrained instance sizes” section of :doc:`Partitioned Ganeti `. As a consequence, it's not possible to partially change or override instance specs. Bounding specs (min and max) can be specified as a whole using the new option ``--ipolicy-bounds-specs``, while standard specs use the new option ``--ipolicy-std-specs``.
- The output of the info command of gnt-cluster, gnt-group, gnt-node, gnt-instance is a valid YAML object.
- hail now honors network restrictions when allocating nodes. This led to an update of the IAllocator protocol. See the IAllocator documentation for details.
- confd now only answers static configuration requests over the network. luxid was extracted, listens on the local LUXI socket and responds to live queries. This allows finer grained permissions if using separate users.

New features
~~~~~~~~~~~~

- The :doc:`Remote API ` daemon now supports a command line flag to always require authentication, ``--require-authentication``. It can be specified in ``$sysconfdir/default/ganeti``.
- A new cluster attribute 'enabled_disk_templates' is introduced. It will be used to manage the disk templates to be used by instances in the cluster. Initially, it will be set to a list that includes plain, drbd, if they were enabled by specifying a volume group name, and file and sharedfile, if those were enabled at configure time. Additionally, it will include all disk templates that are currently used by instances. The order of disk templates will be based on Ganeti's history of supporting them.
  In the future, the first entry of the list will be used as a default disk template on instance creation.
- ``cfgupgrade`` now supports a ``--downgrade`` option to bring the configuration back to the previous stable version.
- Disk templates in group ipolicy can be restored to the default value.
- Initial support for diskless instances and virtual clusters in QA.
- More QA and unit tests for instance policies.
- Every opcode now contains a reason trail (visible through ``gnt-job info``) describing why the opcode itself was executed.
- The monitoring daemon is now available. It allows users to query the cluster for obtaining information about the status of the system. The daemon is only responsible for providing the information over the network: the actual data gathering is performed by data collectors (currently, only the DRBD status collector is available).
- In order to help developers work on Ganeti, a new script (``devel/build_chroot``) is provided, for building a chroot that contains all the required development libraries and tools for compiling Ganeti on a Debian Squeeze system.
- A new tool, ``harep``, for performing self-repair and recreation of instances in Ganeti has been added.
- Split queries are enabled for tags, network, exports, cluster info, groups, jobs, nodes.
- New command ``show-ispecs-cmd`` for ``gnt-cluster`` and ``gnt-group``. It prints the command line to set the current policies, to ease changing them.
- Add the ``vnet_hdr`` HV parameter for KVM, to control whether the tap devices for KVM virtio-net interfaces will get created with VNET_HDR (IFF_VNET_HDR) support. If set to false, it disables offloading on the virtio-net interfaces, which prevents host kernel tainting and log flooding, when dealing with broken or malicious virtio-net drivers. It's set to true by default.
- Instance failover now supports a ``--cleanup`` parameter for fixing previous failures.
- Support 'viridian' parameter in Xen HVM
- Support DSA SSH keys in bootstrap
- To simplify the work of packaging frameworks that want to add the needed users and groups in a split-user setup themselves, at build time three files in ``doc/users`` will be generated. The ``groups`` file contains, one per line, the groups to be generated, the ``users`` file contains, one per line, the users to be generated, optionally followed by their primary group, where important. The ``groupmemberships`` file contains, one per line, additional user-group membership relations that need to be established. The syntax of these files will remain stable in all future versions.

New dependencies
~~~~~~~~~~~~~~~~

The following new dependencies have been added:

For Haskell:

- The ``curl`` library is not optional anymore for compiling the Haskell code.
- ``snap-server`` library (if monitoring is enabled).

For Python:

- The minimum Python version needed to run Ganeti is now 2.6.
- ``yaml`` library (only for running the QA).

Since 2.8.0 rc3
~~~~~~~~~~~~~~~

- Perform proper cleanup on termination of Haskell daemons
- Fix corner-case in handling of remaining retry time


Version 2.8.0 rc3
-----------------

*(Released Tue, 17 Sep 2013)*

- To simplify the work of packaging frameworks that want to add the needed users and groups in a split-user setup themselves, at build time three files in ``doc/users`` will be generated. The ``groups`` file contains, one per line, the groups to be generated, the ``users`` file contains, one per line, the users to be generated, optionally followed by their primary group, where important.
  The ``groupmemberships`` file contains, one per line, additional user-group membership relations that need to be established. The syntax of these files will remain stable in all future versions.
- Add a default to file-driver when unspecified over RAPI (Issue 571)
- Mark the DSA host pubkey as optional, and remove it during config downgrade (Issue 560)
- Some documentation fixes


Version 2.8.0 rc2
-----------------

*(Released Tue, 27 Aug 2013)*

The second release candidate of the 2.8 series. Since 2.8.0 rc1:

- Support 'viridian' parameter in Xen HVM (Issue 233)
- Include VCS version in ``gnt-cluster version``
- Support DSA SSH keys in bootstrap (Issue 338)
- Fix batch creation of instances
- Use FQDN to check master node status (Issue 551)
- Make the DRBD collector more failure-resilient


Version 2.8.0 rc1
-----------------

*(Released Fri, 2 Aug 2013)*

The first release candidate of the 2.8 series. Since 2.8.0 beta1:

- Fix upgrading/downgrading from 2.7
- Increase maximum RAPI message size
- Documentation updates
- Split ``confd`` between ``luxid`` and ``confd``
- Merge 2.7 series up to the 2.7.1 release
- Allow the ``modify_etc_hosts`` option to be changed
- Add better debugging for ``luxid`` queries
- Expose bulk parameter for GetJobs in RAPI client
- Expose missing ``network`` fields in RAPI
- Add some ``cluster verify`` tests
- Some unittest fixes
- Fix a malfunction in ``hspace``'s tiered allocation
- Fix query compatibility between haskell and python implementations
- Add the ``vnet_hdr`` HV parameter for KVM
- Add ``--cleanup`` to instance failover
- Change the connected groups format in ``gnt-network info`` output; it was previously displayed as a raw list by mistake. (Merged from 2.7)


Version 2.8.0 beta1
-------------------

*(Released Mon, 24 Jun 2013)*

This was the first beta release of the 2.8 series. All important changes are listed in the latest 2.8 entry.


Version 2.7.2
-------------

*(Released Thu, 26 Sep 2013)*

- Change the connected groups format in ``gnt-network info`` output; it was previously displayed as a raw list by mistake
- Check disk template in right dict when copying
- Support multi-instance allocs without iallocator
- Fix some errors in the documentation
- Fix formatting of tuple in an error message


Version 2.7.1
-------------

*(Released Thu, 25 Jul 2013)*

- Add logrotate functionality in daemon-util
- Add logrotate example file
- Add missing fields to network queries over rapi
- Fix network object timestamps
- Add support for querying network timestamps
- Fix a typo in the example crontab
- Fix a documentation typo


Version 2.7.0
-------------

*(Released Thu, 04 Jul 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Instance policies for disk size were documented to be on a per-disk basis, but hail applied them to the sum of all disks. This has been fixed.
- ``hbal`` will now exit with status 0 if, during job execution over LUXI, early exit has been requested and all jobs are successful; before, exit status 1 was used, which cannot be differentiated from the "job error" case
- Compatibility with newer versions of rbd has been fixed
- ``gnt-instance batch-create`` has been changed to use the bulk create opcode from Ganeti. This led to incompatible changes in the format of the JSON file. It's now not a custom dict anymore but a dict compatible with the ``OpInstanceCreate`` opcode.
- Parent directories for file storage need to be listed in ``$sysconfdir/ganeti/file-storage-paths`` now.
  ``cfgupgrade`` will write the file automatically based on old configuration values, but it can not distribute it across all nodes and the file contents should be verified. Use ``gnt-cluster copyfile $sysconfdir/ganeti/file-storage-paths`` once the cluster has been upgraded. The reason for requiring this list of paths now is that before it would have been possible to inject new paths via RPC, allowing files to be created in arbitrary locations. The RPC protocol is protected using SSL/X.509 certificates, but as a design principle Ganeti does not permit arbitrary paths to be passed.
- The parsing of the variants file for OSes (see :manpage:`ganeti-os-interface(7)`) has been slightly changed: now empty lines and comment lines (starting with ``#``) are ignored for better readability.
- The ``setup-ssh`` tool added in Ganeti 2.2 has been replaced and is no longer available. ``gnt-node add`` now invokes a new tool on the destination node, named ``prepare-node-join``, to configure the SSH daemon. Paramiko is no longer necessary to configure nodes' SSH daemons via ``gnt-node add``.
- Draining (``gnt-cluster queue drain``) and un-draining the job queue (``gnt-cluster queue undrain``) now affects all nodes in a cluster and the flag is not reset after a master failover.
- Python 2.4 has *not* been tested with this release. Using 2.6 or above is recommended. 2.6 will be mandatory from the 2.8 series.

New features
~~~~~~~~~~~~

- New network management functionality to support automatic allocation of IP addresses and managing of network parameters. See :manpage:`gnt-network(8)` for more details.
- New external storage backend, to allow managing arbitrary storage systems external to the cluster. See :manpage:`ganeti-extstorage-interface(7)`.
- New ``exclusive-storage`` node parameter added, restricted to nodegroup level. When it's set to true, physical disks are assigned in an exclusive fashion to instances, as documented in :doc:`Partitioned Ganeti `. Currently, only instances using the ``plain`` disk template are supported.
- The KVM hypervisor has been updated with many new hypervisor parameters, including a generic one for passing arbitrary command line values. See a complete list in :manpage:`gnt-instance(8)`. It is now compatible up to qemu 1.4.
- A new tool, called ``mon-collector``, is the stand-alone executor of the data collectors for a monitoring system. As of this version, it just includes the DRBD data collector, that can be executed by calling ``mon-collector`` using the ``drbd`` parameter. See :manpage:`mon-collector(7)`.
- A new user option, :pyeval:`rapi.RAPI_ACCESS_READ`, has been added for RAPI users. It allows granting permissions to query for information to a specific user without giving :pyeval:`rapi.RAPI_ACCESS_WRITE` permissions.
- A new tool named ``node-cleanup`` has been added. It cleans remains of a cluster from a machine by stopping all daemons, removing certificates and ssconf files. Unless the ``--no-backup`` option is given, copies of the certificates are made.
- Instance creations now support the use of opportunistic locking, potentially speeding up the (parallel) creation of multiple instances. This feature is currently only available via the :doc:`RAPI ` interface and when an instance allocator is used. If the ``opportunistic_locking`` parameter is set the opcode will try to acquire as many locks as possible, but will not wait for any locks held by other opcodes.
  If not enough resources can be found to allocate the instance, the temporary error code :pyeval:`errors.ECODE_TEMP_NORES` is returned. The operation can be retried thereafter, with or without opportunistic locking.
- New experimental linux-ha resource scripts.
- Restricted-commands support: ganeti can now be asked (via command line or rapi) to perform commands on a node. These are passed via ganeti RPC rather than ssh. This functionality is restricted to commands specified in the ``$sysconfdir/ganeti/restricted-commands`` file for security reasons. The file is not copied automatically.

Misc changes
~~~~~~~~~~~~

- Diskless instances are now externally mirrored (Issue 237). For now, this has only been tested in conjunction with explicit target nodes for migration/failover.
- Queries not needing locks or RPC access to the node can now be performed by the confd daemon, making them independent of jobs, and thus faster to execute. This is selectable at configure time.
- The functionality for allocating multiple instances at once has been overhauled and is now also available through :doc:`RAPI `.

There are no significant changes from version 2.7.0~rc3.


Version 2.7.0 rc3
-----------------

*(Released Tue, 25 Jun 2013)*

- Fix permissions on the confd query socket (Issue 477)
- Fix permissions on the job archive dir (Issue 498)
- Fix handling of an internal exception in replace-disks (Issue 472)
- Fix gnt-node info handling of shortened names (Issue 497)
- Fix gnt-instance grow-disk when wiping is enabled
- Documentation improvements, and support for newer pandoc
- Fix hspace honoring ipolicy for disks (Issue 484)
- Improve handling of the ``kvm_extra`` HV parameter


Version 2.7.0 rc2
-----------------

*(Released Fri, 24 May 2013)*

- ``devel/upload`` now works when ``/var/run`` on the target nodes is a symlink.
- Disks added through ``gnt-instance modify`` or created through ``gnt-instance recreate-disks`` are wiped, if the ``prealloc_wipe_disks`` flag is set.
- If wiping newly created disks fails, the disks are removed. Also, partial failures in creating disks through ``gnt-instance modify`` trigger a cleanup of the partially-created disks.
- Removing the master IP address doesn't fail if the address has been already removed.
- Fix ownership of the OS log dir
- Workaround missing SO_PEERCRED constant (Issue 191)


Version 2.7.0 rc1
-----------------

*(Released Fri, 3 May 2013)*

This was the first release candidate of the 2.7 series. Since beta3:

- Fix kvm compatibility with qemu 1.4 (Issue 389)
- Documentation updates (admin guide, upgrade notes, install instructions) (Issue 372)
- Fix gnt-group list nodes and instances count (Issue 436)
- Fix compilation without non-mandatory libraries (Issue 441)
- Fix xen-hvm hypervisor forcing nics to type 'ioemu' (Issue 247)
- Make confd logging more verbose at INFO level (Issue 435)
- Improve "networks" documentation in :manpage:`gnt-instance(8)`
- Fix failure path for instance storage type conversion (Issue 229)
- Update htools text backend documentation
- Improve the renew-crypto section of :manpage:`gnt-cluster(8)`
- Disable inter-cluster instance move for file-based instances, because it is dependent on instance export, which is not supported for file-based instances. (Issue 414)
- Fix gnt-job crashes on non-ascii characters (Issue 427)
- Fix volume group checks on non-vm-capable nodes (Issue 432)


Version 2.7.0 beta3
-------------------

*(Released Mon, 22 Apr 2013)*

This was the third beta release of the 2.7 series.
Since beta2:

- Fix hail to verify disk instance policies on a per-disk basis (Issue 418).
- Fix data loss on wrong usage of ``gnt-instance move``
- Properly export errors in confd-based job queries
- Add ``users-setup`` tool
- Fix iallocator protocol to report 0 as a disk size for diskless instances. This avoids hail breaking when a diskless instance is present.
- Fix job queue directory permission problem that made confd job queries fail. This requires running an ``ensure-dirs --full-run`` on upgrade for access to archived jobs (Issue 406).
- Limit the sizes of networks supported by ``gnt-network`` to something between a ``/16`` and a ``/30`` to prevent memory bloat and crashes.
- Fix bugs in instance disk template conversion
- Fix GHC 7 compatibility
- Fix ``burnin`` install path (Issue 426).
- Allow very small disk grows (Issue 347).
- Fix a ``ganeti-noded`` memory bloat introduced in 2.5, by making sure that noded doesn't import masterd code (Issue 419).
- Make sure the default metavg at cluster init is the same as the vg, if unspecified (Issue 358).
- Fix cleanup of partially created disks (part of Issue 416)


Version 2.7.0 beta2
-------------------

*(Released Tue, 2 Apr 2013)*

This was the second beta release of the 2.7 series. Since beta1:

- Networks no longer have a "type" slot, since this information was unused in Ganeti: instead of it tags should be used.
- The rapi client now has a ``target_node`` option to MigrateInstance.
- Fix early exit return code for hbal (Issue 386).
- Fix ``gnt-instance migrate/failover -n`` (Issue 396).
- Fix ``rbd showmapped`` output parsing (Issue 312).
- Networks are now referenced by UUID, rather than by name. This will require running cfgupgrade, from 2.7.0beta1, if networks are in use.
- The OS environment now includes network information.
- Deleting of a network is now disallowed if any instance nic is using it, to prevent dangling references.
- External storage is now documented in man pages.
- The exclusive_storage flag can now only be set at nodegroup level.
- Hbal can now submit an explicit priority with its jobs.
- Many network related locking fixes.
- Bump up the required pylint version to 0.25.1.
- Fix the ``no_remember`` option in RAPI client.
- Many ipolicy related tests, qa, and fixes.
- Many documentation improvements and fixes.
- Fix building with ``--disable-file-storage``.
- Fix ``-q`` option in htools, which was broken if passed more than once.
- Some haskell/python interaction improvements and fixes.
- Fix iallocator in case of missing LVM storage.
- Fix confd config load in case of ``--no-lvm-storage``.
- The confd/query functionality is now mentioned in the security documentation.


Version 2.7.0 beta1
-------------------

*(Released Wed, 6 Feb 2013)*

This was the first beta release of the 2.7 series. All important changes are listed in the latest 2.7 entry.


Version 2.6.2
-------------

*(Released Fri, 21 Dec 2012)*

Important behaviour change: hbal will no longer rebalance instances that have the ``auto_balance`` attribute set to false. This was the intention all along, but until now it only skipped those from the N+1 memory reservation (DRBD-specific).

A significant number of bug fixes in this release:

- Fixed disk adoption interaction with ipolicy checks.
- Fixed networking issues when instances are started, stopped or migrated, by forcing the tap device's MAC prefix to "fe" (issue 217).
- Fixed the warning in cluster verify for shared storage instances not being redundant.
- Fixed removal of storage directory on shared file storage (issue 262).
- Fixed validation of LVM volume group name in OpClusterSetParams (``gnt-cluster modify``) (issue 285).
- Fixed runtime memory increases (``gnt-instance modify -m``).
- Fixed live migration under Xen's ``xl`` mode.
- Fixed ``gnt-instance console`` with ``xl``.
- Fixed building with newer Haskell compiler/libraries.
- Fixed PID file writing in Haskell daemons (confd); this prevents restart issues if confd was launched manually (outside of ``daemon-util``) while another copy of it was running
- Fixed a type error when doing live migrations with KVM (issue 297) and the error messages for failing migrations have been improved.
- Fixed opcode validation for the out-of-band commands (``gnt-node power``).
- Fixed a type error when unsetting OS hypervisor parameters (issue 311); now it's possible to unset all OS-specific hypervisor parameters.
- Fixed the ``dry-run`` mode for many operations: verification of results was over-zealous but didn't take into account the ``dry-run`` operation, resulting in "wrong" failures.
- Fixed bash completion in ``gnt-job list`` when the job queue has hundreds of entries; especially with older ``bash`` versions, this results in significant CPU usage.

And lastly, a few other improvements have been made:

- Added option to force master-failover without voting (issue 282).
- Clarified error message on lock conflict (issue 287).
- Logging of newly submitted jobs has been improved (issue 290).
- Hostname checks have been made uniform between instance rename and create (issue 291).
- The ``--submit`` option is now supported by ``gnt-debug delay``.
- Shutting down the master daemon by sending SIGTERM now stops it from processing jobs waiting for locks; instead, those jobs will be started once again after the master daemon is started the next time (issue 296).
- Support for Xen's ``xl`` program has been improved (besides the fixes above).
- Reduced logging noise in the Haskell confd daemon (only show one log entry for each config reload, instead of two).
- Several man page updates and typo fixes.


Version 2.6.1
-------------

*(Released Fri, 12 Oct 2012)*

A small bugfix release. Among the bugs fixed:

- Fixed double use of ``PRIORITY_OPT`` in ``gnt-node migrate``, that made the command unusable.
- Commands that issue many jobs don't fail anymore just because some jobs take so long that other jobs are archived.
- Failures during ``gnt-instance reinstall`` are reflected by the exit status.
- Issue 190 fixed. Check for DRBD in cluster verify is enabled only when DRBD is enabled.
- When ``always_failover`` is set, ``--allow-failover`` is not required in migrate commands anymore.
- ``bash_completion`` works even if extglob is disabled.
- Fixed bug with locks that made failover for RBD-based instances fail.
- Fixed bug in non-mirrored instance allocation that made Ganeti choose a random node instead of one based on the allocator metric.
- Support for newer versions of pylint and pep8.
- Hail doesn't fail anymore when trying to add an instance of type ``file``, ``sharedfile`` or ``rbd``.
- Added new Makefile target to rebuild the whole distribution, so that all files are included.


Version 2.6.0
-------------

*(Released Fri, 27 Jul 2012)*

.. attention::

  The ``LUXI`` protocol has been made more consistent regarding its handling of command arguments. This, however, leads to incompatibility issues with previous versions.
  Please ensure that you restart Ganeti daemons soon after the upgrade, otherwise most ``LUXI`` calls (job submission, setting/resetting the drain flag, pausing/resuming the watcher, cancelling and archiving jobs, querying the cluster configuration) will fail.

New features
~~~~~~~~~~~~

Instance run status
+++++++++++++++++++

The current ``admin_up`` field, which used to denote whether an instance should be running or not, has been removed. Instead, ``admin_state`` is introduced, with 3 possible values -- ``up``, ``down`` and ``offline``.

The rationale behind this is that an instance being “down” can have different meanings:

- it could be down during a reboot
- it could be temporarily down for a reinstall
- or it could be down because it is deprecated and kept just for its disk

The previous Boolean state was making it difficult to do capacity calculations: should Ganeti reserve memory for a down instance? Now, the tri-state field makes it clear:

- in ``up`` and ``down`` state, all resources are reserved for the instance, and it can be at any time brought up if it is down
- in ``offline`` state, only disk space is reserved for it, but not memory or CPUs

The field can have an extra use: since the transition between ``up`` and ``down`` and vice versa is done via ``gnt-instance start/stop``, but transition between ``offline`` and ``down`` is done via ``gnt-instance modify``, it is possible to give different rights to users. For example, owners of an instance could be allowed to start/stop it, but not transition it out of the offline state.

Instance policies and specs
+++++++++++++++++++++++++++

In previous Ganeti versions, an instance creation request was limited only by the available cluster resources, not by any minimum or maximum instance size. As such, any policy could be implemented only in third-party clients (RAPI clients, or shell wrappers over ``gnt-*`` tools). Furthermore, calculating cluster capacity via ``hspace`` again required external input with regards to instance sizes.

In order to improve these workflows and to allow for example better per-node group differentiation, we introduced instance specs, which allow declaring:

- minimum instance disk size, disk count, memory size, cpu count
- maximum values for the above metrics
- and “standard” values (used in ``hspace`` to calculate the standard sized instances)

The minimum/maximum values can be also customised at node-group level, for example allowing more powerful hardware to support bigger instance memory sizes.

Beside the instance specs, there are a few other settings belonging to the instance policy framework. It is possible now to customise, per cluster and node-group:

- the list of allowed disk templates
- the maximum ratio of VCPUs per PCPUs (to control CPU oversubscription)
- the maximum ratio of instance to spindles (see below for more information) for local storage

All these together should allow all tools that talk to Ganeti to know what are the ranges of allowed values for instances and the over-subscription that is allowed. (A small illustrative sketch of such a bounds check follows below.)

For the VCPU/PCPU ratio, we already have the VCPU configuration from the instance configuration, and the physical CPU configuration from the node.
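As a minimal illustration (a hypothetical sketch, not code shipped with Ganeti; the key names mirror the ispec fields described above), a client could check a requested instance spec against the min/max bounds like this::

  def spec_within_bounds(spec, minspec, maxspec):
    """Checks an instance spec against ipolicy min/max bounds."""
    keys = ("disk-size", "disk-count", "memory-size", "cpu-count")
    return all(minspec[k] <= spec[k] <= maxspec[k] for k in keys)

  # Example: one 10 GiB disk, 768 MiB of memory, 2 VCPUs
  spec = {"disk-size": 10240, "disk-count": 1,
          "memory-size": 768, "cpu-count": 2}
  minspec = {"disk-size": 1024, "disk-count": 1,
             "memory-size": 128, "cpu-count": 1}
  maxspec = {"disk-size": 1048576, "disk-count": 16,
             "memory-size": 32768, "cpu-count": 8}
  assert spec_within_bounds(spec, minspec, maxspec)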
For the spindle ratios, however, we didn't track these values before, so new parameters have been added:

- a new node parameter ``spindle_count``, defaults to 1, customisable at node group or node level
- a new backend parameter (for instances), ``spindle_use``, which defaults to 1

Note that spindles in this context doesn't need to mean actual mechanical hard-drives; it's just a relative number for both the node I/O capacity and instance I/O consumption.

Instance migration behaviour
++++++++++++++++++++++++++++

While live-migration is in general desirable over failover, it is possible that for some workloads it is actually worse, due to the variable time of the “suspend” phase during live migration.

To allow the tools to work consistently over such instances (without having to hard-code instance names), a new backend parameter ``always_failover`` has been added to control the migration/failover behaviour. When set to True, all migration requests for an instance will instead fall-back to failover.

Instance memory ballooning
++++++++++++++++++++++++++

Initial support for memory ballooning has been added. The memory for an instance is no longer fixed (backend parameter ``memory``), but instead can vary between minimum and maximum values (backend parameters ``minmem`` and ``maxmem``). Currently we only change an instance's memory when:

- live migrating or failing over an instance and the target node doesn't have enough memory
- user requests changing the memory via ``gnt-instance modify --runtime-memory``

Instance CPU pinning
++++++++++++++++++++

In order to control the use of specific CPUs by instances, support for controlling CPU pinning has been added for the Xen, HVM and LXC hypervisors. This is controlled by a new hypervisor parameter ``cpu_mask``; details about possible values for this are in the :manpage:`gnt-instance(8)`. Note that use of the most specific (precise VCPU-to-CPU mapping) form will work well only when all nodes in your cluster have the same amount of CPUs.

Disk parameters
+++++++++++++++

Another area in which Ganeti was not customisable were the parameters used for storage configuration, e.g. how many stripes to use for LVM, DRBD resync configuration, etc.

To improve this area, we've added disk parameters, which are customisable at cluster and node group level, and which allow specifying various parameters for disks (DRBD has the most parameters currently), for example:

- DRBD resync algorithm and parameters (e.g. speed)
- the default VG for meta-data volumes for DRBD
- number of stripes for LVM (plain disk template)
- the RBD pool

These parameters can be modified via ``gnt-cluster modify -D …`` and ``gnt-group modify -D …``, and are used at either instance creation (in case of LVM stripes, for example) or at disk “activation” time (e.g. resync speed).

Rados block device support
++++++++++++++++++++++++++

A Rados (http://ceph.com/wiki/Rbd) storage backend has been added, denoted by the ``rbd`` disk template type. This is considered experimental, feedback is welcome. For details on configuring it, see the :doc:`install` document and the :manpage:`gnt-cluster(8)` man page.

Master IP setup
+++++++++++++++

The existing master IP functionality works well only in simple setups (a single network shared by all nodes); however, if nodes belong to different networks, then the ``/32`` setup and lack of routing information is not enough.
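To illustrate the problem (a standalone sketch, not Ganeti code): the on-link network implied by an address depends entirely on its netmask, and with a ``/32`` the master IP belongs to no on-link network at all::

  import socket
  import struct

  def network_of(ip, prefixlen):
    """Returns the IPv4 network address for an address and prefix length."""
    (addr, ) = struct.unpack("!I", socket.inet_aton(ip))
    mask = (0xFFFFFFFF << (32 - prefixlen)) & 0xFFFFFFFF
    return socket.inet_ntoa(struct.pack("!I", addr & mask))

  print network_of("192.0.2.10", 24)  # "192.0.2.0": other hosts are on-link
  print network_of("192.0.2.10", 32)  # "192.0.2.10": a host route only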
To allow the master IP to function well in more complex cases, the
system was reworked as follows:

- a master IP netmask setting has been added
- the master IP activation/turn-down code was moved from the node daemon
  to a separate script
- whether to run the Ganeti-supplied master IP script or a user-supplied
  one is a ``gnt-cluster init`` setting

Details about the location of the standard and custom setup scripts are
in the man page :manpage:`gnt-cluster(8)`; for information about the
setup script protocol, look at the Ganeti-supplied script.

SPICE support
+++++++++++++

The SPICE support has been improved. It is now possible to use
TLS-protected connections, and when renewing or changing the cluster
certificates (via ``gnt-cluster renew-crypto``), it is now possible to
specify SPICE or SPICE CA certificates. Also, it is possible to
configure a password for SPICE sessions via the hypervisor parameter
``spice_password_file``.

There are also new parameters to control the compression and streaming
options (e.g. ``spice_image_compression``, ``spice_streaming_video``,
etc.). For details, see the man page :manpage:`gnt-instance(8)` and look
for the spice parameters.

Lastly, it is now possible to see the SPICE connection information via
``gnt-instance console``.

OVF converter
+++++++++++++

A new tool (``tools/ovfconverter``) has been added that supports
conversion between Ganeti and the Open Virtualization Format (both to
and from). This relies on the ``qemu-img`` tool to convert the disk
formats, so the actual compatibility with other virtualization solutions
depends on it.

Confd daemon changes
++++++++++++++++++++

The configuration query daemon (``ganeti-confd``) is now optional, and
has been rewritten in Haskell; whether to use the daemon at all, and
whether to use the Python (default) or the Haskell version, is
selectable at configure time via the ``--enable-confd`` parameter, which
can take one of the ``haskell``, ``python`` or ``no`` values. Disabling
the daemon results in a smaller footprint; for larger systems, we
welcome feedback on the Haskell version, which might become the default
in future versions.

If you want to use ``gnt-node list-drbd`` you need to have the Haskell
daemon running. The Python version doesn't implement the new call.

User interface changes
~~~~~~~~~~~~~~~~~~~~~~

We have replaced the ``--disks`` option of ``gnt-instance
replace-disks`` with a more flexible ``--disk`` option, which allows
adding and removing disks at arbitrary indices (Issue 188). Furthermore,
disk size and mode can be changed upon recreation (via ``gnt-instance
recreate-disks``, which accepts the same ``--disk`` option).

As many people are used to a ``show`` command, we have added that as an
alias to ``info`` on all ``gnt-*`` commands.

The ``gnt-instance grow-disk`` command has a new mode in which it can
accept the target size of the disk, instead of the delta; this can be
safer since two runs in absolute mode will be idempotent, and sometimes
it's also easier to specify the desired size directly.

Also, the handling of instances with regard to offline secondaries has
been improved. Instance operations should no longer fail just because
one of the instance's secondary nodes is offline, when it is in fact
safe to proceed.

A new command ``list-drbd`` has been added to the ``gnt-node`` script to
support debugging of DRBD issues on nodes. It provides a mapping of DRBD
minors to instance names.

API changes
~~~~~~~~~~~

RAPI coverage has improved, with (for example) new resources for
recreate-disks, node power-cycle, etc.
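As an illustration only (host, credentials and instance name are
invented; consult the RAPI documentation for the exact resource paths
and parameters), such resources can be driven with any HTTP client,
e.g.::

  # sketch: POST to the new recreate-disks resource (default RAPI port 5080)
  curl -k -u jack:secret -X POST \
    https://master.example.com:5080/2/instances/inst1.example.com/recreate-disks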
Compatibility
~~~~~~~~~~~~~

There is partial support for ``xl`` in the Xen hypervisor; feedback is
welcome.

Python 2.7 is better supported, and after Ganeti 2.6 we will investigate
whether to still support Python 2.4 or move to Python 2.6 as the minimum
required version.

Support for Fedora has been slightly improved; the provided example
init.d script should work better on it and the INSTALL file should
document the needed dependencies.

Internal changes
~~~~~~~~~~~~~~~~

The deprecated ``QueryLocks`` LUXI request has been removed. Use
``Query(what=QR_LOCK, ...)`` instead.

The LUXI requests :pyeval:`luxi.REQ_QUERY_JOBS`,
:pyeval:`luxi.REQ_QUERY_INSTANCES`, :pyeval:`luxi.REQ_QUERY_NODES`,
:pyeval:`luxi.REQ_QUERY_GROUPS`, :pyeval:`luxi.REQ_QUERY_EXPORTS` and
:pyeval:`luxi.REQ_QUERY_TAGS` are deprecated and will be removed in a
future version. :pyeval:`luxi.REQ_QUERY` should be used instead.

RAPI client: ``CertificateError`` now derives from ``GanetiApiError``.
This should make it easier to handle Ganeti errors.

Deprecation warnings due to the PyCrypto/paramiko import in
``tools/setup-ssh`` have been silenced, as they are usually safe; please
make sure to run an up-to-date paramiko version, if you use this tool.

The QA scripts now depend on Python 2.5 or above (the main code base
still works with Python 2.4).

The configuration file (``config.data``) is now written without
indentation for performance reasons; if you want to edit it, it can be
re-formatted via ``tools/fmtjson``.

A number of bugs have been fixed in the cluster merge tool.

``x509`` certificate verification (used in import-export) has been
changed to allow the same clock skew as permitted by the cluster
verification. This will remove some rare but hard-to-diagnose errors in
import-export.

Version 2.6.0 rc4
-----------------

*(Released Thu, 19 Jul 2012)*

Very few changes from rc4 to the final release, only bugfixes:

- integrated fixes from release 2.5.2 (fix general boot flag for KVM
  instances, fix CDROM booting for KVM instances)
- fixed node group modification of node parameters
- fixed issue in LUClusterVerifyGroup with multi-group clusters
- fixed generation of bash completion to ensure a stable ordering
- fixed a few typos

Version 2.6.0 rc3
-----------------

*(Released Fri, 13 Jul 2012)*

Third release candidate for 2.6. The following changes were done from
rc3 to rc4:

- Fixed ``UpgradeConfig`` w.r.t. disk parameters on disk objects.
- Fixed an inconsistency in the LUXI protocol with the provided
  arguments (NOT backwards compatible)
- Fixed a bug with node group ipolicy where ``min`` was greater than the
  cluster ``std`` value
- Implemented a new ``gnt-node list-drbd`` call to list DRBD minors for
  easier instance debugging on nodes (requires ``hconfd`` to work)

Version 2.6.0 rc2
-----------------

*(Released Tue, 03 Jul 2012)*

Second release candidate for 2.6. The following changes were done from
rc2 to rc3:

- Fixed ``gnt-cluster verify`` regarding ``master-ip-script`` on non
  master candidates
- Fixed a RAPI regression on missing beparams/memory
- Fixed redistribution of files on offline nodes
- Added the possibility to run activate-disks even though secondaries
  are offline.
  With this change it also relaxes the strictness of some other commands
  which use activate-disks internally:

  * ``gnt-instance start|reboot|rename|backup|export``

- Made it possible to safely remove an instance if its secondaries are
  offline
- Made it possible to reinstall even though secondaries are offline

Version 2.6.0 rc1
-----------------

*(Released Mon, 25 Jun 2012)*

First release candidate for 2.6. The following changes were done from
rc1 to rc2:

- Fixed bugs with disk parameters and ``rbd`` templates as well as
  ``instance_os_add``
- Made ``gnt-instance modify`` more consistent regarding new NIC/Disk
  behaviour. It now supports the modify operation
- ``hcheck`` implemented to analyze cluster health and the possibility
  of improving health by rebalancing
- ``hbal`` has been improved in dealing with split instances

Version 2.6.0 beta2
-------------------

*(Released Mon, 11 Jun 2012)*

Second beta release of 2.6. The following changes were done from beta2
to rc1:

- Fixed ``daemon-util`` with non-root user models
- Fixed creation of plain instances with ``--no-wait-for-sync``
- Fixed wrong iv_names when running ``cfgupgrade``
- Export more information in RAPI group queries
- Fixed a bug when changing instance network interfaces
- Extended burnin to do NIC changes
- query: Added ``<``, ``>``, ``<=``, ``>=`` comparison operators
- Changed default for DRBD barriers
- Fixed DRBD error reporting for syncer rate
- Verify the options on disk parameters

And of course various fixes to documentation and improved unittests and
QA.

Version 2.6.0 beta1
-------------------

*(Released Wed, 23 May 2012)*

First beta release of 2.6. The following changes were done from beta1 to
beta2:

- integrated patch for distributions without ``start-stop-daemon``
- adapted the example init.d script to work on Fedora
- fixed log handling in the Haskell daemons
- adapted checks in the watcher for pycurl linked against libnss
- added partial support for ``xl`` instead of ``xm`` for Xen
- fixed a type issue in cluster verification
- fixed ssconf handling in the Haskell code (was breaking confd in IPv6
  clusters)

Plus integrated fixes from the 2.5 branch:

- fixed ``kvm-ifup`` to use ``/bin/bash``
- fixed parallel build failures
- KVM live migration when using a custom keymap

Version 2.5.2
-------------

*(Released Tue, 24 Jul 2012)*

A small bugfix release, with no new features:

- fixed bash-isms in kvm-ifup, for compatibility with systems which use
  a different default shell (e.g. Debian, Ubuntu)
- fixed KVM startup and live migration with a custom keymap (fixes Issue
  243 and Debian bug #650664)
- fixed compatibility with KVM versions that don't support multiple boot
  devices (fixes Issue 230 and Debian bug #624256)

Additionally, a few fixes were done to the build system (fixed parallel
build failures) and to the unittests (fixed a race condition in the test
for FileID functions, and the default enable/disable mode for QA tests
is now customisable).

Version 2.5.1
-------------

*(Released Fri, 11 May 2012)*

A small bugfix release. The main issues solved are on the topic of
compatibility with newer LVM releases:

- fixed parsing of the ``lv_attr`` field
- adapted to the new ``vgreduce --removemissing`` behaviour where
  sometimes the ``--force`` flag is needed

Also on the topic of compatibility, ``tools/lvmstrap`` has been changed
to accept kernel 3.x too (it was hardcoded to 2.6.*).
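For reference, the manual equivalent of the adapted ``vgreduce``
behaviour looks roughly like this (volume group name invented; newer
LVM versions may insist on the second form)::

  # sketch: drop missing PVs from a volume group; retry with --force if needed
  vgreduce --removemissing xenvg || vgreduce --removemissing --force xenvg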
A regression present in 2.5.0 that broke handling (in the gnt-* scripts)
of hook results, and that also made the display of other errors
suboptimal, was fixed; the code now behaves as in 2.4 and earlier.

Another change in 2.5, the cleanup of the OS scripts environment, was
too aggressive: it removed even the ``PATH`` variable, which forced the
OS scripts to *always* export it themselves. Since this is a bit too
strict, we now export a minimal ``PATH``, the same one that we export
for hooks.

The fix for issue 201 (preserve bridge MTU in the KVM ifup script) was
integrated into this release.

Finally, a few other miscellaneous changes were done (no new features,
just small improvements):

- Fixed ``gnt-group --help`` display
- Fixed hardcoded Xen kernel path
- Fixed grow-disk handling of invalid units
- Updated synopsis for ``gnt-cluster repair-disk-sizes``
- Accept both PUT and POST in noded (makes future upgrade to 2.6 easier)

Version 2.5.0
-------------

*(Released Thu, 12 Apr 2012)*

Incompatible/important changes and bugfixes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- The default of the ``/2/instances/[instance_name]/rename`` RAPI
  resource's ``ip_check`` parameter changed from ``True`` to ``False``
  to match the underlying LUXI interface.
- The ``/2/nodes/[node_name]/evacuate`` RAPI resource was changed to use
  body parameters, see the RAPI documentation. The server does not
  maintain backwards-compatibility as the underlying operation changed
  in an incompatible way. The RAPI client can talk to old servers, but
  it needs to be told so, as the return value changed.
- When creating file-based instances via RAPI, the ``file_driver``
  parameter no longer defaults to ``loop`` and must be specified.
- The deprecated ``bridge`` NIC parameter is no longer supported. Use
  ``link`` instead.
- Support for the undocumented and deprecated RAPI instance creation
  request format version 0 has been dropped. Use version 1, supported
  since Ganeti 2.1.3 and documented, instead.
- Pyparsing 1.4.6 or above is required, see the installation
  documentation.
- The "cluster-verify" hooks are now executed per group by the
  ``OP_CLUSTER_VERIFY_GROUP`` opcode. This maintains the same behaviour
  if you just run ``gnt-cluster verify``, which generates one opcode per
  group.
- The environment as passed to the OS scripts is cleared, and thus no
  environment variables defined in the node daemon's environment will be
  inherited by the scripts.
- The iallocator mode ``multi-evacuate`` has been deprecated.
- New iallocator modes have been added to support operations involving
  multiple node groups.
- Offline nodes are ignored when failing over an instance.
- Support for KVM version 1.0, which changed the version reporting
  format from 3 to 2 digits.
- TCP/IP ports used by DRBD disks are returned to a pool upon instance
  removal.
- ``Makefile`` is now compatible with Automake 1.11.2
- Includes all bugfixes made in the 2.4 series

New features
~~~~~~~~~~~~

- The ganeti-htools project has been merged into the ganeti-core source
  tree and will be built as part of Ganeti (see :doc:`install-quick`).
- Implemented support for shared storage.
- Added support for disks larger than 2 TB in ``lvmstrap`` by supporting
  GPT-style partition tables (requires ``parted``).
- Added support for a floppy drive and a 2nd CD-ROM drive in the KVM
  hypervisor.
- Allowed adding tags on instance creation.
- Export instance tags to hooks (``INSTANCE_TAGS``, see :doc:`hooks`)
- Allow instances to be started in a paused state, enabling the user to
  see the complete console output on boot using the console.
- Added a new hypervisor flag to control the default reboot behaviour
  (``reboot_behavior``).
- Added support for KVM keymaps (hypervisor parameter ``keymap``).
- Improved out-of-band management support:

  - Added ``gnt-node health`` command reporting the health status of
    nodes.
  - Added ``gnt-node power`` command to manage the power status of
    nodes.
  - Added command for emergency power-off (EPO), ``gnt-cluster epo``.

- Instance migration can fall back to failover if the instance is not
  running.
- Filters can be used when listing nodes, instances, groups and locks;
  see the :manpage:`ganeti(7)` manpage.
- Added post-execution status as variables to the hooks environment.
- Instance tags are exported/imported together with the instance.
- When given an explicit job ID, ``gnt-job info`` will work for archived
  jobs.
- Jobs can define dependencies on other jobs (not yet supported via RAPI
  or the command line, but used by internal commands and usable via
  LUXI).
- The lock monitor (``gnt-debug locks``) shows jobs waiting for
  dependencies.
- Instance failover is now available as a RAPI resource
  (``/2/instances/[instance_name]/failover``).
- ``gnt-instance info`` defaults to static information if the primary
  node is offline.
- Opcodes have a new ``comment`` attribute.
- Added basic SPICE support to the KVM hypervisor.
- ``tools/ganeti-listrunner`` allows passing of arguments to the
  executable.

Node group improvements
~~~~~~~~~~~~~~~~~~~~~~~

- ``gnt-cluster verify`` has been modified to check groups separately,
  thereby improving performance.
- Node group support has been added to ``gnt-cluster verify-disks``,
  which now operates per node group.
- The watcher has been changed to work better with node groups.

  - One process and state file per node group.
  - A slow watcher in one group doesn't block other groups' watchers.

- Added a new command, ``gnt-group evacuate``, to move all instances in
  a node group to other groups.
- Added ``gnt-instance change-group`` to move an instance to another
  node group.
- ``gnt-cluster command`` and ``gnt-cluster copyfile`` now support
  per-group operations.
- Node groups can be tagged.
- Some operations switch from an exclusive to a shared lock as soon as
  possible.
- An instance's primary and secondary nodes' groups are now available as
  query fields (``pnode.group``, ``pnode.group.uuid``, ``snodes.group``
  and ``snodes.group.uuid``).

Misc
~~~~

- Numerous updates to documentation and manpages.
- The RAPI documentation now has detailed parameter descriptions.
- Some opcode/job results are now also documented, see the RAPI
  documentation.
- A lockset's internal lock is now also visible in the lock monitor.
- Log messages from job queue workers now contain information about the
  opcode they're processing.
- ``gnt-instance console`` no longer requires the instance lock.
- A short delay when waiting for job changes reduces the number of LUXI
  requests significantly.
- DRBD metadata volumes are overwritten with zeros during disk creation.
- Out-of-band commands no longer acquire the cluster lock in exclusive
  mode.
- ``devel/upload`` now uses correct permissions for directories.

Version 2.5.0 rc6
-----------------

*(Released Fri, 23 Mar 2012)*

This was the sixth release candidate of the 2.5 series.

Version 2.5.0 rc5
-----------------

*(Released Mon, 9 Jan 2012)*

This was the fifth release candidate of the 2.5 series.
Version 2.5.0 rc4
-----------------

*(Released Thu, 27 Oct 2011)*

This was the fourth release candidate of the 2.5 series.

Version 2.5.0 rc3
-----------------

*(Released Wed, 26 Oct 2011)*

This was the third release candidate of the 2.5 series.

Version 2.5.0 rc2
-----------------

*(Released Tue, 18 Oct 2011)*

This was the second release candidate of the 2.5 series.

Version 2.5.0 rc1
-----------------

*(Released Tue, 4 Oct 2011)*

This was the first release candidate of the 2.5 series.

Version 2.5.0 beta3
-------------------

*(Released Wed, 31 Aug 2011)*

This was the third beta release of the 2.5 series.

Version 2.5.0 beta2
-------------------

*(Released Mon, 22 Aug 2011)*

This was the second beta release of the 2.5 series.

Version 2.5.0 beta1
-------------------

*(Released Fri, 12 Aug 2011)*

This was the first beta release of the 2.5 series.

Version 2.4.5
-------------

*(Released Thu, 27 Oct 2011)*

- Fixed a bug when parsing command line parameter values ending in a
  backslash
- Fixed an assertion error after unclean master shutdown
- Disabled the HTTP client pool for RPC, significantly reducing the
  memory usage of the master daemon
- Fixed queue archive creation with wrong permissions

Version 2.4.4
-------------

*(Released Tue, 23 Aug 2011)*

Small bug-fixes:

- Fixed documentation for importing with the ``--src-dir`` option
- Fixed a bug in ``ensure-dirs`` with queue/archive permissions
- Fixed a parsing issue with DRBD 8.3.11 in the Linux kernel

Version 2.4.3
-------------

*(Released Fri, 5 Aug 2011)*

Many bug-fixes and a few small features:

- Fixed argument order in ``ReserveLV`` and ``ReserveMAC``, which caused
  issues when you tried to add an instance with two MAC addresses in one
  request
- KVM: fixed per-instance stored UID value
- KVM: configure bridged NICs at migration start
- KVM: fixed a bug where instances would not start with newer KVM
  versions (>= 0.14)
- Added the OS search path to ``gnt-cluster info``
- Fixed an issue with ``file_storage_dir`` where you were forced to
  provide an absolute path even though the documentation states it is a
  relative path; the documentation was right
- Added a new parameter to instance stop/start called ``--no-remember``
  so that the state change will not be remembered
- Implemented ``no_remember`` at RAPI level
- Improved the documentation
- Node evacuation: don't call the IAllocator if the node is already
  empty
- Fixed a bug in DRBD8 replace disks on current nodes
- Fixed a bug in recreate-disks for DRBD instances
- Moved an assertion checking lock ownership in ``gnt-instance
  replace-disks`` which caused it to abort in some situations because it
  did not own the right locks
- Job queue: fixed a potential race condition when cancelling queued
  jobs
- Fixed an off-by-one bug in job serial generation
- ``gnt-node volumes``: fix instance names
- Fixed aliases in bash completion
- Fixed a bug in reopening log files after being sent a SIGHUP
- Added a flag to burnin to allow specifying the VCPU count
- Bugfixes to non-root Ganeti configuration

Version 2.4.2
-------------

*(Released Thu, 12 May 2011)*

Many bug-fixes and a few new small features:

- Fixed a bug related to log opening failures
- Fixed a bug in instance listing with orphan instances
- Fixed a bug which prevented resetting the cluster-level node parameter
  ``oob_program`` to the default
- Many fixes related to the ``cluster-merge`` tool
- Fixed a race condition in the lock monitor, which caused failures
  during (at least) creation of many instances in parallel
- Improved output for ``gnt-job info``
- Removed the quiet flag on some ssh calls, which prevented debugging
  failures
- Improved the N+1 failure messages in cluster verify by actually
  showing the memory values (needed and available)
- Increased lock attempt timeouts so that when executing long operations
  (e.g. DRBD replace-disks) other jobs do not enter 'blocking acquire'
  too early and thus prevent use of the 'fair' mechanism
- Changed instance query data (``gnt-instance info``) to not acquire
  locks unless needed, thus allowing its use on locked instances if only
  static information is asked for
- Improved behaviour with filesystems that do not support rename on an
  opened file
- Fixed the behaviour of the ``prealloc_wipe_disks`` cluster parameter,
  which kept locks on all nodes during the wipe, which is unneeded
- Fixed ``gnt-watcher`` handling of errors during hooks execution
- Fixed a bug in ``prealloc_wipe_disks`` with small disk sizes (less
  than 10GiB) which caused the wipe to fail right at the end in some
  cases
- Fixed master IP activation when doing master failover with no-voting
- Fixed a bug in ``gnt-node add --readd`` which allowed the re-adding of
  the master node itself
- Fixed potential data loss under disk-full conditions, where Ganeti
  wouldn't correctly check the return code and would consider
  partially-written files 'correct'
- Fixed a bug related to multiple VGs and DRBD disk replacing
- Added a new disk parameter ``metavg`` that allows placement of the
  meta device for DRBD in a different volume group
- Fixed error handling in the node daemon when the system libc doesn't
  have major number 6 (i.e. if ``libc.so.6`` is not the actual libc)
- Fixed lock release during replace-disks, which kept cluster-wide locks
  when doing disk replaces with an iallocator script
- Added a check for missing bridges in cluster verify
- Handle EPIPE errors better while writing to the terminal, so that
  piping the output to e.g. ``less`` doesn't cause a backtrace
- Fixed a rare case where a ^C during LUXI calls could have been
  interpreted as server errors, instead of simply terminating
- Fixed a race condition in LUGroupAssignNodes (``gnt-group
  assign-nodes``)
- Added a few more parameters to the KVM hypervisor, allowing a second
  CDROM, a custom disk type for CDROMs and a floppy image
- Removed a redundant message in instance rename when the name is
  already given as a FQDN
- Added an option to ``gnt-instance recreate-disks`` to allow creating
  the disks on new nodes, allowing recreation when the original instance
  nodes are completely gone
- Added an option, when converting disk templates to DRBD, to skip
  waiting for the resync, in order to make the instance available sooner
- Added two new variables to the OS scripts environment (containing the
  instance's nodes)
- Made ``root_path`` an optional parameter for the xen-pvm hypervisor,
  to allow the use of ``pvgrub`` as bootloader
- Changed the instance memory modifications to only check out-of-memory
  conditions on memory increases, and turned the secondary node warnings
  into errors (they can still be overridden via ``--force``)
- Fixed the handling of a corner case when the Python installation gets
  corrupted (e.g. a bad disk) while ganeti-noded is running and we try
  to execute a command that doesn't exist
- Fixed a bug in ``gnt-instance move`` (LUInstanceMove) when the primary
  node of the instance returned failures during instance shutdown; this
  adds the option ``--ignore-consistency`` to ``gnt-instance move``

And as usual, various improvements to the error messages, documentation
and man pages.

Version 2.4.1
-------------

*(Released Wed, 09 Mar 2011)*

Emergency bug-fix release.
``tools/cfgupgrade`` was broken and overwrote the RAPI users file if run
twice (even with ``--dry-run``). The release fixes that bug (nothing
else changed).

Version 2.4.0
-------------

*(Released Mon, 07 Mar 2011)*

Final 2.4.0 release. Just a few small fixes:

- Fixed RAPI node evacuate
- Fixed the kvm-ifup script
- Fixed internal error handling for special job cases
- Updated man page to specify the escaping feature for options

Version 2.4.0 rc3
-----------------

*(Released Mon, 28 Feb 2011)*

A critical fix for the ``prealloc_wipe_disks`` feature: it was possible
that this feature wiped the disks of the wrong instance, leading to loss
of data.

Other changes:

- Fixed title of query field containing instance name
- Expanded the glossary in the documentation
- Fixed one unittest (internal issue)

Version 2.4.0 rc2
-----------------

*(Released Mon, 21 Feb 2011)*

A number of bug fixes plus just a couple of functionality changes.

On the user-visible side, the ``gnt-* list`` command output has changed
with respect to "special" field states. The rc1 style of display can be
re-enabled by passing a new ``--verbose`` (``-v``) flag, but in the
default output mode special field states are displayed as follows:

- Offline resource: ``*``
- Unavailable/not applicable: ``-``
- Data missing (RPC failure): ``?``
- Unknown field: ``??``

Another user-visible change is the addition of ``--force-join`` to
``gnt-node add``.

As for bug fixes:

- ``tools/cluster-merge`` has seen many fixes and is now enabled again
- Fixed a regression in RAPI/instance reinstall where all parameters
  were required (instead of optional)
- Fixed ``gnt-cluster repair-disk-sizes``, which was broken since Ganeti
  2.2
- Fixed iallocator usage (offline nodes were not considered offline)
- Fixed ``gnt-node list`` with respect to non-vm_capable nodes
- Fixed hypervisor and OS parameter validation with respect to
  non-vm_capable nodes
- Fixed ``gnt-cluster verify`` with respect to offline nodes (mostly
  cosmetic)
- Fixed ``tools/listrunner`` with respect to agent-based usage

Version 2.4.0 rc1
-----------------

*(Released Fri, 4 Feb 2011)*

Many changes and fixes since the beta1 release. While there were some
internal changes, the code has been mostly stabilised for the RC
release.

Note: the dumb allocator was removed in this release, as it was not kept
up-to-date with the IAllocator protocol changes. It is recommended to
use the ``hail`` command from the ganeti-htools package.

Note: the 2.4 and up versions of Ganeti are not compatible with the
0.2.x branch of ganeti-htools. You need to upgrade to
ganeti-htools-0.3.0 (or later).
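Purely as a sketch (instance name, OS and sizes are invented), ``hail``
is selected like any other iallocator via the ``-I`` option::

  # sketch: let hail choose the nodes instead of naming them explicitly
  gnt-instance add -t drbd -I hail -o debootstrap+default \
    -s 10G -B memory=512M inst1.example.com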
Regressions fixed from 2.3
~~~~~~~~~~~~~~~~~~~~~~~~~~

- Fixed the ``gnt-cluster verify-disks`` command
- Made ``gnt-cluster verify-disks`` work in parallel (as opposed to
  serially on nodes)
- Fixed disk adoption breakage
- Fixed wrong headers in instance listing for field aliases

Other bugs fixed
~~~~~~~~~~~~~~~~

- Fixed a corner case in KVM handling of NICs
- Fixed many cases of wrong handling of non-vm_capable nodes
- Fixed a bug where a missing instance symlink was not possible to
  recreate with any ``gnt-*`` command (now ``gnt-instance
  activate-disks`` does it)
- Fixed the volume group name as reported by ``gnt-cluster
  verify-disks``
- Increased timeouts for the import-export code, hopefully leading to
  fewer aborts due to network or instance timeouts
- Fixed a bug in ``gnt-node list-storage``
- Fixed a bug where not all daemons were started on cluster
  initialisation, but only at the first watcher run
- Fixed many bugs in the OOB implementation
- Fixed watcher behaviour in the presence of instances with offline
  secondaries
- Fixed instance list output for instances running on the wrong node
- A few fixes to the cluster-merge tool, but it still cannot merge
  multi-node groups (currently it is not recommended to use this tool)

Improvements
~~~~~~~~~~~~

- Improved network configuration for the KVM hypervisor
- Added e1000 as a supported NIC for Xen-HVM
- Improved the lvmstrap tool to also be able to use partitions, as
  opposed to full disks
- Improved the speed of disk wiping (the cluster parameter
  ``prealloc_wipe_disks``), so that it has a low impact on the total
  time of instance creations
- Added documentation for the OS parameters
- Changed ``gnt-instance deactivate-disks`` so that it can work even if
  the hypervisor is not responding
- Added display of blacklisted and hidden OS information in
  ``gnt-cluster info``
- Extended ``gnt-cluster verify`` to also validate hypervisor, backend,
  NIC and node parameters, which might create problems with currently
  invalid (but undetected) configuration files, but prevents validation
  failures when unrelated parameters are modified
- Changed cluster initialisation to wait for the master daemon to become
  available
- Expanded the RAPI interface:

  - Added config redistribution resource
  - Added activation/deactivation of instance disks
  - Added export of console information

- Implemented log file reopening on SIGHUP, which allows using
  logrotate(8) for the Ganeti log files
- Added a basic OOB helper script as an example

Version 2.4.0 beta1
-------------------

*(Released Fri, 14 Jan 2011)*

User-visible
~~~~~~~~~~~~

- Fixed timezone issues when formatting timestamps
- Added support for node groups, available via ``gnt-group`` and other
  commands
- Added out-of-band framework and management, see the design document
- Removed support for roman numerals from ``gnt-node list`` and
  ``gnt-instance list``.
- Allowed modification of the master network interface via ``gnt-cluster
  modify --master-netdev``
- Accept offline secondaries while shutting down instance disks
- Added the ``blockdev_prefix`` parameter to the Xen PVM and HVM
  hypervisors
- Added support for multiple LVM volume groups
- Avoid sorting nodes for ``gnt-node list`` if specific nodes are
  requested
- Added commands to list available fields:

  - ``gnt-node list-fields``
  - ``gnt-group list-fields``
  - ``gnt-instance list-fields``

- Updated documentation and man pages

Integration
~~~~~~~~~~~

- Moved the ``rapi_users`` file into a separate directory, now named
  ``.../ganeti/rapi/users``; ``cfgupgrade`` moves the file and creates a
  symlink
- Added a new tool for running commands on many machines,
  ``tools/ganeti-listrunner``
- Implemented a more verbose result in the ``OpInstanceConsole`` opcode,
  also improving the ``gnt-instance console`` output
- Allowed customisation of the disk index separator at ``configure``
  time
- Export node group allocation policy to the iallocator
- Added support for non-partitioned md disks in ``lvmstrap``
- Added a script to gracefully power off KVM instances
- Split the ``utils`` module into smaller parts
- Changed query operations to return more detailed information, e.g.
  whether a piece of information is unavailable due to an offline node.
  To use this new functionality, the LUXI call ``Query`` must be used.
  Field information is now stored by the master daemon and can be
  retrieved using ``QueryFields``. Instances, nodes and groups can also
  be queried using the new opcodes ``OpQuery`` and ``OpQueryFields``
  (not yet exposed via RAPI). The following commands make use of this
  infrastructure change:

  - ``gnt-group list``
  - ``gnt-group list-fields``
  - ``gnt-node list``
  - ``gnt-node list-fields``
  - ``gnt-instance list``
  - ``gnt-instance list-fields``
  - ``gnt-debug locks``

Remote API
~~~~~~~~~~

- New RAPI resources (see :doc:`rapi`):

  - ``/2/modify``
  - ``/2/groups``
  - ``/2/groups/[group_name]``
  - ``/2/groups/[group_name]/assign-nodes``
  - ``/2/groups/[group_name]/modify``
  - ``/2/groups/[group_name]/rename``
  - ``/2/instances/[instance_name]/disk/[disk_index]/grow``

- RAPI changes:

  - Implemented ``no_install`` for instance creation
  - Implemented OS parameters for instance reinstallation, allowing the
    use of special settings on reinstallation (e.g. for preserving data)

Misc
~~~~

- Added IPv6 support in import/export
- Pause DRBD synchronization while wiping disks on instance creation
- Updated unittests and QA scripts
- Improved network parameters passed to KVM
- Converted man pages from docbook to reStructuredText

Version 2.3.1
-------------

*(Released Mon, 20 Dec 2010)*

Released version 2.3.1~rc1 without any changes.

Version 2.3.1 rc1
-----------------

*(Released Wed, 1 Dec 2010)*

- impexpd: Disable OpenSSL compression in socat if possible (backport
  from master, commit e90739d625b, see the installation guide for
  details)
- Changed unittest coverage report to exclude test scripts
- Added a script to check version format

Version 2.3.0
-------------

*(Released Wed, 1 Dec 2010)*

Released version 2.3.0~rc1 without any changes.
Version 2.3.0 rc1
-----------------

*(Released Fri, 19 Nov 2010)*

A number of bugfixes and documentation updates:

- Updated ganeti-os-interface documentation
- Fixed a bug related to duplicate MACs or similar items which should be
  unique
- Fixed breakage in OS state modify
- Reinstall instance: disallow offline secondaries (fixes a bug related
  to the OS changing but reinstall failing)
- plus all the other fixes between 2.2.1 and 2.2.2

Version 2.3.0 rc0
-----------------

*(Released Tue, 2 Nov 2010)*

- Fixed clearing of the default iallocator using ``gnt-cluster modify``
- Fixed master failover race with the watcher
- Fixed a bug in ``gnt-node modify`` which could lead to an inconsistent
  configuration
- Accept a previously stopped instance for export with instance removal
- Simplified and extended the environment variables for instance OS
  scripts
- Added new node flags, ``master_capable`` and ``vm_capable``
- Added optional instance disk wiping prior to allocation. This is a
  cluster-wide option and can be set/modified using ``gnt-cluster
  {init,modify} --prealloc-wipe-disks``.
- Added IPv6 support, see the design document and :doc:`install-quick`
- Added a new watcher option (``--ignore-pause``)
- Added an option to ignore offline nodes on instance start/stop
  (``--ignore-offline``)
- Allow overriding OS parameters with ``gnt-instance reinstall``
- Added the ability to change a node's secondary IP address using
  ``gnt-node modify``
- Implemented privilege separation for all daemons except
  ``ganeti-noded``, see the ``configure`` options
- Complain if an instance's disk is marked faulty in ``gnt-cluster
  verify``
- Implemented job priorities (see the ``ganeti(7)`` manpage)
- Ignore failures while shutting down instances during failover from an
  offline node
- Exit the daemon's bootstrap process only once the daemon is ready
- Export more information via ``LUInstanceQuery``/remote API
- Improved documentation, QA and unittests
- The RAPI daemon now watches ``rapi_users`` all the time and doesn't
  need a restart if the file was created or changed
- Added a LUXI protocol version sent with each request and response,
  allowing detection of server/client mismatches
- Moved the Python scripts among gnt-* and ganeti-* into modules
- Moved all code related to setting up SSH to an external script,
  ``setup-ssh``
- Infrastructure changes for node group support in future versions

Version 2.2.2
-------------

*(Released Fri, 19 Nov 2010)*

A few small bugs fixed, and some improvements to the build system:

- Fixed documentation regarding conversion to drbd
- Fixed validation of parameters in cluster modify (``gnt-cluster modify
  -B``)
- Fixed error handling in node modify with multiple changes
- Allow remote imports without checked names

Version 2.2.1
-------------

*(Released Tue, 19 Oct 2010)*

- Disable SSL session ID cache in RPC client

Version 2.2.1 rc1
-----------------

*(Released Thu, 14 Oct 2010)*

- Fixed the interaction between Curl/GnuTLS and Python's HTTP server
  (thanks Apollon Oikonomopoulos!), finally allowing the use of Curl
  with GnuTLS
- Fixed problems with the interaction between Curl and Python's HTTP
  server, resulting in increased speed in many RPC calls
- Improved our release script to prevent breakage with older aclocal and
  Python 2.6

Version 2.2.1 rc0
-----------------

*(Released Thu, 7 Oct 2010)*

- Fixed issue 125, replaced the hardcoded "xenvg" in ``gnt-cluster``
  with the value retrieved from the master
- Added support for blacklisted or hidden OS definitions
- Added a simple lock monitor (accessible via ``gnt-debug locks``)
- Added support for ``-mem-path`` in the KVM
  hypervisor abstraction layer
- Allow overriding instance parameters in the tool for inter-cluster
  instance moves (``tools/move-instance``)
- Improved opcode summaries (e.g. in ``gnt-job list``)
- Improved consistency of OS listing by sorting it
- Documentation updates

Version 2.2.0.1
---------------

*(Released Fri, 8 Oct 2010)*

- Rebuilt with a newer autotools version, to fix python 2.6
  compatibility

Version 2.2.0
-------------

*(Released Mon, 4 Oct 2010)*

- Fixed regression in ``gnt-instance rename``

Version 2.2.0 rc2
-----------------

*(Released Wed, 22 Sep 2010)*

- Fixed OS_VARIANT variable for OS scripts
- Fixed cluster tag operations via RAPI
- Made ``setup-ssh`` exit with a non-zero code if an error occurred
- Disabled RAPI CA checks in the watcher

Version 2.2.0 rc1
-----------------

*(Released Mon, 23 Aug 2010)*

- Support DRBD versions of the format "a.b.c.d"
- Updated manpages
- Re-introduced support for usage from multiple threads in the RAPI
  client
- Instance renames and modify via RAPI
- Work around a race condition between processing and archival in the
  job queue
- Mark opcodes following a failed one as failed, too
- The job field ``lock_status`` was removed due to difficulties making
  it work with the changed job queue in Ganeti 2.2; a better way to
  monitor locks is expected for a later 2.2.x release
- Fixed dry-run behaviour with many commands
- Support ``ssh-agent`` again when adding nodes
- Many additional bugfixes

Version 2.2.0 rc0
-----------------

*(Released Fri, 30 Jul 2010)*

Important change: the internal RPC mechanism between Ganeti nodes has
changed from using a home-grown http library (based on the Python base
libraries) to using the PycURL library. This requires that PycURL is
installed on nodes. Please note that on Debian/Ubuntu, PycURL is linked
against GnuTLS by default. cURL's support for GnuTLS had known issues
before cURL 7.21.0 and we recommend using the latest cURL release or
linking against OpenSSL. Most other distributions already link PycURL
and cURL against OpenSSL. The command::

  python -c 'import pycurl; print pycurl.version'

can be used to determine the libraries PycURL and cURL are linked
against.
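To check just the TLS backend, the output of that command can be
filtered, for example (the exact output format varies between
versions)::

  python -c 'import pycurl; print pycurl.version' | grep -Eo 'GnuTLS|OpenSSL'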
Other significant changes:

- Rewrote much of the internals of the job queue, in order to achieve
  better parallelism; this decouples job query operations from the job
  processing, it should allow much nicer behaviour of the master daemon
  under load, and it also uncovered some long-standing bugs related to
  the job serialisation (now fixed)
- Added a default iallocator setting to the cluster parameters,
  eliminating the need to always pass nodes or an iallocator for
  operations that require selection of new node(s)
- Added experimental support for the LXC virtualization method
- Added support for OS parameters, which allows the installation of
  instances to pass parameters to OS scripts in order to customise the
  instance
- Added a hypervisor parameter controlling the migration type (live or
  non-live), since hypervisors have various levels of reliability; this
  has renamed the 'live' parameter to 'mode'
- Added a cluster parameter ``reserved_lvs`` that denotes reserved
  logical volumes, meaning that cluster verify will ignore them and not
  flag their presence as errors
- The watcher will now reset the error count for failed instances after
  8 hours, thus allowing self-healing if the problem that caused the
  instances to be down/fail to start has cleared in the meantime
- Added a cluster parameter ``drbd_usermode_helper`` that makes Ganeti
  check for, and warn, if the drbd module parameter ``usermode_helper``
  is not consistent with the cluster-wide setting; this is needed to
  make diagnosing failed drbd creations easier
- Started adding base IPv6 support, but this is not yet
  enabled/available for use
- Rename operations (cluster, instance) will now return the new name,
  which is especially useful if a short name was passed in
- Added support for instance migration in RAPI
- Added a tool to pre-configure nodes for the SSH setup, before joining
  them to the cluster; this will allow a simplified model for node
  joining in the future (but not yet fully enabled in 2.2); this needs
  the paramiko python library
- Fixed handling of name-resolving errors
- Fixed consistency of job results on the error path
- Fixed a master-failover race condition when executed multiple times in
  sequence
- Fixed many bugs related to the job queue (mostly introduced during the
  2.2 development cycle, so not all are impacting 2.1)
- Fixed instance migration with missing disk symlinks
- Fixed handling of unknown jobs in ``gnt-job archive``
- And many other small fixes/improvements

Internal changes:

- Enhanced both the unittest and the QA coverage
- Switched the opcode validation to a generic model, and extended the
  validation to all opcode parameters
- Changed more parts of the code that write shell scripts to use the
  same class for this
- Switched the master daemon to use the asyncore library for the Luxi
  server endpoint

Version 2.2.0 beta0
-------------------

*(Released Thu, 17 Jun 2010)*

- Added a tool (``move-instance``) and infrastructure to move instances
  between separate clusters (see the separate documentation and design
  document)
- Added per-request RPC timeout
- RAPI now requires a Content-Type header for requests with a body (e.g.
  ``PUT`` or ``POST``), which must be set to ``application/json`` (see
  :rfc:`2616` (HTTP/1.1), section 7.2.1)
- ``ganeti-watcher`` attempts to restart ``ganeti-rapi`` if RAPI is not
  reachable
- Implemented initial support for running Ganeti daemons as separate
  users, see the configure-time flags ``--with-user-prefix`` and
  ``--with-group-prefix`` (only ``ganeti-rapi`` is supported at this
  time)
- Instances can be removed after export (``gnt-backup export
  --remove-instance``)
- Self-signed certificates generated by Ganeti now use a 2048 bit RSA
  key (instead of 1024 bit)
- Added a new cluster configuration file for the cluster domain secret
- Import/export now use SSL instead of SSH
- Added support for showing the estimated time when exporting an
  instance, see the ``ganeti-os-interface(7)`` manpage and look for
  ``EXP_SIZE_FD``

Version 2.1.8
-------------

*(Released Tue, 16 Nov 2010)*

Some more bugfixes. Unless critical bugs occur, this will be the last
2.1 release:

- Fix case of MAC special-values
- Fix MAC checker regex
- backend: Fix typo causing an "out of range" error
- Add missing ``--units`` in the gnt-instance list man page

Version 2.1.7
-------------

*(Released Tue, 24 Aug 2010)*

Bugfixes only:

- Don't silently ignore the secondary node on non-mirrored disk
  templates (issue 113)
- Fix the --master-netdev arg name in gnt-cluster(8) (issue 114)
- Fix the usb_mouse parameter breaking with vnc_console (issue 109)
- Properly document the usb_mouse parameter
- Fix path in ganeti-rapi(8) (issue 116)
- Adjust the error message when the ganeti user's .ssh directory is
  missing
- Add a same-node check when changing the disk template to drbd

Version 2.1.6
-------------

*(Released Fri, 16 Jul 2010)*

Bugfixes only:

- Added an option to only select some reboot types during qa/burnin (on
  some hypervisors consecutive reboots are not supported)
- Fixed an infrequent race condition in master failover: sometimes the
  old master IP address would still be detected as up for a short time
  after it was removed, causing failover to fail
- Decreased mlockall warnings when the ctypes module is missing; on
  Python 2.4 we support running even if no ctypes module is installed,
  but we were too verbose about this issue
- Fixed building on old distributions, on which man doesn't have a
  --warnings option
- Fixed RAPI not to ignore the MAC address on instance creation
- Implemented the old instance creation format in the RAPI client
Version 2.1.5
-------------

*(Released Thu, 01 Jul 2010)*

A small bugfix release:

- Fixed disk adoption: it was broken by the strict ``--disk`` option
  checking in 2.1.4
- Fixed batch-create: it was broken in the whole 2.1 series due to a
  lookup on a non-existing option
- Fixed instance create: the ``--force-variant`` option was ignored
- Improved pylint 0.21 compatibility and warnings with Python 2.6
- Fixed modify node storage with non-FQDN arguments
- Fixed the RAPI client to authenticate under Python 2.6 when used for
  more than 5 requests needing authentication
- Fixed ``gnt-instance modify -t`` (storage) giving a wrong error
  message when converting a non-shutdown drbd instance to plain

Version 2.1.4
-------------

*(Released Fri, 18 Jun 2010)*

A small bugfix release:

- Fixed live migration of KVM instances started with older Ganeti
  versions which had fewer hypervisor parameters
- Fixed ``gnt-instance grow-disk`` on down instances
- Fixed an error-reporting bug during instance migration
- Better checking of the ``--net`` and ``--disk`` values, to avoid
  silently ignoring broken ones
- Fixed an RPC error-reporting bug affecting, for example, RAPI client
  users
- Fixed a bug triggered by OSes with different API versions on different
  nodes
- Fixed a bug in instance startup with custom hvparams: OS level
  parameters would fail to be applied
- Fixed the RAPI client under Python 2.6 (but more work is needed to
  make it work completely well with OpenSSL)
- Fixed handling of errors when resolving names from DNS

Version 2.1.3
-------------

*(Released Thu, 3 Jun 2010)*

A medium sized development cycle. Some new features, and some
fixes/small improvements/cleanups.

Significant features
~~~~~~~~~~~~~~~~~~~~

The node daemon now tries to mlock itself into memory, unless the
``--no-mlock`` flag is passed. It also doesn't fail if it can't write
its logs, and falls back to console logging. This allows emergency
features such as ``gnt-node powercycle`` to work even in the event of a
broken node disk (tested offlining the disk hosting the node's
filesystem and dropping its memory caches; don't try this at home).

KVM: added vhost-net acceleration support. It can be tested with a new
enough version of the kernel and of qemu-kvm.

KVM: added an instance chrooting feature. If you use privilege dropping
for your VMs you can also now force them to chroot to an empty
directory, before starting the emulated guest.

KVM: added maximum migration bandwidth and maximum downtime tweaking
support (requires a new-enough version of qemu-kvm).

Cluster verify will now warn if the master node doesn't have the master
IP configured on it.

Added a new (incompatible) instance creation request format to RAPI
which supports all parameters (previously only a subset was supported,
and it wasn't possible to extend the old format to accommodate all the
new features). The old format is still supported, and a client can
check for this feature, before using it, by checking for its presence
in the ``features`` RAPI resource.

Now with ancient Latin support. Try it by passing the ``--roman`` option
to ``gnt-instance info``, ``gnt-cluster info`` or ``gnt-node list``
(requires the python-roman module to be installed, in order to work).

Other changes
~~~~~~~~~~~~~

As usual many internal code refactorings, documentation updates, and
such. Among others:

- Lots of improvements and cleanups to the experimental Remote API
  (RAPI) client library.
- A new unit test suite for the core daemon libraries.
- A fix to creating missing directories makes sure the umask is not
  applied anymore. This enforces the same directory permissions
  everywhere.
- Better handling of terminating daemons with ctrl+c (used when running
  them in debugging mode).
- Fixed a race condition in live migrating a KVM instance, when stat()
  on the old proc status file returned EINVAL, which is an unexpected
  value.
- Fixed manpage checking with newer man and utf-8 characters. But now
  you need the en_US.UTF-8 locale enabled to build Ganeti from git.

Version 2.1.2.1
---------------

*(Released Fri, 7 May 2010)*

Fix a bug which prevented untagged KVM instances from starting.

Version 2.1.2
-------------

*(Released Fri, 7 May 2010)*

Another release with a long development cycle, during which many
different features were added.

Significant features
~~~~~~~~~~~~~~~~~~~~

The KVM hypervisor can now run the individual instances as non-root, to
reduce the impact of a VM being hijacked due to bugs in the hypervisor.
It is possible to run all instances as a single (non-root) user, to
manually specify a user for each instance, or to dynamically allocate a
user out of a cluster-wide pool to each instance, with the guarantee
that no two instances will run under the same user ID on any given node.

An experimental RAPI client library, that can be used standalone
(without the other Ganeti libraries), is provided in the source tree as
``lib/rapi/client.py``. Note this client might change its interface in
the future, as we iterate on its capabilities.

A new command, ``gnt-cluster renew-crypto``, has been added to easily
replace the cluster's certificates and crypto keys. This might help in
case they have been compromised, or have simply expired.

A new disk option for instance creation has been added that allows one
to "adopt" currently existing logical volumes, with data preservation.
This should allow easier migration to Ganeti from unmanaged (or managed
via other software) instances.

Another disk improvement is the possibility to convert between redundant
(DRBD) and plain (LVM) disk configurations for an instance. This should
allow better scalability (starting with one node and growing the
cluster, or shrinking a two-node cluster to one node).

A new feature that could help with automated node failovers has been
implemented: if a node sees itself as offline (by querying the master
candidates), it will try to shut down (hard) all instances and any
active DRBD devices. This reduces the risk of duplicate instances if an
external script automatically fails over the instances on such nodes. To
enable this, the cluster parameter ``maintain_node_health`` should be
enabled; in the future this option (per the name) will enable other
automatic maintenance features.

Instance export/import will now reuse the original instance
specifications for all parameters; that means exporting an instance,
deleting it and then importing it back should give an almost identical
instance. Note that the default import behaviour has changed from
before, where it created only one NIC; now it recreates the original
number of NICs.

Cluster verify has added a few new checks: SSL certificate validity,
/etc/hosts consistency across the cluster, etc.

Other changes
~~~~~~~~~~~~~

As usual, many internal changes were done, documentation fixes, etc.
Among others:

- Fixed cluster initialization with disabled cluster storage (regression
  introduced in 2.1.1)
- File-based storage supports growing the disks
- Fixed behaviour of node role changes
- Fixed cluster verify for some corner cases, plus a general rewrite of
  cluster verify to allow future extension with more checks
- Fixed log spamming by the watcher and node daemon (regression
  introduced in 2.1.1)
- Fixed possible validation issues when changing the list of enabled
  hypervisors
- Fixed cleanup of /etc/hosts during node removal
- Fixed RAPI response for invalid methods
- Fixed a bug with hashed passwords in the ``ganeti-rapi`` daemon
- Multiple small improvements to the KVM hypervisor (VNC usage, booting
  from ide disks, etc.)
- Allow OS changes without re-installation (to record a changed OS
  outside of Ganeti, or to allow OS renames)
- Allow instance creation without OS installation (useful for example if
  the OS will be installed manually, or restored from a backup not in
  Ganeti format)
- Implemented an option to make cluster ``copyfile`` use the replication
  network
- Added the list of enabled hypervisors to ssconf (possibly useful for
  external scripts)
- Added a new tool (``tools/cfgupgrade12``) that allows upgrading from
  1.2 clusters
- A partial form of node re-IP is possible via node readd, which now
  allows a changed node primary IP
- Command line utilities now show an informational message if the job is
  waiting for a lock
- The logs of the master daemon now show the PID/UID/GID of the
  connected client

Version 2.1.1
-------------

*(Released Fri, 12 Mar 2010)*

During the long 2.1.0 release-candidate cycle, a lot of improvements and
changes accumulated, which were released later as 2.1.1.

Major changes
~~~~~~~~~~~~~

The node evacuate command (``gnt-node evacuate``) was significantly
rewritten, and as such the IAllocator protocol was changed - a new
request type has been added. This unfortunate change during a stable
series is designed to improve the performance of node evacuations; on
clusters with more than about five nodes and which are well-balanced,
evacuation should proceed in parallel for all instances of the node
being evacuated. As such, any existing IAllocator scripts need to be
updated, otherwise the above command will fail due to the unknown
request. The provided "dumb" allocator has not been updated, but the
ganeti-htools package supports the new protocol since version 0.2.4.

Another important change is increased validation of node and instance
names. This might create problems in special cases, if invalid host
names are being used.

Also, a new layer of hypervisor parameters has been added, that sits at
the OS level between the cluster defaults and the instance ones. This
allows customisation of virtualization parameters depending on the
installed OS. For example instances with OS 'X' may have a different KVM
kernel (or any other parameter) than the cluster defaults. This is
intended to help manage multiple OSes on the same cluster, without
manual modification of each instance's parameters.

A tool for merging clusters, ``cluster-merge``, has been added in the
tools sub-directory.

Bug fixes
~~~~~~~~~

- Improved the int/float conversions that should make the code more
  robust in the face of errors from the node daemons
- Fixed the remove node code in case of internal configuration errors
- Fixed the node daemon behaviour in the face of an inconsistent queue
  directory (e.g. a read-only file-system where we can't open the files
  read-write, etc.)
- Fixed the behaviour of gnt-node modify for master candidate demotion;
  now it either aborts cleanly or, if given the new "auto_promote"
  parameter, will automatically promote other nodes as needed
- Fixed compatibility with the (not yet released) Python 2.6.5, which
  would completely prevent Ganeti from working
- Fixed a bug in instance export when not all disks were successfully
  exported
- Fixed the behaviour of node add when the new node is slow in starting
  up the node daemon
- Fixed handling of signals in the LUXI client, which should improve the
  behaviour of command-line scripts
- Added checks for invalid node/instance names in the configuration (now
  flagged during cluster verify)
- Fixed watcher behaviour for disk activation errors
- Fixed two potentially endless loops in the http library, which led to
  the RAPI daemon hanging and consuming 100% CPU in some cases
- Fixed a bug in the RAPI daemon related to hashed passwords
- Fixed a bug of unintended qemu-level bridging of multi-NIC KVM
  instances
- Enhanced compatibility with non-Debian OSes, by not using absolute
  paths in some commands and allowing customisation of the ssh
  configuration directory
- Fixed a possible future issue with new Python versions by abiding by
  the proper use of the ``__slots__`` attribute on classes
- Added checks that should prevent directory traversal attacks
- Many documentation fixes based on feedback from users

New features
~~~~~~~~~~~~

- Added an "early_release" mode for instance replace-disks and node
  evacuate, where we release locks earlier and thus allow higher
  parallelism within the cluster
- Added watcher hooks, intended to allow the watcher to restart other
  daemons (e.g. from the ganeti-nbma project), but they can of course be
  used for any other purpose
- Added a compile-time disable for DRBD barriers, to increase
  performance if the administrator trusts the power supply or the
  storage system to not lose writes
- Added the option of using syslog for logging instead of, or in
  addition to, Ganeti's own log files
- Removed the boot restriction for paravirtual NICs for KVM; recent
  versions can indeed boot from a paravirtual NIC
- Added a generic debug level for many operations; while this is not
  used widely yet, it allows one to pass the debug value all the way to
  the OS scripts
- Enhanced the hooks environment for instance moves (failovers,
  migrations) where the primary/secondary nodes changed during the
  operation, by adding {NEW,OLD}_{PRIMARY,SECONDARY} vars
- Enhanced data validations for many user-supplied values; one important
  item is the restrictions imposed on instance and node names, which
  might reject some (invalid) host names
- Added a configure-time option to disable file-based storage, if it's
  not needed; this allows greater security separation between the master
  node and the other nodes from the point of view of the inter-node RPC
  protocol
- Added user notification in interactive tools if the job is waiting in
  the job queue or trying to acquire locks
- Added log messages when a job is waiting for locks
- Added filtering by node tags in instance operations which admit
  multiple instances (start, stop, reboot, reinstall)
- Added a new tool for cluster mergers, ``cluster-merge``
- Parameters from the command line which are of the form ``a=b,c=d`` can
  now use backslash escapes to pass in values which contain commas, e.g.
  ``a=b\,c,d=e`` where the 'a' parameter would get the value ``b,c``
- For KVM, the instance name is the first parameter passed to KVM, so
  that it's more visible in the process list

Version 2.1.0
-------------

*(Released Tue, 2 Mar 2010)*

Ganeti 2.1 brings many improvements with it. Major changes:

- Added infrastructure to ease automated disk repairs
- Added new daemon to export configuration data in a cheaper way than
  using the remote API
- Instance NICs can now be routed instead of being associated with a
  networking bridge
- Improved job locking logic to reduce impact of jobs acquiring multiple
  locks waiting for other long-running jobs

In-depth implementation details can be found in the Ganeti 2.1 design
document.

Details
~~~~~~~

- Added chroot hypervisor
- Added more options to xen-hvm hypervisor (``kernel_path`` and
  ``device_model``)
- Added more options to xen-pvm hypervisor (``use_bootloader``,
  ``bootloader_path`` and ``bootloader_args``)
- Added the ``use_localtime`` option for the xen-hvm and kvm
  hypervisors, and the default value for this has changed to false (in
  2.0 xen-hvm always enabled it)
- Added luxi call to submit multiple jobs in one go
- Added cluster initialization option to not modify ``/etc/hosts`` file
  on nodes
- Added network interface parameters
- Added dry run mode to some LUs
- Added RAPI resources:

  - ``/2/instances/[instance_name]/info``
  - ``/2/instances/[instance_name]/replace-disks``
  - ``/2/nodes/[node_name]/evacuate``
  - ``/2/nodes/[node_name]/migrate``
  - ``/2/nodes/[node_name]/role``
  - ``/2/nodes/[node_name]/storage``
  - ``/2/nodes/[node_name]/storage/modify``
  - ``/2/nodes/[node_name]/storage/repair``

- Added OpCodes to evacuate or migrate all instances on a node
- Added new command to list storage elements on nodes (``gnt-node
  list-storage``) and modify them (``gnt-node modify-storage``)
- Added new ssconf files with master candidate IP address
  (``ssconf_master_candidates_ips``), node primary IP address
  (``ssconf_node_primary_ips``) and node secondary IP address
  (``ssconf_node_secondary_ips``)
- Added ``ganeti-confd`` and a client library to query the Ganeti
  configuration via UDP
- Added ability to run hooks after cluster initialization and before
  cluster destruction
- Added automatic mode for disk replace (``gnt-instance replace-disks
  --auto``)
- Added ``gnt-instance recreate-disks`` to re-create (empty) disks after
  catastrophic data loss
- Added ``gnt-node repair-storage`` command to repair damaged LVM volume
  groups
- Added ``gnt-instance move`` command to move instances
- Added ``gnt-cluster watcher`` command to control watcher
- Added ``gnt-node powercycle`` command to powercycle nodes
- Added new job status field ``lock_status``
- Added parseable error codes to cluster verification (``gnt-cluster
  verify --error-codes``) and made output less verbose (use
  ``--verbose`` to restore previous behaviour)
- Added UUIDs to the main config entities (cluster, nodes, instances)
- Added support for OS variants
- Added support for hashed passwords in the Ganeti remote API users file
  (``rapi_users``)
- Added option to specify maximum timeout on instance shutdown
- Added ``--no-ssh-init`` option to ``gnt-cluster init``
- Added new helper script to start and stop Ganeti daemons
  (``daemon-util``), with the intent to reduce the work necessary to
  adjust Ganeti for non-Debian distributions and to start/stop daemons
  from one place
- Added more unittests
- Fixed critical bug in ganeti-masterd startup
- Removed the configure-time ``kvm-migration-port`` parameter; this is
  now
  customisable at the cluster level for both the KVM and Xen hypervisors
  using the new ``migration_port`` parameter
- Pass ``INSTANCE_REINSTALL`` variable to OS installation script when
  reinstalling an instance
- Allowed ``@`` in tag names
- Migrated to Sphinx (http://sphinx.pocoo.org/) for documentation
- Many documentation updates
- Distribute hypervisor files on ``gnt-cluster redist-conf``
- ``gnt-instance reinstall`` can now reinstall multiple instances
- Updated many command line parameters
- Introduced new OS API version 15
- No longer support a default hypervisor
- Treat virtual LVs as nonexistent
- Improved job locking logic to reduce lock contention
- Match instance and node names case insensitively
- Reimplemented bash completion script to be more complete
- Improved burnin

Version 2.0.6
-------------

*(Released Thu, 4 Feb 2010)*

- Fix cleaner behaviour on nodes not in a cluster (Debian bug 568105)
- Fix a string formatting bug
- Improve safety of the code in some error paths
- Improve data validation in the master of values returned from nodes

Version 2.0.5
-------------

*(Released Thu, 17 Dec 2009)*

- Fix security issue due to missing validation of iallocator names; this
  allowed local and remote execution of arbitrary executables
- Fix failure of gnt-node list during instance removal
- Ship the RAPI documentation in the archive

Version 2.0.4
-------------

*(Released Wed, 30 Sep 2009)*

- Fixed many wrong messages
- Fixed a few bugs related to the locking library
- Fixed MAC checking at instance creation time
- Fixed a DRBD parsing bug related to gaps in /proc/drbd
- Fixed a few issues related to signal handling in both daemons and
  scripts
- Fixed the example startup script provided
- Fixed insserv dependencies in the example startup script (patch from
  Debian)
- Fixed handling of drained nodes in the iallocator framework
- Fixed handling of KERNEL_PATH parameter for xen-hvm (Debian bug
  #528618)
- Fixed error related to invalid job IDs in job polling
- Fixed job/opcode persistence on unclean master shutdown
- Fixed handling of partial job processing after unclean master shutdown
- Fixed error reporting from LUs, previously all errors were converted
  into execution errors
- Fixed error reporting from burnin
- Significantly decreased the memory usage of the job queue
- Slightly optimised multi-job submission
- Slightly optimised opcode loading
- Backported the multi-job submit framework from the development branch;
  multi-instance start and stop should be faster
- Added script to clean archived jobs after 21 days; this will reduce
  the size of the queue directory
- Added some extra checks in disk size tracking
- Added an example ethers hook script
- Added a cluster parameter that prevents Ganeti from modifying
  /etc/hosts
- Added more node information to RAPI responses
- Added a ``gnt-job watch`` command that allows following the output of
  a job
- Added a bind-address option to ganeti-rapi
- Added more checks to the configuration verify
- Enhanced the burnin script such that some operations can be retried
  automatically
- Converted instance reinstall to multi-instance model

Version 2.0.3
-------------

*(Released Fri, 7 Aug 2009)*

- Added ``--ignore-size`` to the ``gnt-instance activate-disks`` command
  to allow using the pre-2.0.2 behaviour in activation, if any existing
  instances have mismatched disk sizes in the configuration
- Added ``gnt-cluster repair-disk-sizes`` command to check and update
  any configuration mismatches for disk sizes
- Added ``gnt-cluster masterfailover --no-voting``
  to allow master failover to work on two-node clusters
- Fixed the ``--net`` option of ``gnt-backup import``, which was
  unusable
- Fixed detection of OS script errors in ``gnt-backup export``
- Fixed exit code of ``gnt-backup export``

Version 2.0.2
-------------

*(Released Fri, 17 Jul 2009)*

- Added experimental support for striped logical volumes; this should
  enhance performance but comes with a higher complexity in the block
  device handling; striping is only enabled when passing
  ``--with-lvm-stripecount=N`` to ``configure``, but codepaths are
  affected even in the non-striped mode
- Improved resiliency against transient failures at the end of DRBD
  resyncs, and in general of DRBD resync checks
- Fixed a couple of issues with exports and snapshot errors
- Fixed a couple of issues in instance listing
- Added display of the disk size in ``gnt-instance info``
- Fixed checking for valid OSes in instance creation
- Fixed handling of the "vcpus" parameter in instance listing and in
  general of invalid parameters
- Fixed http server library, and thus RAPI, to handle invalid
  username/password combinations correctly; this means that now they
  report unauthorized for queries too, not only for modifications,
  allowing earlier detection of configuration problems
- Added a new "role" node list field, equivalent to the master/master
  candidate/drained/offline flags combinations
- Fixed cluster modify and changes of candidate pool size
- Fixed cluster verify error messages for wrong files on regular nodes
- Fixed a couple of issues with node demotion from master candidate role
- Fixed node readd issues
- Added non-interactive mode for ``ganeti-masterd --no-voting`` startup
- Added a new ``--no-voting`` option for masterfailover to fix failover
  on two-node clusters when the former master node is unreachable
- Added instance reinstall over RAPI

Version 2.0.1
-------------

*(Released Tue, 16 Jun 2009)*

- added ``-H``/``-B`` startup parameters to ``gnt-instance``, which
  allow re-adding the start-in-single-user option (regression from 1.2)
- the watcher writes the instance status to a file, to allow monitoring
  to report the instance status (from the master) based on cached
  results of the watcher's queries; while this can get stale if the
  watcher is being locked due to other work on the cluster, this is
  still an improvement
- the watcher now also restarts the node daemon and the rapi daemon if
  they died
- fixed the watcher to handle full and drained queue cases
- hooks export more instance data in the environment, which helps if
  hook scripts need to take action based on the instance's properties
  (no longer need to query back into ganeti)
- instance failovers when the instance is stopped do not check for free
  RAM, so that failing over a stopped instance is possible in low memory
  situations
- rapi uses queries for tags instead of jobs (for less job traffic), and
  for cluster tags it won't talk to masterd at all but read them from
  ssconf
- a couple of error handling fixes in RAPI
- drbd handling: improved the error handling of inconsistent disks after
  resync to reduce the frequency of "there are some degraded disks for
  this instance" messages
- fixed a bug in live migration when DRBD doesn't want to reconnect (the
  error handling path called a wrong function name)

Version 2.0.0
-------------

*(Released Wed, 27 May 2009)*

- no changes from rc5

Version 2.0 rc5
---------------

*(Released Wed, 20 May 2009)*

- fix a couple of bugs (validation, argument checks)
- fix ``gnt-cluster getmaster`` on non-master nodes (regression)
- some small improvements to RAPI and IAllocator
- make watcher automatically start the master daemon if down

Version 2.0 rc4
---------------

*(Released Mon, 27 Apr 2009)*

- change the OS list to not require locks; this helps with big clusters
- fix ``gnt-cluster verify`` and ``gnt-cluster verify-disks`` when the
  volume group is broken
- ``gnt-instance info``, without any arguments, doesn't run for all
  instances anymore; either pass ``--all`` or pass the desired
  instances; this helps against mistakes on big clusters where listing
  the information for all instances takes a long time
- miscellaneous doc and man page fixes

Version 2.0 rc3
---------------

*(Released Wed, 8 Apr 2009)*

- Change the internal locking model of some ``gnt-node`` commands, in
  order to reduce contention (and blocking of master daemon) when
  batching many creation/reinstall jobs
- Fixes to Xen soft reboot
- No longer build documentation at build time; instead, distribute it in
  the archive, in order to reduce the need for the whole docbook/rst
  toolchains

Version 2.0 rc2
---------------

*(Released Fri, 27 Mar 2009)*

- Now the cfgupgrade script works and can upgrade 1.2.7 clusters to 2.0
- Fix watcher startup sequence; this improves the behaviour of busy
  clusters
- Some other fixes in ``gnt-cluster verify``, ``gnt-instance
  replace-disks``, ``gnt-instance add``, ``gnt-cluster queue``, KVM VNC
  bind address and other places
- Some documentation fixes and updates

Version 2.0 rc1
---------------

*(Released Mon, 2 Mar 2009)*

- More documentation updates, now all docs should be more-or-less
  up-to-date
- A couple of small fixes (mixed hypervisor clusters, offline nodes,
  etc.)
- Added a customizable HV_KERNEL_ARGS hypervisor parameter (for Xen PVM
  and KVM)
- Fix an issue related to $libdir/run/ganeti and cluster creation

Version 2.0 beta2
-----------------

*(Released Thu, 19 Feb 2009)*

- Xen PVM and KVM have switched the default value for the instance root
  disk to the first partition on the first drive, instead of the whole
  drive; this means that the OS installation scripts must be changed
  accordingly
- Man pages have been updated
- RAPI has been switched by default to HTTPS, and the exported functions
  should all work correctly
- RAPI v1 has been removed
- Many improvements to the KVM hypervisor
- Block device errors are now better reported
- Many other bugfixes and small improvements

Version 2.0 beta1
-----------------

*(Released Mon, 26 Jan 2009)*

- Version 2 is a general rewrite of the code and therefore the
  differences are too many to list, see the design document for 2.0 in
  the ``doc/`` subdirectory for more details
- In this beta version there is not yet a migration path from 1.2 (there
  will be one in the final 2.0 release)
- A few significant changes are:

  - all commands are executed by a daemon (``ganeti-masterd``) and the
    various ``gnt-*`` commands are just front-ends to it
  - all the commands are entered into, and executed from a job queue,
    see the ``gnt-job(8)`` manpage
  - the RAPI daemon supports read-write operations, secured by basic
    HTTP authentication on top of HTTPS
  - DRBD version 0.7 support has been removed, DRBD 8 is the only
    supported version (when migrating from Ganeti 1.2 to 2.0, you need
    to migrate to DRBD 8 first while still running Ganeti 1.2)
  - DRBD devices are using statically allocated minor numbers, which
    will be assigned to existing instances during the migration process
  - there is support for both Xen PVM and Xen HVM instances running on
    the same cluster
  - KVM virtualization is supported too
  - file-based storage has been implemented, which means that it is
    possible to run the cluster without LVM and DRBD storage, for
    example using a shared filesystem exported from shared storage (and
    still have live migration)

Version 1.2.7
-------------

*(Released Tue, 13 Jan 2009)*

- Change the default reboot type in ``gnt-instance reboot`` to "hard"
- Reuse the old instance MAC address by default on instance import, if
  the instance name is the same.
- Handle situations in which the node info rpc returns incomplete
  results (issue 46)
- Add checks for tcp/udp ports collisions in ``gnt-cluster verify``
- Improved version of batcher:

  - state file support
  - instance MAC address support
  - support for HVM clusters/instances

- Add an option to show the number of cpu sockets and nodes in
  ``gnt-node list``
- Support OSes that handle more than one version of the OS API (but do
  not change the current API in any other way)
- Fix ``gnt-node migrate``
- ``gnt-debug`` man page
- Fixed various typos and small issues
- Increase disk resync maximum speed to 60MB/s (from 30MB/s)

Version 1.2.6
-------------

*(Released Wed, 24 Sep 2008)*

- new ``--hvm-nic-type`` and ``--hvm-disk-type`` flags to control the
  type of disk exported to fully virtualized instances.
- provide access to the serial console of HVM instances
- instance auto_balance flag, set by default. If turned off it will
  avoid warnings on cluster verify if there is not enough memory to fail
  over an instance. In the future it will prevent the instance from
  being failed over automatically, once we support that.
- batcher tool for instance creation, see ``tools/README.batcher``
- ``gnt-instance reinstall --select-os`` to interactively select a new
  operating system when reinstalling an instance.
- When changing the memory amount on instance modify, a check has been
  added that the instance will be able to start. Also, warnings are
  emitted if the instance will not be able to fail over, if auto_balance
  is true.
- documentation fixes
- sync fields between ``gnt-instance list/modify/add/import``
- fix a race condition in DRBD when the sync speed was set after giving
  the device a remote peer.

Version 1.2.5
-------------

*(Released Tue, 22 Jul 2008)*

- note: the allowed size and number of tags per object were reduced
- fix a bug in ``gnt-cluster verify`` with inconsistent volume groups
- fixed Twisted 8.x compatibility
- fixed ``gnt-instance replace-disks`` with iallocator
- add TCP keepalives on Twisted connections to detect restarted nodes
- disk increase support, see ``gnt-instance grow-disk``
- implement bulk node/instance query for RAPI
- add tags in node/instance listing (optional)
- experimental migration (and live migration) support, read the man page
  for ``gnt-instance migrate``
- the ``ganeti-watcher`` logs are now timestamped, and the watcher also
  has some small improvements in handling its state file

Version 1.2.4
-------------

*(Released Fri, 13 Jun 2008)*

- Experimental read-only, REST-based remote API implementation;
  automatically started on master node, TCP port 5080, if enabled by the
  ``--enable-rapi`` parameter to the configure script.
- Instance allocator support. Instance add and import accept a
  ``--iallocator`` parameter, and call that instance allocator to decide
  which node to use for the instance. The iallocator document describes
  what's expected from an allocator script (see also the sketch below).
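A hedged sketch of an allocator-driven instance creation in the 1.2-era
syntax (the allocator name ``dumb`` refers to the example allocator
shipped with Ganeti; the OS and instance names are placeholders, and the
exact flags may differ between versions)::

  $ gnt-instance add -t drbd -s 10g -o debian-etch \
      --iallocator dumb instance1.example.com

The other changes in this release: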
- ``gnt-cluster verify`` N+1 memory redundancy checks: Unless passed the
  ``--no-nplus1-mem`` option, ``gnt-cluster verify`` now checks that if
  a node is lost there is still enough memory to fail over the instances
  that reside on it.
- ``gnt-cluster verify`` hooks: it is now possible to add post-hooks to
  ``gnt-cluster verify``, to check for site-specific compliance. All the
  hooks will run, and their output, if any, will be displayed. Any
  failing hook will make the verification return an error value.
- ``gnt-cluster verify`` now checks that its peers are reachable on the
  primary and secondary interfaces
- ``gnt-node add`` now supports the ``--readd`` option, to readd a node
  that is still declared as part of the cluster and has failed.
- ``gnt-* list`` commands now accept a new ``-o +field`` way of
  specifying output fields, that just adds the chosen fields to the
  default ones (see the example at the end of this section).
- ``gnt-backup`` now has a new ``remove`` command to delete an existing
  export from the filesystem.
- New per-instance parameters hvm_acpi, hvm_pae and hvm_cdrom_image_path
  have been added. Using them you can enable/disable ACPI and PAE
  support, and specify a path for a CD image to be exported to the
  instance. These parameters, as the names suggest, only work on HVM
  clusters.
- When upgrading an HVM cluster to Ganeti 1.2.4, the values for ACPI and
  PAE support will be set to the previously hardcoded values, but the
  (previously hardcoded) path to the CDROM ISO image will be unset and,
  if required, needs to be set manually with ``gnt-instance modify``
  after the upgrade.
- The address to which an instance's VNC console is bound is now
  selectable per-instance, rather than being cluster-wide. Of course
  this only applies to instances controlled via VNC, so currently just
  applies to HVM clusters.
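As a short illustration of the additive output-field syntax mentioned
above (a sketch only; the field names are assumptions and may differ
between versions)::

  $ gnt-node list -o +pinst_cnt,sinst_cnt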
Version 1.2.3
-------------

*(Released Mon, 18 Feb 2008)*

- more tweaks to the disk activation code (especially helpful for DRBD)
- change the default ``gnt-instance list`` output format, now there is
  one combined status field (see the manpage for the exact values this
  field will have)
- some more fixes for the MAC export to hooks change
- make Ganeti not break with DRBD 8.2.x (which changed the version
  format in ``/proc/drbd``) (issue 24)
- add an upgrade tool from "remote_raid1" disk template to "drbd" disk
  template, allowing migration from DRBD0.7+MD to DRBD8

Version 1.2.2
-------------

*(Released Wed, 30 Jan 2008)*

- fix ``gnt-instance modify`` breakage introduced in 1.2.1 with the HVM
  support (issue 23)
- add command aliases infrastructure and a few aliases
- allow listing of VCPUs in the ``gnt-instance list`` and improve the
  man pages and the ``--help`` option of ``gnt-node
  list``/``gnt-instance list``
- fix ``gnt-backup list`` with down nodes (issue 21)
- change the tools location (move from $pkgdatadir to $pkglibdir/tools)
- fix the dist archive and add a check for including svn/git files in
  the future
- some developer-related changes: improve the burnin and the QA suite,
  add an upload script for testing during development

Version 1.2.1
-------------

*(Released Wed, 16 Jan 2008)*

- experimental HVM support, read the install document, section
  "Initializing the cluster"
- allow per-instance kernel and initrd paths for the PVM hypervisor
- add a new command ``gnt-cluster verify-disks`` which uses a new
  algorithm to improve the reconnection of the DRBD pairs if the device
  on the secondary node has gone away
- make logical volume code auto-activate LVs at disk activation time
- slightly improve the speed of activating disks
- allow specification of the MAC address at instance creation time, and
  changing it later via ``gnt-instance modify``
- fix handling of external commands that generate lots of output on
  stderr
- update documentation with regard to the minimum supported version of
  DRBD 8

Version 1.2.0
-------------

*(Released Tue, 4 Dec 2007)*

- Log the ``xm create`` output to the node daemon log on failure (to
  help diagnose the error)
- In debug mode, log the output of all failed external commands
- Change parsing of LVM commands to ignore stderr

Version 1.2 beta3
-----------------

*(Released Wed, 28 Nov 2007)*

- Another round of updates to the DRBD 8 code to deal with more failures
  in the replace secondary node operation
- Some more logging of failures in disk operations (lvm, drbd)
- A few documentation updates
- QA updates

Version 1.2 beta2
-----------------

*(Released Tue, 13 Nov 2007)*

- Change configuration file format from Python's Pickle to JSON.
  Upgrading is possible using the cfgupgrade utility; the essential step
  is sketched below.
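The corresponding upgrade invocation, as also documented in the UPGRADE
notes later in this archive (the path assumes a default installation)::

  $ /usr/share/ganeti/cfgupgrade --verbose /var/lib/ganeti/config.data

The other changes in beta 2: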
- Add support for DRBD 8.0 (new disk template ``drbd``) which allows for
  faster replace disks and is more stable (DRBD 8 has many improvements
  compared to DRBD 0.7)
- Added command line tags support (see man pages for ``gnt-instance``,
  ``gnt-node``, ``gnt-cluster``)
- Added instance rename support
- Added multi-instance startup/shutdown
- Added cluster rename support
- Added ``gnt-node evacuate`` to simplify some node operations
- Added instance reboot operation that can speed up reboot as compared
  to stop and start
- Softened the requirement that hostnames are in FQDN format
- The ``ganeti-watcher`` now activates DRBD pairs after secondary node
  reboots
- Removed dependency on Debian's patched fping that uses the
  non-standard ``-S`` option
- Now the OS definitions are searched for in multiple, configurable
  paths (easier for distros to package)
- Some changes to the hooks infrastructure (especially the new
  post-configuration update hook)
- Other small bugfixes

.. vim: set textwidth=72 syntax=rst :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/UPGRADE0000644000000000000000000002530012271422343014004 0ustar00rootroot00000000000000

Upgrade notes
=============

.. highlight:: shell-example

This document details the steps needed to upgrade a cluster to newer
versions of Ganeti.

As a general rule the node daemons need to be restarted after each
software upgrade; if using the provided example init.d script, this
means running the following command on all nodes::

  $ /etc/init.d/ganeti restart

2.1 and above
-------------

Starting with Ganeti 2.0, upgrades between revisions (e.g. 2.1.0 to
2.1.1) should not need manual intervention. As a safety measure, minor
releases (e.g. 2.1.3 to 2.2.0) require the ``cfgupgrade`` command for
changing the configuration version. Below you find the steps necessary
to upgrade between minor releases.

To run commands on all nodes, the `distributed shell (dsh) `_ can be
used, e.g. ``dsh -M -F 8 -f /var/lib/ganeti/ssconf_online_nodes
gnt-cluster --version``.

#. Ensure no jobs are running (master node only)::

     $ gnt-job list

#. Pause the watcher for an hour (master node only)::

     $ gnt-cluster watcher pause 1h

#. Stop all daemons on all nodes::

     $ /etc/init.d/ganeti stop

#. Backup old configuration (master node only)::

     $ tar czf /var/lib/ganeti-$(date +\%FT\%T).tar.gz -C /var/lib ganeti

#. Install new Ganeti version on all nodes

#. Run cfgupgrade on the master node::

     $ /usr/lib/ganeti/tools/cfgupgrade --verbose --dry-run
     $ /usr/lib/ganeti/tools/cfgupgrade --verbose

   (``cfgupgrade`` supports a number of parameters, run it with
   ``--help`` for more information)

#. Upgrade the directory permissions on all nodes::

     $ /usr/lib/ganeti/ensure-dirs --full-run

#. Create the (missing) required users and make users part of the
   required groups on all nodes::

     $ /usr/lib/ganeti/tools/users-setup

   This will ask for confirmation. To execute directly, add the
   ``--yes-do-it`` option.

#. Restart daemons on all nodes::

     $ /etc/init.d/ganeti restart

#. Re-distribute configuration (master node only)::

     $ gnt-cluster redist-conf

#. If you use file storage, check that the
   ``/etc/ganeti/file-storage-paths`` is correct on all nodes. For
   security reasons it's not copied automatically, but it can be copied
   manually via::

     $ gnt-cluster copyfile /etc/ganeti/file-storage-paths

#. Restart daemons again on all nodes::

     $ /etc/init.d/ganeti restart

#. Enable the watcher again (master node only)::

     $ gnt-cluster watcher continue
#. Verify cluster (master node only)::

     $ gnt-cluster verify

Reverting an upgrade
~~~~~~~~~~~~~~~~~~~~

For going back between revisions (e.g. 2.1.1 to 2.1.0) no manual
intervention is required, just as for upgrades.

Starting from version 2.8, ``cfgupgrade`` supports a ``--downgrade``
option to bring the configuration back to the previous stable version.
This is useful if you upgrade Ganeti and after some time you run into
problems with the new version. You can downgrade the configuration
without losing the changes made since the upgrade. Any feature not
supported by the old version will be removed from the configuration, of
course, but you get a warning about it. If there is any new feature and
you haven't changed from its default value, you don't have to worry
about it, as it will get the same value when you upgrade again.

The procedure is similar to upgrading, but please note that you have to
revert the configuration **before** installing the old version.

#. Ensure no jobs are running (master node only)::

     $ gnt-job list

#. Pause the watcher for an hour (master node only)::

     $ gnt-cluster watcher pause 1h

#. Stop all daemons on all nodes::

     $ /etc/init.d/ganeti stop

#. Backup old configuration (master node only)::

     $ tar czf /var/lib/ganeti-$(date +\%FT\%T).tar.gz -C /var/lib ganeti

#. Run cfgupgrade on the master node::

     $ /usr/lib/ganeti/tools/cfgupgrade --verbose --downgrade --dry-run
     $ /usr/lib/ganeti/tools/cfgupgrade --verbose --downgrade

   You may want to copy all the messages about features that have been
   removed during the downgrade, in case you want to restore them when
   upgrading again.

#. Install the old Ganeti version on all nodes

   NB: in Ganeti 2.8, the ``cmdlib.py`` file was split into a series of
   files contained in the ``cmdlib`` directory. If Ganeti is installed
   from sources and not from a package, while downgrading Ganeti to a
   pre-2.8 version it is important to remember to remove the ``cmdlib``
   directory from the directory containing the Ganeti python files
   (which usually is
   ``${PREFIX}/lib/python${VERSION}/dist-packages/ganeti``). A simpler
   upgrade/downgrade procedure will be made available in future versions
   of Ganeti.

#. Restart daemons on all nodes::

     $ /etc/init.d/ganeti restart

#. Re-distribute configuration (master node only)::

     $ gnt-cluster redist-conf

#. Restart daemons again on all nodes::

     $ /etc/init.d/ganeti restart

#. Enable the watcher again (master node only)::

     $ gnt-cluster watcher continue

#. Verify cluster (master node only)::

     $ gnt-cluster verify

2.0 releases
------------

2.0.3 to 2.0.4
~~~~~~~~~~~~~~

No changes needed except restarting the daemon; but rollback to 2.0.3
might require configuration editing.

If you're using Xen-HVM instances, please double-check the network
configuration (``nic_type`` parameter) as the defaults might have
changed: 2.0.4 adds any missing configuration items and depending on the
version of the software the cluster has been installed with, some new
keys might have been added.

2.0.1 to 2.0.2/2.0.3
~~~~~~~~~~~~~~~~~~~~

Between 2.0.1 and 2.0.2 there have been some changes in the handling of
block devices, which can cause some issues. 2.0.3 was then released
which adds two new options/commands to fix this issue.

If you use DRBD-type instances and see problems in instance start or
activate-disks with messages from DRBD about "lower device too small" or
similar, it is recommended to:

#. Run ``gnt-instance activate-disks --ignore-size $instance`` for each
   of the affected instances
#. Then run ``gnt-cluster repair-disk-sizes`` which will check that
   instances have the correct disk sizes

1.2 to 2.0
----------

Prerequisites:

- Ganeti 1.2.7 is currently installed
- All instances have been migrated from DRBD 0.7 to DRBD 8.x (i.e. no
  ``remote_raid1`` disk template)
- Upgrade to Ganeti 2.0.0~rc2 or later (~rc1 and earlier don't have the
  needed upgrade tool)

In the below steps, replace :file:`/var/lib` with ``$libdir`` if Ganeti
was not installed with this prefix (e.g. :file:`/usr/local/var`). Same
for :file:`/usr/lib`.

Execution (all steps are required in the order given):

#. Make a backup of the current configuration, for safety::

     $ cp -a /var/lib/ganeti /var/lib/ganeti-1.2.backup

#. Stop all instances::

     $ gnt-instance stop --all

#. Make sure no DRBD devices are in use, the following command should
   show no active minors::

     $ gnt-cluster command grep cs: /proc/drbd | grep -v cs:Unconf

#. Stop the node daemons and rapi daemon on all nodes (note: you should
   be logged in via the master node's name, not the cluster name, as the
   command below will remove the cluster IP from the master node)::

     $ gnt-cluster command /etc/init.d/ganeti stop

#. Install the new software on all nodes, either from packaging (if
   available) or from sources; the master daemon will not start, but
   will give error messages about a wrong configuration file, which is
   normal

#. Upgrade the configuration file::

     $ /usr/lib/ganeti/tools/cfgupgrade12 -v --dry-run
     $ /usr/lib/ganeti/tools/cfgupgrade12 -v

#. Make sure ``ganeti-noded`` is running on all nodes (and start it if
   not)

#. Start the master daemon::

     $ ganeti-masterd

#. Check that a simple node-list works::

     $ gnt-node list

#. Redistribute updated configuration to all nodes::

     $ gnt-cluster redist-conf
     $ gnt-cluster copyfile /var/lib/ganeti/known_hosts

#. Optional: if needed, install RAPI-specific certificates under
   :file:`/var/lib/ganeti/rapi.pem` and run::

     $ gnt-cluster copyfile /var/lib/ganeti/rapi.pem

#. Run a cluster verify, this should show no problems::

     $ gnt-cluster verify

#. Remove some obsolete files::

     $ gnt-cluster command rm /var/lib/ganeti/ssconf_node_pass
     $ gnt-cluster command rm /var/lib/ganeti/ssconf_hypervisor

#. Update the Xen PVM setting (if this was a PVM cluster) for 1.2
   compatibility::

     $ gnt-cluster modify -H xen-pvm:root_path=/dev/sda

#. Depending on your setup, you might also want to reset the initrd
   parameter::

     $ gnt-cluster modify -H xen-pvm:initrd_path=/boot/initrd-2.6-xenU

#. Reset the instance autobalance setting to default::

     $ for i in $(gnt-instance list -o name --no-headers); do \
         gnt-instance modify -B auto_balance=default $i; \
       done

#. Optional: start the RAPI daemon::

     $ ganeti-rapi

#. Restart instances::

     $ gnt-instance start --force-multiple --all

At this point, ``gnt-cluster verify`` should show no errors and the
migration is complete.

1.2 releases
------------

1.2.4 to any other higher 1.2 version
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

No changes needed. Rollback will usually require manual edit of the
configuration file.

1.2.3 to 1.2.4
~~~~~~~~~~~~~~

No changes needed. Note that going back from 1.2.4 to 1.2.3 will require
manual edit of the configuration file (since we added some HVM-related
new attributes).

1.2.2 to 1.2.3
~~~~~~~~~~~~~~

No changes needed. Note that the drbd7-to-8 upgrade tool does a disk
format change for the DRBD metadata, so in theory this might be
**risky**. It is advised to have (good) backups before doing the
upgrade.

1.2.1 to 1.2.2
~~~~~~~~~~~~~~

No changes needed.

1.2.0 to 1.2.1
~~~~~~~~~~~~~~

No changes needed.
Only some bugfixes and new additions that don't affect existing
clusters.

1.2.0 beta 3 to 1.2.0
~~~~~~~~~~~~~~~~~~~~~

No changes needed.

1.2.0 beta 2 to beta 3
~~~~~~~~~~~~~~~~~~~~~~

No changes needed. A new version of the debian-etch-instance OS (0.3)
has been released, but upgrading it is not required.

1.2.0 beta 1 to beta 2
~~~~~~~~~~~~~~~~~~~~~~

Beta 2 switched the config file format to JSON. Steps to upgrade:

#. Stop the daemons (``/etc/init.d/ganeti stop``) on all nodes
#. Disable the cron job (default is :file:`/etc/cron.d/ganeti`)
#. Install the new version
#. Make a backup copy of the config file
#. Upgrade the config file using the following command::

     $ /usr/share/ganeti/cfgupgrade --verbose /var/lib/ganeti/config.data

#. Start the daemons and run ``gnt-cluster info``, ``gnt-node list`` and
   ``gnt-instance list`` to check if the upgrade process finished
   successfully

The OS definitions also need to be upgraded. There is a new version of
the debian-etch-instance OS (0.2) that goes along with beta 2.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/0000755000000000000000000000000012271445544013547 5ustar00rootroot00000000000000
ganeti-2.9.3/doc/news.rst0000644000000000000000000035604312271443364015266 0ustar00rootroot00000000000000

.. This file is automatically updated at build time from NEWS.
.. Do not edit.

News
====

Version 2.9.3
-------------

*(Released Mon, 27 Jan 2014)*

- Ensure that all the hypervisors exist in the config file (Issue 640)
- Correctly recognise the role as master node (Issue 687)
- configure: allow detection of Sphinx 1.2+ (Issue 502)
- gnt-instance now honors the KVM path correctly (Issue 691)

Inherited from the 2.8 branch:

- Change the list separator for the usb_devices parameter from comma to
  space. Commas could not work because they are already the hypervisor
  option separator (Issue 649)
- Add support for blktap2 file-driver (Issue 638)
- Add network tag definitions to the haskell codebase (Issue 641)
- Fix RAPI network tag handling
- Add the network tags to the tags searched by gnt-cluster search-tags
- Fix caching bug preventing jobs from being cancelled
- Start-master/stop-master was always failing if ConfD was disabled.
  (Issue 685)

Version 2.9.2
-------------

*(Released Fri, 13 Dec 2013)*

- use custom KVM path if set for version checking
- SingleNotifyPipeCondition: don't share pollers

Inherited from the 2.8 branch:

- Fixed Luxi daemon socket permissions after master-failover
- Improve IP version detection code directly checking for colons rather
  than passing the family from the cluster object
- Fix NODE/NODE_RES locking in LUInstanceCreate by not acquiring
  NODE_RES locks opportunistically anymore (Issue 622)
- Allow link local IPv6 gateways (Issue 624)
- Fix error printing (Issue 616)
- Fix a bug in InstanceSetParams concerning names: in case no name is
  passed in disk modifications, keep the old one. If name=none then set
  disk name to None.
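A sketch of the disk-modification form this fix concerns (the ``--disk
N:modify,name=...`` syntax shown here and the instance name are
illustrative)::

  $ gnt-instance modify --disk 0:modify,name=none instance1.example.com

The other changes inherited from the 2.8 branch: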
- Update build_chroot script to work with the latest hackage packages
- Add a packet number limit to "fping" in master-ip-setup (Issue 630)
- Fix evacuation out of drained node (Issue 615)
- Add default file_driver if missing (Issue 571)
- Fix job error message after unclean master shutdown (Issue 618)
- Lock group(s) when creating instances (Issue 621)
- SetDiskID() before accepting an instance (Issue 633)
- Allow the ext template disks to receive arbitrary parameters, both at
  creation time and while being modified
- Xen handle domain shutdown (future proofing cherry-pick)
- Refactor reading live data in htools (future proofing cherry-pick)

Version 2.9.1
-------------

*(Released Wed, 13 Nov 2013)*

- fix a bug that kept nodes offline when readding
- when verifying DRBD versions, ignore unavailable nodes
- fix a bug that made the console unavailable on kvm in a split-user
  setup (issue 608)
- DRBD: ensure peers are UpToDate for dual-primary (inherited 2.8.2)

Version 2.9.0
-------------

*(Released Tue, 5 Nov 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- hroller now also plans for capacity to move non-redundant instances
  off any node to be rebooted; the old behavior of completely ignoring
  any non-redundant instances can be restored by adding the
  --ignore-non-redundant option.
- The cluster option '--no-lvm-storage' was removed in favor of the new
  option '--enabled-disk-templates'.
- On instance creation, disk templates no longer need to be specified
  with '-t'. The default disk template will be taken from the list of
  enabled disk templates.
- The monitoring daemon is now running as root, in order to be able to
  collect information only available to root (such as the state of Xen
  instances).
- The ConfD client is now IPv6 compatible.
- File and shared file storage is no longer dis/enabled at configure
  time, but using the option '--enabled-disk-templates' at cluster
  initialization and modification.
- The default directories for file and shared file storage are not
  anymore specified at configure time, but taken from the cluster's
  configuration. They can be set at cluster initialization and
  modification with '--file-storage-dir' and
  '--shared-file-storage-dir'.
- Cluster verification now includes stricter checks regarding the
  default file and shared file storage directories. It now checks that
  the directories are explicitly allowed in the 'file-storage-paths'
  file and that the directories exist on all nodes.
- The list of allowed disk templates in the instance policy and the list
  of cluster-wide enabled disk templates is now checked for consistency
  on cluster or group modification. On cluster initialization, the
  ipolicy disk templates are ensured to be a subset of the cluster-wide
  enabled disk templates.

New features
~~~~~~~~~~~~

- DRBD 8.4 support. Depending on the installed DRBD version, Ganeti now
  uses the correct command syntax. It is possible to use different DRBD
  versions on different nodes as long as they are compatible with each
  other. This enables rolling upgrades of DRBD with no downtime. As
  permanent operation of different DRBD versions within a node group is
  discouraged, ``gnt-cluster verify`` will emit a warning if it detects
  such a situation.
- New "inst-status-xen" data collector for the monitoring daemon,
  providing information about the state of the xen instances on the
  nodes.
- New "lv" data collector for the monitoring daemon, collecting data
  about the logical volumes on the nodes, and pairing them with the name
  of the instances they belong to.
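These collectors can also be exercised stand-alone through the
``mon-collector`` runner introduced in 2.7 (a hedged sketch; the
collector names follow the list above, and the exact name-based dispatch
may vary by version)::

  $ mon-collector lv
  $ mon-collector inst-status-xen

The remaining new features in this release: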
- New "diskstats" data collector, collecting the data from /proc/diskstats and presenting them over the monitoring daemon interface. - The ConfD client is now IPv6 compatible. New dependencies ~~~~~~~~~~~~~~~~ The following new dependencies have been added. Python - ``python-mock`` (http://www.voidspace.org.uk/python/mock/) is now a required for the unit tests (and only used for testing). Haskell - ``hslogger`` (http://software.complete.org/hslogger) is now always required, even if confd is not enabled. Since 2.9.0 rc3 ~~~~~~~~~~~~~~~ - Correctly start/stop luxid during gnt-cluster master-failover (inherited from stable-2.8) - Improved error messsages (inherited from stable-2.8) Version 2.9.0 rc3 ----------------- *(Released Tue, 15 Oct 2013)* The third release candidate in the 2.9 series. Since 2.9.0 rc2: - in implicit configuration upgrade, match ipolicy with enabled disk templates - improved harep documentation (inherited from stable-2.8) Version 2.9.0 rc2 ----------------- *(Released Wed, 9 Oct 2013)* The second release candidate in the 2.9 series. Since 2.9.0 rc1: - Fix bug in cfgupgrade that led to failure when upgrading from 2.8 with at least one DRBD instance. - Fix bug in cfgupgrade that led to an invalid 2.8 configuration after downgrading. Version 2.9.0 rc1 ----------------- *(Released Tue, 1 Oct 2013)* The first release candidate in the 2.9 series. Since 2.9.0 beta1: - various bug fixes - update of the documentation, in particular installation instructions - merging of LD_* constants into DT_* constants - python style changes to be compatible with newer versions of pylint Version 2.9.0 beta1 ------------------- *(Released Thu, 29 Aug 2013)* This was the first beta release of the 2.9 series. All important changes are listed in the latest 2.9 entry. Version 2.8.4 ------------- *(Released Thu, 23 Jan 2014)* - Change the list separator for the usb_devices parameter from comma to space. Commas could not work because they are already the hypervisor option separator (Issue 649) - Add support for blktap2 file-driver (Issue 638) - Add network tag definitions to the haskell codebase (Issue 641) - Fix RAPI network tag handling - Add the network tags to the tags searched by gnt-cluster search-tags - Fix caching bug preventing jobs from being cancelled - Start-master/stop-master was always failing if ConfD was disabled. (Issue 685) Version 2.8.3 ------------- *(Released Thu, 12 Dec 2013)* - Fixed Luxi daemon socket permissions after master-failover - Improve IP version detection code directly checking for colons rather than passing the family from the cluster object - Fix NODE/NODE_RES locking in LUInstanceCreate by not acquiring NODE_RES locks opportunistically anymore (Issue 622) - Allow link local IPv6 gateways (Issue 624) - Fix error printing (Issue 616) - Fix a bug in InstanceSetParams concerning names: in case no name is passed in disk modifications, keep the old one. If name=none then set disk name to None. 
- Update build_chroot script to work with the latest hackage packages
- Add a packet number limit to "fping" in master-ip-setup (Issue 630)
- Fix evacuation out of drained node (Issue 615)
- Add default file_driver if missing (Issue 571)
- Fix job error message after unclean master shutdown (Issue 618)
- Lock group(s) when creating instances (Issue 621)
- SetDiskID() before accepting an instance (Issue 633)
- Allow the ext template disks to receive arbitrary parameters, both at
  creation time and while being modified
- Xen handle domain shutdown (future proofing cherry-pick)
- Refactor reading live data in htools (future proofing cherry-pick)

Version 2.8.2
-------------

*(Released Thu, 07 Nov 2013)*

- DRBD: ensure peers are UpToDate for dual-primary
- Improve error message for replace-disks
- More dependency checks at configure time
- Placate warnings on ganeti.outils_unittest.py

Version 2.8.1
-------------

*(Released Thu, 17 Oct 2013)*

- Correctly start/stop luxid during gnt-cluster master-failover
- Don't attempt IPv6 ssh in case of IPv4 cluster (Issue 595)
- Fix path for the job queue serial file
- Improved harep man page
- Minor documentation improvements

Version 2.8.0
-------------

*(Released Mon, 30 Sep 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Instance policy can contain multiple instance specs, as described in
  the “Constrained instance sizes” section of :doc:`Partitioned Ganeti
  `. As a consequence, it's not possible to partially change or
  override instance specs. Bounding specs (min and max) can be specified
  as a whole using the new option ``--ipolicy-bounds-specs``, while
  standard specs use the new option ``--ipolicy-std-specs``.
- The output of the info command of gnt-cluster, gnt-group, gnt-node,
  gnt-instance is a valid YAML object.
- hail now honors network restrictions when allocating nodes. This led
  to an update of the IAllocator protocol. See the IAllocator
  documentation for details.
- confd now only answers static configuration requests over the network.
  luxid was extracted, listens on the local LUXI socket and responds to
  live queries. This allows finer grained permissions if using separate
  users.

New features
~~~~~~~~~~~~

- The :doc:`Remote API ` daemon now supports a command line flag to
  always require authentication, ``--require-authentication``. It can be
  specified in ``$sysconfdir/default/ganeti``.
- A new cluster attribute 'enabled_disk_templates' is introduced. It
  will be used to manage the disk templates to be used by instances in
  the cluster. Initially, it will be set to a list that includes plain,
  drbd, if they were enabled by specifying a volume group name, and file
  and sharedfile, if those were enabled at configure time. Additionally,
  it will include all disk templates that are currently used by
  instances. The order of disk templates will be based on Ganeti's
  history of supporting them. In the future, the first entry of the list
  will be used as a default disk template on instance creation.
- ``cfgupgrade`` now supports a ``--downgrade`` option to bring the
  configuration back to the previous stable version.
- Disk templates in group ipolicy can be restored to the default value.
- Initial support for diskless instances and virtual clusters in QA.
- More QA and unit tests for instance policies.
- Every opcode now contains a reason trail (visible through ``gnt-job
  info``) describing why the opcode itself was executed.
- The monitoring daemon is now available.
  It allows users to query the cluster for information about the status
  of the system. The daemon is only responsible for providing the
  information over the network: the actual data gathering is performed
  by data collectors (currently, only the DRBD status collector is
  available).
- In order to help developers work on Ganeti, a new script
  (``devel/build_chroot``) is provided, for building a chroot that
  contains all the required development libraries and tools for
  compiling Ganeti on a Debian Squeeze system.
- A new tool, ``harep``, for performing self-repair and recreation of
  instances in Ganeti has been added.
- Split queries are enabled for tags, network, exports, cluster info,
  groups, jobs, nodes.
- New command ``show-ispecs-cmd`` for ``gnt-cluster`` and ``gnt-group``.
  It prints the command line to set the current policies, to ease
  changing them.
- Add the ``vnet_hdr`` HV parameter for KVM, to control whether the tap
  devices for KVM virtio-net interfaces will get created with VNET_HDR
  (IFF_VNET_HDR) support. If set to false, it disables offloading on the
  virtio-net interfaces, which prevents host kernel tainting and log
  flooding, when dealing with broken or malicious virtio-net drivers.
  It's set to true by default.
- Instance failover now supports a ``--cleanup`` parameter for fixing
  previous failures.
- Support 'viridian' parameter in Xen HVM
- Support DSA SSH keys in bootstrap
- To simplify the work of packaging frameworks that want to add the
  needed users and groups in a split-user setup themselves, at build
  time three files in ``doc/users`` will be generated. The ``groups``
  file contains, one per line, the groups to be generated, the ``users``
  file contains, one per line, the users to be generated, optionally
  followed by their primary group, where important. The
  ``groupmemberships`` file contains, one per line, additional
  user-group membership relations that need to be established. The
  syntax of these files will remain stable in all future versions.

New dependencies
~~~~~~~~~~~~~~~~

The following new dependencies have been added:

For Haskell:

- The ``curl`` library is not optional anymore for compiling the Haskell
  code.
- ``snap-server`` library (if monitoring is enabled).

For Python:

- The minimum Python version needed to run Ganeti is now 2.6.
- ``yaml`` library (only for running the QA).

Since 2.8.0 rc3
~~~~~~~~~~~~~~~

- Perform proper cleanup on termination of Haskell daemons
- Fix corner-case in handling of remaining retry time

Version 2.8.0 rc3
-----------------

*(Released Tue, 17 Sep 2013)*

- To simplify the work of packaging frameworks that want to add the
  needed users and groups in a split-user setup themselves, at build
  time three files in ``doc/users`` will be generated. The ``groups``
  file contains, one per line, the groups to be generated, the ``users``
  file contains, one per line, the users to be generated, optionally
  followed by their primary group, where important. The
  ``groupmemberships`` file contains, one per line, additional
  user-group membership relations that need to be established. The
  syntax of these files will remain stable in all future versions.
- Add a default to file-driver when unspecified over RAPI (Issue 571)
- Mark the DSA host pubkey as optional, and remove it during config
  downgrade (Issue 560)
- Some documentation fixes

Version 2.8.0 rc2
-----------------

*(Released Tue, 27 Aug 2013)*

The second release candidate of the 2.8 series.
Since 2.8.0 rc1:

- Support 'viridian' parameter in Xen HVM (Issue 233)
- Include VCS version in ``gnt-cluster version``
- Support DSA SSH keys in bootstrap (Issue 338)
- Fix batch creation of instances
- Use FQDN to check master node status (Issue 551)
- Make the DRBD collector more failure-resilient

Version 2.8.0 rc1
-----------------

*(Released Fri, 2 Aug 2013)*

The first release candidate of the 2.8 series. Since 2.8.0 beta1:

- Fix upgrading/downgrading from 2.7
- Increase maximum RAPI message size
- Documentation updates
- Split ``confd`` between ``luxid`` and ``confd``
- Merge 2.7 series up to the 2.7.1 release
- Allow the ``modify_etc_hosts`` option to be changed
- Add better debugging for ``luxid`` queries
- Expose bulk parameter for GetJobs in RAPI client
- Expose missing ``network`` fields in RAPI
- Add some ``cluster verify`` tests
- Some unittest fixes
- Fix a malfunction in ``hspace``'s tiered allocation
- Fix query compatibility between haskell and python implementations
- Add the ``vnet_hdr`` HV parameter for KVM
- Add ``--cleanup`` to instance failover
- Change the connected groups format in ``gnt-network info`` output; it
  was previously displayed as a raw list by mistake. (Merged from 2.7)

Version 2.8.0 beta1
-------------------

*(Released Mon, 24 Jun 2013)*

This was the first beta release of the 2.8 series. All important changes
are listed in the latest 2.8 entry.

Version 2.7.2
-------------

*(Released Thu, 26 Sep 2013)*

- Change the connected groups format in ``gnt-network info`` output; it
  was previously displayed as a raw list by mistake
- Check disk template in right dict when copying
- Support multi-instance allocs without iallocator
- Fix some errors in the documentation
- Fix formatting of tuple in an error message

Version 2.7.1
-------------

*(Released Thu, 25 Jul 2013)*

- Add logrotate functionality in daemon-util
- Add logrotate example file
- Add missing fields to network queries over rapi
- Fix network object timestamps
- Add support for querying network timestamps
- Fix a typo in the example crontab
- Fix a documentation typo

Version 2.7.0
-------------

*(Released Thu, 04 Jul 2013)*

Incompatible/important changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Instance policies for disk size were documented to be on a per-disk
  basis, but hail applied them to the sum of all disks. This has been
  fixed.
- ``hbal`` will now exit with status 0 if, during job execution over
  LUXI, early exit has been requested and all jobs are successful;
  before, exit status 1 was used, which cannot be differentiated from
  the "job error" case
- Compatibility with newer versions of rbd has been fixed
- ``gnt-instance batch-create`` has been changed to use the bulk create
  opcode from Ganeti. This led to incompatible changes in the format of
  the JSON file. It's now not a custom dict anymore but a dict
  compatible with the ``OpInstanceCreate`` opcode.
- Parent directories for file storage need to be listed in
  ``$sysconfdir/ganeti/file-storage-paths`` now. ``cfgupgrade`` will
  write the file automatically based on old configuration values, but it
  cannot distribute it across all nodes and the file contents should be
  verified. Use ``gnt-cluster copyfile
  $sysconfdir/ganeti/file-storage-paths`` once the cluster has been
  upgraded. The reason for requiring this list of paths now is that
  before it would have been possible to inject new paths via RPC,
  allowing files to be created in arbitrary locations.
  The RPC protocol is protected using SSL/X.509 certificates, but as a
  design principle Ganeti does not permit arbitrary paths to be passed.
- The parsing of the variants file for OSes (see
  :manpage:`ganeti-os-interface(7)`) has been slightly changed: now
  empty lines and comment lines (starting with ``#``) are ignored for
  better readability.
- The ``setup-ssh`` tool added in Ganeti 2.2 has been replaced and is no
  longer available. ``gnt-node add`` now invokes a new tool on the
  destination node, named ``prepare-node-join``, to configure the SSH
  daemon. Paramiko is no longer necessary to configure nodes' SSH
  daemons via ``gnt-node add``.
- Draining (``gnt-cluster queue drain``) and un-draining the job queue
  (``gnt-cluster queue undrain``) now affect all nodes in a cluster and
  the flag is not reset after a master failover.
- Python 2.4 has *not* been tested with this release. Using 2.6 or above
  is recommended. 2.6 will be mandatory from the 2.8 series.

New features
~~~~~~~~~~~~

- New network management functionality to support automatic allocation
  of IP addresses and managing of network parameters. See
  :manpage:`gnt-network(8)` for more details.
- New external storage backend, to allow managing arbitrary storage
  systems external to the cluster. See
  :manpage:`ganeti-extstorage-interface(7)`.
- New ``exclusive-storage`` node parameter added, restricted to
  nodegroup level. When it's set to true, physical disks are assigned in
  an exclusive fashion to instances, as documented in :doc:`Partitioned
  Ganeti `. Currently, only instances using the ``plain`` disk template
  are supported.
- The KVM hypervisor has been updated with many new hypervisor
  parameters, including a generic one for passing arbitrary command line
  values. See a complete list in :manpage:`gnt-instance(8)`. It is now
  compatible up to qemu 1.4.
- A new tool, called ``mon-collector``, is the stand-alone executor of
  the data collectors for a monitoring system. As of this version, it
  just includes the DRBD data collector, that can be executed by calling
  ``mon-collector`` using the ``drbd`` parameter. See
  :manpage:`mon-collector(7)`.
- A new user option, :pyeval:`rapi.RAPI_ACCESS_READ`, has been added for
  RAPI users. It allows granting permissions to query for information to
  a specific user without giving :pyeval:`rapi.RAPI_ACCESS_WRITE`
  permissions.
- A new tool named ``node-cleanup`` has been added. It cleans up the
  remains of a cluster from a machine by stopping all daemons, removing
  certificates and ssconf files. Unless the ``--no-backup`` option is
  given, copies of the certificates are made.
- Instance creations now support the use of opportunistic locking,
  potentially speeding up the (parallel) creation of multiple instances.
  This feature is currently only available via the :doc:`RAPI `
  interface and when an instance allocator is used. If the
  ``opportunistic_locking`` parameter is set the opcode will try to
  acquire as many locks as possible, but will not wait for any locks
  held by other opcodes. If not enough resources can be found to
  allocate the instance, the temporary error code
  :pyeval:`errors.ECODE_TEMP_NORES` is returned. The operation can be
  retried thereafter, with or without opportunistic locking.
- New experimental linux-ha resource scripts.
- Restricted-commands support: ganeti can now be asked (via command line
  or rapi) to perform commands on a node. These are passed via ganeti
  RPC rather than ssh. This functionality is restricted to commands
  specified in the ``$sysconfdir/ganeti/restricted-commands`` file for
  security reasons.
  The file is not copied automatically.

Misc changes
~~~~~~~~~~~~

- Diskless instances are now externally mirrored (Issue 237). For now,
  this has only been tested in conjunction with explicit target nodes
  for migration/failover.
- Queries not needing locks or RPC access to the node can now be
  performed by the confd daemon, making them independent from jobs, and
  thus faster to execute. This is selectable at configure time.
- The functionality for allocating multiple instances at once has been
  overhauled and is now also available through :doc:`RAPI `.

There are no significant changes from version 2.7.0~rc3.

Version 2.7.0 rc3
-----------------

*(Released Tue, 25 Jun 2013)*

- Fix permissions on the confd query socket (Issue 477)
- Fix permissions on the job archive dir (Issue 498)
- Fix handling of an internal exception in replace-disks (Issue 472)
- Fix gnt-node info handling of shortened names (Issue 497)
- Fix gnt-instance grow-disk when wiping is enabled
- Documentation improvements, and support for newer pandoc
- Fix hspace honoring ipolicy for disks (Issue 484)
- Improve handling of the ``kvm_extra`` HV parameter

Version 2.7.0 rc2
-----------------

*(Released Fri, 24 May 2013)*

- ``devel/upload`` now works when ``/var/run`` on the target nodes is a
  symlink.
- Disks added through ``gnt-instance modify`` or created through
  ``gnt-instance recreate-disks`` are wiped, if the
  ``prealloc_wipe_disks`` flag is set.
- If wiping newly created disks fails, the disks are removed. Also,
  partial failures in creating disks through ``gnt-instance modify``
  trigger a cleanup of the partially-created disks.
- Removing the master IP address doesn't fail if the address has been
  already removed.
- Fix ownership of the OS log dir
- Workaround missing SO_PEERCRED constant (Issue 191)

Version 2.7.0 rc1
-----------------

*(Released Fri, 3 May 2013)*

This was the first release candidate of the 2.7 series. Since beta3:

- Fix kvm compatibility with qemu 1.4 (Issue 389)
- Documentation updates (admin guide, upgrade notes, install
  instructions) (Issue 372)
- Fix gnt-group list nodes and instances count (Issue 436)
- Fix compilation without non-mandatory libraries (Issue 441)
- Fix xen-hvm hypervisor forcing nics to type 'ioemu' (Issue 247)
- Make confd logging more verbose at INFO level (Issue 435)
- Improve "networks" documentation in :manpage:`gnt-instance(8)`
- Fix failure path for instance storage type conversion (Issue 229)
- Update htools text backend documentation
- Improve the renew-crypto section of :manpage:`gnt-cluster(8)`
- Disable inter-cluster instance move for file-based instances, because
  it is dependent on instance export, which is not supported for
  file-based instances. (Issue 414)
- Fix gnt-job crashes on non-ascii characters (Issue 427)
- Fix volume group checks on non-vm-capable nodes (Issue 432)

Version 2.7.0 beta3
-------------------

*(Released Mon, 22 Apr 2013)*

This was the third beta release of the 2.7 series. Since beta2:

- Fix hail to verify disk instance policies on a per-disk basis (Issue
  418).
- Fix data loss on wrong usage of ``gnt-instance move``
- Properly export errors in confd-based job queries
- Add ``users-setup`` tool
- Fix iallocator protocol to report 0 as a disk size for diskless
  instances. This avoids hail breaking when a diskless instance is
  present.
- Fix job queue directory permission problem that made confd job queries
  fail. This requires running an ``ensure-dirs --full-run`` on upgrade
  for access to archived jobs (Issue 406).
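The ``ensure-dirs`` step mentioned in the fix above is the same one used
in the regular upgrade procedure (the path assumes a default
installation, as in the UPGRADE notes earlier in this archive)::

  $ /usr/lib/ganeti/ensure-dirs --full-run

The other changes since beta2: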
- Limit the sizes of networks supported by ``gnt-network`` to something
  between a ``/16`` and a ``/30`` to prevent memory bloat and crashes.
- Fix bugs in instance disk template conversion
- Fix GHC 7 compatibility
- Fix ``burnin`` install path (Issue 426).
- Allow very small disk grows (Issue 347).
- Fix a ``ganeti-noded`` memory bloat introduced in 2.5, by making sure
  that noded doesn't import masterd code (Issue 419).
- Make sure the default metavg at cluster init is the same as the vg,
  if unspecified (Issue 358).
- Fix cleanup of partially created disks (part of Issue 416)


Version 2.7.0 beta2
-------------------

*(Released Tue, 2 Apr 2013)*

This was the second beta release of the 2.7 series. Since beta1:

- Networks no longer have a "type" slot, since this information was
  unused in Ganeti: tags should be used instead.
- The RAPI client now has a ``target_node`` option to MigrateInstance.
- Fix early exit return code for hbal (Issue 386).
- Fix ``gnt-instance migrate/failover -n`` (Issue 396).
- Fix ``rbd showmapped`` output parsing (Issue 312).
- Networks are now referenced by UUID, rather than name. This will
  require running cfgupgrade, from 2.7.0beta1, if networks are in use.
- The OS environment now includes network information.
- Deleting a network is now disallowed if any instance nic is using it,
  to prevent dangling references.
- External storage is now documented in man pages.
- The exclusive_storage flag can now only be set at nodegroup level.
- Hbal can now submit an explicit priority with its jobs.
- Many network related locking fixes.
- Bump up the required pylint version to 0.25.1.
- Fix the ``no_remember`` option in RAPI client.
- Many ipolicy related tests, qa, and fixes.
- Many documentation improvements and fixes.
- Fix building with ``--disable-file-storage``.
- Fix ``-q`` option in htools, which was broken if passed more than
  once.
- Some haskell/python interaction improvements and fixes.
- Fix iallocator in case of missing LVM storage.
- Fix confd config load in case of ``--no-lvm-storage``.
- The confd/query functionality is now mentioned in the security
  documentation.


Version 2.7.0 beta1
-------------------

*(Released Wed, 6 Feb 2013)*

This was the first beta release of the 2.7 series. All important
changes are listed in the latest 2.7 entry.


Version 2.6.2
-------------

*(Released Fri, 21 Dec 2012)*

Important behaviour change: hbal will no longer rebalance instances
which have the ``auto_balance`` attribute set to false. This was the
intention all along, but until now it only skipped those from the N+1
memory reservation (DRBD-specific).

A significant number of bug fixes in this release:

- Fixed disk adoption interaction with ipolicy checks.
- Fixed networking issues when instances are started, stopped or
  migrated, by forcing the tap device's MAC prefix to "fe" (issue 217).
- Fixed the warning in cluster verify for shared storage instances not
  being redundant.
- Fixed removal of storage directory on shared file storage
  (issue 262).
- Fixed validation of LVM volume group name in OpClusterSetParams
  (``gnt-cluster modify``) (issue 285).
- Fixed runtime memory increases (``gnt-instance modify -m``).
- Fixed live migration under Xen's ``xl`` mode.
- Fixed ``gnt-instance console`` with ``xl``.
- Fixed building with newer Haskell compiler/libraries.
- Fixed PID file writing in Haskell daemons (confd); this prevents
  restart issues if confd was launched manually (outside of
  ``daemon-util``) while another copy of it was running
- Fixed a type error when doing live migrations with KVM (issue 297)
  and improved the error messages for failing migrations.
- Fixed opcode validation for the out-of-band commands (``gnt-node
  power``).
- Fixed a type error when unsetting OS hypervisor parameters
  (issue 311); now it's possible to unset all OS-specific hypervisor
  parameters.
- Fixed the ``dry-run`` mode for many operations: verification of
  results was over-zealous but didn't take into account the ``dry-run``
  operation, resulting in "wrong" failures.
- Fixed bash completion in ``gnt-job list`` when the job queue has
  hundreds of entries; especially with older ``bash`` versions, this
  results in significant CPU usage.

And lastly, a few other improvements have been made:

- Added option to force master-failover without voting (issue 282).
- Clarified error message on lock conflict (issue 287).
- Logging of newly submitted jobs has been improved (issue 290).
- Hostname checks have been made uniform between instance rename and
  create (issue 291).
- The ``--submit`` option is now supported by ``gnt-debug delay``.
- Shutting down the master daemon by sending SIGTERM now stops it from
  processing jobs waiting for locks; instead, those jobs will be
  started once again after the master daemon is started the next time
  (issue 296).
- Support for Xen's ``xl`` program has been improved (besides the fixes
  above).
- Reduced logging noise in the Haskell confd daemon (only show one log
  entry for each config reload, instead of two).
- Several man page updates and typo fixes.


Version 2.6.1
-------------

*(Released Fri, 12 Oct 2012)*

A small bugfix release. Among the bugs fixed:

- Fixed double use of ``PRIORITY_OPT`` in ``gnt-node migrate``, which
  made the command unusable.
- Commands that issue many jobs don't fail anymore just because some
  jobs take so long that other jobs are archived.
- Failures during ``gnt-instance reinstall`` are reflected by the exit
  status.
- Issue 190 fixed. Check for DRBD in cluster verify is enabled only
  when DRBD is enabled.
- When ``always_failover`` is set, ``--allow-failover`` is not required
  in migrate commands anymore.
- ``bash_completion`` works even if extglob is disabled.
- Fixed bug with locks that made failover for RBD-based instances fail.
- Fixed bug in non-mirrored instance allocation that made Ganeti choose
  a random node instead of one based on the allocator metric.
- Support for newer versions of pylint and pep8.
- Hail doesn't fail anymore when trying to add an instance of type
  ``file``, ``sharedfile`` or ``rbd``.
- Added new Makefile target to rebuild the whole distribution, so that
  all files are included.


Version 2.6.0
-------------

*(Released Fri, 27 Jul 2012)*

.. attention:: The ``LUXI`` protocol has been made more consistent
   regarding its handling of command arguments. This, however, leads to
   incompatibility issues with previous versions. Please ensure that
   you restart Ganeti daemons soon after the upgrade, otherwise most
   ``LUXI`` calls (job submission, setting/resetting the drain flag,
   pausing/resuming the watcher, cancelling and archiving jobs,
   querying the cluster configuration) will fail.


New features
~~~~~~~~~~~~

Instance run status
+++++++++++++++++++

The current ``admin_up`` field, which used to denote whether an
instance should be running or not, has been removed. Instead,
``admin_state`` is introduced, with 3 possible values -- ``up``,
``down`` and ``offline``.

The rationale behind this is that an instance being “down” can have
different meanings:

- it could be down during a reboot
- it could be temporarily down for a reinstall
- or it could be down because it is deprecated and kept just for its
  disk

The previous Boolean state was making it difficult to do capacity
calculations: should Ganeti reserve memory for a down instance? Now,
the tri-state field makes it clear:

- in ``up`` and ``down`` state, all resources are reserved for the
  instance, and it can be brought up at any time if it is down
- in ``offline`` state, only disk space is reserved for it, but not
  memory or CPUs

The field can have an extra use: since the transition between ``up``
and ``down`` and vice versa is done via ``gnt-instance start/stop``,
but the transition between ``offline`` and ``down`` is done via
``gnt-instance modify``, it is possible to give different rights to
users. For example, owners of an instance could be allowed to
start/stop it, but not to transition it out of the offline state.
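The reservation rule can be summarised in a few lines of Python; this
is an illustrative sketch of the semantics described above, not
Ganeti's actual capacity code::

  def reserved_resources(admin_state, memory, cpus, disk):
      """Return (memory, cpus, disk) reserved for one instance."""
      if admin_state in ("up", "down"):
          # Fully reserved: the instance may be brought up at any time.
          return (memory, cpus, disk)
      if admin_state == "offline":
          # Only the disk space stays reserved.
          return (0, 0, disk)
      raise ValueError("unknown admin_state: %r" % (admin_state,))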
Instance policies and specs
+++++++++++++++++++++++++++

In previous Ganeti versions, the minimum and maximum size of an
instance creation request were limited only by the cluster resources.
As such, any policy could be implemented only in third-party clients
(RAPI clients, or shell wrappers over ``gnt-*`` tools). Furthermore,
calculating cluster capacity via ``hspace`` again required external
input with regards to instance sizes.

In order to improve these workflows and to allow, for example, better
per-node-group differentiation, we introduced instance specs, which
allow declaring:

- minimum instance disk size, disk count, memory size, cpu count
- maximum values for the above metrics
- and “standard” values (used in ``hspace`` to calculate the
  standard-sized instances)

The minimum/maximum values can also be customised at node-group level,
for example allowing more powerful hardware to support bigger instance
memory sizes.

Besides the instance specs, there are a few other settings belonging to
the instance policy framework. It is now possible to customise, per
cluster and node-group:

- the list of allowed disk templates
- the maximum ratio of VCPUs per PCPUs (to control CPU
  oversubscription)
- the maximum ratio of instances to spindles (see below for more
  information) for local storage

All these together should allow all tools that talk to Ganeti to know
what the allowed ranges of values for instances are, and how much
over-subscription is allowed.

For the VCPU/PCPU ratio, we already have the VCPU configuration from
the instance configuration, and the physical CPU configuration from the
node. For the spindle ratios however, we didn't track these values
before, so new parameters have been added:

- a new node parameter ``spindle_count``, which defaults to 1 and is
  customisable at node group or node level
- a new backend parameter (for instances), ``spindle_use``, which
  defaults to 1

Note that spindles in this context don't need to mean actual mechanical
hard-drives; it's just a relative number for both the node I/O capacity
and instance I/O consumption.
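To make the minimum/maximum semantics above concrete, here is a small
Python sketch of a bounds check; the dict layout and metric names are
assumptions of this example, not Ganeti's internal ipolicy
representation::

  # Illustrative policy: min/max/std per metric (sizes in MiB).
  POLICY = {
      "memory-size": {"min": 128, "max": 32768, "std": 1024},
      "cpu-count": {"min": 1, "max": 8, "std": 1},
      "disk-size": {"min": 1024, "max": 1048576, "std": 10240},
  }

  def check_spec(spec, policy=POLICY):
      """Return a list of violations of ``spec`` against ``policy``."""
      violations = []
      for metric, value in spec.items():
          bounds = policy[metric]
          if not bounds["min"] <= value <= bounds["max"]:
              violations.append("%s=%s outside [%s, %s]" % (
                  metric, value, bounds["min"], bounds["max"]))
      return violations

  # A 64 MiB request violates the memory-size minimum above.
  print(check_spec({"memory-size": 64, "cpu-count": 2,
                    "disk-size": 2048}))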
Instance migration behaviour
++++++++++++++++++++++++++++

While live-migration is in general preferable to failover, it is
possible that for some workloads it is actually worse, due to the
variable time of the “suspend” phase during live migration.

To allow the tools to work consistently over such instances (without
having to hard-code instance names), a new backend parameter
``always_failover`` has been added to control the migration/failover
behaviour. When set to True, all migration requests for an instance
will instead fall back to failover.

Instance memory ballooning
++++++++++++++++++++++++++

Initial support for memory ballooning has been added. The memory for an
instance is no longer fixed (backend parameter ``memory``), but instead
can vary between minimum and maximum values (backend parameters
``minmem`` and ``maxmem``). Currently we only change an instance's
memory when:

- live migrating or failing over an instance when the target node
  doesn't have enough memory
- the user requests changing the memory via ``gnt-instance modify
  --runtime-memory``

Instance CPU pinning
++++++++++++++++++++

In order to control the use of specific CPUs by instances, support for
controlling CPU pinning has been added for the Xen, HVM and LXC
hypervisors. This is controlled by a new hypervisor parameter
``cpu_mask``; details about possible values for this are in
:manpage:`gnt-instance(8)`. Note that use of the most specific (precise
VCPU-to-CPU mapping) form will work well only when all nodes in your
cluster have the same number of CPUs.

Disk parameters
+++++++++++++++

Another area in which Ganeti was not customisable were the parameters
used for storage configuration, e.g. how many stripes to use for LVM,
DRBD resync configuration, etc.

To improve this area, we've added disk parameters, which are
customisable at cluster and node group level, and which allow
specifying various parameters for disks (DRBD has the most parameters
currently), for example:

- DRBD resync algorithm and parameters (e.g. speed)
- the default VG for meta-data volumes for DRBD
- number of stripes for LVM (plain disk template)
- the RBD pool

These parameters can be modified via ``gnt-cluster modify -D …`` and
``gnt-group modify -D …``, and are used at either instance creation (in
case of LVM stripes, for example) or at disk “activation” time (e.g.
resync speed).

Rados block device support
++++++++++++++++++++++++++

A Rados (http://ceph.com/wiki/Rbd) storage backend has been added,
denoted by the ``rbd`` disk template type. This is considered
experimental, and feedback is welcome. For details on configuring it,
see the :doc:`install` document and the :manpage:`gnt-cluster(8)` man
page.

Master IP setup
+++++++++++++++

The existing master IP functionality works well only in simple setups
(a single network shared by all nodes); however, if nodes belong to
different networks, then the ``/32`` setup and lack of routing
information is not enough.

To allow the master IP to function well in more complex cases, the
system was reworked as follows:

- a master IP netmask setting has been added
- the master IP activation/turn-down code was moved from the node
  daemon to a separate script
- whether to run the Ganeti-supplied master IP script or a
  user-supplied one is a ``gnt-cluster init`` setting

Details about the location of the standard and custom setup scripts are
in the man page :manpage:`gnt-cluster(8)`; for information about the
setup script protocol, look at the Ganeti-supplied script.

SPICE support
+++++++++++++

The `SPICE `_ support has been improved.

It is now possible to use TLS-protected connections, and when renewing
or changing the cluster certificates (via ``gnt-cluster
renew-crypto``), it is now possible to specify SPICE or SPICE CA
certificates.
Also, it is possible to configure a password for SPICE sessions via the
hypervisor parameter ``spice_password_file``.

There are also new parameters to control the compression and streaming
options (e.g. ``spice_image_compression``, ``spice_streaming_video``,
etc.). For details, see the man page :manpage:`gnt-instance(8)` and
look for the spice parameters.

Lastly, it is now possible to see the SPICE connection information via
``gnt-instance console``.

OVF converter
+++++++++++++

A new tool (``tools/ovfconverter``) has been added that supports
conversion between Ganeti and the `Open Virtualization Format `_ (both
to and from).

This relies on the ``qemu-img`` tool to convert the disk formats, so
the actual compatibility with other virtualization solutions depends on
it.

Confd daemon changes
++++++++++++++++++++

The configuration query daemon (``ganeti-confd``) is now optional, and
has been rewritten in Haskell. Whether to use the daemon at all, and
whether to use the Python (default) or the Haskell version, is
selectable at configure time via the ``--enable-confd`` parameter,
which can take one of the ``haskell``, ``python`` or ``no`` values.
Disabling the daemon, if it is not used, results in a smaller
footprint; for larger systems, we welcome feedback on the Haskell
version, which might become the default in future versions.

If you want to use ``gnt-node list-drbd`` you need to have the Haskell
daemon running. The Python version doesn't implement the new call.

User interface changes
~~~~~~~~~~~~~~~~~~~~~~

We have replaced the ``--disks`` option of ``gnt-instance
replace-disks`` with a more flexible ``--disk`` option, which allows
adding and removing disks at arbitrary indices (Issue 188).
Furthermore, disk size and mode can be changed upon recreation (via
``gnt-instance recreate-disks``, which accepts the same ``--disk``
option).

As many people are used to a ``show`` command, we have added that as an
alias to ``info`` on all ``gnt-*`` commands.

The ``gnt-instance grow-disk`` command has a new mode in which it can
accept the target size of the disk, instead of the delta; this can be
safer since two runs in absolute mode will be idempotent, and sometimes
it's also easier to specify the desired size directly.

Also, the handling of instances with regard to offline secondaries has
been improved. Instance operations should not fail because one of an
instance's secondary nodes is offline, even though it's safe to
proceed.

A new command ``list-drbd`` has been added to the ``gnt-node`` script
to support debugging of DRBD issues on nodes. It provides a mapping of
DRBD minors to instance names.

API changes
~~~~~~~~~~~

RAPI coverage has improved, with (for example) new resources for
recreate-disks, node power-cycle, etc.

Compatibility
~~~~~~~~~~~~~

There is partial support for ``xl`` in the Xen hypervisor; feedback is
welcome.

Python 2.7 is better supported, and after Ganeti 2.6 we will
investigate whether to still support Python 2.4 or move to Python 2.6
as the minimum required version.

Support for Fedora has been slightly improved; the provided example
init.d script should work better on it, and the INSTALL file should
document the needed dependencies.

Internal changes
~~~~~~~~~~~~~~~~

The deprecated ``QueryLocks`` LUXI request has been removed. Use
``Query(what=QR_LOCK, ...)`` instead.
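For illustration, querying locks through the generic call might look
roughly like this in Python; module and constant names follow the
Ganeti source tree, but treat the snippet as a sketch and verify it
against your installed version::

  # Query name and mode of all locks via the generic LUXI interface.
  from ganeti import constants
  from ganeti import luxi

  client = luxi.Client()  # connects to the master daemon socket
  # The third argument is a query filter; None means "no filter".
  result = client.Query(constants.QR_LOCK, ["name", "mode"], None)
  print(result)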
The LUXI requests :pyeval:`luxi.REQ_QUERY_JOBS`,
:pyeval:`luxi.REQ_QUERY_INSTANCES`, :pyeval:`luxi.REQ_QUERY_NODES`,
:pyeval:`luxi.REQ_QUERY_GROUPS`, :pyeval:`luxi.REQ_QUERY_EXPORTS` and
:pyeval:`luxi.REQ_QUERY_TAGS` are deprecated and will be removed in a
future version. :pyeval:`luxi.REQ_QUERY` should be used instead.

RAPI client: ``CertificateError`` now derives from ``GanetiApiError``.
This should make it easier to handle Ganeti errors.

Deprecation warnings due to PyCrypto/paramiko import in
``tools/setup-ssh`` have been silenced, as usually they are safe;
please make sure to run an up-to-date paramiko version, if you use this
tool.

The QA scripts now depend on Python 2.5 or above (the main code base
still works with Python 2.4).

The configuration file (``config.data``) is now written without
indentation for performance reasons; if you want to edit it, it can be
re-formatted via ``tools/fmtjson``.

A number of bugs have been fixed in the cluster merge tool.

``x509`` certificate verification (used in import-export) has been
changed to allow the same clock skew as permitted by the cluster
verification. This will remove some rare but hard-to-diagnose errors in
import-export.


Version 2.6.0 rc4
-----------------

*(Released Thu, 19 Jul 2012)*

Very few changes from rc4 to the final release, only bugfixes:

- integrated fixes from release 2.5.2 (fix general boot flag for KVM
  instances, fix CDROM booting for KVM instances)
- fixed node group modification of node parameters
- fixed issue in LUClusterVerifyGroup with multi-group clusters
- fixed generation of bash completion to ensure a stable ordering
- fixed a few typos


Version 2.6.0 rc3
-----------------

*(Released Fri, 13 Jul 2012)*

Third release candidate for 2.6. The following changes were done from
rc3 to rc4:

- Fixed ``UpgradeConfig`` w.r.t. disk parameters on disk objects.
- Fixed an inconsistency in the LUXI protocol with the provided
  arguments (NOT backwards compatible)
- Fixed a bug with node group ipolicies where ``min`` was greater than
  the cluster ``std`` value
- Implemented a new ``gnt-node list-drbd`` call to list DRBD minors for
  easier instance debugging on nodes (requires ``hconfd`` to work)


Version 2.6.0 rc2
-----------------

*(Released Tue, 03 Jul 2012)*

Second release candidate for 2.6. The following changes were done from
rc2 to rc3:

- Fixed ``gnt-cluster verify`` regarding ``master-ip-script`` on non
  master candidates
- Fixed a RAPI regression on missing beparams/memory
- Fixed redistribution of files on offline nodes
- Added the possibility to run activate-disks even though secondaries
  are offline. With this change it also relaxes the strictness on some
  other commands which use activate disks internally:

  * ``gnt-instance start|reboot|rename|backup|export``

- Made it possible to safely remove an instance if its secondaries are
  offline
- Made it possible to reinstall even though secondaries are offline


Version 2.6.0 rc1
-----------------

*(Released Mon, 25 Jun 2012)*

First release candidate for 2.6. The following changes were done from
rc1 to rc2:

- Fixed bugs with disk parameters and ``rbd`` templates as well as
  ``instance_os_add``
- Made ``gnt-instance modify`` more consistent regarding new NIC/Disk
  behaviour. It now supports the modify operation
- ``hcheck`` implemented to analyze cluster health and the possibility
  of improving health by rebalancing
- ``hbal`` has been improved in dealing with split instances


Version 2.6.0 beta2
-------------------

*(Released Mon, 11 Jun 2012)*

Second beta release of 2.6.
The following changes were done from beta2 to rc1:

- Fixed ``daemon-util`` with non-root user models
- Fixed creation of plain instances with ``--no-wait-for-sync``
- Fix wrong iv_names when running ``cfgupgrade``
- Export more information in RAPI group queries
- Fixed bug when changing instance network interfaces
- Extended burnin to do NIC changes
- query: Added ``<``, ``>``, ``<=``, ``>=`` comparison operators
- Changed default for DRBD barriers
- Fixed DRBD error reporting for syncer rate
- Verify the options on disk parameters

And of course various fixes to documentation and improved unittests and
QA.


Version 2.6.0 beta1
-------------------

*(Released Wed, 23 May 2012)*

First beta release of 2.6. The following changes were done from beta1
to beta2:

- integrated patch for distributions without ``start-stop-daemon``
- adapted example init.d script to work on Fedora
- fixed log handling in Haskell daemons
- adapted checks in the watcher for pycurl linked against libnss
- add partial support for ``xl`` instead of ``xm`` for Xen
- fixed a type issue in cluster verification
- fixed ssconf handling in the Haskell code (was breaking confd in IPv6
  clusters)

Plus integrated fixes from the 2.5 branch:

- fixed ``kvm-ifup`` to use ``/bin/bash``
- fixed parallel build failures
- KVM live migration when using a custom keymap


Version 2.5.2
-------------

*(Released Tue, 24 Jul 2012)*

A small bugfix release, with no new features:

- fixed bash-isms in kvm-ifup, for compatibility with systems which use
  a different default shell (e.g. Debian, Ubuntu)
- fixed KVM startup and live migration with a custom keymap (fixes
  Issue 243 and Debian bug #650664)
- fixed compatibility with KVM versions that don't support multiple
  boot devices (fixes Issue 230 and Debian bug #624256)

Additionally, a few fixes were done to the build system (fixed parallel
build failures) and to the unittests (fixed race condition in the test
for FileID functions, and the default enable/disable mode for QA tests
is now customisable).


Version 2.5.1
-------------

*(Released Fri, 11 May 2012)*

A small bugfix release.

The main issues solved are on the topic of compatibility with newer LVM
releases:

- fixed parsing of the ``lv_attr`` field
- adapted to the new ``vgreduce --removemissing`` behaviour where
  sometimes the ``--force`` flag is needed

Also on the topic of compatibility, ``tools/lvmstrap`` has been changed
to accept kernel 3.x too (it was hardcoded to 2.6.*).

A regression present in 2.5.0 that broke handling (in the gnt-*
scripts) of hook results, and that also made the display of other
errors suboptimal, was fixed; the code now behaves like 2.4 and
earlier.

Another change in 2.5, the cleanup of the OS scripts environment, was
too aggressive: it removed even the ``PATH`` variable, which required
the OS scripts to *always* export it themselves. Since this is a bit
too strict, we now export a minimal PATH, the same as we export for
hooks.

The fix for issue 201 (Preserve bridge MTU in KVM ifup script) was
integrated into this release.
Finally, a few other miscellaneous changes were done (no new features,
just small improvements):

- Fix ``gnt-group --help`` display
- Fix hardcoded Xen kernel path
- Fix grow-disk handling of invalid units
- Update synopsis for ``gnt-cluster repair-disk-sizes``
- Accept both PUT and POST in noded (makes future upgrade to 2.6
  easier)


Version 2.5.0
-------------

*(Released Thu, 12 Apr 2012)*

Incompatible/important changes and bugfixes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- The default of the ``/2/instances/[instance_name]/rename`` RAPI
  resource's ``ip_check`` parameter changed from ``True`` to ``False``
  to match the underlying LUXI interface.
- The ``/2/nodes/[node_name]/evacuate`` RAPI resource was changed to
  use body parameters, see :doc:`RAPI documentation `. The server does
  not maintain backwards-compatibility as the underlying operation
  changed in an incompatible way. The RAPI client can talk to old
  servers, but it needs to be told so as the return value changed.
- When creating file-based instances via RAPI, the ``file_driver``
  parameter no longer defaults to ``loop`` and must be specified.
- The deprecated ``bridge`` NIC parameter is no longer supported. Use
  ``link`` instead.
- Support for the undocumented and deprecated RAPI instance creation
  request format version 0 has been dropped. Use version 1, supported
  since Ganeti 2.1.3 and :doc:`documented `, instead.
- Pyparsing 1.4.6 or above is required, see :doc:`installation
  documentation `.
- The "cluster-verify" hooks are now executed per group by the
  ``OP_CLUSTER_VERIFY_GROUP`` opcode. This maintains the same behavior
  if you just run ``gnt-cluster verify``, which generates one opcode
  per group.
- The environment as passed to the OS scripts is cleared, and thus no
  environment variables defined in the node daemon's environment will
  be inherited by the scripts.
- The :doc:`iallocator ` mode ``multi-evacuate`` has been deprecated.
- :doc:`New iallocator modes ` have been added to support operations
  involving multiple node groups.
- Offline nodes are ignored when failing over an instance.
- Support for KVM version 1.0, which changed the version reporting
  format from 3 to 2 digits.
- TCP/IP ports used by DRBD disks are returned to a pool upon instance
  removal.
- ``Makefile`` is now compatible with Automake 1.11.2
- Includes all bugfixes made in the 2.4 series


New features
~~~~~~~~~~~~

- The ganeti-htools project has been merged into the ganeti-core source
  tree and will be built as part of Ganeti (see :doc:`install-quick`).
- Implemented support for :doc:`shared storage `.
- Add support for disks larger than 2 TB in ``lvmstrap`` by supporting
  GPT-style partition tables (requires `parted `_).
- Added support for floppy drive and 2nd CD-ROM drive in the KVM
  hypervisor.
- Allowed adding tags on instance creation.
- Export instance tags to hooks (``INSTANCE_TAGS``, see :doc:`hooks`)
- Allow instances to be started in a paused state, enabling the user to
  see the complete console output on boot using the console.
- Added new hypervisor flag to control default reboot behaviour
  (``reboot_behavior``).
- Added support for KVM keymaps (hypervisor parameter ``keymap``).
- Improved out-of-band management support:

  - Added ``gnt-node health`` command reporting the health status of
    nodes.
  - Added ``gnt-node power`` command to manage the power status of
    nodes.
  - Added command for emergency power-off (EPO), ``gnt-cluster epo``.

- Instance migration can fall back to failover if the instance is not
  running.
- Filters can be used when listing nodes, instances, groups and locks;
  see the :manpage:`ganeti(7)` manpage.
- Added post-execution status as variables to the :doc:`hooks `
  environment.
- Instance tags are exported/imported together with the instance.
- When given an explicit job ID, ``gnt-job info`` will work for
  archived jobs.
- Jobs can define dependencies on other jobs (not yet supported via
  RAPI or command line, but used by internal commands and usable via
  LUXI).
- Lock monitor (``gnt-debug locks``) shows jobs waiting for
  dependencies.
- Instance failover is now available as a RAPI resource
  (``/2/instances/[instance_name]/failover``).
- ``gnt-instance info`` defaults to static information if the primary
  node is offline.
- Opcodes have a new ``comment`` attribute.
- Added basic SPICE support to the KVM hypervisor.
- ``tools/ganeti-listrunner`` allows passing of arguments to the
  executable.

Node group improvements
~~~~~~~~~~~~~~~~~~~~~~~

- ``gnt-cluster verify`` has been modified to check groups separately,
  thereby improving performance.
- Node group support has been added to ``gnt-cluster verify-disks``,
  which now operates per node group.
- Watcher has been changed to work better with node groups.

  - One process and state file per node group.
  - A slow watcher in one group doesn't block other groups' watchers.

- Added new command, ``gnt-group evacuate``, to move all instances in a
  node group to other groups.
- Added ``gnt-instance change-group`` to move an instance to another
  node group.
- ``gnt-cluster command`` and ``gnt-cluster copyfile`` now support
  per-group operations.
- Node groups can be tagged.
- Some operations switch from an exclusive to a shared lock as soon as
  possible.
- Instance's primary and secondary nodes' groups are now available as
  query fields (``pnode.group``, ``pnode.group.uuid``, ``snodes.group``
  and ``snodes.group.uuid``).

Misc
~~~~

- Numerous updates to documentation and manpages.

  - :doc:`RAPI ` documentation now has detailed parameter descriptions.
  - Some opcode/job results are now also documented, see :doc:`RAPI `.

- A lockset's internal lock is now also visible in the lock monitor.
- Log messages from job queue workers now contain information about the
  opcode they're processing.
- ``gnt-instance console`` no longer requires the instance lock.
- A short delay when waiting for job changes reduces the number of LUXI
  requests significantly.
- DRBD metadata volumes are overwritten with zeros during disk
  creation.
- Out-of-band commands no longer acquire the cluster lock in exclusive
  mode.
- ``devel/upload`` now uses correct permissions for directories.


Version 2.5.0 rc6
-----------------

*(Released Fri, 23 Mar 2012)*

This was the sixth release candidate of the 2.5 series.


Version 2.5.0 rc5
-----------------

*(Released Mon, 9 Jan 2012)*

This was the fifth release candidate of the 2.5 series.


Version 2.5.0 rc4
-----------------

*(Released Thu, 27 Oct 2011)*

This was the fourth release candidate of the 2.5 series.


Version 2.5.0 rc3
-----------------

*(Released Wed, 26 Oct 2011)*

This was the third release candidate of the 2.5 series.


Version 2.5.0 rc2
-----------------

*(Released Tue, 18 Oct 2011)*

This was the second release candidate of the 2.5 series.


Version 2.5.0 rc1
-----------------

*(Released Tue, 4 Oct 2011)*

This was the first release candidate of the 2.5 series.


Version 2.5.0 beta3
-------------------

*(Released Wed, 31 Aug 2011)*

This was the third beta release of the 2.5 series.
Version 2.5.0 beta2
-------------------

*(Released Mon, 22 Aug 2011)*

This was the second beta release of the 2.5 series.


Version 2.5.0 beta1
-------------------

*(Released Fri, 12 Aug 2011)*

This was the first beta release of the 2.5 series.


Version 2.4.5
-------------

*(Released Thu, 27 Oct 2011)*

- Fixed bug when parsing command line parameter values ending in
  backslash
- Fixed assertion error after unclean master shutdown
- Disable HTTP client pool for RPC, significantly reducing memory usage
  of the master daemon
- Fixed queue archive creation with wrong permissions


Version 2.4.4
-------------

*(Released Tue, 23 Aug 2011)*

Small bug-fixes:

- Fixed documentation for importing with the ``--src-dir`` option
- Fixed a bug in ``ensure-dirs`` with queue/archive permissions
- Fixed a parsing issue with DRBD 8.3.11 in the Linux kernel


Version 2.4.3
-------------

*(Released Fri, 5 Aug 2011)*

Many bug-fixes and a few small features:

- Fixed argument order in ``ReserveLV`` and ``ReserveMAC`` which caused
  issues when you tried to add an instance with two MAC addresses in
  one request
- KVM: fixed per-instance stored UID value
- KVM: configure bridged NICs at migration start
- KVM: fixed a bug where an instance would not start with newer KVM
  versions (>= 0.14)
- Added OS search path to ``gnt-cluster info``
- Fixed an issue with ``file_storage_dir`` where you were forced to
  provide an absolute path even though the documentation states it is a
  relative path; the documentation was right
- Added a new parameter to instance stop/start called ``--no-remember``
  that will make the state change not be remembered
- Implemented ``no_remember`` at RAPI level
- Improved the documentation
- Node evacuation: don't call IAllocator if the node is already empty
- Fixed bug in DRBD8 replace disks on current nodes
- Fixed bug in recreate-disks for DRBD instances
- Moved an assertion checking locks in ``gnt-instance replace-disks``
  which caused it to abort, in some situations, due to not owning the
  right locks
- Job queue: Fixed potential race condition when cancelling queued jobs
- Fixed off-by-one bug in job serial generation
- ``gnt-node volumes``: Fix instance names
- Fixed aliases in bash completion
- Fixed a bug in reopening log files after being sent a SIGHUP
- Added a flag to burnin to allow specifying the VCPU count
- Bugfixes to non-root Ganeti configuration


Version 2.4.2
-------------

*(Released Thu, 12 May 2011)*

Many bug-fixes and a few new small features:

- Fixed a bug related to log opening failures
- Fixed a bug in instance listing with orphan instances
- Fixed a bug which prevented resetting the cluster-level node
  parameter ``oob_program`` to the default
- Many fixes related to the ``cluster-merge`` tool
- Fixed a race condition in the lock monitor, which caused failures
  during (at least) creation of many instances in parallel
- Improved output for gnt-job info
- Removed the quiet flag on some ssh calls which prevented debugging
  failures
- Improved the N+1 failure messages in cluster verify by actually
  showing the memory values (needed and available)
- Increased lock attempt timeouts so that when executing long
  operations (e.g. DRBD replace-disks) other jobs do not enter
  'blocking acquire' too early and thus prevent the use of the 'fair'
  mechanism
- Changed instance query data (``gnt-instance info``) to not acquire
  locks unless needed, thus allowing its use on locked instances if
  only static information is asked for
- Improved behaviour with filesystems that do not support rename on an
  opened file
- Fixed the behaviour of the ``prealloc_wipe_disks`` cluster parameter,
  which kept locks on all nodes during the wipe, which is unneeded
- Fixed ``gnt-watcher`` handling of errors during hooks execution
- Fixed bug in ``prealloc_wipe_disks`` with small disk sizes (less than
  10GiB) which caused the wipe to fail right at the end in some cases
- Fixed master IP activation when doing master failover with no-voting
- Fixed bug in ``gnt-node add --readd`` which allowed the re-adding of
  the master node itself
- Fixed potential data-loss under disk-full conditions, where Ganeti
  wouldn't check correctly the return code and would consider
  partially-written files 'correct'
- Fixed bug related to multiple VGs and DRBD disk replacing
- Added new disk parameter ``metavg`` that allows placement of the meta
  device for DRBD in a different volume group
- Fixed error handling in the node daemon when the system libc doesn't
  have major number 6 (i.e. if ``libc.so.6`` is not the actual libc)
- Fixed lock release during replace-disks, which kept cluster-wide
  locks when doing disk replaces with an iallocator script
- Added check for missing bridges in cluster verify
- Handle EPIPE errors while writing to the terminal better, so that
  piping the output to e.g. ``less`` doesn't cause a backtrace
- Fixed rare case where a ^C during Luxi calls could have been
  interpreted as server errors, instead of simply terminating
- Fixed a race condition in LUGroupAssignNodes (``gnt-group
  assign-nodes``)
- Added a few more parameters to the KVM hypervisor, allowing a second
  CDROM, custom disk type for CDROMs and a floppy image
- Removed redundant message in instance rename when the name is already
  given as a FQDN
- Added option to ``gnt-instance recreate-disks`` to allow creating the
  disks on new nodes, allowing recreation when the original instance
  nodes are completely gone
- Added option when converting disk templates to DRBD to skip waiting
  for the resync, in order to make the instance available sooner
- Added two new variables to the OS scripts environment (containing the
  instance's nodes)
- Made the root_path an optional parameter for the xen-pvm hypervisor,
  to allow use of ``pvgrub`` as bootloader
- Changed the instance memory modifications to only check out-of-memory
  conditions on memory increases, and turned the secondary node
  warnings into errors (they can still be overridden via ``--force``)
- Fixed the handling of a corner case when the Python installation gets
  corrupted (e.g. by a bad disk) while ganeti-noded is running and we
  try to execute a command that doesn't exist
- Fixed a bug in ``gnt-instance move`` (LUInstanceMove) when the
  primary node of the instance returned failures during instance
  shutdown; this adds the option ``--ignore-consistency`` to
  gnt-instance move

And as usual, various improvements to the error messages, documentation
and man pages.


Version 2.4.1
-------------

*(Released Wed, 09 Mar 2011)*

Emergency bug-fix release. ``tools/cfgupgrade`` was broken and
overwrote the RAPI users file if run twice (even with ``--dry-run``).
The release fixes that bug (nothing else changed).
Version 2.4.0
-------------

*(Released Mon, 07 Mar 2011)*

Final 2.4.0 release. Just a few small fixes:

- Fixed RAPI node evacuate
- Fixed the kvm-ifup script
- Fixed internal error handling for special job cases
- Updated man page to specify the escaping feature for options


Version 2.4.0 rc3
-----------------

*(Released Mon, 28 Feb 2011)*

A critical fix for the ``prealloc_wipe_disks`` feature: it was possible
that this feature wiped the disks of the wrong instance, leading to
loss of data.

Other changes:

- Fixed title of query field containing instance name
- Expanded the glossary in the documentation
- Fixed one unittest (internal issue)


Version 2.4.0 rc2
-----------------

*(Released Mon, 21 Feb 2011)*

A number of bug fixes plus just a couple of functionality changes.

On the user-visible side, the ``gnt-* list`` command output has changed
with respect to "special" field states. The current rc1 style of
display can be re-enabled by passing a new ``--verbose`` (``-v``) flag,
but in the default output mode special field states are displayed as
follows:

- Offline resource: ``*``
- Unavailable/not applicable: ``-``
- Data missing (RPC failure): ``?``
- Unknown field: ``??``

Another user-visible change is the addition of ``--force-join`` to
``gnt-node add``.

As for bug fixes:

- ``tools/cluster-merge`` has seen many fixes and is now enabled again
- Fixed regression in RAPI/instance reinstall where all parameters were
  required (instead of optional)
- Fixed ``gnt-cluster repair-disk-sizes``, which was broken since
  Ganeti 2.2
- Fixed iallocator usage (offline nodes were not considered offline)
- Fixed ``gnt-node list`` with respect to non-vm_capable nodes
- Fixed hypervisor and OS parameter validation with respect to
  non-vm_capable nodes
- Fixed ``gnt-cluster verify`` with respect to offline nodes (mostly
  cosmetic)
- Fixed ``tools/listrunner`` with respect to agent-based usage


Version 2.4.0 rc1
-----------------

*(Released Fri, 4 Feb 2011)*

Many changes and fixes since the beta1 release. While there were some
internal changes, the code has been mostly stabilised for the RC
release.

Note: the dumb allocator was removed in this release, as it was not
kept up-to-date with the IAllocator protocol changes. It is recommended
to use the ``hail`` command from the ganeti-htools package.

Note: the 2.4 and up versions of Ganeti are not compatible with the
0.2.x branch of ganeti-htools. You need to upgrade to
ganeti-htools-0.3.0 (or later).
Regressions fixed from 2.3
~~~~~~~~~~~~~~~~~~~~~~~~~~

- Fixed the ``gnt-cluster verify-disks`` command
- Made ``gnt-cluster verify-disks`` work in parallel (as opposed to
  serially on nodes)
- Fixed disk adoption breakage
- Fixed wrong headers in instance listing for field aliases

Other bugs fixed
~~~~~~~~~~~~~~~~

- Fixed corner case in KVM handling of NICs
- Fixed many cases of wrong handling of non-vm_capable nodes
- Fixed a bug where a missing instance symlink was not possible to
  recreate with any ``gnt-*`` command (now ``gnt-instance
  activate-disks`` does it)
- Fixed the volume group name as reported by ``gnt-cluster
  verify-disks``
- Increased timeouts for the import-export code, hopefully leading to
  fewer aborts due to network or instance timeouts
- Fixed bug in ``gnt-node list-storage``
- Fixed bug where not all daemons were started on cluster
  initialisation, but only at the first watcher run
- Fixed many bugs in the OOB implementation
- Fixed watcher behaviour in presence of instances with offline
  secondaries
- Fixed instance list output for instances running on the wrong node
- a few fixes to the cluster-merge tool, but it still cannot merge
  multi-node groups (currently it is not recommended to use this tool)

Improvements
~~~~~~~~~~~~

- Improved network configuration for the KVM hypervisor
- Added e1000 as a supported NIC for Xen-HVM
- Improved the lvmstrap tool to also be able to use partitions, as
  opposed to full disks
- Improved the speed of disk wiping (the cluster parameter
  ``prealloc_wipe_disks``), so that it has a low impact on the total
  time of instance creations
- Added documentation for the OS parameters
- Changed ``gnt-instance deactivate-disks`` so that it can work if the
  hypervisor is not responding
- Added display of blacklisted and hidden OS information in
  ``gnt-cluster info``
- Extended ``gnt-cluster verify`` to also validate hypervisor, backend,
  NIC and node parameters, which might create problems with currently
  invalid (but undetected) configuration files, but prevents validation
  failures when unrelated parameters are modified
- Changed cluster initialisation to wait for the master daemon to
  become available
- Expanded the RAPI interface:

  - Added config redistribution resource
  - Added activation/deactivation of instance disks
  - Added export of console information

- Implemented log file reopening on SIGHUP, which allows using
  logrotate(8) for the Ganeti log files
- Added a basic OOB helper script as an example


Version 2.4.0 beta1
-------------------

*(Released Fri, 14 Jan 2011)*

User-visible
~~~~~~~~~~~~

- Fixed timezone issues when formatting timestamps
- Added support for node groups, available via ``gnt-group`` and other
  commands
- Added out-of-band framework and management, see :doc:`design
  document `
- Removed support for roman numbers from ``gnt-node list`` and
  ``gnt-instance list``.
- Allowed modification of the master network interface via
  ``gnt-cluster modify --master-netdev``
- Accept offline secondaries while shutting down instance disks
- Added ``blockdev_prefix`` parameter to Xen PVM and HVM hypervisors
- Added support for multiple LVM volume groups
- Avoid sorting nodes for ``gnt-node list`` if specific nodes are
  requested
- Added commands to list available fields:

  - ``gnt-node list-fields``
  - ``gnt-group list-fields``
  - ``gnt-instance list-fields``

- Updated documentation and man pages

Integration
~~~~~~~~~~~

- Moved the ``rapi_users`` file into a separate directory, now named
  ``.../ganeti/rapi/users``; ``cfgupgrade`` moves the file and creates
  a symlink
- Added new tool for running commands on many machines,
  ``tools/ganeti-listrunner``
- Implemented more verbose result in ``OpInstanceConsole`` opcode, also
  improving the ``gnt-instance console`` output
- Allowed customisation of the disk index separator at ``configure``
  time
- Export node group allocation policy to :doc:`iallocator `
- Added support for non-partitioned md disks in ``lvmstrap``
- Added script to gracefully power off KVM instances
- Split ``utils`` module into smaller parts
- Changed query operations to return more detailed information, e.g.
  whether an information is unavailable due to an offline node. To use
  this new functionality, the LUXI call ``Query`` must be used. Field
  information is now stored by the master daemon and can be retrieved
  using ``QueryFields``. Instances, nodes and groups can also be
  queried using the new opcodes ``OpQuery`` and ``OpQueryFields`` (not
  yet exposed via RAPI). The following commands make use of this
  infrastructure change:

  - ``gnt-group list``
  - ``gnt-group list-fields``
  - ``gnt-node list``
  - ``gnt-node list-fields``
  - ``gnt-instance list``
  - ``gnt-instance list-fields``
  - ``gnt-debug locks``

Remote API
~~~~~~~~~~

- New RAPI resources (see :doc:`rapi`):

  - ``/2/modify``
  - ``/2/groups``
  - ``/2/groups/[group_name]``
  - ``/2/groups/[group_name]/assign-nodes``
  - ``/2/groups/[group_name]/modify``
  - ``/2/groups/[group_name]/rename``
  - ``/2/instances/[instance_name]/disk/[disk_index]/grow``

- RAPI changes:

  - Implemented ``no_install`` for instance creation
  - Implemented OS parameters for instance reinstallation, allowing use
    of special settings on reinstallation (e.g. for preserving data)

Misc
~~~~

- Added IPv6 support in import/export
- Pause DRBD synchronization while wiping disks on instance creation
- Updated unittests and QA scripts
- Improved network parameters passed to KVM
- Converted man pages from docbook to reStructuredText


Version 2.3.1
-------------

*(Released Mon, 20 Dec 2010)*

Released version 2.3.1~rc1 without any changes.


Version 2.3.1 rc1
-----------------

*(Released Wed, 1 Dec 2010)*

- impexpd: Disable OpenSSL compression in socat if possible (backport
  from master, commit e90739d625b, see :doc:`installation guide ` for
  details)
- Changed unittest coverage report to exclude test scripts
- Added script to check version format


Version 2.3.0
-------------

*(Released Wed, 1 Dec 2010)*

Released version 2.3.0~rc1 without any changes.
Version 2.3.0 rc1
-----------------

*(Released Fri, 19 Nov 2010)*

A number of bugfixes and documentation updates:

- Update ganeti-os-interface documentation
- Fixed a bug related to duplicate MACs or similar items which should
  be unique
- Fix breakage in OS state modify
- Reinstall instance: disallow offline secondaries (fixes bug related
  to OS changing but reinstall failing)
- plus all the other fixes between 2.2.1 and 2.2.2


Version 2.3.0 rc0
-----------------

*(Released Tue, 2 Nov 2010)*

- Fixed clearing of the default iallocator using ``gnt-cluster modify``
- Fixed master failover race with watcher
- Fixed a bug in ``gnt-node modify`` which could lead to an
  inconsistent configuration
- Accept previously stopped instance for export with instance removal
- Simplify and extend the environment variables for instance OS scripts
- Added new node flags, ``master_capable`` and ``vm_capable``
- Added optional instance disk wiping prior to allocation. This is a
  cluster-wide option and can be set/modified using ``gnt-cluster
  {init,modify} --prealloc-wipe-disks``.
- Added IPv6 support, see :doc:`design document ` and
  :doc:`install-quick`
- Added a new watcher option (``--ignore-pause``)
- Added option to ignore offline nodes on instance start/stop
  (``--ignore-offline``)
- Allow overriding OS parameters with ``gnt-instance reinstall``
- Added ability to change a node's secondary IP address using
  ``gnt-node modify``
- Implemented privilege separation for all daemons except
  ``ganeti-noded``, see ``configure`` options
- Complain if an instance's disk is marked faulty in ``gnt-cluster
  verify``
- Implemented job priorities (see ``ganeti(7)`` manpage)
- Ignore failures while shutting down instances during failover from
  offline nodes
- Exit daemon's bootstrap process only once the daemon is ready
- Export more information via ``LUInstanceQuery``/remote API
- Improved documentation, QA and unittests
- The RAPI daemon now watches ``rapi_users`` all the time and doesn't
  need a restart if the file was created or changed
- Added LUXI protocol version sent with each request and response,
  allowing detection of server/client mismatches
- Moved the Python scripts among gnt-* and ganeti-* into modules
- Moved all code related to setting up SSH to an external script,
  ``setup-ssh``
- Infrastructure changes for node group support in future versions


Version 2.2.2
-------------

*(Released Fri, 19 Nov 2010)*

A few small bugs fixed, and some improvements to the build system:

- Fix documentation regarding conversion to drbd
- Fix validation of parameters in cluster modify (``gnt-cluster modify
  -B``)
- Fix error handling in node modify with multiple changes
- Allow remote imports without checked names


Version 2.2.1
-------------

*(Released Tue, 19 Oct 2010)*

- Disable SSL session ID cache in RPC client


Version 2.2.1 rc1
-----------------

*(Released Thu, 14 Oct 2010)*

- Fix interaction between Curl/GnuTLS and Python's HTTP server (thanks
  Apollon Oikonomopoulos!), finally allowing the use of Curl with
  GnuTLS
- Fix problems with interaction between Curl and Python's HTTP server,
  resulting in increased speed in many RPC calls
- Improve our release script to prevent breakage with older aclocal and
  Python 2.6


Version 2.2.1 rc0
-----------------

*(Released Thu, 7 Oct 2010)*

- Fixed issue 125, replace hardcoded "xenvg" in ``gnt-cluster`` with
  value retrieved from master
- Added support for blacklisted or hidden OS definitions
- Added simple lock monitor (accessible via ``gnt-debug locks``)
- Added support for ``-mem-path`` in the KVM hypervisor abstraction
  layer
- Allow overriding instance parameters in the tool for inter-cluster
  instance moves (``tools/move-instance``)
- Improved opcode summaries (e.g. in ``gnt-job list``)
- Improve consistency of OS listing by sorting it
- Documentation updates


Version 2.2.0.1
---------------

*(Released Fri, 8 Oct 2010)*

- Rebuild with a newer autotools version, to fix python 2.6
  compatibility


Version 2.2.0
-------------

*(Released Mon, 4 Oct 2010)*

- Fixed regression in ``gnt-instance rename``


Version 2.2.0 rc2
-----------------

*(Released Wed, 22 Sep 2010)*

- Fixed OS_VARIANT variable for OS scripts
- Fixed cluster tag operations via RAPI
- Made ``setup-ssh`` exit with non-zero code if an error occurred
- Disabled RAPI CA checks in watcher


Version 2.2.0 rc1
-----------------

*(Released Mon, 23 Aug 2010)*

- Support DRBD versions of the format "a.b.c.d"
- Updated manpages
- Re-introduced support for usage from multiple threads in the RAPI
  client
- Instance renames and modify via RAPI
- Work around race condition between processing and archival in the job
  queue
- Mark opcodes following a failed one as failed, too
- Job field ``lock_status`` was removed due to difficulties making it
  work with the changed job queue in Ganeti 2.2; a better way to
  monitor locks is expected for a later 2.2.x release
- Fixed dry-run behaviour with many commands
- Support ``ssh-agent`` again when adding nodes
- Many additional bugfixes


Version 2.2.0 rc0
-----------------

*(Released Fri, 30 Jul 2010)*

Important change: the internal RPC mechanism between Ganeti nodes has
changed from using a home-grown http library (based on the Python base
libraries) to using the PycURL library. This requires that PycURL is
installed on nodes. Please note that on Debian/Ubuntu, PycURL is linked
against GnuTLS by default. cURL's support for GnuTLS had known issues
before cURL 7.21.0 and we recommend using the latest cURL release or
linking against OpenSSL. Most other distributions already link PycURL
and cURL against OpenSSL. The command::

  python -c 'import pycurl; print pycurl.version'

can be used to determine the libraries PycURL and cURL are linked
against.
Other significant changes:

- Rewrote much of the internals of the job queue, in order to achieve
  better parallelism; this decouples job query operations from the job
  processing, it should allow much nicer behaviour of the master daemon
  under load, and it also has uncovered some long-standing bugs related
  to the job serialisation (now fixed)
- Added a default iallocator setting to the cluster parameters,
  eliminating the need to always pass nodes or an iallocator for
  operations that require selection of new node(s)
- Added experimental support for the LXC virtualization method
- Added support for OS parameters, which allows the installation of
  instances to pass parameters to OS scripts in order to customise the
  instance
- Added a hypervisor parameter controlling the migration type (live or
  non-live), since hypervisors have various levels of reliability; this
  has renamed the 'live' parameter to 'mode'
- Added a cluster parameter ``reserved_lvs`` that denotes reserved
  logical volumes, meaning that cluster verify will ignore them and not
  flag their presence as errors
- The watcher will now reset the error count for failed instances after
  8 hours, thus allowing self-healing if the problem that caused the
  instances to be down/fail to start has cleared in the meantime
- Added a cluster parameter ``drbd_usermode_helper`` that makes Ganeti
  check for, and warn, if the drbd module parameter ``usermode_helper``
  is not consistent with the cluster-wide setting; this is needed to
  make diagnosis of failed drbd creations easier
- Started adding base IPv6 support, but this is not yet
  enabled/available for use
- Rename operations (cluster, instance) will now return the new name,
  which is especially useful if a short name was passed in
- Added support for instance migration in RAPI
- Added a tool to pre-configure nodes for the SSH setup, before joining
  them to the cluster; this will allow in the future a simplified model
  for node joining (but not yet fully enabled in 2.2); this needs the
  paramiko python library
- Fixed handling of name-resolving errors
- Fixed consistency of job results on the error path
- Fixed master-failover race condition when executed multiple times in
  sequence
- Fixed many bugs related to the job queue (mostly introduced during
  the 2.2 development cycle, so not all are impacting 2.1)
- Fixed instance migration with missing disk symlinks
- Fixed handling of unknown jobs in ``gnt-job archive``
- And many other small fixes/improvements

Internal changes:

- Enhanced both the unittest and the QA coverage
- Switched the opcode validation to a generic model, and extended the
  validation to all opcode parameters
- Changed more parts of the code that write shell scripts to use the
  same class for this
- Switched the master daemon to use the asyncore library for the Luxi
  server endpoint


Version 2.2.0 beta0
-------------------

*(Released Thu, 17 Jun 2010)*

- Added tool (``move-instance``) and infrastructure to move instances
  between separate clusters (see :doc:`separate documentation ` and
  :doc:`design document `)
- Added per-request RPC timeout
- RAPI now requires a Content-Type header for requests with a body
  (e.g. ``PUT`` or ``POST``) which must be set to ``application/json``
  (see :rfc:`2616` (HTTP/1.1), section 7.2.1)
- ``ganeti-watcher`` attempts to restart ``ganeti-rapi`` if RAPI is not
  reachable
- Implemented initial support for running Ganeti daemons as separate
  users, see configure-time flags ``--with-user-prefix`` and
  ``--with-group-prefix`` (only ``ganeti-rapi`` is supported at this
  time)
- Instances can be removed after export (``gnt-backup export
  --remove-instance``)
- Self-signed certificates generated by Ganeti now use a 2048 bit RSA
  key (instead of 1024 bit)
- Added new cluster configuration file for the cluster domain secret
- Import/export now use SSL instead of SSH
- Added support for showing the estimated time when exporting an
  instance, see the ``ganeti-os-interface(7)`` manpage and look for
  ``EXP_SIZE_FD``


Version 2.1.8
-------------

*(Released Tue, 16 Nov 2010)*

Some more bugfixes. Unless critical bugs occur, this will be the last
2.1 release:

- Fix case of MAC special-values
- Fix MAC checker regex
- backend: Fix typo causing "out of range" error
- Add missing --units in gnt-instance list man page


Version 2.1.7
-------------

*(Released Tue, 24 Aug 2010)*

Bugfixes only:

- Don't ignore secondary node silently on non-mirrored disk templates
  (issue 113)
- Fix --master-netdev arg name in gnt-cluster(8) (issue 114)
- Fix usb_mouse parameter breaking with vnc_console (issue 109)
- Properly document the usb_mouse parameter
- Fix path in ganeti-rapi(8) (issue 116)
- Adjust error message when the ganeti user's .ssh directory is missing
- Add same-node check when changing the disk template to drbd


Version 2.1.6
-------------

*(Released Fri, 16 Jul 2010)*

Bugfixes only:

- Add an option to only select some reboot types during qa/burnin (on
  some hypervisors consecutive reboots are not supported)
- Fix infrequent race condition in master failover: sometimes the old
  master IP address would still be detected as up for a short time
  after it was removed, causing failover to fail
- Decrease mlockall warnings when the ctypes module is missing. On
  Python 2.4 we support running even if no ctypes module is installed,
  but we were too verbose about this issue
- Fix building on old distributions, on which man doesn't have a
  --warnings option
- Fix RAPI not to ignore the MAC address on instance creation
- Implement the old instance creation format in the RAPI client
Version 2.1.5
-------------

*(Released Thu, 01 Jul 2010)*

A small bugfix release:

- Fix disk adoption: broken by strict ``--disk`` option checking in
  2.1.4
- Fix batch-create: broken in the whole 2.1 series due to a lookup on
  a non-existing option
- Fix instance create: the ``--force-variant`` option was ignored
- Improve pylint 0.21 compatibility and warnings with Python 2.6
- Fix modify node storage with non-FQDN arguments
- Fix the RAPI client to authenticate under Python 2.6 when used for
  more than 5 requests needing authentication
- Fix gnt-instance modify -t (storage) giving a wrong error message
  when converting a non-shutdown drbd instance to plain

Version 2.1.4
-------------

*(Released Fri, 18 Jun 2010)*

A small bugfix release:

- Fix live migration of KVM instances started with older Ganeti
  versions which had fewer hypervisor parameters
- Fix gnt-instance grow-disk on down instances
- Fix an error-reporting bug during instance migration
- Better checking of the ``--net`` and ``--disk`` values, to avoid
  silently ignoring broken ones
- Fix an RPC error reporting bug affecting, for example, RAPI client
  users
- Fix a bug triggered by OSes with different API versions on different
  nodes
- Fix a bug in instance startup with custom hvparams: OS level
  parameters would fail to be applied
- Fix the RAPI client under Python 2.6 (but more work is needed to
  make it work completely well with OpenSSL)
- Fix handling of errors when resolving names from DNS

Version 2.1.3
-------------

*(Released Thu, 3 Jun 2010)*

A medium sized development cycle. Some new features, and some
fixes/small improvements/cleanups.

Significant features
~~~~~~~~~~~~~~~~~~~~

The node daemon now tries to mlock itself into memory, unless the
``--no-mlock`` flag is passed. It also doesn't fail if it can't write
its logs, and falls back to console logging. This allows emergency
features such as ``gnt-node powercycle`` to work even in the event of
a broken node disk (tested offlining the disk hosting the node's
filesystem and dropping its memory caches; don't try this at home)

KVM: add vhost-net acceleration support. It can be tested with a new
enough version of the kernel and of qemu-kvm.

KVM: add instance chrooting feature. If you use privilege dropping for
your VMs you can also now force them to chroot to an empty directory,
before starting the emulated guest.

KVM: add maximum migration bandwidth and maximum downtime tweaking
support (requires a new-enough version of qemu-kvm).

Cluster verify will now warn if the master node doesn't have the
master IP configured on it.

Add a new (incompatible) instance creation request format to RAPI
which supports all parameters (previously only a subset was supported,
and it wasn't possible to extend the old format to accommodate all the
new features). The old format is still supported, and a client can
check for this feature, before using it, by checking for its presence
in the ``features`` RAPI resource.

Now with ancient Latin support. Try it by passing the ``--roman``
option to ``gnt-instance info``, ``gnt-cluster info`` or ``gnt-node
list`` (requires the python-roman module to be installed in order to
work).

Other changes
~~~~~~~~~~~~~

As usual many internal code refactorings, documentation updates, and
such. Among others:

- Lots of improvements and cleanups to the experimental Remote API
  (RAPI) client library.
- A new unit test suite for the core daemon libraries.
- A fix to creating missing directories makes sure the umask is not
  applied anymore. This enforces the same directory permissions
  everywhere.
- Better handling of terminating daemons with Ctrl+C (used when
  running them in debugging mode).
- Fix a race condition in live migrating a KVM instance, when stat()
  on the old proc status file returned EINVAL, which is an unexpected
  value.
- Fixed manpage checking with newer man and UTF-8 characters. But now
  you need the en_US.UTF-8 locale enabled to build Ganeti from git.

Version 2.1.2.1
---------------

*(Released Fri, 7 May 2010)*

Fix a bug which prevented untagged KVM instances from starting.

Version 2.1.2
-------------

*(Released Fri, 7 May 2010)*

Another release with a long development cycle, during which many
different features were added.

Significant features
~~~~~~~~~~~~~~~~~~~~

The KVM hypervisor now can run the individual instances as non-root,
to reduce the impact of a VM being hijacked due to bugs in the
hypervisor. It is possible to run all instances as a single (non-root)
user, to manually specify a user for each instance, or to dynamically
allocate a user out of a cluster-wide pool to each instance, with the
guarantee that no two instances will run under the same user ID on any
given node.

An experimental RAPI client library, which can be used standalone
(without the other Ganeti libraries), is provided in the source tree
as ``lib/rapi/client.py``. Note this client might change its interface
in the future, as we iterate on its capabilities.

A new command, ``gnt-cluster renew-crypto``, has been added to easily
replace the cluster's certificates and crypto keys. This might help in
case they have been compromised, or have simply expired.

A new disk option for instance creation has been added that allows one
to "adopt" currently existing logical volumes, with data preservation.
This should allow easier migration to Ganeti from unmanaged (or
managed via other software) instances.

Another disk improvement is the possibility to convert between
redundant (DRBD) and plain (LVM) disk configuration for an instance.
This should allow better scalability (starting with one node and
growing the cluster, or shrinking a two-node cluster to one node).

A new feature that could help with automated node failovers has been
implemented: if a node sees itself as offline (by querying the master
candidates), it will try to shut down (hard) all instances and any
active DRBD devices. This reduces the risk of duplicate instances if
an external script automatically fails over the instances on such
nodes. To enable this, the cluster parameter ``maintain_node_health``
should be enabled; in the future this option (per the name) will
enable other automatic maintenance features.

Instance export/import will now reuse the original instance
specifications for all parameters; that means exporting an instance,
deleting it and then importing it back should give an almost identical
instance. Note that the default import behaviour has changed from
before, where it created only one NIC; now it recreates the original
number of NICs.

Cluster verify has added a few new checks: SSL certificates validity,
/etc/hosts consistency across the cluster, etc.

Other changes
~~~~~~~~~~~~~

As usual, many internal changes were done, documentation fixes, etc.
Among others:

- Fixed cluster initialization with disabled cluster storage
  (regression introduced in 2.1.1)
- File-based storage supports growing the disks
- Fixed behaviour of node role changes
- Fixed cluster verify for some corner cases, plus a general rewrite
  of cluster verify to allow future extension with more checks
- Fixed log spamming by the watcher and node daemon (regression
  introduced in 2.1.1)
- Fixed possible validation issues when changing the list of enabled
  hypervisors
- Fixed cleanup of /etc/hosts during node removal
- Fixed RAPI response for invalid methods
- Fixed bug with hashed passwords in the ``ganeti-rapi`` daemon
- Multiple small improvements to the KVM hypervisor (VNC usage,
  booting from IDE disks, etc.)
- Allow OS changes without re-installation (to record a changed OS
  outside of Ganeti, or to allow OS renames)
- Allow instance creation without OS installation (useful for example
  if the OS will be installed manually, or restored from a backup not
  in Ganeti format)
- Implemented option to make cluster ``copyfile`` use the replication
  network
- Added list of enabled hypervisors to ssconf (possibly useful for
  external scripts)
- Added a new tool (``tools/cfgupgrade12``) that allows upgrading from
  1.2 clusters
- A partial form of node re-IP is possible via node readd, which now
  allows a changed node primary IP
- Command line utilities now show an informational message if the job
  is waiting for a lock
- The logs of the master daemon now show the PID/UID/GID of the
  connected client

Version 2.1.1
-------------

*(Released Fri, 12 Mar 2010)*

During the 2.1.0 long release candidate cycle, a lot of improvements
and changes accumulated, which were later released as 2.1.1.

Major changes
~~~~~~~~~~~~~

The node evacuate command (``gnt-node evacuate``) was significantly
rewritten, and as such the IAllocator protocol was changed - a new
request type has been added. This unfortunate change during a stable
series is designed to improve performance of node evacuations; on
clusters with more than about five nodes and which are well-balanced,
evacuation should proceed in parallel for all instances of the node
being evacuated. As such, any existing IAllocator scripts need to be
updated, otherwise the above command will fail due to the unknown
request. The provided "dumb" allocator has not been updated; but the
ganeti-htools package supports the new protocol since version 0.2.4.

Another important change is increased validation of node and instance
names. This might create problems in special cases, if invalid host
names are being used.

Also, a new layer of hypervisor parameters has been added, that sits
at OS level between the cluster defaults and the instance ones. This
allows customisation of virtualization parameters depending on the
installed OS. For example instances with OS 'X' may have a different
KVM kernel (or any other parameter) than the cluster defaults. This is
intended to help manage multiple OSes on the same cluster, without
manual modification of each instance's parameters.

A tool for merging clusters, ``cluster-merge``, has been added in the
tools sub-directory.

Bug fixes
~~~~~~~~~

- Improved the int/float conversions that should make the code more
  robust in face of errors from the node daemons
- Fixed the remove node code in case of internal configuration errors
- Fixed the node daemon behaviour in face of an inconsistent queue
  directory (e.g. a read-only file-system where we can't open the
  files read-write, etc.)
- Fixed the behaviour of gnt-node modify for master candidate
  demotion; now it either aborts cleanly or, if given the new
  "auto_promote" parameter, will automatically promote other nodes as
  needed
- Fixed compatibility with (not yet released) Python 2.6.5 that would
  completely prevent Ganeti from working
- Fixed a bug in instance export when not all disks were successfully
  exported
- Fixed behaviour of node add when the new node is slow in starting up
  the node daemon
- Fixed handling of signals in the LUXI client, which should improve
  behaviour of command-line scripts
- Added checks for invalid node/instance names in the configuration
  (now flagged during cluster verify)
- Fixed watcher behaviour for disk activation errors
- Fixed two potentially endless loops in the http library, which led
  to the RAPI daemon hanging and consuming 100% CPU in some cases
- Fixed a bug in the RAPI daemon related to hashed passwords
- Fixed a bug causing unintended qemu-level bridging of multi-NIC KVM
  instances
- Enhanced compatibility with non-Debian OSes, by not using absolute
  paths in some commands and by allowing customisation of the ssh
  configuration directory
- Fixed a possible future issue with new Python versions by abiding by
  the proper use of the ``__slots__`` attribute on classes
- Added checks that should prevent directory traversal attacks
- Many documentation fixes based on feedback from users

New features
~~~~~~~~~~~~

- Added an "early_release" mode for instance replace disks and node
  evacuate, where we release locks earlier and thus allow higher
  parallelism within the cluster
- Added watcher hooks, intended to allow the watcher to restart other
  daemons (e.g. from the ganeti-nbma project), but of course they can
  be used for any other purpose
- Added a compile-time disable for DRBD barriers, to increase
  performance if the administrator trusts the power supply or the
  storage system to not lose writes
- Added the option of using syslog for logging instead of, or in
  addition to, Ganeti's own log files
- Removed the boot restriction for paravirtual NICs for KVM; recent
  versions can indeed boot from a paravirtual NIC
- Added a generic debug level for many operations; while this is not
  used widely yet, it allows one to pass the debug value all the way
  to the OS scripts
- Enhanced the hooks environment for instance moves (failovers,
  migrations) where the primary/secondary nodes changed during the
  operation, by adding {NEW,OLD}_{PRIMARY,SECONDARY} vars
- Enhanced data validations for many user-supplied values; one
  important item is the restrictions imposed on instance and node
  names, which might reject some (invalid) host names
- Add a configure-time option to disable file-based storage, if it's
  not needed; this allows greater security separation between the
  master node and the other nodes from the point of view of the
  inter-node RPC protocol
- Added user notification in interactive tools if a job is waiting in
  the job queue or trying to acquire locks
- Added log messages when a job is waiting for locks
- Added filtering by node tags in instance operations which admit
  multiple instances (start, stop, reboot, reinstall)
- Added a new tool for cluster mergers, ``cluster-merge``
- Parameters from the command line which are of the form ``a=b,c=d``
  can now use backslash escapes to pass in values which contain
  commas, e.g.
  ``a=b\,c,d=e``, where the 'a' parameter would get the value ``b,c``
- For KVM, the instance name is the first parameter passed to KVM, so
  that it's more visible in the process list

Version 2.1.0
-------------

*(Released Tue, 2 Mar 2010)*

Ganeti 2.1 brings many improvements with it. Major changes:

- Added infrastructure to ease automated disk repairs
- Added a new daemon to export configuration data in a cheaper way
  than using the remote API
- Instance NICs can now be routed instead of being associated with a
  networking bridge
- Improved job locking logic to reduce the impact of jobs acquiring
  multiple locks waiting for other long-running jobs

In-depth implementation details can be found in the Ganeti 2.1 design
document.

Details
~~~~~~~

- Added chroot hypervisor
- Added more options to the xen-hvm hypervisor (``kernel_path`` and
  ``device_model``)
- Added more options to the xen-pvm hypervisor (``use_bootloader``,
  ``bootloader_path`` and ``bootloader_args``)
- Added the ``use_localtime`` option for the xen-hvm and kvm
  hypervisors, and the default value for this has changed to false (in
  2.0 xen-hvm always enabled it)
- Added luxi call to submit multiple jobs in one go
- Added cluster initialization option to not modify the ``/etc/hosts``
  file on nodes
- Added network interface parameters
- Added dry run mode to some LUs
- Added RAPI resources:

  - ``/2/instances/[instance_name]/info``
  - ``/2/instances/[instance_name]/replace-disks``
  - ``/2/nodes/[node_name]/evacuate``
  - ``/2/nodes/[node_name]/migrate``
  - ``/2/nodes/[node_name]/role``
  - ``/2/nodes/[node_name]/storage``
  - ``/2/nodes/[node_name]/storage/modify``
  - ``/2/nodes/[node_name]/storage/repair``

- Added OpCodes to evacuate or migrate all instances on a node
- Added a new command to list storage elements on nodes (``gnt-node
  list-storage``) and modify them (``gnt-node modify-storage``)
- Added new ssconf files with the master candidate IP addresses
  (``ssconf_master_candidates_ips``), node primary IP addresses
  (``ssconf_node_primary_ips``) and node secondary IP addresses
  (``ssconf_node_secondary_ips``)
- Added ``ganeti-confd`` and a client library to query the Ganeti
  configuration via UDP
- Added the ability to run hooks after cluster initialization and
  before cluster destruction
- Added automatic mode for disk replace (``gnt-instance replace-disks
  --auto``)
- Added ``gnt-instance recreate-disks`` to re-create (empty) disks
  after catastrophic data-loss
- Added ``gnt-node repair-storage`` command to repair damaged LVM
  volume groups
- Added ``gnt-instance move`` command to move instances
- Added ``gnt-cluster watcher`` command to control the watcher
- Added ``gnt-node powercycle`` command to powercycle nodes
- Added new job status field ``lock_status``
- Added parseable error codes to cluster verification (``gnt-cluster
  verify --error-codes``) and made the output less verbose (use
  ``--verbose`` to restore the previous behaviour)
- Added UUIDs to the main config entities (cluster, nodes, instances)
- Added support for OS variants
- Added support for hashed passwords in the Ganeti remote API users
  file (``rapi_users``)
- Added an option to specify the maximum timeout on instance shutdown
- Added ``--no-ssh-init`` option to ``gnt-cluster init``
- Added a new helper script to start and stop Ganeti daemons
  (``daemon-util``), with the intent to reduce the work necessary to
  adjust Ganeti for non-Debian distributions and to start/stop daemons
  from one place
- Added more unittests
- Fixed a critical bug in ganeti-masterd startup
- Removed the configure-time ``kvm-migration-port`` parameter; this is
  now
  customisable at the cluster level for both the KVM and Xen
  hypervisors using the new ``migration_port`` parameter
- Pass the ``INSTANCE_REINSTALL`` variable to the OS installation
  script when reinstalling an instance
- Allowed ``@`` in tag names
- Migrated to Sphinx (http://sphinx.pocoo.org/) for documentation
- Many documentation updates
- Distribute hypervisor files on ``gnt-cluster redist-conf``
- ``gnt-instance reinstall`` can now reinstall multiple instances
- Updated many command line parameters
- Introduced new OS API version 15
- No longer support a default hypervisor
- Treat virtual LVs as non-existent
- Improved job locking logic to reduce lock contention
- Match instance and node names case insensitively
- Reimplemented the bash completion script to be more complete
- Improved burnin

Version 2.0.6
-------------

*(Released Thu, 4 Feb 2010)*

- Fix cleaner behaviour on nodes not in a cluster (Debian bug 568105)
- Fix a string formatting bug
- Improve safety of the code in some error paths
- Improve data validation in the master of values returned from nodes

Version 2.0.5
-------------

*(Released Thu, 17 Dec 2009)*

- Fix a security issue due to missing validation of iallocator names;
  this allowed local and remote execution of arbitrary executables
- Fix a failure of gnt-node list during instance removal
- Ship the RAPI documentation in the archive

Version 2.0.4
-------------

*(Released Wed, 30 Sep 2009)*

- Fixed many wrong messages
- Fixed a few bugs related to the locking library
- Fixed MAC checking at instance creation time
- Fixed a DRBD parsing bug related to gaps in /proc/drbd
- Fixed a few issues related to signal handling in both daemons and
  scripts
- Fixed the example startup script provided
- Fixed insserv dependencies in the example startup script (patch from
  Debian)
- Fixed handling of drained nodes in the iallocator framework
- Fixed handling of the KERNEL_PATH parameter for xen-hvm (Debian bug
  #528618)
- Fixed an error related to invalid job IDs in job polling
- Fixed job/opcode persistence on unclean master shutdown
- Fixed handling of partial job processing after unclean master
  shutdown
- Fixed error reporting from LUs; previously all errors were converted
  into execution errors
- Fixed error reporting from burnin
- Decreased significantly the memory usage of the job queue
- Optimised slightly multi-job submission
- Optimised slightly opcode loading
- Backported the multi-job submit framework from the development
  branch; multi-instance start and stop should be faster
- Added a script to clean archived jobs after 21 days; this will
  reduce the size of the queue directory
- Added some extra checks in disk size tracking
- Added an example ethers hook script
- Added a cluster parameter that prevents Ganeti from modifying
  /etc/hosts
- Added more node information to RAPI responses
- Added a ``gnt-job watch`` command that allows following the output
  of a job
- Added a bind-address option to ganeti-rapi
- Added more checks to the configuration verify
- Enhanced the burnin script such that some operations can be retried
  automatically
- Converted instance reinstall to the multi-instance model

Version 2.0.3
-------------

*(Released Fri, 7 Aug 2009)*

- Added ``--ignore-size`` to the ``gnt-instance activate-disks``
  command to allow using the pre-2.0.2 behaviour in activation, if any
  existing instances have mismatched disk sizes in the configuration
- Added ``gnt-cluster repair-disk-sizes`` command to check and update
  any configuration mismatches for disk sizes
- Added ``gnt-master cluster-failover --no-voting``
  to allow master failover to work on two-node clusters
- Fixed the ``--net`` option of ``gnt-backup import``, which was
  unusable
- Fixed detection of OS script errors in ``gnt-backup export``
- Fixed the exit code of ``gnt-backup export``

Version 2.0.2
-------------

*(Released Fri, 17 Jul 2009)*

- Added experimental support for striped logical volumes; this should
  enhance performance but comes with a higher complexity in the block
  device handling; striping is only enabled when passing
  ``--with-lvm-stripecount=N`` to ``configure``, but codepaths are
  affected even in the non-striped mode
- Improved resiliency against transient failures at the end of DRBD
  resyncs, and in general of DRBD resync checks
- Fixed a couple of issues with exports and snapshot errors
- Fixed a couple of issues in instance listing
- Added display of the disk size in ``gnt-instance info``
- Fixed checking for valid OSes in instance creation
- Fixed handling of the "vcpus" parameter in instance listing, and in
  general of invalid parameters
- Fixed the http server library, and thus RAPI, to handle invalid
  username/password combinations correctly; this means that now they
  report unauthorized for queries too, not only for modifications,
  allowing earlier detection of configuration problems
- Added a new "role" node list field, equivalent to the master/master
  candidate/drained/offline flag combinations
- Fixed cluster modify and changes of the candidate pool size
- Fixed cluster verify error messages for wrong files on regular nodes
- Fixed a couple of issues with node demotion from the master
  candidate role
- Fixed node readd issues
- Added a non-interactive mode for ``ganeti-masterd --no-voting``
  startup
- Added a new ``--no-voting`` option for masterfailover to fix
  failover on two-node clusters when the former master node is
  unreachable
- Added instance reinstall over RAPI

Version 2.0.1
-------------

*(Released Tue, 16 Jun 2009)*

- added ``-H``/``-B`` startup parameters to ``gnt-instance``, which
  will allow re-adding the start in single-user option (regression
  from 1.2)
- the watcher writes the instance status to a file, to allow
  monitoring to report the instance status (from the master) based on
  cached results of the watcher's queries; while this can get stale if
  the watcher is being locked due to other work on the cluster, this
  is still an improvement
- the watcher now also restarts the node daemon and the rapi daemon if
  they died
- fixed the watcher to handle full and drained queue cases
- hooks export more instance data in the environment, which helps if
  hook scripts need to take action based on the instance's properties
  (no longer need to query back into ganeti)
- instance failovers when the instance is stopped do not check for
  free RAM, so that failing over a stopped instance is possible in low
  memory situations
- rapi uses queries for tags instead of jobs (for less job traffic),
  and for cluster tags it won't talk to masterd at all but read them
  from ssconf
- a couple of error handling fixes in RAPI
- drbd handling: improved the error handling of inconsistent disks
  after resync, to reduce the frequency of "there are some degraded
  disks for this instance" messages
- fixed a bug in live migration when DRBD doesn't want to reconnect
  (the error handling path called a wrong function name)

Version 2.0.0
-------------

*(Released Wed, 27 May 2009)*

- no changes from rc5

Version 2.0 rc5
---------------

*(Released Wed, 20 May 2009)*

- fix a couple of bugs (validation, argument checks)
- fix ``gnt-cluster getmaster`` on non-master nodes
  (regression)
- some small improvements to RAPI and IAllocator
- make the watcher automatically start the master daemon if down

Version 2.0 rc4
---------------

*(Released Mon, 27 Apr 2009)*

- change the OS list to not require locks; this helps with big
  clusters
- fix ``gnt-cluster verify`` and ``gnt-cluster verify-disks`` when the
  volume group is broken
- ``gnt-instance info``, without any arguments, doesn't run for all
  instances anymore; either pass ``--all`` or pass the desired
  instances; this helps against mistakes on big clusters where listing
  the information for all instances takes a long time
- miscellaneous doc and man page fixes

Version 2.0 rc3
---------------

*(Released Wed, 8 Apr 2009)*

- Change the internal locking model of some ``gnt-node`` commands, in
  order to reduce contention (and blocking of the master daemon) when
  batching many creation/reinstall jobs
- Fixes to Xen soft reboot
- No longer build documentation at build time; instead distribute it
  in the archive, in order to reduce the need for the whole
  docbook/rst toolchains

Version 2.0 rc2
---------------

*(Released Fri, 27 Mar 2009)*

- Now the cfgupgrade script works and can upgrade 1.2.7 clusters to
  2.0
- Fix the watcher startup sequence, improving the behaviour of busy
  clusters
- Some other fixes in ``gnt-cluster verify``, ``gnt-instance
  replace-disks``, ``gnt-instance add``, ``gnt-cluster queue``, the
  KVM VNC bind address and other places
- Some documentation fixes and updates

Version 2.0 rc1
---------------

*(Released Mon, 2 Mar 2009)*

- More documentation updates, now all docs should be more-or-less
  up-to-date
- A couple of small fixes (mixed hypervisor clusters, offline nodes,
  etc.)
- Added a customizable HV_KERNEL_ARGS hypervisor parameter (for Xen
  PVM and KVM)
- Fix an issue related to $libdir/run/ganeti and cluster creation

Version 2.0 beta2
-----------------

*(Released Thu, 19 Feb 2009)*

- Xen PVM and KVM have switched the default value for the instance
  root disk to the first partition on the first drive, instead of the
  whole drive; this means that the OS installation scripts must be
  changed accordingly
- Man pages have been updated
- RAPI has been switched by default to HTTPS, and the exported
  functions should all work correctly
- RAPI v1 has been removed
- Many improvements to the KVM hypervisor
- Block device errors are now better reported
- Many other bugfixes and small improvements

Version 2.0 beta1
-----------------

*(Released Mon, 26 Jan 2009)*

- Version 2 is a general rewrite of the code and therefore the
  differences are too many to list, see the design document for 2.0 in
  the ``doc/`` subdirectory for more details
- In this beta version there is not yet a migration path from 1.2
  (there will be one in the final 2.0 release)
- A few significant changes are:

  - all commands are executed by a daemon (``ganeti-masterd``) and the
    various ``gnt-*`` commands are just front-ends to it
  - all the commands are entered into, and executed from a job queue,
    see the ``gnt-job(8)`` manpage
  - the RAPI daemon supports read-write operations, secured by basic
    HTTP authentication on top of HTTPS
  - DRBD version 0.7 support has been removed, DRBD 8 is the only
    supported version (when migrating from Ganeti 1.2 to 2.0, you need
    to migrate to DRBD 8 first while still running Ganeti 1.2)
  - DRBD devices are using statically allocated minor numbers, which
    will be assigned to existing instances during the migration
    process
  - there is support for both Xen PVM and Xen HVM instances running on
    the same cluster
  - KVM virtualization is supported too
  - file-based storage has been implemented, which means that it is
    possible to run the cluster without LVM and DRBD storage, for
    example using a shared filesystem exported from shared storage
    (and still have live migration)

Version 1.2.7
-------------

*(Released Tue, 13 Jan 2009)*

- Change the default reboot type in ``gnt-instance reboot`` to "hard"
- Reuse the old instance mac address by default on instance import, if
  the instance name is the same
- Handle situations in which the node info rpc returns incomplete
  results (issue 46)
- Add checks for tcp/udp port collisions in ``gnt-cluster verify``
- Improved version of the batcher:

  - state file support
  - instance mac address support
  - support for HVM clusters/instances

- Add an option to show the number of cpu sockets and nodes in
  ``gnt-node list``
- Support OSes that handle more than one version of the OS api (but do
  not change the current API in any other way)
- Fix ``gnt-node migrate``
- Add a ``gnt-debug`` man page
- Fix various other typos and small issues
- Increase the disk resync maximum speed to 60MB/s (from 30MB/s)

Version 1.2.6
-------------

*(Released Wed, 24 Sep 2008)*

- new ``--hvm-nic-type`` and ``--hvm-disk-type`` flags to control the
  type of disk exported to fully virtualized instances
- provide access to the serial console of HVM instances
- instance auto_balance flag, set by default; if turned off it will
  avoid warnings on cluster verify if there is not enough memory to
  fail over an instance; in the future it will prevent automatic
  failover, once that is supported
- batcher tool for instance creation, see ``tools/README.batcher``
- ``gnt-instance reinstall --select-os`` to interactively select a new
  operating system when reinstalling an instance
- when changing the memory amount on instance modify, a check has been
  added that the instance will be able to start; also, warnings are
  emitted if the instance will not be able to fail over, if
  auto_balance is true
- documentation fixes
- sync fields between ``gnt-instance list/modify/add/import``
- fix a race condition in drbd when the sync speed was set after
  giving the device a remote peer

Version 1.2.5
-------------

*(Released Tue, 22 Jul 2008)*

- note: the allowed size and number of tags per object were reduced
- fix a bug in ``gnt-cluster verify`` with inconsistent volume groups
- fixed twisted 8.x compatibility
- fixed ``gnt-instance replace-disks`` with iallocator
- add TCP keepalives on twisted connections to detect restarted nodes
- disk increase support, see ``gnt-instance grow-disk``
- implement bulk node/instance query for RAPI
- add tags in node/instance listing (optional)
- experimental migration (and live migration) support, read the man
  page for ``gnt-instance migrate``
- the ``ganeti-watcher`` logs are now timestamped, and the watcher
  also has some small improvements in handling its state file

Version 1.2.4
-------------

*(Released Fri, 13 Jun 2008)*

- Experimental readonly, REST-based remote API implementation;
  automatically started on the master node, TCP port 5080, if enabled
  by the ``--enable-rapi`` parameter to the configure script
- Instance allocator support. Add and import instance accept an
  ``--iallocator`` parameter, and call that instance allocator to
  decide which node to use for the instance. The iallocator document
  describes what's expected from an allocator script.
- ``gnt-cluster verify`` N+1 memory redundancy checks: unless passed
  the ``--no-nplus1-mem`` option, ``gnt-cluster verify`` now checks
  that if a node is lost there is still enough memory to fail over the
  instances that reside on it
- ``gnt-cluster verify`` hooks: it is now possible to add post-hooks
  to ``gnt-cluster verify``, to check for site-specific compliance.
  All the hooks will run, and their output, if any, will be displayed.
  Any failing hook will make the verification return an error value.
- ``gnt-cluster verify`` now checks that its peers are reachable on
  the primary and secondary interfaces
- ``gnt-node add`` now supports the ``--readd`` option, to readd a
  node that is still declared as part of the cluster and has failed
- ``gnt-* list`` commands now accept a new ``-o +field`` way of
  specifying output fields, which just adds the chosen fields to the
  default ones
- ``gnt-backup`` now has a new ``remove`` command to delete an
  existing export from the filesystem
- New per-instance parameters hvm_acpi, hvm_pae and
  hvm_cdrom_image_path have been added. Using them you can
  enable/disable ACPI and PAE support, and specify a path for a CD
  image to be exported to the instance. These parameters, as the name
  suggests, only work on HVM clusters.
- When upgrading an HVM cluster to Ganeti 1.2.4, the values for ACPI
  and PAE support will be set to the previously hardcoded values, but
  the (previously hardcoded) path to the CDROM ISO image will be unset
  and, if required, needs to be set manually with ``gnt-instance
  modify`` after the upgrade
- The address to which an instance's VNC console is bound is now
  selectable per-instance, rather than being cluster-wide. Of course
  this only applies to instances controlled via VNC, so currently it
  just applies to HVM clusters.
Version 1.2.3
-------------

*(Released Mon, 18 Feb 2008)*

- more tweaks to the disk activation code (especially helpful for
  DRBD)
- change the default ``gnt-instance list`` output format; now there is
  one combined status field (see the manpage for the exact values this
  field will have)
- some more fixes for the mac export to hooks change
- make Ganeti not break with DRBD 8.2.x (which changed the version
  format in ``/proc/drbd``) (issue 24)
- add an upgrade tool from the "remote_raid1" disk template to the
  "drbd" disk template, allowing migration from DRBD0.7+MD to DRBD8

Version 1.2.2
-------------

*(Released Wed, 30 Jan 2008)*

- fix ``gnt-instance modify`` breakage introduced in 1.2.1 with the
  HVM support (issue 23)
- add command aliases infrastructure and a few aliases
- allow listing of VCPUs in the ``gnt-instance list`` and improve the
  man pages and the ``--help`` option of ``gnt-node
  list``/``gnt-instance list``
- fix ``gnt-backup list`` with down nodes (issue 21)
- change the tools location (move from $pkgdatadir to
  $pkglibdir/tools)
- fix the dist archive and add a check for including svn/git files in
  the future
- some developer-related changes: improve the burnin and the QA suite,
  add an upload script for testing during development

Version 1.2.1
-------------

*(Released Wed, 16 Jan 2008)*

- experimental HVM support, read the install document, section
  "Initializing the cluster"
- allow for the PVM hypervisor per-instance kernel and initrd paths
- add a new command ``gnt-cluster verify-disks`` which uses a new
  algorithm to improve the reconnection of the DRBD pairs if the
  device on the secondary node has gone away
- make the logical volume code auto-activate LVs at disk activation
  time
- slightly improve the speed of activating disks
- allow specification of the MAC address at instance creation time,
  and changing it later via ``gnt-instance modify``
- fix handling of external commands that generate lots of output on
  stderr
- update documentation with regard to the minimum version of DRBD8
  supported

Version 1.2.0
-------------

*(Released Tue, 4 Dec 2007)*

- Log the ``xm create`` output to the node daemon log on failure (to
  help diagnose the error)
- In debug mode, log the output of all failed external commands
- Change parsing of lvm commands to ignore stderr

Version 1.2 beta3
-----------------

*(Released Wed, 28 Nov 2007)*

- Another round of updates to the DRBD 8 code to deal with more
  failures in the replace secondary node operation
- Some more logging of failures in disk operations (lvm, drbd)
- A few documentation updates
- QA updates

Version 1.2 beta2
-----------------

*(Released Tue, 13 Nov 2007)*

- Change the configuration file format from Python's Pickle to JSON.
  Upgrading is possible using the cfgupgrade utility.
- Add support for DRBD 8.0 (new disk template ``drbd``) which allows
  for faster disk replaces and is more stable (DRBD 8 has many
  improvements compared to DRBD 0.7)
- Added command line tags support (see the man pages for
  ``gnt-instance``, ``gnt-node``, ``gnt-cluster``)
- Added instance rename support
- Added multi-instance startup/shutdown
- Added cluster rename support
- Added ``gnt-node evacuate`` to simplify some node operations
- Added an instance reboot operation that can speed up reboots as
  compared to stop and start
- Soften the requirement that hostnames are in FQDN format
- The ``ganeti-watcher`` now activates drbd pairs after secondary node
  reboots
- Removed the dependency on Debian's patched fping that uses the
  non-standard ``-S`` option
- Now the OS definitions are searched for in multiple, configurable
  paths (easier for distros to package)
- Some changes to the hooks infrastructure (especially the new
  post-configuration update hook)
- Other small bugfixes

.. vim: set textwidth=72 syntax=rst :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/iallocator.rst0000644000000000000000000004306612271422343016433 0ustar00rootroot00000000000000Ganeti automatic instance allocation
====================================

Documents Ganeti version 2.9

.. contents::

Introduction
------------

Currently in Ganeti the admin has to specify the exact locations for
an instance's node(s). This prevents a completely automatic node
evacuation, and is in general a nuisance. The *iallocator* framework
will enable automatic placement via external scripts, which allows
customization of the cluster layout per the site's requirements.

User-visible changes
~~~~~~~~~~~~~~~~~~~~

There are two parts of the Ganeti operation that are impacted by the
auto-allocation: how the cluster knows what the allocator algorithms
are and how the admin uses these in creating instances.

An allocation algorithm is just the filename of a program installed in
a defined list of directories.

Cluster configuration
~~~~~~~~~~~~~~~~~~~~~

At configure time, the list of the directories can be selected via the
``--with-iallocator-search-path=LIST`` option, where *LIST* is a
comma-separated list of directories. If not given, this defaults to
``$libdir/ganeti/iallocators``, i.e. for an installation under
``/usr``, this will be ``/usr/lib/ganeti/iallocators``.

Ganeti will then search for the allocator script in the configured
list, using the first one whose filename matches the one given by the
user.

Command line interface changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The node selection options in instance add and instance replace disks
can be replaced by the new ``--iallocator=NAME`` option (shortened to
``-I``), which will cause the auto-assignment of nodes with the passed
iallocator. The selected node(s) will be shown as part of the command
output.

IAllocator API
--------------

The protocol for communication between Ganeti and an allocator script
will be the following:

#. Ganeti launches the program with a single argument, a filename that
   contains a JSON-encoded structure (the input message)

#. if the script finishes with exit code different from zero, it is
   considered a general failure and the full output will be reported
   to the users; this can be the case when the allocator can't parse
   the input message

#. if the allocator finishes with exit code zero, it is expected to
   output (on its stdout) a JSON-encoded structure (the response)
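As an illustration only (this is not one of the shipped allocators,
and the decision logic is a deliberately trivial stub), a script
obeying this calling convention could look like::

  #!/usr/bin/python
  #
  # Minimal iallocator sketch: read the input message, emit a
  # (failing) response. A real allocator would compute a placement.

  import json
  import sys

  def main():
    # Ganeti passes a single argument: the path of the JSON input file.
    data = json.load(open(sys.argv[1]))

    request = data["request"]
    if request["type"] != "allocate":
      # A non-zero exit code signals a general failure to Ganeti.
      sys.stderr.write("unsupported request type %r\n" % request["type"])
      sys.exit(1)

    # Placeholder: a real allocator would select request["required_nodes"]
    # nodes here, based on the "nodes", "instances" and policy data.
    response = {
      "success": False,
      "info": "placement logic not implemented",
      "result": [],
    }
    # With exit code zero, the JSON response is expected on stdout.
    json.dump(response, sys.stdout)
    sys.exit(0)

  if __name__ == "__main__":
    main()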
Input message
~~~~~~~~~~~~~

The input message will be the JSON encoding of a dictionary containing
all the required information to perform the operation. We explain the
contents of this dictionary in two parts: common information that
every type of operation requires, and operation-specific information.

Common information
++++++++++++++++++

All input dictionaries to the IAllocator must carry the following
keys:

version
  the version of the protocol; this document specifies version 2

cluster_name
  the cluster name

cluster_tags
  the list of cluster tags

enabled_hypervisors
  the list of enabled hypervisors

ipolicy
  the cluster-wide instance policy (for information; the per-node
  group values take precedence and should be used instead)

request
  a dictionary containing the details of the request; the keys vary
  depending on the type of operation that's being requested, as
  explained in `Operation-specific input`_ below.

nodegroups
  a dictionary with the data for the cluster's node groups; it is
  keyed on the group UUID, and the values are a dictionary with the
  following keys:

  name
    the node group name
  alloc_policy
    the allocation policy of the node group (consult the semantics of
    this attribute in the :manpage:`gnt-group(8)` manpage)
  networks
    the list of network UUIDs this node group is connected to
  ipolicy
    the instance policy of the node group
  tags
    the list of node group tags

instances
  a dictionary with the data for the currently existing instances on
  the cluster, indexed by instance name; the contents are similar to
  the instance definitions for the allocate mode, with the addition
  of:

  admin_state
    if this instance is set to run (but not the actual status of the
    instance)
  nodes
    list of nodes on which this instance is placed; the primary node
    of the instance is always the first one

nodes
  dictionary with the data for the nodes in the cluster, indexed by
  the node name; the dict contains [*]_ :

  total_disk
    the total disk size of this node (mebibytes)
  free_disk
    the free disk space on the node
  total_memory
    the total memory size
  free_memory
    free memory on the node; note that currently this does not take
    into account the instances which are down on the node
  total_cpus
    the physical number of CPUs present on the machine; depending on
    the hypervisor, this might or might not be equal to how many CPUs
    the node operating system sees
  primary_ip
    the primary IP address of the node
  secondary_ip
    the secondary IP address of the node (the one used for the DRBD
    replication); note that this can be the same as the primary one
  tags
    list with the tags of the node
  master_candidate:
    a boolean flag denoting whether this node is a master candidate
  drained:
    a boolean flag denoting whether this node is being drained
  offline:
    a boolean flag denoting whether this node is offline
  i_pri_memory:
    total memory required by primary instances
  i_pri_up_memory:
    total memory required by running primary instances
  group:
    the node group that this node belongs to

No allocations should be made on nodes having either the ``drained``
or ``offline`` flags set. More details about these node status flags
are available in the manpage :manpage:`ganeti(7)`.
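For instance, an allocator honouring the rule above might filter its
candidate nodes like this (an illustrative helper, not part of the
API)::

  def candidate_nodes(data):
    # "data" is the decoded input message; skip nodes that must not
    # receive allocations because they are drained or offline.
    return [name for (name, info) in data["nodes"].items()
            if not info.get("offline") and not info.get("drained")]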
.. [*] Note that no run-time data is present for offline, drained or
   non-vm_capable nodes; this means the tags total_memory,
   reserved_memory, free_memory, total_disk, free_disk, total_cpus,
   i_pri_memory and i_pri_up_memory will be absent

Operation-specific input
++++++++++++++++++++++++

All input dictionaries to the IAllocator carry, in the ``request``
dictionary, detailed information about the operation that's being
requested. The required keys vary depending on the type of operation,
as follows.

In all cases, it includes:

type
  the request type; this can be either ``allocate``, ``relocate``,
  ``change-group`` or ``node-evacuate``.

The ``allocate`` request is used when a new instance needs to be
placed on the cluster.

The ``relocate`` request is used when an existing instance needs to be
moved within its node group.

The ``multi-evacuate`` protocol was used to request that the script
compute the optimal relocate solution for all secondary instances of
the given nodes. It is now deprecated and needs to be implemented only
if backwards compatibility with Ganeti 2.4 and lower is needed.

The ``change-group`` request is used to relocate multiple instances
across multiple node groups. ``node-evacuate`` evacuates instances off
their node(s). These are described in a separate :ref:`design
document `.

The ``multi-allocate`` request is used to allocate multiple instances
on the cluster. The request is otherwise very similar to the
``allocate`` one. For more details look at :doc:`Ganeti bulk create `.

For both allocate and relocate mode, the following extra keys are
needed in the ``request`` dictionary:

name
  the name of the instance; if the request is a relocation, then this
  name will be found in the list of instances (see below), otherwise
  it is the FQDN of the new instance; type *string*

required_nodes
  how many nodes the algorithm should return; while this information
  can be deduced from the instance's disk template, it's better if
  this computation is left to Ganeti as then allocator scripts are
  less sensitive to changes to the disk templates; type *integer*

disk_space_total
  the total disk space that will be used by this instance on the (new)
  nodes; again, this information can be computed from the list of
  instance disks and its template type, but Ganeti is better suited to
  compute it; type *integer*
.. pyassert::

   constants.DISK_ACCESS_SET == set([constants.DISK_RDONLY,
     constants.DISK_RDWR])

Allocation needs, in addition:

disks
  list of dictionaries holding the disk definitions for this instance
  (in the order they are exported to the hypervisor):

  mode
    either :pyeval:`constants.DISK_RDONLY` or
    :pyeval:`constants.DISK_RDWR` denoting if the disk is read-only or
    writable
  size
    the size of this disk in mebibytes

nics
  a list of dictionaries holding the network interfaces for this
  instance, containing:

  ip
    the IP address that Ganeti knows for this instance, or null
  mac
    the MAC address for this interface
  bridge
    the bridge to which this interface will be connected

vcpus
  the number of VCPUs for the instance

disk_template
  the disk template for the instance

memory
  the memory size for the instance

os
  the OS type for the instance

tags
  the list of the instance's tags

hypervisor
  the hypervisor of this instance

Relocation:

relocate_from
  a list of nodes to move the instance away from; for DRBD-based
  instances, this will contain a single node, the current secondary of
  the instance, whereas for shared-storage instances, this will also
  contain a single node, the current primary of the instance; type
  *list of strings*

As for ``node-evacuate``, it needs the following request arguments:

instances
  a list of instance names to evacuate; type *list of strings*

evac_mode
  specify which instances to evacuate; one of ``primary-only``,
  ``secondary-only``, ``all``; type *string*

``change-group`` needs the following request arguments:

instances
  a list of instance names whose group to change; type *list of
  strings*

target_groups
  must either be the empty list, or contain a list of group UUIDs that
  should be considered for relocating instances to; type *list of
  strings*

``multi-allocate`` needs the following request arguments:

instances
  a list of request dicts

Response message
~~~~~~~~~~~~~~~~

The response message is much simpler than the input one. It is also a
dict having three keys:

success
  a boolean value denoting if the allocation was successful or not

info
  a string with information from the scripts; if the allocation fails,
  this will be shown to the user

result
  the output of the algorithm; even if the algorithm failed (i.e.
  success is false), this must be returned as an empty list

  for allocate/relocate, this is the list of node(s) for the instance;
  note that the length of this list must equal the ``required_nodes``
  entry in the input message, otherwise Ganeti will consider the
  result as failed

  for the ``node-evacuate`` and ``change-group`` modes, this is a
  dictionary containing, among other information, a list of lists of
  serialized opcodes; see the :ref:`design document ` for a detailed
  description

  for the ``multi-allocate`` mode, this is a tuple of two lists: the
  first element of the tuple is a list of successful allocations, with
  the instance name as the first element of each entry and the node
  placement as the second; the second element of the tuple is the list
  of instances whose allocation failed
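To make the length requirement above concrete, the following is a
sketch of the kind of sanity check Ganeti applies to an
allocate/relocate response (an illustrative helper, not Ganeti's
actual code)::

  def check_response(response, required_nodes):
    # The three keys must always be present, even on failure.
    for key in ("success", "info", "result"):
      if key not in response:
        raise ValueError("missing key %r in response" % key)
    if not response["success"]:
      # "result" must still be present (an empty list) in this case.
      return False
    if len(response["result"]) != required_nodes:
      raise ValueError("expected %d node(s), got %d" %
                       (required_nodes, len(response["result"])))
    return True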
.. note:: The current Ganeti version accepts either ``result`` or
   ``nodes`` as a backwards-compatibility measure (older versions only
   supported ``nodes``)

Examples
--------

Input messages to scripts
~~~~~~~~~~~~~~~~~~~~~~~~~

Input message, new instance allocation (common elements are listed
this time, but not included in further examples below)::

  {
    "version": 2,
    "cluster_name": "cluster1.example.com",
    "cluster_tags": [],
    "enabled_hypervisors": [ "xen-pvm" ],
    "nodegroups": {
      "f4e06e0d-528a-4963-a5ad-10f3e114232d": {
        "name": "default",
        "alloc_policy": "preferred",
        "networks": ["net-uuid-1", "net-uuid-2"],
        "ipolicy": {
          "disk-templates": ["drbd", "plain"],
          "minmax": [
            {
              "max": { "cpu-count": 2, "disk-count": 8, "disk-size": 2048,
                       "memory-size": 12800, "nic-count": 8,
                       "spindle-use": 8 },
              "min": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024,
                       "memory-size": 128, "nic-count": 1,
                       "spindle-use": 1 }
            }
          ],
          "spindle-ratio": 32.0,
          "std": { "cpu-count": 1, "disk-count": 1, "disk-size": 1024,
                   "memory-size": 128, "nic-count": 1, "spindle-use": 1 },
          "vcpu-ratio": 4.0
        },
        "tags": ["ng-tag-1", "ng-tag-2"]
      }
    },
    "instances": {
      "instance1.example.com": {
        "tags": [],
        "should_run": false,
        "disks": [ { "mode": "w", "size": 64 },
                   { "mode": "w", "size": 512 } ],
        "nics": [ { "ip": null, "mac": "aa:00:00:00:60:bf",
                    "bridge": "xen-br0" } ],
        "vcpus": 1,
        "disk_template": "plain",
        "memory": 128,
        "nodes": [ "node1.example.com" ],
        "os": "debootstrap+default"
      },
      "instance2.example.com": {
        "tags": [],
        "should_run": false,
        "disks": [ { "mode": "w", "size": 512 },
                   { "mode": "w", "size": 256 } ],
        "nics": [ { "ip": null, "mac": "aa:00:00:55:f8:38",
                    "bridge": "xen-br0" } ],
        "vcpus": 1,
        "disk_template": "drbd",
        "memory": 512,
        "nodes": [ "node2.example.com", "node3.example.com" ],
        "os": "debootstrap+default"
      }
    },
    "nodes": {
      "node1.example.com": {
        "total_disk": 858276,
        "primary_ip": "198.51.100.1",
        "secondary_ip": "192.0.2.1",
        "tags": [],
        "group": "f4e06e0d-528a-4963-a5ad-10f3e114232d",
        "free_memory": 3505,
        "free_disk": 856740,
        "total_memory": 4095
      },
      "node2.example.com": {
        "total_disk": 858240,
        "primary_ip": "198.51.100.2",
        "secondary_ip": "192.0.2.2",
        "tags": ["test"],
        "group": "f4e06e0d-528a-4963-a5ad-10f3e114232d",
        "free_memory": 3505,
        "free_disk": 848320,
        "total_memory": 4095
      },
      "node3.example.com": {
        "total_disk": 572184,
        "primary_ip": "198.51.100.3",
        "secondary_ip": "192.0.2.3",
        "tags": [],
        "group": "f4e06e0d-528a-4963-a5ad-10f3e114232d",
        "free_memory": 3505,
        "free_disk": 570648,
        "total_memory": 4095
      }
    },
    "request": {
      "type": "allocate",
      "name": "instance3.example.com",
      "required_nodes": 2,
      "disk_space_total": 3328,
      "disks": [ { "mode": "w", "size": 1024 },
                 { "mode": "w", "size": 2048 } ],
      "nics": [ { "ip": null, "mac": "00:11:22:33:44:55",
                  "bridge": null } ],
      "vcpus": 1,
      "disk_template": "drbd",
      "memory": 2048,
      "os": "debootstrap+default",
      "tags": [ "type:test", "owner:foo" ],
      "hypervisor": "xen-pvm"
    }
  }

Input message, reallocation::

  {
    "version": 2,
    ...
"request": { "type": "relocate", "name": "instance2.example.com", "required_nodes": 1, "disk_space_total": 832, "relocate_from": [ "node3.example.com" ] } } Response messages ~~~~~~~~~~~~~~~~~ Successful response message:: { "success": true, "info": "Allocation successful", "result": [ "node2.example.com", "node1.example.com" ] } Failed response message:: { "success": false, "info": "Can't find a suitable node for position 2 (already selected: node2.example.com)", "result": [] } Successful node evacuation message:: { "success": true, "info": "Request successful", "result": [ [ "instance1", "node3" ], [ "instance2", "node1" ] ] } Command line messages ~~~~~~~~~~~~~~~~~~~~~ :: # gnt-instance add -t plain -m 2g --os-size 1g --swap-size 512m --iallocator hail -o debootstrap+default instance3 Selected nodes for the instance: node1.example.com * creating instance disks... [...] # gnt-instance add -t plain -m 3400m --os-size 1g --swap-size 512m --iallocator hail -o debootstrap+default instance4 Failure: prerequisites not met for this operation: Can't compute nodes using iallocator 'hail': Can't find a suitable node for position 1 (already selected: ) # gnt-instance add -t drbd -m 1400m --os-size 1g --swap-size 512m --iallocator hail -o debootstrap+default instance5 Failure: prerequisites not met for this operation: Can't compute nodes using iallocator 'hail': Can't find a suitable node for position 2 (already selected: node1.example.com) Reference implementation ~~~~~~~~~~~~~~~~~~~~~~~~ Ganeti's default iallocator is "hail" which is available when "htools" components have been enabled at build time (see :doc:`install-quick` for more details). .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/walkthrough.rst0000644000000000000000000012507212244641676016653 0ustar00rootroot00000000000000Ganeti walk-through =================== Documents Ganeti version |version| .. contents:: .. highlight:: shell-example Introduction ------------ This document serves as a more example-oriented guide to Ganeti; while the administration guide shows a conceptual approach, here you will find a step-by-step example to managing instances and the cluster. Our simulated, example cluster will have three machines, named ``node1``, ``node2``, ``node3``. Note that in real life machines will usually have FQDNs but here we use short names for brevity. We will use a secondary network for replication data, ``192.0.2.0/24``, with nodes having the last octet the same as their index. The cluster name will be ``example-cluster``. All nodes have the same simulated hardware configuration, two disks of 750GB, 32GB of memory and 4 CPUs. On this cluster, we will create up to seven instances, named ``instance1`` to ``instance7``. Cluster creation ---------------- Follow the :doc:`install` document and prepare the nodes. Then it's time to initialise the cluster:: $ gnt-cluster init -s %192.0.2.1% --enabled-hypervisors=xen-pvm %example-cluster% $ The creation was fine. 
Let's check that the one node we have is functioning correctly::

  $ gnt-node list
  Node  DTotal DFree MTotal MNode MFree Pinst Sinst
  node1   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
  $ gnt-cluster verify
  Mon Oct 26 02:08:51 2009 * Verifying global settings
  Mon Oct 26 02:08:51 2009 * Gathering data (1 nodes)
  Mon Oct 26 02:08:52 2009 * Verifying node status
  Mon Oct 26 02:08:52 2009 * Verifying instance status
  Mon Oct 26 02:08:52 2009 * Verifying orphan volumes
  Mon Oct 26 02:08:52 2009 * Verifying remaining instances
  Mon Oct 26 02:08:52 2009 * Verifying N+1 Memory redundancy
  Mon Oct 26 02:08:52 2009 * Other Notes
  Mon Oct 26 02:08:52 2009 * Hooks Results
  $

Since this proceeded correctly, let's add the other two nodes::

  $ gnt-node add -s %192.0.2.2% %node2%
  -- WARNING --
  Performing this operation is going to replace the ssh daemon keypair
  on the target machine (node2) with the ones of the current one
  and grant full intra-cluster ssh root access to/from it

  Unable to verify hostkey of host xen-devi-5.fra.corp.google.com:
  f7:….
  Do you want to accept it?
  y/[n]/?: %y%
  Mon Oct 26 02:11:53 2009  Authentication to node2 via public key failed, trying password
  root password:
  Mon Oct 26 02:11:54 2009 - INFO: Node will be a master candidate
  $ gnt-node add -s %192.0.2.3% %node3%
  -- WARNING --
  Performing this operation is going to replace the ssh daemon keypair
  on the target machine (node3) with the ones of the current one
  and grant full intra-cluster ssh root access to/from it

  …
  Mon Oct 26 02:12:43 2009 - INFO: Node will be a master candidate

Checking the cluster status again::

  $ gnt-node list
  Node  DTotal DFree MTotal MNode MFree Pinst Sinst
  node1   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
  node2   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
  node3   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
  $ gnt-cluster verify
  Mon Oct 26 02:15:14 2009 * Verifying global settings
  Mon Oct 26 02:15:14 2009 * Gathering data (3 nodes)
  Mon Oct 26 02:15:16 2009 * Verifying node status
  Mon Oct 26 02:15:16 2009 * Verifying instance status
  Mon Oct 26 02:15:16 2009 * Verifying orphan volumes
  Mon Oct 26 02:15:16 2009 * Verifying remaining instances
  Mon Oct 26 02:15:16 2009 * Verifying N+1 Memory redundancy
  Mon Oct 26 02:15:16 2009 * Other Notes
  Mon Oct 26 02:15:16 2009 * Hooks Results
  $

And let's check that we have a valid OS::

  $ gnt-os list
  Name
  debootstrap
  node1#

Running a burn-in
-----------------

Now that the cluster is created, it is time to check that the hardware
works correctly, that the hypervisor can actually create instances,
etc. This is done via the debootstrap tool as described in the admin
guide.
Similar output lines are replaced with ``…`` in the below log::

  $ /usr/lib/ganeti/tools/burnin -o debootstrap -p instance{1..5}
  - Testing global parameters
  - Creating instances
    * instance instance1 on node1, node2
    * instance instance2 on node2, node3
    …
    * instance instance5 on node2, node3
    * Submitted job ID(s) 157, 158, 159, 160, 161
      waiting for job 157 for instance1
      …
      waiting for job 161 for instance5
  - Replacing disks on the same nodes
    * instance instance1
      run replace_on_secondary
      run replace_on_primary
    …
    * instance instance5
      run replace_on_secondary
      run replace_on_primary
    * Submitted job ID(s) 162, 163, 164, 165, 166
      waiting for job 162 for instance1
      …
  - Changing the secondary node
    * instance instance1 run replace_new_secondary node3
    * instance instance2 run replace_new_secondary node1
    …
    * instance instance5 run replace_new_secondary node1
    * Submitted job ID(s) 167, 168, 169, 170, 171
      waiting for job 167 for instance1
      …
  - Growing disks
    * instance instance1 increase disk/0 by 128 MB
    …
    * instance instance5 increase disk/0 by 128 MB
    * Submitted job ID(s) 173, 174, 175, 176, 177
      waiting for job 173 for instance1
      …
  - Failing over instances
    * instance instance1
    …
    * instance instance5
    * Submitted job ID(s) 179, 180, 181, 182, 183
      waiting for job 179 for instance1
      …
  - Migrating instances
    * instance instance1 migration and migration cleanup
    …
    * instance instance5 migration and migration cleanup
    * Submitted job ID(s) 184, 185, 186, 187, 188
      waiting for job 184 for instance1
      …
  - Exporting and re-importing instances
    * instance instance1
      export to node node3
      remove instance
      import from node3 to node1, node2
      remove export
    …
    * instance instance5
      export to node node1
      remove instance
      import from node1 to node2, node3
      remove export
    * Submitted job ID(s) 196, 197, 198, 199, 200
      waiting for job 196 for instance1
      …
  - Reinstalling instances
    * instance instance1
      reinstall without passing the OS
      reinstall specifying the OS
    …
    * instance instance5
      reinstall without passing the OS
      reinstall specifying the OS
    * Submitted job ID(s) 203, 204, 205, 206, 207
      waiting for job 203 for instance1
      …
  - Rebooting instances
    * instance instance1
      reboot with type 'hard'
      reboot with type 'soft'
      reboot with type 'full'
    …
    * instance instance5
      reboot with type 'hard'
      reboot with type 'soft'
      reboot with type 'full'
    * Submitted job ID(s) 208, 209, 210, 211, 212
      waiting for job 208 for instance1
      …
  - Adding and removing disks
    * instance instance1
      adding a disk
      removing last disk
    …
    * instance instance5
      adding a disk
      removing last disk
    * Submitted job ID(s) 213, 214, 215, 216, 217
      waiting for job 213 for instance1
      …
  - Adding and removing NICs
    * instance instance1
      adding a NIC
      removing last NIC
    …
    * instance instance5
      adding a NIC
      removing last NIC
    * Submitted job ID(s) 218, 219, 220, 221, 222
      waiting for job 218 for instance1
      …
  - Activating/deactivating disks
    * instance instance1
      activate disks when online
      activate disks when offline
      deactivate disks (when offline)
    …
    * instance instance5
      activate disks when online
      activate disks when offline
      deactivate disks (when offline)
    * Submitted job ID(s) 223, 224, 225, 226, 227
      waiting for job 223 for instance1
      …
  - Stopping and starting instances
    * instance instance1
    …
    * instance instance5
    * Submitted job ID(s) 230, 231, 232, 233, 234
      waiting for job 230 for instance1
      …
  - Removing instances
    * instance instance1
    …
    * instance instance5
    * Submitted job ID(s) 235, 236, 237, 238, 239
      waiting for job 235 for instance1
      …
  $

You can see in the above what operations the burn-in does.
Ideally, the burn-in log would proceed successfully through all the steps and end cleanly, without throwing errors.

Instance operations
-------------------

Creation
++++++++

At this point, Ganeti and the hardware seem to be functioning correctly, so we'll follow up with creating the instances manually::

  $ gnt-instance add -t drbd -o debootstrap -s %256m% %instance1%
  Mon Oct 26 04:06:52 2009 - INFO: Selected nodes for instance instance1 via iallocator hail: node2, node3
  Mon Oct 26 04:06:53 2009 * creating instance disks...
  Mon Oct 26 04:06:57 2009 adding instance instance1 to cluster config
  Mon Oct 26 04:06:57 2009 - INFO: Waiting for instance instance1 to sync disks.
  Mon Oct 26 04:06:57 2009 - INFO: - device disk/0: 20.00\% done, 4 estimated seconds remaining
  Mon Oct 26 04:07:01 2009 - INFO: Instance instance1's disks are in sync.
  Mon Oct 26 04:07:01 2009 creating os for instance instance1 on node node2
  Mon Oct 26 04:07:01 2009 * running the instance OS create scripts...
  Mon Oct 26 04:07:14 2009 * starting instance...
  $ gnt-instance add -t drbd -o debootstrap -s %256m% -n %node1%:%node2% %instance2%
  Mon Oct 26 04:11:37 2009 * creating instance disks...
  Mon Oct 26 04:11:40 2009 adding instance instance2 to cluster config
  Mon Oct 26 04:11:41 2009 - INFO: Waiting for instance instance2 to sync disks.
  Mon Oct 26 04:11:41 2009 - INFO: - device disk/0: 35.40\% done, 1 estimated seconds remaining
  Mon Oct 26 04:11:42 2009 - INFO: - device disk/0: 58.50\% done, 1 estimated seconds remaining
  Mon Oct 26 04:11:43 2009 - INFO: - device disk/0: 86.20\% done, 0 estimated seconds remaining
  Mon Oct 26 04:11:44 2009 - INFO: - device disk/0: 92.40\% done, 0 estimated seconds remaining
  Mon Oct 26 04:11:44 2009 - INFO: - device disk/0: 97.00\% done, 0 estimated seconds remaining
  Mon Oct 26 04:11:44 2009 - INFO: Instance instance2's disks are in sync.
  Mon Oct 26 04:11:44 2009 creating os for instance instance2 on node node1
  Mon Oct 26 04:11:44 2009 * running the instance OS create scripts...
  Mon Oct 26 04:11:57 2009 * starting instance...
  $

The above shows one instance created via an iallocator script, and one created with manual node assignment. The other three instances were also created and now it's time to check them::

  $ gnt-instance list
  Instance  Hypervisor OS          Primary_node Status  Memory
  instance1 xen-pvm    debootstrap node2        running   128M
  instance2 xen-pvm    debootstrap node1        running   128M
  instance3 xen-pvm    debootstrap node1        running   128M
  instance4 xen-pvm    debootstrap node3        running   128M
  instance5 xen-pvm    debootstrap node2        running   128M

Accessing instances
+++++++++++++++++++

Accessing an instance's console is easy::

  $ gnt-instance console %instance2%
  [    0.000000] Bootdata ok (command line is root=/dev/sda1 ro)
  [    0.000000] Linux version 2.6…
  [    0.000000] BIOS-provided physical RAM map:
  [    0.000000]  Xen: 0000000000000000 - 0000000008800000 (usable)
  [13138176.018071] Built 1 zonelists.  Total pages: 34816
  [13138176.018074] Kernel command line: root=/dev/sda1 ro
  [13138176.018694] Initializing CPU#0
  …
  Checking file systems...fsck 1.41.3 (12-Oct-2008)
  done.
  Setting kernel variables (/etc/sysctl.conf)...done.
  Mounting local filesystems...done.
  Activating swapfile swap...done.
  Setting up networking....
  Configuring network interfaces...done.
  Setting console screen modes and fonts.
  INIT: Entering runlevel: 2
  Starting enhanced syslogd: rsyslogd.
  Starting periodic command scheduler: crond.
  Debian GNU/Linux 5.0 instance2 tty1

  instance2 login:

At this moment you can log in to the instance and, after configuring the network (and doing this on all instances), we can check their connectivity::

  $ fping %instance{1..5}%
  instance1 is alive
  instance2 is alive
  instance3 is alive
  instance4 is alive
  instance5 is alive
  $

Removal
+++++++

Removing unwanted instances is also easy::

  $ gnt-instance remove %instance5%
  This will remove the volumes of the instance instance5 (including
  mirrors), thus removing all the data of the instance. Continue?
  y/[n]/?: %y%
  $

Recovering from hardware failures
---------------------------------

Recovering from node failure
++++++++++++++++++++++++++++

We are now left with four instances. Assume that at this point, node3, which has one primary and one secondary instance, crashes::

  $ gnt-node info %node3%
  Node name: node3
    primary ip: 198.51.100.1
    secondary ip: 192.0.2.3
    master candidate: True
    drained: False
    offline: False
    primary for instances:
      - instance4
    secondary for instances:
      - instance1
  $ fping %node3%
  node3 is unreachable

At this point, the primary instance of that node (instance4) is down, but the secondary instance (instance1) is not affected except that it has lost disk redundancy::

  $ fping %instance{1,4}%
  instance1 is alive
  instance4 is unreachable
  $

If we try to check the status of instance4 via the instance info command, it fails because it tries to contact node3 which is down::

  $ gnt-instance info %instance4%
  Failure: command execution error:
  Error checking node node3: Connection failed (113: No route to host)
  $

So we need to mark node3 as being *offline*, and thus Ganeti won't talk to it anymore::

  $ gnt-node modify -O yes -f %node3%
  Mon Oct 26 04:34:12 2009 - WARNING: Not enough master candidates (desired 10, new value will be 2)
  Mon Oct 26 04:34:15 2009 - WARNING: Communication failure to node node3: Connection failed (113: No route to host)
  Modified node node3
   - offline -> True
   - master_candidate -> auto-demotion due to offline
  $

And now we can failover the instance::

  $ gnt-instance failover %instance4%
  Failover will happen to image instance4. This requires a shutdown of
  the instance. Continue?
  y/[n]/?: %y%
  Mon Oct 26 04:35:34 2009 * checking disk consistency between source and target
  Failure: command execution error:
  Disk disk/0 is degraded on target node, aborting failover.
  $ gnt-instance failover --ignore-consistency %instance4%
  Failover will happen to image instance4. This requires a shutdown of
  the instance. Continue?
  y/[n]/?: y
  Mon Oct 26 04:35:47 2009 * checking disk consistency between source and target
  Mon Oct 26 04:35:47 2009 * shutting down instance on source node
  Mon Oct 26 04:35:47 2009 - WARNING: Could not shutdown instance instance4 on node node3. Proceeding anyway. Please make sure node node3 is down. Error details: Node is marked offline
  Mon Oct 26 04:35:47 2009 * deactivating the instance's disks on source node
  Mon Oct 26 04:35:47 2009 - WARNING: Could not shutdown block device disk/0 on node node3: Node is marked offline
  Mon Oct 26 04:35:47 2009 * activating the instance's disks on target node
  Mon Oct 26 04:35:47 2009 - WARNING: Could not prepare block device disk/0 on node node3 (is_primary=False, pass=1): Node is marked offline
  Mon Oct 26 04:35:48 2009 * starting the instance on the target node
  $

Note that in our first attempt, Ganeti refused to do the failover since it wasn't sure about the status of the instance's disks.
We pass the ``--ignore-consistency`` flag and then we can failover::

  $ gnt-instance list
  Instance  Hypervisor OS          Primary_node Status  Memory
  instance1 xen-pvm    debootstrap node2        running   128M
  instance2 xen-pvm    debootstrap node1        running   128M
  instance3 xen-pvm    debootstrap node1        running   128M
  instance4 xen-pvm    debootstrap node1        running   128M
  $

But at this point, both instance1 and instance4 are without disk redundancy::

  $ gnt-instance info %instance1%
  Instance name: instance1
  UUID: 45173e82-d1fa-417c-8758-7d582ab7eef4
  Serial number: 2
  Creation time: 2009-10-26 04:06:57
  Modification time: 2009-10-26 04:07:14
  State: configured to be up, actual state is up
    Nodes:
      - primary: node2
      - secondaries: node3
    Operating system: debootstrap
    Allocated network port: None
    Hypervisor: xen-pvm
      - root_path: default (/dev/sda1)
      - kernel_args: default (ro)
      - use_bootloader: default (False)
      - bootloader_args: default ()
      - bootloader_path: default ()
      - kernel_path: default (/boot/vmlinuz-2.6-xenU)
      - initrd_path: default ()
    Hardware:
      - VCPUs: 1
      - maxmem: 256MiB
      - minmem: 512MiB
      - NICs:
        - nic/0: MAC: aa:00:00:78:da:63, IP: None, mode: bridged, link: xen-br0
    Disks:
      - disk/0: drbd8, size 256M
        access mode: rw
        nodeA: node2, minor=0
        nodeB: node3, minor=0
        port: 11035
        auth key: 8e950e3cec6854b0181fbc3a6058657701f2d458
        on primary: /dev/drbd0 (147:0) in sync, status *DEGRADED*
        child devices:
          - child 0: lvm, size 256M
            logical_id: xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_data
            on primary: /dev/xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_data (254:0)
          - child 1: lvm, size 128M
            logical_id: xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta
            on primary: /dev/xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta (254:1)

The output is similar for instance4. In order to recover this, we need to run the node evacuate command, which will change the instances' secondary from the current node to a new one (in this case, we only have two working nodes, so all instances will end up on nodes one and two)::

  $ gnt-node evacuate -I hail %node3%
  Relocate instance(s) 'instance1','instance4' from node node3 using iallocator hail?
y/[n]/?: %y% Mon Oct 26 05:05:39 2009 - INFO: Selected new secondary for instance 'instance1': node1 Mon Oct 26 05:05:40 2009 - INFO: Selected new secondary for instance 'instance4': node2 Mon Oct 26 05:05:40 2009 Replacing disk(s) 0 for instance1 Mon Oct 26 05:05:40 2009 STEP 1/6 Check device existence Mon Oct 26 05:05:40 2009 - INFO: Checking disk/0 on node2 Mon Oct 26 05:05:40 2009 - INFO: Checking volume groups Mon Oct 26 05:05:40 2009 STEP 2/6 Check peer consistency Mon Oct 26 05:05:40 2009 - INFO: Checking disk/0 consistency on node node2 Mon Oct 26 05:05:40 2009 STEP 3/6 Allocate new storage Mon Oct 26 05:05:40 2009 - INFO: Adding new local storage on node1 for disk/0 Mon Oct 26 05:05:41 2009 STEP 4/6 Changing drbd configuration Mon Oct 26 05:05:41 2009 - INFO: activating a new drbd on node1 for disk/0 Mon Oct 26 05:05:42 2009 - INFO: Shutting down drbd for disk/0 on old node Mon Oct 26 05:05:42 2009 - WARNING: Failed to shutdown drbd for disk/0 on oldnode: Node is marked offline Mon Oct 26 05:05:42 2009 Hint: Please cleanup this device manually as soon as possible Mon Oct 26 05:05:42 2009 - INFO: Detaching primary drbds from the network (=> standalone) Mon Oct 26 05:05:42 2009 - INFO: Updating instance configuration Mon Oct 26 05:05:45 2009 - INFO: Attaching primary drbds to new secondary (standalone => connected) Mon Oct 26 05:05:46 2009 STEP 5/6 Sync devices Mon Oct 26 05:05:46 2009 - INFO: Waiting for instance instance1 to sync disks. Mon Oct 26 05:05:46 2009 - INFO: - device disk/0: 13.90\% done, 7 estimated seconds remaining Mon Oct 26 05:05:53 2009 - INFO: Instance instance1's disks are in sync. Mon Oct 26 05:05:53 2009 STEP 6/6 Removing old storage Mon Oct 26 05:05:53 2009 - INFO: Remove logical volumes for 0 Mon Oct 26 05:05:53 2009 - WARNING: Can't remove old LV: Node is marked offline Mon Oct 26 05:05:53 2009 Hint: remove unused LVs manually Mon Oct 26 05:05:53 2009 - WARNING: Can't remove old LV: Node is marked offline Mon Oct 26 05:05:53 2009 Hint: remove unused LVs manually Mon Oct 26 05:05:53 2009 Replacing disk(s) 0 for instance4 Mon Oct 26 05:05:53 2009 STEP 1/6 Check device existence Mon Oct 26 05:05:53 2009 - INFO: Checking disk/0 on node1 Mon Oct 26 05:05:53 2009 - INFO: Checking volume groups Mon Oct 26 05:05:53 2009 STEP 2/6 Check peer consistency Mon Oct 26 05:05:53 2009 - INFO: Checking disk/0 consistency on node node1 Mon Oct 26 05:05:54 2009 STEP 3/6 Allocate new storage Mon Oct 26 05:05:54 2009 - INFO: Adding new local storage on node2 for disk/0 Mon Oct 26 05:05:54 2009 STEP 4/6 Changing drbd configuration Mon Oct 26 05:05:54 2009 - INFO: activating a new drbd on node2 for disk/0 Mon Oct 26 05:05:55 2009 - INFO: Shutting down drbd for disk/0 on old node Mon Oct 26 05:05:55 2009 - WARNING: Failed to shutdown drbd for disk/0 on oldnode: Node is marked offline Mon Oct 26 05:05:55 2009 Hint: Please cleanup this device manually as soon as possible Mon Oct 26 05:05:55 2009 - INFO: Detaching primary drbds from the network (=> standalone) Mon Oct 26 05:05:55 2009 - INFO: Updating instance configuration Mon Oct 26 05:05:55 2009 - INFO: Attaching primary drbds to new secondary (standalone => connected) Mon Oct 26 05:05:56 2009 STEP 5/6 Sync devices Mon Oct 26 05:05:56 2009 - INFO: Waiting for instance instance4 to sync disks. Mon Oct 26 05:05:56 2009 - INFO: - device disk/0: 12.40\% done, 8 estimated seconds remaining Mon Oct 26 05:06:04 2009 - INFO: Instance instance4's disks are in sync. 
  Mon Oct 26 05:06:04 2009 STEP 6/6 Removing old storage
  Mon Oct 26 05:06:04 2009 - INFO: Remove logical volumes for 0
  Mon Oct 26 05:06:04 2009 - WARNING: Can't remove old LV: Node is marked offline
  Mon Oct 26 05:06:04 2009 Hint: remove unused LVs manually
  Mon Oct 26 05:06:04 2009 - WARNING: Can't remove old LV: Node is marked offline
  Mon Oct 26 05:06:04 2009 Hint: remove unused LVs manually
  $

And now node3 is completely free of instances and can be repaired::

  $ gnt-node list
  Node  DTotal DFree MTotal MNode MFree Pinst Sinst
  node1   1.3T  1.3T  32.0G  1.0G 30.2G     3     1
  node2   1.3T  1.3T  32.0G  1.0G 30.4G     1     3
  node3      ?     ?      ?     ?     ?     0     0

Re-adding a node to the cluster
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Let's say node3 has been repaired and is now ready to be reused. Re-adding it is simple::

  $ gnt-node add --readd %node3%
  The authenticity of host 'node3 (198.51.100.1)' can't be established.
  RSA key fingerprint is 9f:2e:5a:2e:e0:bd:00:09:e4:5c:32:f2:27:57:7a:f4.
  Are you sure you want to continue connecting (yes/no)? yes
  Mon Oct 26 05:27:39 2009 - INFO: Readding a node, the offline/drained flags were reset
  Mon Oct 26 05:27:39 2009 - INFO: Node will be a master candidate

And it is now working again::

  $ gnt-node list
  Node  DTotal DFree MTotal MNode MFree Pinst Sinst
  node1   1.3T  1.3T  32.0G  1.0G 30.2G     3     1
  node2   1.3T  1.3T  32.0G  1.0G 30.4G     1     3
  node3   1.3T  1.3T  32.0G  1.0G 30.4G     0     0

.. note:: If Ganeti has been built with the htools component enabled, you can shuffle the instances around to make better use of the nodes.

Disk failures
+++++++++++++

A disk failure is simpler than a full node failure. First, a single disk failure should not cause data loss for any redundant instance; only the performance of some instances might be reduced due to more network traffic.

Let's take the cluster status from the above listing, and check what volumes are in use::

  $ gnt-node volumes -o phys,instance %node2%
  PhysDev   Instance
  /dev/sdb1 instance4
  /dev/sdb1 instance4
  /dev/sdb1 instance1
  /dev/sdb1 instance1
  /dev/sdb1 instance3
  /dev/sdb1 instance3
  /dev/sdb1 instance2
  /dev/sdb1 instance2
  $

You can see that all instances on node2 have logical volumes on ``/dev/sdb1``. Let's simulate a disk failure on that disk::

  $ ssh node2
  # on node2
  $ echo offline > /sys/block/sdb/device/state
  $ vgs
    /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
    /dev/sdb1: read failed after 0 of 4096 at 750153695232: Input/output error
    /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
    Couldn't find device with uuid '954bJA-mNL0-7ydj-sdpW-nc2C-ZrCi-zFp91c'.
    Couldn't find all physical volumes for volume group xenvg.
    /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
    /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
    Couldn't find device with uuid '954bJA-mNL0-7ydj-sdpW-nc2C-ZrCi-zFp91c'.
    Couldn't find all physical volumes for volume group xenvg.
    Volume group xenvg not found
  $

At this point, the node is broken and if we are to examine instance2 we get (simplified output shown)::

  $ gnt-instance info %instance2%
  Instance name: instance2
  State: configured to be up, actual state is up
    Nodes:
      - primary: node1
      - secondaries: node2
    Disks:
      - disk/0: drbd8, size 256M
        on primary:   /dev/drbd0 (147:0) in sync, status ok
        on secondary: /dev/drbd1 (147:1) in sync, status *DEGRADED* *MISSING DISK*

This instance has a secondary only on node2.
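If you want to confirm the degradation from DRBD's own point of view, the kernel status file on the affected node shows the same picture. A minimal sketch using plain DRBD tooling, not a Ganeti command::

  $ ssh %node2% cat /proc/drbd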
Let's verify a primary instance of node2:: $ gnt-instance info %instance1% Instance name: instance1 State: configured to be up, actual state is up Nodes: - primary: node2 - secondaries: node1 Disks: - disk/0: drbd8, size 256M on primary: /dev/drbd0 (147:0) in sync, status *DEGRADED* *MISSING DISK* on secondary: /dev/drbd3 (147:3) in sync, status ok $ gnt-instance console %instance1% Debian GNU/Linux 5.0 instance1 tty1 instance1 login: root Last login: Tue Oct 27 01:24:09 UTC 2009 on tty1 instance1:~# date > test instance1:~# sync instance1:~# cat test Tue Oct 27 01:25:20 UTC 2009 instance1:~# dmesg|tail [5439785.235448] NET: Registered protocol family 15 [5439785.235489] 802.1Q VLAN Support v1.8 Ben Greear [5439785.235495] All bugs added by David S. Miller [5439785.235517] XENBUS: Device with no driver: device/console/0 [5439785.236576] kjournald starting. Commit interval 5 seconds [5439785.236588] EXT3-fs: mounted filesystem with ordered data mode. [5439785.236625] VFS: Mounted root (ext3 filesystem) readonly. [5439785.236663] Freeing unused kernel memory: 172k freed [5439787.533779] EXT3 FS on sda1, internal journal [5440655.065431] eth0: no IPv6 routers present instance1:~# As you can see, the instance is running fine and doesn't see any disk issues. It is now time to fix node2 and re-establish redundancy for the involved instances. .. note:: For Ganeti 2.0 we need to fix manually the volume group on node2 by running ``vgreduce --removemissing xenvg`` :: $ gnt-node repair-storage %node2% lvm-vg %xenvg% Mon Oct 26 18:14:03 2009 Repairing storage unit 'xenvg' on node2 ... $ ssh %node2% vgs VG #PV #LV #SN Attr VSize VFree xenvg 1 8 0 wz--n- 673.84G 673.84G $ This has removed the 'bad' disk from the volume group, which is now left with only one PV. We can now replace the disks for the involved instances:: $ for i in %instance{1..4}%; do gnt-instance replace-disks -a $i; done Mon Oct 26 18:15:38 2009 Replacing disk(s) 0 for instance1 Mon Oct 26 18:15:38 2009 STEP 1/6 Check device existence Mon Oct 26 18:15:38 2009 - INFO: Checking disk/0 on node1 Mon Oct 26 18:15:38 2009 - INFO: Checking disk/0 on node2 Mon Oct 26 18:15:38 2009 - INFO: Checking volume groups Mon Oct 26 18:15:38 2009 STEP 2/6 Check peer consistency Mon Oct 26 18:15:38 2009 - INFO: Checking disk/0 consistency on node node1 Mon Oct 26 18:15:39 2009 STEP 3/6 Allocate new storage Mon Oct 26 18:15:39 2009 - INFO: Adding storage on node2 for disk/0 Mon Oct 26 18:15:39 2009 STEP 4/6 Changing drbd configuration Mon Oct 26 18:15:39 2009 - INFO: Detaching disk/0 drbd from local storage Mon Oct 26 18:15:40 2009 - INFO: Renaming the old LVs on the target node Mon Oct 26 18:15:40 2009 - INFO: Renaming the new LVs on the target node Mon Oct 26 18:15:40 2009 - INFO: Adding new mirror component on node2 Mon Oct 26 18:15:41 2009 STEP 5/6 Sync devices Mon Oct 26 18:15:41 2009 - INFO: Waiting for instance instance1 to sync disks. Mon Oct 26 18:15:41 2009 - INFO: - device disk/0: 12.40\% done, 9 estimated seconds remaining Mon Oct 26 18:15:50 2009 - INFO: Instance instance1's disks are in sync. 
  Mon Oct 26 18:15:50 2009 STEP 6/6 Removing old storage
  Mon Oct 26 18:15:50 2009 - INFO: Remove logical volumes for disk/0
  Mon Oct 26 18:15:52 2009 Replacing disk(s) 0 for instance2
  Mon Oct 26 18:15:52 2009 STEP 1/6 Check device existence
  …
  Mon Oct 26 18:16:01 2009 STEP 6/6 Removing old storage
  Mon Oct 26 18:16:01 2009 - INFO: Remove logical volumes for disk/0
  Mon Oct 26 18:16:02 2009 Replacing disk(s) 0 for instance3
  Mon Oct 26 18:16:02 2009 STEP 1/6 Check device existence
  …
  Mon Oct 26 18:16:09 2009 STEP 6/6 Removing old storage
  Mon Oct 26 18:16:09 2009 - INFO: Remove logical volumes for disk/0
  Mon Oct 26 18:16:10 2009 Replacing disk(s) 0 for instance4
  Mon Oct 26 18:16:10 2009 STEP 1/6 Check device existence
  …
  Mon Oct 26 18:16:18 2009 STEP 6/6 Removing old storage
  Mon Oct 26 18:16:18 2009 - INFO: Remove logical volumes for disk/0
  $

At this point, all instances should be healthy again.

.. note:: Ganeti 2.0 doesn't have the ``-a`` option to replace-disks, so for it you have to run the loop twice, once over primary instances with argument ``-p`` and once over secondary instances with argument ``-s``, but otherwise the operations are similar::

  $ gnt-instance replace-disks -p instance1
  …
  $ for i in %instance{2..4}%; do gnt-instance replace-disks -s $i; done

Common cluster problems
-----------------------

There are a number of small issues that might appear on a cluster that can be solved easily as long as the issue is properly identified. For this exercise we will consider the case of node3, which was broken previously and re-added to the cluster without reinstallation. Running cluster verify on the cluster reports::

  $ gnt-cluster verify
  Mon Oct 26 18:30:08 2009 * Verifying global settings
  Mon Oct 26 18:30:08 2009 * Gathering data (3 nodes)
  Mon Oct 26 18:30:10 2009 * Verifying node status
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: unallocated drbd minor 0 is in use
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: unallocated drbd minor 1 is in use
  Mon Oct 26 18:30:10 2009 * Verifying instance status
  Mon Oct 26 18:30:10 2009   - ERROR: instance instance4: instance should not run on node node3
  Mon Oct 26 18:30:10 2009 * Verifying orphan volumes
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 22459cf8-117d-4bea-a1aa-791667d07800.disk0_data is unknown
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data is unknown
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta is unknown
  Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta is unknown
  Mon Oct 26 18:30:10 2009 * Verifying remaining instances
  Mon Oct 26 18:30:10 2009 * Verifying N+1 Memory redundancy
  Mon Oct 26 18:30:10 2009 * Other Notes
  Mon Oct 26 18:30:10 2009 * Hooks Results
  $

Instance status
+++++++++++++++

As you can see, *instance4* has a copy running on node3, because we forced the failover when node3 failed. This case is dangerous as the instance will have the same IP and MAC address, wreaking havoc on the network environment and on anyone who tries to use it. Ganeti doesn't directly handle this case. It is recommended to log on to node3 and run::

  $ xm destroy %instance4%

Unallocated DRBD minors
+++++++++++++++++++++++

There are still unallocated DRBD minors on node3.
Again, these are not handled by Ganeti directly and need to be cleaned up via DRBD commands::

  $ ssh %node3%
  # on node 3
  $ drbdsetup /dev/drbd%0% down
  $ drbdsetup /dev/drbd%1% down
  $

Orphan volumes
++++++++++++++

At this point, the only remaining problem should be the so-called *orphan* volumes. This can also happen in the case of an aborted disk-replace, or a similar situation where Ganeti was not able to recover automatically. Here you need to remove them manually via LVM commands::

  $ ssh %node3%
  # on node3
  $ lvremove %xenvg%
  Do you really want to remove active logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_data"? [y/n]: %y%
    Logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_data" successfully removed
  Do you really want to remove active logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta"? [y/n]: %y%
    Logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta" successfully removed
  Do you really want to remove active logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data"? [y/n]: %y%
    Logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data" successfully removed
  Do you really want to remove active logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta"? [y/n]: %y%
    Logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta" successfully removed
  node3#

At this point cluster verify shouldn't complain anymore::

  $ gnt-cluster verify
  Mon Oct 26 18:37:51 2009 * Verifying global settings
  Mon Oct 26 18:37:51 2009 * Gathering data (3 nodes)
  Mon Oct 26 18:37:53 2009 * Verifying node status
  Mon Oct 26 18:37:53 2009 * Verifying instance status
  Mon Oct 26 18:37:53 2009 * Verifying orphan volumes
  Mon Oct 26 18:37:53 2009 * Verifying remaining instances
  Mon Oct 26 18:37:53 2009 * Verifying N+1 Memory redundancy
  Mon Oct 26 18:37:53 2009 * Other Notes
  Mon Oct 26 18:37:53 2009 * Hooks Results
  $

N+1 errors
++++++++++

Since redundant instances in Ganeti have a primary/secondary model, each node needs to set aside enough memory so that if one of its peer nodes fails, all the secondary instances that have that node as primary can be relocated to it. More specifically, if instance2 has node1 as primary and node2 as secondary (and node1 and node2 do not have any other instances in this layout), then node2 must have enough free memory so that if node1 fails, we can failover instance2 without any other operations (for reducing the downtime window). Let's increase the memory of the current instances to 4G, and add three new instances, two on node2:node3 with 8GB of RAM and one on node1:node2, with 12GB of RAM (numbers chosen so that we run out of memory)::

  $ gnt-instance modify -B memory=%4G% %instance1%
  Modified instance instance1
   - be/maxmem -> 4096
   - be/minmem -> 4096
  Please don't forget that these parameters take effect only at the next start of the instance.
  $ gnt-instance modify …
  $ gnt-instance add -t drbd -n %node2%:%node3% -s %512m% -B memory=%8G% -o %debootstrap% %instance5%
  …
  $ gnt-instance add -t drbd -n %node2%:%node3% -s %512m% -B memory=%8G% -o %debootstrap% %instance6%
  …
  $ gnt-instance add -t drbd -n %node1%:%node2% -s %512m% -B memory=%12G% -o %debootstrap% %instance7%
  $ gnt-instance reboot --all
  The reboot will operate on 7 instances.
  Do you want to continue?
  Affected instances:
    instance1
    instance2
    instance3
    instance4
    instance5
    instance6
    instance7
  y/[n]/?: %y%
  Submitted jobs 677, 678, 679, 680, 681, 682, 683
  Waiting for job 677 for instance1...
  Waiting for job 678 for instance2...
  Waiting for job 679 for instance3...
Waiting for job 680 for instance4... Waiting for job 681 for instance5... Waiting for job 682 for instance6... Waiting for job 683 for instance7... $ We rebooted the instances for the memory changes to have effect. Now the cluster looks like:: $ gnt-node list Node DTotal DFree MTotal MNode MFree Pinst Sinst node1 1.3T 1.3T 32.0G 1.0G 6.5G 4 1 node2 1.3T 1.3T 32.0G 1.0G 10.5G 3 4 node3 1.3T 1.3T 32.0G 1.0G 30.5G 0 2 $ gnt-cluster verify Mon Oct 26 18:59:36 2009 * Verifying global settings Mon Oct 26 18:59:36 2009 * Gathering data (3 nodes) Mon Oct 26 18:59:37 2009 * Verifying node status Mon Oct 26 18:59:37 2009 * Verifying instance status Mon Oct 26 18:59:37 2009 * Verifying orphan volumes Mon Oct 26 18:59:37 2009 * Verifying remaining instances Mon Oct 26 18:59:37 2009 * Verifying N+1 Memory redundancy Mon Oct 26 18:59:37 2009 - ERROR: node node2: not enough memory to accommodate instance failovers should node node1 fail Mon Oct 26 18:59:37 2009 * Other Notes Mon Oct 26 18:59:37 2009 * Hooks Results $ The cluster verify error above shows that if node1 fails, node2 will not have enough memory to failover all primary instances on node1 to it. To solve this, you have a number of options: - try to manually move instances around (but this can become complicated for any non-trivial cluster) - try to reduce the minimum memory of some instances on the source node of the N+1 failure (in the example above ``node1``): this will allow it to start and be failed over/migrated with less than its maximum memory - try to reduce the runtime/maximum memory of some instances on the destination node of the N+1 failure (in the example above ``node2``) to create additional available node memory (check the :doc:`admin` guide for what Ganeti will and won't automatically do in regards to instance runtime memory modification) - if Ganeti has been built with the htools package enabled, you can run the ``hbal`` tool which will try to compute an automated cluster solution that complies with the N+1 rule Network issues ++++++++++++++ In case a node has problems with the network (usually the secondary network, as problems with the primary network will render the node unusable for ganeti commands), it will show up in cluster verify as:: $ gnt-cluster verify Mon Oct 26 19:07:19 2009 * Verifying global settings Mon Oct 26 19:07:19 2009 * Gathering data (3 nodes) Mon Oct 26 19:07:23 2009 * Verifying node status Mon Oct 26 19:07:23 2009 - ERROR: node node1: tcp communication with node 'node3': failure using the secondary interface(s) Mon Oct 26 19:07:23 2009 - ERROR: node node2: tcp communication with node 'node3': failure using the secondary interface(s) Mon Oct 26 19:07:23 2009 - ERROR: node node3: tcp communication with node 'node1': failure using the secondary interface(s) Mon Oct 26 19:07:23 2009 - ERROR: node node3: tcp communication with node 'node2': failure using the secondary interface(s) Mon Oct 26 19:07:23 2009 - ERROR: node node3: tcp communication with node 'node3': failure using the secondary interface(s) Mon Oct 26 19:07:23 2009 * Verifying instance status Mon Oct 26 19:07:23 2009 * Verifying orphan volumes Mon Oct 26 19:07:23 2009 * Verifying remaining instances Mon Oct 26 19:07:23 2009 * Verifying N+1 Memory redundancy Mon Oct 26 19:07:23 2009 * Other Notes Mon Oct 26 19:07:23 2009 * Hooks Results $ This shows that both node1 and node2 have problems contacting node3 over the secondary network, and node3 has problems contacting them. 
From this output it can be deduced that, since node1 and node2 can communicate between themselves, node3 is the one having problems, and you need to investigate its network settings/connection.

Migration problems
++++++++++++++++++

Since live migration can sometimes fail and leave the instance in an inconsistent state, Ganeti provides a ``--cleanup`` argument to the migrate command that does:

- check on which node the instance is actually running (has the command failed before or after the actual migration?)
- reconfigure the DRBD disks accordingly

It is always safe to run this command as long as the instance has good data on its primary node (i.e. not showing as degraded). If so, you can simply run::

  $ gnt-instance migrate --cleanup %instance1%
  Instance instance1 will be recovered from a failed migration. Note
  that the migration procedure (including cleanup) is **experimental**
  in this version. This might impact the instance if anything goes
  wrong. Continue?
  y/[n]/?: %y%
  Mon Oct 26 19:13:49 2009 Migrating instance instance1
  Mon Oct 26 19:13:49 2009 * checking where the instance actually runs (if this hangs, the hypervisor might be in a bad state)
  Mon Oct 26 19:13:49 2009 * instance confirmed to be running on its primary node (node2)
  Mon Oct 26 19:13:49 2009 * switching node node1 to secondary mode
  Mon Oct 26 19:13:50 2009 * wait until resync is done
  Mon Oct 26 19:13:50 2009 * changing into standalone mode
  Mon Oct 26 19:13:50 2009 * changing disks into single-master mode
  Mon Oct 26 19:13:50 2009 * wait until resync is done
  Mon Oct 26 19:13:51 2009 * done
  $

In use disks at instance shutdown
+++++++++++++++++++++++++++++++++

If you see something like the following when trying to shut down or deactivate disks for an instance::

  $ gnt-instance shutdown %instance1%
  Mon Oct 26 19:16:23 2009 - WARNING: Could not shutdown block device disk/0 on node node2: drbd0: can't shutdown drbd device: /dev/drbd0: State change failed: (-12) Device is held open by someone\n

It most likely means something is holding open the underlying DRBD device. This can be bad if the instance is not running, as it might mean that there was concurrent access from both the node and the instance to the disks, but not always (e.g. you could only have had the partitions activated via ``kpartx``).

To troubleshoot this issue you need to follow standard Linux practices, and pay attention to the hypervisor being used:

- check if (in the above example) ``/dev/drbd0`` on node2 is being mounted somewhere (``cat /proc/mounts``)
- check if the device is not being used by device mapper itself: ``dmsetup ls`` and look for entries of the form ``drbd0pX``, and if so remove them with either ``kpartx -d`` or ``dmsetup remove``

For Xen, check if it's not using the disks itself::

  $ xenstore-ls /local/domain/%0%/backend/vbd|grep -e "domain =" -e physical-device
  domain = "instance2"
  physical-device = "93:0"
  domain = "instance3"
  physical-device = "93:1"
  domain = "instance4"
  physical-device = "93:2"
  $

You can see in the above output that the node exports three disks, to three instances. The ``physical-device`` key is in major:minor format in hexadecimal, and ``0x93`` represents DRBD's major number. Thus we can see from the above that instance2 has /dev/drbd0, instance3 /dev/drbd1, and instance4 /dev/drbd2.

LUXI version mismatch
+++++++++++++++++++++

LUXI is the protocol used for communication between clients and the master daemon. Starting in Ganeti 2.3, the peers exchange their version in each message.
When they don't match, an error is raised::

  $ gnt-node modify -O yes %node3%
  Unhandled Ganeti error: LUXI version mismatch, server 2020000, request 2030000

Usually this means that the server and client are from different Ganeti versions, or import their libraries from different, inconsistent paths (e.g. an older version installed in another place). You can print the import path for Ganeti's modules using the following command (note that depending on your setup you might have to use an explicit version in the Python command, e.g. ``python2.6``)::

  python -c 'import ganeti; print ganeti.__file__'

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/install.rst

Ganeti installation tutorial
============================

Documents Ganeti version |version|

.. contents::

.. highlight:: shell-example

Introduction
------------

Ganeti is a cluster virtualization management system based on Xen or KVM. This document explains how to bootstrap a Ganeti node (Xen *dom0*, the host Linux system for KVM), create a running cluster and install virtual instances (Xen *domUs*, KVM guests). You need to repeat most of the steps in this document for every node you want to install, but of course we recommend creating some semi-automatic procedure if you plan to deploy Ganeti on a medium/large scale.

A basic Ganeti terminology glossary is provided in the introductory section of the :doc:`admin`. Please refer to that document if you are uncertain about the terms we are using.

Ganeti has been developed for Linux and should be distribution-agnostic. This documentation will use Debian Squeeze as an example system but the examples can be translated to any other distribution. You are expected to be familiar with your distribution, its package management system, and Xen or KVM before trying to use Ganeti.

This document is divided into two main sections:

- Installation of the base system and base components
- Configuration of the environment for Ganeti

Each of these is divided into sub-sections. While a full Ganeti system will need all of the steps specified, some are not strictly required for every environment. Which ones they are, and why, is specified in the corresponding sections.

Installing the base system and base components
----------------------------------------------

Hardware requirements
+++++++++++++++++++++

Any system supported by your Linux distribution is fine. 64-bit systems are better as they can support more memory. Any disk drive recognized by Linux (``IDE``/``SCSI``/``SATA``/etc.) is supported in Ganeti. Note that no shared storage (e.g. ``SAN``) is needed to get high-availability features (but of course, one can be used to store the images). While it is highly recommended to use more than one disk drive in order to improve speed, Ganeti also works with one disk per machine.

Installing the base system
++++++++++++++++++++++++++

**Mandatory** on all nodes.

It is advised to start with a clean, minimal install of the operating system. The only requirement you need to be aware of at this stage is to partition leaving enough space for a big (**minimum** 20GiB) LVM volume group which will then host your instance filesystems, if you want to use all Ganeti features. The volume group name Ganeti uses (by default) is ``xenvg``.

You can also use file-based storage only, without LVM, but this setup is not detailed in this document.
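If you created the volume group during installation, you can quickly confirm that it exists and has the expected free space before proceeding. A minimal sketch using standard LVM tooling, assuming the default ``xenvg`` name::

  $ vgs %xenvg%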
If you choose to use RBD-based instances, there's no need for LVM provisioning. However, this feature is experimental, and is not yet recommended for production clusters.

While you can use an existing system, please note that the Ganeti installation is intrusive in terms of changes to the system configuration, and it's best to use a newly-installed system without important data on it. Also, for best results, it's advised that the nodes have as much as possible the same hardware and software configuration. This will make administration much easier.

Hostname issues
~~~~~~~~~~~~~~~

Note that Ganeti requires the hostnames of the systems (i.e. what the ``hostname`` command outputs) to be fully-qualified names, not short names. In other words, you should use *node1.example.com* as a hostname and not just *node1*.

.. admonition:: Debian

   Debian usually configures the hostname differently than you need it for Ganeti. For example, this is what it puts in ``/etc/hosts`` in certain situations::

     127.0.0.1       localhost
     127.0.1.1       node1.example.com node1

   but for Ganeti you need to have::

     127.0.0.1       localhost
     192.0.2.1       node1.example.com node1

   replacing ``192.0.2.1`` with your node's address. Also, the file ``/etc/hostname`` which configures the hostname of the system should contain ``node1.example.com`` and not just ``node1`` (you need to run the command ``/etc/init.d/hostname.sh start`` after changing the file).

.. admonition:: Why a fully qualified host name

   Although most distributions use only the short name in the /etc/hostname file, we still think Ganeti nodes should use the full name. The reason for this is that calling 'hostname --fqdn' requires the resolver library to work and is a 'guess' via heuristics at what is your domain name. Since Ganeti can be used among other things to host DNS servers, we don't want to depend on them as much as possible, and we'd rather have the uname() syscall return the full node name.

   We haven't ever found any breakage in using a full hostname on a Linux system, and anyway we recommend to have only a minimal installation on Ganeti nodes, and to use instances (or other dedicated machines) to run the rest of your network services. By doing this you can change the /etc/hostname file to contain an FQDN without the fear of breaking anything unrelated.

Installing The Hypervisor
+++++++++++++++++++++++++

**Mandatory** on all nodes.

While Ganeti is developed with the ability to modularly run on different virtualization environments in mind, the only two currently usable on a live system are Xen and KVM. Supported Xen versions are: 3.0.3 and later 3.x versions, and 4.x (tested up to 4.1). Supported KVM versions are 72 and above.

Please follow your distribution's recommended way to install and set up Xen, or install Xen from the upstream source, if you wish, following their manual. For KVM, make sure you have a KVM-enabled kernel and the KVM tools.

After installing Xen, you need to reboot into your new system. On some distributions this might involve configuring GRUB appropriately, whereas others will configure it automatically when you install the respective kernels. For KVM no reboot should be necessary.

.. admonition:: Xen on Debian

   Under Debian you can install the relevant ``xen-linux-system`` package, which will pull in both the hypervisor and the relevant kernel. Also, if you are installing a 32-bit system, you should install the ``libc6-xen`` package (run ``apt-get install libc6-xen``).
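Once the machine has been rebooted, it is worth confirming that it actually came up under the Xen hypervisor before continuing. A minimal check with the ``xm`` toolstack used throughout this document (newer Xen versions use ``xl`` with the same subcommands); both commands should succeed, and the list should show ``Domain-0``::

  $ xm info
  $ xm list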
Xen settings
~~~~~~~~~~~~

It's recommended that dom0 is restricted to a low amount of memory (512MiB or 1GiB is reasonable) and that memory ballooning is disabled in the file ``/etc/xen/xend-config.sxp`` by setting the value ``dom0-min-mem`` to 0, like this::

  (dom0-min-mem 0)

For optimum performance when running both CPU and I/O intensive instances, it's also recommended that the dom0 is restricted to one CPU only. For example you can add ``dom0_max_vcpus=1,dom0_vcpus_pin`` to your kernel's boot command line and set ``dom0-cpus`` in ``/etc/xen/xend-config.sxp`` like this::

  (dom0-cpus 1)

It is recommended that you disable Xen's automatic save of virtual machines at system shutdown and subsequent restore of them at reboot. To achieve this, make sure the variable ``XENDOMAINS_SAVE`` in the file ``/etc/default/xendomains`` is set to an empty value.

If you want to use live migration make sure you have, in the xen config file, something that allows the nodes to migrate instances between each other. For example:

.. code-block:: text

  (xend-relocation-server yes)
  (xend-relocation-port 8002)
  (xend-relocation-address '')
  (xend-relocation-hosts-allow '^192\\.0\\.2\\.[0-9]+$')

The second line assumes that the hypervisor parameter ``migration_port`` is set to 8002, otherwise modify it to match. The last line assumes that all your nodes have secondary IPs in the 192.0.2.0/24 network, adjust it accordingly to your setup.

If you want to run HVM instances too with Ganeti and want VNC access to the console of your instances, set the following two entries in ``/etc/xen/xend-config.sxp``:

.. code-block:: text

  (vnc-listen '0.0.0.0')
  (vncpasswd '')

You need to restart the Xen daemon for these settings to take effect::

  $ /etc/init.d/xend restart

Selecting the instance kernel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

After you have installed Xen, you need to tell Ganeti exactly what kernel to use for the instances it will create. This is done by creating a symlink from your actual kernel to ``/boot/vmlinuz-3-xenU``, and one from your initrd to ``/boot/initrd-3-xenU`` [#defkernel]_. Note that if you don't use an initrd for the domU kernel, you don't need to create the initrd symlink.

.. admonition:: Debian

   After installation of the ``xen-linux-system`` package, you need to run (replace the exact version number with the one you have)::

     $ cd /boot
     $ ln -s vmlinuz-%2.6.26-1%-xen-amd64 vmlinuz-3-xenU
     $ ln -s initrd.img-%2.6.26-1%-xen-amd64 initrd-3-xenU

   By default, the initrd doesn't contain the Xen block drivers needed to mount the root device, so it is recommended to update the initrd by following these two steps:

   - edit ``/etc/initramfs-tools/modules`` and add ``xen_blkfront``
   - run ``update-initramfs -u``

Installing DRBD
+++++++++++++++

Recommended on all nodes: DRBD_ is required if you want to use the high availability (HA) features of Ganeti, but optional if you don't require them or only run Ganeti on single-node clusters. You can upgrade a non-HA cluster to an HA one later, but you might need to convert all your instances to DRBD to take advantage of the new features.

.. _DRBD: http://www.drbd.org/

Supported DRBD versions: 8.0-8.3. It's recommended to have at least version 8.0.12. Note that for version 8.2 and newer it is needed to pass the ``usermode_helper=/bin/true`` parameter to the module, either by configuring ``/etc/modules`` or when inserting it manually.

Now the bad news: unless your distribution already provides it, installing DRBD might involve recompiling your kernel or anyway fiddling with it.
Hopefully at least the Xen-ified kernel source to start from will be provided (if you intend to use Xen).

The good news is that you don't need to configure DRBD at all. Ganeti will do it for you for every instance you set up. If you have the DRBD utils installed and the module in your kernel you're fine. Please check that your system is configured to load the module at every boot, and that it passes the following option to the module: ``minor_count=NUMBER``. We recommend that you use 128 as the value of the minor_count - this will allow you to use up to 64 instances in total per node (both primary and secondary, when using only one disk per instance). You can increase the number up to 255 if you need more instances on a node.

.. admonition:: Debian

   On Debian, you can just install (build) the DRBD module with the following commands, making sure you are running the target (Xen or KVM) kernel::

     $ apt-get install drbd8-source drbd8-utils
     $ m-a update
     $ m-a a-i drbd8

   Or on newer versions, if the kernel already has modules::

     $ apt-get install drbd8-utils

   Then to configure it for Ganeti::

     $ echo drbd minor_count=128 usermode_helper=/bin/true >> /etc/modules
     $ depmod -a
     $ modprobe drbd minor_count=128 usermode_helper=/bin/true

It is also recommended that you comment out the default resources (if any) in the ``/etc/drbd.conf`` file, so that the init script doesn't try to configure any drbd devices. You can do this by prefixing all *resource* lines in the file with the keyword *skip*, like this:

.. code-block:: text

  skip {
    resource r0 {
      ...
    }
  }

  skip {
    resource "r1" {
      ...
    }
  }

Installing RBD
++++++++++++++

Recommended on all nodes: RBD_ is required if you want to create instances with RBD disks residing inside a RADOS cluster (make use of the rbd disk template). RBD-based instances can failover or migrate to any other node in the ganeti cluster, enabling you to exploit all of Ganeti's high availability (HA) features.

.. attention:: Be careful though: rbd is still experimental! For now it is recommended only for testing purposes. No sensitive data should be stored there.

.. _RBD: http://ceph.newdream.net/

You will need the ``rbd`` and ``libceph`` kernel modules, the RBD/Ceph userspace utils (ceph-common Debian package) and an appropriate Ceph/RADOS configuration file on every VM-capable node. You will also need a working RADOS Cluster accessible by the above nodes.

RADOS Cluster
~~~~~~~~~~~~~

You will need a working RADOS Cluster accessible by all VM-capable nodes to use the RBD template. For more information on setting up a RADOS Cluster, refer to the `official docs `_.

If you want to use a pool for storing RBD disk images other than the default (``rbd``), you should first create the pool in the RADOS Cluster, and then set the corresponding rbd disk parameter named ``pool``.

Kernel Modules
~~~~~~~~~~~~~~

Unless your distribution already provides it, you might need to compile the ``rbd`` and ``libceph`` modules from source. You will need Linux Kernel 3.2 or above for the kernel modules. Alternatively you will have to build them as external modules (from Linux Kernel source 3.2 or above), if you want to run a less recent kernel, or your kernel doesn't include them.

Userspace Utils
~~~~~~~~~~~~~~~

The RBD template has been tested with ``ceph-common`` v0.38 and above. We recommend using the latest version of ``ceph-common``.

..
admonition:: Debian On Debian, you can just install the RBD/Ceph userspace utils with the following command:: $ apt-get install ceph-common Configuration file ~~~~~~~~~~~~~~~~~~ You should also provide an appropriate configuration file (``ceph.conf``) in ``/etc/ceph``. For the rbd userspace utils, you'll only need to specify the IP addresses of the RADOS Cluster monitors. .. admonition:: ceph.conf Sample configuration file: .. code-block:: text [mon.a] host = example_monitor_host1 mon addr = 1.2.3.4:6789 [mon.b] host = example_monitor_host2 mon addr = 1.2.3.5:6789 [mon.c] host = example_monitor_host3 mon addr = 1.2.3.6:6789 For more information, please see the `Ceph Docs `_ Other required software +++++++++++++++++++++++ Please install all software requirements mentioned in :doc:`install-quick`. If you want to build Ganeti from source, don't forget to follow the steps required for that as well. Setting up the environment for Ganeti ------------------------------------- Configuring the network +++++++++++++++++++++++ **Mandatory** on all nodes. You can run Ganeti either in "bridged mode", "routed mode" or "openvswitch mode". In bridged mode, the default, the instances network interfaces will be attached to a software bridge running in dom0. Xen by default creates such a bridge at startup, but your distribution might have a different way to do things, and you'll definitely need to manually set it up under KVM. Beware that the default name Ganeti uses is ``xen-br0`` (which was used in Xen 2.0) while Xen 3.0 uses ``xenbr0`` by default. See the `Initializing the cluster`_ section to learn how to choose a different bridge, or not to use one at all and use "routed mode". In order to use "routed mode" under Xen, you'll need to change the relevant parameters in the Xen config file. Under KVM instead, no config change is necessary, but you still need to set up your network interfaces correctly. By default, under KVM, the "link" parameter you specify per-nic will represent, if non-empty, a different routing table name or number to use for your instances. This allows isolation between different instance groups, and different routing policies between node traffic and instance traffic. You will need to configure your routing table basic routes and rules outside of ganeti. The vif scripts will only add /32 routes to your instances, through their interface, in the table you specified (under KVM, and in the main table under Xen). Also for "openvswitch mode" under Xen a custom network script is needed. Under KVM everything should work, but you'll need to configure your switches outside of Ganeti (as for bridges). .. admonition:: Bridging issues with certain kernels Some kernel versions (e.g. 2.6.32) have an issue where the bridge will automatically change its ``MAC`` address to the lower-numbered slave on port addition and removal. This means that, depending on the ``MAC`` address of the actual NIC on the node and the addresses of the instances, it could be that starting, stopping or migrating instances will lead to timeouts due to the address of the bridge (and thus node itself) changing. To prevent this, it's enough to set the bridge manually to a specific ``MAC`` address, which will disable this automatic address change. In Debian, this can be done as follows in the bridge configuration snippet:: up ip link set addr $(cat /sys/class/net/$IFACE/address) dev $IFACE which will "set" the bridge address to the initial one, disallowing changes. .. 
admonition:: Bridging under Debian The recommended way to configure the Xen bridge is to edit your ``/etc/network/interfaces`` file and substitute your normal Ethernet stanza with the following snippet:: auto xen-br0 iface xen-br0 inet static address %YOUR_IP_ADDRESS% netmask %YOUR_NETMASK% network %YOUR_NETWORK% broadcast %YOUR_BROADCAST_ADDRESS% gateway %YOUR_GATEWAY% bridge_ports eth0 bridge_stp off bridge_fd 0 # example for setting manually the bridge address to the eth0 NIC up ip link set addr $(cat /sys/class/net/eth0/address) dev $IFACE The following commands need to be executed on the local console:: $ ifdown eth0 $ ifup xen-br0 To check if the bridge is setup, use the ``ip`` and ``brctl show`` commands:: $ ip a show xen-br0 9: xen-br0: mtu 1500 qdisc noqueue link/ether 00:20:fc:1e:d5:5d brd ff:ff:ff:ff:ff:ff inet 10.1.1.200/24 brd 10.1.1.255 scope global xen-br0 inet6 fe80::220:fcff:fe1e:d55d/64 scope link valid_lft forever preferred_lft forever $ brctl show xen-br0 bridge name bridge id STP enabled interfaces xen-br0 8000.0020fc1ed55d no eth0 In order to have a custom and more advanced networking configuration in Xen which can vary among instances, after having successfully installed Ganeti you have to create a symbolic link to the vif-script provided by Ganeti inside /etc/xen/scripts (assuming you installed Ganeti under /usr/lib):: $ ln -s /usr/lib/ganeti/vif-ganeti /etc/xen/scripts/vif-ganeti This has to be done on all nodes. Afterwards you can set the ``vif_script`` hypervisor parameter to point to that script by:: $ gnt-cluster modify -H xen-pvm:vif_script=/etc/xen/scripts/vif-ganeti Having this hypervisor parameter you are able to create your own scripts and create instances with different networking configurations. .. _configure-lvm-label: Configuring LVM +++++++++++++++ **Mandatory** on all nodes. The volume group is required to be at least 20GiB. If you haven't configured your LVM volume group at install time you need to do it before trying to initialize the Ganeti cluster. This is done by formatting the devices/partitions you want to use for it and then adding them to the relevant volume group:: $ pvcreate /dev/%sda3% $ vgcreate xenvg /dev/%sda3% or:: $ pvcreate /dev/%sdb1% $ pvcreate /dev/%sdc1% $ vgcreate xenvg /dev/%sdb1% /dev/%sdc1% If you want to add a device later you can do so with the *vgextend* command:: $ pvcreate /dev/%sdd1% $ vgextend xenvg /dev/%sdd1% Optional: it is recommended to configure LVM not to scan the DRBD devices for physical volumes. This can be accomplished by editing ``/etc/lvm/lvm.conf`` and adding the ``/dev/drbd[0-9]+`` regular expression to the ``filter`` variable, like this: .. code-block:: text filter = ["r|/dev/cdrom|", "r|/dev/drbd[0-9]+|" ] Note that with Ganeti a helper script is provided - ``lvmstrap`` which will erase and configure as LVM any not in-use disk on your system. This is dangerous and it's recommended to read its ``--help`` output if you want to use it. Installing Ganeti +++++++++++++++++ **Mandatory** on all nodes. It's now time to install the Ganeti software itself. 
Download the source from the project page at ``_, and install it (replace 2.6.0 with the latest version)::

  $ tar xvzf ganeti-%2.6.0%.tar.gz
  $ cd ganeti-%2.6.0%
  $ ./configure --localstatedir=/var --sysconfdir=/etc
  $ make
  $ make install
  $ mkdir /srv/ganeti/ /srv/ganeti/os /srv/ganeti/export

You also need to copy the file ``doc/examples/ganeti.initd`` from the source archive to ``/etc/init.d/ganeti`` and register it with your distribution's startup scripts, for example in Debian::

  $ chmod +x /etc/init.d/ganeti
  $ update-rc.d ganeti defaults 20 80

In order to automatically restart failed instances, you need to set up a cron job to run the *ganeti-watcher* command. A sample cron file is provided in the source at ``doc/examples/ganeti.cron`` and you can copy that (eventually altering the path) to ``/etc/cron.d/ganeti``. Finally, a sample logrotate snippet is provided in the source at ``doc/examples/ganeti.logrotate`` and you can copy it to ``/etc/logrotate.d/ganeti`` to have Ganeti's logs rotated automatically.

What gets installed
~~~~~~~~~~~~~~~~~~~

The above ``make install`` invocation, or installing via your distribution mechanisms, will install on the system:

- a set of python libraries under the *ganeti* namespace (depending on the python version this can be located in either ``lib/python-$ver/site-packages`` or various other locations)
- a set of programs under ``/usr/local/sbin`` or ``/usr/sbin``
- if the htools component was enabled, a set of programs under ``/usr/local/bin`` or ``/usr/bin/``
- man pages for the above programs
- a set of tools under the ``lib/ganeti/tools`` directory
- an example iallocator script (see the admin guide for details) under ``lib/ganeti/iallocators``
- a cron job that is needed for cluster maintenance
- an init script for automatic startup of Ganeti daemons
- provided but not installed automatically by ``make install`` is a bash completion script that hopefully will ease working with the many cluster commands

Installing the Operating System support packages
++++++++++++++++++++++++++++++++++++++++++++++++

**Mandatory** on all nodes.

To be able to install instances you need to have an Operating System installation script. An example OS that works under Debian and can install Debian and Ubuntu instance OSes is provided on the project web site. Download it from the project page and follow the instructions in the ``README`` file. Here is the installation procedure (replace 0.12 with the latest version that is compatible with your ganeti version)::

  $ cd /usr/local/src/
  $ wget http://ganeti.googlecode.com/files/ganeti-instance-debootstrap-%0.12%.tar.gz
  $ tar xzf ganeti-instance-debootstrap-%0.12%.tar.gz
  $ cd ganeti-instance-debootstrap-%0.12%
  $ ./configure --with-os-dir=/srv/ganeti/os
  $ make
  $ make install

In order to use this OS definition, you need to have internet access from your nodes and have the *debootstrap*, *dump* and *restore* commands installed on all nodes. Also, if the OS is configured to partition the instance's disk in ``/etc/default/ganeti-instance-debootstrap``, you will need *kpartx* installed.

.. admonition:: Debian

   Use this command on all nodes to install the required packages::

     $ apt-get install debootstrap dump kpartx

   Or alternatively install the OS definition from the Debian package::

     $ apt-get install ganeti-instance-debootstrap

.. admonition:: KVM

   In order for debootstrap instances to be able to shut down cleanly, they must have basic ACPI support installed inside the instance.
   Which packages are needed depends on the exact flavor of Debian or Ubuntu which you're installing, but the example defaults file has a commented out configuration line that works for Debian Lenny and Squeeze::

     EXTRA_PKGS="acpi-support-base,console-tools,udev"

   ``kbd`` can be used instead of ``console-tools``, and more packages can be added, of course, if needed.

Please refer to the ``README`` file of ``ganeti-instance-debootstrap`` for further documentation.

Alternatively, you can create your own OS definitions. See the manpage :manpage:`ganeti-os-interface(7)`.

Initializing the cluster
++++++++++++++++++++++++

**Mandatory** once per cluster, on the first node.

The last step is to initialize the cluster. After you have repeated the above process on all of your nodes, choose one as the master. Make sure there is a SSH key pair on the master node (optionally generating one using ``ssh-keygen``). Finally execute::

  $ gnt-cluster init %CLUSTERNAME%

The *CLUSTERNAME* is a hostname, which must be resolvable (e.g. it must exist in DNS or in ``/etc/hosts``) by all the nodes in the cluster. You must choose a name different from any of the nodes names for a multi-node cluster. In general the best choice is to have a unique name for a cluster, even if it consists of only one machine, as you will be able to expand it later without any problems. Please note that the hostname used for this must resolve to an IP address reserved **exclusively** for this purpose, and cannot be the name of the first (master) node.

If you want to use a bridge which is not ``xen-br0``, or no bridge at all, change it with the ``--nic-parameters`` option. For example to bridge on br0 you can add::

  --nic-parameters link=br0

Or to not bridge at all, and use a separate routing table::

  --nic-parameters mode=routed,link=100

If you don't have a ``xen-br0`` interface you also have to specify a different network interface which will get the cluster IP, on the master node, by using the ``--master-netdev `` option.

You can use a different name than ``xenvg`` for the volume group (but note that the name must be identical on all nodes). In this case you need to specify it by passing the *--vg-name * option to ``gnt-cluster init``.

To set up the cluster as a Xen HVM cluster, use the ``--enabled-hypervisors=xen-hvm`` option to enable the HVM hypervisor (you can also add ``,xen-pvm`` to enable the PVM one too). You will also need to create the VNC cluster password file ``/etc/ganeti/vnc-cluster-password`` which contains one line with the default VNC password for the cluster.

To set up the cluster for KVM-only usage (KVM and Xen cannot be mixed), pass ``--enabled-hypervisors=kvm`` to the init command.

You can also invoke the command with the ``--help`` option in order to see all the possibilities.

Hypervisor/Network/Cluster parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Please note that the default hypervisor/network/cluster parameters may not be the correct ones for your environment. Carefully check them, and change them either at cluster init time, or later with ``gnt-cluster modify``. Your instance types, networking environment, hypervisor type and version may all affect what kind of parameters should be used on your cluster.

.. admonition:: KVM

   Instances are by default configured to use a host kernel, and to be reached via serial console, which works nicely for Linux paravirtualized instances. If you want fully virtualized instances you may want to handle their kernel inside the instance, and to use VNC.
Some versions of KVM have a bug that will make an instance hang when configured to use the serial console (which is the default) unless a connection is made to it within about 2 seconds of the instance's startup. In such cases it's recommended to disable the ``serial_console`` option. Joining the nodes to the cluster ++++++++++++++++++++++++++++++++ **Mandatory** for all the other nodes. After you have initialized your cluster you need to join the other nodes to it. You can do so by executing the following command on the master node:: $ gnt-node add %NODENAME% Separate replication network ++++++++++++++++++++++++++++ **Optional** Ganeti uses DRBD to mirror the disk of the virtual instances between nodes. To use a dedicated network interface for this (in order to improve performance or to enhance security) you need to configure an additional interface for each node. Use the *-s* option with ``gnt-cluster init`` and ``gnt-node add`` to specify the IP address of this secondary interface to use for each node. Note that if you specified this option at cluster setup time, you must afterwards use it for every node add operation. Testing the setup +++++++++++++++++ Execute the ``gnt-node list`` command to see all nodes in the cluster:: $ gnt-node list Node DTotal DFree MTotal MNode MFree Pinst Sinst node1.example.com 197404 197404 2047 1896 125 0 0 The above shows a couple of things: - The various Ganeti daemons can talk to each other - Ganeti can examine the storage of the node (DTotal/DFree) - Ganeti can talk to the selected hypervisor (MTotal/MNode/MFree) Cluster burnin ~~~~~~~~~~~~~~ Ganeti provides a tool called :command:`burnin` that can test most of the Ganeti functionality. The tool is installed under the ``lib/ganeti/tools`` directory (either under ``/usr`` or ``/usr/local`` based on the installation method). See more details under :ref:`burnin-label`. Further steps ------------- You can now proceed either to the :doc:`admin`, or read the manpages of the various commands (:manpage:`ganeti(7)`, :manpage:`gnt-cluster(8)`, :manpage:`gnt-node(8)`, :manpage:`gnt-instance(8)`, :manpage:`gnt-job(8)`). .. rubric:: Footnotes .. [#defkernel] The kernel and initrd paths can be changed at either cluster level (which changes the default for all instances) or at instance level. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/rapi.rst0000644000000000000000000014376112271422343015240 0ustar00rootroot00000000000000Ganeti remote API ================= Documents Ganeti version |version| .. contents:: Introduction ------------ Ganeti supports a remote API that enables external tools to easily retrieve information about a cluster's state. The remote API daemon, *ganeti-rapi*, is automatically started on the master node. By default it runs on TCP port 5080, but this can be changed either in ``.../constants.py`` or via the command line parameter *-p*. SSL mode, which is used by default, can also be disabled by passing command line parameters. .. _rapi-users: Users and passwords ------------------- ``ganeti-rapi`` reads users and passwords from a file (usually ``/var/lib/ganeti/rapi/users``) on startup. Changes to the file will be read automatically. Lines starting with the hash sign (``#``) are treated as comments. Each line consists of two or three fields separated by whitespace. The first two fields are for username and password. The third field is optional and can be used to specify per-user options (separated by comma without spaces).
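As a quick illustration of the file format, here is a minimal Python sketch (this helper is not part of Ganeti; the username, password and options shown are made up) that builds such a line, hashing the password with the ``{ha1}`` scheme explained in the next paragraph::

  import hashlib

  def users_file_line(username, password, options=""):
      # The hash is the MD5 hex digest of "username:realm:password",
      # where the realm is the fixed string "Ganeti Remote API".
      ha1 = hashlib.md5("%s:Ganeti Remote API:%s"
                        % (username, password)).hexdigest()
      fields = [username, "{ha1}" + ha1]
      if options:
          fields.append(options)
      return " ".join(fields)

  print users_file_line("jack", "abc123", "read")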
Passwords can either be written in clear text or as a hash. Clear text passwords may not start with an opening brace (``{``); if they do, they must be prefixed with ``{cleartext}``. To use the hashed form, get the MD5 hash of the string ``$username:Ganeti Remote API:$password`` (e.g. ``echo -n 'jack:Ganeti Remote API:abc123' | openssl md5``) [#pwhash]_ and prefix it with ``{ha1}``. Using the scheme prefix for all passwords is recommended. Scheme prefixes are case insensitive. Options control a user's access permissions. The section :ref:`rapi-access-permissions` lists the permissions required for each resource. If the ``--require-authentication`` command line option is given to the ``ganeti-rapi`` daemon, all requests require authentication. Available options: .. pyassert:: rapi.RAPI_ACCESS_ALL == set([ rapi.RAPI_ACCESS_WRITE, rapi.RAPI_ACCESS_READ, ]) .. pyassert:: rlib2.R_2_nodes_name_storage.GET_ACCESS == [rapi.RAPI_ACCESS_WRITE] .. pyassert:: rlib2.R_2_jobs_id_wait.GET_ACCESS == [rapi.RAPI_ACCESS_WRITE] :pyeval:`rapi.RAPI_ACCESS_WRITE` Enables the user to execute operations modifying the cluster. Implies :pyeval:`rapi.RAPI_ACCESS_READ` access. Resources blocking other operations for read-only access, such as :ref:`/2/nodes/[node_name]/storage ` or blocking server-side processes, such as :ref:`/2/jobs/[job_id]/wait `, use :pyeval:`rapi.RAPI_ACCESS_WRITE` to control access to their :pyeval:`http.HTTP_GET` method. :pyeval:`rapi.RAPI_ACCESS_READ` Allows access to operations querying for information. Example:: # Give Jack and Fred read-only access jack abc123 fred {cleartext}foo555 # Give write access to an imaginary instance creation script autocreator xyz789 write # Hashed password for Jessica jessica {HA1}7046452df2cbb530877058712cf17bd4 write # Monitoring can query for values monitoring {HA1}ec018ffe72b8e75bb4d508ed5b6d079c read # A user who can read and write (the former is implied by granting # write access) superuser {HA1}ec018ffe72b8e75bb4d508ed5b6d079c read,write When using the RAPI, username and password can be sent to the server by using the standard HTTP basic access authentication. This means that for accessing the protected URL ``https://cluster.example.com/resource``, the address ``https://username:password@cluster.example.com/resource`` should be used instead. Alternatively, the appropriate parameter of your HTTP client (such as ``-u`` for ``curl``) can be used. .. [#pwhash] Using the MD5 hash of username, realm and password is described in :rfc:`2617` ("HTTP Authentication"), sections 3.2.2.2 and 3.3. The reason for using it over another algorithm is forward compatibility. If ``ganeti-rapi`` were to implement HTTP Digest authentication in the future, the same hash could be used. In the current version ``ganeti-rapi``'s realm, ``Ganeti Remote API``, can only be changed by modifying the source code. Protocol -------- The protocol used is JSON_ over HTTP designed after the REST_ principle. HTTP Basic authentication as per :rfc:`2617` is supported. .. _JSON: http://www.json.org/ .. _REST: http://en.wikipedia.org/wiki/Representational_State_Transfer HTTP requests with a body (e.g. ``PUT`` or ``POST``) require the request header ``Content-type`` be set to ``application/json`` (see :rfc:`2616` (HTTP/1.1), section 7.2.1). A note on JSON as used by RAPI ++++++++++++++++++++++++++++++ JSON_ as used by Ganeti RAPI does not conform to the specification in :rfc:`4627`. Section 2 defines a JSON text to be either an object (``{"key": "value", …}``) or an array (``[1, 2, 3, …]``).
In violation of this, RAPI uses plain strings (``"master-candidate"``, ``"1234"``) for some requests or responses. Changing this now would likely break existing clients and cause a lot of trouble. .. highlight:: ruby Unlike Python's `JSON encoder and decoder `_, other programming languages or libraries may only provide a strict implementation, not allowing plain values. For those, responses can usually be wrapped in an array whose first element is then used, e.g. the response ``"1234"`` becomes ``["1234"]``. This works equally well for more complex values. Example in Ruby:: require "json" # Insert code to get response here response = "\"1234\"" decoded = JSON.parse("[#{response}]").first Short of modifying the encoder to allow encoding to a less strict format, requests will have to be formatted by hand. Newer RAPI requests already use a dictionary as their input data and shouldn't cause any problems. PUT or POST? ------------ According to :rfc:`2616` the main difference between PUT and POST is that POST can create new resources but PUT can only create the resource the URI was pointing to on the PUT request. Unfortunately, due to historic reasons, the Ganeti RAPI library is not consistent with this usage, so just use the methods as documented below for each resource. For more details have a look in the source code at ``lib/rapi/rlib2.py``. Generic parameter types ----------------------- A few generic parameter types are referred to repeatedly; this section describes the values they allow. ``bool`` ++++++++ A boolean option will accept ``1`` or ``0`` as numbers but not e.g. ``True`` or ``False``. Generic parameters ------------------ A few parameters mean the same thing across all resources which implement them. ``bulk`` ++++++++ Bulk-mode means that for the resources which usually return just a list of child resources (e.g. ``/2/instances`` which returns just instance names), the output will instead contain detailed data for all these subresources. This is more efficient than querying the sub-resources themselves. ``dry-run`` +++++++++++ The boolean *dry-run* argument, if provided and set, signals to Ganeti that the job should not be executed; only the pre-execution checks will be done. This is useful in trying to determine (without guarantees though, as in the meantime the cluster state could have changed) if the operation is likely to succeed or at least start executing. ``force`` +++++++++++ Forces the operation to continue even if it will cause the cluster to become inconsistent (e.g. because there are not enough master candidates). Parameter details ----------------- Some parameters are not straightforward, so we describe them in detail here. .. _rapi-ipolicy: ``ipolicy`` +++++++++++ The instance policy specification is a dict with the following fields: .. pyassert:: constants.IPOLICY_ALL_KEYS == set([constants.ISPECS_MINMAX, constants.ISPECS_STD, constants.IPOLICY_DTS, constants.IPOLICY_VCPU_RATIO, constants.IPOLICY_SPINDLE_RATIO]) .. pyassert:: (set(constants.ISPECS_PARAMETER_TYPES.keys()) == set([constants.ISPEC_MEM_SIZE, constants.ISPEC_DISK_SIZE, constants.ISPEC_DISK_COUNT, constants.ISPEC_CPU_COUNT, constants.ISPEC_NIC_COUNT, constants.ISPEC_SPINDLE_USE])) .. |ispec-min| replace:: :pyeval:`constants.ISPECS_MIN` .. |ispec-max| replace:: :pyeval:`constants.ISPECS_MAX` ..
|ispec-std| replace:: :pyeval:`constants.ISPECS_STD` :pyeval:`constants.ISPECS_MINMAX` A list of dictionaries, each with the following two fields: |ispec-min|, |ispec-max| A sub-`dict` with the following fields, which sets the limits of the instances: :pyeval:`constants.ISPEC_MEM_SIZE` The size in MiB of the memory used :pyeval:`constants.ISPEC_DISK_SIZE` The size in MiB of the disk used :pyeval:`constants.ISPEC_DISK_COUNT` The number of disks used :pyeval:`constants.ISPEC_CPU_COUNT` The number of cpus used :pyeval:`constants.ISPEC_NIC_COUNT` The number of nics used :pyeval:`constants.ISPEC_SPINDLE_USE` The number of virtual disk spindles used by this instance. They are not real in the sense of actual HDD spindles, but useful for accounting for spindle usage on the node the instance resides on |ispec-std| A sub-`dict` with the same fields as |ispec-min| and |ispec-max| above, which sets the standard values of the instances. :pyeval:`constants.IPOLICY_DTS` A `list` of disk templates allowed for instances using this policy :pyeval:`constants.IPOLICY_VCPU_RATIO` Maximum ratio of virtual to physical CPUs (`float`) :pyeval:`constants.IPOLICY_SPINDLE_RATIO` Maximum ratio of instances to their node's ``spindle_count`` (`float`) Usage examples -------------- You can access the API using your favorite programming language as long as it supports network connections. Ganeti RAPI client ++++++++++++++++++ Ganeti includes a standalone RAPI client, ``lib/rapi/client.py``. Shell +++++ .. highlight:: shell-example Using ``wget``:: $ wget -q -O - https://%CLUSTERNAME%:5080/2/info or ``curl``:: $ curl https://%CLUSTERNAME%:5080/2/info Note: with ``curl``, the request method (GET, POST, PUT) can be specified using the ``-X`` command line option, and the username/password can be specified with the ``-u`` option. In case of POST requests with a body, the Content-Type can be set to JSON (as per the Protocol_ section) using the parameter ``-H "Content-Type: application/json"``. Python ++++++ .. highlight:: python :: import urllib2 f = urllib2.urlopen('https://CLUSTERNAME:5080/2/info') print f.read() JavaScript ++++++++++ .. warning:: While it's possible to use JavaScript, it poses several potential problems, including browsers blocking requests due to non-standard ports or different domain names. Fetching the data on the webserver is easier. .. highlight:: javascript :: var url = 'https://CLUSTERNAME:5080/2/info'; var info; var xmlreq = new XMLHttpRequest(); xmlreq.onreadystatechange = function () { if (xmlreq.readyState != 4) return; if (xmlreq.status == 200) { info = eval("(" + xmlreq.responseText + ")"); alert(info); } else { alert('Error fetching cluster info'); } xmlreq = null; }; xmlreq.open('GET', url, true); xmlreq.send(null); Resources --------- .. highlight:: javascript ``/`` +++++ The root resource. Has no function, but for legacy reasons the ``GET`` method is supported. ``/2`` ++++++ Has no function, but for legacy reasons the ``GET`` method is supported. .. _rapi-res-info: ``/2/info`` +++++++++++ Cluster information resource. .. rapi_resource_details:: /2/info .. _rapi-res-info+get: ``GET`` ~~~~~~~ Returns cluster information.
Example:: { "config_version": 2000000, "name": "cluster", "software_version": "2.0.0~beta2", "os_api_version": 10, "export_version": 0, "candidate_pool_size": 10, "enabled_hypervisors": [ "fake" ], "hvparams": { "fake": {} }, "default_hypervisor": "fake", "master": "node1.example.com", "architecture": [ "64bit", "x86_64" ], "protocol_version": 20, "beparams": { "default": { "auto_balance": true, "vcpus": 1, "memory": 128 } }, … } .. _rapi-res-redistribute-config: ``/2/redistribute-config`` ++++++++++++++++++++++++++ Redistribute configuration to all nodes. .. rapi_resource_details:: /2/redistribute-config .. _rapi-res-redistribute-config+put: ``PUT`` ~~~~~~~ Redistribute configuration to all nodes. The result will be a job id. Job result: .. opcode_result:: OP_CLUSTER_REDIST_CONF .. _rapi-res-features: ``/2/features`` +++++++++++++++ .. rapi_resource_details:: /2/features .. _rapi-res-features+get: ``GET`` ~~~~~~~ Returns a list of features supported by the RAPI server. Available features: .. pyassert:: rlib2.ALL_FEATURES == set([rlib2._INST_CREATE_REQV1, rlib2._INST_REINSTALL_REQV1, rlib2._NODE_MIGRATE_REQV1, rlib2._NODE_EVAC_RES1]) :pyeval:`rlib2._INST_CREATE_REQV1` Instance creation request data version 1 supported :pyeval:`rlib2._INST_REINSTALL_REQV1` Instance reinstall supports body parameters :pyeval:`rlib2._NODE_MIGRATE_REQV1` Whether migrating a node (``/2/nodes/[node_name]/migrate``) supports request body parameters :pyeval:`rlib2._NODE_EVAC_RES1` Whether evacuating a node (``/2/nodes/[node_name]/evacuate``) returns a new-style result (see resource description) .. _rapi-res-modify: ``/2/modify`` ++++++++++++++++++++++++++++++++++++++++ Modifies cluster parameters. .. rapi_resource_details:: /2/modify .. _rapi-res-modify+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_CLUSTER_SET_PARAMS Job result: .. opcode_result:: OP_CLUSTER_SET_PARAMS .. _rapi-res-groups: ``/2/groups`` +++++++++++++ The groups resource. .. rapi_resource_details:: /2/groups .. _rapi-res-groups+get: ``GET`` ~~~~~~~ Returns a list of all existing node groups. Example:: [ { "name": "group1", "uri": "\/2\/groups\/group1" }, { "name": "group2", "uri": "\/2\/groups\/group2" } ] If the optional bool *bulk* argument is provided and set to a true value (i.e ``?bulk=1``), the output contains detailed information about node groups as a list. Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.G_FIELDS))`. Example:: [ { "name": "group1", "node_cnt": 2, "node_list": [ "node1.example.com", "node2.example.com" ], "uuid": "0d7d407c-262e-49af-881a-6a430034bf43", … }, { "name": "group2", "node_cnt": 1, "node_list": [ "node3.example.com" ], "uuid": "f5a277e7-68f9-44d3-a378-4b25ecb5df5c", … }, … ] .. _rapi-res-groups+post: ``POST`` ~~~~~~~~ Creates a node group. If the optional bool *dry-run* argument is provided, the job will not be actually executed, only the pre-execution checks will be done. Returns: a job ID that can be used later for polling. Body parameters: .. opcode_params:: OP_GROUP_ADD Earlier versions used a parameter named ``name`` which, while still supported, has been renamed to ``group_name``. Job result: .. opcode_result:: OP_GROUP_ADD .. _rapi-res-groups-group_name: ``/2/groups/[group_name]`` ++++++++++++++++++++++++++ Returns information about a node group. .. rapi_resource_details:: /2/groups/[group_name] .. _rapi-res-groups-group_name+get: ``GET`` ~~~~~~~ Returns information about a node group, similar to the bulk output from the node group list. 
Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.G_FIELDS))`. .. _rapi-res-groups-group_name+delete: ``DELETE`` ~~~~~~~~~~ Deletes a node group. It supports the ``dry-run`` argument. Job result: .. opcode_result:: OP_GROUP_REMOVE .. _rapi-res-groups-group_name-modify: ``/2/groups/[group_name]/modify`` +++++++++++++++++++++++++++++++++ Modifies the parameters of a node group. .. rapi_resource_details:: /2/groups/[group_name]/modify .. _rapi-res-groups-group_name-modify+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_GROUP_SET_PARAMS :exclude: group_name Job result: .. opcode_result:: OP_GROUP_SET_PARAMS .. _rapi-res-groups-group_name-rename: ``/2/groups/[group_name]/rename`` +++++++++++++++++++++++++++++++++ Renames a node group. .. rapi_resource_details:: /2/groups/[group_name]/rename .. _rapi-res-groups-group_name-rename+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_GROUP_RENAME :exclude: group_name Job result: .. opcode_result:: OP_GROUP_RENAME .. _rapi-res-groups-group_name-assign-nodes: ``/2/groups/[group_name]/assign-nodes`` +++++++++++++++++++++++++++++++++++++++ Assigns nodes to a group. .. rapi_resource_details:: /2/groups/[group_name]/assign-nodes .. _rapi-res-groups-group_name-assign-nodes+put: ``PUT`` ~~~~~~~ Returns a job ID. It supports the ``dry-run`` and ``force`` arguments. Body parameters: .. opcode_params:: OP_GROUP_ASSIGN_NODES :exclude: group_name, force, dry_run Job result: .. opcode_result:: OP_GROUP_ASSIGN_NODES .. _rapi-res-groups-group_name-tags: ``/2/groups/[group_name]/tags`` +++++++++++++++++++++++++++++++ Manages per-nodegroup tags. .. rapi_resource_details:: /2/groups/[group_name]/tags .. _rapi-res-groups-group_name-tags+get: ``GET`` ~~~~~~~ Returns a list of tags. Example:: ["tag1", "tag2", "tag3"] .. _rapi-res-groups-group_name-tags+put: ``PUT`` ~~~~~~~ Adds a set of tags. The request as a list of strings should be ``PUT`` to this URI. The result will be a job id. It supports the ``dry-run`` argument. .. _rapi-res-groups-group_name-tags+delete: ``DELETE`` ~~~~~~~~~~ Deletes a tag. In order to delete a set of tags, the DELETE request should be addressed to a URI like:: /tags?tag=[tag]&tag=[tag] It supports the ``dry-run`` argument. .. _rapi-res-networks: ``/2/networks`` +++++++++++++++ The networks resource. .. rapi_resource_details:: /2/networks .. _rapi-res-networks+get: ``GET`` ~~~~~~~ Returns a list of all existing networks. Example:: [ { "name": "network1", "uri": "\/2\/networks\/network1" }, { "name": "network2", "uri": "\/2\/networks\/network2" } ] If the optional bool *bulk* argument is provided and set to a true value (i.e. ``?bulk=1``), the output contains detailed information about networks as a list. Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.NET_FIELDS))`. Example:: [ { 'external_reservations': '10.0.0.0, 10.0.0.1, 10.0.0.15', 'free_count': 13, 'gateway': '10.0.0.1', 'gateway6': None, 'group_list': ['default(bridged, prv0)'], 'inst_list': [], 'mac_prefix': None, 'map': 'XX.............X', 'name': 'nat', 'network': '10.0.0.0/28', 'network6': None, 'reserved_count': 3, 'tags': ['nfdhcpd'], … }, … ] .. _rapi-res-networks+post: ``POST`` ~~~~~~~~ Creates a network. If the optional bool *dry-run* argument is provided, the job will not actually be executed; only the pre-execution checks will be done. Returns: a job ID that can be used later for polling. Body parameters: .. opcode_params:: OP_NETWORK_ADD Job result: .. opcode_result:: OP_NETWORK_ADD ..
_rapi-res-networks-network_name: ``/2/networks/[network_name]`` ++++++++++++++++++++++++++++++ Returns information about a network. .. rapi_resource_details:: /2/networks/[network_name] .. _rapi-res-networks-network_name+get: ``GET`` ~~~~~~~ Returns information about a network, similar to the bulk output from the network list. Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.NET_FIELDS))`. .. _rapi-res-networks-network_name+delete: ``DELETE`` ~~~~~~~~~~ Deletes a network. It supports the ``dry-run`` argument. Job result: .. opcode_result:: OP_NETWORK_REMOVE .. _rapi-res-networks-network_name-modify: ``/2/networks/[network_name]/modify`` +++++++++++++++++++++++++++++++++++++ Modifies the parameters of a network. .. rapi_resource_details:: /2/networks/[network_name]/modify .. _rapi-res-networks-network_name-modify+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_NETWORK_SET_PARAMS Job result: .. opcode_result:: OP_NETWORK_SET_PARAMS .. _rapi-res-networks-network_name-connect: ``/2/networks/[network_name]/connect`` ++++++++++++++++++++++++++++++++++++++ Connects a network to a nodegroup. .. rapi_resource_details:: /2/networks/[network_name]/connect .. _rapi-res-networks-network_name-connect+put: ``PUT`` ~~~~~~~ Returns a job ID. It supports the ``dry-run`` argument. Body parameters: .. opcode_params:: OP_NETWORK_CONNECT Job result: .. opcode_result:: OP_NETWORK_CONNECT .. _rapi-res-networks-network_name-disconnect: ``/2/networks/[network_name]/disconnect`` +++++++++++++++++++++++++++++++++++++++++ Disconnects a network from a nodegroup. .. rapi_resource_details:: /2/networks/[network_name]/disconnect .. _rapi-res-networks-network_name-disconnect+put: ``PUT`` ~~~~~~~ Returns a job ID. It supports the ``dry-run`` argument. Body parameters: .. opcode_params:: OP_NETWORK_DISCONNECT Job result: .. opcode_result:: OP_NETWORK_DISCONNECT .. _rapi-res-networks-network_name-tags: ``/2/networks/[network_name]/tags`` +++++++++++++++++++++++++++++++++++ Manages per-network tags. .. rapi_resource_details:: /2/networks/[network_name]/tags .. _rapi-res-networks-network_name-tags+get: ``GET`` ~~~~~~~ Returns a list of tags. Example:: ["tag1", "tag2", "tag3"] .. _rapi-res-networks-network_name-tags+put: ``PUT`` ~~~~~~~ Adds a set of tags. The request as a list of strings should be ``PUT`` to this URI. The result will be a job id. It supports the ``dry-run`` argument. .. _rapi-res-networks-network_name-tags+delete: ``DELETE`` ~~~~~~~~~~ Deletes a tag. In order to delete a set of tags, the DELETE request should be addressed to a URI like:: /tags?tag=[tag]&tag=[tag] It supports the ``dry-run`` argument. .. _rapi-res-instances-multi-alloc: ``/2/instances-multi-alloc`` ++++++++++++++++++++++++++++ Tries to allocate multiple instances. .. rapi_resource_details:: /2/instances-multi-alloc .. _rapi-res-instances-multi-alloc+post: ``POST`` ~~~~~~~~ The parameters: .. opcode_params:: OP_INSTANCE_MULTI_ALLOC Job result: .. opcode_result:: OP_INSTANCE_MULTI_ALLOC .. _rapi-res-instances: ``/2/instances`` ++++++++++++++++ The instances resource. .. rapi_resource_details:: /2/instances .. _rapi-res-instances+get: ``GET`` ~~~~~~~ Returns a list of all available instances. Example:: [ { "name": "web.example.com", "uri": "\/instances\/web.example.com" }, { "name": "mail.example.com", "uri": "\/instances\/mail.example.com" } ] If the optional bool *bulk* argument is provided and set to a true value (i.e. ``?bulk=1``), the output contains detailed information about instances as a list.
Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.I_FIELDS))`. Example:: [ { "status": "running", "disk_usage": 20480, "nic.bridges": [ "xen-br0" ], "name": "web.example.com", "tags": ["tag1", "tag2"], "beparams": { "vcpus": 2, "memory": 512 }, "disk.sizes": [ 20480 ], "pnode": "node1.example.com", "nic.macs": ["01:23:45:67:89:01"], "snodes": ["node2.example.com"], "disk_template": "drbd", "admin_state": true, "os": "debian-etch", "oper_state": true, … }, … ] .. _rapi-res-instances+post: ``POST`` ~~~~~~~~ Creates an instance. If the optional bool *dry-run* argument is provided, the job will not actually be executed; only the pre-execution checks will be done. Querying the job result will return, in both dry-run and normal case, the list of nodes selected for the instance. Returns: a job ID that can be used later for polling. Body parameters: ``__version__`` (int, required) Must be ``1`` (older Ganeti versions used a different format for instance creation requests, version ``0``, but that format is no longer supported) .. opcode_params:: OP_INSTANCE_CREATE Earlier versions used parameters named ``name`` and ``os``. These have been replaced by ``instance_name`` and ``os_type`` to match the underlying opcode. The old names can still be used. Job result: .. opcode_result:: OP_INSTANCE_CREATE .. _rapi-res-instances-instance_name: ``/2/instances/[instance_name]`` ++++++++++++++++++++++++++++++++ Instance-specific resource. .. rapi_resource_details:: /2/instances/[instance_name] .. _rapi-res-instances-instance_name+get: ``GET`` ~~~~~~~ Returns information about an instance, similar to the bulk output from the instance list. Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.I_FIELDS))`. .. _rapi-res-instances-instance_name+delete: ``DELETE`` ~~~~~~~~~~ Deletes an instance. It supports the ``dry-run`` argument. Job result: .. opcode_result:: OP_INSTANCE_REMOVE .. _rapi-res-instances-instance_name-info: ``/2/instances/[instance_name]/info`` +++++++++++++++++++++++++++++++++++++++ .. rapi_resource_details:: /2/instances/[instance_name]/info .. _rapi-res-instances-instance_name-info+get: ``GET`` ~~~~~~~ Requests detailed information about the instance. An optional parameter, ``static`` (bool), can be set to return only static information from the configuration without querying the instance's nodes. The result will be a job id. Job result: .. opcode_result:: OP_INSTANCE_QUERY_DATA .. _rapi-res-instances-instance_name-reboot: ``/2/instances/[instance_name]/reboot`` +++++++++++++++++++++++++++++++++++++++ Reboot URI for an instance. .. rapi_resource_details:: /2/instances/[instance_name]/reboot .. _rapi-res-instances-instance_name-reboot+post: ``POST`` ~~~~~~~~ Reboots the instance. The URI takes optional ``type=soft|hard|full`` and ``ignore_secondaries=0|1`` parameters. ``type`` defines the reboot type. ``soft`` is just a normal reboot, without terminating the hypervisor. ``hard`` means full shutdown (including terminating the hypervisor process) and startup again. ``full`` is like ``hard`` but also recreates the configuration from the ground up as if you had done a ``gnt-instance shutdown`` and ``gnt-instance start`` on it. ``ignore_secondaries`` is a bool argument indicating if we start the instance even if secondary disks are failing. It supports the ``dry-run`` argument. Job result: .. opcode_result:: OP_INSTANCE_REBOOT .. _rapi-res-instances-instance_name-shutdown: ``/2/instances/[instance_name]/shutdown`` +++++++++++++++++++++++++++++++++++++++++ Instance shutdown URI. ..
rapi_resource_details:: /2/instances/[instance_name]/shutdown .. _rapi-res-instances-instance_name-shutdown+put: ``PUT`` ~~~~~~~ Shuts down an instance. It supports the ``dry-run`` argument. .. opcode_params:: OP_INSTANCE_SHUTDOWN :exclude: instance_name, dry_run Job result: .. opcode_result:: OP_INSTANCE_SHUTDOWN .. _rapi-res-instances-instance_name-startup: ``/2/instances/[instance_name]/startup`` ++++++++++++++++++++++++++++++++++++++++ Instance startup URI. .. rapi_resource_details:: /2/instances/[instance_name]/startup .. _rapi-res-instances-instance_name-startup+put: ``PUT`` ~~~~~~~ Starts an instance. The URI takes an optional ``force=1|0`` parameter to start the instance even if secondary disks are failing. It supports the ``dry-run`` argument. Job result: .. opcode_result:: OP_INSTANCE_STARTUP .. _rapi-res-instances-instance_name-reinstall: ``/2/instances/[instance_name]/reinstall`` ++++++++++++++++++++++++++++++++++++++++++++++ Installs the operating system again. .. rapi_resource_details:: /2/instances/[instance_name]/reinstall .. _rapi-res-instances-instance_name-reinstall+post: ``POST`` ~~~~~~~~ Returns a job ID. Body parameters: ``os`` (string, required) Instance operating system. ``start`` (bool, defaults to true) Whether to start instance after reinstallation. ``osparams`` (dict) Dictionary with (temporary) OS parameters. For backwards compatibility, this resource also takes the query parameters ``os`` (OS template name) and ``nostartup`` (bool). New clients should use the body parameters. .. _rapi-res-instances-instance_name-replace-disks: ``/2/instances/[instance_name]/replace-disks`` ++++++++++++++++++++++++++++++++++++++++++++++ Replaces disks on an instance. .. rapi_resource_details:: /2/instances/[instance_name]/replace-disks .. _rapi-res-instances-instance_name-replace-disks+post: ``POST`` ~~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_REPLACE_DISKS :exclude: instance_name Ganeti 2.4 and below used query parameters. Those are deprecated and should no longer be used. Job result: .. opcode_result:: OP_INSTANCE_REPLACE_DISKS .. _rapi-res-instances-instance_name-activate-disks: ``/2/instances/[instance_name]/activate-disks`` +++++++++++++++++++++++++++++++++++++++++++++++ Activate disks on an instance. .. rapi_resource_details:: /2/instances/[instance_name]/activate-disks .. _rapi-res-instances-instance_name-activate-disks+put: ``PUT`` ~~~~~~~ Takes the bool parameter ``ignore_size``. When set, it ignores the recorded size (useful for forcing activation when the recorded size is wrong). Job result: .. opcode_result:: OP_INSTANCE_ACTIVATE_DISKS .. _rapi-res-instances-instance_name-deactivate-disks: ``/2/instances/[instance_name]/deactivate-disks`` +++++++++++++++++++++++++++++++++++++++++++++++++ Deactivate disks on an instance. .. rapi_resource_details:: /2/instances/[instance_name]/deactivate-disks .. _rapi-res-instances-instance_name-deactivate-disks+put: ``PUT`` ~~~~~~~ Takes no parameters. Job result: .. opcode_result:: OP_INSTANCE_DEACTIVATE_DISKS .. _rapi-res-instances-instance_name-recreate-disks: ``/2/instances/[instance_name]/recreate-disks`` +++++++++++++++++++++++++++++++++++++++++++++++++ Recreate disks of an instance. .. rapi_resource_details:: /2/instances/[instance_name]/recreate-disks .. _rapi-res-instances-instance_name-recreate-disks+post: ``POST`` ~~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_RECREATE_DISKS :exclude: instance_name Job result: .. opcode_result:: OP_INSTANCE_RECREATE_DISKS ..
_rapi-res-instances-instance_name-disk-disk_index-grow: ``/2/instances/[instance_name]/disk/[disk_index]/grow`` +++++++++++++++++++++++++++++++++++++++++++++++++++++++ Grows one disk of an instance. .. rapi_resource_details:: /2/instances/[instance_name]/disk/[disk_index]/grow .. _rapi-res-instances-instance_name-disk-disk_index-grow+post: ``POST`` ~~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_GROW_DISK :exclude: instance_name, disk Job result: .. opcode_result:: OP_INSTANCE_GROW_DISK .. _rapi-res-instances-instance_name-prepare-export: ``/2/instances/[instance_name]/prepare-export`` +++++++++++++++++++++++++++++++++++++++++++++++++ Prepares an export of an instance. .. rapi_resource_details:: /2/instances/[instance_name]/prepare-export .. _rapi-res-instances-instance_name-prepare-export+put: ``PUT`` ~~~~~~~ Takes one parameter, ``mode``, for the export mode. Returns a job ID. Job result: .. opcode_result:: OP_BACKUP_PREPARE .. _rapi-res-instances-instance_name-export: ``/2/instances/[instance_name]/export`` +++++++++++++++++++++++++++++++++++++++++++++++++ Exports an instance. .. rapi_resource_details:: /2/instances/[instance_name]/export .. _rapi-res-instances-instance_name-export+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_BACKUP_EXPORT :exclude: instance_name :alias: target_node=destination Job result: .. opcode_result:: OP_BACKUP_EXPORT .. _rapi-res-instances-instance_name-migrate: ``/2/instances/[instance_name]/migrate`` ++++++++++++++++++++++++++++++++++++++++ Migrates an instance. .. rapi_resource_details:: /2/instances/[instance_name]/migrate .. _rapi-res-instances-instance_name-migrate+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_MIGRATE :exclude: instance_name, live Job result: .. opcode_result:: OP_INSTANCE_MIGRATE .. _rapi-res-instances-instance_name-failover: ``/2/instances/[instance_name]/failover`` +++++++++++++++++++++++++++++++++++++++++ Does a failover of an instance. .. rapi_resource_details:: /2/instances/[instance_name]/failover .. _rapi-res-instances-instance_name-failover+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_FAILOVER :exclude: instance_name Job result: .. opcode_result:: OP_INSTANCE_FAILOVER .. _rapi-res-instances-instance_name-rename: ``/2/instances/[instance_name]/rename`` ++++++++++++++++++++++++++++++++++++++++ Renames an instance. .. rapi_resource_details:: /2/instances/[instance_name]/rename .. _rapi-res-instances-instance_name-rename+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_RENAME :exclude: instance_name Job result: .. opcode_result:: OP_INSTANCE_RENAME .. _rapi-res-instances-instance_name-modify: ``/2/instances/[instance_name]/modify`` ++++++++++++++++++++++++++++++++++++++++ Modifies an instance. .. rapi_resource_details:: /2/instances/[instance_name]/modify .. _rapi-res-instances-instance_name-modify+put: ``PUT`` ~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_INSTANCE_SET_PARAMS :exclude: instance_name Job result: .. opcode_result:: OP_INSTANCE_SET_PARAMS .. _rapi-res-instances-instance_name-console: ``/2/instances/[instance_name]/console`` ++++++++++++++++++++++++++++++++++++++++ Requests information for connecting to the instance's console. .. rapi_resource_details:: /2/instances/[instance_name]/console .. _rapi-res-instances-instance_name-console+get: ``GET`` ~~~~~~~ Returns a dictionary containing information about the instance's console.
Contained keys: .. pyassert:: constants.CONS_ALL == frozenset([ constants.CONS_MESSAGE, constants.CONS_SSH, constants.CONS_VNC, constants.CONS_SPICE, ]) .. pyassert:: frozenset(objects.InstanceConsole.GetAllSlots()) == frozenset([ "command", "display", "host", "instance", "kind", "message", "port", "user", ]) ``instance`` Instance name ``kind`` Console type, one of :pyeval:`constants.CONS_SSH`, :pyeval:`constants.CONS_VNC`, :pyeval:`constants.CONS_SPICE` or :pyeval:`constants.CONS_MESSAGE` ``message`` Message to display (:pyeval:`constants.CONS_MESSAGE` type only) ``host`` Host to connect to (:pyeval:`constants.CONS_SSH`, :pyeval:`constants.CONS_VNC` or :pyeval:`constants.CONS_SPICE` only) ``port`` TCP port to connect to (:pyeval:`constants.CONS_VNC` or :pyeval:`constants.CONS_SPICE` only) ``user`` Username to use (:pyeval:`constants.CONS_SSH` only) ``command`` Command to execute on machine (:pyeval:`constants.CONS_SSH` only) ``display`` VNC display number (:pyeval:`constants.CONS_VNC` only) .. _rapi-res-instances-instance_name-tags: ``/2/instances/[instance_name]/tags`` +++++++++++++++++++++++++++++++++++++ Manages per-instance tags. .. rapi_resource_details:: /2/instances/[instance_name]/tags .. _rapi-res-instances-instance_name-tags+get: ``GET`` ~~~~~~~ Returns a list of tags. Example:: ["tag1", "tag2", "tag3"] .. _rapi-res-instances-instance_name-tags+put: ``PUT`` ~~~~~~~ Adds a set of tags. The request as a list of strings should be ``PUT`` to this URI. The result will be a job id. It supports the ``dry-run`` argument. .. _rapi-res-instances-instance_name-tags+delete: ``DELETE`` ~~~~~~~~~~ Deletes a tag. In order to delete a set of tags, the DELETE request should be addressed to a URI like:: /tags?tag=[tag]&tag=[tag] It supports the ``dry-run`` argument. .. _rapi-res-jobs: ``/2/jobs`` +++++++++++ The ``/2/jobs`` resource. .. rapi_resource_details:: /2/jobs .. _rapi-res-jobs+get: ``GET`` ~~~~~~~ Returns a dictionary of jobs, containing each job's id and URI. If the optional bool *bulk* argument is provided and set to a true value (i.e. ``?bulk=1``), the output contains detailed information about jobs as a list. Returned fields for bulk requests (unlike other bulk requests, these fields are not the same as for per-job requests): :pyeval:`utils.CommaJoin(sorted(rlib2.J_FIELDS_BULK))`. .. _rapi-res-jobs-job_id: ``/2/jobs/[job_id]`` ++++++++++++++++++++ Individual job URI. .. rapi_resource_details:: /2/jobs/[job_id] .. _rapi-res-jobs-job_id+get: ``GET`` ~~~~~~~ Returns a dictionary with job parameters, containing the fields :pyeval:`utils.CommaJoin(sorted(rlib2.J_FIELDS))`. The result includes: - id: job ID as a number - status: current job status as a string - ops: involved OpCodes as a list of dictionaries, one for each opcode in the job - opstatus: OpCodes status as a list - opresult: OpCodes results as a list For a successful opcode, the ``opresult`` field corresponding to it will contain the raw result from its :term:`LogicalUnit`. In case an opcode has failed, its element in the opresult list will be a list of two elements: - the first element is the error type (the Ganeti internal error name) - the second element is a list of either one or two elements: - the first element is the textual error description - the second element, if any, will hold an error classification The error classification is most useful for the ``OpPrereqError`` error type - these errors happen before the OpCode has started executing, so it's possible to retry the OpCode without side effects.
But whether it makes sense to retry depends on the error classification: .. pyassert:: errors.ECODE_ALL == set([errors.ECODE_RESOLVER, errors.ECODE_NORES, errors.ECODE_INVAL, errors.ECODE_STATE, errors.ECODE_NOENT, errors.ECODE_EXISTS, errors.ECODE_NOTUNIQUE, errors.ECODE_FAULT, errors.ECODE_ENVIRON, errors.ECODE_TEMP_NORES]) :pyeval:`errors.ECODE_RESOLVER` Resolver errors. This usually means that a name doesn't exist in DNS, so if it's a case of slow DNS propagation the operation can be retried later. :pyeval:`errors.ECODE_NORES` Not enough resources (iallocator failure, disk space, memory, etc.). If the resources on the cluster increase, the operation might succeed. :pyeval:`errors.ECODE_TEMP_NORES` Similar to :pyeval:`errors.ECODE_NORES`, but indicating the operation should be attempted again after some time. :pyeval:`errors.ECODE_INVAL` Wrong arguments (at syntax level). The operation will not ever be accepted unless the arguments change. :pyeval:`errors.ECODE_STATE` Wrong entity state. For example, live migration has been requested for a down instance, or instance creation on an offline node. The operation can be retried once the resource has changed state. :pyeval:`errors.ECODE_NOENT` Entity not found. For example, information has been requested for an unknown instance. :pyeval:`errors.ECODE_EXISTS` Entity already exists. For example, instance creation has been requested for an already-existing instance. :pyeval:`errors.ECODE_NOTUNIQUE` Resource not unique (e.g. MAC or IP duplication). :pyeval:`errors.ECODE_FAULT` Internal cluster error. For example, a node is unreachable but not set offline, or the ganeti node daemons are not working, etc. A ``gnt-cluster verify`` should be run. :pyeval:`errors.ECODE_ENVIRON` Environment error (e.g. node disk error). A ``gnt-cluster verify`` should be run. Note that in the above list, by entity we refer to a node or instance, while by a resource we refer to an instance's disk, or NIC, etc. .. _rapi-res-jobs-job_id+delete: ``DELETE`` ~~~~~~~~~~ Cancels a not-yet-started job. .. _rapi-res-jobs-job_id-wait: ``/2/jobs/[job_id]/wait`` +++++++++++++++++++++++++ .. rapi_resource_details:: /2/jobs/[job_id]/wait .. _rapi-res-jobs-job_id-wait+get: ``GET`` ~~~~~~~ Waits for changes on a job. Takes the following body parameters in a dict: ``fields`` The job fields on which to watch for changes ``previous_job_info`` Previously received field values or None if not yet available ``previous_log_serial`` Highest log serial number received so far or None if not yet available Returns None if no changes have been detected and a dict with two keys, ``job_info`` and ``log_entries`` otherwise. .. _rapi-res-nodes: ``/2/nodes`` ++++++++++++ Nodes resource. .. rapi_resource_details:: /2/nodes .. _rapi-res-nodes+get: ``GET`` ~~~~~~~ Returns a list of all nodes. Example:: [ { "id": "node1.example.com", "uri": "\/nodes\/node1.example.com" }, { "id": "node2.example.com", "uri": "\/nodes\/node2.example.com" } ] If the optional bool *bulk* argument is provided and set to a true value (i.e. ``?bulk=1``), the output contains detailed information about nodes as a list. Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.N_FIELDS))`. Example:: [ { "pinst_cnt": 1, "mfree": 31280, "mtotal": 32763, "name": "www.example.com", "tags": [], "mnode": 512, "dtotal": 5246208, "sinst_cnt": 2, "dfree": 5171712, "offline": false, … }, … ] .. _rapi-res-nodes-node_name: ``/2/nodes/[node_name]`` +++++++++++++++++++++++++++++++++ Returns information about a node. ..
rapi_resource_details:: /2/nodes/[node_name] .. _rapi-res-nodes-node_name+get: ``GET`` ~~~~~~~ Returned fields: :pyeval:`utils.CommaJoin(sorted(rlib2.N_FIELDS))`. .. _rapi-res-nodes-node_name-powercycle: ``/2/nodes/[node_name]/powercycle`` +++++++++++++++++++++++++++++++++++ Powercycles a node. .. rapi_resource_details:: /2/nodes/[node_name]/powercycle .. _rapi-res-nodes-node_name-powercycle+post: ``POST`` ~~~~~~~~ Returns a job ID. Job result: .. opcode_result:: OP_NODE_POWERCYCLE .. _rapi-res-nodes-node_name-evacuate: ``/2/nodes/[node_name]/evacuate`` +++++++++++++++++++++++++++++++++ Evacuates instances off a node. .. rapi_resource_details:: /2/nodes/[node_name]/evacuate .. _rapi-res-nodes-node_name-evacuate+post: ``POST`` ~~~~~~~~ Returns a job ID. The result of the job will contain the IDs of the individual jobs submitted to evacuate the node. Body parameters: .. opcode_params:: OP_NODE_EVACUATE :exclude: nodes Up to and including Ganeti 2.4 query arguments were used. Those are no longer supported. The new request can be detected by the presence of the :pyeval:`rlib2._NODE_EVAC_RES1` feature string. Job result: .. opcode_result:: OP_NODE_EVACUATE .. _rapi-res-nodes-node_name-migrate: ``/2/nodes/[node_name]/migrate`` +++++++++++++++++++++++++++++++++ Migrates all primary instances from a node. .. rapi_resource_details:: /2/nodes/[node_name]/migrate .. _rapi-res-nodes-node_name-migrate+post: ``POST`` ~~~~~~~~ If no mode is explicitly specified, each instance's hypervisor default migration mode will be used. Body parameters: .. opcode_params:: OP_NODE_MIGRATE :exclude: node_name The query arguments used up to and including Ganeti 2.4 are deprecated and should no longer be used. The new request format can be detected by the presence of the :pyeval:`rlib2._NODE_MIGRATE_REQV1` feature string. Job result: .. opcode_result:: OP_NODE_MIGRATE .. _rapi-res-nodes-node_name-role: ``/2/nodes/[node_name]/role`` +++++++++++++++++++++++++++++ Manages node role. .. rapi_resource_details:: /2/nodes/[node_name]/role The role is always one of the following: - drained - master-candidate - offline - regular Note that the 'master' role is special, and currently it can't be modified via RAPI, only via the command line (``gnt-cluster master-failover``). .. _rapi-res-nodes-node_name-role+get: ``GET`` ~~~~~~~ Returns the current node role. Example:: "master-candidate" .. _rapi-res-nodes-node_name-role+put: ``PUT`` ~~~~~~~ Changes the node role. The request is a string which should be PUT to this URI. The result will be a job id. It supports the bool ``force`` argument. Job result: .. opcode_result:: OP_NODE_SET_PARAMS .. _rapi-res-nodes-node_name-modify: ``/2/nodes/[node_name]/modify`` +++++++++++++++++++++++++++++++ Modifies the parameters of a node. .. rapi_resource_details:: /2/nodes/[node_name]/modify .. _rapi-res-nodes-node_name-modify+post: ``POST`` ~~~~~~~~ Returns a job ID. Body parameters: .. opcode_params:: OP_NODE_SET_PARAMS :exclude: node_name Job result: .. opcode_result:: OP_NODE_SET_PARAMS .. _rapi-res-nodes-node_name-storage: ``/2/nodes/[node_name]/storage`` ++++++++++++++++++++++++++++++++ Manages storage units on the node. .. rapi_resource_details:: /2/nodes/[node_name]/storage .. _rapi-res-nodes-node_name-storage+get: ``GET`` ~~~~~~~ FIXME: enable ".. pyassert::" again when all storage types are implemented:: constants.STORAGE_TYPES == set([constants.ST_FILE, constants.ST_LVM_PV, constants.ST_LVM_VG]) Requests a list of storage units on a node.
Requires the parameters ``storage_type`` (one of :pyeval:`constants.ST_FILE`, :pyeval:`constants.ST_LVM_PV` or :pyeval:`constants.ST_LVM_VG`) and ``output_fields``. The result will be a job id, with which the result can be retrieved. .. _rapi-res-nodes-node_name-storage-modify: ``/2/nodes/[node_name]/storage/modify`` +++++++++++++++++++++++++++++++++++++++ Modifies storage units on the node. .. rapi_resource_details:: /2/nodes/[node_name]/storage/modify .. _rapi-res-nodes-node_name-storage-modify+put: ``PUT`` ~~~~~~~ Modifies parameters of storage units on the node. Requires the parameters ``storage_type`` (one of :pyeval:`constants.ST_FILE`, :pyeval:`constants.ST_LVM_PV` or :pyeval:`constants.ST_LVM_VG`) and ``name`` (name of the storage unit). Additional parameters can be passed; currently only :pyeval:`constants.SF_ALLOCATABLE` (bool) is supported. The result will be a job id. Job result: .. opcode_result:: OP_NODE_MODIFY_STORAGE .. _rapi-res-nodes-node_name-storage-repair: ``/2/nodes/[node_name]/storage/repair`` +++++++++++++++++++++++++++++++++++++++ Repairs a storage unit on the node. .. rapi_resource_details:: /2/nodes/[node_name]/storage/repair .. _rapi-res-nodes-node_name-storage-repair+put: ``PUT`` ~~~~~~~ .. pyassert:: constants.VALID_STORAGE_OPERATIONS == { constants.ST_LVM_VG: set([constants.SO_FIX_CONSISTENCY]), } Repairs a storage unit on the node. Requires the parameters ``storage_type`` (currently only :pyeval:`constants.ST_LVM_VG` can be repaired) and ``name`` (name of the storage unit). The result will be a job id. Job result: .. opcode_result:: OP_REPAIR_NODE_STORAGE .. _rapi-res-nodes-node_name-tags: ``/2/nodes/[node_name]/tags`` +++++++++++++++++++++++++++++ Manages per-node tags. .. rapi_resource_details:: /2/nodes/[node_name]/tags .. _rapi-res-nodes-node_name-tags+get: ``GET`` ~~~~~~~ Returns a list of tags. Example:: ["tag1", "tag2", "tag3"] .. _rapi-res-nodes-node_name-tags+put: ``PUT`` ~~~~~~~ Adds a set of tags. The request as a list of strings should be PUT to this URI. The result will be a job id. It supports the ``dry-run`` argument. .. _rapi-res-nodes-node_name-tags+delete: ``DELETE`` ~~~~~~~~~~ Deletes tags. In order to delete a set of tags, the DELETE request should be addressed to a URI like:: /tags?tag=[tag]&tag=[tag] It supports the ``dry-run`` argument. .. _rapi-res-query-resource: ``/2/query/[resource]`` +++++++++++++++++++++++ Requests resource information. Available fields can be found in man pages and using ``/2/query/[resource]/fields``. The resource is one of :pyeval:`utils.CommaJoin(constants.QR_VIA_RAPI)`. See the :doc:`query2 design document ` for more details. .. rapi_resource_details:: /2/query/[resource] .. _rapi-res-query-resource+get: ``GET`` ~~~~~~~ Returns list of included fields and actual data. Takes a query parameter named "fields", containing a comma-separated list of field names. Does not support filtering. .. _rapi-res-query-resource+put: ``PUT`` ~~~~~~~ Returns list of included fields and actual data. The list of requested fields can either be given as the query parameter "fields" or as a body parameter with the same name. The optional body parameter "filter" can be given and must be either ``null`` or a list containing filter operators. .. _rapi-res-query-resource-fields: ``/2/query/[resource]/fields`` ++++++++++++++++++++++++++++++ Requests the list of available fields for a resource. The resource is one of :pyeval:`utils.CommaJoin(constants.QR_VIA_RAPI)`. See the :doc:`query2 design document ` for more details. ..
rapi_resource_details:: /2/query/[resource]/fields .. _rapi-res-query-resource-fields+get: ``GET`` ~~~~~~~ Returns a list of field descriptions for available fields. Takes an optional query parameter named "fields", containing a comma-separated list of field names. .. _rapi-res-os: ``/2/os`` +++++++++ OS resource. .. rapi_resource_details:: /2/os .. _rapi-res-os+get: ``GET`` ~~~~~~~ Returns a list of all OSes. Can return error 500 in case of a problem. Since this is a costly operation for Ganeti 2.0, it is not recommended to execute it too often. Example:: ["debian-etch"] .. _rapi-res-tags: ``/2/tags`` +++++++++++ Manages cluster tags. .. rapi_resource_details:: /2/tags .. _rapi-res-tags+get: ``GET`` ~~~~~~~ Returns the cluster tags. Example:: ["tag1", "tag2", "tag3"] .. _rapi-res-tags+put: ``PUT`` ~~~~~~~ Adds a set of tags. The request as a list of strings should be PUT to this URI. The result will be a job id. It supports the ``dry-run`` argument. .. _rapi-res-tags+delete: ``DELETE`` ~~~~~~~~~~ Deletes tags. In order to delete a set of tags, the DELETE request should be addressed to a URI like:: /tags?tag=[tag]&tag=[tag] It supports the ``dry-run`` argument. .. _rapi-res-version: ``/version`` ++++++++++++ The version resource. This resource should be used to determine the remote API version and to adapt clients accordingly. .. rapi_resource_details:: /version .. _rapi-res-version+get: ``GET`` ~~~~~~~ Returns the remote API version. Ganeti 1.2 returned ``1`` and Ganeti 2.0 returns ``2``. .. _rapi-access-permissions: Access permissions ------------------ The following list describes the access permissions required for each resource. See :ref:`rapi-users` for more details. .. rapi_access_table:: .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-2.4.rst0000644000000000000000000000037712230001635016042 0ustar00rootroot00000000000000================= Ganeti 2.4 design ================= The following design documents have been implemented in Ganeti 2.4: - :doc:`design-oob` - :doc:`design-query2` .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/hooks.rst0000644000000000000000000005244712271422343015420 0ustar00rootroot00000000000000Ganeti customisation using hooks ================================ Documents Ganeti version 2.9 .. contents:: Introduction ------------ In order to allow customisation of operations, Ganeti runs scripts in sub-directories of ``@SYSCONFDIR@/ganeti/hooks``. These sub-directories are named ``$hook-$phase.d``, where ``$phase`` is either ``pre`` or ``post`` and ``$hook`` matches the directory name given for a hook (e.g. ``cluster-verify-post.d`` or ``node-add-pre.d``). This is similar to the ``/etc/network/`` structure present in Debian for network interface handling. Organisation ------------ For every operation, two sets of scripts are run: - pre phase (for authorization/checking) - post phase (for logging) Also, for each operation, the scripts are run on one or more nodes, depending on the operation type. Note that, even though we call them scripts, we are actually talking about any executable. *pre* scripts ~~~~~~~~~~~~~ The *pre* scripts have a definite target: to check that the operation is allowed given the site-specific constraints. You could have, for example, a rule that says every new instance is required to exist in a database; to implement this, you could write a script that checks the new instance parameters against your database.
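As an illustration, here is a minimal sketch of such a *pre* hook (hypothetical: the allow-list file stands in for a real database, and both the file path and the script name are made up). It relies on the ``GANETI_``-prefixed environment variables described later in this document and on the return code convention explained in the next paragraph::

  #!/usr/bin/env python
  # Hypothetical pre-hook, e.g. installed as
  # @SYSCONFDIR@/ganeti/hooks/instance-add-pre.d/50-check-allowed.
  # Exit code 0 allows the operation; any other value denies it.

  import os
  import sys

  ALLOW_LIST = "/etc/ganeti/allowed-instances"  # stand-in for a database

  def main():
      name = os.environ.get("GANETI_INSTANCE_NAME")
      if not name:
          # Not an instance operation; nothing to check.
          return 0
      try:
          allowed = [line.strip() for line in open(ALLOW_LIST)]
      except IOError:
          # No allow-list present; be permissive (and idempotent).
          return 0
      if name in allowed:
          return 0
      sys.stderr.write("Instance %s not in allow list\n" % name)
      return 1

  if __name__ == "__main__":
      sys.exit(main())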
The objective of these scripts should be their return code (zero or non-zero for success and failure). However, if they modify the environment in any way, they should be idempotent, as failed executions could be restarted and thus the script(s) run again with exactly the same parameters. Note that if a node is unreachable at the time a hook is run, this will not be interpreted as a deny for the execution. In other words, only an actual error returned from a script will cause an abort, and not an unreachable node. Therefore, if you want to guarantee that a hook script is run and denies an action, it's best to put it on the master node. *post* scripts ~~~~~~~~~~~~~~ These scripts should do whatever you need as a reaction to the completion of an operation. Their return code is not checked (but logged), and they should not depend on the fact that the *pre* scripts have been run. Naming ~~~~~~ The allowed names for the scripts consist of (similar to *run-parts*) upper and lower case, digits, underscores and hyphens. In other words, the regexp ``^[a-zA-Z0-9_-]+$``. Also, non-executable scripts will be ignored. Order of execution ~~~~~~~~~~~~~~~~~~ On a single node, the scripts in a directory are run in lexicographic order (more exactly, the python string comparison order). It is advisable to implement the usual *NN-name* convention where *NN* is a two digit number. For an operation whose hooks are run on multiple nodes, there is no specific ordering of nodes with regard to hooks execution; you should assume that the scripts are run in parallel on the target nodes (keeping on each node the above specified ordering). If you need any kind of inter-node synchronisation, you have to implement it yourself in the scripts. Execution environment ~~~~~~~~~~~~~~~~~~~~~ The scripts will be run as follows: - no command line arguments - no controlling *tty* - stdin is actually */dev/null* - stdout and stderr are directed to files - PATH is reset to :pyeval:`constants.HOOKS_PATH` - the environment is cleared, and only ganeti-specific variables will be left All information about the cluster is passed using environment variables. Different operations will have slightly different environments, but most of the variables are common. Operation list -------------- Node operations ~~~~~~~~~~~~~~~ OP_NODE_ADD +++++++++++ Adds a node to the cluster. :directory: node-add :env. vars: NODE_NAME, NODE_PIP, NODE_SIP, MASTER_CAPABLE, VM_CAPABLE :pre-execution: all existing nodes :post-execution: all nodes plus the new node OP_NODE_REMOVE ++++++++++++++ Removes a node from the cluster. On the removed node the hooks are called during the execution of the operation and not after its completion. :directory: node-remove :env. vars: NODE_NAME :pre-execution: all existing nodes except the removed node :post-execution: all existing nodes OP_NODE_SET_PARAMS ++++++++++++++++++ Changes a node's parameters. :directory: node-modify :env. vars: MASTER_CANDIDATE, OFFLINE, DRAINED, MASTER_CAPABLE, VM_CAPABLE :pre-execution: master node, the target node :post-execution: master node, the target node OP_NODE_MIGRATE ++++++++++++++++ Relocates secondary instances from a node. :directory: node-migrate :env. vars: NODE_NAME :pre-execution: master node :post-execution: master node Node group operations ~~~~~~~~~~~~~~~~~~~~~ OP_GROUP_ADD ++++++++++++ Adds a node group to the cluster. :directory: group-add :env.
vars: GROUP_NAME :pre-execution: master node :post-execution: master node OP_GROUP_SET_PARAMS +++++++++++++++++++ Changes a node group's parameters. :directory: group-modify :env. vars: GROUP_NAME, NEW_ALLOC_POLICY :pre-execution: master node :post-execution: master node OP_GROUP_REMOVE +++++++++++++++ Removes a node group from the cluster. Since the node group must be empty for removal to succeed, the concept of "nodes in the group" does not exist, and the hook is only executed on the master node. :directory: group-remove :env. vars: GROUP_NAME :pre-execution: master node :post-execution: master node OP_GROUP_RENAME +++++++++++++++ Renames a node group. :directory: group-rename :env. vars: OLD_NAME, NEW_NAME :pre-execution: master node and all nodes in the group :post-execution: master node and all nodes in the group OP_GROUP_EVACUATE +++++++++++++++++ Evacuates a node group. :directory: group-evacuate :env. vars: GROUP_NAME, TARGET_GROUPS :pre-execution: master node and all nodes in the group :post-execution: master node and all nodes in the group Network operations ~~~~~~~~~~~~~~~~~~ OP_NETWORK_ADD ++++++++++++++ Adds a network to the cluster. :directory: network-add :env. vars: NETWORK_NAME, NETWORK_SUBNET, NETWORK_GATEWAY, NETWORK_SUBNET6, NETWORK_GATEWAY6, NETWORK_MAC_PREFIX, NETWORK_TAGS :pre-execution: master node :post-execution: master node OP_NETWORK_REMOVE +++++++++++++++++ Removes a network from the cluster. :directory: network-remove :env. vars: NETWORK_NAME :pre-execution: master node :post-execution: master node OP_NETWORK_CONNECT ++++++++++++++++++ Connects a network to a nodegroup. :directory: network-connect :env. vars: GROUP_NAME, NETWORK_NAME, GROUP_NETWORK_MODE, GROUP_NETWORK_LINK, NETWORK_SUBNET, NETWORK_GATEWAY, NETWORK_SUBNET6, NETWORK_GATEWAY6, NETWORK_MAC_PREFIX, NETWORK_TAGS :pre-execution: nodegroup nodes :post-execution: nodegroup nodes OP_NETWORK_DISCONNECT +++++++++++++++++++++ Disconnects a network from a nodegroup. :directory: network-disconnect :env. vars: GROUP_NAME, NETWORK_NAME, GROUP_NETWORK_MODE, GROUP_NETWORK_LINK, NETWORK_SUBNET, NETWORK_GATEWAY, NETWORK_SUBNET6, NETWORK_GATEWAY6, NETWORK_MAC_PREFIX, NETWORK_TAGS :pre-execution: nodegroup nodes :post-execution: nodegroup nodes OP_NETWORK_SET_PARAMS +++++++++++++++++++++ Modifies a network. :directory: network-modify :env. vars: NETWORK_NAME, NETWORK_SUBNET, NETWORK_GATEWAY, NETWORK_SUBNET6, NETWORK_GATEWAY6, NETWORK_MAC_PREFIX, NETWORK_TAGS :pre-execution: master node :post-execution: master node Instance operations ~~~~~~~~~~~~~~~~~~~ All instance operations take at least the following variables: INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARY, INSTANCE_OS_TYPE, INSTANCE_DISK_TEMPLATE, INSTANCE_MEMORY, INSTANCE_DISK_SIZES, INSTANCE_VCPUS, INSTANCE_NIC_COUNT, INSTANCE_NICn_IP, INSTANCE_NICn_BRIDGE, INSTANCE_NICn_MAC, INSTANCE_NICn_NETWORK, INSTANCE_NICn_NETWORK_UUID, INSTANCE_NICn_NETWORK_SUBNET, INSTANCE_NICn_NETWORK_GATEWAY, INSTANCE_NICn_NETWORK_SUBNET6, INSTANCE_NICn_NETWORK_GATEWAY6, INSTANCE_NICn_NETWORK_MAC_PREFIX, INSTANCE_DISK_COUNT, INSTANCE_DISKn_SIZE, INSTANCE_DISKn_MODE. The INSTANCE_NICn_* and INSTANCE_DISKn_* variables represent the properties of the *n* -th NIC and disk, and are zero-indexed. The INSTANCE_NICn_NETWORK_* variables are only passed if a NIC's network parameter is set (that is, if the NIC is associated with a network defined via ``gnt-network``) OP_INSTANCE_CREATE ++++++++++++++++++ Creates a new instance. :directory: instance-add :env.
vars: ADD_MODE, SRC_NODE, SRC_PATH, SRC_IMAGES :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_REINSTALL +++++++++++++++++++++ Reinstalls an instance. :directory: instance-reinstall :env. vars: only the standard instance vars :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_BACKUP_EXPORT ++++++++++++++++ Exports the instance. :directory: instance-export :env. vars: EXPORT_MODE, EXPORT_NODE, EXPORT_DO_SHUTDOWN, REMOVE_INSTANCE :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_STARTUP +++++++++++++++++++ Starts an instance. :directory: instance-start :env. vars: FORCE :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_SHUTDOWN ++++++++++++++++++++ Stops an instance. :directory: instance-stop :env. vars: TIMEOUT :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_REBOOT ++++++++++++++++++ Reboots an instance. :directory: instance-reboot :env. vars: IGNORE_SECONDARIES, REBOOT_TYPE, SHUTDOWN_TIMEOUT :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_SET_PARAMS ++++++++++++++++++++++ Modifies the instance parameters. :directory: instance-modify :env. vars: NEW_DISK_TEMPLATE, RUNTIME_MEMORY :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_FAILOVER ++++++++++++++++++++ Fails over an instance. In the post phase INSTANCE_PRIMARY and INSTANCE_SECONDARY refer to the nodes that were respectively primary and secondary before failover. :directory: instance-failover :env. vars: IGNORE_CONSISTENCY, SHUTDOWN_TIMEOUT, OLD_PRIMARY, OLD_SECONDARY, NEW_PRIMARY, NEW_SECONDARY :pre-execution: master node, secondary node :post-execution: master node, primary and secondary nodes OP_INSTANCE_MIGRATE ++++++++++++++++++++ Migrates an instance. In the post phase INSTANCE_PRIMARY and INSTANCE_SECONDARY refer to the nodes that were respectively primary and secondary before migration. :directory: instance-migrate :env. vars: MIGRATE_LIVE, MIGRATE_CLEANUP, OLD_PRIMARY, OLD_SECONDARY, NEW_PRIMARY, NEW_SECONDARY :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_REMOVE ++++++++++++++++++ Removes an instance. :directory: instance-remove :env. vars: SHUTDOWN_TIMEOUT :pre-execution: master node :post-execution: master node, primary and secondary nodes OP_INSTANCE_GROW_DISK +++++++++++++++++++++ Grows the disk of an instance. :directory: disk-grow :env. vars: DISK, AMOUNT :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_RENAME ++++++++++++++++++ Renames an instance. :directory: instance-rename :env. vars: INSTANCE_NEW_NAME :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_MOVE ++++++++++++++++ Moves an instance by data-copying. :directory: instance-move :env.
vars: TARGET_NODE, SHUTDOWN_TIMEOUT :pre-execution: master node, primary and target nodes :post-execution: master node, primary and target nodes OP_INSTANCE_RECREATE_DISKS ++++++++++++++++++++++++++ Recreate an instance's missing disks. :directory: instance-recreate-disks :env. vars: only the standard instance vars :pre-execution: master node, primary and secondary nodes :post-execution: master node, primary and secondary nodes OP_INSTANCE_REPLACE_DISKS +++++++++++++++++++++++++ Replace the disks of an instance. :directory: mirrors-replace :env. vars: MODE, NEW_SECONDARY, OLD_SECONDARY :pre-execution: master node, primary and new secondary nodes :post-execution: master node, primary and new secondary nodes OP_INSTANCE_CHANGE_GROUP ++++++++++++++++++++++++ Moves an instance to another group. :directory: instance-change-group :env. vars: TARGET_GROUPS :pre-execution: master node :post-execution: master node Cluster operations ~~~~~~~~~~~~~~~~~~ OP_CLUSTER_POST_INIT ++++++++++++++++++++ This hook is called via a special "empty" LU right after cluster initialization. :directory: cluster-init :env. vars: none :pre-execution: none :post-execution: master node OP_CLUSTER_DESTROY ++++++++++++++++++ The post phase of this hook is called during the execution of destroy operation and not after its completion. :directory: cluster-destroy :env. vars: none :pre-execution: none :post-execution: master node OP_CLUSTER_VERIFY_GROUP +++++++++++++++++++++++ Verifies all nodes in a group. This is a special LU with regard to hooks, as the result of the opcode will be combined with the result of post-execution hooks, in order to allow administrators to enhance the cluster verification procedure. :directory: cluster-verify :env. vars: CLUSTER, MASTER, CLUSTER_TAGS, NODE_TAGS_ :pre-execution: none :post-execution: all nodes in a group OP_CLUSTER_RENAME +++++++++++++++++ Renames the cluster. :directory: cluster-rename :env. vars: NEW_NAME :pre-execution: master-node :post-execution: master-node OP_CLUSTER_SET_PARAMS +++++++++++++++++++++ Modifies the cluster parameters. :directory: cluster-modify :env. vars: NEW_VG_NAME :pre-execution: master node :post-execution: master node Virtual operation :pyeval:`constants.FAKE_OP_MASTER_TURNUP` +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ This doesn't correspond to an actual op-code, but it is called when the master IP is activated. :directory: master-ip-turnup :env. vars: MASTER_NETDEV, MASTER_IP, MASTER_NETMASK, CLUSTER_IP_VERSION :pre-execution: master node :post-execution: master node Virtual operation :pyeval:`constants.FAKE_OP_MASTER_TURNDOWN` +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ This doesn't correspond to an actual op-code, but it is called when the master IP is deactivated. :directory: master-ip-turndown :env. vars: MASTER_NETDEV, MASTER_IP, MASTER_NETMASK, CLUSTER_IP_VERSION :pre-execution: master node :post-execution: master node Obsolete operations ~~~~~~~~~~~~~~~~~~~ The following operations are no longer present or don't execute hooks anymore in Ganeti 2.0: - OP_INIT_CLUSTER - OP_MASTER_FAILOVER - OP_INSTANCE_ADD_MDDRBD - OP_INSTANCE_REMOVE_MDDRBD Environment variables --------------------- Note that all variables listed here are actually prefixed with *GANETI_* in order to provide a clear namespace. In addition, post-execution scripts receive another set of variables, prefixed with *GANETI_POST_*, representing the status after the opcode executed. 
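As a minimal illustration of the two variable sets, the following *post* hook sketch (written in Python; the script itself and its output format are illustrative assumptions, not part of the hooks interface) reports every value that changed during the opcode::

  #!/usr/bin/env python
  # Sketch of a *post* hook: report each GANETI_POST_* variable whose
  # value differs from its GANETI_* counterpart. Returns zero; post
  # hook return codes are only logged anyway.
  import os
  import sys

  def main():
    env = os.environ
    post = [(k[len("GANETI_POST_"):], v)
            for (k, v) in sorted(env.items())
            if k.startswith("GANETI_POST_")]
    for (name, value) in post:
      old = env.get("GANETI_" + name)
      if old != value:
        sys.stderr.write("%s: %r -> %r\n" % (name, old, value))
    return 0

  if __name__ == "__main__":
    sys.exit(main())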
Common variables ~~~~~~~~~~~~~~~~ This is the list of environment variables supported by all operations: HOOKS_VERSION Documents the hooks interface version. In case this doesn't match what the script expects, it should not run. This document describes version 2 of the interface. HOOKS_PHASE One of *PRE* or *POST* denoting which phase we are in. CLUSTER The cluster name. MASTER The master node. OP_CODE One of the *OP_* values from the list of operations. OBJECT_TYPE One of ``INSTANCE``, ``NODE``, ``CLUSTER``. DATA_DIR The path to the Ganeti configuration directory (to read, for example, the *ssconf* files). Specialised variables ~~~~~~~~~~~~~~~~~~~~~ This is the list of variables which are specific to one or more operations. CLUSTER_IP_VERSION IP version of the master IP (4 or 6) INSTANCE_NAME The name of the instance which is the target of the operation. INSTANCE_BE_x,y,z,... Instance BE params. There is one variable per BE param. For instance, GANETI_INSTANCE_BE_auto_balance INSTANCE_DISK_TEMPLATE The disk type for the instance. NEW_DISK_TEMPLATE The new disk type for the instance. INSTANCE_DISK_COUNT The number of disks for the instance. INSTANCE_DISKn_SIZE The size of disk *n* for the instance. INSTANCE_DISKn_MODE Either *rw* for a read-write disk or *ro* for a read-only one. INSTANCE_HV_x,y,z,... Instance hypervisor options. There is one variable per option. For instance, GANETI_INSTANCE_HV_use_bootloader INSTANCE_HYPERVISOR The instance hypervisor. INSTANCE_NIC_COUNT The number of NICs for the instance. INSTANCE_NICn_BRIDGE The bridge to which the *n* -th NIC of the instance is attached. INSTANCE_NICn_IP The IP (if any) of the *n* -th NIC of the instance. INSTANCE_NICn_MAC The MAC address of the *n* -th NIC of the instance. INSTANCE_NICn_MODE The mode of the *n* -th NIC of the instance. INSTANCE_OS_TYPE The name of the instance OS. INSTANCE_PRIMARY The name of the node which is the primary for the instance. Note that for migrations/failovers, you shouldn't rely on this variable since the nodes change during the execution, but on the OLD_PRIMARY/NEW_PRIMARY values. INSTANCE_SECONDARY Space-separated list of secondary nodes for the instance. Note that for migrations/failovers, you shouldn't rely on this variable since the nodes change during the execution, but on the OLD_SECONDARY/NEW_SECONDARY values. INSTANCE_MEMORY The memory size (in MiBs) of the instance. INSTANCE_VCPUS The number of virtual CPUs for the instance. INSTANCE_STATUS The run status of the instance. MASTER_CAPABLE Whether a node is capable of being promoted to master. VM_CAPABLE Whether the node can host instances. MASTER_NETDEV Network device of the master IP. MASTER_IP The master IP. MASTER_NETMASK Netmask of the master IP. INSTANCE_TAGS A space-delimited list of the instance's tags. NODE_NAME The target node of this operation (not the node on which the hook runs). NODE_PIP The primary IP of the target node (the one over which inter-node communication is done). NODE_SIP The secondary IP of the target node (the one over which DRBD replication is done). This can be equal to the primary IP, in case the cluster is not dual-homed. FORCE This is provided by some operations when the user gave this flag. IGNORE_CONSISTENCY The user has specified this flag. It is used when failing over instances in case the primary node is down. ADD_MODE The mode of the instance creation: either *create* for creation from scratch or *import* for restoring from an exported image.
SRC_NODE, SRC_PATH, SRC_IMAGE In case the instance has been added by import, these variables are defined and point to the source node, source path (the directory containing the image and the config file) and the source disk image file. NEW_SECONDARY The name of the node on which the new mirror component is being added (for replace disk). This can be the name of the current secondary, if the new mirror is on the same secondary. For migrations/failovers, this is the old primary node. OLD_SECONDARY The name of the old secondary in the replace-disks command. Note that this can be equal to the new secondary if the secondary node hasn't actually changed. For migrations/failovers, this is the new primary node. OLD_PRIMARY, NEW_PRIMARY For migrations/failovers, the old and respectively new primary nodes. These two mirror the NEW_SECONDARY/OLD_SECONDARY variables. EXPORT_MODE The instance export mode. Either "remote" or "local". EXPORT_NODE The node on which the instance was exported. EXPORT_DO_SHUTDOWN This variable tells whether the instance was shut down while doing the export. In the "was shutdown" case, it's likely that the filesystem is consistent, whereas in the "did not shutdown" case, the filesystem would need a check (journal replay or full fsck) in order to guarantee consistency. REMOVE_INSTANCE Whether the instance was removed from the node. SHUTDOWN_TIMEOUT Amount of time to wait for the instance to shut down. TIMEOUT Amount of time to wait before aborting the op. OLD_NAME, NEW_NAME Old/new name of the node group. GROUP_NAME The name of the node group. NEW_ALLOC_POLICY The new allocation policy for the node group. CLUSTER_TAGS The list of cluster tags, space separated. NODE_TAGS_ The list of tags for node **, space separated. Examples -------- The startup of an instance will pass this environment to the hook script:: GANETI_CLUSTER=cluster1.example.com GANETI_DATA_DIR=/var/lib/ganeti GANETI_FORCE=False GANETI_HOOKS_PATH=instance-start GANETI_HOOKS_PHASE=post GANETI_HOOKS_VERSION=2 GANETI_INSTANCE_DISK0_MODE=rw GANETI_INSTANCE_DISK0_SIZE=128 GANETI_INSTANCE_DISK_COUNT=1 GANETI_INSTANCE_DISK_TEMPLATE=drbd GANETI_INSTANCE_MEMORY=128 GANETI_INSTANCE_NAME=instance2.example.com GANETI_INSTANCE_NIC0_BRIDGE=xen-br0 GANETI_INSTANCE_NIC0_IP= GANETI_INSTANCE_NIC0_MAC=aa:00:00:a5:91:58 GANETI_INSTANCE_NIC_COUNT=1 GANETI_INSTANCE_OS_TYPE=debootstrap GANETI_INSTANCE_PRIMARY=node3.example.com GANETI_INSTANCE_SECONDARY=node5.example.com GANETI_INSTANCE_STATUS=down GANETI_INSTANCE_VCPUS=1 GANETI_MASTER=node1.example.com GANETI_OBJECT_TYPE=INSTANCE GANETI_OP_CODE=OP_INSTANCE_STARTUP GANETI_OP_TARGET=instance2.example.com .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-daemons.rst0000644000000000000000000003353712271422343017201 0ustar00rootroot00000000000000========================== Ganeti daemons refactoring ========================== .. contents:: :depth: 2 This is a design document detailing the plan for refactoring the internal structure of Ganeti, and particularly the set of daemons it is divided into. Current state and shortcomings ============================== Ganeti is composed of a growing number of daemons, each dealing with part of the tasks the cluster has to face, and communicating with the other daemons using a variety of protocols. Specifically, as of Ganeti 2.8, the situation is as follows: ``Master daemon (MasterD)`` It is responsible for managing the entire cluster, and it's written in Python.
It is executed on a single node (the master node). It receives the commands given by the cluster administrator (through the remote API daemon or the command line tools) over the LUXI protocol. The master daemon is responsible for creating and managing the jobs that will execute such commands, and for managing the locks that ensure the cluster will not incur race conditions. Each job is managed by a separate Python thread, that interacts with the node daemons via RPC calls. The master daemon is also responsible for managing the configuration of the cluster, changing it when required by some job. It is also responsible for copying the configuration to the other master candidates after updating it. ``RAPI daemon (RapiD)`` It is written in Python and runs on the master node only. It waits for requests issued remotely through the remote API protocol. Then, it forwards them, using the LUXI protocol, to the master daemon (if they are commands) or to the query daemon if they are queries about the configuration (including live status) of the cluster. ``Node daemon (NodeD)`` It is written in Python. It runs on all the nodes. It is responsible for receiving the master requests over RPC and executing them, using the appropriate backend (hypervisors, DRBD, LVM, etc.). It also receives requests over RPC for the execution of queries gathering live data on behalf of the query daemon. ``Configuration daemon (ConfD)`` It is written in Haskell. It runs on all the master candidates. Since the configuration is replicated only on the master node, this daemon exists in order to provide information about the configuration to nodes needing it. The requests are done through ConfD's own protocol, HMAC signed, implemented over UDP, and meant to be used by querying all the master candidates (or a subset thereof) in parallel and getting the most up-to-date answer. This is meant as a way to provide a robust service even in case the master is temporarily unavailable. ``Query daemon (QueryD)`` It is written in Haskell. It runs on all the master candidates. It replies to LUXI queries about the current status of the system, including live data it obtains by querying the node daemons through RPCs. ``Monitoring daemon (MonD)`` It is written in Haskell. It runs on all nodes, including the ones that are not vm-capable. It is meant to provide information on the status of the system. Such information is related only to the specific node the daemon is running on, and it is provided as JSON encoded data over HTTP, to be easily readable by external tools (a short client sketch is given at the end of this section). The monitoring daemon communicates with ConfD to get information about the configuration of the cluster. The choice of communicating with ConfD instead of MasterD allows it to obtain configuration information even when the cluster is heavily degraded (e.g.: when master and some, but not all, of the master candidates are unreachable). The current structure of the Ganeti daemons is inefficient because there are many different protocols involved, and each daemon needs to be able to use multiple ones, and has to deal with doing different things, thus sometimes making it unclear which daemon is responsible for performing a specific task. Also, with the current configuration, jobs are managed by the master daemon using Python threads. This makes terminating a job after it has started a difficult operation, and it is the main reason why this is not possible yet. The master daemon currently has too many different tasks that could be handled better if split among different daemons.
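As an example of the monitoring daemon's externally readable interface described above, the sketch below polls a node's MonD for its JSON status reports; the port number and report path are assumptions taken from the monitoring agent design, not definitions made by this document::

  # Sketch: read MonD's JSON reports the way an external tool would.
  # Port 1815 and the "/1/report/all" path are assumed, not specified
  # in this document.
  import json
  import urllib2

  def fetch_reports(node, port=1815):
    url = "http://%s:%s/1/report/all" % (node, port)
    return json.load(urllib2.urlopen(url))

  for report in fetch_reports("node1.example.com"):
    print "%s/%s" % (report.get("category"), report.get("name"))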
Proposed changes ================ In order to improve on the current situation, a new daemon subdivision is proposed, and presented hereafter. .. digraph:: "new-daemons-structure" {rank=same; RConfD LuxiD;} {rank=same; Jobs rconfigdata;} node [shape=box] RapiD [label="RapiD [M]"] LuxiD [label="LuxiD [M]"] WConfD [label="WConfD [M]"] Jobs [label="Jobs [M]"] RConfD [label="RConfD [MC]"] MonD [label="MonD [All]"] NodeD [label="NodeD [All]"] Clients [label="gnt-*\nclients [M]"] p1 [shape=none, label=""] p2 [shape=none, label=""] p3 [shape=none, label=""] p4 [shape=none, label=""] configdata [shape=none, label="config.data"] rconfigdata [shape=none, label="config.data\n[MC copy]"] locksdata [shape=none, label="locks.data"] RapiD -> LuxiD [label="LUXI"] LuxiD -> WConfD [label="WConfD\nproto"] LuxiD -> Jobs [label="fork/exec"] Jobs -> WConfD [label="WConfD\nproto"] Jobs -> NodeD [label="RPC"] LuxiD -> NodeD [label="RPC"] rconfigdata -> RConfD configdata -> rconfigdata [label="sync via\nNodeD RPC"] WConfD -> NodeD [label="RPC"] WConfD -> configdata WConfD -> locksdata MonD -> RConfD [label="RConfD\nproto"] Clients -> LuxiD [label="LUXI"] p1 -> MonD [label="MonD proto"] p2 -> RapiD [label="RAPI"] p3 -> RConfD [label="RConfD\nproto"] p4 -> Clients [label="CLI"] ``LUXI daemon (LuxiD)`` It will be written in Haskell. It will run on the master node and it will be the only LUXI server, replying to all the LUXI queries. This includes both the queries about the live configuration of the cluster, previously served by QueryD, and the commands actually changing the status of the cluster by submitting jobs. Therefore, this daemon will also be the one responsible for managing the job queue. When a job needs to be executed, LuxiD will spawn a separate process tasked with the execution of that specific job, thus making it easier to terminate the job itself, if needed. When a job requires locks, LuxiD will request them from WConfD. In order to keep availability of the cluster in case of failure of the master node, LuxiD will replicate the job queue to the other master candidates, by RPCs to the NodeD running there (the choice of RPCs for this task might be reviewed at a later time, after implementing this design). ``Configuration management daemon (WConfD)`` It will run on the master node and it will be responsible for the management of the authoritative copy of the cluster configuration (that is, it will be the daemon actually modifying the ``config.data`` file). All the requests for configuration changes will have to pass through this daemon, and will be performed using a LUXI-like protocol ("WConfD proto" in the graph; the exact protocol will be defined in the separate design document that will detail the WConfD separation). Having a single point of configuration management will also allow Ganeti to get rid of possible race conditions due to concurrent modifications of the configuration. When the configuration is updated, it will have to push the received changes to the other master candidates, via RPCs, so that RConfD daemons and (in case of a failure on the master node) the WConfD daemon on the new master can access an up-to-date version of it (the choice of RPCs for this task might be reviewed at a later time). This daemon will also be the one responsible for managing the locks, granting them to the jobs requesting them, and taking care of freeing them up if the jobs holding them crash or are terminated before releasing them.
In order to do this, each job, after being spawned by LuxiD, will open a local unix socket that will be used to communicate with it, and will be destroyed when the job terminates. LuxiD will be able to check, after a timeout, whether the job is still running by connecting there, and to ask WConfD to forcefully remove the locks if the socket is closed. Also, WConfD should hold a serialized list of the locks and their owners in a file (``locks.data``), so that it can keep track of their status in case it crashes and needs to be restarted (by asking LuxiD which of them are still running). Interaction with this daemon will be performed using Unix sockets. ``Configuration query daemon (RConfD)`` It is written in Haskell, and it corresponds to the old ConfD. It will run on all the master candidates and it will serve information about the static configuration of the cluster (the one contained in ``config.data``). The provided information will be highly available (as in: a response will be available as long as a stable-enough connection between the client and at least one working master candidate is available) and its freshness will be best effort (the most recent reply from any of the master candidates will be returned, but it might still be older than the one available through WConfD). The information will be served through the ConfD protocol. ``Rapi daemon (RapiD)`` It remains basically unchanged, with the only difference that all of its LUXI queries are directed towards LuxiD instead of being split between MasterD and QueryD. ``Monitoring daemon (MonD)`` It remains unaffected by the changes in this design document. It will just get some of the data it needs from RConfD instead of the old ConfD, but the interfaces of the two are identical. ``Node daemon (NodeD)`` It remains unaffected by the changes proposed in the design document. The only difference is that it will receive its RPCs from LuxiD (for job queue replication), from WConfD (for configuration replication) and from the processes executing single jobs (for all the operations to be performed by nodes) instead of receiving them just from MasterD. This restructuring will allow us to reorganize and improve the codebase, introducing cleaner interfaces and giving well defined and more restricted tasks to each daemon. Furthermore, having more well-defined interfaces will allow us to have easier upgrade procedures, and to work towards the possibility of upgrading single components of a cluster one at a time, without the need for immediately upgrading the entire cluster in a single step. Implementation ============== While performing this refactoring, we aim to increase the amount of Haskell code, thus benefiting from the additional type safety provided by its wide compile-time checks. In particular, all the job queue management and the configuration management daemon will be written in Haskell, taking over the role currently fulfilled by Python code executed as part of MasterD. The changes described by this design document are quite extensive, therefore they will not be implemented all at the same time, but through a sequence of steps, leaving the codebase in a consistent and usable state. #. Rename QueryD to LuxiD. A part of LuxiD, the one replying to configuration queries including live information about the system, already exists in the form of QueryD. This is being renamed to LuxiD, and will form the first part of the new daemon. NB: this is happening starting from Ganeti 2.8.
At the beginning, only the already existing queries will be replied to by LuxiD. More queries will be implemented in the next versions. #. Let LuxiD be the interface for the queries and MasterD be their executor. Currently, MasterD is the only one responsible for receiving and executing LUXI queries, and for managing the jobs they create. Receiving the queries and managing the job queue will be extracted from MasterD into LuxiD. Actually executing jobs will still be done by MasterD, which contains all the logic for doing that and for properly managing locks and the configuration. A separate design document will detail how the system will decide which jobs to send over for execution, and how to rate-limit them. #. Extract WConfD from MasterD. The logic for managing the configuration file is factored out to the dedicated WConfD daemon. All configuration changes, currently executed directly by MasterD, will be changed to be IPC requests sent to the new daemon. #. Extract locking management from MasterD. The logic for managing and granting locks is extracted to WConfD as well. Locks will not be taken directly anymore, but asked via IPC to WConfD. This step can be executed on its own or at the same time as the previous one. #. Jobs are executed as processes. The logic for running jobs is rewritten so that each job can be managed by an independent process. LuxiD will spawn a new (Python) process for every single job. The RPCs will remain unchanged, and the LU code will stay as is as much as possible. MasterD will cease to exist as a daemon on its own at this point, but not before. Further considerations ====================== There is a possibility that a job will finish performing its task while LuxiD and/or WConfD are not available. In order to deal with this situation, each job will write the results of its execution to a file. The name of this file will be known to LuxiD before starting the job, and will be stored together with the job ID, and the name of the job-unique socket. The job, upon ending its execution, will signal LuxiD (through the socket), so that it can read the result of the execution and release the locks as needed. In case LuxiD is not available at that time, the job will just terminate without signalling it, writing the results to the file as usual. When a new LuxiD becomes available, it will have the most up-to-date list of running jobs (received via replication from the former LuxiD), and go through it, cleaning up all the terminated jobs. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/upgrade.rst0000644000000000000000000002542312271443364015734 0ustar00rootroot00000000000000.. This file is automatically updated at build time from UPGRADE. .. Do not edit. Upgrade notes ============= .. highlight:: shell-example This document details the steps needed to upgrade a cluster to newer versions of Ganeti. As a general rule the node daemons need to be restarted after each software upgrade; if using the provided example init.d script, this means running the following command on all nodes:: $ /etc/init.d/ganeti restart 2.1 and above ------------- Starting with Ganeti 2.0, upgrades between revisions (e.g. 2.1.0 to 2.1.1) should not need manual intervention. As a safety measure, minor releases (e.g. 2.1.3 to 2.2.0) require the ``cfgupgrade`` command for changing the configuration version. Below you find the steps necessary to upgrade between minor releases.
To run commands on all nodes, the `distributed shell (dsh) `_ can be used, e.g. ``dsh -M -F 8 -f /var/lib/ganeti/ssconf_online_nodes gnt-cluster --version``. #. Ensure no jobs are running (master node only):: $ gnt-job list #. Pause the watcher for an hour (master node only):: $ gnt-cluster watcher pause 1h #. Stop all daemons on all nodes:: $ /etc/init.d/ganeti stop #. Backup old configuration (master node only):: $ tar czf /var/lib/ganeti-$(date +\%FT\%T).tar.gz -C /var/lib ganeti #. Install new Ganeti version on all nodes #. Run cfgupgrade on the master node:: $ /usr/lib/ganeti/tools/cfgupgrade --verbose --dry-run $ /usr/lib/ganeti/tools/cfgupgrade --verbose (``cfgupgrade`` supports a number of parameters, run it with ``--help`` for more information) #. Upgrade the directory permissions on all nodes:: $ /usr/lib/ganeti/ensure-dirs --full-run #. Create the (missing) required users and make users part of the required groups on all nodes:: $ /usr/lib/ganeti/tools/users-setup This will ask for confirmation. To execute directly, add the ``--yes-do-it`` option. #. Restart daemons on all nodes:: $ /etc/init.d/ganeti restart #. Re-distribute configuration (master node only):: $ gnt-cluster redist-conf #. If you use file storage, check that the ``/etc/ganeti/file-storage-paths`` is correct on all nodes. For security reasons it's not copied automatically, but it can be copied manually via:: $ gnt-cluster copyfile /etc/ganeti/file-storage-paths #. Restart daemons again on all nodes:: $ /etc/init.d/ganeti restart #. Enable the watcher again (master node only):: $ gnt-cluster watcher continue #. Verify cluster (master node only):: $ gnt-cluster verify Reverting an upgrade ~~~~~~~~~~~~~~~~~~~~ For going back between revisions (e.g. 2.1.1 to 2.1.0) no manual intervention is required, as for upgrades. Starting from version 2.8, ``cfgupgrade`` supports ``--downgrade`` option to bring the configuration back to the previous stable version. This is useful if you upgrade Ganeti and after some time you run into problems with the new version. You can downgrade the configuration without losing the changes made since the upgrade. Any feature not supported by the old version will be removed from the configuration, of course, but you get a warning about it. If there is any new feature and you haven't changed from its default value, you don't have to worry about it, as it will get the same value whenever you'll upgrade again. The procedure is similar to upgrading, but please notice that you have to revert the configuration **before** installing the old version. #. Ensure no jobs are running (master node only):: $ gnt-job list #. Pause the watcher for an hour (master node only):: $ gnt-cluster watcher pause 1h #. Stop all daemons on all nodes:: $ /etc/init.d/ganeti stop #. Backup old configuration (master node only):: $ tar czf /var/lib/ganeti-$(date +\%FT\%T).tar.gz -C /var/lib ganeti #. Run cfgupgrade on the master node:: $ /usr/lib/ganeti/tools/cfgupgrade --verbose --downgrade --dry-run $ /usr/lib/ganeti/tools/cfgupgrade --verbose --downgrade You may want to copy all the messages about features that have been removed during the downgrade, in case you want to restore them when upgrading again. #. Install the old Ganeti version on all nodes NB: in Ganeti 2.8, the ``cmdlib.py`` file was split into a series of files contained in the ``cmdlib`` directory. 
If Ganeti is installed from sources and not from a package, it is important to remember, while downgrading Ganeti to a pre-2.8 version, to remove the ``cmdlib`` directory from the directory containing the Ganeti Python files (which usually is ``${PREFIX}/lib/python${VERSION}/dist-packages/ganeti``). A simpler upgrade/downgrade procedure will be made available in future versions of Ganeti. #. Restart daemons on all nodes:: $ /etc/init.d/ganeti restart #. Re-distribute configuration (master node only):: $ gnt-cluster redist-conf #. Restart daemons again on all nodes:: $ /etc/init.d/ganeti restart #. Enable the watcher again (master node only):: $ gnt-cluster watcher continue #. Verify cluster (master node only):: $ gnt-cluster verify 2.0 releases ------------ 2.0.3 to 2.0.4 ~~~~~~~~~~~~~~ No changes needed except restarting the daemon; but rollback to 2.0.3 might require configuration editing. If you're using Xen-HVM instances, please double-check the network configuration (``nic_type`` parameter) as the defaults might have changed: 2.0.4 adds any missing configuration items and depending on the version of the software the cluster has been installed with, some new keys might have been added. 2.0.1 to 2.0.2/2.0.3 ~~~~~~~~~~~~~~~~~~~~ Between 2.0.1 and 2.0.2 there have been some changes in the handling of block devices, which can cause some issues. 2.0.3 was then released, which adds two new options/commands to fix this issue. If you use DRBD-type instances and see problems in instance start or activate-disks with messages from DRBD about "lower device too small" or similar, it is recommended to: #. Run ``gnt-instance activate-disks --ignore-size $instance`` for each of the affected instances #. Then run ``gnt-cluster repair-disk-sizes`` which will check that instances have the correct disk sizes 1.2 to 2.0 ---------- Prerequisites: - Ganeti 1.2.7 is currently installed - All instances have been migrated from DRBD 0.7 to DRBD 8.x (i.e. no ``remote_raid1`` disk template) - Upgrade to Ganeti 2.0.0~rc2 or later (~rc1 and earlier don't have the needed upgrade tool) In the below steps, replace :file:`/var/lib` with ``$libdir`` if Ganeti was not installed with this prefix (e.g. :file:`/usr/local/var`). Same for :file:`/usr/lib`. Execution (all steps are required in the order given): #. Make a backup of the current configuration, for safety:: $ cp -a /var/lib/ganeti /var/lib/ganeti-1.2.backup #. Stop all instances:: $ gnt-instance stop --all #. Make sure no DRBD devices are in use; the following command should show no active minors:: $ gnt-cluster command grep cs: /proc/drbd | grep -v cs:Unconf #. Stop the node daemons and rapi daemon on all nodes (note: you should be logged in not via the cluster name, but the master node name, as the command below will remove the cluster ip from the master node):: $ gnt-cluster command /etc/init.d/ganeti stop #. Install the new software on all nodes, either from packaging (if available) or from sources; the master daemon will not start but give error messages about a wrong configuration file, which is normal #. Upgrade the configuration file:: $ /usr/lib/ganeti/tools/cfgupgrade12 -v --dry-run $ /usr/lib/ganeti/tools/cfgupgrade12 -v #. Make sure ``ganeti-noded`` is running on all nodes (and start it if not) #. Start the master daemon:: $ ganeti-masterd #. Check that a simple node-list works:: $ gnt-node list #. Redistribute updated configuration to all nodes:: $ gnt-cluster redist-conf $ gnt-cluster copyfile /var/lib/ganeti/known_hosts #.
Optional: if needed, install RAPI-specific certificates under :file:`/var/lib/ganeti/rapi.pem` and run:: $ gnt-cluster copyfile /var/lib/ganeti/rapi.pem #. Run a cluster verify, this should show no problems:: $ gnt-cluster verify #. Remove some obsolete files:: $ gnt-cluster command rm /var/lib/ganeti/ssconf_node_pass $ gnt-cluster command rm /var/lib/ganeti/ssconf_hypervisor #. Update the xen pvm (if this was a pvm cluster) setting for 1.2 compatibility:: $ gnt-cluster modify -H xen-pvm:root_path=/dev/sda #. Depending on your setup, you might also want to reset the initrd parameter:: $ gnt-cluster modify -H xen-pvm:initrd_path=/boot/initrd-2.6-xenU #. Reset the instance autobalance setting to default:: $ for i in $(gnt-instance list -o name --no-headers); do \ gnt-instance modify -B auto_balance=default $i; \ done #. Optional: start the RAPI daemon:: $ ganeti-rapi #. Restart instances:: $ gnt-instance start --force-multiple --all At this point, ``gnt-cluster verify`` should show no errors and the migration is complete. 1.2 releases ------------ 1.2.4 to any other higher 1.2 version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ No changes needed. Rollback will usually require manual editing of the configuration file. 1.2.3 to 1.2.4 ~~~~~~~~~~~~~~ No changes needed. Note that going back from 1.2.4 to 1.2.3 will require manual editing of the configuration file (since we added some HVM-related new attributes). 1.2.2 to 1.2.3 ~~~~~~~~~~~~~~ No changes needed. Note that the drbd7-to-8 upgrade tool does a disk format change for the DRBD metadata, so in theory this might be **risky**. It is advised to have (good) backups before doing the upgrade. 1.2.1 to 1.2.2 ~~~~~~~~~~~~~~ No changes needed. 1.2.0 to 1.2.1 ~~~~~~~~~~~~~~ No changes needed. Only some bugfixes and new additions that don't affect existing clusters. 1.2.0 beta 3 to 1.2.0 ~~~~~~~~~~~~~~~~~~~~~ No changes needed. 1.2.0 beta 2 to beta 3 ~~~~~~~~~~~~~~~~~~~~~~ No changes needed. A new version of the debian-etch-instance OS (0.3) has been released, but upgrading it is not required. 1.2.0 beta 1 to beta 2 ~~~~~~~~~~~~~~~~~~~~~~ Beta 2 switched the config file format to JSON. Steps to upgrade: #. Stop the daemons (``/etc/init.d/ganeti stop``) on all nodes #. Disable the cron job (default is :file:`/etc/cron.d/ganeti`) #. Install the new version #. Make a backup copy of the config file #. Upgrade the config file using the following command:: $ /usr/share/ganeti/cfgupgrade --verbose /var/lib/ganeti/config.data #. Start the daemons and run ``gnt-cluster info``, ``gnt-node list`` and ``gnt-instance list`` to check if the upgrade process finished successfully The OS definition also needs to be upgraded. There is a new version of the debian-etch-instance OS (0.2) that goes along with beta 2. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/locking.rst0000644000000000000000000000630312230001635015711 0ustar00rootroot00000000000000Ganeti locking ============== Introduction ------------ This document describes lock order dependencies in Ganeti. It is divided into functional sections. Opcode Execution Locking ------------------------ These locks are declared by Logical Units (LUs) (in cmdlib.py) and acquired by the Processor (in mcpu.py) with the aid of the Ganeti Locking Library (locking.py). They are acquired in the following order: * BGL: this is the Big Ganeti Lock, it exists for retrocompatibility.
New LUs acquire it in a shared fashion, and are able to execute all together (barring other lock waits), while old LUs acquire it exclusively and can only execute one at a time, and not at the same time as new LUs. * Instance locks: can be declared in ExpandNames() or DeclareLocks() by an LU, and have the same name as the instance itself. They are acquired as a set. Internally the locking library acquires them in alphabetical order. * Node locks: can be declared in ExpandNames() or DeclareLocks() by an LU, and have the same name as the node itself. They are acquired as a set. Internally the locking library acquires them in alphabetical order. Given this order it's possible to safely acquire a set of instances, and then the nodes they reside on. The ConfigWriter (in config.py) is also protected by a SharedLock, which is shared by functions that read the config and acquired exclusively by functions that modify it. Since the ConfigWriter calls rpc.call_upload_file to all nodes to distribute the config without holding the node locks, this call must be able to execute on the nodes in parallel with other operations (but not necessarily concurrently with itself on the same file, as inside the ConfigWriter this is called with the internal config lock held). Job Queue Locking ----------------- The job queue is designed to be thread-safe. This means that its public functions can be called from any thread. The job queue can be called from functions called by the queue itself (e.g. logical units), but special attention must be paid not to create deadlocks or an invalid state. The single queue lock is used from all classes involved in the queue handling. During development we tried to split locks, but deemed it to be too dangerous and difficult at the time. Job queue functions acquiring the lock can be safely called from all the rest of the code, as the lock is released before leaving the job queue again. Unlocked functions should only be called from job queue related classes (e.g. in jqueue.py) and the lock must be acquired beforehand. In the job queue worker (``_JobQueueWorker``), the lock must be released before calling the LU processor. Otherwise a deadlock can occur when log messages are added to opcode results. Node Daemon Locking ------------------- The node daemon contains a lock for the job queue. In order to avoid conflicts and/or corruption when an eventual master daemon or another node daemon is running, it must be held for all job queue operations. There's one special case for the node daemon running on the master node. If grabbing the lock in exclusive mode fails on startup, the code assumes all checks have been done by the process keeping the lock. .. vim: set textwidth=72 : ganeti-2.9.3/doc/design-http-server.rst0000644000000000000000000001267112230001635020022 0ustar00rootroot00000000000000========================================= Design for replacing Ganeti's HTTP server ========================================= .. contents:: :depth: 4 .. _http-srv-shortcomings: Current state and shortcomings ------------------------------ The :doc:`new design for import/export ` depends on an HTTP server. Ganeti includes a home-grown HTTP server based on Python's ``BaseHTTPServer``. While it served us well so far, it only implements the very basics of the HTTP protocol. It is, for example, not structured well enough to support chunked transfers (:rfc:`2616`, section 3.6.1), which would have some advantages. In addition, it has not been designed for sending large responses.
In the case of the node daemon the HTTP server cannot easily be separated from the actual backend code and therefore must run as "root". The RAPI daemon does request parsing in the same process as talking to the master daemon via LUXI. Proposed changes ---------------- The proposal is to start using a full-fledged HTTP server in Ganeti and to run Ganeti's code as `FastCGI `_ applications. Reasons: - Simplify Ganeti's code by delegating the details of HTTP and SSL to another piece of software - Run HTTP frontend and handler backend as separate processes and users (esp. useful for node daemon, but also import/export and Remote API) - Allows implementation of :ref:`rpc-feedback` Software choice +++++++++++++++ Theoretically any server capable of speaking FastCGI to a backend process could be used. However, to keep the number of steps required for setting up a new cluster at roughly the same level, the implementation will be geared for one specific HTTP server at the beginning. Support for other HTTP servers can still be implemented. After a rough selection of available HTTP servers `lighttpd `_ and `nginx `_ were the most likely candidates. Both are `widely used`_ and tested. .. _widely used: http://news.netcraft.com/archives/2011/01/12/january-2011-web-server-survey-4.html Nginx' `original documentation `_ is in Russian, translations are `available in a Wiki `_. Nginx does not support old-style CGI programs. The author found `lighttpd's documentation `_ easier to understand and was able to configure a test server quickly. This, together with the support for more technologies, made deciding easier. With its use as a public-facing web server on a large number of websites (and possibly more behind proxies), lighttpd should be a safe choice. Unlike other webservers, such as the Apache HTTP Server, lighttpd's codebase is of manageable size. Initially the HTTP server would only be used for import/export transfers, but its use can be expanded to the Remote API and node daemon (see :ref:`rpc-feedback`). To reduce the attack surface, an option will be provided to configure services (e.g. import/export) to only listen on certain network interfaces. .. _rpc-feedback: RPC feedback ++++++++++++ HTTP/1.1 supports chunked transfers (:rfc:`2616`, section 3.6.1). They could be used to provide feedback from node daemons to the master, similar to the feedback from jobs. A good use would be to provide feedback to the user during long-running operations, e.g. downloading an instance's data from another cluster. .. _requirement: http://www.python.org/dev/peps/pep-0333/#buffering-and-streaming WSGI 1.0 (:pep:`333`) includes the following `requirement`_: WSGI servers, gateways, and middleware **must not** delay the transmission of any block; they **must** either fully transmit the block to the client, or guarantee that they will continue transmission even while the application is producing its next block This behaviour was confirmed to work with lighttpd and the :ref:`flup ` library. FastCGI by itself has no such guarantee; webservers with buffering might require artificial padding to force the message to be transmitted. The node daemon can send JSON-encoded messages back to the master daemon by separating them using a predefined character (see :ref:`LUXI `). The final message contains the method's result. pycURL passes each received chunk to the callback set as ``CURLOPT_WRITEFUNCTION``.
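As a sketch of how a client could reassemble such a stream, the following code splits chunks received through the pycURL write callback into individual JSON messages; the separator character (a newline) and the handler wiring are illustrative assumptions, not part of this design::

  # Sketch: accumulate chunks delivered via CURLOPT_WRITEFUNCTION and
  # split them into JSON messages on an assumed separator character.
  import json
  import pycurl

  class FeedbackReader(object):
    SEPARATOR = "\n"  # assumed; the actual separator is defined by LUXI

    def __init__(self, callback):
      self._buf = ""
      self._callback = callback

    def write(self, chunk):
      self._buf += chunk
      while self.SEPARATOR in self._buf:
        (msg, self._buf) = self._buf.split(self.SEPARATOR, 1)
        self._callback(json.loads(msg))

  reader = FeedbackReader(lambda msg: None)  # handler chosen by caller
  curl = pycurl.Curl()
  curl.setopt(pycurl.WRITEFUNCTION, reader.write)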
Once a message is complete, the master daemon can pass it to a callback function inside the job, which then decides on what to do (e.g. forward it as job feedback to the user). A more detailed design may have to be written before deciding whether to implement RPC feedback. .. _http-software-req: Software requirements +++++++++++++++++++++ - lighttpd 1.4.24 or above built with OpenSSL support (earlier versions `don't support SSL client certificates `_) - `flup `_ for FastCGI Lighttpd SSL configuration ++++++++++++++++++++++++++ .. highlight:: lighttpd The following sample shows how to configure SSL with client certificates in Lighttpd:: $SERVER["socket"] == ":443" { ssl.engine = "enable" ssl.pemfile = "server.pem" ssl.ca-file = "ca.pem" ssl.use-sslv2 = "disable" ssl.cipher-list = "HIGH:-DES:-3DES:-EXPORT:-ADH" ssl.verifyclient.activate = "enable" ssl.verifyclient.enforce = "enable" ssl.verifyclient.exportcert = "enable" ssl.verifyclient.username = "SSL_CLIENT_S_DN_CN" } .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-node-add.rst0000644000000000000000000001313012244641676017225 0ustar00rootroot00000000000000Design for adding a node to a cluster ===================================== .. contents:: :depth: 3 Current state and shortcomings ------------------------------ Before a node can be added to a cluster, its SSH daemon must be re-configured to use the cluster-wide SSH host key. Ganeti 2.3.0 changed the way this is done by moving all related code to a separate script, ``tools/setup-ssh``, using Paramiko. Before all such configuration was done from ``lib/bootstrap.py`` using the system's own SSH client and a shell script given to said client through parameters. Both solutions controlled all actions on the connecting machine; the newly added node was merely executing commands. This implies and requires a tight coupling and equality between nodes (e.g. paths to files being the same). Most of the logic and error handling is also done on the connecting machine. Once a node's SSH daemon has been configured, more than 25 files need to be copied using ``scp`` before the node daemon can be started. No verification is being done before files are copied. Once the node daemon is started, an opcode is submitted to the master daemon, which will then copy more files, such as the configuration and job queue for master candidates, using RPC. This process is somewhat fragile and requires initiating many SSH connections. Proposed changes ---------------- SSH ~~~ The main goal is to move more logic to the newly added node. Instead of having a relatively large script executed on the master node, most of it is moved over to the added node. A new script named ``prepare-node-join`` is added. It receives a JSON data structure (defined :ref:`below `) on its standard input. Once the data has been successfully decoded, it proceeds to configure the local node's SSH daemon and root's SSH settings, after which the SSH daemon is restarted. All the master node has to do to add a new node is to gather all required data, build the data structure, and invoke the script on the node to be added. This will enable us to once again use the system's own SSH client and to drop the dependency on Paramiko for Ganeti itself (``ganeti-listrunner`` is going to continue using Paramiko). Eventually ``setup-ssh`` can be removed. Node daemon ~~~~~~~~~~~ Similar to SSH setup changes, the process of copying files and starting the node daemon will be moved into a dedicated program. 
On its standard input it will receive a standardized JSON structure (defined :ref:`below `). Once the input data has been successfully decoded and the received values were verified for sanity, the program proceeds to write the values to files and then starts the node daemon (``ganeti-noded``). To add a new node to the cluster, the master node will have to gather all values, build the data structure, and then invoke the newly added ``node-daemon-setup`` program via SSH. In this way only a single SSH connection is needed and the values can be verified before being written to files. If the program exits successfully, the node is ready to be added to the master daemon's configuration. The node daemon will be running, but ``OpNodeAdd`` needs to be run before it becomes a full node. The opcode will copy more files, such as the :doc:`RAPI certificate `. Data structures --------------- .. _prepare-node-join-json: JSON structure for SSH setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The data is given in an object containing the keys described below. Unless specified otherwise, all entries are optional. ``cluster_name`` Required string with the cluster name. If a local cluster name is found, the join process is aborted unless the passed cluster name matches the local name. ``node_daemon_certificate`` Public part of cluster's node daemon certificate in PEM format. If a local node certificate and key is found, the join process is aborted unless this passed public part can be verified with the local key. ``ssh_host_key`` List containing public and private parts of SSH host key. See below for definition. ``ssh_root_key`` List containing public and private parts of root's key for SSH authorization. See below for definition. Lists of SSH keys use a tuple with three values. The first describes the key variant (``rsa`` or ``dsa``). The second and third are the private and public part of the key. Example: .. highlight:: javascript :: [ ("rsa", "-----BEGIN RSA PRIVATE KEY-----...", "ssh-rss AAAA..."), ("dsa", "-----BEGIN DSA PRIVATE KEY-----...", "ssh-dss AAAA..."), ] .. _node-daemon-setup-json: JSON structure for node daemon setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The data is given in an object containing the keys described below. Unless specified otherwise, all entries are optional. ``cluster_name`` Required string with the cluster name. If a local cluster name is found, the join process is aborted unless the passed cluster name matches the local name. The cluster name is also included in the dictionary given via the ``ssconf`` entry. ``node_daemon_certificate`` Public and private part of cluster's node daemon certificate in PEM format. If a local node certificate is found, the process is aborted unless it matches. ``ssconf`` Dictionary with ssconf names and their values. Both are strings. Example: .. highlight:: javascript :: { "cluster_name": "cluster.example.com", "master_ip": "192.168.2.1", "master_netdev": "br0", # … } ``start_node_daemon`` Boolean denoting whether the node daemon should be started (or restarted if it was running for some reason). .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-device-uuid-name.rst0000644000000000000000000000554112244641676020702 0ustar00rootroot00000000000000========================================== Design for adding UUID and name to devices ========================================== .. 
contents:: :depth: 4 This is a design document about adding UUID and name to instance devices (Disks/NICs) and the ability to reference them by those identifiers. Current state and shortcomings ============================== Currently, the only way to refer to a device (Disk/NIC) is by its index inside the VM (e.g. gnt-instance modify --disk 2:remove). Using indices as identifiers has the drawback that addition/removal of a device results in changing the identifiers (indices) of other devices and makes the net effect of commands depend on their strict ordering. A device reference is not absolute, meaning an external entity controlling Ganeti, e.g., over RAPI, cannot keep permanent identifiers for referring to devices, nor can it have more than one outstanding command, since their order of execution is not guaranteed. Proposed Changes ================ To be able to reference a device in a unique way, we propose to extend Disks and NICs by assigning to them a UUID and a name. The UUID will be assigned by Ganeti upon creation, while the name will be an optional user parameter. Renaming a device will also be supported. Commands (e.g. `gnt-instance modify`) will be able to reference each device by its index, UUID, or name. To be able to refer to devices by name, we must guarantee that device names are unique. Unlike other objects (instances, networks, nodegroups, etc.), NIC and Disk objects will not have unique names across the cluster, since they are still not independent entities, but rather part of the instance object. This makes global uniqueness of names hard to achieve at this point. Instead their names will be unique at instance level. Apart from unique device names, we must also guarantee that a device name cannot be the UUID of another device. Also, to remove ambiguity while supporting both indices and names as identifiers, we forbid purely numeric device names. Implementation Details ====================== Modify OpInstanceSetParams to accept not only indices, but also device names and UUIDs. So, the accepted NIC and disk modifications will have the following format: identifier:action,key=value where, from now on, identifier can be an index (-1 for the last device), UUID, or name and action should be add, modify, or remove. Configuration Changes ~~~~~~~~~~~~~~~~~~~~~ Disk and NIC config objects get two extra slots: - uuid - name Instance Queries ~~~~~~~~~~~~~~~~~ We will extend the query mechanism to expose names and UUIDs of NICs and Disks. Hook Variables ~~~~~~~~~~~~~~ We will expose the name of NICs and Disks to the hook environment of instance-related operations: ``INSTANCE_NIC%d_NAME`` ``INSTANCE_DISK%d_NAME`` .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-multi-reloc.rst0000644000000000000000000001170112230001635017764 0ustar00rootroot00000000000000==================================== Moving instances across node groups ==================================== This design document explains the changes needed in Ganeti to perform instance moves across node groups. Reader familiarity with the following existing documents is advised: - :doc:`Current IAllocator specification ` - :doc:`Shared storage model in 2.3+ ` Motivation and design proposal ================================== At the moment, moving instances away from their primary or secondary nodes with the ``relocate`` and ``multi-evacuate`` IAllocator calls restricts target nodes to those on the same node group.
This ensures a mobility domain is never crossed, and allows normal operation of each node group to be confined within itself. It is desirable, however, to have a way of moving instances across node groups so that, for example, it is possible to move a set of instances to another group for policy reasons, or completely empty a given group to perform maintenance operations. To implement this, we propose the addition of new IAllocator calls to compute inter-group instance moves and group-aware node evacuation, taking into account mobility domains as appropriate. The interface proposed below should be enough to cover the use cases mentioned above. With the implementation of this design proposal, the previous ``multi-evacuate`` mode will be deprecated. .. _multi-reloc-detailed-design: Detailed design =============== All requests honor the groups' ``alloc_policy`` attribute. Changing instance's groups -------------------------- Takes a list of instances and a list of node group UUIDs; the instances will be moved away from their current group, to any of the groups in the target list. All instances need to have their primary node in the same group, which may not be a target group. If the target group list is empty, the request is simply "change group" and the instances are placed in any group but their original one. Node evacuation --------------- Evacuates instances off their primary nodes. The evacuation mode can be given as ``primary-only``, ``secondary-only`` or ``all``. The call is given a list of instances whose primary nodes need to be in the same node group. The returned nodes need to be in the same group as the original primary node. .. _multi-reloc-result: Result ------ In all storage models, an inter-group move can be modeled as a sequence of **replace secondary**, **migration** and **failover** operations (when shared storage is used, they will all be failover or migration operations within the corresponding mobility domain). The result of the operations described above must contain two lists of instances and a list of jobs (each of which is a list of serialized opcodes) to actually execute the operation. :doc:`Job dependencies ` can be used to force jobs to run in a certain order while still making use of parallelism. The two lists of instances describe which instances could be moved/migrated and which couldn't for some reason ("unsuccessful"). The union of the instances in the two lists must be equal to the set of instances given in the original request. The successful list of instances contains elements as follows:: (instance name, target group name, [chosen node names]) The choice of names is simply for readability reasons (for example, Ganeti could log the computed solution in the job information) and for being able to check (manually) for consistency that the generated opcodes match the intended target groups/nodes. Note that for the node-evacuate operation, the group is not changed, but it should still be returned as such (as it's easier to have the same return type for both operations). The unsuccessful list of instances contains elements as follows:: (instance name, explanation) where ``explanation`` is a string describing why the plugin was not able to relocate the instance. The client is given a list of job IDs (see the :doc:`design for LU-generated jobs `) which it can watch. Failures should be reported to the user. .. 
Example job list::

  [
    # First job
    [
      { "OP_ID": "OP_INSTANCE_MIGRATE",
        "instance_name": "inst1.example.com",
      },
      { "OP_ID": "OP_INSTANCE_MIGRATE",
        "instance_name": "inst2.example.com",
      },
    ],
    # Second job
    [
      { "OP_ID": "OP_INSTANCE_REPLACE_DISKS",
        "depends": [
          [-1, ["success"]],
          ],
        "instance_name": "inst2.example.com",
        "mode": "replace_new_secondary",
        "remote_node": "node4.example.com",
      },
    ],
    # Third job
    [
      { "OP_ID": "OP_INSTANCE_FAILOVER",
        "depends": [
          [-2, []],
          ],
        "instance_name": "inst8.example.com",
      },
    ],
  ]

Accepted opcodes:

- ``OP_INSTANCE_FAILOVER``
- ``OP_INSTANCE_MIGRATE``
- ``OP_INSTANCE_REPLACE_DISKS``

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-2.0.rst0000644000000000000000000023120312244641676016054 0ustar00rootroot00000000000000
=================
Ganeti 2.0 design
=================

This document describes the major changes in Ganeti 2.0 compared to the 1.2 version. The 2.0 version will constitute a rewrite of the 'core' architecture, paving the way for additional features in future 2.x versions.

.. contents:: :depth: 3

Objective
=========

Ganeti 1.2 has many scalability issues and restrictions due to its roots as software for managing small and 'static' clusters. Version 2.0 will attempt to remedy first the scalability issues and then the restrictions.

Background
==========

While Ganeti 1.2 is usable, it severely limits the flexibility of the cluster administration and imposes a very rigid model. It has the following main scalability issues:

- only one operation at a time on the cluster [#]_
- poor handling of node failures in the cluster
- mixing hypervisors in a cluster is not allowed

It also has a number of artificial restrictions, due to historical design:

- fixed number of disks (two) per instance
- fixed number of NICs

.. [#] Replace disks will release the lock, but this is an exception and not a recommended way to operate

The 2.0 version is intended to address some of these problems, and create a more flexible code base for future developments.

Among these problems, the single-operation-at-a-time restriction is the biggest issue with the current version of Ganeti. It is such a big impediment in operating bigger clusters that many times one is tempted to remove the lock just to do a simple operation like start instance while an OS installation is running.

Scalability problems
--------------------

Ganeti 1.2 has a single global lock, which is used for all cluster operations. This has been painful at various times, for example:

- It is impossible for two people to efficiently interact with a cluster (for example for debugging) at the same time.
- When batch jobs are running it's impossible to do other work (for example failovers/fixes) on a cluster.

This poses scalability problems: as clusters grow in node and instance size it's a lot more likely that operations which one could conceive should run in parallel (for example because they happen on different nodes) are actually stalling each other while waiting for the global lock, without a real reason for that to happen.

One of the main causes of this global lock (beside the higher difficulty of ensuring data consistency in a more granular lock model) is the fact that currently there is no long-lived process in Ganeti that can coordinate multiple operations. Each command tries to acquire the so called *cmd* lock and when it succeeds, it takes complete ownership of the cluster configuration and state.
Other scalability problems are due to the design of the DRBD device model, which assumed at its creation a low (one to four) number of instances per node, which is no longer true with today's hardware.

Artificial restrictions
-----------------------

Ganeti 1.2 (and previous versions) have a fixed two-disks, one-NIC per instance model. This is a purely artificial restriction, but it touches multiple areas (configuration, import/export, command line), so removing it is more fitted to a major release than a minor one.

Architecture issues
-------------------

The fact that each command is a separate process that reads the cluster state, executes the command, and saves the new state is also an issue on big clusters where the configuration data for the cluster begins to be non-trivial in size.

Overview
========

In order to solve the scalability problems, a rewrite of the core design of Ganeti is required. While the cluster operations themselves won't change (e.g. start instance will do the same things), the way these operations are scheduled internally will change radically.

The new design will change the cluster architecture to:

.. digraph:: "ganeti-2.0-architecture"

  compound=false
  concentrate=true
  mclimit=100.0
  nslimit=100.0
  edge[fontsize="8" fontname="Helvetica-Oblique"]
  node[width="0" height="0" fontsize="12" fontcolor="black" shape=rect]

  subgraph outside {
    rclient[label="external clients"]
    label="Outside the cluster"
  }

  subgraph cluster_inside {
    label="ganeti cluster"
    labeljust=l
    subgraph cluster_master_node {
      label="master node"
      rapi[label="RAPI daemon"]
      cli[label="CLI"]
      watcher[label="Watcher"]
      burnin[label="Burnin"]
      masterd[shape=record style=filled label="{ luxi endpoint | master I/O thread | job queue | { worker| worker | worker }}"]
      {rapi;cli;watcher;burnin} -> masterd:luxi [label="LUXI" labelpos=100]
    }

    subgraph cluster_nodes {
      label="nodes"
      noded1 [shape=record label="{ RPC listener | Disk management | Network management | Hypervisor } "]
      noded2 [shape=record label="{ RPC listener | Disk management | Network management | Hypervisor } "]
      noded3 [shape=record label="{ RPC listener | Disk management | Network management | Hypervisor } "]
    }
    masterd:w2 -> {noded1;noded2;noded3} [label="node RPC"]
    cli -> {noded1;noded2;noded3} [label="SSH"]
  }

  rclient -> rapi [label="RAPI protocol"]

This differs from the 1.2 architecture by the addition of the master daemon, which will be the only entity to talk to the node daemons.

Detailed design
===============

The changes for 2.0 can be split into roughly three areas:

- core changes that affect the design of the software
- features (or restriction removals) but which do not have a wide impact on the design
- user-level and API-level changes which translate into differences for the operation of the cluster

Core changes
------------

The main changes will be switching from a per-process model to a daemon based model, where the individual gnt-* commands will be clients that talk to this daemon (see `Master daemon`_). This will allow us to get rid of the global cluster lock for most operations, having instead a per-object lock (see `Granular locking`_). Also, the daemon will be able to queue jobs, and this will allow the individual clients to submit jobs without waiting for them to finish, and also see the result of old requests (see `Job Queue`_).
Beside these major changes, another 'core' change but that will not be as visible to the users will be changing the model of object attribute storage, and separate that into name spaces (such that an Xen PVM instance will not have the Xen HVM parameters). This will allow future flexibility in defining additional parameters. For more details see `Object parameters`_. The various changes brought in by the master daemon model and the read-write RAPI will require changes to the cluster security; we move away from Twisted and use HTTP(s) for intra- and extra-cluster communications. For more details, see the security document in the doc/ directory. Master daemon ~~~~~~~~~~~~~ In Ganeti 2.0, we will have the following *entities*: - the master daemon (on the master node) - the node daemon (on all nodes) - the command line tools (on the master node) - the RAPI daemon (on the master node) The master-daemon related interaction paths are: - (CLI tools/RAPI daemon) and the master daemon, via the so called *LUXI* API - the master daemon and the node daemons, via the node RPC There are also some additional interaction paths for exceptional cases: - CLI tools might access via SSH the nodes (for ``gnt-cluster copyfile`` and ``gnt-cluster command``) - master failover is a special case when a non-master node will SSH and do node-RPC calls to the current master The protocol between the master daemon and the node daemons will be changed from (Ganeti 1.2) Twisted PB (perspective broker) to HTTP(S), using a simple PUT/GET of JSON-encoded messages. This is done due to difficulties in working with the Twisted framework and its protocols in a multithreaded environment, which we can overcome by using a simpler stack (see the caveats section). The protocol between the CLI/RAPI and the master daemon will be a custom one (called *LUXI*): on a UNIX socket on the master node, with rights restricted by filesystem permissions, the CLI/RAPI will talk to the master daemon using JSON-encoded messages. The operations supported over this internal protocol will be encoded via a python library that will expose a simple API for its users. Internally, the protocol will simply encode all objects in JSON format and decode them on the receiver side. For more details about the RAPI daemon see `Remote API changes`_, and for the node daemon see `Node daemon changes`_. .. _luxi: The LUXI protocol +++++++++++++++++ As described above, the protocol for making requests or queries to the master daemon will be a UNIX-socket based simple RPC of JSON-encoded messages. The choice of UNIX was in order to get rid of the need of authentication and authorisation inside Ganeti; for 2.0, the permissions on the Unix socket itself will determine the access rights. We will have two main classes of operations over this API: - cluster query functions - job related functions The cluster query functions are usually short-duration, and are the equivalent of the ``OP_QUERY_*`` opcodes in Ganeti 1.2 (and they are internally implemented still with these opcodes). The clients are guaranteed to receive the response in a reasonable time via a timeout. The job-related functions will be: - submit job - query job (which could also be categorized in the query-functions) - archive job (see the job queue design doc) - wait for job change, which allows a client to wait without polling For more details of the actual operation list, see the `Job Queue`_. 
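As an illustration of how a client could drive these functions over the UNIX socket, here is a minimal sketch; it anticipates the message framing (JSON plus an ``ETX`` delimiter and ``method``/``args`` fields) that the next paragraphs specify, and the socket path and error handling are assumptions for this example only::

  import json
  import socket

  ETX = "\x03"  # message delimiter; see the framing specification below

  def call_luxi(address, method, args):
    """Send one request over the LUXI-style socket and read the reply.

    A minimal sketch: no timeouts and no concurrent requests.

    """
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(address)
    try:
      sock.sendall(json.dumps({"method": method, "args": args}) + ETX)
      buf = ""
      while not buf.endswith(ETX):
        data = sock.recv(4096)
        if not data:
          raise RuntimeError("connection closed before end of message")
        buf += data
    finally:
      sock.close()
    response = json.loads(buf[:-1])
    if not response["success"]:
      raise RuntimeError("remote error: %s" % (response["result"],))
    return response["result"]

  # Illustrative usage; the socket path is an assumption:
  # result = call_luxi("/var/run/ganeti/master.sock", "QueryJobs",
  #                    [[job_id], ["status"]])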
Both requests and responses will consist of a JSON-encoded message followed by the ``ETX`` character (ASCII decimal 3), which is not a valid character in JSON messages and thus can serve as a message delimiter. The contents of the messages will be a dictionary with two fields:

:method: the name of the method called
:args: the arguments to the method, as a list (no keyword arguments allowed)

Responses will follow the same format, with the two fields being:

:success: a boolean denoting the success of the operation
:result: the actual result, or error message in case of failure

There are two special values for the result field:

- in the case that the operation failed, and this field is a list of length two, the client library will try to interpret it as an exception, the first element being the exception type and the second one the actual exception arguments; this will allow a simple method of passing Ganeti-related exceptions across the interface
- for the *WaitForChange* call (that waits on the server for a job to change status), if the result is equal to ``nochange`` instead of the usual result for this call (a list of changes), then the library will internally retry the call; this is done in order to differentiate internally between a hung master daemon and a job that simply has not changed

Users of the API that don't use the provided python library should take care of the above two cases.

Master daemon implementation
++++++++++++++++++++++++++++

The daemon will be based around a main I/O thread that will wait for new requests from the clients, and that does the setup/shutdown of the other threads (pools).

There will be two other classes of threads in the daemon:

- job processing threads, part of a thread pool, and which are long-lived, started at daemon startup and terminated only at shutdown time
- client I/O threads, which are the ones that talk the local protocol (LUXI) to the clients, and are short-lived

Master startup/failover
+++++++++++++++++++++++

In Ganeti 1.x there is no protection against failing over the master to a node with stale configuration. In effect, the responsibility for correct failovers falls on the admin. This is true both for the new master and for when an old, offline master starts up.

Since in 2.x we are extending the cluster state to cover the job queue and have a daemon that will execute the job queue by itself, we want to have more resilience for the master role.

The following algorithm will happen whenever a node is ready to transition to the master role, either at startup time or at node failover:

#. read the configuration file and parse the node list contained within

#. query all the nodes and make sure we obtain an agreement via a quorum of at least half plus one nodes for the following:

   - we have the latest configuration and job list (as determined by the serial number on the configuration and highest job ID on the job queue)
   - if we are not failing over (but just starting), the quorum agrees that we are the designated master
   - if any of the above is false, we prevent the current operation (i.e. we don't become the master)

#. at this point, the node transitions to the master role

#. for all the in-progress jobs, mark them as failed, with reason unknown or something similar (master failed, etc.)
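The quorum test in step 2 above is a simple strict-majority count; a minimal sketch of the idea (``_has_quorum`` and the vote representation are assumptions for illustration, not the actual voting code)::

  def _has_quorum(votes, num_nodes):
    """Check the 'half plus one' agreement described above.

    @param votes: list of (node_name, agrees) pairs, one per queried
      node; 'agrees' is True when that node confirms we hold the
      latest configuration/job data (and, at plain startup, that we
      are the designated master)
    @param num_nodes: total number of nodes in the configuration

    """
    needed = num_nodes // 2 + 1  # strict majority: half plus one
    positive = len([name for (name, agrees) in votes if agrees])
    return positive >= needed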
Since due to exceptional conditions we could have a situation in which no node can become the master due to inconsistent data, we will have an override switch for the master daemon startup that will assume the current node has the right data and will replicate all the configuration files to the other nodes. **Note**: the above algorithm is by no means an election algorithm; it is a *confirmation* of the master role currently held by a node. Logging +++++++ The logging system will be switched completely to the standard python logging module; currently it's logging-based, but exposes a different API, which is just overhead. As such, the code will be switched over to standard logging calls, and only the setup will be custom. With this change, we will remove the separate debug/info/error logs, and instead have always one logfile per daemon model: - master-daemon.log for the master daemon - node-daemon.log for the node daemon (this is the same as in 1.2) - rapi-daemon.log for the RAPI daemon logs - rapi-access.log, an additional log file for the RAPI that will be in the standard HTTP log format for possible parsing by other tools Since the :term:`watcher` will only submit jobs to the master for startup of the instances, its log file will contain less information than before, mainly that it will start the instance, but not the results. Node daemon changes +++++++++++++++++++ The only change to the node daemon is that, since we need better concurrency, we don't process the inter-node RPC calls in the node daemon itself, but we fork and process each request in a separate child. Since we don't have many calls, and we only fork (not exec), the overhead should be minimal. Caveats +++++++ A discussed alternative is to keep the current individual processes touching the cluster configuration model. The reasons we have not chosen this approach is: - the speed of reading and unserializing the cluster state today is not small enough that we can ignore it; the addition of the job queue will make the startup cost even higher. While this runtime cost is low, it can be on the order of a few seconds on bigger clusters, which for very quick commands is comparable to the actual duration of the computation itself - individual commands would make it harder to implement a fire-and-forget job request, along the lines "start this instance but do not wait for it to finish"; it would require a model of backgrounding the operation and other things that are much better served by a daemon-based model Another area of discussion is moving away from Twisted in this new implementation. 
While Twisted has its advantages, there are also many disadvantages to using it:

- first and foremost, it's not a library, but a framework; thus, if you use twisted, all the code needs to be 'twisted-ized' and written in an asynchronous manner, using deferreds; while this method works, it's not a common way to code and it requires that the entire process workflow is based around a single *reactor* (Twisted name for a main loop)
- the more advanced granular locking that we want to implement would require, if written in the async manner, deep integration with the Twisted stack, to such an extent that business logic is inseparable from the protocol coding; we felt that this is an unreasonable request, and that a good protocol library should allow complete separation of low-level protocol calls and business logic; by comparison, the threaded approach combined with the HTTPS protocol required (for the first iteration) absolutely no changes from the 1.2 code, and later changes for optimizing the inter-node RPC calls required just syntactic changes (e.g. ``rpc.call_...`` to ``self.rpc.call_...``)

Another issue is with the Twisted API stability - during the Ganeti 1.x lifetime, we had to implement workarounds many times for changes in the Twisted version, so that for example 1.2 is able to use both Twisted 2.x and 8.x.

In the end, since we already had an HTTP server library for the RAPI, we just reused that for inter-node communication.

Granular locking
~~~~~~~~~~~~~~~~

We want to make sure that multiple operations can run in parallel on a Ganeti Cluster. In order for this to happen we need to make sure concurrently run operations don't step on each other's toes and break the cluster.

This design addresses how we are going to deal with locking so that:

- we preserve data coherency
- we prevent deadlocks
- we prevent job starvation

Reaching the maximum possible parallelism is a Non-Goal. We have identified a set of operations that are currently bottlenecks and need to be parallelised and have worked on those. In the future it will be possible to address other needs, thus making the cluster more and more parallel one step at a time.

This section only talks about parallelising Ganeti level operations, aka Logical Units, and the locking needed for that. Any other synchronization lock needed internally by the code is outside its scope.

Library details
+++++++++++++++

The proposed library has these features:

- internally managing all the locks, making the implementation transparent from their usage
- automatically grabbing multiple locks in the right order (avoid deadlock)
- ability to transparently handle conversion to more granularity
- support asynchronous operation (future goal)

Locking will be valid only on the master node and will not be a distributed operation. Therefore, in case of master failure, the operations currently running will be aborted and the locks will be lost; it remains to the administrator to clean up (if needed) the operation result (e.g. make sure an instance is either installed correctly or removed). A corollary of this is that a master-failover operation with both masters alive needs to happen while no operations are running, and therefore no locks are held.

All the locks will be represented by objects (like ``lockings.SharedLock``), and the individual locks for each object will be created at initialisation time, from the config file. The API will have a way to grab one or more locks at the same time.
Any attempt to grab a lock while already holding one in the wrong order will be checked for, and fail. The Locks +++++++++ At the first stage we have decided to provide the following locks: - One "config file" lock - One lock per node in the cluster - One lock per instance in the cluster All the instance locks will need to be taken before the node locks, and the node locks before the config lock. Locks will need to be acquired at the same time for multiple instances and nodes, and internal ordering will be dealt within the locking library, which, for simplicity, will just use alphabetical order. Each lock has the following three possible statuses: - unlocked (anyone can grab the lock) - shared (anyone can grab/have the lock but only in shared mode) - exclusive (no one else can grab/have the lock) Handling conversion to more granularity +++++++++++++++++++++++++++++++++++++++ In order to convert to a more granular approach transparently each time we split a lock into more we'll create a "metalock", which will depend on those sub-locks and live for the time necessary for all the code to convert (or forever, in some conditions). When a metalock exists all converted code must acquire it in shared mode, so it can run concurrently, but still be exclusive with old code, which acquires it exclusively. In the beginning the only such lock will be what replaces the current "command" lock, and will acquire all the locks in the system, before proceeding. This lock will be called the "Big Ganeti Lock" because holding that one will avoid any other concurrent Ganeti operations. We might also want to devise more metalocks (eg. all nodes, all nodes+config) in order to make it easier for some parts of the code to acquire what it needs without specifying it explicitly. In the future things like the node locks could become metalocks, should we decide to split them into an even more fine grained approach, but this will probably be only after the first 2.0 version has been released. Adding/Removing locks +++++++++++++++++++++ When a new instance or a new node is created an associated lock must be added to the list. The relevant code will need to inform the locking library of such a change. This needs to be compatible with every other lock in the system, especially metalocks that guarantee to grab sets of resources without specifying them explicitly. The implementation of this will be handled in the locking library itself. When instances or nodes disappear from the cluster the relevant locks must be removed. This is easier than adding new elements, as the code which removes them must own them exclusively already, and thus deals with metalocks exactly as normal code acquiring those locks. Any operation queuing on a removed lock will fail after its removal. Asynchronous operations +++++++++++++++++++++++ For the first version the locking library will only export synchronous operations, which will block till the needed lock are held, and only fail if the request is impossible or somehow erroneous. In the future we may want to implement different types of asynchronous operations such as: - try to acquire this lock set and fail if not possible - try to acquire one of these lock sets and return the first one you were able to get (or after a timeout) (select/poll like) These operations can be used to prioritize operations based on available locks, rather than making them just blindly queue for acquiring them. 
The inherent risk, though, is that any code using the first operation, or setting a timeout for the second one, is susceptible to starvation and thus may never be able to get the required locks and complete certain tasks. Considering this, providing/using these operations should not be among our first priorities.

Locking granularity
+++++++++++++++++++

For the first version of this code we'll convert each Logical Unit to acquire/release the locks it needs, so locking will be at the Logical Unit level. In the future we may want to split logical units in independent "tasklets" with their own locking requirements. A different design doc (or mini design doc) will cover the move from Logical Units to tasklets.

Code examples
+++++++++++++

In general when acquiring locks we should use a code path equivalent to::

  lock.acquire()
  try:
    ...
    # other code
  finally:
    lock.release()

This makes sure we release all locks, and avoid possible deadlocks. Of course extra care must be used not to leave, if possible, locked structures in an unusable state. Note that with Python 2.5 a simpler syntax will be possible, but we want to keep compatibility with Python 2.4 so the new constructs should not be used.

In order to avoid this extra indentation and code changes everywhere in the Logical Units code, we decided to allow LUs to declare locks, and then execute their code with their locks acquired. In the new world LUs are called like this::

  # user passed names are expanded to the internal lock/resource name,
  # then known needed locks are declared
  lu.ExpandNames()
  ... some locking/adding of locks may happen ...
  # late declaration of locks for one level: this is useful because sometimes
  # we can't know which resource we need before locking the previous level
  lu.DeclareLocks() # for each level (cluster, instance, node)
  ... more locking/adding of locks can happen ...
  # these functions are called with the proper locks held
  lu.CheckPrereq()
  lu.Exec()
  ... locks declared for removal are removed, all acquired locks released ...

The Processor and the LogicalUnit class will contain exact documentation on how locks are supposed to be declared.

Caveats
+++++++

This library will provide an easy upgrade path to bring all the code to granular locking without breaking everything, and it will also guarantee against a lot of common errors. Code switching from the old "lock everything" lock to the new system, though, needs to be carefully scrutinised to be sure it is really acquiring all the necessary locks, and none has been overlooked or forgotten.

The code can contain other locks outside of this library, to synchronise other threaded code (e.g. for the job queue), but in general these should be leaf locks or carefully structured non-leaf ones, to avoid deadlock race conditions.
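Complementing the examples above, the following sketch shows the acquisition order described in `The Locks`_ (instance locks before node locks before the config lock, alphabetical within each level); ``acquire_many`` is a hypothetical helper written for this document, not the library's actual API::

  def acquire_many(instance_locks, node_locks, config_lock, shared=False):
    """Acquire a set of locks in the canonical order.

    Instance locks first, then node locks, then the config lock;
    within each level, alphabetical order of the lock names keeps
    concurrent acquirers from deadlocking each other.

    @param instance_locks: dict of name -> lock object
    @param node_locks: dict of name -> lock object

    """
    acquired = []
    try:
      for level in (instance_locks, node_locks):
        for name in sorted(level):
          level[name].acquire(shared=shared)
          acquired.append(level[name])
      config_lock.acquire(shared=shared)
      acquired.append(config_lock)
    except Exception:
      # on any failure, release what we already hold, in reverse order
      for lock in reversed(acquired):
        lock.release()
      raise
    return acquired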
.. _jqueue-original-design:

Job Queue
~~~~~~~~~

Granular locking is not enough to speed up operations, we also need a queue to store these and to be able to process as many as possible in parallel.

A Ganeti job will consist of multiple ``OpCodes`` which are the basic element of operation in Ganeti 1.2 (and will remain as such). Most command-level commands are equivalent to one OpCode, or in some cases to a sequence of opcodes, all of the same type (e.g. evacuating a node will generate N opcodes of type replace disks).

Job execution ("Life of a Ganeti job")
++++++++++++++++++++++++++++++++++++++

#. Job gets submitted by the client. A new job identifier is generated and assigned to the job. The job is then automatically replicated [#replic]_ to all nodes in the cluster. The identifier is returned to the client.
#. A pool of worker threads waits for new jobs. If all are busy, the job has to wait and the first worker finishing its work will grab it. Otherwise any of the waiting threads will pick up the new job.
#. Client waits for job status updates by calling a waiting RPC function. Log messages may be shown to the user. Until the job is started, it can also be canceled.
#. As soon as the job is finished, its final result and status can be retrieved from the server.
#. If the client archives the job, it gets moved to a history directory. There will be a method to archive all jobs older than a given age.

.. [#replic] We need replication in order to maintain the consistency across all nodes in the system; the master node only differs in the fact that now it is running the master daemon, but if it fails and we do a master failover, the jobs are still visible on the new master (though marked as failed).

Failures to replicate a job to other nodes will only be flagged as errors in the master daemon log if more than half of the nodes failed, otherwise we ignore the failure, and rely on the fact that the next update (for still running jobs) will retry the update. For finished jobs, it is less of a problem.

Future improvements will look into checking the consistency of the job list and jobs themselves at master daemon startup.

Job storage
+++++++++++

Jobs are stored in the filesystem as individual files, serialized using JSON (standard serialization mechanism in Ganeti).

The choice of storing each job in its own file was made because:

- a file can be atomically replaced
- a file can easily be replicated to other nodes
- checking consistency across nodes can be implemented very easily, since all job files should be (at a given moment in time) identical

The other possible choices that were discussed and discounted were:

- single big file with all job data: not feasible due to difficult updates
- in-process databases: hard to replicate the entire database to the other nodes, and replicating individual operations does not mean we keep consistency

Queue structure
+++++++++++++++

All file operations have to be done atomically by writing to a temporary file and subsequent renaming. Except for log messages, every change in a job is stored and replicated to other nodes.

::

  /var/lib/ganeti/queue/
    job-1 (JSON encoded job description and status)
    […]
    job-37
    job-38
    job-39
    lock (Queue managing process opens this file in exclusive mode)
    serial (Last job ID used)
    version (Queue format version)

Locking
+++++++

Locking in the job queue is a complicated topic. It is called from more than one thread and must be thread-safe. For simplicity, a single lock is used for the whole job queue.

A more detailed description can be found in doc/locking.rst.

Internal RPC
++++++++++++

RPC calls available between Ganeti master and node daemons:

jobqueue_update(file_name, content)
  Writes a file in the job queue directory.

jobqueue_purge()
  Cleans the job queue directory completely, including archived jobs.

jobqueue_rename(old, new)
  Renames a file in the job queue directory.
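The "write to a temporary file and rename" rule from `Queue structure`_ can be sketched as follows; this is a minimal illustration under POSIX rename semantics, and a real implementation would also need to handle ownership and permissions::

  import os
  import tempfile

  def atomic_write(file_name, data):
    """Atomically replace file_name with the given contents.

    Writes to a temporary file in the same directory and renames it
    over the target, so readers always see either the old or the new
    complete contents, never a partial write.

    """
    dir_name = os.path.dirname(file_name)
    fd, tmp_name = tempfile.mkstemp(dir=dir_name)
    try:
      try:
        os.write(fd, data)
        os.fsync(fd)  # make sure the data is on disk before the rename
      finally:
        os.close(fd)
      os.rename(tmp_name, file_name)  # atomic on POSIX filesystems
    except Exception:
      os.unlink(tmp_name)  # leave no temporary files behind
      raise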
Client RPC
++++++++++

RPC between Ganeti clients and the Ganeti master daemon supports the following operations:

SubmitJob(ops)
  Submits a list of opcodes and returns the job identifier. The identifier is guaranteed to be unique during the lifetime of a cluster.

WaitForJobChange(job_id, fields, […], timeout)
  This function waits until a job changes or a timeout expires. The condition for when a job changed is defined by the fields passed and the last log message received.

QueryJobs(job_ids, fields)
  Returns field values for the job identifiers passed.

CancelJob(job_id)
  Cancels the job specified by identifier. This operation may fail if the job is already running, canceled or finished.

ArchiveJob(job_id)
  Moves a job into the …/archive/ directory. This operation will fail if the job has not been canceled or finished.

Job and opcode status
+++++++++++++++++++++

Each job and each opcode has, at any time, one of the following states:

Queued
  The job/opcode was submitted, but did not yet start.

Waiting
  The job/opcode is waiting for a lock to proceed.

Running
  The job/opcode is running.

Canceled
  The job/opcode was canceled before it started.

Success
  The job/opcode ran and finished successfully.

Error
  The job/opcode was aborted with an error.

If the master is aborted while a job is running, the job will be set to the Error status once the master starts again.

History
+++++++

Archived jobs are kept in a separate directory, ``/var/lib/ganeti/queue/archive/``. This is done in order to speed up the queue handling: by default, the jobs in the archive are not touched by any functions. Only the current (unarchived) jobs are parsed, loaded, and verified (if implemented) by the master daemon.

Ganeti updates
++++++++++++++

The queue has to be completely empty for Ganeti updates with changes in the job queue structure. In order to allow this, there will be a way to prevent new jobs entering the queue.

Object parameters
~~~~~~~~~~~~~~~~~

Across all cluster configuration data, we have multiple classes of parameters:

A. cluster-wide parameters (e.g. name of the cluster, the master); these are the ones that we have today, and are unchanged from the current model
#. node parameters
#. instance specific parameters, e.g. the name of disks (LV), that cannot be shared with other instances
#. instance parameters, that are or can be the same for many instances, but are not hypervisor related; e.g. the number of VCPUs, or the size of memory
#. instance parameters that are hypervisor specific (e.g. kernel_path or PAE mode)

The following definitions for instance parameters will be used below:

:hypervisor parameter:
  a hypervisor parameter (or hypervisor specific parameter) is defined as a parameter that is interpreted by the hypervisor support code in Ganeti and usually is specific to a particular hypervisor (like the kernel path for :term:`PVM` which makes no sense for :term:`HVM`).

:backend parameter:
  a backend parameter is defined as an instance parameter that can be shared among a list of instances, and is either generic enough not to be tied to a given hypervisor or cannot influence at all the hypervisor behaviour. For example: memory, vcpus, auto_balance. All these parameters will be encoded into constants.py with the prefix "BE\_" and the whole list of parameters will exist in the set "BES_PARAMETERS".

:proper parameter:
  a parameter whose value is unique to the instance (e.g. the name of a LV, or the MAC of a NIC)

As a general rule, for all kinds of parameters, "None" (or in JSON-speak, "nil") will no longer be a valid value for a parameter. As such, only non-default parameters will be saved as part of objects in the serialization step, reducing the size of the serialized format.
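As a sketch of what "only non-default parameters are stored" means in practice (anticipating the ``FillHV``/``FillBE`` methods described below; the parameter values here are illustrative, not actual Ganeti defaults)::

  def fill_dict(defaults, custom):
    """Overlay instance-level overrides on top of cluster defaults.

    Only the 'custom' dict is stored in the configuration; the
    effective parameters are recomputed whenever they are needed.

    """
    result = defaults.copy()
    result.update(custom)
    return result

  # Illustrative values only:
  cluster_hv_defaults = {"kernel_path": "/boot/vmlinuz-3-xenU",
                         "acpi": True}
  instance_hvparams = {"acpi": False}  # the only customized parameter
  effective = fill_dict(cluster_hv_defaults, instance_hvparams)
  # effective == {"kernel_path": "/boot/vmlinuz-3-xenU", "acpi": False}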
Cluster parameters
++++++++++++++++++

Cluster parameters remain as today, attributes at the top level of the Cluster object. In addition, two new attributes at this level will hold defaults for the instances:

- hvparams, a dictionary indexed by hypervisor type, holding default values for hypervisor parameters that are not defined/overridden by the instances of this hypervisor type
- beparams, a dictionary holding (for 2.0) a single element 'default', which holds the default value for backend parameters

Node parameters
+++++++++++++++

Node-related parameters are very few, and we will continue using the same model for these as previously (attributes on the Node object). There are three new node flags, described in a separate section "node flags" below.

Instance parameters
+++++++++++++++++++

As described before, the instance parameters are split in three: instance proper parameters, unique to each instance, instance hypervisor parameters and instance backend parameters.

The "hvparams" and "beparams" are kept in two dictionaries at instance level. Only non-default parameters are stored (but once customized, a parameter will be kept, even with the same value as the default one, until reset).

The names for hypervisor parameters in the instance.hvparams subtree should be chosen as generic as possible, especially if specific parameters could conceivably be useful for more than one hypervisor, e.g. ``instance.hvparams.vnc_console_port`` instead of using both ``instance.hvparams.hvm_vnc_console_port`` and ``instance.hvparams.kvm_vnc_console_port``.

There are some special cases related to disks and NICs (for example): a disk has both Ganeti-related parameters (e.g. the name of the LV) and hypervisor-related parameters (how the disk is presented to/named in the instance). The former parameters remain as proper-instance parameters, while the latter values are migrated to the hvparams structure. In 2.0, we will have only globally-per-instance such hypervisor parameters, and not per-disk ones (e.g. all NICs will be exported as of the same type).

Starting from the 1.2 list of instance parameters, here is how they will be mapped to the three classes of parameters:

- name (P)
- primary_node (P)
- os (P)
- hypervisor (P)
- status (P)
- memory (BE)
- vcpus (BE)
- nics (P)
- disks (P)
- disk_template (P)
- network_port (P)
- kernel_path (HV)
- initrd_path (HV)
- hvm_boot_order (HV)
- hvm_acpi (HV)
- hvm_pae (HV)
- hvm_cdrom_image_path (HV)
- hvm_nic_type (HV)
- hvm_disk_type (HV)
- vnc_bind_address (HV)
- serial_no (P)

Parameter validation
++++++++++++++++++++

To support the new cluster parameter design, additional features will be required from the hypervisor support implementations in Ganeti.

The hypervisor support implementation API will be extended with the following features:

:PARAMETERS: class-level attribute holding the list of valid parameters for this hypervisor
:CheckParamSyntax(hvparams): checks that the given parameters are valid (as in the names are valid) for this hypervisor; usually just comparing ``hvparams.keys()`` and ``cls.PARAMETERS``; this is a class method that can be called from within master code (i.e. cmdlib) and should be safe to do so
:ValidateParameters(hvparams): verifies the values of the provided parameters against this hypervisor; this is a method that will be called on the target node, from backend.py code, and as such can make node-specific checks (e.g.
kernel_path checking) Default value application +++++++++++++++++++++++++ The application of defaults to an instance is done in the Cluster object, via two new methods as follows: - ``Cluster.FillHV(instance)``, returns 'filled' hvparams dict, based on instance's hvparams and cluster's ``hvparams[instance.hypervisor]`` - ``Cluster.FillBE(instance, be_type="default")``, which returns the beparams dict, based on the instance and cluster beparams The FillHV/BE transformations will be used, for example, in the RpcRunner when sending an instance for activation/stop, and the sent instance hvparams/beparams will have the final value (noded code doesn't know about defaults). LU code will need to self-call the transformation, if needed. Opcode changes ++++++++++++++ The parameter changes will have impact on the OpCodes, especially on the following ones: - ``OpInstanceCreate``, where the new hv and be parameters will be sent as dictionaries; note that all hv and be parameters are now optional, as the values can be instead taken from the cluster - ``OpInstanceQuery``, where we have to be able to query these new parameters; the syntax for names will be ``hvparam/$NAME`` and ``beparam/$NAME`` for querying an individual parameter out of one dictionary, and ``hvparams``, respectively ``beparams``, for the whole dictionaries - ``OpModifyInstance``, where the the modified parameters are sent as dictionaries Additionally, we will need new OpCodes to modify the cluster-level defaults for the be/hv sets of parameters. Caveats +++++++ One problem that might appear is that our classification is not complete or not good enough, and we'll need to change this model. As the last resort, we will need to rollback and keep 1.2 style. Another problem is that classification of one parameter is unclear (e.g. ``network_port``, is this BE or HV?); in this case we'll take the risk of having to move parameters later between classes. Security ++++++++ The only security issue that we foresee is if some new parameters will have sensitive value. If so, we will need to have a way to export the config data while purging the sensitive value. E.g. for the drbd shared secrets, we could export these with the values replaced by an empty string. Node flags ~~~~~~~~~~ Ganeti 2.0 adds three node flags that change the way nodes are handled within Ganeti and the related infrastructure (iallocator interaction, RAPI data export). *master candidate* flag +++++++++++++++++++++++ Ganeti 2.0 allows more scalability in operation by introducing parallelization. However, a new bottleneck is reached that is the synchronization and replication of cluster configuration to all nodes in the cluster. This breaks scalability as the speed of the replication decreases roughly with the size of the nodes in the cluster. The goal of the master candidate flag is to change this O(n) into O(1) with respect to job and configuration data propagation. Only nodes having this flag set (let's call this set of nodes the *candidate pool*) will have jobs and configuration data replicated. The cluster will have a new parameter (runtime changeable) called ``candidate_pool_size`` which represents the number of candidates the cluster tries to maintain (preferably automatically). 
This will impact the cluster operations as follows: - jobs and config data will be replicated only to a fixed set of nodes - master fail-over will only be possible to a node in the candidate pool - cluster verify needs changing to account for these two roles - external scripts will no longer have access to the configuration file (this is not recommended anyway) The caveats of this change are: - if all candidates are lost (completely), cluster configuration is lost (but it should be backed up external to the cluster anyway) - failed nodes which are candidate must be dealt with properly, so that we don't lose too many candidates at the same time; this will be reported in cluster verify - the 'all equal' concept of ganeti is no longer true - the partial distribution of config data means that all nodes will have to revert to ssconf files for master info (as in 1.2) Advantages: - speed on a 100+ nodes simulated cluster is greatly enhanced, even for a simple operation; ``gnt-instance remove`` on a diskless instance remove goes from ~9seconds to ~2 seconds - node failure of non-candidates will be less impacting on the cluster The default value for the candidate pool size will be set to 10 but this can be changed at cluster creation and modified any time later. Testing on simulated big clusters with sequential and parallel jobs show that this value (10) is a sweet-spot from performance and load point of view. *offline* flag ++++++++++++++ In order to support better the situation in which nodes are offline (e.g. for repair) without altering the cluster configuration, Ganeti needs to be told and needs to properly handle this state for nodes. This will result in simpler procedures, and less mistakes, when the amount of node failures is high on an absolute scale (either due to high failure rate or simply big clusters). Nodes having this attribute set will not be contacted for inter-node RPC calls, will not be master candidates, and will not be able to host instances as primaries. Setting this attribute on a node: - will not be allowed if the node is the master - will not be allowed if the node has primary instances - will cause the node to be demoted from the master candidate role (if it was), possibly causing another node to be promoted to that role This attribute will impact the cluster operations as follows: - querying these nodes for anything will fail instantly in the RPC library, with a specific RPC error (RpcResult.offline == True) - they will be listed in the Other section of cluster verify The code is changed in the following ways: - RPC calls were be converted to skip such nodes: - RpcRunner-instance-based RPC calls are easy to convert - static/classmethod RPC calls are harder to convert, and were left alone - the RPC results were unified so that this new result state (offline) can be differentiated - master voting still queries in repair nodes, as we need to ensure consistency in case the (wrong) masters have old data, and nodes have come back from repairs Caveats: - some operation semantics are less clear (e.g. 
what to do on instance start with offline secondary?); for now, these will just fail as if the flag is not set (but faster) - 2-node cluster with one node offline needs manual startup of the master with a special flag to skip voting (as the master can't get a quorum there) One of the advantages of implementing this flag is that it will allow in the future automation tools to automatically put the node in repairs and recover from this state, and the code (should/will) handle this much better than just timing out. So, future possible improvements (for later versions): - watcher will detect nodes which fail RPC calls, will attempt to ssh to them, if failure will put them offline - watcher will try to ssh and query the offline nodes, if successful will take them off the repair list Alternatives considered: The RPC call model in 2.0 is, by default, much nicer - errors are logged in the background, and job/opcode execution is clearer, so we could simply not introduce this. However, having this state will make both the codepaths clearer (offline vs. temporary failure) and the operational model (it's not a node with errors, but an offline node). *drained* flag ++++++++++++++ Due to parallel execution of jobs in Ganeti 2.0, we could have the following situation: - gnt-node migrate + failover is run - gnt-node evacuate is run, which schedules a long-running 6-opcode job for the node - partway through, a new job comes in that runs an iallocator script, which finds the above node as empty and a very good candidate - gnt-node evacuate has finished, but now it has to be run again, to clean the above instance(s) In order to prevent this situation, and to be able to get nodes into proper offline status easily, a new *drained* flag was added to the nodes. This flag (which actually means "is being, or was drained, and is expected to go offline"), will prevent allocations on the node, but otherwise all other operations (start/stop instance, query, etc.) are working without any restrictions. Interaction between flags +++++++++++++++++++++++++ While these flags are implemented as separate flags, they are mutually-exclusive and are acting together with the master node role as a single *node status* value. In other words, a flag is only in one of these roles at a given time. The lack of any of these flags denote a regular node. The current node status is visible in the ``gnt-cluster verify`` output, and the individual flags can be examined via separate flags in the ``gnt-node list`` output. These new flags will be exported in both the iallocator input message and via RAPI, see the respective man pages for the exact names. Feature changes --------------- The main feature-level changes will be: - a number of disk related changes - removal of fixed two-disk, one-nic per instance limitation Disk handling changes ~~~~~~~~~~~~~~~~~~~~~ The storage options available in Ganeti 1.x were introduced based on then-current software (first DRBD 0.7 then later DRBD 8) and the estimated usage patters. However, experience has later shown that some assumptions made initially are not true and that more flexibility is needed. 
One main assumption made was that disk failures should be treated as 'rare' events, and that each of them needs to be manually handled in order to ensure data safety; however, both these assumptions are false: - disk failures can be a common occurrence, based on usage patterns or cluster size - our disk setup is robust enough (referring to DRBD8 + LVM) that we could automate more of the recovery Note that we still don't have fully-automated disk recovery as a goal, but our goal is to reduce the manual work needed. As such, we plan the following main changes: - DRBD8 is much more flexible and stable than its previous version (0.7), such that removing the support for the ``remote_raid1`` template and focusing only on DRBD8 is easier - dynamic discovery of DRBD devices is not actually needed in a cluster that where the DRBD namespace is controlled by Ganeti; switching to a static assignment (done at either instance creation time or change secondary time) will change the disk activation time from O(n) to O(1), which on big clusters is a significant gain - remove the hard dependency on LVM (currently all available storage types are ultimately backed by LVM volumes) by introducing file-based storage Additionally, a number of smaller enhancements are also planned: - support variable number of disks - support read-only disks Future enhancements in the 2.x series, which do not require base design changes, might include: - enhancement of the LVM allocation method in order to try to keep all of an instance's virtual disks on the same physical disks - add support for DRBD8 authentication at handshake time in order to ensure each device connects to the correct peer - remove the restrictions on failover only to the secondary which creates very strict rules on cluster allocation DRBD minor allocation +++++++++++++++++++++ Currently, when trying to identify or activate a new DRBD (or MD) device, the code scans all in-use devices in order to see if we find one that looks similar to our parameters and is already in the desired state or not. Since this needs external commands to be run, it is very slow when more than a few devices are already present. Therefore, we will change the discovery model from dynamic to static. When a new device is logically created (added to the configuration) a free minor number is computed from the list of devices that should exist on that node and assigned to that device. At device activation, if the minor is already in use, we check if it has our parameters; if not so, we just destroy the device (if possible, otherwise we abort) and start it with our own parameters. This means that we in effect take ownership of the minor space for that device type; if there's a user-created DRBD minor, it will be automatically removed. The change will have the effect of reducing the number of external commands run per device from a constant number times the index of the first free DRBD minor to just a constant number. Removal of obsolete device types (MD, DRBD7) ++++++++++++++++++++++++++++++++++++++++++++ We need to remove these device types because of two issues. First, DRBD7 has bad failure modes in case of dual failures (both network and disk - it cannot propagate the error up the device stack and instead just panics. Second, due to the asymmetry between primary and secondary in MD+DRBD mode, we cannot do live failover (not even if we had MD+DRBD8). 
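The static minor computation described under `DRBD minor allocation`_ amounts to finding the first unused minor among the devices configured for a node, along these lines (a minimal sketch of the idea, not the actual configuration code)::

  def find_free_minor(node_disks):
    """Compute the first free DRBD minor for a node.

    @param node_disks: all disks in the configuration that live on
      this node, each with a 'minor' attribute

    """
    used = set(disk.minor for disk in node_disks)
    minor = 0
    while minor in used:
      minor += 1
    # Ganeti owns the minor space on its nodes, so this number can be
    # assigned at creation time instead of being discovered dynamically
    return minor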
File-based storage support ++++++++++++++++++++++++++ Using files instead of logical volumes for instance storage would allow us to get rid of the hard requirement for volume groups for testing clusters and it would also allow usage of SAN storage to do live failover taking advantage of this storage solution. Better LVM allocation +++++++++++++++++++++ Currently, the LV to PV allocation mechanism is a very simple one: at each new request for a logical volume, tell LVM to allocate the volume in order based on the amount of free space. This is good for simplicity and for keeping the usage equally spread over the available physical disks, however it introduces a problem that an instance could end up with its (currently) two drives on two physical disks, or (worse) that the data and metadata for a DRBD device end up on different drives. This is bad because it causes unneeded ``replace-disks`` operations in case of a physical failure. The solution is to batch allocations for an instance and make the LVM handling code try to allocate as close as possible all the storage of one instance. We will still allow the logical volumes to spill over to additional disks as needed. Note that this clustered allocation can only be attempted at initial instance creation, or at change secondary node time. At add disk time, or at replacing individual disks, it's not easy enough to compute the current disk map so we'll not attempt the clustering. DRBD8 peer authentication at handshake ++++++++++++++++++++++++++++++++++++++ DRBD8 has a new feature that allow authentication of the peer at connect time. We can use this to prevent connecting to the wrong peer more that securing the connection. Even though we never had issues with wrong connections, it would be good to implement this. LVM self-repair (optional) ++++++++++++++++++++++++++ The complete failure of a physical disk is very tedious to troubleshoot, mainly because of the many failure modes and the many steps needed. We can safely automate some of the steps, more specifically the ``vgreduce --removemissing`` using the following method: #. check if all nodes have consistent volume groups #. if yes, and previous status was yes, do nothing #. if yes, and previous status was no, save status and restart #. if no, and previous status was no, do nothing #. if no, and previous status was yes: #. if more than one node is inconsistent, do nothing #. if only one node is inconsistent: #. run ``vgreduce --removemissing`` #. log this occurrence in the Ganeti log in a form that can be used for monitoring #. [FUTURE] run ``replace-disks`` for all instances affected Failover to any node ++++++++++++++++++++ With a modified disk activation sequence, we can implement the *failover to any* functionality, removing many of the layout restrictions of a cluster: - the need to reserve memory on the current secondary: this gets reduced to a must to reserve memory anywhere on the cluster - the need to first failover and then replace secondary for an instance: with failover-to-any, we can directly failover to another node, which also does the replace disks at the same step In the following, we denote the current primary by P1, the current secondary by S1, and the new primary and secondaries by P2 and S2. P2 is fixed to the node the user chooses, but the choice of S2 can be made between P1 and S1. This choice can be constrained, depending on which of P1 and S1 has failed. 
- if P1 has failed, then S1 must become S2, and live migration is not possible
- if S1 has failed, then P1 must become S2, and live migration could be possible (in theory, but this is not a design goal for 2.0)

The algorithm for performing the failover is straightforward:

- verify that S2 (the node the user has chosen to keep as secondary) has valid data (is consistent)
- tear down the current DRBD association and set up a DRBD pairing between P2 (P2 is indicated by the user) and S2; since P2 has no data, it will start re-syncing from S2
- as soon as P2 is in state SyncTarget (i.e. after the resync has started but before it has finished), we can promote it to primary role (r/w) and start the instance on P2
- as soon as the P2-S2 sync has finished, we can remove the old data on the old node that has not been chosen for S2

Caveats: during the P2-S2 sync, a (non-transient) network error will cause I/O errors on the instance, so (if a longer instance downtime is acceptable) we can postpone the restart of the instance until the resync is done. However, disk I/O errors on S2 will cause data loss, since we don't have a good copy of the data anymore, so in this case waiting for the sync to complete is not an option. As such, it is recommended that this feature is used only in conjunction with proper disk monitoring.

Live migration note: while failover-to-any is possible for all choices of S2, migration-to-any is possible only if we keep P1 as S2.

Caveats
+++++++

The dynamic device model, while more complex, has an advantage: it will not reuse by mistake the DRBD device of another instance, since it always looks for either our own or a free one. The static one, in contrast, will assume that given a minor number N, it's ours and we can take over. This needs careful implementation such that if the minor is in use, either we are able to cleanly shut it down, or we abort the startup. Otherwise, it could be that we start syncing between two instances' disks, causing data loss.

Variable number of disk/NICs per instance
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Variable number of disks
++++++++++++++++++++++++

In order to support high-security scenarios (for example read-only sda and read-write sdb), we need to make a fully flexible disk definition. This has less impact than it might look at first sight: only the instance creation has a hard-coded number of disks, not the disk handling code. The block device handling and most of the instance handling code are already working with "the instance's disks" as opposed to "the two disks of the instance", but some pieces are not (e.g. import/export) and the code needs a review to ensure safety.

The objective is to be able to specify the number of disks at instance creation, and to be able to toggle a disk from read-only to read-write afterward.

Variable number of NICs
+++++++++++++++++++++++

Similar to the disk change, we need to allow multiple network interfaces per instance. This will affect the internal code (some functions will have to stop assuming that ``instance.nics`` is a list of length one), the OS API which currently can export/import only one instance, and the command line interface.

Interface changes
-----------------

There are two areas of interface changes: API-level changes (the OS interface and the RAPI interface) and the command line interface changes.

OS interface
~~~~~~~~~~~~

The current Ganeti OS interface, version 5, is tailored for Ganeti 1.2.
The interface is composed of a series of scripts which get called with certain parameters to perform OS-dependent operations on the cluster. The current scripts are:

create
  called when a new instance is added to the cluster

export
  called to export an instance disk to a stream

import
  called to import from a stream to a new instance

rename
  called to perform the os-specific operations necessary for renaming an instance

Currently these scripts suffer from the limitations of Ganeti 1.2: for example they accept exactly one block and one swap device to operate on, rather than any amount of generic block devices, they blindly assume that an instance will have just one network interface to operate, and they cannot be configured to optimise the instance for a particular hypervisor.

Since in Ganeti 2.0 we want to support multiple hypervisors, and a non-fixed number of networks and disks, the OS interface needs to change to transmit the appropriate amount of information about an instance to its managing operating system, when operating on it. Moreover, since some old assumptions usually used in OS scripts are no longer valid, we need to re-establish a common knowledge on what can be assumed and what cannot be regarding the Ganeti environment.

When designing the new OS API our priorities are:

- ease of use
- future extensibility
- ease of porting from the old API
- modularity

As such we want to limit the number of scripts that must be written to support an OS, and make it easy to share code between them by uniforming their input. We also will leave the current script structure unchanged, as far as we can, and make a few of the scripts (import, export and rename) optional. Most information will be passed to the scripts through environment variables, for ease of access and at the same time ease of using only the information a script needs.

The Scripts
+++++++++++

As in Ganeti 1.2, every OS which wants to be installed in Ganeti needs to support the following functionality, through scripts:

create:
  used to create a new instance running that OS. This script should prepare the block devices, and install them so that the new OS can boot under the specified hypervisor.

export (optional):
  used to export an installed instance using the given OS to a format which can be used to import it back into a new instance.

import (optional):
  used to import an exported instance into a new one. This script is similar to create, but the new instance should have the content of the export, rather than contain a pristine installation.

rename (optional):
  used to perform the internal OS-specific operations needed to rename an instance.

If any optional script is not implemented, Ganeti will refuse to perform the given operation on instances using the non-implementing OS. Of course the create script is mandatory, and it doesn't make sense to support either the export or the import operation but not both.

Incompatibilities with 1.2
__________________________

We expect the following incompatibilities between the OS scripts for 1.2 and the ones for 2.0:

- Input parameters: in 1.2 those were passed on the command line, in 2.0 we'll use environment variables, as there will be a lot more information and not all OSes may care about all of it.
- Number of calls: export scripts will be called once for each device the instance has, and import scripts once for every exported disk. Imported instances will be forced to have a number of disks greater or equal to the one of the export.
- Some scripts are not compulsory: if such a script is missing the relevant operations will be forbidden for instances of that OS. This makes it easier to distinguish between unsupported operations and no-op ones (if any).

Input
_____

Rather than using command line flags, as they do now, scripts will accept inputs from environment variables. We expect the following input values:

OS_API_VERSION
  The version of the OS API that the following parameters comply with; this is used so that in the future we could have OSes supporting multiple versions and thus Ganeti send the proper version in this parameter
INSTANCE_NAME
  Name of the instance acted on
HYPERVISOR
  The hypervisor the instance should run on (e.g. 'xen-pvm', 'xen-hvm', 'kvm')
DISK_COUNT
  The number of disks this instance will have
NIC_COUNT
  The number of NICs this instance will have
DISK_<N>_PATH
  Path to the Nth disk.
DISK_<N>_ACCESS
  W if read/write, R if read only. OS scripts are not supposed to touch read-only disks, but they are passed in so the scripts know about them.
DISK_<N>_FRONTEND_TYPE
  Type of the disk as seen by the instance. Can be 'scsi', 'ide', 'virtio'
DISK_<N>_BACKEND_TYPE
  Type of the disk as seen from the node. Can be 'block', 'file:loop' or 'file:blktap'
NIC_<N>_MAC
  MAC address for the Nth network interface
NIC_<N>_IP
  IP address for the Nth network interface, if available
NIC_<N>_BRIDGE
  Node bridge the Nth network interface will be connected to
NIC_<N>_FRONTEND_TYPE
  Type of the Nth NIC as seen by the instance. For example 'virtio', 'rtl8139', etc.
DEBUG_LEVEL
  Whether more output should be produced, for debugging purposes. Currently the only valid values are 0 and 1.

These are only the basic variables we are thinking of now, but more may come during the implementation and they will be documented in the :manpage:`ganeti-os-interface(7)` man page. All these variables will be available to all scripts.

Some scripts will need a bit more information to work. These will have per-script variables, such as for example:

OLD_INSTANCE_NAME
  rename: the name the instance should be renamed from.
EXPORT_DEVICE
  export: device to be exported, a snapshot of the actual device. The data must be exported to stdout.
EXPORT_INDEX
  export: sequential number of the instance device targeted.
IMPORT_DEVICE
  import: device to send the data to, part of the new instance. The data must be imported from stdin.
IMPORT_INDEX
  import: sequential number of the instance device targeted.

(Rationale for INSTANCE_NAME as an environment variable: the instance name is always needed and we could pass it on the command line. On the other hand, though, this would force scripts to both access the environment and parse the command line, so we'll pass it in the environment for uniformity.)

Output/Behaviour
________________

As discussed scripts should only send user-targeted information to stderr. The create and import scripts are supposed to format/initialise the given block devices and install the correct instance data. The export script is supposed to export instance data to stdout in a format understandable by the import script. The data will be compressed by Ganeti, so no compression should be done. The rename script should only modify the instance's knowledge of what its name is.
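For illustration, a minimal ``create`` script under this contract could look like the following sketch (written in Python for brevity; the variable names follow the list above, while the ``mkfs`` invocation and the omitted OS installation steps are placeholders, not part of the proposal)::

  #!/usr/bin/python
  # Illustrative sketch of an OS 'create' script using the proposed
  # environment-variable API; not an official Ganeti example.

  import os
  import subprocess
  import sys

  def main():
    # All parameters arrive via the environment, per the new OS API
    api_version = os.environ["OS_API_VERSION"]
    instance = os.environ["INSTANCE_NAME"]
    disk_count = int(os.environ["DISK_COUNT"])

    # User-targeted messages go to stderr only
    sys.stderr.write("Installing %s (OS API %s)\n" % (instance, api_version))

    for idx in range(disk_count):
      path = os.environ["DISK_%d_PATH" % idx]
      access = os.environ["DISK_%d_ACCESS" % idx]
      if access != "W":
        # Read-only disks are passed in but must not be touched
        continue
      # Placeholder: a real script would partition the device, create
      # filesystems and install the OS payload here
      subprocess.check_call(["mkfs.ext3", "-q", path])

    return 0

  if __name__ == "__main__":
    sys.exit(main())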
Other declarative style features
++++++++++++++++++++++++++++++++

Similar to Ganeti 1.2, OS specifications will need to provide a 'ganeti_api_version' file containing the list of numbers matching the version(s) of the API they implement. Ganeti itself will always be compatible with one version of the API and may maintain backwards compatibility if it's feasible to do so. The numbers are one-per-line, so an OS supporting both version 5 and version 20 will have a file containing two lines. This is different from Ganeti 1.2, which only supported one version number.

In addition to that an OS will be able to declare that it does support only a subset of the Ganeti hypervisors, by declaring them in the 'hypervisors' file.

Caveats/Notes
+++++++++++++

We might want to have a "default" import/export behaviour that just dumps all disks and restores them. This can save work as most systems will just do this, while allowing flexibility for different systems.

Environment variables are limited in size, but we expect that there will be enough space to store the information we need. If we discover that this is not the case we may want to go to a more complex API such as storing those information on the filesystem and providing the OS script with the path to a file where they are encoded in some format.

Remote API changes
~~~~~~~~~~~~~~~~~~

The first Ganeti remote API (RAPI) was designed and deployed with the Ganeti 1.2.5 release. That version provided read-only access to the cluster state. A fully functional read-write API demands significant internal changes which will be implemented in version 2.0.

We decided to go with implementing the Ganeti RAPI in a RESTful way, which is aligned with the key features we are looking for. It is a simple, stateless, scalable and extensible paradigm of API implementation. As transport it uses HTTP over SSL, and we are implementing it with JSON encoding, but in a way that makes it possible to extend it and provide any other encoding.

Design
++++++

The Ganeti RAPI is implemented as an independent daemon, running on the same node with the same permission level as the Ganeti master daemon. Communication is done through the LUXI library to the master daemon. In order to keep communication asynchronous RAPI processes two types of client requests:

- queries: the server is able to answer immediately
- job submission: some time is required for a useful response

In the query case the requested data is sent back to the client in the HTTP response body. Typical examples of queries would be: list of nodes, instances, cluster info, etc.

In the case of job submission, the client receives a job ID, the identifier which allows one to query the job progress in the job queue (see `Job Queue`_).

Internally, each exported object has a version identifier, which is used as a state identifier in the HTTP header E-Tag field for requests/responses to avoid race conditions.

Resource representation
+++++++++++++++++++++++

The key difference of using REST instead of other API styles is that REST requires separation of services via resources with unique URIs. Each of them should have a limited amount of state and support standard HTTP methods: GET, POST, DELETE, PUT.

For example in Ganeti's case we can have a set of URIs:

- ``/{clustername}/instances``
- ``/{clustername}/instances/{instancename}``
- ``/{clustername}/instances/{instancename}/tag``
- ``/{clustername}/tag``

A GET request to ``/{clustername}/instances`` will return the list of instances, a POST to ``/{clustername}/instances`` should create a new instance, a DELETE ``/{clustername}/instances/{instancename}`` should delete the instance, a GET ``/{clustername}/tag`` should return the cluster tags.

Each resource URI will have a version prefix. The resource IDs are to be determined.

Internal encoding might be JSON, XML, or any other. The JSON encoding fits the Ganeti RAPI needs nicely. The client can request a specific representation via the Accept field in the HTTP header.

REST uses HTTP as its transport and application protocol for resource access. The set of possible responses is a subset of standard HTTP responses.

The statelessness model provides additional reliability and transparency to operations (e.g. only one request needs to be analyzed to understand the in-progress operation, not a sequence of multiple requests/responses).
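As a rough illustration of the query and job-submission flows, a client under this design could look like the sketch below; the host, port, resource paths (in particular the job polling path) and the returned types are assumptions for illustration, not a fixed API, and the authentication scheme is the HTTP basic auth over SSL described in the Security section below::

  # Illustrative RAPI client sketch; paths and return types are
  # assumptions, not the final Ganeti RAPI specification.

  import base64
  import json  # simplejson can be used on older Python versions
  import urllib2

  BASE = "https://cluster.example.com:5080/2"
  AUTH = "Basic " + base64.b64encode("rapi-user:secret")

  def rapi_request(method, path, body=None):
    req = urllib2.Request(BASE + path, data=body)
    req.get_method = lambda: method  # force GET/POST/DELETE/PUT
    req.add_header("Authorization", AUTH)
    req.add_header("Accept", "application/json")
    return json.loads(urllib2.urlopen(req).read())

  # A query: answered immediately, data in the HTTP response body
  instances = rapi_request("GET", "/instances")

  # A job submission: only a job ID comes back, which the client then
  # polls in the job queue until the operation has finished
  job_id = rapi_request("POST", "/instances",
                        json.dumps({"name": "inst1.example.com"}))
  status = rapi_request("GET", "/jobs/%s" % job_id)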
Security
++++++++

With the write functionality security becomes a much bigger issue. The Ganeti RAPI uses basic HTTP authentication on top of an SSL-secured connection to grant access to an exported resource. The password is stored locally in an Apache-style ``.htpasswd`` file. Only one level of privileges is supported.

Caveats
+++++++

The model detailed above for job submission requires the client to poll periodically for updates to the job; an alternative would be to allow the client to request a callback, or a 'wait for updates' call.

The callback model was not considered due to the following two issues:

- callbacks would require a new model of allowed callback URLs, together with a method of managing these
- callbacks only work when the client and the master are in the same security domain, and they fail in the other cases (e.g. when there is a firewall between the client and the RAPI daemon that only allows client-to-RAPI calls, which is usual in DMZ cases)

The 'wait for updates' method is not suited to the HTTP protocol, where requests are supposed to be short-lived.

Command line changes
~~~~~~~~~~~~~~~~~~~~

Ganeti 2.0 introduces several new features as well as new ways to handle instance resources like disks or network interfaces. This requires some noticeable changes in the way command line arguments are handled:

- extend and modify command line syntax to support new features
- ensure consistent patterns in command line arguments to reduce cognitive load

The design changes that require these changes are, in no particular order:

- flexible instance disk handling: support a variable number of disks with varying properties per instance,
- flexible instance network interface handling: support a variable number of network interfaces with varying properties per instance,
- multiple hypervisors: multiple hypervisors can be active on the same cluster, each supporting different parameters,
- support for device type CDROM (via ISO image)

As such, there are several areas of Ganeti where the command line arguments will change:

- Cluster configuration

  - cluster initialization
  - cluster default configuration

- Instance configuration

  - handling of network cards for instances,
  - handling of disks for instances,
  - handling of CDROM devices and
  - handling of hypervisor specific options.

Notes about device removal/addition
+++++++++++++++++++++++++++++++++++

To avoid problems with device location changes (e.g. the second network interface of the instance becoming the first or third and the like) the list of network/disk devices is treated as a stack, i.e. devices can only be added/removed at the end of the list of devices of each class (disk or network) for each instance.
gnt-instance commands
+++++++++++++++++++++

The commands for gnt-instance will be modified and extended to allow for the new functionality:

- the add command will be extended to support the new device and hypervisor options,
- the modify command continues to handle all modifications to instances, but will be extended with new arguments for handling devices.

Network Device Options
++++++++++++++++++++++

The generic format of the network device option is::

  --net $DEVNUM[:$OPTION=$VALUE][,$OPTION=$VALUE]

:$DEVNUM: device number, unsigned integer, starting at 0,
:$OPTION: device option, string,
:$VALUE: device option value, string.

Currently, the following device options will be defined (open to further changes):

:mac: MAC address of the network interface, accepts either a valid MAC address or the string 'auto'. If 'auto' is specified, a new MAC address will be generated randomly. If the mac device option is not specified, the default value 'auto' is assumed.
:bridge: network bridge the network interface is connected to. Accepts either a valid bridge name (the specified bridge must exist on the node(s)) as string or the string 'auto'. If 'auto' is specified, the default bridge is used. If the bridge option is not specified, the default value 'auto' is assumed.

Disk Device Options
+++++++++++++++++++

The generic format of the disk device option is::

  --disk $DEVNUM[:$OPTION=$VALUE][,$OPTION=$VALUE]

:$DEVNUM: device number, unsigned integer, starting at 0,
:$OPTION: device option, string,
:$VALUE: device option value, string.

Currently, the following device options will be defined (open to further changes):

:size: size of the disk device, either a positive number, specifying the disk size in mebibytes, or a number followed by a magnitude suffix (M for mebibytes, G for gibibytes). Also accepts the string 'auto' in which case the default disk size will be used. If the size option is not specified, 'auto' is assumed. This option is not valid for all disk layout types.
:access: access mode of the disk device, a single letter, valid values are:

  - *w*: read/write access to the disk device or
  - *r*: read-only access to the disk device.

  If the access mode is not specified, the default mode of read/write access will be configured.
:path: path to the image file for the disk device, string. No default exists. This option is not valid for all disk layout types.

Adding devices
++++++++++++++

To add devices to an already existing instance, use the device type specific option to gnt-instance modify. Currently, there are two device type specific options supported:

:--net: for network interface cards
:--disk: for disk devices

The syntax of the device specific options is similar to the generic device options, but instead of specifying a device number like for gnt-instance add, you specify the magic string add. The new device will always be appended at the end of the list of devices of this type for the specified instance, e.g. if the instance has disk devices 0, 1 and 2, the newly added disk device will be disk device 3.

Example::

  gnt-instance modify --net add:mac=auto test-instance

Removing devices
++++++++++++++++

Removing devices from an instance is done via gnt-instance modify. The same device specific options as for adding devices are used. Instead of a device number and further device options, only the magic string remove is specified.
It will always remove the last device in the list of devices of this type for the instance specified, e.g. if the instance has disk devices 0, 1, 2 and 3, the disk device number 3 will be removed.

Example::

  gnt-instance modify --net remove test-instance

Modifying devices
+++++++++++++++++

Modifying devices is also done with device type specific options to the gnt-instance modify command. There are currently two device type options supported:

:--net: for network interface cards
:--disk: for disk devices

The syntax of the device specific options is similar to the generic device options. The device number you specify identifies the device to be modified.

Example::

  gnt-instance modify --disk 2:access=r

Hypervisor Options
++++++++++++++++++

Ganeti 2.0 will support more than one hypervisor. Different hypervisors have various options that only apply to a specific hypervisor. Those hypervisor specific options are treated specially via the ``--hypervisor`` option. The generic syntax of the hypervisor option is as follows::

  --hypervisor $HYPERVISOR:$OPTION=$VALUE[,$OPTION=$VALUE]

:$HYPERVISOR: symbolic name of the hypervisor to use, string, has to match the supported hypervisors. Example: xen-pvm
:$OPTION: hypervisor option name, string
:$VALUE: hypervisor option value, string

The hypervisor option for an instance can be set at instance creation time via the ``gnt-instance add`` command. If the hypervisor for an instance is not specified upon instance creation, the default hypervisor will be used.

Modifying hypervisor parameters
+++++++++++++++++++++++++++++++

The hypervisor parameters of an existing instance can be modified using the ``--hypervisor`` option of the ``gnt-instance modify`` command. However, the hypervisor type of an existing instance cannot be changed, only the particular hypervisor specific options can be changed. Therefore, the format of the option parameters has been simplified to omit the hypervisor name and only contain the comma separated list of option-value pairs.

Example::

  gnt-instance modify --hypervisor cdrom=/srv/boot.iso,boot_order=cdrom:network test-instance

gnt-cluster commands
++++++++++++++++++++

The commands for gnt-cluster will be extended to allow setting and changing the default parameters of the cluster:

- The init command will be extended to support the ``--defaults`` option to set the cluster defaults upon cluster initialization.
- The modify command will be added to modify the cluster parameters. It will support the --defaults option to change the cluster defaults.

Cluster defaults
++++++++++++++++

The generic format of the cluster default setting option is::

  --defaults $OPTION=$VALUE[,$OPTION=$VALUE]

:$OPTION: cluster default option, string,
:$VALUE: cluster default option value, string.

Currently, the following cluster default options are defined (open to further changes):

:hypervisor: the default hypervisor to use for new instances, string. Must be a valid hypervisor known to and supported by the cluster.
:disksize: the disksize for newly created instance disks, where applicable. Must be either a positive number, in which case the unit of megabyte is assumed, or a positive number followed by a supported magnitude symbol (M for megabyte or G for gigabyte).
:bridge: the default network bridge to use for newly created instance network interfaces, string. Must be a valid bridge name of a bridge existing on the node(s).
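All of the option formats above (``--net``, ``--disk``, ``--hypervisor``, ``--defaults``) share the same ``IDENT:key=value,key=value`` shape, so a single parser can serve them all. A minimal sketch of such a parser follows; the function name and error handling are illustrative, not Ganeti's actual implementation, and values containing commas are not handled::

  # Sketch of a parser for the shared option syntax used by --net,
  # --disk and --hypervisor; not Ganeti's actual implementation.

  def parse_device_option(value):
    """Parse 'IDENT[:key=val[,key=val...]]' into (ident, options).

    IDENT is a device number, a hypervisor name, or one of the magic
    strings 'add'/'remove' described above.
    """
    if ":" in value:
      ident, rest = value.split(":", 1)
      options = dict(item.split("=", 1) for item in rest.split(","))
    else:
      ident, options = value, {}
    if ident.isdigit():
      ident = int(ident)
    return ident, options

  # Examples matching the formats described above
  assert parse_device_option("0:mac=auto,bridge=br0") == \
      (0, {"mac": "auto", "bridge": "br0"})
  assert parse_device_option("add:mac=auto") == ("add", {"mac": "auto"})
  assert parse_device_option("remove") == ("remove", {})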
Hypervisor cluster defaults
+++++++++++++++++++++++++++

The generic format of the hypervisor cluster wide default setting option is::

  --hypervisor-defaults $HYPERVISOR:$OPTION=$VALUE[,$OPTION=$VALUE]

:$HYPERVISOR: symbolic name of the hypervisor whose defaults you want to set, string
:$OPTION: cluster default option, string,
:$VALUE: cluster default option value, string.

.. vim: set textwidth=72 :
ganeti-2.9.3/doc/design-2.8.rst0000644000000000000000000000136312244641676016066 0ustar00rootroot00000000000000=================
Ganeti 2.8 design
=================

The following design documents have been implemented in Ganeti 2.8:

- :doc:`design-reason-trail`
- :doc:`design-autorepair`
- :doc:`design-device-uuid-name`

The following designs have been partially implemented in Ganeti 2.8:

- :doc:`design-storagetypes`
- :doc:`design-hroller`
- :doc:`design-query-splitting`: everything except instance queries.
- :doc:`design-partitioned`: "Constrained instance sizes" implemented.
- :doc:`design-monitoring-agent`: implementation of all the core functionalities of the monitoring agent. Reason trail implemented as part of the work for the instance status collector.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-2.1.rst0000644000000000000000000013776112244641676016063 0ustar00rootroot00000000000000=================
Ganeti 2.1 design
=================

This document describes the major changes in Ganeti 2.1 compared to the 2.0 version. The 2.1 version will be a relatively small release. Its main aim is to avoid changing too much of the core code, while addressing issues and adding new features and improvements over 2.0, in a timely fashion.

.. contents:: :depth: 4

Objective
=========

Ganeti 2.1 will add features to help further automation of cluster operations, further improve scalability to even bigger clusters, and make it easier to debug the Ganeti core.

Detailed design
===============

As for 2.0 we divide the 2.1 design into three areas:

- core changes, which affect the master daemon/job queue/locking or all/most logical units
- logical unit/feature changes
- external interface changes (eg. command line, os api, hooks, ...)

Core changes
------------

Storage units modelling
~~~~~~~~~~~~~~~~~~~~~~~

Currently, Ganeti has a good model of the block devices for instances (e.g. LVM logical volumes, files, DRBD devices, etc.) but none of the storage pools that are providing the space for these front-end devices. For example, there are hardcoded inter-node RPC calls for volume group listing, file storage creation/deletion, etc.

The storage units framework will implement a generic handling for all kinds of storage backends:

- LVM physical volumes
- LVM volume groups
- File-based storage directories
- any other future storage method

There will be a generic list of methods that each storage unit type will provide, like:

- list of storage units of this type
- check status of the storage unit

Additionally, there will be specific methods for each storage unit type, for example:

- enable/disable allocations on a specific PV
- file storage directory creation/deletion
- VG consistency fixing

This will allow a much better modeling and unification of the various RPC calls related to backend storage pool in the future. Ganeti 2.1 is intended to add the basics of the framework, and not necessarily move all the current VG/FileBased operations to it.
Note that while we model both LVM PVs and LVM VGs, the framework will **not** model any relationship between the different types. In other words, we model neither inheritance nor stacking, since this is too complex for our needs. While a ``vgreduce`` operation on a LVM VG could actually remove a PV from it, this will not be handled at the framework level, but at individual operation level. The goal is that this is a lightweight framework, for abstracting the different storage operations, and not for modelling the storage hierarchy.

Locking improvements
~~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

The class ``LockSet`` (see ``lib/locking.py``) is a container for one or many ``SharedLock`` instances. It provides an interface to add/remove locks and to acquire and subsequently release any number of those locks contained in it.

Locks in a ``LockSet`` are always acquired in alphabetic order. Due to the way we're using locks for nodes and instances (the single cluster lock isn't affected by this issue) this can lead to long delays when acquiring locks if another operation tries to acquire multiple locks but has to wait for yet another operation.

In the following demonstration we assume to have the instance locks ``inst1``, ``inst2``, ``inst3`` and ``inst4``.

#. Operation A grabs lock for instance ``inst4``.
#. Operation B wants to acquire all instance locks in alphabetic order, but it has to wait for ``inst4``.
#. Operation C tries to lock ``inst1``, but it has to wait until Operation B (which is trying to acquire all locks) releases the lock again.
#. Operation A finishes and releases lock on ``inst4``. Operation B can continue and eventually releases all locks.
#. Operation C can get ``inst1`` lock and finishes.

Technically there's no need for Operation C to wait for Operation A, and subsequently Operation B, to finish. Operation B can't continue until Operation A is done (it has to wait for ``inst4``), anyway.

Proposed changes
++++++++++++++++

Non-blocking lock acquiring
^^^^^^^^^^^^^^^^^^^^^^^^^^^

Acquiring locks for OpCode execution is always done in blocking mode. They won't return until the lock has successfully been acquired (or an error occurred, although we won't cover that case here).

``SharedLock`` and ``LockSet`` must be able to be acquired in a non-blocking way. They must support a timeout and abort trying to acquire the lock(s) after the specified amount of time.

Retry acquiring locks
^^^^^^^^^^^^^^^^^^^^^

To prevent other operations from waiting for a long time, such as described in the demonstration before, ``LockSet`` must not keep locks for a prolonged period of time when trying to acquire two or more locks. Instead it should, with an increasing timeout for acquiring all locks, release all locks again and sleep some time if it fails to acquire all requested locks.

A good timeout value needs to be determined. In any case, ``LockSet`` should proceed to acquire locks in blocking mode after a few (unsuccessful) attempts to acquire all requested locks. One proposal for the timeout is to use ``2**tries`` seconds, where ``tries`` is the number of unsuccessful tries.

In the demonstration before this would allow Operation C to continue after Operation B unsuccessfully tried to acquire all locks and released all acquired locks (``inst1``, ``inst2`` and ``inst3``) again.
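A minimal sketch of the proposed retry loop, assuming a hypothetical ``lockset.acquire(names, timeout=...)`` that returns the acquired names or None on timeout and releases everything on failure (the eventual interface may differ)::

  # Sketch of the proposed acquire-with-backoff behaviour; the
  # lockset.acquire()/release() interface shown here is hypothetical.

  import time

  def acquire_with_backoff(lockset, names, max_tries=5):
    tries = 0
    while tries < max_tries:
      # Try to get all requested locks within the current timeout;
      # on failure the lockset is expected to have released everything
      if lockset.acquire(names, timeout=2 ** tries) is not None:
        return True
      # Sleep a bit so other operations can grab individual locks
      time.sleep(0.1)
      tries += 1
    # After a few unsuccessful attempts, fall back to blocking mode
    lockset.acquire(names)
    return True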
Other solutions discussed
+++++++++++++++++++++++++

There was also some discussion on going one step further and extending the job queue (see ``lib/jqueue.py``) to select the next task for a worker depending on whether it can acquire the necessary locks. While this may reduce the number of necessary worker threads and/or increase throughput on large clusters with many jobs, it also brings many potential problems, such as contention and increased memory usage, with it. As this would be an extension of the changes proposed before it could be implemented at a later point in time, but we decided to stay with the simpler solution for now.

Implementation details
++++++++++++++++++++++

``SharedLock`` redesign
^^^^^^^^^^^^^^^^^^^^^^^

The current design of ``SharedLock`` is not good for supporting timeouts when acquiring a lock and there are also minor fairness issues in it. We plan to address both with a redesign. A proof of concept implementation was written and resulted in significantly simpler code.

Currently ``SharedLock`` uses two separate queues for shared and exclusive acquires and waiters get to run in turns. This means if an exclusive acquire is released, the lock will allow shared waiters to run and vice versa. Although it's still fair in the end, there is a slight bias towards shared waiters in the current implementation. The same implementation with two shared queues cannot support timeouts without adding a lot of complexity.

Our proposed redesign changes ``SharedLock`` to have only one single queue. There will be one condition (see Condition_ for a note about performance) in the queue per exclusive acquire and two for all shared acquires (see below for an explanation). The maximum queue length will always be ``2 + (number of exclusive acquires waiting)``. The number of queue entries for shared acquires can vary from 0 to 2.

The two conditions for shared acquires are a bit special. They will be used in turn. When the lock is instantiated, no conditions are in the queue. As soon as the first shared acquire arrives (and there are holder(s) or waiting acquires; see Acquire_), the active condition is added to the queue. Until it becomes the topmost condition in the queue and has been notified, any shared acquire is added to this active condition. When the active condition is notified, the conditions are swapped and further shared acquires are added to the previously inactive condition (which has now become the active condition). After all waiters on the previously active (now inactive) and now notified condition received the notification, it is removed from the queue of pending acquires.

This means shared acquires will skip any exclusive acquire in the queue. We believe it's better to improve parallelization on operations only asking for shared (or read-only) locks. Exclusive operations holding the same lock cannot be parallelized.

Acquire
*******

For exclusive acquires a new condition is created and appended to the queue. Shared acquires are added to the active condition for shared acquires and if the condition is not yet on the queue, it's appended.

The next step is to wait for our condition to be on the top of the queue (to guarantee fairness). If the timeout expired, we return to the caller without acquiring the lock. On every notification we check whether the lock has been deleted, in which case an error is returned to the caller.

The lock can be acquired if we're on top of the queue (there is no one else ahead of us).
For an exclusive acquire, there must not be other exclusive or shared holders. For a shared acquire, there must not be an exclusive holder. If these conditions are all true, the lock is acquired and we return to the caller. In any other case we wait again on the condition.

If it was the last waiter on a condition, the condition is removed from the queue.

Optimization: There's no need to touch the queue if there are no pending acquires and no current holders. The caller can have the lock immediately.

.. digraph:: "design-2.1-lock-acquire"

  graph[fontsize=8, fontname="Helvetica"]
  node[fontsize=8, fontname="Helvetica", width="0", height="0"]
  edge[fontsize=8, fontname="Helvetica"]

  /* Actions */
  abort[label="Abort\n(couldn't acquire)"]
  acquire[label="Acquire lock"]
  add_to_queue[label="Add condition to queue"]
  wait[label="Wait for notification"]
  remove_from_queue[label="Remove from queue"]

  /* Conditions */
  alone[label="Empty queue\nand can acquire?", shape=diamond]
  have_timeout[label="Do I have\ntimeout?", shape=diamond]
  top_of_queue_and_can_acquire[
    label="On top of queue and\ncan acquire lock?",
    shape=diamond,
    ]

  /* Lines */
  alone->acquire[label="Yes"]
  alone->add_to_queue[label="No"]

  have_timeout->abort[label="Yes"]
  have_timeout->wait[label="No"]

  top_of_queue_and_can_acquire->acquire[label="Yes"]
  top_of_queue_and_can_acquire->have_timeout[label="No"]

  add_to_queue->wait
  wait->top_of_queue_and_can_acquire
  acquire->remove_from_queue

Release
*******

First the lock removes the caller from the internal owner list. If there are pending acquires in the queue, the first (the oldest) condition is notified.

If the first condition was the active condition for shared acquires, the inactive condition will be made active. This ensures fairness with exclusive locks by forcing consecutive shared acquires to wait in the queue.

.. digraph:: "design-2.1-lock-release"

  graph[fontsize=8, fontname="Helvetica"]
  node[fontsize=8, fontname="Helvetica", width="0", height="0"]
  edge[fontsize=8, fontname="Helvetica"]

  /* Actions */
  remove_from_owners[label="Remove from owner list"]
  notify[label="Notify topmost"]
  swap_shared[label="Swap shared conditions"]
  success[label="Success"]

  /* Conditions */
  have_pending[label="Any pending\nacquires?", shape=diamond]
  was_active_queue[
    label="Was active condition\nfor shared acquires?",
    shape=diamond,
    ]

  /* Lines */
  remove_from_owners->have_pending

  have_pending->notify[label="Yes"]
  have_pending->success[label="No"]

  notify->was_active_queue

  was_active_queue->swap_shared[label="Yes"]
  was_active_queue->success[label="No"]

  swap_shared->success

Delete
******

The caller must either hold the lock in exclusive mode already or the lock must be acquired in exclusive mode. Trying to delete a lock while it's held in shared mode must fail.

After ensuring the lock is held in exclusive mode, the lock will mark itself as deleted and continue to notify all pending acquires. They will wake up, notice the deleted lock and return an error to the caller.

Condition
^^^^^^^^^

Note: This is not necessary for the locking changes above, but it may be a good optimization (pending performance tests).

The existing locking code in Ganeti 2.0 uses Python's built-in ``threading.Condition`` class. Unfortunately ``Condition`` implements timeouts by sleeping 1ms to 20ms between tries to acquire the condition lock in non-blocking mode. This requires unnecessary context switches and contention on the CPython GIL (Global Interpreter Lock).

By using POSIX pipes (see ``pipe(2)``) we can use the operating system's support for timeouts on file descriptors (see ``select(2)``). A custom condition class will have to be written for this.

On instantiation the class creates a pipe. After each notification the previous pipe is abandoned and re-created (technically the old pipe needs to stay around until all notifications have been delivered).

All waiting clients of the condition use ``select(2)`` or ``poll(2)`` to wait for notifications, optionally with a timeout. A notification will be signalled to the waiting clients by closing the pipe. If the pipe wasn't closed during the timeout, the waiting function returns to its caller nonetheless.
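A rough sketch of such a pipe-based condition, for illustration only; the eventual Ganeti class will differ, and the locking around the pipe swap (keeping the old pipe alive until all waiters have woken up) is deliberately glossed over here::

  # Illustrative sketch of a pipe-backed condition variable; a real
  # implementation needs proper lock handling around wait/notify.

  import os
  import select

  class PipeCondition(object):
    def __init__(self):
      # Waiters select() on the read end; notify closes the write end
      self._read_fd, self._write_fd = os.pipe()

    def wait(self, timeout=None):
      """Block until notified or until the timeout expires."""
      readable, _, _ = select.select([self._read_fd], [], [], timeout)
      return bool(readable)

    def notify_all(self):
      """Wake all waiters by closing the pipe, then re-create it."""
      os.close(self._write_fd)
      os.close(self._read_fd)
      # Technically the old pipe must stay around until all waiters
      # have been woken; this sketch glosses over that detail
      self._read_fd, self._write_fd = os.pipe()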
Node daemon availability
~~~~~~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently, when a Ganeti node suffers serious system disk damage, the migration/failover of an instance may not correctly shutdown the virtual machine on the broken node, causing instance duplication. The ``gnt-node powercycle`` command can be used to force a node reboot and thus to avoid duplicated instances. This command relies on node daemon availability, though, and thus can fail if the node daemon has some pages swapped out of RAM, for example.

Proposed changes
++++++++++++++++

The proposed solution forces the node daemon to run exclusively in RAM. It uses Python ctypes to call ``mlockall(MCL_CURRENT | MCL_FUTURE)`` on the node daemon process and all its children. In addition another log handler has been implemented for the node daemon to redirect to ``/dev/console`` messages that cannot be written on the logfile.

With these changes the node daemon can successfully run basic tasks such as a powercycle request even when the system disk is heavily damaged and reading/writing to disk fails constantly.
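For illustration, locking the current process into RAM via ctypes can look roughly like the sketch below; the ``MCL_CURRENT``/``MCL_FUTURE`` values are the usual Linux ones, and a production version would probe them rather than hard-code them::

  # Sketch of locking the current process into RAM with ctypes;
  # MCL_CURRENT/MCL_FUTURE values are the common Linux constants.

  import ctypes
  import ctypes.util
  import os

  MCL_CURRENT = 1
  MCL_FUTURE = 2

  def lock_into_ram():
    """Lock current and future pages of this process into RAM."""
    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
    if libc.mlockall(MCL_CURRENT | MCL_FUTURE) != 0:
      err = ctypes.get_errno()
      raise OSError(err, "mlockall failed: %s" % os.strerror(err))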
New Features
------------

Automated Ganeti Cluster Merger
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Current situation
+++++++++++++++++

Currently there's no easy way to merge two or more clusters together. But in order to optimize resources this is a needed missing piece. The goal of this design doc is to come up with an easy-to-use solution which allows you to merge two or more clusters together.

Initial contact
+++++++++++++++

As the design of Ganeti is based on an autonomous system, Ganeti by itself has no way to reach nodes outside of its cluster. To overcome this situation we're required to prepare the cluster before we can go ahead with the actual merge: we have to replace at least the ssh keys on the affected nodes before we can do any operation within ``gnt-`` commands.

To make this an automated process we'll ask the user to provide us with the root password of every cluster we have to merge. We use the password to grab the current ``id_dsa`` key and then rely on that ssh key for any further communication to be made until the cluster is fully merged.

Cluster merge
+++++++++++++

After initial contact we do the cluster merge:

1. Grab the list of nodes
2. On all nodes add our own ``id_dsa.pub`` key to ``authorized_keys``
3. Stop all instances running on the merging cluster
4. Disable ``ganeti-watcher`` as it tries to restart Ganeti daemons
5. Stop all Ganeti daemons on all merging nodes
6. Grab the ``config.data`` from the master of the merging cluster
7. Stop local ``ganeti-masterd``
8. Merge the config:

   1. Open our own cluster ``config.data``
   2. Open cluster ``config.data`` of the merging cluster
   3. Grab all nodes of the merging cluster
   4. Set ``master_candidate`` to false on all merging nodes
   5. Add the nodes to our own cluster ``config.data``
   6. Grab all the instances on the merging cluster
   7. Adjust the port if the instance has drbd layout:

      1. In ``logical_id`` (index 2)
      2. In ``physical_id`` (index 1 and 3)

   8. Add the instances to our own cluster ``config.data``

9. Start ``ganeti-masterd`` with ``--no-voting`` ``--yes-do-it``
10. ``gnt-node add --readd`` on all merging nodes
11. ``gnt-cluster redist-conf``
12. Restart ``ganeti-masterd`` normally
13. Enable ``ganeti-watcher`` again
14. Start all merging instances again

Rollback
++++++++

Until we actually (re)add any nodes we can abort and rollback the merge at any point. After merging the config, though, we have to get the backup copy of ``config.data`` (from another master candidate node). And for security reasons it's a good idea to undo ``id_dsa.pub`` distribution by going on every affected node and removing the ``id_dsa.pub`` key again. Also we have to keep in mind that we have to start the Ganeti daemons and start up the instances again.

Verification
++++++++++++

Last but not least we should verify that the merge was successful. Therefore we run ``gnt-cluster verify``, which ensures that the cluster overall is in a healthy state. Additionally it's also possible to compare the list of instances/nodes with a list made prior to the merge to make sure we didn't lose any data/instance/node.

Appendix
++++++++

cluster-merge.py
^^^^^^^^^^^^^^^^

Used to merge the cluster config. This is a POC and might differ from actual production code. ::

  #!/usr/bin/python

  import sys
  from ganeti import config
  from ganeti import constants

  c_mine = config.ConfigWriter(offline=True)
  c_other = config.ConfigWriter(sys.argv[1])

  fake_id = 0
  for node in c_other.GetNodeList():
    node_info = c_other.GetNodeInfo(node)
    node_info.master_candidate = False
    c_mine.AddNode(node_info, str(fake_id))
    fake_id += 1

  for instance in c_other.GetInstanceList():
    instance_info = c_other.GetInstanceInfo(instance)
    for dsk in instance_info.disks:
      if dsk.dev_type in constants.LDS_DRBD:
        port = c_mine.AllocatePort()
        logical_id = list(dsk.logical_id)
        logical_id[2] = port
        dsk.logical_id = tuple(logical_id)
        physical_id = list(dsk.physical_id)
        physical_id[1] = physical_id[3] = port
        dsk.physical_id = tuple(physical_id)
    c_mine.AddInstance(instance_info, str(fake_id))
    fake_id += 1

Feature changes
---------------

Ganeti Confd
~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

In Ganeti 2.0 all nodes are equal, but some are more equal than others. In particular they are divided between "master", "master candidates" and "normal". (Moreover they can be offline or drained, but this is not important for the current discussion). In general the whole configuration is only replicated to master candidates, and some partial information is spread to all nodes via ssconf.

This change was done so that the most frequent Ganeti operations didn't need to contact all nodes, and so clusters could become bigger. If we want more information to be available on all nodes, we need to add more ssconf values, which is counter-balancing the change, or to talk with the master node, which is not designed to happen now, and requires its availability.

Information such as the instance->primary_node mapping will be needed on all nodes, and we also want to make sure services external to the cluster can query this information as well. This information must be available at all times, so we can't query it through RAPI, which would be a single point of failure, as it's only available on the master.
Proposed changes
++++++++++++++++

In order to allow fast and highly available read-only access to some configuration values, we'll create a new ganeti-confd daemon, which will run on master candidates. This daemon will talk via UDP, and authenticate messages using HMAC with a cluster-wide shared key. This key will be generated at cluster init time, and stored on the cluster alongside the ganeti SSL keys, and readable only by root.

An interested client can query a value by making a request to a subset of the cluster master candidates. It will then wait to get a few responses, and use the one with the highest configuration serial number. Since the configuration serial number is increased each time the ganeti config is updated, and the serial number is included in all answers, this can be used to make sure to use the most recent answer, in case some master candidates are stale or in the middle of a configuration update.

In order to prevent replay attacks queries will contain the current unix timestamp according to the client, and the server will verify that its timestamp is within the same 5-minute range (this requires synchronized clocks, which is a good idea anyway). Queries will also contain a "salt" which they expect the answers to be sent with, and clients are supposed to accept only answers which contain salt generated by them.

The configuration daemon will be able to answer simple queries such as:

- master candidates list
- master node
- offline nodes
- instance list
- instance primary nodes

Wire protocol
^^^^^^^^^^^^^

A confd query will look like this, on the wire::

  plj0{
    "msg": "{\"type\": 1,
             \"rsalt\": \"9aa6ce92-8336-11de-af38-001d093e835f\",
             \"protocol\": 1,
             \"query\": \"node1.example.com\"}\n",
    "salt": "1249637704",
    "hmac": "4a4139b2c3c5921f7e439469a0a45ad200aead0f"
  }

``plj0`` is a fourcc that details the message content. It stands for plain json 0, and can be changed as we move on to different types of protocols (for example protocol buffers, or encrypted json). What follows is a json encoded string, with the following fields:

- ``msg`` contains a JSON-encoded query, its fields are:

  - ``protocol``, integer, is the confd protocol version (initially just ``constants.CONFD_PROTOCOL_VERSION``, with a value of 1)
  - ``type``, integer, is the query type. For example "node role by name" or "node primary ip by instance ip". Constants will be provided for the actual available query types
  - ``query`` is a multi-type field (depending on the ``type`` field):

    - it can be missing, when the request is fully determined by the ``type`` field
    - it can contain a string which denotes the search key: for example an IP, or a node name
    - it can contain a dictionary, in which case the actual details vary further per request type

  - ``rsalt``, string, is the required response salt; the client must use it to recognize which answer it's getting.
- ``salt`` must be the current unix timestamp, according to the client; servers should refuse messages which have a wrong timing, according to their configuration and clock
- ``hmac`` is an hmac signature of salt+msg, with the cluster hmac key

If an answer comes back (which is optional, since confd works over UDP) it will be in this format::

  plj0{
    "msg": "{\"status\": 0,
             \"answer\": 0,
             \"serial\": 42,
             \"protocol\": 1}\n",
    "salt": "9aa6ce92-8336-11de-af38-001d093e835f",
    "hmac": "aaeccc0dff9328fdf7967cb600b6a80a6a9332af"
  }

Where:

- ``plj0`` is the message type magic fourcc, as discussed above
- ``msg`` contains a JSON-encoded answer, its fields are:

  - ``protocol``, integer, is the confd protocol version (initially just ``constants.CONFD_PROTOCOL_VERSION``, with a value of 1)
  - ``status``, integer, is the error code; initially just ``0`` for 'ok' or ``1`` for 'error' (in which case answer contains an error detail, rather than an answer), but in the future it may be expanded to have more meanings (e.g. ``2`` if the answer is compressed)
  - ``answer``, is the actual answer; its type and meaning is query specific: for example for "node primary ip by instance ip" queries it will be a string containing an IP address, for "node role by name" queries it will be an integer which encodes the role (master, candidate, drained, offline) according to constants

- ``salt`` is the requested salt from the query; a client can use it to recognize what query the answer is answering.
- ``hmac`` is an hmac signature of salt+msg, with the cluster hmac key
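The signing scheme above is a plain HMAC over the serialized message. A sketch of how a client might build a query under these rules follows; SHA-1 is an assumption inferred from the 40-character digests in the examples, and the exact field serialization is illustrative::

  # Sketch of building a signed confd query as described above;
  # SHA-1 is assumed from the example digests' length.

  import hmac
  import json
  import time
  import uuid
  from hashlib import sha1

  def build_confd_query(hmac_key, query_type, query):
    msg = json.dumps({
      "protocol": 1,
      "type": query_type,
      "query": query,
      "rsalt": str(uuid.uuid4()),  # salt we expect in the answer
    }) + "\n"
    salt = str(int(time.time()))  # lets servers reject stale queries
    digest = hmac.new(hmac_key, salt + msg, sha1).hexdigest()
    payload = json.dumps({"msg": msg, "salt": salt, "hmac": digest})
    return "plj0" + payload  # fourcc prefix, then the JSON body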
Redistribute Config
~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently LUClusterRedistConf triggers a copy of the updated configuration file to all master candidates and of the ssconf files to all nodes. There are other files which are maintained manually but which are important to keep in sync. These are:

- rapi SSL key certificate file (rapi.pem) (on master candidates)
- rapi user/password file rapi_users (on master candidates)

Furthermore there are some files which are hypervisor specific but we may want to keep in sync:

- the xen-hvm hypervisor uses one shared file for all vnc passwords, and copies the file once, during node add. This design is subject to revision to be able to have different passwords for different groups of instances via the use of hypervisor parameters, and to allow xen-hvm and kvm to use an equal system to provide password-protected vnc sessions. In general, though, it would be useful if the vnc password files were copied as well, to avoid unwanted vnc password changes on instance failover/migrate.

Optionally the admin may want to also ship files such as the global xend.conf file, and the network scripts to all nodes.

Proposed changes
++++++++++++++++

RedistributeConfig will be changed to copy also the rapi files, and to call every enabled hypervisor asking for a list of additional files to copy. Users will have the possibility to populate a file containing a list of files to be distributed; this file will be propagated as well. Such a solution is really simple to implement and it's easily usable by scripts.

This code will be also shared (via tasklets or by other means, if tasklets are not ready for 2.1) with the AddNode and SetNodeParams LUs (so that the relevant files will be automatically shipped to new master candidates as they are set).

VNC Console Password
~~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently just the xen-hvm hypervisor supports setting a password to connect to the instances' VNC console, and has one common password stored in a file. This doesn't allow different passwords for different instances/groups of instances, and makes it necessary to remember to copy the file around the cluster when the password changes.

Proposed changes
++++++++++++++++

We'll change the VNC password file to a vnc_password_file hypervisor parameter. This way it can have a cluster default, but also a different value for each instance. The VNC enabled hypervisors (xen and kvm) will publish all the password files in use through the cluster so that a redistribute-config will ship them to all nodes (see the Redistribute Config proposed changes above).

The current VNC_PASSWORD_FILE constant will be removed, but its value will be used as the default HV_VNC_PASSWORD_FILE value, thus retaining backwards compatibility with 2.0.

The code to export the list of VNC password files from the hypervisors to RedistributeConfig will be shared between the KVM and xen-hvm hypervisors.

Disk/Net parameters
~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently disks and network interfaces have a few tweakable options and all the rest is left to a default we chose. We're finding that we need more and more to tweak some of these parameters, for example to disable barriers for DRBD devices, or allow striping for the LVM volumes. Moreover for many of these parameters it will be nice to have cluster-wide defaults, and then be able to change them per disk/interface.

Proposed changes
++++++++++++++++

We will add new cluster level diskparams and netparams, which will contain all the tweakable parameters. All values which have a sensible cluster-wide default will go into this new structure while parameters which have unique values will not.

Example of network parameters:

- mode: bridge/route
- link: for mode "bridge" the bridge to connect to, for mode route it can contain the routing table, or the destination interface

Example of disk parameters:

- stripe: lvm stripes
- stripe_size: lvm stripe size
- meta_flushes: drbd, enable/disable metadata "barriers"
- data_flushes: drbd, enable/disable data "barriers"

Some parameters are bound to be disk-type specific (drbd, vs lvm, vs files) or hypervisor specific (nic models for example), but for now they will all live in the same structure. Each component is supposed to validate only the parameters it knows about, and ganeti itself will make sure that no "globally unknown" parameters are added, and that no parameters have overridden meanings for different components.

The parameters will be kept, as for the BEPARAMS, into a "default" category, which will allow us to expand on it by creating instance "classes" in the future. Instance classes are not a feature we plan to implement in 2.1, though.

Global hypervisor parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently all hypervisor parameters are modifiable both globally (cluster level) and at instance level. However, there is no other framework to hold hypervisor-specific parameters, so if we want to add a new class of hypervisor parameters that only makes sense on a global level, we have to change the hvparams framework.
Proposed changes
++++++++++++++++

We add a new (global, not per-hypervisor) list of parameters which are not changeable on a per-instance level. The create, modify and query instance operations are changed to not allow/show these parameters. Furthermore, to allow transition of parameters to the global list, and to allow cleanup of inadvertently-customised parameters, the ``UpgradeConfig()`` method of instances will drop any such parameters from their list of hvparams, such that a restart of the master daemon is all that is needed for cleaning these up.

Also, the framework is simple enough that if we need to replicate it at beparams level we can do so easily.

Non bridged instances support
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

Currently each instance NIC must be connected to a bridge, and if the bridge is not specified the default cluster one is used. This makes it impossible to use the vif-route xen network scripts, or other alternative mechanisms that don't need a bridge to work.

Proposed changes
++++++++++++++++

The new "mode" network parameter will distinguish between bridged interfaces and routed ones.

When mode is "bridge" the "link" parameter will contain the bridge the instance should be connected to, effectively making things as today. The value has been migrated from a nic field to a parameter to allow for an easier manipulation of the cluster default.

When mode is "route" the ip field of the interface will become mandatory, to allow for a route to be set. In the future we may want also to accept multiple IPs or IP/mask values for this purpose. We will evaluate possible meanings of the link parameter to signify a routing table to be used, which would allow for insulation between instance groups (as today happens for different bridges).

For now we won't add a parameter to specify which network script gets called for which instance, so in a mixed cluster the network script must be able to handle both cases. The default kvm vif script will be changed to do so. (Xen doesn't have a ganeti provided script, so nothing will be done for that hypervisor)

Introducing persistent UUIDs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Current state and shortcomings
++++++++++++++++++++++++++++++

Some objects in the Ganeti configurations are tracked by their name while also supporting renames. This creates an extra difficulty, because neither Ganeti nor external management tools can then track the actual entity, and due to the name change it behaves like a new one.

Proposed changes part 1
+++++++++++++++++++++++

We will change Ganeti to use UUIDs for entity tracking, but in a staggered way. In 2.1, we will simply add a "uuid" attribute to each of the instances, nodes and cluster itself. This will be reported on instance creation for instances, and on node add for nodes. It will be of course available for querying via the OpNodeQuery/Instance and cluster information, and via RAPI as well. Note that Ganeti will not provide any way to change this attribute.

Upgrading from Ganeti 2.0 will automatically add a 'uuid' attribute to all entities missing it.

Proposed changes part 2
+++++++++++++++++++++++

In the next release (e.g. 2.2), the tracking of objects will change from the name to the UUID internally, and externally Ganeti will accept both forms of identification; e.g. an RAPI call would be made either against ``/2/instances/foo.bar`` or against ``/2/instances/bb3b2e42…``.
Since an FQDN must have at least a dot, and dots are not valid characters in UUIDs, we will not have namespace issues.

Another change here is that node identification (during cluster operations/queries like master startup, "am I the master?" and similar) could be done via UUIDs, which is more stable than the current hostname-based scheme.

Internal tracking refers to the way the configuration is stored; a DRBD disk of an instance refers to the node name (so that IPs can be changed easily), but this is still a problem for name changes; thus these will be changed to point to the node UUID to ease renames.

The advantage of this change (after the second round of changes) is that node rename becomes trivial, whereas today node rename would require a complete lock of all instances.

Automated disk repairs infrastructure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Replacing defective disks in an automated fashion is quite difficult with the current version of Ganeti. These changes will introduce additional functionality and interfaces to simplify automating disk replacements on a Ganeti node.

Fix node volume group
+++++++++++++++++++++

This is the most difficult addition, as it can lead to data loss if it's not properly safeguarded. The operation must be done only when all the other nodes that have instances in common with the target node are fine, i.e. this is the only node with problems, and also we have to double-check that all instances on this node have at least a good copy of the data.

This might mean that we have to enhance the GetMirrorStatus calls, and introduce a smarter version that can tell us more about the status of an instance.

Stop allocation on a given PV
+++++++++++++++++++++++++++++

This is somewhat simple. First we need a "list PVs" opcode (and its associated logical unit) and then a set PV status opcode/LU. These in combination should allow both checking and changing the disk/PV status.

Instance disk status
++++++++++++++++++++

This new opcode or opcode change must list the instance-disk-index and node combinations of the instance together with their status. This will allow determining what part of the instance is broken (if any).

Repair instance
+++++++++++++++

This new opcode/LU/RAPI call will run ``replace-disks -p`` as needed, in order to fix the instance status. It only affects primary instances; secondaries can just be moved away.

Migrate node
++++++++++++

This new opcode/LU/RAPI call will take over the current ``gnt-node migrate`` code and run migrate for all instances on the node.

Evacuate node
++++++++++++++

This new opcode/LU/RAPI call will take over the current ``gnt-node evacuate`` code and run replace-secondary with an iallocator script for all instances on the node.

User-id pool
~~~~~~~~~~~~

In order to allow running different processes under unique user-ids on a node, we introduce the user-id pool concept.

The user-id pool is a cluster-wide configuration parameter. It is a list of user-ids and/or user-id ranges that are reserved for running Ganeti processes (including KVM instances). The code guarantees that on a given node a given user-id is only handed out if there is no other process running with that user-id.

Please note that this can only be guaranteed if all processes in the system - that run under a user-id belonging to the pool - are started by reserving a user-id first. That can be accomplished either by using the RequestUnusedUid() function to get an unused user-id or by implementing the same locking mechanism.
Implementation
++++++++++++++

The functions that are specific to the user-id pool feature are located in a separate module: ``lib/uidpool.py``.

Storage
^^^^^^^

The user-id pool is a single cluster parameter. It is stored in the *Cluster* object under the ``uid_pool`` name as a list of integer tuples. These tuples represent the boundaries of user-id ranges. For single user-ids, the boundaries are equal.

The internal user-id pool representation is converted into a string: a newline separated list of user-ids or user-id ranges. This string representation is distributed to all the nodes via the *ssconf* mechanism. This means that the user-id pool can be accessed in a read-only way on any node without consulting the master node or master candidate nodes.

Initial value
^^^^^^^^^^^^^

The value of the user-id pool cluster parameter can be initialized at cluster initialization time using the ``gnt-cluster init --uid-pool ...`` command.

As there is no sensible default value for the user-id pool parameter, it is initialized to an empty list if no ``--uid-pool`` option is supplied at cluster init time. If the user-id pool is empty, the user-id pool feature is considered to be disabled.

Manipulation
^^^^^^^^^^^^

The user-id pool cluster parameter can be modified from the command-line with the following commands:

- ``gnt-cluster modify --uid-pool <uid-pool>``
- ``gnt-cluster modify --add-uids <uid-pool>``
- ``gnt-cluster modify --remove-uids <uid-pool>``

The ``--uid-pool`` option overwrites the current setting with the supplied ``<uid-pool>``, while ``--add-uids``/``--remove-uids`` adds/removes the listed uids or uid-ranges from the pool.

The ``<uid-pool>`` should be a comma-separated list of user-ids or user-id ranges. A range should be defined by a lower and a higher boundary. The boundaries should be separated with a dash. The boundaries are inclusive.

The ``<uid-pool>`` is parsed into the internal representation, sanity-checked and stored in the ``uid_pool`` attribute of the *Cluster* object. It is also immediately converted into a string (formatted in the input format) and distributed to all nodes via the *ssconf* mechanism.

Inspection
^^^^^^^^^^

The current value of the user-id pool cluster parameter is printed by the ``gnt-cluster info`` command. The output format is accepted by the ``gnt-cluster modify --uid-pool`` command.

Locking
^^^^^^^

The ``uidpool.py`` module provides a function (``RequestUnusedUid``) for requesting an unused user-id from the pool.

This will try to find a random user-id that is not currently in use. The algorithm is the following:

1) Randomize the list of user-ids in the user-id pool
2) Iterate over this randomized UID list
3) Create a lock file (it doesn't matter if it already exists)
4) Acquire an exclusive POSIX lock on the file, to provide mutual exclusion for the following non-atomic operations
5) Check if there is a process in the system with the given UID
6) If there isn't, return the UID, otherwise unlock the file and continue the iteration over the user-ids

The user can then start a new process with this user-id. Once a process is successfully started, the exclusive POSIX lock can be released, but the lock file will remain in the filesystem. The presence of such a lock file means that the given user-id is most probably in use. The lack of a uid lock file does not guarantee that there are no processes with that user-id.

After acquiring the exclusive POSIX lock, ``RequestUnusedUid`` always performs a check to see if there is a process running with the given uid.

A user-id can be returned to the pool, by calling the ``ReleaseUid`` function. This will remove the corresponding lock file. Note that it doesn't check if there is any process still running with that user-id. The removal of the lock file only means that there are most probably no processes with the given user-id. This helps in speeding up the process of finding a user-id that is guaranteed to be unused.

There is a convenience function, called ``ExecWithUnusedUid`` that wraps the execution of a function (or any callable) that requires a unique user-id. ``ExecWithUnusedUid`` takes care of requesting an unused user-id and unlocking the lock file. It also automatically returns the user-id to the pool if the callable raises an exception.
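Before the usage examples below, here is a condensed sketch of the locking algorithm just described; the lock file location and the ``/proc``-based process check are illustrative assumptions, while the real code lives in ``lib/uidpool.py``::

  # Sketch of the uid-pool locking algorithm described above; the
  # lock file location and process check are illustrative only.

  import fcntl
  import os
  import random

  LOCKDIR = "/var/run/ganeti/uid-pool"  # hypothetical location

  def uid_in_use(uid):
    # Illustrative check: scan /proc for processes owned by this uid
    for pid in filter(str.isdigit, os.listdir("/proc")):
      try:
        if os.stat("/proc/" + pid).st_uid == uid:
          return True
      except OSError:
        pass  # process exited while we were scanning
    return False

  def request_unused_uid(all_uids):
    uids = list(all_uids)
    random.shuffle(uids)                       # step 1: randomize
    for uid in uids:                           # step 2: iterate
      fd = os.open("%s/%d" % (LOCKDIR, uid), os.O_CREAT | os.O_RDWR)
      fcntl.flock(fd, fcntl.LOCK_EX)           # steps 3-4: lock file
      if not uid_in_use(uid):                  # step 5: process check
        return uid, fd  # caller unlocks fd once the process started
      fcntl.flock(fd, fcntl.LOCK_UN)           # step 6: try next uid
      os.close(fd)
    raise RuntimeError("no unused user-id available")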
A user-id can be returned to the pool by calling the ``ReleaseUid`` function. This will remove the corresponding lock file. Note that it doesn't check if there is any process still running with that user-id. The removal of the lock file only means that there are most probably no processes with the given user-id. This helps in speeding up the process of finding a user-id that is guaranteed to be unused.

There is a convenience function, called ``ExecWithUnusedUid``, that wraps the execution of a function (or any callable) that requires a unique user-id. ``ExecWithUnusedUid`` takes care of requesting an unused user-id and unlocking the lock file. It also automatically returns the user-id to the pool if the callable raises an exception.

Code examples
+++++++++++++

Requesting a user-id from the pool:

::

  from ganeti import ssconf
  from ganeti import uidpool

  # Get list of all user-ids in the uid-pool from ssconf
  ss = ssconf.SimpleStore()
  uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n")
  all_uids = set(uidpool.ExpandUidPool(uid_pool))

  uid = uidpool.RequestUnusedUid(all_uids)
  try:
    # ... start a process with the UID here ...
    # Once the process is started, we can release the file lock
    uid.Unlock()
  except Exception, err:
    # Return the UID to the pool
    uidpool.ReleaseUid(uid)

Releasing a user-id:

::

  from ganeti import uidpool

  # "uid" is the UID object of a previously started process
  uidpool.ReleaseUid(uid)

External interface changes
--------------------------

OS API
~~~~~~

The OS API of Ganeti 2.0 has been built with extensibility in mind. Since we pass everything as environment variables it's a lot easier to send new information to the OSes without breaking backwards compatibility. This section of the design outlines the proposed extensions to the API and their implementation.

API Version Compatibility Handling
++++++++++++++++++++++++++++++++++

In 2.1 there will be a new OS API version (e.g. 15), which should be mostly compatible with API 10, except for some newly added variables. Since it's easy not to pass some variables, we'll be able to handle Ganeti 2.0 OSes by just filtering out the newly added pieces of information. We will still encourage OSes to declare support for the new API after checking that the new variables don't create any conflict for them, and we will drop API 10 support after Ganeti 2.1 has been released.

New Environment variables
+++++++++++++++++++++++++

Some variables have never been added to the OS API but would definitely be useful for the OSes. We plan to add an INSTANCE_HYPERVISOR variable to allow the OS to make changes relevant to the virtualization the instance is going to use. Since this field is immutable for each instance, the OS can tailor the install to it, without having to make sure the instance can run under any virtualization technology.

We also want the OS to know the particular hypervisor parameters, to be able to customize the install even more. Since the parameters can change, though, we will pass them only as an "FYI": if an OS ties some instance functionality to the value of a particular hypervisor parameter, manual changes or a reinstall may be needed to adapt the instance to the new environment. This is not a regression as of today, because even if the OSes are left blind about this information, sometimes they still need to make compromises and cannot satisfy all possible parameter values.

OS Variants
+++++++++++

Currently we are witnessing some degree of "OS proliferation" just to change a simple installation behavior. This means that the same OS gets installed on the cluster multiple times, with different names, to customize just one installation behavior.
Usually such OSes try to share as much as possible through symlinks, but this still causes complications on the user side, especially when multiple parameters must be cross-matched.

For example, today if you want to install Debian etch, lenny or squeeze you probably need to install the debootstrap OS multiple times, changing its configuration file, and calling it debootstrap-etch, debootstrap-lenny or debootstrap-squeeze. Furthermore, if you have for example a "server" and a "development" environment, which install different packages/configuration files and must be available for all installs, you'll probably end up with debootstrap-etch-server, debootstrap-etch-dev, debootstrap-lenny-server, debootstrap-lenny-dev, etc. Crossing more than two parameters quickly becomes unmanageable.

In order to avoid this we plan to make OSes more customizable, by allowing each OS to declare a list of variants which can be used to customize it. The variants list is mandatory and must be written, one variant per line, in the new "variants.list" file inside the main os dir. At least one variant must be supported. When choosing the OS exactly one variant will have to be specified, and will be encoded in the OS name as ``<os-name>+<variant>``. As is the case today, it will be possible to change an instance's OS at creation or install time.

The 2.1 OS list will be the combination of each OS, plus its supported variants. This will cause the name proliferation to remain, but at least the internal OS code will be simplified to just parsing the passed variant, without the need for symlinks or code duplication.

Also we expect the OSes to declare only "interesting" variants, but to accept some non-declared ones which a user will be able to pass in by overriding the checks Ganeti does. This will be useful for allowing some variations to be used without polluting the OS list (per-OS documentation should list all supported variants). If a variant which is not internally supported is forced through, the OS scripts should abort.

In the future (post 2.1) we may want to move to full-fledged parameters, all orthogonal to each other (for example "architecture" (i386, amd64), "suite" (lenny, squeeze, ...), etc). (As opposed to the variant, which is a single parameter, requiring a different variant for each combination you want to support.) In this case we envision the variants to be moved inside of Ganeti and be associated with lists of parameter->value associations, which will then be passed to the OS.

IAllocator changes
~~~~~~~~~~~~~~~~~~

Current State and shortcomings
++++++++++++++++++++++++++++++

The iallocator interface allows creation of instances without manually specifying nodes, but instead by specifying plugins which will do the required computations and produce a valid node list.

However, the interface is quite awkward to use:

- one cannot set a 'default' iallocator script
- one cannot use it to easily test if allocation would succeed
- some new functionality, such as rebalancing clusters and calculating capacity estimates, is needed

Proposed changes
++++++++++++++++

There are two areas of improvement proposed:

- improving the use of the current interface
- extending the IAllocator API to cover more automation

Default iallocator names
^^^^^^^^^^^^^^^^^^^^^^^^

The cluster will hold, for each type of iallocator, a (possibly empty) list of modules that will be used automatically.

If the list is empty, the behaviour will remain the same.
If the list has one entry, then Ganeti will behave as if ``--iallocator`` was specified on the command line, i.e. use this allocator by default. If the user however passed nodes, those will be used in preference.

If the list has multiple entries, they will be tried in order until one gives a successful answer.

Dry-run allocation
^^^^^^^^^^^^^^^^^^

The create instance LU will get a new 'dry-run' option that will just simulate the placement, and return the chosen node-lists after running all the usual checks.

Cluster balancing
^^^^^^^^^^^^^^^^^

Instance additions/removals/moves can create a situation where load on the nodes is not spread equally. For this, a new iallocator mode will be implemented called ``balance``, in which the plugin, given the current cluster state and a maximum number of operations, will need to compute the instance relocations needed in order to achieve a "better" (for whatever definition of "better" the script uses) cluster.

Cluster capacity calculation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In this mode, called ``capacity``, given an instance specification and the current cluster state (similar to the ``allocate`` mode), the plugin needs to return:

- how many instances can be allocated on the cluster with that specification
- on which nodes these will be allocated (in order)

.. vim: set textwidth=72 :

ganeti-2.9.3/doc/manpages-disabled.rst0000644000000000000000000000035212244641676017645 0ustar00rootroot00000000000000Man pages
=========

Inclusion of man pages into documentation was not enabled at build time; use ``./configure [...] --enable-manpages-in-doc``.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/monitoring-query-format.rst0000644000000000000000000000315612244641676021130 0ustar00rootroot00000000000000The queries to the monitoring agent will be HTTP GET requests on port 1815. The answer will be encoded in JSON format and will depend on the specific accessed resource.

If a request is sent to a non-existing resource, a 404 error will be returned by the HTTP server.

The following paragraphs will present the existing resources supported by the current protocol version, that is version 1.

``/``
+++++

The root resource. It will return the list of the supported protocol version numbers. Currently, this will include only version 1.

``/1``
++++++

Not an actual resource per se, it is the root of all the resources of protocol version 1.

If requested through GET, the null JSON value will be returned.

``/1/list/collectors``
++++++++++++++++++++++

Returns a list of tuples (kind, category, name) showing all the collectors available in the system.

``/1/report/all``
+++++++++++++++++

A list of the reports of all the data collectors, as a JSON list.

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter ``verbose=1`` to the request.

``/1/report/[category]/[collector_name]``
+++++++++++++++++++++++++++++++++++++++++

Returns the report of the collector ``[collector_name]`` that belongs to the specified ``[category]``.

The ``category`` has to be written in lowercase. If a collector does not belong to any category, ``default`` will have to be used as the value for ``[category]``.

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter ``verbose=1`` to the request.
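As an illustration, a client could query such an agent as in the following minimal Python sketch; the node name and the collector name are hypothetical placeholders, and error handling is omitted:

::

  import json
  import urllib2

  # Hypothetical node running the monitoring agent (protocol version 1)
  base = "http://node1.example.com:1815"

  # Discover the supported protocol versions and the collectors
  versions = json.load(urllib2.urlopen(base + "/"))
  collectors = json.load(urllib2.urlopen(base + "/1/list/collectors"))

  # Fetch all reports, then a single (hypothetical) collector's report
  # in verbose format; "default" is the category used by collectors
  # that belong to no category
  all_reports = json.load(urllib2.urlopen(base + "/1/report/all"))
  one_report = json.load(urllib2.urlopen(
      base + "/1/report/default/example-collector?verbose=1"))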
ganeti-2.9.3/doc/design-linuxha.rst0000644000000000000000000001443112244641676017227 0ustar00rootroot00000000000000====================
Linux HA integration
====================

.. contents:: :depth: 4

This is a design document detailing the integration of Ganeti and Linux HA.

Current state and shortcomings
==============================

Ganeti doesn't currently support any self-healing or self-monitoring.

We are now working on trying to improve the situation in this regard:

- The :doc:`autorepair system <design-autorepair>` will take care of self repairing a cluster in the presence of offline nodes.
- The :doc:`monitoring agent <design-monitoring-agent>` will take care of exporting data to monitoring.

What is still missing is a way to self-detect "obvious" failures rapidly and to:

- Maintain the master role active.
- Offline resources that are obviously faulty so that the autorepair system can perform its work.

Proposed changes
================

Linux-HA provides software that can be used to provide high availability of services through automatic failover of resources. In particular Pacemaker can be used together with Heartbeat or Corosync to make sure a resource is kept active on a self-monitoring cluster.

Ganeti OCF agents
-----------------

The Ganeti agents will be slightly special in the HA world. The following will apply:

- The agents will be able to be configured cluster-wide by tags (which will be read on the nodes via ssconf_cluster_tags) and locally by files on the filesystem that will allow them to "simulate" a particular condition (e.g. simulate a failure even if none is detected).
- The agents will be able to run in "full" or "partial" mode: in "partial" mode they will always succeed, and thus never fail a resource as long as a node is online, is running the Linux HA software and is responding to the network. In "full" mode they will also check resources like the cluster master IP or master daemon, and act if they are missing.

Note that for what Ganeti does OCF agents are needed: simply relying on the LSB scripts will not work for the Ganeti service.

Master role agent
-----------------

This agent will manage the Ganeti master role. It needs to be configured as a sticky resource (you don't want to flap the master role around, do you?) that is active on only one node; a configuration sketch is shown at the end of this section. You can require quorum or fencing to protect your cluster from multiple masters.

The agent will implement a stateless resource that considers itself "started" only on the master node, "stopped" on all master candidates and in error mode for all other nodes.

Note that if not all your nodes are master candidates this resource might have problems:

- if all nodes are configured to run the resource, heartbeat may decide to "fence" (aka stonith) all your non-master-candidate nodes if told to do so. This might not be what you want.
- if only master candidates are configured as nodes for the resource, beware of promotions and demotions, as nothing will automatically update Pacemaker should a change happen at the Ganeti level.

Other solutions, such as reporting the resource just as "stopped" on non-master-candidates as well, might mean that Pacemaker would choose the "wrong" node to promote to master, which is also a bad idea.

Future improvements
+++++++++++++++++++

- Ability to work better with non-master-candidate nodes
- Stateful resource that can "safely" transfer the master role between online nodes (with queue drain and such)
- Implement "full" mode, with detection of the cluster IP and the master node daemon.
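Purely as an illustration, such a master role resource could be wired into Pacemaker with a ``crm`` shell snippet along the following lines; the ``ocf:ganeti:master`` agent name, the timings and the stickiness value are assumptions made for the example, since Ganeti does not automate or mandate any particular HA configuration:

::

  # Hypothetical master role resource, kept sticky so the role
  # does not flap between nodes
  primitive ganeti-master ocf:ganeti:master \
          op monitor interval="30s" timeout="60s" \
          meta resource-stickiness="100"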
Node role agent
---------------

This agent will manage the Ganeti node role. It needs to be configured as a cloned resource that is active on all nodes.

In partial mode it will always return success (and thus trigger a failure only upon an HA level or network failure). Full mode, which initially will not be implemented, could also check for the node daemon being unresponsive or other local conditions (TBD).

When a failure happens the HA notification system will trigger on all other nodes, including the master. The master will then be able to offline the node. Any other work to restore instance availability should then be done by the autorepair system.

The following cluster tags are supported:

- ``ocf:node-offline:use-powercycle``: Try to powercycle a node using ``gnt-node powercycle`` when offlining.
- ``ocf:node-offline:use-poweroff``: Try to power off a node using ``gnt-node power off`` when offlining (requires OOB support).

Future improvements
+++++++++++++++++++

- Handle draining differently than offlining
- Handle different modes of "stopping" the service
- Implement "full" mode

Risks
-----

Running Ganeti with Pacemaker increases the stability risks for your Ganeti cluster. Events like:

- stopping heartbeat or corosync on a node
- corosync or heartbeat being killed for any reason
- temporary failure in a node's networking

will trigger potentially dangerous operations such as node offlining or master role failover. Moreover, if the autorepair system is in operation, such events will also be able to trigger instance failovers or migrations, and disk replaces.

Also note that operations like master-failover or manual node-modify might interact badly with this setup depending on the way your HA system is configured (see below).

This of course is an inherent problem with any Linux-HA installation, but is probably more visible with Ganeti given that our resources tend to be more heavyweight than many others managed in HA clusters (e.g. an IP address).

Code status
-----------

This code is heavily experimental, and Linux-HA is a very complex subsystem. *We might not be able to help you* if you decide to run this code: please make sure you fully understand high availability on your production machines. Ganeti only ships this code as an example but it might need customization or complex configurations on your side for it to run properly.

*Ganeti does not automate HA configuration for your cluster*. You need to do this job by hand. Good luck, don't get it wrong.

Future work
===========

- Integrate the agents better with the Ganeti monitoring
- Add hooks for managing HA at node add/remove/modify/master-failover operations
- Provide a stonith system through Ganeti's OOB system
- Provide an OOB system that does "shunning" of offline nodes, for emulating a real OOB, at least on all nodes

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/design-chained-jobs.rst0000644000000000000000000002030312230001635020064 0ustar00rootroot00000000000000============
Chained jobs
============

.. contents:: :depth: 4

This is a design document about the innards of Ganeti's job processing. Readers are advised to study previous design documents on the topic:

- :ref:`Original job queue <jqueue-original-design>`
- :ref:`Job priorities <jqueue-job-priority-design>`
- :doc:`LU-generated jobs <design-lu-generated-jobs>`

Current state and shortcomings
==============================

Ever since the introduction of the job queue with Ganeti 2.0 there have been situations where we wanted to run several jobs in a specific order.
Due to the job queue's current design, such a guarantee cannot be given. Jobs are run according to their priority, their ability to acquire all necessary locks and other factors.

One way to work around this limitation is to do some kind of job grouping in the client code. Once all jobs of a group have finished, the next group is submitted and waited for. There are different kinds of clients for Ganeti, some of which don't share code (e.g. Python clients vs. htools). This design proposes a solution which would be implemented as part of the job queue in the master daemon.

Proposed changes
================

With the implementation of :ref:`job priorities <jqueue-job-priority-design>` the processing code was re-architected and became a lot more versatile. It now returns jobs to the queue in case the locks for an opcode can't be acquired, allowing other jobs/opcodes to be run in the meantime.

The proposal is to add a new, optional property to opcodes to define dependencies on other jobs. Job X could define opcodes with a dependency on the success of job Y and would only be run once job Y is finished. If there's a dependency on success and job Y failed, job X would fail as well. Since such dependencies would use job IDs, the jobs still need to be submitted in the right order.

.. pyassert::

  # Update description below if the finalized job statuses change
  constants.JOBS_FINALIZED == frozenset([
    constants.JOB_STATUS_CANCELED,
    constants.JOB_STATUS_SUCCESS,
    constants.JOB_STATUS_ERROR,
    ])

The new attribute's value would be a list of two-valued tuples. Each tuple contains a job ID and a list of requested statuses for the job depended upon. Only final statuses are accepted (:pyeval:`utils.CommaJoin(constants.JOBS_FINALIZED)`). An empty list is equivalent to specifying all final statuses (except :pyeval:`constants.JOB_STATUS_CANCELED`, which is treated specially). An opcode runs only once all its dependency requirements have been fulfilled.

Any job referring to a cancelled job is also cancelled unless it explicitly lists :pyeval:`constants.JOB_STATUS_CANCELED` as a requested status.

In case a referenced job cannot be found in the normal queue or the archive, referring jobs fail as the status of the referenced job can't be determined.

With this change, clients can submit all wanted jobs in the right order and proceed to wait for changes on all these jobs (see ``cli.JobExecutor``). The master daemon will take care of executing them in the right order, while still presenting the client with a simple interface.

Clients using the ``SubmitManyJobs`` interface can use relative job IDs (negative integers) to refer to jobs in the same submission.

.. highlight:: javascript

Example data structures::

  # First job
  {
    "job_id": "6151",
    "ops": [
      { "OP_ID": "OP_INSTANCE_REPLACE_DISKS", ..., },
      { "OP_ID": "OP_INSTANCE_FAILOVER", ..., },
      ],
  }

  # Second job, runs in parallel with first job
  {
    "job_id": "7687",
    "ops": [
      { "OP_ID": "OP_INSTANCE_MIGRATE", ..., },
      ],
  }

  # Third job, depending on success of previous jobs
  {
    "job_id": "9218",
    "ops": [
      { "OP_ID": "OP_NODE_SET_PARAMS",
        "depend": [
          [6151, ["success"]],
          [7687, ["success"]],
          ],
        "offline": True, },
      ],
  }

Implementation details
----------------------

Status while waiting for dependencies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Jobs waiting for dependencies are certainly not in the queue anymore and therefore need to change their status from "queued". While waiting for opcode locks the job is in the "waiting" status (the constant is named ``JOB_STATUS_WAITLOCK``, but the actual value is ``waiting``).
There are the following possibilities:

#. Introduce a new status, e.g. "waitdeps".

   Pro:

   - Clients know for sure a job is waiting for dependencies, not locks

   Con:

   - Code and tests would have to be updated/extended for the new status
   - List of possible state transitions certainly wouldn't get simpler
   - Breaks backwards compatibility, older clients might get confused

#. Use existing "waiting" status.

   Pro:

   - No client changes necessary, less code churn (note that there are clients which don't live in Ganeti core)
   - Clients don't need to know the difference between waiting for a job and waiting for a lock; it doesn't make a difference
   - Fewer state transitions (see commit ``5fd6b69479c0``, which removed many state transitions and disk writes)

   Con:

   - Not immediately visible what a job is waiting for, but it's the same issue with locks; this is the reason why the lock monitor (``gnt-debug locks``) was introduced; job dependencies can be shown as "locks" in the monitor

Based on these arguments, the proposal is to do the following:

- Rename the ``JOB_STATUS_WAITLOCK`` constant to ``JOB_STATUS_WAITING`` to reflect its actual meaning: the job is waiting for something
- While waiting for dependencies and locks, jobs are in the "waiting" status
- Export dependency information in the lock monitor; example output::

    Name      Mode Owner Pending
    job/27491 -    -     success:job/34709,job/21459
    job/21459 -    -     success,error:job/14513

Cost of deserialization
~~~~~~~~~~~~~~~~~~~~~~~

To determine the status of a dependency job the job queue must have access to its data structure. Other queue operations already do this, e.g. archiving, watching a job's progress and querying jobs.

Initially (Ganeti 2.0/2.1) the job queue shared the job objects in memory and protected them using locks. Ganeti 2.2 (see :doc:`design document <design-2.2>`) changed the queue to read and deserialize jobs from disk. This significantly reduced locking and code complexity. Nowadays inotify is used to wait for changes on job files when watching a job's progress.

Reading from disk and deserializing certainly has some cost associated with it, but it's a significantly simpler architecture than synchronizing in memory with locks. At the stage where dependencies are evaluated the queue lock is held in shared mode, so different workers can read at the same time (deliberately ignoring CPython's interpreter lock).

It is expected that the majority of executed jobs won't use dependencies and therefore won't be affected.

Other discussed solutions
=========================

Job-level attribute
-------------------

At a first look it might seem better to put dependencies on previous jobs at a job level. However, it turns out that having the option of defining only a single opcode in a job as having such a dependency can be useful as well. The code complexity in the job queue is equivalent if not simpler. Since opcodes are guaranteed to run in order, clients can just define the dependency on the first opcode.

Another reason for the choice of an opcode-level attribute is that the current LUXI interface for submitting jobs is a bit restricted and would need to be changed to allow the addition of job-level attributes, potentially requiring changes in all LUXI clients and/or breaking backwards compatibility.

Client-side logic
-----------------

There's at least one implementation of a batched job executor twisted into the ``burnin`` tool's code. While certainly possible, a client-side solution should be avoided due to the different clients already in use.
For one, the :doc:`remote API <rapi>` client shouldn't import non-standard modules. htools are written in Haskell and can't use Python modules. A batched job executor contains quite some logic. Even if cleanly abstracted in a (Python) library, sharing code between different clients is difficult if not impossible.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/design-x509-ca.rst0000644000000000000000000001462112230001635016622 0ustar00rootroot00000000000000========================================
Design for an X509 Certificate Authority
========================================

.. contents:: :depth: 4

Current state and shortcomings
------------------------------

Import/export in Ganeti has a need for many unique X509 certificates. So far these were all self-signed, but with the :doc:`new design for import/export <design-2.2>` they need to be signed by a Certificate Authority (CA).

Proposed changes
----------------

The plan is to implement a simple CA in Ganeti.

Interacting with an external CA is too difficult or impossible for automated processes like exporting instances, so each Ganeti cluster will have its own CA. The public key will be stored in ``…/lib/ganeti/ca/cert.pem``, the private key (only readable by the master daemon) in ``…/lib/ganeti/ca/key.pem``.

Similar to the RAPI certificate, a new CA certificate can be installed using the ``gnt-cluster renew-crypto`` command. Such a CA could be an intermediate of a third-party CA. By default a self-signed CA is generated and used.

.. _x509-ca-serial:

Each certificate signed by the CA is required to have a unique serial number. The serial number is stored in the file ``…/lib/ganeti/ca/serial``, replicated to all master candidates and never reset, even when a new CA is installed.

The threat model is expected to be the same as with self-signed certificates. To reinforce this, all certificates signed by the CA must be valid for less than one week (168 hours).

Implementing support for Certificate Revocation Lists (CRL) using OpenSSL is non-trivial. Lighttpd doesn't support them at all and `apparently never will in version 1.4.x `_. Some CRL-related parts have only been added in the most recent version of pyOpenSSL (0.11). Instead of a CRL, Ganeti will gain a new cluster configuration property defining the minimum accepted serial number. In case of a lost or compromised private key this property can be set to the most recently generated serial number.

While possible to implement in the future, other X509 certificates used by the cluster (e.g. RAPI or inter-node communication) will not be automatically signed by the per-cluster CA.

The ``commonName`` attribute of signed certificates must be set to the cluster name or the name of a node in the cluster.

Software requirements
---------------------

- pyOpenSSL 0.10 or above (lower versions can't set the X509v3 extension ``subjectKeyIdentifier`` recommended for certificate authority certificates by :rfc:`3280`, section 4.2.1.2)

Code samples
------------

Generating X509 CA using pyOpenSSL
++++++++++++++++++++++++++++++++++

.. highlight:: python
The following code sample shows how to generate a CA certificate using pyOpenSSL::

  import OpenSSL

  key = OpenSSL.crypto.PKey()
  key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

  ca = OpenSSL.crypto.X509()
  # The version field is zero-based: the value 2 means X509 version 3
  ca.set_version(2)
  ca.set_serial_number(1)
  ca.get_subject().CN = "ca.example.com"
  ca.gmtime_adj_notBefore(0)
  ca.gmtime_adj_notAfter(24 * 60 * 60)
  ca.set_issuer(ca.get_subject())
  ca.set_pubkey(key)
  ca.add_extensions([
    OpenSSL.crypto.X509Extension("basicConstraints", True,
                                 "CA:TRUE, pathlen:0"),
    OpenSSL.crypto.X509Extension("keyUsage", True,
                                 "keyCertSign, cRLSign"),
    OpenSSL.crypto.X509Extension("subjectKeyIdentifier", False, "hash",
                                 subject=ca),
    ])
  ca.sign(key, "sha1")

Signing X509 certificate using CA
+++++++++++++++++++++++++++++++++

.. highlight:: python

The following code sample shows how to sign an X509 certificate using a CA::

  import OpenSSL

  ca_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            "ca.pem")
  ca_key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
                                          "ca.pem")

  key = OpenSSL.crypto.PKey()
  key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

  cert = OpenSSL.crypto.X509()
  cert.get_subject().CN = "node1.example.com"
  cert.set_serial_number(1)
  cert.gmtime_adj_notBefore(0)
  cert.gmtime_adj_notAfter(24 * 60 * 60)
  cert.set_issuer(ca_cert.get_subject())
  cert.set_pubkey(key)
  cert.sign(ca_key, "sha1")

How to generate Certificate Signing Request
+++++++++++++++++++++++++++++++++++++++++++

.. highlight:: python

The following code sample shows how to generate an X509 Certificate Request (CSR)::

  import OpenSSL

  key = OpenSSL.crypto.PKey()
  key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

  req = OpenSSL.crypto.X509Req()
  req.get_subject().CN = "node1.example.com"
  req.set_pubkey(key)
  req.sign(key, "sha1")

  # Write private key
  print OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key)

  # Write request
  print OpenSSL.crypto.dump_certificate_request(OpenSSL.crypto.FILETYPE_PEM,
                                                req)

X509 certificate from Certificate Signing Request
+++++++++++++++++++++++++++++++++++++++++++++++++

.. highlight:: python

The following code sample shows how to create an X509 certificate from a Certificate Signing Request and sign it with a CA::

  import OpenSSL

  ca_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            "ca.pem")
  ca_key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
                                          "ca.pem")

  req = OpenSSL.crypto.load_certificate_request(OpenSSL.crypto.FILETYPE_PEM,
                                                open("req.csr").read())

  cert = OpenSSL.crypto.X509()
  cert.set_subject(req.get_subject())
  cert.set_serial_number(1)
  cert.gmtime_adj_notBefore(0)
  cert.gmtime_adj_notAfter(24 * 60 * 60)
  cert.set_issuer(ca_cert.get_subject())
  cert.set_pubkey(req.get_pubkey())
  cert.sign(ca_key, "sha1")

  print OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)

Verify whether X509 certificate matches private key
+++++++++++++++++++++++++++++++++++++++++++++++++++

.. highlight:: python

The code sample below shows how to check whether a certificate matches with a certain private key. OpenSSL has a function for this, ``X509_check_private_key``, but pyOpenSSL provides no access to it.

::

  import OpenSSL

  ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
  ctx.use_privatekey(key)
  ctx.use_certificate(cert)
  try:
    ctx.check_privatekey()
  except OpenSSL.SSL.Error:
    print "Incorrect key"
  else:
    print "Key matches certificate"

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/index.rst0000644000000000000000000001000112271422343015370 0ustar00rootroot00000000000000..
Ganeti documentation master file, created by sphinx-quickstart Welcome to Ganeti's documentation! ================================== This page is the starting point for browsing the Ganeti documentation. Below, the corpus of Ganeti documentation is grouped by topic. A few quick references: - :doc:`glossary`: Provides explanations of basic Ganeti terminology. - :doc:`news` file: Lists changes between Ganeti versions. - :ref:`search`: Allows you to search for key terms across Ganeti documentation. Installing Ganeti +++++++++++++++++ Use the following resources to install and/or upgrade Ganeti: - :doc:`install`: Comprehensive instructions for installing Ganeti. - :doc:`install-quick`: A shortened installation guide for the experienced Ganeti user. - :doc:`upgrade`: Instructions for upgrading an existing Ganeti installation to the latest version. Using Ganeti ++++++++++++ The following resources provide guidance on how to use Ganeti: - :doc:`admin`: Information about how to manage a Ganeti cluster after it is installed (including management of nodes and instances, and information about Ganeti's tools and monitoring agent). - :doc:`walkthrough`: An example-oriented guide to Ganeti. - :doc:`manpages`: Descriptions of the various tools that are part of Ganeti. - :doc:`security`: A description of the security model underlying a Ganeti cluster. - :doc:`hooks`: Information on hooking scripts, which extend Ganeti functionalities by automatically activating when certain events occur. - :doc:`iallocator`: Description of the API for external tools, which can allocate instances either manually or automatically. - :doc:`rapi`: Description of the Ganeti remote API, which allows programmatic access to most of the functionalities of Ganeti. - :doc:`ovfconverter`: Description of a tool that provides compatibility with the standard OVF virtual machine interchange format. - :doc:`virtual-cluster`: Explanation of how to use virtual cluster support, which is utilized mainly for testing reasons. Some features are explicitly targeted for large Ganeti installations, in which multiple clusters are present: - :doc:`cluster-merge`: Describes a tool for merging two existing clusters. - :doc:`move-instance`: Describes how to move instances between clusters. Developing Ganeti +++++++++++++++++ There are a few documents particularly useful for developers who want to modify Ganeti: - :doc:`locking`: Describes Ganeti's locking strategy and lock order dependencies. - :doc:`devnotes`: Details build dependencies and other useful development-related information. Implemented designs ------------------- Before actual implementation, all Ganeti features are described in a design document. Designs fall into two categories: released versions and draft versions (which are either incomplete or not implemented). .. toctree:: :maxdepth: 1 design-2.0.rst design-2.1.rst design-2.2.rst design-2.3.rst design-htools-2.3.rst design-2.4.rst design-2.5.rst design-2.6.rst design-2.7.rst design-2.8.rst design-2.9.rst Draft designs ------------- .. toctree:: :maxdepth: 2 design-draft.rst .. 
toctree:: :hidden: admin.rst cluster-merge.rst design-autorepair.rst design-bulk-create.rst design-chained-jobs.rst design-cpu-pinning.rst design-device-uuid-name.rst design-hroller.rst design-linuxha.rst design-lu-generated-jobs.rst design-monitoring-agent.rst design-multi-reloc.rst design-network.rst design-node-add.rst design-oob.rst design-opportunistic-locking.rst design-ovf-support.rst design-partitioned design-query2.rst design-reason-trail.rst design-restricted-commands.rst design-shared-storage.rst design-virtual-clusters.rst devnotes.rst glossary.rst hooks.rst iallocator.rst install.rst install-quick.rst locking.rst manpages.rst monitoring-query-format.rst move-instance.rst news.rst ovfconverter.rst rapi.rst security.rst upgrade.rst virtual-cluster.rst walkthrough .. vim: set textwidth=72 : ganeti-2.9.3/doc/conf.py0000644000000000000000000001604712244641676015062 0ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Ganeti documentation build configuration file, created by # sphinx-quickstart on Tue Apr 14 13:23:20 2009. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os enable_manpages = bool(os.getenv("ENABLE_MANPAGES")) # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath(".")) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = "1.0" # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named "sphinx.ext.*") or your custom ones. extensions = [ "sphinx.ext.todo", "sphinx.ext.graphviz", "ganeti.build.sphinx_ext", "ganeti.build.shell_example_lexer", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The encoding of source files. source_encoding = "utf-8" # The master toctree document. master_doc = "index" # General information about the project. project = u"Ganeti" copyright = u"%s Google Inc." % ", ".join(map(str, range(2006, 2013 + 1))) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # These next two will be passed via the command line, see the makefile # The short X.Y version #version = VERSION_MAJOR + "." + VERSION_MINOR # The full version, including alpha/beta/rc tags. #release = PACKAGE_VERSION # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = "" # Else, today_fmt is used as the format for a strftime call. #today_fmt = "%B %d, %Y" # List of documents that shouldn't be included in the build. #unused_docs = [] if enable_manpages: exclude_patterns = [] else: exclude_patterns = [ "man-*.rst", ] # List of directories, relative to source directory, that shouldn't be searched # for source files. 
exclude_trees = [
  "_build",
  "api",
  "coverage",
  "examples",
  ]

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, "()" will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "default"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["css"]
html_style = "style.css"

# If not "", a "Last updated on:" timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = "%b %d, %Y"

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
html_use_modindex = False

# If false, no index is generated.
html_use_index = False

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ""

# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ""

# Output file base name for HTML help builder.
htmlhelp_basename = "Ganetidoc"

# -- Options for LaTeX output --------------------------------------------------

# The paper size ("letter" or "a4").
#latex_paper_size = "a4"

# The font size ("10pt", "11pt" or "12pt").
#latex_font_size = "10pt"

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
  ("index", "Ganeti.tex", u"Ganeti Documentation", u"Google Inc.", "manual"),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Additional stuff for the LaTeX preamble.
#latex_preamble = ""

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
latex_use_modindex = False

ganeti-2.9.3/doc/design-reason-trail.rst0000644000000000000000000001074212244641676020150 0ustar00rootroot00000000000000===================
Ganeti reason trail
===================

.. contents:: :depth: 2

This is a design document detailing the implementation of a way for Ganeti to track the origin and the reason of every executed command, from its starting point (command line, remote API, some htool, etc.) to its actual execution time.

Current state and shortcomings
==============================

There is currently no way to track why a job and all the operations part of it were executed, and who or what triggered the execution. This is an inconvenience in general, and it also makes it impossible to retrieve certain information, such as the reason why an instance last changed its status (i.e.: why it was started/stopped/rebooted/etc.), or to distinguish an admin request from a scheduled maintenance or an automated tool's work.

Proposed changes
================

We propose to introduce a new piece of information, that will be called "reason trail", to track the path from the issuing of a command to its execution.

The reason trail will be a list of 3-tuples ``(source, reason, timestamp)``, with:

``source``
  The entity deciding to perform (or forward) a command. It is represented by an arbitrary string, but strings prepended by "gnt:" are reserved for Ganeti components, and they will be refused by the interfaces towards the external world.

``reason``
  The reason why the entity decided to perform the operation. It is represented by an arbitrary string. The string might possibly be empty, because certain components of the system might just "pass on" the operation (therefore wanting to be recorded in the trail) but without an explicit reason.

``timestamp``
  The time when the element was added to the reason trail. It has to be expressed in nanoseconds since the unix epoch (0:00:00 January 01, 1970). If not enough precision is available (or needed) it can be padded with zeroes.

The reason trail will be attached at the OpCode level. When it has to be serialized externally (such as on the RAPI interface), it will be serialized in JSON format. Specifically, it will be serialized as a list of elements. Each element will be a list with two strings (for ``source`` and ``reason``) and one integer number (the ``timestamp``).

Any component the operation goes through is allowed (but not required) to append its own reason to the list. Other than this, the list shouldn't be modified.
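In the serialized (JSON) form described above, each trail element is thus a three-element list; a two-element trail would look like the following (the values are taken from the example that follows)::

  [["user", "Cleanup of unused instances", 1363088484000000000],
   ["gnt:client:gnt-instance", "stop", 1363088484020000000]]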
As an example, here is the reason trail for a shutdown operation invoked from the command line through the gnt-instance tool::

  [("user", "Cleanup of unused instances", 1363088484000000000),
   ("gnt:client:gnt-instance", "stop", 1363088484020000000),
   ("gnt:opcode:shutdown", "job=1234;index=0", 1363088484026000000),
   ("gnt:daemon:noded:shutdown", "", 1363088484135000000)]

where the first 3-tuple is determined by a user-specified message, passed to gnt-instance through a command line parameter.

The same operation, launched by an external GUI tool, and executed through the remote API, would have a reason trail like::

  [("user", "Cleanup of unused instances", 1363088484000000000),
   ("other-app:tool-name", "gui:stop", 1363088484000300000),
   ("gnt:client:rapi:shutdown", "", 1363088484020000000),
   ("gnt:library:rlib2:shutdown", "", 1363088484023000000),
   ("gnt:opcode:shutdown", "job=1234;index=0", 1363088484026000000),
   ("gnt:daemon:noded:shutdown", "", 1363088484135000000)]

Implementation
==============

The OpCode base class will be modified to include a new parameter, "reason". This will receive the reason trail as built by all the previous steps.

When an OpCode is added to a job (in jqueue.py) the job number and the opcode index will be recorded as the reason for the existence of that opcode.

From the command line tools down to the opcodes, the implementation of this design will be shared by all the components of the system. After the opcodes have been enqueued in a job queue and are dispatched for execution, the implementation will have to be OpCode specific because of the current structure of the Ganeti backend.

The implementation of opcode-specific parts will start from the operations that affect the instance status (as required by the design document about the monitoring daemon, for the instance status data collector). Such opcodes will be changed so that the "reason" is passed to them and they will then export the reason trail to a file.

The implementation for other opcodes will follow when required.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/virtual-cluster.rst0000644000000000000000000001027012271422343017436 0ustar00rootroot00000000000000Virtual cluster support
=======================

Documents Ganeti version 2.9

.. contents::

Introduction
------------

This is a description of Ganeti's support for virtual clusters introduced in version 2.7. The original design is described in a separate :doc:`design document <design-virtual-clusters>`.

A virtual cluster consists of multiple virtual nodes (instances of Ganeti daemons) running on the same physical machine within one operating system. This way multiple (virtual) nodes can be simulated using a single machine. Virtual clusters can be run as a user without root privileges (see :ref:`limitations <limitations>`).

While not implemented in the helper setup script at the time of this writing, virtual clusters can also be split over multiple physical machines, allowing for even more virtual nodes.

.. _limitations:

Limitations
-----------

Due to historical and practical design decisions virtual clusters have several limitations.

- "fake" hypervisor only
- Instances must be diskless or file-backed
- Node information is the same over multiple virtual nodes (e.g. free memory)
- If running as a user without root privileges, certain operations are not available; some operations are not useful even when running as root (e.g.
powercycle) - OS definitions must be prepared for this setup - Setup is partially manual, especially when not running as root Basics ------ Ganeti programs act as running on a virtual node if the environment variables ``GANETI_ROOTDIR`` and ``GANETI_HOSTNAME`` are set. The former must be an absolute path to a directory with the last component being equal to the value of ``GANETI_HOSTNAME``, which contains the name of the virtual node. The reason for this requirement is that one virtual node must be able to compute an absolute path on another node for copying files via SSH. The whole content of ``GANETI_ROOTDIR`` is the node directory, its parent directory (without hostname) is the cluster directory. Example for environment variables:: GANETI_ROOTDIR=/tmp/vcluster/node1.example.com GANETI_HOSTNAME=node1.example.com With this example the node directory is ``/tmp/vcluster/node1.example.com`` and the cluster directory ``/tmp/vcluster``. .. _vcluster-setup: Setup ----- A script to configure virtual clusters is included with Ganeti as ``tools/vcluster-setup`` (usually installed as ``/usr/lib/ganeti/tools/vcluster-setup``). Running it with the ``-h`` option prints a usage description. The script creates all necessary directories, configures network interfaces, adds or updates entries in ``/etc/hosts`` and generates a small number of helper scripts. .. TODO: Describe setup of non-root virtual cluster Use --- Once the virtual cluster has been :ref:`set up `, the cluster can be initialized. The instructions for doing so have been printed by the ``vcluster-setup`` script together with other useful information, such as the list of virtual nodes. The commands printed should be used to configure the list of enabled hypervisors and other settings. To run commands for a specific virtual node, the script named ``cmd`` located in the node directory can be used. It takes a command as its argument(s), sets the environment variables ``GANETI_ROOTDIR`` and ``GANETI_HOSTNAME`` and then runs the command. Example: .. highlight:: shell-example :: # Let's create a cluster with node1 as its master node $ cd /tmp/vcluster $ node1.example.com/cmd gnt-cluster info Cluster name: cluster.example.com … Master node: node1.example.com … # Configure cluster as per "vcluster-setup" script $ node1.example.com/cmd gnt-cluster modify … Scripts are provided in the cluster root directory to start, stop or restart all daemons for all virtual nodes. These are named ``start-all``, ``stop-all`` and ``restart-all``. ``ganeti-watcher`` can be run for all virtual nodes using ``watcher-all``. Adding an instance (assuming node1.example.com is the master node as per the example above): .. highlight:: shell-example :: $ node1.example.com/cmd gnt-instance add --os-size 1G \ --disk-template=file --os-type dummy -B memory=192 -I hail \ instance1.example.com .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. 
End:

ganeti-2.9.3/doc/design-2.7.rst0000644000000000000000000000147512244641676016067 0ustar00rootroot00000000000000=================
Ganeti 2.7 design
=================

The following design documents have been implemented in Ganeti 2.7:

- :doc:`design-bulk-create`
- :doc:`design-opportunistic-locking`
- :doc:`design-restricted-commands`
- :doc:`design-node-add`
- :doc:`design-virtual-clusters`
- :doc:`design-network`
- :doc:`design-linuxha`
- :doc:`design-shared-storage` (Updated to reflect the new ExtStorage Interface)

The following designs have been partially implemented in Ganeti 2.7:

- :doc:`design-query-splitting`: only queries not needing RPC are supported, through confd
- :doc:`design-partitioned`: only exclusive use of disks is implemented
- :doc:`design-monitoring-agent`: an example standalone DRBD data collector is included

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:

ganeti-2.9.3/doc/design-2.3.rst0000644000000000000000000011250112244641676016056 0ustar00rootroot00000000000000=================
Ganeti 2.3 design
=================

This document describes the major changes in Ganeti 2.3 compared to the 2.2 version.

.. contents:: :depth: 4

As for 2.1 and 2.2 we divide the 2.3 design into three areas:

- core changes, which affect the master daemon/job queue/locking or all/most logical units
- logical unit/feature changes
- external interface changes (e.g. command line, OS API, hooks, ...)

Core changes
============

Node Groups
-----------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently all nodes of a Ganeti cluster are considered as part of the same pool, for allocation purposes: DRBD instances for example can be allocated on any two nodes.

This does cause a problem in cases where nodes are not all equally connected to each other. For example if a cluster is created over two sets of machines, each connected to its own switch, the internal bandwidth between machines connected to the same switch might be bigger than the bandwidth for inter-switch connections.

Moreover, some operations inside a cluster require all nodes to be locked together for inter-node consistency, and won't scale if we increase the number of nodes to a few hundred.

Proposed changes
~~~~~~~~~~~~~~~~

With this change we'll divide Ganeti nodes into groups. Nothing will change for clusters with only one node group. Bigger clusters will be able to have more than one group, and each node will belong to exactly one.

Node group management
+++++++++++++++++++++

To manage node groups and the nodes belonging to them, the following new commands and flags will be introduced::

  gnt-group add <group>          # add a new node group
  gnt-group remove <group>       # delete an empty node group
  gnt-group list                 # list node groups
  gnt-group rename <group>       # rename a node group
  gnt-node {list,info} -g <group>  # list only nodes belonging to a node group
  gnt-node modify -g <group>     # assign a node to a node group

Node group attributes
+++++++++++++++++++++

In clusters with more than one node group, it may be desirable to establish local policies regarding which groups should be preferred when performing allocation of new instances, or inter-group instance migrations.

To help with this, we will provide an ``alloc_policy`` attribute for node groups. Such an attribute will be honored by iallocator plugins when making automatic decisions regarding instance placement.
The ``alloc_policy`` attribute can have the following values:

- unallocable: the node group should not be a candidate for instance allocations, and the operation should fail if only groups in this state could be found that would satisfy the requirements.
- last_resort: the node group should not be used for instance allocations, unless this would be the only way to have the operation succeed. Prioritization among groups in this state will be deferred to the iallocator plugin that's being used.
- preferred: the node group can be used freely for allocation of instances (this is the default state for newly created node groups). Note that prioritization among groups in this state will be deferred to the iallocator plugin that's being used.

Node group operations
+++++++++++++++++++++

One operation at the node group level will be initially provided::

  gnt-group drain <group>

The purpose of this operation is to migrate all instances in a given node group to other groups in the cluster, e.g. to reclaim capacity if there are enough free resources in other node groups that share a storage pool with the evacuated group.

Instance level changes
++++++++++++++++++++++

With the introduction of node groups, instances will be required to live in only one group at a time; this is mostly important for DRBD instances, which will not be allowed to have their primary and secondary nodes in different node groups. To support this, we envision the following changes:

- The iallocator interface will be augmented, and node groups exposed, so that plugins will be able to make a decision regarding the group in which to place a new instance. By default, all node groups will be considered, but it will be possible to include a list of groups in the creation job, in which case the plugin will limit itself to considering those; in both cases, the ``alloc_policy`` attribute will be honored.
- If, on the other hand, primary and secondary nodes are specified for a new instance, they will be required to be in the same node group.
- Moving an instance between groups can only happen via an explicit operation, which for example in the case of DRBD will work by performing internally a replace-disks, a migration, and a second replace-disks. It will be possible to clean up an interrupted group-move operation.
- Cluster verify will signal an error if an instance has nodes belonging to different groups. Additionally, changing the group of a given node will be initially only allowed if the node is empty, as a straightforward mechanism to avoid creating such a situation.
- Inter-group instance migration will have the same operation modes as new instance allocation, defined above: letting an iallocator plugin decide the target group, possibly restricting the set of node groups to consider, or specifying target primary and secondary nodes. In both cases, the target group or nodes must be able to accept the instance network- and storage-wise; the operation will fail otherwise, though in the future we may be able to allow some parameter to be changed together with the move (in the meantime, an import/export will be required in this scenario).

Internal changes
++++++++++++++++

We expect the following changes for cluster management:

- Frequent multinode operations, such as os-diagnose or cluster-verify, will act on one group at a time, which will have to be specified in all cases, except for clusters with just one group. Command line tools will also have a way to easily target all groups, by generating one job per group.
- Groups will have a human-readable name, but will internally always
  be referenced by a UUID, which will be immutable; for example, nodes
  will contain the UUID of the group they belong to. This is done to
  simplify referencing while keeping it easy to handle renames and
  movements. If we see that this works well, we'll transition other
  config objects (instances, nodes) to the same model.
- The addition of a new per-group lock will be evaluated, if we can
  transition some operations now requiring the BGL to it.
- Master candidate status will be allowed to be spread among groups.
  For the first version we won't add any restriction over how this is
  done, although in the future we may have a minimum number of master
  candidates which Ganeti will try to keep in each group, for example.

Other work and future changes
+++++++++++++++++++++++++++++

Commands like ``gnt-cluster command``/``gnt-cluster copyfile`` will
continue to work on the whole cluster, but it will be possible to
target one group only by specifying it.

Commands which allow selection of sets of resources (for example
``gnt-instance start``/``gnt-instance stop``) will be able to select
them by node group as well.

Initially node groups won't be taggable objects, to simplify the first
implementation, but we expect this to be easy to add in a future
version should we see it's useful.

We envision groups as a good place to enhance cluster scalability. In
the future we may want to use them as units for configuration
diffusion, to allow better master scalability. For example it could be
possible to change some all-nodes RPCs to contact each group once,
from the master, and make one node in the group perform internal
diffusion. We won't implement this in the first version, but we'll
evaluate it for the future, if we see scalability problems on big
multi-group clusters.

When Ganeti supports more storage models (e.g. SANs, Sheepdog, Ceph)
we expect groups to be the basis for this, allowing for example a
different Sheepdog/Ceph cluster, or a different SAN, to be connected
to each group. In some cases this will mean that inter-group move
operations will necessarily be performed with instance downtime,
unless the hypervisor has block-migrate functionality, and we
implement support for it (this would be theoretically possible, today,
with KVM, for example).

Scalability issues with big clusters
------------------------------------

Current and future issues
~~~~~~~~~~~~~~~~~~~~~~~~~

Assuming the node groups feature will enable bigger clusters, other
parts of Ganeti will be impacted even more by the (in effect) bigger
clusters.

While many areas will be impacted, one is the most important: the fact
that the watcher still needs to be able to repair instance data on the
current five-minute time frame (a shorter time frame would be even
better). This means that the watcher itself needs to have parallelism
when dealing with node groups.

Also, the iallocator plugins are being fed data from Ganeti but also
need access to the full cluster state, and in general we still rely on
being able to compute the full cluster state somewhat “cheaply” and
on-demand. This conflicts with the goal of disconnecting the different
node groups, and of keeping the same parallelism while growing the
cluster size.

Another issue is that the current capacity calculations are done
completely outside Ganeti (and they need access to the entire cluster
state), and this prevents keeping the capacity numbers in sync with
the cluster state.
While this is still acceptable for smaller clusters, where a small
number of allocations/removals are presumed to occur between two
periodic capacity calculations, on bigger clusters, where we aim to
parallelize heavily between node groups, this is no longer true.

As proposed changes, the main change is introducing a cluster state
cache (not serialised to disk), and updating many of the LUs and
cluster operations to account for it. Furthermore, the capacity
calculations will be integrated via a new OpCode/LU, so that we have
faster feedback (instead of periodic computation).

Cluster state cache
~~~~~~~~~~~~~~~~~~~

A new cluster state cache will be introduced. The cache relies on two
main ideas:

- the total node memory and CPU count are very seldom changing; the
  total node disk space is also slow changing, but can change at
  runtime; the free memory and free disk will change significantly for
  some jobs, but on a short timescale; in general, these values will
  be mostly “constant” during the lifetime of a job
- we already have a periodic set of jobs that query the node and
  instance state, driven by the :command:`ganeti-watcher` command, and
  we're just discarding the results after acting on them

Given the above, it makes sense to cache the results of node and
instance state (with a focus on the node state) inside the master
daemon.

The cache will not be serialised to disk, and will be for the most
part transparent to the outside of the master daemon.

Cache structure
+++++++++++++++

The cache will be oriented with a focus on node groups, so that it
will be easy to invalidate an entire node group, or a subset of nodes,
or the entire cache. The instances will be stored in the node group of
their primary node.

Furthermore, since the node and instance properties determine the
capacity statistics in a deterministic way, the cache will also hold,
at each node group level, the total capacity as determined by the new
capacity iallocator mode.

Cache updates
+++++++++++++

The cache will be updated whenever a query for a node state returns
“full” node information (so as to keep the cache state for a given
node consistent). Partial results will not update the cache (see next
paragraph).

Since there will be no way to feed the cache from outside, and we
would like to have a consistent cache view when driven by the watcher,
we'll introduce a new OpCode/LU for the watcher to run, instead of the
current separate opcodes (see below in the watcher section).

Updates to a node that change a node's specs “downward” (e.g. less
memory) will invalidate the capacity data. Updates that increase a
node's specs will not invalidate the capacity, as we're more
interested in “at least available” correctness, not “at most
available”.

Cache invalidation
++++++++++++++++++

If a partial node query is done (e.g. just for the node free space),
and the returned values don't match the cache, then the entire node
state will be invalidated.

By default, all LUs will invalidate the caches for all nodes and
instances they lock. If an LU uses the BGL, then it will invalidate
the entire cache. In time, it is expected that LUs will be modified to
not invalidate, if they are not expected to change the node's and/or
instance's state (e.g. ``LUInstanceConsole``, or
``LUInstanceActivateDisks``).

Invalidation of a node's properties will also invalidate the capacity
data associated with that node.
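As an illustration only, a minimal sketch of such a group-oriented
cache (the class and method names here are hypothetical, not the
actual implementation) could look like::

  import time

  class GroupStateCache(object):
    """Sketch of a node-group oriented state cache."""

    def __init__(self, max_age):
      self._max_age = max_age
      # group UUID -> node name -> (timestamp, full node state)
      self._groups = {}

    def UpdateNode(self, group_uuid, node, state):
      # Only "full" node information may enter the cache
      self._groups.setdefault(group_uuid, {})[node] = (time.time(), state)

    def GetNode(self, group_uuid, node):
      entry = self._groups.get(group_uuid, {}).get(node)
      if entry is not None and time.time() - entry[0] < self._max_age:
        return entry[1]
      return None  # expired or unknown, the caller must query the node

    def InvalidateNode(self, group_uuid, node):
      # Used when a partial query disagrees with the cached state
      self._groups.get(group_uuid, {}).pop(node, None)

    def InvalidateGroup(self, group_uuid):
      self._groups.pop(group_uuid, None)

    def InvalidateAll(self):
      self._groups.clear()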
Cache lifetime
++++++++++++++

The cache elements will have an upper bound on their lifetime; the
proposal is to make this an hour, which should be a high enough value
to cover the watcher being blocked by a medium-term job (e.g. 20-30
minutes).

Cache usage
+++++++++++

The cache will be used by default for most queries (e.g. a Luxi call,
without locks, for the entire cluster). Since this will be a change
from the current behaviour, we'll need to allow non-cached responses,
e.g. via a ``--cache=off`` or similar argument (which will force the
query).

The cache will also be used for the iallocator runs, so that computing
an allocation solution can proceed independently from other jobs which
lock parts of the cluster. This is important as we need to separate
allocation on one group from exclusive blocking jobs on other node
groups.

The capacity calculations will also use the cache. This is detailed in
the respective sections.

Watcher operation
~~~~~~~~~~~~~~~~~

As detailed in the cluster cache section, the watcher also needs
improvements in order to scale with the cluster size.

As a first improvement, the proposal is to introduce a new OpCode/LU
pair that runs with locks held over the entire query sequence (the
current watcher runs a job with two opcodes, which grab and release
the locks individually). The new opcode will be called
``OpUpdateNodeGroupCache`` and will do the following:

- try to acquire all node/instance locks (to examine in more depth,
  and possibly alter) in the given node group
- invalidate the cache for the node group
- acquire node and instance state (possibly via a new single RPC call
  that combines node and instance information)
- update cache
- return the needed data

The reason for the per-node group query is that we don't want a busy
node group to prevent instance maintenance in other node groups.
Therefore, the watcher will introduce parallelism across node groups,
and it will be possible to have overlapping watcher runs.

The new execution sequence will be:

- the parent watcher process acquires the global watcher lock
- query the list of node groups (lockless or very short locks only)
- fork N children, one for each node group
- release the global lock
- poll/wait for the children to finish

Each forked child will do the following:

- try to acquire the per-node group watcher lock
- if it fails to acquire the lock, exit with a special code telling
  the parent that the node group is already being managed by a watcher
  process
- otherwise, submit an OpUpdateNodeGroupCache job
- get results (possibly after a long time, due to a busy group)
- run the needed maintenance operations for the current group

This new mode of execution means that the master watcher processes
might overlap in running, but not the individual per-node group child
processes.

This change allows us to keep (almost) the same parallelism when using
a bigger cluster with node groups versus two separate clusters.

Cost of periodic cache updating
+++++++++++++++++++++++++++++++

Currently the watcher only does “small” queries for the node and
instance state, and at first sight changing it to use the new OpCode,
which populates the cache with the entire state, might introduce
additional costs, which must be paid every five minutes.

However, the OpCodes that the watcher submits are using the so-called
dynamic fields (which need to contact the remote nodes), and the LUs
are not selective—they always grab all the node and instance state. So
in the end, we have the same cost; it just becomes explicit rather
than implicit.
This ‘grab all node state’ behaviour is what makes the cache worth
implementing.

Intra-node group scalability
++++++++++++++++++++++++++++

The design above only deals with inter-node group issues. It still
makes sense to run instance maintenance for nodes A and B if only node
C is locked (all being in the same node group).

This problem is commonly encountered in previous Ganeti versions, and
it should be handled similarly, by tweaking lock lifetime in
long-duration jobs.

TODO: add more ideas here.

State file maintenance
++++++++++++++++++++++

The splitting of node group maintenance to different children which
will run in parallel requires that the state file handling changes
from monolithic updates to partial ones.

There are two files that the watcher maintains:

- ``$LOCALSTATEDIR/lib/ganeti/watcher.data``, its internal state file,
  used for deciding internal actions
- ``$LOCALSTATEDIR/run/ganeti/instance-status``, a file designed for
  external consumption

For the first file, since it's used only internally by the watchers,
we can move to a per-node group configuration.

For the second file, even if it's used as an external interface, we
will need to make some changes to it: because the different node
groups can return results at different times, we need to either split
the file into per-group files or keep the single file and add a
per-instance timestamp (currently the file holds only the instance
name and state).

The proposal is that each child process maintains its own node group
file, and the master process will, right after querying the node group
list, delete any extra per-node group state files. This leaves the
consumers to run a simple ``cat instance-status.group-*`` to obtain
the entire list of instances and their states. If needed, the modify
timestamp of each file can be used to determine the age of the
results.

Capacity calculations
~~~~~~~~~~~~~~~~~~~~~

Currently, the capacity calculations are done completely outside
Ganeti. As explained in the current problems section, this needs to
account better for the cluster state changes.

Therefore a new OpCode will be introduced, ``OpComputeCapacity``, that
will either return the current capacity numbers (if available), or
trigger a new capacity calculation, via the iallocator framework,
which will get a new method called ``capacity``.

This method will feed the cluster state (for the complete set of node
groups, or alternatively just a subset) to the iallocator plugin
(either the specified one, or the default if none is specified), and
return the new capacity in the format currently exported by the htools
suite and known as the “tiered specs” (see :manpage:`hspace(1)`).
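To make the new mode slightly more concrete, a request in the JSON
format used by the iallocator protocol might look roughly as follows;
the exact field names are illustrative only, since they are not fixed
by this design, and the UUID is a made-up example value::

  {
    "request": {
      "type": "capacity",
      "node_groups": ["f4e06e0d-528a-4963-a5ad-10f3e114232d"]
    },
    "cluster_name": "cluster.example.com",
    "nodes": { "node1.example.com": { "total_memory": 32768 } },
    "instances": {}
  }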
tspec cluster parameters
++++++++++++++++++++++++

Currently, the “tspec” calculations done in :command:`hspace` require
some additional parameters:

- maximum instance size
- type of instance storage
- maximum ratio of virtual CPUs per physical CPUs
- minimum disk free

For the integration in Ganeti, there are multiple ways to pass these:

- ignored by Ganeti, it being the responsibility of the iallocator
  plugin whether to use these at all or not
- as input to the opcode
- as proper cluster parameters

Since the first option is not consistent with the intended changes, a
combination of the last two is proposed:

- at cluster level, we'll have cluster-wide defaults
- at node group level, we'll allow overriding the cluster defaults
- and if they are passed in via the opcode, they will override the
  values for the current computation

Whenever the capacity is requested via different parameters, it will
invalidate the cache, even if otherwise the cache is up-to-date.

The new parameters are:

- max_inst_spec: (int, int, int), the maximum instance specification
  accepted by this cluster or node group, in the order of memory,
  disk, vcpus;
- default_template: string, the default disk template to use
- max_cpu_ratio: double, the maximum ratio of VCPUs/PCPUs
- max_disk_usage: double, the maximum disk usage (as a ratio)

These might also be used in instance creations (to be determined
later, after they are introduced).

OpCode details
++++++++++++++

Input:

- iallocator: string (optional, otherwise uses the cluster default)
- cached: boolean, optional, defaults to true, and denotes whether we
  accept cached responses
- the above new parameters, optional; if they are passed, they will
  override all node groups' parameters

Output:

- cluster: list of tuples (memory, disk, vcpu, count), in decreasing
  order of specifications; the first three members represent the
  instance specification, the last one the count of how many instances
  of this specification can be created on the cluster
- node_groups: a dictionary keyed by node group UUID, with values a
  dictionary:

  - tspecs: a list like the cluster one
  - additionally, the new cluster parameters, denoting the input
    parameters that were used for this node group

- ctime: the date the result has been computed; this represents the
  oldest creation time amongst all node groups (so as to accurately
  represent how much out-of-date the global response is)

Note that due to the way the tspecs are computed, for any given
specification, the total available count is the count for the given
entry, plus the sum of counts for higher specifications.

Node flags
----------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently all nodes are, from the point of view of their capabilities,
homogeneous. This means the cluster considers all nodes capable of
becoming master candidates, and of hosting instances.

This prevents some deployment scenarios: e.g. having a Ganeti instance
(in another cluster) be just a master candidate, in case all other
master candidates go down (but not, of course, host instances), or
having a node in a remote location just host instances but not become
master, etc.

Proposed changes
~~~~~~~~~~~~~~~~

Two new capability flags will be added to the node:

- master_capable, denoting whether the node can become a master
  candidate or master
- vm_capable, denoting whether the node can host instances

In terms of the other flags, clearing master_capable is a stronger
version of "not master candidate", and clearing vm_capable is a
stronger version of "drained".
The master_capable flag will affect auto-promotion code and node
modifications.

The vm_capable flag will affect the iallocator protocol, capacity
calculations, node checks in cluster verify, and will interact in
novel ways with locking (unfortunately).

It is envisaged that most nodes will be both vm_capable and
master_capable, and just a few will have one of these flags removed.

Ganeti itself will allow clearing of both flags, even though this
doesn't make much sense currently.

.. _jqueue-job-priority-design:

Job priorities
--------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently all jobs and opcodes have the same priority. Once a job
started executing, its thread won't be released until all opcodes got
their locks and did their work. When a job is finished, the next job
is selected strictly by its incoming order. This does not mean jobs
are run in their incoming order—locks and other delays can cause them
to be stalled for some time.

In some situations, e.g. an emergency shutdown, one may want to run a
job as soon as possible. This is not possible currently if there are
pending jobs in the queue.

Proposed changes
~~~~~~~~~~~~~~~~

Each opcode will be assigned a priority on submission. Opcode
priorities are integers and the lower the number, the higher the
opcode's priority is. Within the same priority, jobs and opcodes are
initially processed in their incoming order.

Submitted opcodes can have one of the priorities listed below. Other
priorities are reserved for internal use. The absolute range is
-20..+19. Opcodes submitted without a priority (e.g. by older clients)
are assigned the default priority.

- High (-10)
- Normal (0, default)
- Low (+10)

As a change from the current model, where executing a job blocks one
thread for the whole duration, the new job processor must return the
job to the queue after each opcode and also if it can't get all locks
in a reasonable timeframe. This will allow opcodes of higher priority
submitted in the meantime to be processed, or opcodes of the same
priority to try to get their locks. When added to the job queue's
workerpool, the priority is determined by the first unprocessed opcode
in the job.

If an opcode is deferred, the job will go back to the "queued" status,
even though it's just waiting to try to acquire its locks again later.

If an opcode can not be processed after a certain number of retries or
a certain amount of time, it should increase its priority. This will
avoid starvation. A job's priority can never go below -20. If a job
hits priority -20, it must acquire its locks in blocking mode.

Opcode priorities are synchronised to disk in order to be restored
after a restart or crash of the master daemon.

Priorities also need to be considered inside the locking library to
ensure opcodes with higher priorities get locks first. See
:ref:`locking priorities <locking-priorities>` for more details.

Worker pool
+++++++++++

To support job priorities in the job queue, the worker pool underlying
the job queue must be enhanced to support task priorities. Currently
tasks are processed in the order they are added to the queue (but, due
to their nature, they don't necessarily finish in that order). All
tasks are equal. To support tasks with higher or lower priority, a few
changes have to be made to the queue inside a worker pool.

Each task is assigned a priority when added to the queue.
This priority can not be changed until the task is executed (this is
fine as in all current use-cases, tasks are added to a pool and then
forgotten about until they're done).

A task's priority can be compared to Unix process priorities. The
lower the priority number, the closer to the queue's front it is. A
task with priority 0 is going to be run before one with priority 10.
Tasks with the same priority are executed in the order in which they
were added.

While a task is running it can query its own priority. If it's not
ready yet for finishing, it can raise an exception to defer itself,
optionally changing its own priority. This is useful for the following
cases:

- A task is trying to acquire locks, but those locks are still held by
  other tasks. By deferring itself, the task gives others a chance to
  run. This is especially useful when all workers are busy.
- If a task decides it hasn't gotten its locks in a long time, it can
  start to increase its own priority.
- Tasks waiting for long-running asynchronous operations could defer
  themselves until those operations complete.

With these changes, the job queue will be able to implement per-job
priorities.

.. _locking-priorities:

Locking
+++++++

In order to support priorities in Ganeti's own lock classes,
``locking.SharedLock`` and ``locking.LockSet``, the internal structure
of the former class needs to be changed. The last major change in this
area was done for Ganeti 2.1 and can be found in the respective
:doc:`design document <design-2.1>`.

The plain list (``[]``) used as a queue is replaced by a heap queue,
similar to the `worker pool`_. The heap or priority queue does
automatic sorting, thereby automatically taking care of priorities.
For each priority there's a plain list with pending acquires, like the
single queue of pending acquires before this change.

When the lock is released, the code locates the list of pending
acquires for the highest priority waiting. The first condition (index
0) is notified. Once all waiting threads received the notification,
the condition is removed from the list. If the list of conditions is
empty it's removed from the heap queue.

Like before, shared acquires are grouped and skip ahead of exclusive
acquires if there's already an existing shared acquire for a priority.
To accomplish this, a separate dictionary of shared acquires per
priority is maintained.

To simplify the code and reduce memory consumption, the concept of the
"active" and "inactive" condition for shared acquires is abolished.
The lock can't predict what priorities the next acquires will use and
even keeping a cache can become computationally expensive for arguable
benefit (the underlying POSIX pipe, see ``pipe(2)``, needs to be
re-created for each notification anyway).

The following diagram shows a possible state of the internal queue
from a high-level view. Conditions are shown as (waiting) threads.
Assuming no modifications are made to the queue (e.g. more acquires or
timeouts), the lock would be acquired by the threads in this order
(concurrent acquires in parentheses): ``threadE1``, ``threadE2``,
(``threadS1``, ``threadS2``, ``threadS3``), (``threadS4``,
``threadS5``), ``threadE3``, ``threadS6``, ``threadE4``, ``threadE5``.

::

  [
    (0, [exc/threadE1, exc/threadE2, shr/threadS1/threadS2/threadS3]),
    (2, [shr/threadS4/threadS5]),
    (10, [exc/threadE3]),
    (33, [shr/threadS6, exc/threadE4, exc/threadE5]),
  ]

IPv6 support
------------

Currently Ganeti does not support IPv6. This is true for nodes as well
as instances.
Because IPv4 exhaustion is threateningly near, the need to use IPv6 is
increasing, especially given that bigger and bigger clusters are
supported.

Supported IPv6 setup
~~~~~~~~~~~~~~~~~~~~

In Ganeti 2.3 we introduce, in addition to the ordinary pure IPv4
setup, a hybrid IPv6/IPv4 mode. The latter works as follows:

- all nodes in a cluster have a primary IPv6 address
- the master has an IPv6 address
- all nodes **must** have a secondary IPv4 address

The reason for this hybrid setup is that key components that Ganeti
depends on do not or only partially support IPv6. More precisely, Xen
does not support instance migration via IPv6 in versions 3.4 and 4.0.
Similarly, KVM does not support instance migration nor VNC access for
IPv6 at the time of this writing.

This led to the decision of not supporting pure IPv6 Ganeti clusters,
as very important cluster operations would not have been possible.
Using IPv4 as secondary address does not affect any of the goals of
the IPv6 support: since secondary addresses do not need to be publicly
accessible, they need not be globally unique. In other words, one can
practically use private IPv4 secondary addresses just for
intra-cluster communication without propagating them across layer 3
boundaries.

netutils: Utilities for handling common network tasks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently common utility functions are kept in the ``utils`` module.
Since this module grows bigger and bigger, network-related functions
are moved to a separate module named *netutils*. Additionally all
these utilities will be IPv6-enabled.

Cluster initialization
~~~~~~~~~~~~~~~~~~~~~~

As mentioned above there will be two different setups in terms of IP
addressing: pure IPv4 and hybrid IPv6/IPv4. To choose between them, a
new cluster init parameter *--primary-ip-version* is introduced. This
is needed as a given name can resolve to both an IPv4 and an IPv6
address on a dual-stack host, effectively making it impossible to
infer that bit.

Once a cluster is initialized and the primary IP version chosen, all
nodes that join have to conform to that setup. In the case of our
IPv6/IPv4 setup all nodes *must* have a secondary IPv4 address.

Furthermore we store the primary IP version in ssconf, which is
consulted every time a daemon starts to determine the default bind
address (either *0.0.0.0* or *::*). In an IPv6/IPv4 setup we need to
bind the Ganeti daemons listening on network sockets to the IPv6
address.

Node addition
~~~~~~~~~~~~~

When adding a new node to an IPv6/IPv4 cluster, it must have an IPv6
address to be used as primary and an IPv4 address used as secondary.
As explained above, every time a daemon is started we use the cluster
primary IP version to determine which address to bind to. The only
exception to this is when a node is added to the cluster. In this case
there is no ssconf available when noded is started and therefore the
correct address needs to be passed to it.

Name resolution
~~~~~~~~~~~~~~~

Since the gethostbyname*() functions do not support IPv6, name
resolution will be done by using the recommended getaddrinfo().
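As a brief illustration (a sketch using the Python standard library,
not the actual netutils code), a family-agnostic lookup could be
performed like this::

  import socket

  def ResolveAddress(hostname, port, family=socket.AF_UNSPEC):
    """Return the first (family, address) pair resolved for hostname.

    With AF_UNSPEC both IPv4 and IPv6 results are considered; passing
    socket.AF_INET6 or socket.AF_INET restricts the lookup to the
    cluster's primary IP version.
    """
    addrinfo = socket.getaddrinfo(hostname, port, family,
                                  socket.SOCK_STREAM)
    (family, _, _, _, sockaddr) = addrinfo[0]
    return (family, sockaddr[0])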
IPv4-only components
~~~~~~~~~~~~~~~~~~~~

============================ =================== ====================
Component                    IPv6 Status         Planned Version
============================ =================== ====================
Xen instance migration       Not supported       Xen 4.1: libxenlight
KVM instance migration       Not supported       Unknown
KVM VNC access               Not supported       Unknown
============================ =================== ====================

Privilege Separation
--------------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In Ganeti 2.2 we introduced privilege separation for the RAPI daemon.
This was done directly in the daemon's code in the process of
daemonizing itself. Doing so leads to several potential issues. For
example, a file could be opened while the code is still running as
``root`` and for some reason not be closed again. Even after changing
the user ID, the file descriptor can be written to.

Implementation
~~~~~~~~~~~~~~

To address these shortcomings, daemons will be started under the
target user right away. The ``start-stop-daemon`` utility used to
start daemons supports the ``--chuid`` option to change user and group
ID before starting the executable.

The intermediate solution for the RAPI daemon from Ganeti 2.2 will be
removed again.

Files written by the daemons may need to have an explicit owner and
group set (easily done through ``utils.WriteFile``).

All SSH-related code is removed from the ``ganeti.bootstrap`` module
and core components and moved to a separate script. The core code will
simply assume a working SSH setup to be in place.

Security Domains
~~~~~~~~~~~~~~~~

In order to separate the permissions of file sets, we separate them
into the following three overall security domains:

1. Public: ``0755`` respectively ``0644``
2. Ganeti wide: shared between the daemons (gntdaemons)
3. Secret files: shared among a specific set of daemons/users

For point 3, this table shows the correlation of the sets to groups
and their users:

=== ========== ============================== ==========================
Set Group      Users                          Description
=== ========== ============================== ==========================
A   gntrapi    gntrapi, gntmasterd            Share data between gntrapi
                                              and gntmasterd
B   gntadmins  gntrapi, gntmasterd, *users*   Shared between users who
                                              need to call gntmasterd
C   gntconfd   gntconfd, gntmasterd           Share data between
                                              gntconfd and gntmasterd
D   gntmasterd gntmasterd                     masterd only; currently
                                              only to redistribute the
                                              configuration, has access
                                              to all files under
                                              ``lib/ganeti``
E   gntdaemons gntmasterd, gntrapi, gntconfd  Shared between the various
                                              Ganeti daemons to exchange
                                              data
=== ========== ============================== ==========================

Restricted commands
~~~~~~~~~~~~~~~~~~~

The following commands still require root permissions to fulfill their
functions::

  gnt-cluster {init|destroy|command|copyfile|rename|masterfailover|renew-crypto}
  gnt-node {add|remove}
  gnt-instance {console}

Directory structure and permissions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Here's how we propose to change the filesystem hierarchy and its
permissions.
Assuming it follows the defaults: ``gnt${daemon}`` for user and the
groups from the section `Security Domains`_::

  ${localstatedir}/lib/ganeti/ (0755; gntmasterd:gntmasterd)
     cluster-domain-secret (0600; gntmasterd:gntmasterd)
     config.data (0640; gntmasterd:gntconfd)
     hmac.key (0440; gntmasterd:gntconfd)
     known_host (0644; gntmasterd:gntmasterd)
     queue/ (0700; gntmasterd:gntmasterd)
       archive/ (0700; gntmasterd:gntmasterd)
         * (0600; gntmasterd:gntmasterd)
       * (0600; gntmasterd:gntmasterd)
     rapi.pem (0440; gntrapi:gntrapi)
     rapi_users (0640; gntrapi:gntrapi)
     server.pem (0440; gntmasterd:gntmasterd)
     ssconf_* (0444; root:gntmasterd)
     uidpool/ (0750; root:gntmasterd)
     watcher.data (0600; root:gntmasterd)
  ${localstatedir}/run/ganeti/ (0770; gntmasterd:gntdaemons)
     socket/ (0750; gntmasterd:gntadmins)
       ganeti-master (0770; gntmasterd:gntadmins)
  ${localstatedir}/log/ganeti/ (0770; gntmasterd:gntdaemons)
     master-daemon.log (0600; gntmasterd:gntdaemons)
     rapi-daemon.log (0600; gntrapi:gntdaemons)
     conf-daemon.log (0600; gntconfd:gntdaemons)
     node-daemon.log (0600; gntnoded:gntdaemons)

Feature changes
===============

External interface changes
==========================

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-restricted-commands.rst0000644000000000000000000000440412244641676021525 0ustar00rootroot00000000000000
Design for executing commands via RPC
=====================================

.. contents:: :depth: 3

Current state and shortcomings
------------------------------

We have encountered situations where a node was no longer responding
to attempts at connecting via SSH, or SSH became unavailable through
other means. Quite often the node daemon is still available, even in
situations where there's little free memory. The latter is due to the
node daemon being locked into main memory using ``mlock(2)``.

Since the node daemon does not allow the execution of arbitrary
commands, quite often the only solution left was either to attempt a
powercycle request via said node daemon or to physically reset the
node.

Proposed changes
----------------

The goal of this design is to allow the execution of non-arbitrary
commands via RPC requests. Since this can be dangerous in case the
cluster certificate (``server.pem``) is leaked, some precautions need
to be taken:

- No parameters may be passed
- No absolute or relative path may be passed, only a filename
- Executable must reside in ``/etc/ganeti/restricted-commands``, which
  must be owned by root:root and have mode 0755 or stricter
- Must be regular files or symlinks
- Must be executable by root:root

There shall be no way to list available commands or to retrieve an
executable's contents. The result from a request to execute a specific
command will either be its output and exit code, or a generic error
message. Only the receiving node's log files shall contain information
as to why executing the command failed.

To slow down dictionary attacks on command names in case an attacker
manages to obtain a copy of ``server.pem``, a system-wide, file-based
lock is acquired before verifying the command name and its executable.
If a command can not be executed for some reason, the lock is only
released with a delay of several seconds, after which the generic
error message will be returned to the caller.

At first, restricted commands will not be made available through the
:doc:`remote API <rapi>`, though that could be done at a later point
(with a separate password).
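A minimal sketch of the verification described above (the helper name
and exact error handling are hypothetical, not part of this design)::

  import os
  import stat

  _CMD_DIR = "/etc/ganeti/restricted-commands"

  def VerifyRestrictedCmd(cmdname):
    """Check a command name against the rules above.

    Returns the full path to the executable; raises an error whose
    details must only end up in the node's log files.
    """
    # Only a plain filename may be passed, never a path
    if not cmdname or "/" in cmdname or cmdname.startswith("."):
      raise RuntimeError("Invalid command name")
    path = os.path.join(_CMD_DIR, cmdname)
    st = os.stat(path)  # follows symlinks, the target must qualify
    if not stat.S_ISREG(st.st_mode):
      raise RuntimeError("Not a regular file")
    if st.st_uid != 0 or st.st_gid != 0:
      raise RuntimeError("Not owned by root:root")
    if st.st_mode & (stat.S_IWGRP | stat.S_IWOTH):
      raise RuntimeError("Writable by group or others")
    if not st.st_mode & stat.S_IXUSR:
      raise RuntimeError("Not executable by root")
    return path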
On the command line, a new sub-command will be added to the
``gnt-node`` script.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-2.2.rst0000644000000000000000000011104712230001635016045 0ustar00rootroot00000000000000
=================
Ganeti 2.2 design
=================

This document describes the major changes in Ganeti 2.2 compared to
the 2.1 version.

The 2.2 version will be a relatively small release. Its main aim is to
avoid changing too much of the core code, while addressing issues and
adding new features and improvements over 2.1, in a timely fashion.

.. contents:: :depth: 4

As for 2.1 we divide the 2.2 design into three areas:

- core changes, which affect the master daemon/job queue/locking or
  all/most logical units
- logical unit/feature changes
- external interface changes (e.g. command line, OS API, hooks, ...)

Core changes
============

Master Daemon Scaling improvements
----------------------------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently the Ganeti master daemon is based on four sets of threads:

- The main thread (1 thread) just accepts connections on the master
  socket
- The client worker pool (16 threads) handles those connections, one
  thread per connected socket, parses luxi requests, and sends data
  back to the clients
- The job queue worker pool (25 threads) executes the actual jobs
  submitted by the clients
- The rpc worker pool (10 threads) interacts with the nodes via
  http-based-rpc

This means that every masterd currently runs 52 threads to do its job.
Being able to reduce the number of thread sets would make the master's
architecture a lot simpler. Moreover, having fewer threads can help
decrease lock contention, log pollution and memory usage. Also, with
the current architecture, masterd suffers from quite a few scalability
issues:

Core daemon connection handling
+++++++++++++++++++++++++++++++

Since the 16 client worker threads handle one connection each, it's
very easy to exhaust them, by just connecting to masterd 16 times and
not sending any data. While we could perhaps make those pools
resizable, increasing the number of threads won't help with lock
contention, nor with better handling of long-running operations
(making sure the client is informed that everything is proceeding and
doesn't need to time out).

Wait for job change
+++++++++++++++++++

The REQ_WAIT_FOR_JOB_CHANGE luxi operation makes the relevant client
thread block on its job for a relatively long time. This is another
easy way to exhaust the 16 client threads, and a place where clients
often time out; moreover this operation is bad for job queue lock
contention (see below).

Job Queue lock
++++++++++++++

The job queue lock is quite heavily contended, and certain easily
reproducible workloads show that it's very easy to put masterd in
trouble: for example, running ~15 background instance reinstall jobs
results in a master daemon that, even without having exhausted the
client worker threads, can't answer simple job list requests, or
submit more jobs.

Currently the job queue lock is an exclusive non-fair lock insulating
the following job queue methods (called by the client workers).
- AddNode
- RemoveNode
- SubmitJob
- SubmitManyJobs
- WaitForJobChanges
- CancelJob
- ArchiveJob
- AutoArchiveJobs
- QueryJobs
- Shutdown

Moreover the job queue lock is acquired outside of the job queue in
two other classes:

- jqueue._JobQueueWorker (in RunTask) before executing the opcode,
  after finishing its execution and when handling an exception.
- jqueue._OpExecCallbacks (in NotifyStart and Feedback) when the
  processor (mcpu.Processor) is about to start working on the opcode
  (after acquiring the necessary locks) and when any data is sent back
  via the feedback function.

Of those, the major critical points are:

- Submit[Many]Job, QueryJobs and WaitForJobChanges, which can easily
  slow down and block client threads up to making the respective
  clients time out.
- The code paths in NotifyStart, Feedback, and RunTask, which slow
  down job processing between clients and otherwise non-related jobs.

To increase the pain:

- WaitForJobChanges is a bad offender because it's implemented with a
  notified condition which awakes waiting threads, which then try to
  acquire the global lock again
- Many should-be-fast code paths are slowed down by replicating the
  change to remote nodes, and thus waiting, with the lock held, on
  remote rpcs to complete (starting, finishing, and submitting jobs)

Proposed changes
~~~~~~~~~~~~~~~~

In order to be able to interact with the master daemon even when it's
under heavy load, and to make it simpler to add core functionality
(such as an asynchronous rpc client), we propose three subsequent
levels of changes to the master core architecture.

After making this change we'll be able to re-evaluate the size of our
thread pool, if we see that we can make most threads in the client
worker pool always idle. In the future we should also investigate
making the rpc client asynchronous as well, so that we can make
masterd a lot smaller in number of threads, and memory size, and thus
also easier to understand, debug, and scale.

Connection handling
+++++++++++++++++++

We'll move the main thread of ganeti-masterd to asyncore, so that it
can share the mainloop code with all other Ganeti daemons. Then all
luxi clients will be asyncore clients, and I/O to/from them will be
handled by the master thread asynchronously. Data will be read from
the client sockets as it becomes available, and kept in a buffer, then
when a complete message is found, it's passed to a client worker
thread for parsing and processing. The client worker thread is
responsible for serializing the reply, which can then be sent
asynchronously by the main thread on the socket.

Wait for job change
+++++++++++++++++++

The REQ_WAIT_FOR_JOB_CHANGE luxi request is changed to be
subscription-based, so that the executing thread doesn't have to be
waiting for the changes to arrive. Threads producing messages (job
queue executors) will make sure that when there is a change another
thread is awakened and delivers it to the waiting clients. This can be
either a dedicated "wait for job changes" thread or pool, or one of
the client workers, depending on what's easier to implement. In either
case the main asyncore thread will only be involved in pushing the
actual data, and not in fetching/serializing it.

Other features to look at, when implementing this code, are:

- Possibility not to need the job lock to know which updates to push:
  if the thread producing the data pushed a copy of the update for the
  waiting clients, the thread sending it won't need to acquire the
  lock again to fetch the actual data.
- Possibility to signal clients that are about to time out, when no
  update has been received, not to despair and to keep waiting (luxi
  level keepalive).
- Possibility to defer updates if they are too frequent, providing
  them at a maximum rate (lower priority).

Job Queue lock
++++++++++++++

In order to decrease the job queue lock contention, we will change the
code paths in the following ways, initially:

- A per-job lock will be introduced. All operations affecting only one
  job (for example feedback, starting/finishing notifications,
  subscribing to or watching a job) will only require the job lock.
  This should be a leaf lock, but if a situation arises in which it
  must be acquired together with the global job queue lock, the global
  one must always be acquired last (for the global section).
- The locks will be converted to a sharedlock. Any read-only operation
  will be able to proceed in parallel.
- During remote update (which happens already per-job) we'll drop the
  job lock level to shared mode, so that activities reading the lock
  (for example job change notifications or QueryJobs calls) will be
  able to proceed in parallel.
- The wait for job changes improvements proposed above will be
  implemented.

In the future other improvements may include splitting off some of the
work (e.g. replication of a job to remote nodes) to a separate thread
pool or asynchronous thread, not tied with the code path for answering
client requests or the one executing the "real" work. This can be
discussed again after we have used the more granular job queue in
production and tested its benefits.

Inter-cluster instance moves
----------------------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

With the current design of Ganeti, moving whole instances between
different clusters involves a lot of manual work. There are several
ways to move instances, one of them being to export the instance,
manually copying all data to the new cluster before importing it
again. Manual changes to the instance's configuration, such as the IP
address, may be necessary in the new environment. The goal is to
improve and automate this process in Ganeti 2.2.

Proposed changes
~~~~~~~~~~~~~~~~

Authorization, Authentication and Security
++++++++++++++++++++++++++++++++++++++++++

Until now, each Ganeti cluster was a self-contained entity and
wouldn't talk to other Ganeti clusters. Nodes within clusters only had
to trust the other nodes in the same cluster and the network used for
replication was trusted, too (hence the ability to use a separate,
local network for replication).

For inter-cluster instance transfers this model must be weakened.
Nodes in one cluster will have to talk to nodes in other clusters,
sometimes in other locations and, very importantly, via untrusted
network connections.

Various options have been considered for securing and authenticating
the data transfer from one machine to another. To reduce the risk of
accidentally overwriting data due to software bugs, authenticating the
arriving data was considered critical. Eventually we decided to use
socat's OpenSSL options (``OPENSSL:``, ``OPENSSL-LISTEN:`` et al),
which provide us with encryption, authentication and authorization
when used with separate keys and certificates.

Combinations of OpenSSH, GnuPG and Netcat were deemed too complex to
set up from within Ganeti. Any solution involving OpenSSH would
require a dedicated user with a home directory and likely automated
modifications to the user's ``$HOME/.ssh/authorized_keys`` file.
When using Netcat, GnuPG or another encryption method would be
necessary to transfer the data over an untrusted network. socat
combines both in one program and is already a dependency.

Each of the two clusters will have to generate an RSA key. The public
parts are exchanged between the clusters by a third party, such as an
administrator or a system interacting with Ganeti via the remote API
("third party" from here on). After receiving each other's public key,
the clusters can start talking to each other.

All encrypted connections must be verified on both sides. Neither side
may accept unverified certificates. The generated certificate should
only be valid for the time necessary to move the instance.

For additional protection of the instance data, the two clusters can
verify the certificates and destination information exchanged via the
third party by checking an HMAC signature using a key shared among the
involved clusters. By default this secret key will be a random string
unique to the cluster, generated by running SHA1 over 20 bytes read
from ``/dev/urandom``, and the administrator must synchronize the
secrets between clusters before instances can be moved. If the third
party does not know the secret, it can't forge the certificates or
redirect the data. Unless disabled by a new cluster parameter,
verifying the HMAC signatures must be mandatory.

The HMAC signature for X509 certificates will be prepended to the
certificate similar to an :rfc:`822` header and only covers the
certificate (from ``-----BEGIN CERTIFICATE-----`` to ``-----END
CERTIFICATE-----``). The header name will be ``X-Ganeti-Signature``
and its value will have the format ``$salt/$hash`` (salt and hash
separated by slash). The salt may only contain characters in the range
``[a-zA-Z0-9]``.

On the web, the destination cluster would be equivalent to an HTTPS
server requiring verifiable client certificates. The browser would be
equivalent to the source cluster and must verify the server's
certificate while providing a client certificate to the server.

Copying data
++++++++++++

To simplify the implementation, we decided to operate at a
block-device level only, allowing us to easily support non-DRBD
instance moves.

Inter-cluster instance moves will re-use the existing export and
import scripts supplied by instance OS definitions. Unlike simply
copying the raw data, this allows one to use filesystem-specific
utilities to dump only used parts of the disk and to exclude certain
disks from the move. Compression should be used to further reduce the
amount of data transferred.

The export script writes all data to stdout and the import script
reads it from stdin again. To avoid copying data and reduce disk space
consumption, everything is read from the disk and sent over the
network directly, where it'll be written to the new block device
directly again.

Workflow
++++++++

#. Third party tells source cluster to shut down instance, asks for
   the instance specification and for the public part of an encryption
   key

   - Instance information can already be retrieved using an existing
     API (``OpInstanceQueryData``).
   - An RSA encryption key and a corresponding self-signed X509
     certificate is generated using the "openssl" command. This key
     will be used to encrypt the data sent to the destination cluster.

     - Private keys never leave the cluster.
     - The public part (the X509 certificate) is signed using HMAC
       with salting and a secret shared between Ganeti clusters.
#. Third party tells destination cluster to create an instance with
   the same specifications as on the source cluster and to prepare for
   an instance move with the key received from the source cluster, and
   receives the public part of the destination's encryption key

   - The current API to create instances (``OpInstanceCreate``) will
     be extended to support an import from a remote cluster.
   - A valid, unexpired X509 certificate signed with the destination
     cluster's secret will be required. By verifying the signature, we
     know the third party didn't modify the certificate.

     - The private keys never leave their cluster, hence the third
       party can not decrypt or intercept the instance's data by
       modifying the IP address or port sent by the destination
       cluster.

   - The destination cluster generates another key and certificate,
     signs and sends it to the third party, who will have to pass it
     to the API for exporting an instance (``OpBackupExport``). This
     certificate is used to ensure we're sending the disk data to the
     correct destination cluster.
   - Once a disk can be imported, the API sends the destination
     information (IP address and TCP port) together with an HMAC
     signature to the third party.

#. Third party hands public part of the destination's encryption key
   together with all necessary information to source cluster and tells
   it to start the move

   - The existing API for exporting instances (``OpBackupExport``)
     will be extended to export instances to remote clusters.

#. Source cluster connects to destination cluster for each disk and
   transfers its data using the instance OS definition's export and
   import scripts

   - Before starting, the source cluster must verify the HMAC
     signature of the certificate and destination information (IP
     address and TCP port).
   - When connecting to the remote machine, strong certificate checks
     must be employed.

#. Due to the asynchronous nature of the whole process, the
   destination cluster checks, after each disk has been transferred,
   whether all disks have been transferred; if so, it destroys the
   encryption key

#. After sending all disks, the source cluster destroys its key

#. Destination cluster runs OS definition's rename script to adjust
   instance settings if needed (e.g. IP address)

#. Destination cluster starts the instance if requested at the
   beginning by the third party

#. Source cluster removes the instance if requested

Instance move in pseudo code
++++++++++++++++++++++++++++

.. highlight:: python

The following pseudo code describes a script moving instances between
clusters and what happens on both clusters.

#. Script is started, gets the instance name and destination cluster::

     (instance_name, dest_cluster_name) = sys.argv[1:]

     # Get destination cluster object
     dest_cluster = db.FindCluster(dest_cluster_name)

     # Use database to find source cluster
     src_cluster = db.FindClusterByInstance(instance_name)

#. Script tells source cluster to stop instance::

     # Stop instance
     src_cluster.StopInstance(instance_name)

     # Get instance specification (memory, disk, etc.)
     inst_spec = src_cluster.GetInstanceInfo(instance_name)

     (src_key_name, src_cert) = src_cluster.CreateX509Certificate()
#. ``CreateX509Certificate`` on source cluster::

     key_file = mkstemp()
     cert_file = "%s.cert" % key_file
     RunCmd(["/usr/bin/openssl", "req", "-new",
             "-newkey", "rsa:1024", "-days", "1",
             "-nodes", "-x509", "-batch",
             "-keyout", key_file, "-out", cert_file])

     plain_cert = utils.ReadFile(cert_file)

     # HMAC sign using secret key, this adds a "X-Ganeti-Signature"
     # header to the beginning of the certificate
     signed_cert = utils.SignX509Certificate(plain_cert,
       utils.ReadFile(constants.X509_SIGNKEY_FILE))

     # The certificate now looks like the following:
     #
     #   X-Ganeti-Signature: $1234$28676f0516c6ab68062b[…]
     #   -----BEGIN CERTIFICATE-----
     #   MIICsDCCAhmgAwIBAgI[…]
     #   -----END CERTIFICATE-----

     # Return name of key file and signed certificate in PEM format
     return (os.path.basename(key_file), signed_cert)

#. Script creates instance on destination cluster and waits for move
   to finish::

     dest_cluster.CreateInstance(mode=constants.REMOTE_IMPORT,
                                 spec=inst_spec,
                                 source_cert=src_cert)

     # Wait until destination cluster gives us its certificate and the
     # destination information for all disks
     dest_cert = None
     disk_info = {}
     while not (dest_cert and len(disk_info) == len(inst_spec.disks)):
       tmp = dest_cluster.WaitOutput()
       if tmp is Certificate:
         dest_cert = tmp
       elif tmp is DiskInfo:
         # DiskInfo contains destination address and port
         disk_info[tmp.index] = tmp

     # Tell source cluster to export disks
     for disk in disk_info.values():
       src_cluster.ExportDisk(instance_name, disk=disk,
                              key_name=src_key_name,
                              dest_cert=dest_cert)

     print ("Instance %s successfully moved to %s" %
            (instance_name, dest_cluster.name))

#. ``CreateInstance`` on destination cluster::

     # …

     if mode == constants.REMOTE_IMPORT:
       # Make sure certificate was not modified since it was generated
       # by source cluster (which must use the same secret)
       if (not utils.VerifySignedX509Cert(source_cert,
             utils.ReadFile(constants.X509_SIGNKEY_FILE))):
         raise Error("Certificate not signed with this cluster's secret")

       if utils.CheckExpiredX509Cert(source_cert):
         raise Error("X509 certificate is expired")

       source_cert_file = utils.WriteTempFile(source_cert)

       # See above for X509 certificate generation and signing
       (key_name, signed_cert) = CreateSignedX509Certificate()

       SendToClient("x509-cert", signed_cert)

       for disk in instance.disks:
         # Start socat
         RunCmd(("socat"
                 " OPENSSL-LISTEN:%s,…,key=%s,cert=%s,cafile=%s,verify=1"
                 " stdout > /dev/disk…") %
                (port, GetRsaKeyPath(key_name, private=True),
                 GetRsaKeyPath(key_name, private=False),
                 source_cert_file))
         SendToClient("send-disk-to", disk, ip_address, port)

       DestroyX509Cert(key_name)

       RunRenameScript(instance_name)

#. ``ExportDisk`` on source cluster::

     # Make sure certificate was not modified since it was generated
     # by destination cluster (which must use the same secret)
     if (not utils.VerifySignedX509Cert(cert_pem,
           utils.ReadFile(constants.X509_SIGNKEY_FILE))):
       raise Error("Certificate not signed with this cluster's secret")

     if utils.CheckExpiredX509Cert(cert_pem):
       raise Error("X509 certificate is expired")

     dest_cert_file = utils.WriteTempFile(cert_pem)

     # Start socat
     RunCmd(("socat stdin"
             " OPENSSL:%s:%s,…,key=%s,cert=%s,cafile=%s,verify=1"
             " < /dev/disk…") %
            (disk.host, disk.port,
             GetRsaKeyPath(key_name, private=True),
             GetRsaKeyPath(key_name, private=False),
             dest_cert_file))

     if instance.all_disks_done:
       DestroyX509Cert(key_name)

.. highlight:: text

Miscellaneous notes
+++++++++++++++++++

- A very similar system could also be used for instance exports within
  the same cluster. Currently OpenSSH is being used, but could be
  replaced by socat and SSL/TLS.
- During the design of inter-cluster instance moves we also discussed
  encrypting instance exports using GnuPG.
- While most instances should have exactly the same configuration as
  on the source cluster, setting them up with a different disk layout
  might be helpful in some use-cases.
- A cleanup operation, similar to the one available for failed
  instance migrations, should be provided.
- ``ganeti-watcher`` should remove instances pending a move from
  another cluster after a certain amount of time. This takes care of
  failures somewhere in the process.
- RSA keys can be generated using the existing
  ``bootstrap.GenerateSelfSignedSslCert`` function, though it might be
  useful to not write both parts into a single file, requiring small
  changes to the function. The public part always starts with
  ``-----BEGIN CERTIFICATE-----`` and ends with ``-----END
  CERTIFICATE-----``.
- The source and destination cluster might be different when it comes
  to available hypervisors, kernels, etc. The destination cluster
  should refuse to accept an instance move if it can't fulfill an
  instance's requirements.

Privilege separation
--------------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

All Ganeti daemons are run under the user root. This is not ideal from
a security perspective, as any exploited daemon gives the attacker
full access to the system.

In order to overcome this situation we'll allow Ganeti to run its
daemons under different users and a dedicated group. This also has
some side effects, like letting users run some ``gnt-*`` commands if
they are in the same group.

Implementation
~~~~~~~~~~~~~~

For Ganeti 2.2 the implementation will be focused on the RAPI daemon
only. This involves changes to ``daemons.py`` so it's possible to drop
privileges when daemonizing the process. This will, however, be a
short-term solution, which will be replaced by a privilege drop
already on daemon startup in Ganeti 2.3.

It also needs changes in the master daemon to create the socket with
new permissions/owners to allow RAPI access. There will be no other
permission/owner changes in the file structure, as the RAPI daemon is
started with root permissions. In that time it will read all needed
files and then drop privileges before contacting the master daemon.

Feature changes
===============

KVM Security
------------

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Currently all kvm processes run as root. Taking ownership of the
hypervisor process, from inside a virtual machine, would mean a full
compromise of the whole Ganeti cluster: knowledge of all Ganeti
authentication secrets, full access to all running instances, and the
option of subverting other basic services on the cluster (e.g. ssh).

Proposed changes
~~~~~~~~~~~~~~~~

We would like to decrease the attack surface available if a hypervisor
is compromised. We can do so by adding different features to Ganeti,
which will allow restricting what a compromised hypervisor can do, in
the absence of a local privilege escalation attack, to subvert the
node.

Dropping privileges in kvm to a single user (easy)
++++++++++++++++++++++++++++++++++++++++++++++++++

By passing the ``-runas`` option to kvm, we can make it drop
privileges. The user can be chosen by a hypervisor parameter, so that
each instance can have its own user, but by default they will all run
under the same one. This should be very easy to implement, and can
easily be backported to 2.1.X.
This mode protects the Ganeti cluster from a subverted hypervisor, but doesn't protect the instances from each other, unless care is taken to specify a different user for each. This would prevent the worst attacks, including:

- logging in to other nodes
- administering the Ganeti cluster
- subverting other services

But the following would remain an option:

- terminating other VMs (but not starting them again, as that requires root privileges to set up networking) (unless different users are used)
- tracing other VMs, and probably subverting them and accessing their data (unless different users are used)
- sending network traffic from the node
- reading unprotected data on the node filesystem

Running kvm in a chroot (slightly harder)
+++++++++++++++++++++++++++++++++++++++++

By passing the ``-chroot`` option to kvm, we can restrict the kvm process to its own (possibly empty) root directory. We need to set this area up so that the instance disks and control sockets are accessible, so it would require slightly more work at the Ganeti level.

For an attacker who has broken into a chrooted kvm process, this would mean:

- far fewer options to find a local privilege escalation vector
- no way to write local data, if the chroot is set up correctly
- no way to read filesystem data on the host

It would still be possible though to:

- terminate other VMs
- trace other VMs, and possibly subvert them (if a tracer can be installed in the chroot)
- send network traffic from the node

Running kvm with a pool of users (slightly harder)
++++++++++++++++++++++++++++++++++++++++++++++++++

If, rather than passing a single user as a hypervisor parameter, we have a pool of usable ones, we can dynamically choose a free one to use and thus guarantee that each machine will be separate from the others, without putting the burden of this on the cluster administrator.

This would make interference between machines impossible, and can still be combined with the chroot benefits.

Running iptables rules to limit network interaction (easy)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

These don't need to be handled by Ganeti, but we can ship examples. If the users used to run VMs were blocked from sending some or all network traffic, it would become impossible for a compromised hypervisor to send arbitrary data on the node network. This is especially useful when the instance and the node network are separated (using ganeti-nbma or a separate set of network interfaces), or when a separate replication network is maintained. We need to experiment to see how much restriction we can properly apply, without limiting the instances' legitimate traffic.

Running kvm inside a container (even harder)
++++++++++++++++++++++++++++++++++++++++++++

Recent Linux kernels support different process namespaces through control groups. PIDs, users, filesystems and even network interfaces can be separated. If we can set up Ganeti to run kvm in a separate container we could insulate all the host processes from even being visible if the hypervisor gets broken into. Most probably separating the network namespace would require one extra hop in the host, through a veth interface, thus reducing performance, so we may want to avoid that, and just rely on iptables.

Implementation plan
~~~~~~~~~~~~~~~~~~~

We will first implement dropping privileges for kvm processes as a single user, and most probably backport it to 2.1. Then we'll ship example iptables rules to show how the user can be limited in its network activities.
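To make the iptables idea concrete, here is a minimal sketch of rules blocking all locally generated traffic of the users running VMs; the user names below are assumptions, and such rules would be shipped as examples, not installed by Ganeti itself:

.. code-block:: python

  # Sketch only: block all traffic generated by the (assumed) users
  # that run KVM processes. The iptables "owner" match works in the
  # OUTPUT chain, i.e. exactly on traffic a subverted kvm process
  # would send from the node.
  import subprocess

  VM_USERS = ["kvm-pool-1", "kvm-pool-2"]

  for user in VM_USERS:
    subprocess.check_call(["iptables", "-A", "OUTPUT",
                           "-m", "owner", "--uid-owner", user,
                           "-j", "DROP"])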
After that we'll implement chroot restriction for kvm processes, and extend the user limitation to use a user pool.

Finally we'll look into namespaces and containers, although that might slip after the 2.2 release.


New OS states
-------------

Separate from the OS external changes, described below, we'll add some internal changes to the OS.

Current state and shortcomings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

There are two issues related to the handling of the OSes.

First, it's impossible to disable an OS for new instances, since that will also break reinstallations and renames of existing instances. To phase out an OS definition, without actually having to modify the OS scripts, it would be ideal to be able to restrict new installations but keep the rest of the functionality available.

Second, ``gnt-instance reinstall --select-os`` shows all the OSes available on the cluster. Some OSes might exist only for debugging and diagnosis, and not for end-user availability. For this, it would be useful to "hide" a set of OSes, but keep them otherwise functional.

Proposed changes
~~~~~~~~~~~~~~~~

Two new cluster-level attributes will be added, holding the list of OSes hidden from the user and respectively the list of OSes which are blacklisted from new installations. These lists will be modifiable via ``gnt-os modify`` (implemented via ``OpClusterSetParams``), such that even not-yet-existing OSes can be preseeded into a given state.

For the hidden OSes, they are fully functional except that they are not returned in the default OS list (as computed via ``OpOsDiagnose``), unless the hidden state is requested.

For the blacklisted OSes, they are also not shown (unless the blacklisted state is requested), and they are also prevented from installation via ``OpInstanceCreate`` (in create mode).

Both these attributes are per-OS, not per-variant. Thus they apply to all of an OS' variants, and it's impossible to blacklist or hide just one variant. Further improvements might allow a given OS variant to be blacklisted, as opposed to whole OSes.


External interface changes
==========================

OS API
------

The OS variants implementation in Ganeti 2.1 didn't prove to be useful enough to alleviate the need to hack around the Ganeti API in order to provide flexible OS parameters.

As such, for Ganeti 2.2 we will provide support for arbitrary OS parameters. However, since OSes are not registered in Ganeti, but instead discovered at runtime, the interface is not entirely straightforward.

Furthermore, to support the system administrator in keeping OSes properly in sync across the nodes of a cluster, Ganeti will also verify (if existing) the consistency of a new ``os_version`` file.

These changes to the OS API will bump the API version to 20.

OS version
~~~~~~~~~~

A new ``os_version`` file will be supported by Ganeti. This file is not required, but if existing, its contents will be checked for consistency across nodes. The file should hold only one line of text (any extra data will be discarded), and its contents will be shown in the OS information and diagnose commands.

It is recommended that OS authors change the contents of this file for any modification; at a minimum, modifications that change the behaviour of import/export scripts must increase the version, since they break intra-cluster migration.

Parameters
~~~~~~~~~~

The interface between Ganeti and the OS scripts will be based on environment variables, and as such the parameters and their values will need to be valid in this context.
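As a quick sketch of the mechanism (the translation rule is the one described under the *Environment variables* heading below; the function name is illustrative, not actual Ganeti code):

.. code-block:: python

  # Sketch of turning a parameter name/value dict into the OSP_
  # environment variables passed to the OS scripts; the function name
  # is illustrative only.
  def BuildOSParamEnv(osparams):
    env = {}
    for name, value in osparams.items():
      # parameter names are lower-case, the environment is upper-case
      env["OSP_%s" % name.upper()] = value
    return env

  print(BuildOSParamEnv({"ns1": "192.0.2.1", "rootfs_size": "10G"}))
  # {'OSP_NS1': '192.0.2.1', 'OSP_ROOTFS_SIZE': '10G'}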
Names
+++++

The parameter names will be declared in a new file, ``parameters.list``, together with a one-line documentation (whitespace-separated). Example::

  $ cat parameters.list
  ns1 Specifies the first name server to add to /etc/resolv.conf
  extra_packages Specifies additional packages to install
  rootfs_size Specifies the root filesystem size (the rest will be left unallocated)
  track  Specifies the distribution track, one of 'stable', 'testing' or 'unstable'

As seen above, the documentation can be separated from the names via multiple spaces/tabs. The parameter names as read from the file will be used for the command line interface in lowercased form; as such, no two parameters should differ only in case.

Values
++++++

The values of the parameters are, from Ganeti's point of view, completely freeform. If a given parameter has, from the OS' point of view, a fixed set of valid values, these should be documented as such and verified by the OS, but Ganeti will not handle such parameters specially.

An empty value must be handled identically to a missing parameter. In other words, the validation script should only test for non-empty values, and not for declared versus undeclared parameters.

Furthermore, each parameter should have an (internal to the OS) default value, that will be used if not passed from Ganeti. More precisely, it should be possible for any parameter to specify a value that will have the same effect as not passing the parameter, and in no case should the absence of a parameter be treated as an exceptional case (outside the value space).

Environment variables
^^^^^^^^^^^^^^^^^^^^^

The parameters will be exposed in the environment in upper case and prefixed with the string ``OSP_``. For example, a parameter declared in the ``parameters.list`` file as ``ns1`` will appear in the environment as the variable ``OSP_NS1``.

Validation
++++++++++

For the purpose of parameter name/value validation, the OS scripts *must* provide an additional script, named ``verify``. This script will be called with the argument ``parameters``, and all the parameters will be passed in via environment variables, as described above.

The script should signify success/failure based on its exit code, and show explanatory messages either on its standard output or standard error. These messages will be passed on to the master, and stored in the OpCode result/error message.

The parameters must be constructed to be independent of the instance specifications. In general, the validation script will only be called with the parameter variables set, but not with the normal per-instance variables, in order for Ganeti to be able to validate default parameters too, when they change. Validation will only be performed on one cluster node, and it will be up to the Ganeti administrator to keep the OS scripts in sync between all nodes.

Instance operations
+++++++++++++++++++

The parameters will be passed, as described above, to all the other instance operations (creation, import, export). Ideally, these scripts will not abort with parameter validation errors if the ``verify`` script has verified them correctly.

Note: when changing an instance's OS type, any OS parameters defined at instance level will be kept as-is. If the parameters differ between the new and the old OS, the user should manually remove/update them as needed.
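To make the ``verify`` contract described above concrete, a minimal OS-side validation script could look as follows; the parameter names come from the example ``parameters.list``, and everything else here is an assumption, not part of the API:

.. code-block:: python

  #!/usr/bin/env python
  # Sketch of an OS "verify" script, called as "verify parameters",
  # with all parameters passed in as OSP_* environment variables.
  import os
  import sys

  VALID_TRACKS = frozenset(["stable", "testing", "unstable"])

  def main():
    if len(sys.argv) != 2 or sys.argv[1] != "parameters":
      sys.stderr.write("Usage: verify parameters\n")
      return 1
    # Only check non-empty values: an empty value must behave exactly
    # like a missing parameter
    track = os.environ.get("OSP_TRACK", "")
    if track and track not in VALID_TRACKS:
      sys.stderr.write("Invalid track '%s'\n" % track)
      return 1
    return 0

  if __name__ == "__main__":
    sys.exit(main())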
Declaration and modification
++++++++++++++++++++++++++++

Since the OSes are not registered in Ganeti, we will only make a 'weak' link between the parameters as declared in Ganeti and the actual OSes existing on the cluster.

It will be possible to declare parameters either globally, per cluster (where they are indexed per OS/variant), or individually, per instance. The declaration of parameters will not be tied to currently existing OSes. When specifying a parameter, if the OS exists, it will be validated; if not, then it will simply be stored as-is.

A special note is that it will not be possible to 'unset' at instance level a parameter that is declared globally. Instead, at instance level the parameter should be given an explicit value, or the default value as explained above.

CLI interface
+++++++++++++

The modification of global (default) parameters will be done via the ``gnt-os`` command, and the per-instance parameters via the ``gnt-instance`` command. Both these commands will take an additional ``--os-parameters`` or ``-O`` flag that specifies the parameters in the familiar comma-separated, key=value format. For removing a parameter, a ``-key`` syntax will be used, e.g.::

  # initial modification
  $ gnt-instance modify -O use_dhcp=true instance1
  # later revert (to the cluster default, or the OS default if not
  # defined at cluster level)
  $ gnt-instance modify -O -use_dhcp instance1

Internal storage
++++++++++++++++

Internally, the OS parameters will be stored in a new ``osparams`` attribute. The global parameters will be stored on the cluster object, and the value of this attribute will be a dictionary indexed by OS name (this also accepts an OS+variant name, which will override a simple OS name, see below), with the parameter name/value dictionaries as values. For the instances, the value will be directly the parameter name/value dictionary.

Overriding rules
++++++++++++++++

Any instance-specific parameters will override any variant-specific parameters, which in turn will override any global parameters. The global parameters, in turn, override the built-in defaults (of the OS scripts).

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-bulk-create.rst0000644000000000000000000000636012244641676017757 0ustar00rootroot00000000000000
==================
Ganeti Bulk Create
==================

.. contents::
   :depth: 4

.. highlight:: python

Current state and shortcomings
==============================

Creation of instances happens a lot. A fair amount of load is caused by just creating instances and, due to bad allocation, shifting them around later again. Additionally, when you turn up a new cluster you usually already know a set of instances which need to exist on the cluster. Creating them one-by-one is not only cumbersome but might also fail, due to lack of resources, or lead to badly balanced clusters.

Since the early Ganeti 2.0 alpha version there is a ``gnt-instance batch-create`` command to allocate a bunch of instances based on a json file. This feature, however, doesn't take any advantage of the iallocator and submits jobs in a serialized manner.

Proposed changes
----------------

To overcome this shortcoming we would extend the current iallocator interface to allow bulk requests. On the Ganeti side, a new opcode is introduced to handle the bulk creation and returning the resulting placement from the IAllocator_.
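As a rough illustration of what such a bulk request could look like on the iallocator side (see IAllocator_ below for the new ``multi-allocate`` type; the per-instance fields are meant to mirror the existing ``allocate`` request format, and all concrete values here are made up):

.. code-block:: python

  # Sketch only: a "multi-allocate" request as a Python structure.
  # The per-instance field names follow the existing single-instance
  # allocate requests; the values are illustrative.
  request = {
    "type": "multi-allocate",
    "request": [
      {"name": "instance1.example.com", "os": "debootstrap",
       "disk_template": "drbd", "disks": [{"size": 1024}],
       "memory": 512, "vcpus": 1, "nics": [], "tags": []},
      {"name": "instance2.example.com", "os": "debootstrap",
       "disk_template": "plain", "disks": [{"size": 2048}],
       "memory": 1024, "vcpus": 2, "nics": [], "tags": []},
    ],
  }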
Problems
--------

Due to the design of chained jobs, we can guarantee that, given the state at which the ``multi-alloc`` opcode runs, either all of the instances will fit or none will. But we can't guarantee that no other jobs sneak in between the submission of the instance creation requests and their execution. This might still lead to failing jobs because the resources have changed in the meantime.

Implementation
==============

IAllocator
----------

A new additional ``type`` will be added called ``multi-allocate`` to distinguish between normal and bulk operation. For the bulk operation the ``request`` will be a finite list of request dicts.

If ``multi-allocate`` is declared, ``request`` must exist and is a list of ``request`` dicts as described in :doc:`Operation specific input `. The ``result`` then is a list of instance name and node placements in the order of the ``request`` field.

In addition, the old ``allocate`` request type will be deprecated and, at the latest in Ganeti 2.8, incorporated into this new request. Current code will need slight adaptation to work with the new request; this needs careful testing.

OpInstanceBulkAdd
-----------------

We add a new opcode ``OpInstanceBulkAdd``. It receives a list of ``OpInstanceCreate`` on the ``instances`` field. This is done to make sure that these two loosely coupled opcodes do not get out of sync. On the RAPI side, however, this is just a list of instance create definitions, and the client is adapted accordingly.

The opcode itself does some sanity checks on the instance creation opcodes, which include:

* ``mode`` is not set
* ``pnode`` and ``snodes`` are not set
* ``iallocator`` is not set

Violating any of the above checks will abort the opcode with ``OpPrereqError``.

Once the list has been verified it is handed to the ``iallocator`` as described in IAllocator_. Upon success we then return the result of the IAllocator_ call.

At this point the current instance allocation would work with the resources available on the cluster as perceived upon ``OpInstanceBulkAdd`` invocation. However, there might be corner cases where this is not true as described in Problems_.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/css/0000755000000000000000000000000012271445544014337 5ustar00rootroot00000000000000ganeti-2.9.3/doc/css/style.css0000644000000000000000000000070612271422343016204 0ustar00rootroot00000000000000
@import url(default.css);

a.external {
  /* Based on MediaWiki's monobook skin (licenced as GPL) */
  background: url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs%2B9AAAAVklEQVR4Xn3PgQkAMQhDUXfqTu7kTtkpd5RA8AInfArtQ2iRXFWT2QedAfttj2FsPIOE1eCOlEuoWWjgzYaB%2FIkeGOrxXhqB%2BuA9Bfcm0lAZuh%2BYIeAD%2BcAqSz4kCMUAAAAASUVORK5CYII%3D") no-repeat scroll right center transparent;
  padding-right: 13px;
}

a {
  text-decoration: underline;
}
ganeti-2.9.3/doc/design-opportunistic-locking.rst0000644000000000000000000001321412244641676022123 0ustar00rootroot00000000000000
Design for parallelized instance creations and opportunistic locking
====================================================================

.. contents::
   :depth: 3

Current state and shortcomings
------------------------------

As of Ganeti 2.6, instance creations acquire all node locks when an :doc:`instance allocator ` (henceforth "iallocator") is used. In situations where many instances should be created in a short timeframe, there is a lot of congestion on node locks. Effectively all instance creations are serialized, even on big clusters with multiple groups.
The situation gets worse when disk wiping is enabled (see :manpage:`gnt-cluster(8)`), as that can take, depending on disk size and hardware performance, from minutes to hours. Not waiting for DRBD disks to synchronize (``wait_for_sync=false``) makes instance creations slightly faster, but there's a risk of impacting I/O of other instances.

Proposed changes
----------------

The target is to speed up instance creations in combination with an iallocator even when the cluster's balance is sacrificed in the process. The cluster can later be re-balanced using ``hbal``. The main objective is to reduce the number of node locks acquired for creation and to release unused locks as fast as possible (the latter is already being done). To do this safely, several changes are necessary.

Locking library
~~~~~~~~~~~~~~~

Instead of forcibly acquiring all node locks for creating an instance using an iallocator, only those currently available will be acquired.

To this end, the locking library must be extended to implement opportunistic locking. Lock sets must be able to acquire only those locks available at the time, ignoring and not waiting for locks held by another thread.

Locks (``SharedLock``) already support a timeout of zero. The latter is different from a blocking acquisition, in which case the timeout would be ``None``.

Lock sets can essentially be acquired in two different modes. One is to acquire the whole set, which in turn will also block adding new locks from other threads, and the other is to acquire specific locks by name. The function to acquire locks in a set accepts a timeout which, if not ``None`` for blocking acquisitions, counts for the whole duration of acquiring, if necessary, the lock set's internal lock, as well as the member locks. For opportunistic acquisitions the timeout is only meaningful when acquiring the whole set, in which case it is only used for acquiring the set's internal lock (used to block lock additions). For acquiring member locks the timeout is effectively zero to make them opportunistic.

A new and optional boolean parameter named ``opportunistic`` is added to ``LockSet.acquire`` and re-exported through ``GanetiLockManager.acquire`` for use by ``mcpu``. Internally, lock sets do the lock acquisition using a helper function, ``__acquire_inner``. It will be extended to support opportunistic acquisitions. The algorithm is very similar to acquiring the whole set, with the difference that acquisitions timing out will be ignored (the timeout in this case is zero).

New lock level
~~~~~~~~~~~~~~

With opportunistic locking used for instance creations (controlled by a parameter), multiple such requests can start at (essentially) the same time and compete for node locks. Some logical units, such as ``LUClusterVerifyGroup``, need to acquire all node locks. In the latter case all instance allocations would fail to get their locks. This also applies when multiple instance creations are started at roughly the same time.

To avoid situations where an opcode holding all or many node locks causes allocations to fail, a new lock level must be added to control allocations. The logical units for instance failover and migration can only safely determine whether they need all node locks after the instance lock has been acquired. Therefore the new lock level, named "node-alloc" (shorthand for "node-allocation"), will be inserted after instances (``LEVEL_INSTANCE``) and before node groups (``LEVEL_NODEGROUP``).
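A sketch of the resulting level ordering and of an opportunistic acquisition follows; only the relative order of the levels and the ``opportunistic`` flag come from this design, the numeric values and surrounding names are illustrative:

.. code-block:: python

  # Sketch only: the proposed lock level order. Numeric values are
  # illustrative; only the relative order comes from this design.
  (LEVEL_CLUSTER,     # the "big cluster lock" (BGL)
   LEVEL_INSTANCE,
   LEVEL_NODE_ALLOC,  # new "node-alloc" level (see below)
   LEVEL_NODEGROUP,
   LEVEL_NODE) = range(5)

  def AcquireNodesOpportunistically(lockset, wanted):
    # Return whatever subset of the wanted node locks could be
    # acquired without waiting; locks held by other threads are
    # simply skipped
    return lockset.acquire(wanted, opportunistic=True)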
Similar to the "big cluster lock" ("BGL"), there is only a single lock at this level, whose name is "node allocation lock" ("NAL").

As a rule of thumb, the node allocation lock must be acquired in the same mode as nodes and/or node resources. If all or a large number of node locks are acquired, the node allocation lock should be acquired as well. Special attention should be given to logical units started for all node groups, such as ``LUGroupVerifyDisks``, as they also block many nodes over a short amount of time.

iallocator
~~~~~~~~~~

The :doc:`iallocator interface ` does not need any modification. When an instance is created, the information for all nodes is passed to the iallocator plugin. Nodes for which the lock couldn't be acquired, and which therefore shouldn't be used for the instance in question, will be shown as offline.

Opcodes
~~~~~~~

The opcodes ``OpInstanceCreate`` and ``OpInstanceMultiAlloc`` will gain a new parameter to enable opportunistic locking. By default this mode is disabled so as not to break backwards compatibility.

A new error type is added to describe a temporary lack of resources. Its name will be ``ECODE_TEMP_NORES``. With opportunistic locks the opcodes mentioned before only have a partial view of the cluster and can no longer decide if an instance could not be allocated due to the locks it has been given or whether the whole cluster is lacking resources. Therefore it is required, upon encountering the error code for a temporary lack of resources, for the job submitter to make this decision by re-submitting the job or by re-directing it to another cluster.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-htools-2.3.rst0000644000000000000000000003143512230001635017350 0ustar00rootroot00000000000000
====================================
  Synchronising htools to Ganeti 2.3
====================================

Ganeti 2.3 introduces a number of new features that change the cluster internals significantly enough that the htools suite needs to be updated accordingly in order to function correctly.

Shared storage support
======================

Currently, the htools algorithms presume a model where all of an instance's resources are served from within the cluster, more specifically from the nodes comprising the cluster. While this is usual for memory and CPU, deployments which use shared storage will invalidate this assumption for storage.

To account for this, we need to move some assumptions from being implicit (and hardcoded) to being explicitly exported from Ganeti.

New instance parameters
-----------------------

It is presumed that Ganeti will export for all instances a new ``storage_type`` parameter, that will denote either internal storage (e.g. *plain* or *drbd*), or external storage.

Furthermore, a new ``storage_pool`` parameter will classify, for both internal and external storage, the pool out of which the storage is allocated. For internal storage, this will be either ``lvm`` (the pool that provides space to both ``plain`` and ``drbd`` instances) or ``file`` (for file-storage-based instances). For external storage, this will be the respective NAS/SAN/cloud storage that backs up the instance. Note that for htools, external storage pools are opaque; we only care that they have an identifier, so that we can distinguish between two different pools.

If these two parameters are not present, the instances will be presumed to be ``internal/lvm``.
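A sketch of this defaulting rule as htools could apply it when parsing instance data (illustrative Python, although htools itself is written in Haskell; the field and function names are assumptions):

.. code-block:: python

  # Sketch of the proposed defaulting: instances not exporting the
  # new parameters are treated as internal/lvm.
  def GetStorageInfo(instance_data):
    """Return the (storage_type, storage_pool) pair for an instance."""
    stype = instance_data.get("storage_type", "internal")
    if stype == "internal":
      # "lvm" backs both plain and drbd instances; "file" would be
      # the other internal pool
      return (stype, instance_data.get("storage_pool", "lvm"))
    # external pools are opaque identifiers (NAS/SAN/cloud storage)
    return (stype, instance_data["storage_pool"])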
New node parameters
-------------------

For each node, it is expected that Ganeti will export what storage types it supports and what pools it has access to. So a classic 2.2 cluster will have all nodes supporting ``internal/lvm`` and/or ``internal/file``, whereas a new shared-storage-only 2.3 cluster could have ``external/my-nas`` storage.

Whatever the mechanism that Ganeti will use internally to configure the associations between nodes and storage pools, we assume that two node attributes will be available inside htools: the list of internal and external storage pools.

External storage and instances
------------------------------

Currently, for an instance we allow one cheap move type: failover to the current secondary, if it is a healthy node, and four other “expensive” (as in, including data copies) moves that involve changing either the secondary or the primary node or both.

In presence of an external storage type, the following things will change:

- the disk-based moves will be disallowed; this is already a feature in the algorithm, controlled by a boolean switch, so adapting external storage here will be trivial
- instead of the current one secondary node, the secondaries will become a list of potential secondaries, based on access to the instance's storage pool

Except for this, the basic move algorithm remains unchanged.

External storage and nodes
--------------------------

Two separate areas will have to change for nodes and external storage. First, when allocating instances (either as part of a move or a new allocation), if the instance is using external storage, then the internal disk metrics should be ignored (for both the primary and secondary cases).

Second, the per-node metrics used in the cluster scoring must take into account that nodes might not have internal storage at all, and handle this as a well-balanced case (score 0).

N+1 status
----------

Currently, computing the N+1 status of a node is simple:

- group the current secondary instances by their primary node, and compute the sum of each instance group's memory
- choose the maximum sum, and check if it's smaller than the current available memory on this node

In effect, computing the N+1 status is a per-node matter. However, with shared storage, we don't have secondary nodes, just potential secondaries. Thus computing the N+1 status will be a cluster-level matter, and much more expensive.

A simple version of the N+1 checks would be that for each instance having said node as primary, we have enough memory in the cluster for relocation. This means we would actually need to run allocation checks, and update the cluster status from within allocation on one node, while being careful that we don't recursively check N+1 status during this relocation, which is too expensive.

However, the shared storage model has some properties that change the rules of the computation. Speaking broadly (and ignoring hard restrictions like tag-based exclusion and CPU limits), the exact location of an instance in the cluster doesn't matter as long as memory is available. This results in two changes:

- simply tracking the amount of free memory buckets is enough, cluster-wide
- moving an instance from one node to another would not change the N+1 status of any node, and only allocation needs to deal with N+1 checks

Unfortunately, this very cheap solution fails in case of any other exclusion or prevention factors.

TODO: find a solution for N+1 checks.
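For reference, the current per-node check described at the start of this section can be sketched as follows (illustrative Python, not htools' actual Haskell implementation):

.. code-block:: python

  # Sketch of the existing per-node N+1 check: for every potential
  # failed peer, sum the memory of its instances that have this node
  # as secondary, and compare the worst case with this node's free
  # memory.
  def PassesN1Check(node, instances):
    per_primary = {}
    for inst in instances:
      if inst.secondary == node.name:
        per_primary[inst.primary] = \
          per_primary.get(inst.primary, 0) + inst.memory
    worst_case = max(per_primary.values()) if per_primary else 0
    return worst_case <= node.free_memory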
Node groups support
===================

The addition of node groups has a small impact on the actual algorithms, which will simply operate at node group level instead of cluster level, but it requires the addition of new algorithms for inter-node-group operations.

The following definitions will be used in the following paragraphs:

local group
  The local group refers to a node's own node group, or when speaking about an instance, the node group of its primary node

regular cluster
  A cluster composed of a single node group, or a pre-2.3 cluster

super cluster
  This term refers to a cluster which comprises multiple node groups, as opposed to a 2.2 and earlier cluster with a single node group

In all the below operations, it's assumed that Ganeti can gather the entire super cluster state cheaply.

Balancing changes
-----------------

Balancing will move from cluster-level balancing to group balancing. In order to achieve a reasonable improvement in a super cluster, without needing to keep state of what groups have already been balanced previously, the balancing algorithm will run as follows:

#. the cluster data is gathered
#. if this is a regular cluster, as opposed to a super cluster, balancing will proceed normally as previously
#. otherwise, compute the cluster scores for all groups
#. choose the group with the worst score and see if we can improve it; if not, choose the next-worst group, and so on
#. once a group has been identified, run the balancing for it

Of course, explicit selection of a group will be allowed.

Super cluster operations
++++++++++++++++++++++++

Besides the regular group balancing, in a super cluster we have more operations.

Redistribution
^^^^^^^^^^^^^^

In a regular cluster, once we run out of resources (offline nodes which can't be fully evacuated, N+1 failures, etc.) there is nothing we can do unless nodes are added or instances are removed. In a super cluster however, there might be resources available in another group, so there is the possibility of relocating instances between groups to re-establish N+1 success within each group.

One difficulty in the presence of both super clusters and shared storage is that the move paths of instances are quite complicated; basically an instance can move inside its local group, and to any other groups which have access to the same storage type and storage pool pair. In effect, the super cluster is composed of multiple ‘partitions’, each containing one or more groups, but a node is simultaneously present in multiple partitions, one for each storage type and storage pool it supports.

As such, the interactions between the individual partitions are too complex for non-trivial clusters to assume we can compute a perfect solution: we might need to move some instances using shared storage pool ‘A’ in order to clear some more memory to accept an instance using local storage, which will further clear more VCPUs in a third partition, etc. As such, we'll limit ourselves to simple relocation steps within a single partition.

Algorithm:

#. read super cluster data, and exit if the cluster doesn't allow inter-group moves
#. filter out any groups that are “alone” in their partition (i.e. no other group sharing at least one storage method)
#. determine the list of healthy versus unhealthy groups:

   #. a group which contains offline nodes still hosting instances is definitely not healthy
   #. a group which has nodes failing N+1 is ‘weakly’ unhealthy

#. if either list is empty, exit (no work to do, or no way to fix problems)
#. for each unhealthy group:
   #. compute the instances that are causing the problems: all instances living on offline nodes, all instances living as secondary on N+1 failing nodes, all instances living as primaries on N+1 failing nodes (in this order)
   #. remove instances, one by one, until the source group is healthy again
   #. try to run a standard allocation procedure for each instance on all potential groups in its partition

#. if all instances were relocated successfully, it means we have a solution for repairing the original group

Compression
^^^^^^^^^^^

In a super cluster which has had many instance reclamations, it is possible that while none of the groups is empty, overall there is enough empty capacity that an entire group could be removed.

The algorithm for “compressing” the super cluster is as follows:

#. read super cluster data
#. compute total *(memory, disk, cpu)*, and free *(memory, disk, cpu)* for the super-cluster
#. compute per-group used and free *(memory, disk, cpu)*
#. select candidate groups for evacuation:

   #. they must be connected to other groups via a common storage type and pool
   #. they must have fewer used resources than the global free resources (minus their own free resources)

#. for each of these groups, try to relocate all its instances to connected peer groups
#. report the list of groups that could be evacuated, or, if so instructed, perform the evacuation of the group with the largest free resources (i.e. in order to reclaim the most capacity)

Load balancing
^^^^^^^^^^^^^^

Assuming a super cluster using shared storage, where instance failover is cheap, it should be possible to do a load-based balancing across groups.

As opposed to the normal balancing, where we want to balance on all node attributes, here we should look only at the load attributes; in other words, compare the available (total) node capacity with the (total) load generated by instances in a given group, and compute such scores for all groups, trying to see if we have any outliers.

Once a reliable load-weighting method for groups exists, we can apply a modified version of the cluster scoring method to score not imbalances across nodes, but imbalances across groups, which results in a super cluster load-related score.

Allocation changes
------------------

It is important to keep the allocation method across groups internal (in the Ganeti/Iallocator combination), instead of delegating it to an external party (e.g. a RAPI client). For this, the IAllocator protocol should be extended to provide proper group support.

For htools, the new algorithm will work as follows:

#. read/receive cluster data from Ganeti
#. filter out any groups that do not support the requested storage method
#. for remaining groups, try allocation and compute scores after allocation
#. sort valid allocation solutions accordingly and return the entire list to Ganeti

The rationale for returning the entire group list, and not only the best choice, is that we have the list anyway, and Ganeti might have other criteria (e.g. the best group might be busy/locked down, etc.), so even if from the point of view of resources it is the best choice, it might not be the overall best one.

Node evacuation changes
-----------------------

While the basic concept in the ``multi-evac`` iallocator mode remains unchanged (it's a simple local group issue), when failing to evacuate and running in a super cluster, we could have resources available elsewhere in the cluster for evacuation.
The algorithm for computing this will be the same as the one for super cluster compression and redistribution, except that the list of instances is fixed to the ones living on the nodes to be evacuated.

If the inter-group relocation is successful, the result to Ganeti will not be a local group evacuation target, but instead (for each instance) a pair *(remote group, nodes)*. Ganeti itself will have to decide (based on user input) whether to continue with inter-group evacuation or not.

In case Ganeti doesn't provide complete cluster data but just the local group, the inter-group relocation won't be attempted.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/admin.rst0000644000000000000000000020472612271422343015374 0ustar00rootroot00000000000000
Ganeti administrator's guide
============================

Documents Ganeti version |version|

.. contents::

.. highlight:: shell-example

Introduction
------------

Ganeti is a virtualization cluster management software. You are expected to be a system administrator familiar with your Linux distribution and the Xen or KVM virtualization environments before using it.

The various components of Ganeti all have man pages and interactive help. This manual, though, will help you get familiar with the system by explaining the most common operations, grouped by related use.

After a terminology glossary and a section on the prerequisites needed to use this manual, the rest of this document is divided into sections for the different targets that a command affects: instance, nodes, etc.

.. _terminology-label:

Ganeti terminology
++++++++++++++++++

This section provides a small introduction to Ganeti terminology, which might be useful when reading the rest of the document.

Cluster
~~~~~~~

A set of machines (nodes) that cooperate to offer a coherent, highly available virtualization service under a single administration domain.

Node
~~~~

A physical machine which is a member of a cluster. Nodes are the basic cluster infrastructure, and they don't need to be fault tolerant in order to achieve high availability for instances.

Nodes can be added to and removed (if they host no instances) from the cluster at will. In an HA cluster, and only with HA instances, the loss of any single node will not cause disk data loss for any instance; of course, a node crash will cause the crash of its primary instances.
A node belonging to a cluster can be in one of the following roles at a given time:

- *master* node, which is the node from which the cluster is controlled
- *master candidate* node, only nodes in this role have the full cluster configuration and knowledge, and only master candidates can become the master node
- *regular* node, which is the state in which most nodes will be on bigger clusters (>20 nodes)
- *drained* node, nodes in this state are functioning normally but they cannot receive new instances; the intention is that nodes in this role have some issue and they are being evacuated for hardware repairs
- *offline* node, in which there is a record in the cluster configuration about the node, but the daemons on the master node will not talk to this node; any instances declared as having an offline node as either primary or secondary will be flagged as an error in the cluster verify operation

Depending on the role, each node will run a set of daemons:

- the :command:`ganeti-noded` daemon, which controls the manipulation of this node's hardware resources; it runs on all nodes which are in a cluster
- the :command:`ganeti-confd` daemon (Ganeti 2.1+) which runs on all nodes, but is only functional on master candidate nodes; this daemon can be disabled at configuration time if you don't need its functionality
- the :command:`ganeti-rapi` daemon which runs on the master node and offers an HTTP-based API for the cluster
- the :command:`ganeti-masterd` daemon which runs on the master node and allows control of the cluster

Besides the node role, there are other node flags that influence its behaviour:

- the *master_capable* flag denotes whether the node can ever become a master candidate; setting this to 'no' means that auto-promotion will never make this node a master candidate; this flag can be useful for a remote node that only runs local instances, and having it become a master is impractical due to networking or other constraints
- the *vm_capable* flag denotes whether the node can host instances or not; for example, one might use a non-vm_capable node just as a master candidate, for configuration backups; setting this flag to 'no' disallows placement of instances on this node, deactivates hypervisor and related checks on it (e.g. bridge checks, LVM check, etc.), and removes it from cluster capacity computations

Instance
~~~~~~~~

A virtual machine which runs on a cluster. It can be a fault tolerant, highly available entity.

An instance has various parameters, which are classified in three categories: hypervisor-related parameters (called ``hvparams``), general parameters (called ``beparams``) and per-network-card parameters (called ``nicparams``). All these parameters can be modified either at instance level or via defaults at cluster level.

Disk template
~~~~~~~~~~~~~

There are multiple options for the storage provided to an instance; while the instance sees the same virtual drive in all cases, the node-level configuration varies between them. These are the disk templates you can choose from:

diskless
  The instance has no disks. Only used for special purpose operating systems or for testing.

file
  The instance will use plain files as backend for its disks. No redundancy is provided, and this is somewhat more difficult to configure for high performance. Note that for security reasons the file storage directory must be listed under ``/etc/ganeti/file-storage-paths``, and that file is not copied automatically to all nodes by Ganeti.
sharedfile
  The instance will use plain files as backend, but Ganeti assumes that those files will be available and in sync automatically on all nodes. This allows live migration and failover of instances using this method. As for ``file``, the file storage directory must be listed under ``/etc/ganeti/file-storage-paths`` or Ganeti will refuse to create instances under it.

plain
  The instance will use LVM devices as backend for its disks. No redundancy is provided.

drbd
  .. note:: This is only valid for multi-node clusters using DRBD 8.0+

  A mirror is set between the local node and a remote one, which must be specified with the second value of the --node option. Use this option to obtain a highly available instance that can be failed over to a remote node should the primary one fail.

  .. note:: Ganeti does not support DRBD stacked devices: DRBD stacked setup is not fully symmetric and as such it is not working with live migration.

rbd
  The instance will use volumes inside a RADOS cluster as backend for its disks. It will access them using the RADOS block device (RBD).

ext
  The instance will use an external storage provider. See :manpage:`ganeti-extstorage-interface(7)` for how to implement one.

IAllocator
~~~~~~~~~~

A framework for using external (user-provided) scripts to compute the placement of instances on the cluster nodes. This eliminates the need to manually specify nodes in instance add, instance moves, node evacuate, etc.

In order for Ganeti to be able to use these scripts, they must be placed in the iallocator directory (usually ``lib/ganeti/iallocators`` under the installation prefix, e.g. ``/usr/local``).

“Primary” and “secondary” concepts
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

An instance has a primary node and, depending on the disk configuration, might also have a secondary node. The instance always runs on the primary node and only uses its secondary node for disk replication.

Similarly, the terms primary and secondary instances, when talking about a node, refer to the set of instances having the given node as primary, respectively secondary.

Tags
~~~~

Tags are short strings that can be attached either to the cluster itself, or to nodes or instances. They are useful as a very simplistic information store for helping with cluster administration, for example by attaching owner information to each instance after it's created::

  $ gnt-instance add … %instance1%
  $ gnt-instance add-tags %instance1% %owner:user2%

And then by listing each instance and its tags, this information could be used for contacting the users of each instance.

Jobs and OpCodes
~~~~~~~~~~~~~~~~

While not directly visible to an end-user, it's useful to know that a basic cluster operation (e.g. starting an instance) is represented internally by Ganeti as an *OpCode* (abbreviation from operation code). These OpCodes are executed as part of a *Job*. The OpCodes in a single Job are processed serially by Ganeti, but different Jobs will be processed (depending on resource availability) in parallel. They will not be executed in the submission order, but depending on resource availability, locks and (starting with Ganeti 2.3) priority. An earlier job may have to wait for a lock while a newer job doesn't need any locks and can be executed right away.

Operations requiring a certain order need to be submitted as a single job, or the client must submit one job at a time and wait for it to finish before continuing.
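As an illustration of the latter approach, a client needing strict ordering can submit and wait for one job at a time; ``SubmitJob`` and ``WaitForJobCompletion`` below are placeholders for whatever client library is in use, not a documented Ganeti API:

.. code-block:: python

  # Sketch only: enforcing ordering from a client by serializing job
  # submission; the client methods used here are hypothetical.
  def RunInOrder(client, opcode_batches):
    for opcodes in opcode_batches:
      # opcodes within a single job are always processed serially
      job_id = client.SubmitJob(opcodes)
      if not client.WaitForJobCompletion(job_id):
        raise RuntimeError("Job %s failed, aborting sequence" % job_id)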
For example, shutting down the entire cluster can be done by running the command ``gnt-instance shutdown --all``, which will submit for each instance a separate job containing the “shutdown instance” OpCode.

Prerequisites
+++++++++++++

You need to have your Ganeti cluster installed and configured before you try any of the commands in this document. Please follow the :doc:`install` for instructions on how to do that.

Instance management
-------------------

Adding an instance
++++++++++++++++++

The add operation might seem complex due to the many parameters it accepts, but once you have understood the (few) required parameters and the customisation capabilities you will see it is an easy operation.

The add operation requires at minimum five parameters:

- the OS for the instance
- the disk template
- the disk count and size
- the node specification or alternatively the iallocator to use
- and finally the instance name

The OS for the instance must be visible in the output of the command ``gnt-os list`` and specifies which guest OS to install on the instance.

The disk template specifies what kind of storage to use as backend for the (virtual) disks presented to the instance; note that for instances with multiple virtual disks, they all must be of the same type.

The node(s) on which the instance will run can be given either manually, via the ``-n`` option, or computed automatically by Ganeti, if you have installed any iallocator script.

With the above parameters in mind, the command is::

  $ gnt-instance add \
    -n %TARGET_NODE%:%SECONDARY_NODE% \
    -o %OS_TYPE% \
    -t %DISK_TEMPLATE% -s %DISK_SIZE% \
    %INSTANCE_NAME%

The instance name must be resolvable (e.g. exist in DNS) and usually points to an address in the same subnet as the cluster itself.

The above command has the minimum required options; other options you can give include, among others:

- The maximum/minimum memory size (``-B maxmem``, ``-B minmem``) (``-B memory`` can be used to specify only one size)
- The number of virtual CPUs (``-B vcpus``)
- Arguments for the NICs of the instance; by default, a single-NIC instance is created. The IP and/or bridge of the NIC can be changed via ``--net 0:ip=IP,link=BRIDGE``

See :manpage:`ganeti-instance(8)` for the detailed option list.

For example, if you want to create a highly available instance, with a single disk of 50GB and the default memory size, having primary node ``node1`` and secondary node ``node3``, use the following command::

  $ gnt-instance add -n node1:node3 -o debootstrap -t drbd -s 50G \
    instance1

There is also a command for batch instance creation from a specification file, see the ``batch-create`` operation in the gnt-instance manual page.

Regular instance operations
+++++++++++++++++++++++++++

Removal
~~~~~~~

Removing an instance is even easier than creating one. This operation is irreversible and destroys all the contents of your instance. Use with care::

  $ gnt-instance remove %INSTANCE_NAME%

.. _instance-startup-label:

Startup/shutdown
~~~~~~~~~~~~~~~~

Instances are automatically started at instance creation time. To manually start one which is currently stopped you can run::

  $ gnt-instance startup %INSTANCE_NAME%

Ganeti will start an instance with up to its maximum instance memory. If not enough memory is available Ganeti will use all the available memory down to the instance minimum memory. If not even that amount of memory is free Ganeti will refuse to start the instance.

Note that this will not work when an instance is in the permanently stopped state ``offline``.
In this case, you will first have to put it back to online mode by running::

  $ gnt-instance modify --online %INSTANCE_NAME%

The command to stop the running instance is::

  $ gnt-instance shutdown %INSTANCE_NAME%

If you want to shut the instance down more permanently, so that it does not require dynamically allocated resources (memory and vcpus), after shutting down an instance, execute the following::

  $ gnt-instance modify --offline %INSTANCE_NAME%

.. warning:: Do not use the Xen or KVM commands directly to stop instances. If you run for example ``xm shutdown`` or ``xm destroy`` on an instance, Ganeti will automatically restart it (via the :command:`ganeti-watcher(8)` command which is launched via cron).

Querying instances
~~~~~~~~~~~~~~~~~~

There are two ways to get information about instances: listing instances, which produces tabular output containing a given set of fields about each instance, and querying detailed information about a set of instances.

The command to see all the instances configured and their status is::

  $ gnt-instance list

The command can return a custom set of information when using the ``-o`` option (as always, check the manpage for a detailed specification). Each instance will be represented on a line, thus making it easy to parse this output via the usual shell utilities (grep, sed, etc.).

To get more detailed information about an instance, you can run::

  $ gnt-instance info %INSTANCE%

which will give a multi-line block of information about the instance, its hardware resources (especially its disks and their redundancy status), etc. This is harder to parse and is more expensive than the list operation, but returns much more detailed information.

Changing an instance's runtime memory
+++++++++++++++++++++++++++++++++++++

Ganeti will always make sure an instance has a value between its maximum and its minimum memory available as runtime memory. As of version 2.6 Ganeti will only choose a size different from the maximum size when starting up, failing over, or migrating an instance on a node with less than the maximum memory available. It won't resize other instances in order to free up space for an instance.

If you find that you need more memory on a node, any instance can be manually resized without downtime, with the command::

  $ gnt-instance modify -m %SIZE% %INSTANCE_NAME%

The same command can also be used to increase the memory available on an instance, provided that enough free memory is available on its node, and the specified size is not larger than the maximum memory size the instance had when it was first booted (an instance will be unable to see new memory above the maximum that was specified to the hypervisor at its boot time; if it needs to grow further, a reboot becomes necessary).

Export/Import
+++++++++++++

You can create a snapshot of an instance disk and its Ganeti configuration, which you can then back up, or import into another cluster. The way to export an instance is::

  $ gnt-backup export -n %TARGET_NODE% %INSTANCE_NAME%

The target node can be any node in the cluster with enough space under ``/srv/ganeti`` to hold the instance image. Use the ``--noshutdown`` option to snapshot an instance without rebooting it. Note that Ganeti only keeps one snapshot for an instance - any previous snapshot of the same instance existing cluster-wide under ``/srv/ganeti`` will be removed by this operation: if you want to keep them, you need to move them out of the Ganeti exports directory.
Importing an instance is similar to creating a new one, but additionally one must specify the location of the snapshot. The command is::

  $ gnt-backup import -n %TARGET_NODE% \
    --src-node=%NODE% --src-dir=%DIR% %INSTANCE_NAME%

By default, parameters will be read from the export information, but you can of course pass them in via the command line - most of the options available for the command :command:`gnt-instance add` are supported here too.

Import of foreign instances
+++++++++++++++++++++++++++

It is possible to import a foreign instance whose disk data is already stored as LVM volumes without copying it: the disk adoption mode.

For this, ensure that the original, non-managed instance is stopped, then create a Ganeti instance in the usual way, except that instead of passing the disk information you specify the current volumes::

  $ gnt-instance add -t plain -n %HOME_NODE% ... \
    --disk 0:adopt=%lv_name%[,vg=%vg_name%] %INSTANCE_NAME%

This will take over the given logical volumes, rename them to the Ganeti standard (UUID-based), and start the instance directly, without installing the OS on them. If you configure the hypervisor similarly to the non-managed configuration that the instance had, the transition should be seamless for the instance. For more than one disk, just pass another disk parameter (e.g. ``--disk 1:adopt=...``).

Instance kernel selection
+++++++++++++++++++++++++

The kernel that instances use to boot up can come either from the node, or from the instances themselves, depending on the setup.

Xen-PVM
~~~~~~~

With Xen PVM, there are three options.

First, you can use a kernel from the node, by setting the hypervisor parameters as such:

- ``kernel_path`` to a valid file on the node (and appropriately ``initrd_path``)
- ``kernel_args`` optionally set to a valid Linux setting (e.g. ``ro``)
- ``root_path`` to a valid setting (e.g. ``/dev/xvda1``)
- ``bootloader_path`` and ``bootloader_args`` to empty

Alternatively, you can delegate the kernel management to instances, and use either ``pvgrub`` or the deprecated ``pygrub``. For this, you must install the kernels and initrds in the instance and create a valid GRUB v1 configuration file.

For ``pvgrub`` (new in version 2.4.2), you need to set:

- ``kernel_path`` to point to the ``pvgrub`` loader present on the node (e.g. ``/usr/lib/xen/boot/pv-grub-x86_32.gz``)
- ``kernel_args`` to the path to the GRUB config file, relative to the instance (e.g. ``(hd0,0)/grub/menu.lst``)
- ``root_path`` **must** be empty
- ``bootloader_path`` and ``bootloader_args`` to empty

While ``pygrub`` is deprecated, here is how you can configure it:

- ``bootloader_path`` to the pygrub binary (e.g. ``/usr/bin/pygrub``)
- the other settings are not important

More information can be found in the Xen wiki pages for `pvgrub `_ and `pygrub `_.

KVM
~~~

For KVM also the kernel can be loaded either way.

For loading the kernels from the node, you need to set:

- ``kernel_path`` to a valid value
- ``initrd_path`` optionally set if you use an initrd
- ``kernel_args`` optionally set to a valid value (e.g. ``ro``)

If you want instead to have the instance boot from its disk (and execute its bootloader), simply set the ``kernel_path`` parameter to an empty string, and all the others will be ignored.

Instance HA features
--------------------

.. note:: This section only applies to multi-node clusters

.. _instance-change-primary-label:

Changing the primary node
+++++++++++++++++++++++++

There are three ways to exchange an instance's primary and secondary nodes; the right one to choose depends on how the instance has been created and the status of its current primary node. See :ref:`rest-redundancy-label` for information on changing the secondary node. Note that it's only possible to change the primary node to the secondary and vice-versa; a direct change of the primary node with a third node, while keeping the current secondary, is not possible in a single step, only via multiple operations as detailed in :ref:`instance-relocation-label`.

Failing over an instance
~~~~~~~~~~~~~~~~~~~~~~~~

If an instance is built in highly available mode you can at any time fail it over to its secondary node, even if the primary has somehow failed and it's not up anymore. Doing it is really easy; on the master node you can just run::

  $ gnt-instance failover %INSTANCE_NAME%

That's it. After the command completes the secondary node is now the primary, and vice-versa.

The instance will be started with an amount of memory between its ``maxmem`` and its ``minmem`` value, depending on the free memory on its target node, or the operation will fail if that's not possible. See :ref:`instance-startup-label` for details.

If the instance's disk template is of type rbd, then you can specify the target node (which can be any node) explicitly, or specify an iallocator plugin. If you omit both, the default iallocator will be used to determine the target node::

  $ gnt-instance failover -n %TARGET_NODE% %INSTANCE_NAME%

Live migrating an instance
~~~~~~~~~~~~~~~~~~~~~~~~~~

If an instance is built in highly available mode, it currently runs and both its nodes are running fine, you can migrate it over to its secondary node, without downtime. On the master node you need to run::

  $ gnt-instance migrate %INSTANCE_NAME%

The current load on the instance and its memory size will influence how long the migration will take. In any case, for both KVM and Xen hypervisors, the migration will be transparent to the instance.

If the destination node has less memory than the instance's current runtime memory, but at least the instance's minimum memory available, Ganeti will automatically reduce the instance runtime memory before migrating it, unless the ``--no-runtime-changes`` option is passed, in which case the target node should have at least the instance's current runtime memory free.

If the instance's disk template is of type rbd, then you can specify the target node (which can be any node) explicitly, or specify an iallocator plugin. If you omit both, the default iallocator will be used to determine the target node::

  $ gnt-instance migrate -n %TARGET_NODE% %INSTANCE_NAME%

Moving an instance (offline)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If an instance has not been created as mirrored, then the only way to change its primary node is to execute the move command::

  $ gnt-instance move -n %NEW_NODE% %INSTANCE%

This has a few prerequisites:

- the instance must be stopped
- its current primary node must be on-line and healthy
- the disks of the instance must not have any errors

Since this operation actually copies the data from the old node to the new node, expect it to take a time proportional to the size of the instance's disks and the speed of both the nodes' I/O system and their networking.

Disk operations
+++++++++++++++

Disk failures are a common cause of errors in any server deployment.
Ganeti offers protection from single-node failure if your instances
were created in HA mode, and it also offers ways to restore
redundancy after a failure.

Preparing for disk operations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is important to note that for Ganeti to be able to do any disk
operation, the Linux machines on top of which Ganeti runs must be
consistent; for LVM, this means that the LVM commands must not return
failures; it is common that after a complete disk failure, any LVM
command aborts with an error similar to::

  $ vgs
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  /dev/sdb1: read failed after 0 of 4096 at 750153695232: Input/output error
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  Couldn't find device with uuid 't30jmN-4Rcf-Fr5e-CURS-pawt-z0jU-m1TgeJ'.
  Couldn't find all physical volumes for volume group xenvg.

Before restoring an instance's disks to healthy status, you need to
fix the volume group used by Ganeti so that you can actually create
and manage the logical volumes. This is usually done in a multi-step
process:

#. first, if the disk is completely gone and the LVM commands exit
   with "Couldn't find device with uuid…", then you need to run the
   command::

    $ vgreduce --removemissing %VOLUME_GROUP%

#. after the above command, the LVM commands should be executing
   normally (warnings are normal, but the commands will not fail
   completely).

#. if the failed disk is still visible in the output of the ``pvs``
   command, you need to deactivate it from allocations by running::

    $ pvchange -x n /dev/%DISK%

At this point, the volume group should be consistent and any bad
physical volumes should no longer be available for allocation.

Note that since version 2.1 Ganeti provides some commands to automate
these two operations, see :ref:`storage-units-label`.

.. _rest-redundancy-label:

Restoring redundancy for DRBD-based instances
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A DRBD instance has two nodes, and the storage on one of them has
failed. Depending on which node (primary or secondary) has failed,
you have three options at hand:

- if the storage on the primary node has failed, you need to
  re-create the disks on it
- if the storage on the secondary node has failed, you can either
  re-create the disks on it or change the secondary and recreate
  redundancy on the new secondary node

Of course, at any point it's possible to force re-creation of disks
even though everything is already fine.

For all three cases, the ``replace-disks`` operation can be used::

  # re-create disks on the primary node
  $ gnt-instance replace-disks -p %INSTANCE_NAME%
  # re-create disks on the current secondary
  $ gnt-instance replace-disks -s %INSTANCE_NAME%
  # change the secondary node, via manual specification
  $ gnt-instance replace-disks -n %NODE% %INSTANCE_NAME%
  # change the secondary node, via an iallocator script
  $ gnt-instance replace-disks -I %SCRIPT% %INSTANCE_NAME%
  # since Ganeti 2.1: automatically fix the primary or secondary node
  $ gnt-instance replace-disks -a %INSTANCE_NAME%

Since the process involves copying all data from the working node to
the target node, it will take a while, depending on the instance's
disk size, node I/O system and network speed. But it is (barring any
network interruption) completely transparent for the instance.

Re-creating disks for non-redundant instances
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 2.1

For non-redundant instances, there isn't a copy (except backups) to
re-create the disks.
But it's possible to at least re-create empty disks, after which a
reinstall can be run, via the ``recreate-disks`` command::

  $ gnt-instance recreate-disks %INSTANCE%

Note that this will fail if the disks already exist. The instance can
be assigned to new nodes automatically by specifying an iallocator
through the ``--iallocator`` option.

Conversion of an instance's disk type
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is possible to convert between a non-redundant instance of type
``plain`` (LVM storage) and redundant ``drbd`` via the ``gnt-instance
modify`` command::

  # start with a non-redundant instance
  $ gnt-instance add -t plain ... %INSTANCE%

  # later convert it to redundant
  $ gnt-instance stop %INSTANCE%
  $ gnt-instance modify -t drbd -n %NEW_SECONDARY% %INSTANCE%
  $ gnt-instance start %INSTANCE%

  # and convert it back
  $ gnt-instance stop %INSTANCE%
  $ gnt-instance modify -t plain %INSTANCE%
  $ gnt-instance start %INSTANCE%

The conversion must be done while the instance is stopped, and
converting from plain to drbd template presents a small risk,
especially if the instance has multiple disks and/or if one node
fails during the conversion procedure. As such, it's recommended (as
always) to make sure that downtime for manual recovery is acceptable
and that the instance has up-to-date backups.

Debugging instances
+++++++++++++++++++

Accessing an instance's disks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From an instance's primary node you can have access to its disks.
Never ever mount the underlying logical volume manually on a fault
tolerant instance, or you will break replication and your data will
be inconsistent. The correct way to access an instance's disks is to
run (on the master node, as usual) the command::

  $ gnt-instance activate-disks %INSTANCE%

And then, *on the primary node of the instance*, access the device
that gets created. For example, you could mount the given disks, then
edit files on the filesystem, etc.

Note that with partitioned disks (as opposed to whole-disk
filesystems), you will need to use a tool like :manpage:`kpartx(8)`::

  # on node1
  $ gnt-instance activate-disks %instance1%
  node3:disk/0:…
  $ ssh node3
  # on node 3
  $ kpartx -l /dev/…
  $ kpartx -a /dev/…
  $ mount /dev/mapper/… /mnt/
  # edit files under mnt as desired
  $ umount /mnt/
  $ kpartx -d /dev/…
  $ exit
  # back to node 1

After you've finished you can deactivate them with the
deactivate-disks command, which works in the same way::

  $ gnt-instance deactivate-disks %INSTANCE%

Note that if any process started by you is still using the disks, the
above command will error out, and you **must** clean up and ensure
that the above command runs successfully before you start the
instance, otherwise the instance will suffer corruption.

Accessing an instance's console
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The command to access a running instance's console is::

  $ gnt-instance console %INSTANCE_NAME%

Use the console normally and then type ``^]`` when done, to exit.

Other instance operations
+++++++++++++++++++++++++

Reboot
~~~~~~

There is a wrapper command for rebooting instances::

  $ gnt-instance reboot %instance2%

By default, this does the equivalent of shutting down and then
starting the instance, but it accepts parameters to perform a soft
reboot (via the hypervisor), a hard reboot (hypervisor shutdown and
then startup) or a full one (the default, which also de-configures
and then configures again the disks of the instance).
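As a sketch, the three modes just described can also be selected
explicitly via the reboot type option (see :manpage:`gnt-instance(8)`
for the authoritative option syntax)::

  $ gnt-instance reboot --type=soft %instance2%
  $ gnt-instance reboot --type=hard %instance2%
  $ gnt-instance reboot --type=full %instance2%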
Instance OS definitions debugging
+++++++++++++++++++++++++++++++++

Should you have any problems with instance operating systems, the
command to see a complete status for all your nodes is::

  $ gnt-os diagnose

.. _instance-relocation-label:

Instance relocation
~~~~~~~~~~~~~~~~~~~

While it is not possible to move an instance from nodes ``(A, B)`` to
nodes ``(C, D)`` in a single move, it is possible to do so in a few
steps::

  # instance is located on A, B
  $ gnt-instance replace-disks -n %nodeC% %instance1%
  # instance has moved from (A, B) to (A, C)
  # we now flip the primary/secondary nodes
  $ gnt-instance migrate %instance1%
  # instance lives on (C, A)
  # we can then change A to D via:
  $ gnt-instance replace-disks -n %nodeD% %instance1%

This brings it into the final configuration of ``(C, D)``. Note that
we needed to do two replace-disks operations (two copies of the
instance disks), because we needed to get rid of both the original
nodes (A and B).

Node operations
---------------

There are far fewer node operations available than for instances, but
they are equivalently important for maintaining a healthy cluster.

Add/readd
+++++++++

It is at any time possible to extend the cluster with one more node,
by using the node add operation::

  $ gnt-node add %NEW_NODE%

If the cluster has a replication network defined, then you need to
pass the ``-s REPLICATION_IP`` parameter to this option.

A variation of this command can be used to re-configure a node if its
Ganeti configuration is broken, for example if it has been
reinstalled by mistake::

  $ gnt-node add --readd %EXISTING_NODE%

This will reinitialise the node as if it had been newly added, while
keeping its existing configuration in the cluster (primary/secondary
IP, etc.); in other words, you won't need to use ``-s`` here.

Changing the node role
++++++++++++++++++++++

A node can be in different roles, as explained in the
:ref:`terminology-label` section. Promoting a node to the master role
is special, while the other roles are all handled via a single
command.

Failing over the master node
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If you want to promote a different node to the master role (for
whatever reason), run on any other master-candidate node the
command::

  $ gnt-cluster master-failover

and the node you ran it on is now the new master. In case you try to
run this on a non master-candidate node, you will get an error
telling you which nodes are valid.

Changing between the other roles
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``gnt-node modify`` command can be used to select a new role::

  # change to master candidate
  $ gnt-node modify -C yes %NODE%
  # change to drained status
  $ gnt-node modify -D yes %NODE%
  # change to offline status
  $ gnt-node modify -O yes %NODE%
  # change to regular mode (reset all flags)
  $ gnt-node modify -O no -D no -C no %NODE%

Note that the cluster requires that at any point in time, a certain
number of nodes are master candidates, so changing from master
candidate to other roles might fail. It is recommended to either
force the operation (via the ``--force`` option) or first change the
number of master candidates in the cluster - see
:ref:`cluster-config-label`.
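One possible sequence for that second approach, assuming the pool
size can safely be lowered (``--candidate-pool-size`` is one of the
cluster ``modify`` options described in the cluster operations
section below, and ``%N%`` is a placeholder)::

  $ gnt-cluster modify --candidate-pool-size=%N%
  $ gnt-node modify -C no %NODE%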
Evacuating nodes
++++++++++++++++

There are two steps to moving instances off a node:

- moving the primary instances (actually converting them into
  secondary instances)
- moving the secondary instances (including any instances converted
  in the step above)

Primary instance conversion
~~~~~~~~~~~~~~~~~~~~~~~~~~~

For this step, you can use either individual instance move commands
(as seen in :ref:`instance-change-primary-label`) or the bulk
per-node versions; these are::

  $ gnt-node migrate %NODE%
  $ gnt-node evacuate -s %NODE%

Note that the instance "move" command doesn't currently have a node
equivalent.

Both these commands, or the equivalent per-instance command, will
make this node the secondary node for the respective instances,
whereas their current secondary node will become primary. Note that
it is not possible to change, in one step, the primary node to a
third node while keeping the same secondary node.

Secondary instance evacuation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For the evacuation of secondary instances, a command called
:command:`gnt-node evacuate` is provided and its syntax is::

  $ gnt-node evacuate -I %IALLOCATOR_SCRIPT% %NODE%
  $ gnt-node evacuate -n %DESTINATION_NODE% %NODE%

The first version will compute the new secondary for each instance in
turn using the given iallocator script, whereas the second one will
simply move all instances to DESTINATION_NODE.

Removal
+++++++

Once a node no longer has any instances (neither primary nor
secondary), it's easy to remove it from the cluster::

  $ gnt-node remove %NODE_NAME%

This will deconfigure the node, stop the ganeti daemons on it and
leave it, hopefully, as it was before it joined the cluster.

Replication network changes
+++++++++++++++++++++++++++

The :command:`gnt-node modify -s` command can be used to change the
secondary IP of a node. This operation can only be performed if:

- No instance is active on the target node
- The new target IP is reachable from the master's secondary IP

Also, this operation will not allow changing a node from single-homed
(same primary and secondary IP) to multi-homed (separate replication
network) or vice versa, unless:

- The target node is the master node and ``--force`` is passed.
- The target cluster is single-homed and the new primary IP is a
  change to single-homed for a particular node.
- The target cluster is multi-homed and the new primary IP is a
  change to multi-homed for a particular node.

For example, to do a single-homed to multi-homed conversion::

  $ gnt-node modify --force -s %SECONDARY_IP% %MASTER_NAME%
  $ gnt-node modify -s %SECONDARY_IP% %NODE1_NAME%
  $ gnt-node modify -s %SECONDARY_IP% %NODE2_NAME%
  $ gnt-node modify -s %SECONDARY_IP% %NODE3_NAME%
  ...

The same commands can be used for a multi-homed to single-homed
conversion, except that in that case the secondary IP of each node
should be the same as its primary IP.

Storage handling
++++++++++++++++

When using LVM (either standalone or with DRBD), it can become
tedious to debug and fix it in case of errors. Furthermore, even
file-based storage can become complicated to handle manually on many
hosts. Ganeti provides a couple of commands to help with automation.

Logical volumes
~~~~~~~~~~~~~~~

This is a command specific to LVM handling.
It allows listing the logical volumes on a given node or on all nodes
and their association to instances via the ``volumes`` command::

  $ gnt-node volumes
  Node  PhysDev   VG    Name             Size Instance
  node1 /dev/sdb1 xenvg e61fbc97-….disk0 512M instance17
  node1 /dev/sdb1 xenvg ebd1a7d1-….disk0 512M instance19
  node2 /dev/sdb1 xenvg 0af08a3d-….disk0 512M instance20
  node2 /dev/sdb1 xenvg cc012285-….disk0 512M instance16
  node2 /dev/sdb1 xenvg f0fac192-….disk0 512M instance18

The above command maps each logical volume to a volume group and
underlying physical volume and (possibly) to an instance.

.. _storage-units-label:

Generalized storage handling
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 2.1

Starting with Ganeti 2.1, a new storage framework has been
implemented that tries to abstract the handling of the storage type
the cluster uses.

First is listing the backend storage units and their space
situation::

  $ gnt-node list-storage
  Node  Name        Size Used   Free
  node1 /dev/sda7 673.8G   0M 673.8G
  node1 /dev/sdb1 698.6G 1.5G 697.1G
  node2 /dev/sda7 673.8G   0M 673.8G
  node2 /dev/sdb1 698.6G 1.0G 697.6G

The default is to list LVM physical volumes. It's also possible to
list the LVM volume groups::

  $ gnt-node list-storage -t lvm-vg
  Node  Name  Size
  node1 xenvg 1.3T
  node2 xenvg 1.3T

Next is repairing storage units, which is currently only implemented
for volume groups and does the equivalent of ``vgreduce
--removemissing``::

  $ gnt-node repair-storage %node2% lvm-vg xenvg
  Sun Oct 25 22:21:45 2009 Repairing storage unit 'xenvg' on node2 ...

Last is the modification of volume properties, which is (again) only
implemented for LVM physical volumes and allows toggling the
``allocatable`` value::

  $ gnt-node modify-storage --allocatable=no %node2% lvm-pv /dev/%sdb1%

Use of the storage commands
~~~~~~~~~~~~~~~~~~~~~~~~~~~

All these commands are needed when recovering a node from a disk
failure:

- first, we need to recover from complete LVM failure (due to missing
  disk), by running the ``repair-storage`` command
- second, we need to change allocation on any partially-broken disk
  (i.e. LVM still sees it, but it has bad blocks) by running
  ``modify-storage``
- then we can evacuate the instances as needed

Cluster operations
------------------

Besides the cluster initialisation command (which is detailed in the
:doc:`install` document) and the master failover command which is
explained under node handling, there are a couple of other cluster
operations available.
.. _cluster-config-label:

Standard operations
+++++++++++++++++++

One of the few commands that can be run on any node (not only the
master) is the ``getmaster`` command::

  # on node2
  $ gnt-cluster getmaster
  node1.example.com

It is possible to query and change global cluster parameters via the
``info`` and ``modify`` commands::

  $ gnt-cluster info
  Cluster name: cluster.example.com
  Cluster UUID: 07805e6f-f0af-4310-95f1-572862ee939c
  Creation time: 2009-09-25 05:04:15
  Modification time: 2009-10-18 22:11:47
  Master node: node1.example.com
  Architecture (this node): 64bit (x86_64)
  …
  Tags: foo
  Default hypervisor: xen-pvm
  Enabled hypervisors: xen-pvm
  Hypervisor parameters:
    - xen-pvm:
        root_path: /dev/sda1
        …
  Cluster parameters:
    - candidate pool size: 10
      …
  Default instance parameters:
    - default:
        memory: 128
        …
  Default nic parameters:
    - default:
        link: xen-br0
        …

The various parameters above can be changed via the ``modify``
commands as follows:

- the hypervisor parameters can be changed via ``modify -H
  xen-pvm:root_path=…``, and so on for other hypervisors/key/values
- the "default instance parameters" are changeable via ``modify -B
  parameter=value…`` syntax
- the cluster parameters are changeable via separate options to the
  modify command (e.g. ``--candidate-pool-size``, etc.)

For a detailed option list see the :manpage:`gnt-cluster(8)` man
page.

The cluster version can be obtained via the ``version`` command::

  $ gnt-cluster version
  Software version: 2.1.0
  Internode protocol: 20
  Configuration format: 2010000
  OS api version: 15
  Export interface: 0

This is not very useful except when debugging Ganeti.

Global node commands
++++++++++++++++++++

There are two commands provided for replicating files to all nodes of
a cluster and for running commands on all the nodes::

  $ gnt-cluster copyfile %/path/to/file%
  $ gnt-cluster command %ls -l /path/to/file%

These are simple wrappers over scp/ssh and more advanced usage can be
obtained using :manpage:`dsh(1)` and similar commands. But they are
useful to update an OS script from the master node, for example.

Cluster verification
++++++++++++++++++++

There are three commands that relate to global cluster checks. The
first one is ``verify`` which gives an overview of the cluster state,
highlighting any issues. In normal operation, this command should
return no ``ERROR`` messages::

  $ gnt-cluster verify
  Sun Oct 25 23:08:58 2009 * Verifying global settings
  Sun Oct 25 23:08:58 2009 * Gathering data (2 nodes)
  Sun Oct 25 23:09:00 2009 * Verifying node status
  Sun Oct 25 23:09:00 2009 * Verifying instance status
  Sun Oct 25 23:09:00 2009 * Verifying orphan volumes
  Sun Oct 25 23:09:00 2009 * Verifying remaining instances
  Sun Oct 25 23:09:00 2009 * Verifying N+1 Memory redundancy
  Sun Oct 25 23:09:00 2009 * Other Notes
  Sun Oct 25 23:09:00 2009   - NOTICE: 5 non-redundant instance(s) found.
  Sun Oct 25 23:09:00 2009 * Hooks Results

The second command is ``verify-disks``, which checks that the
instances' disks have the correct status based on the desired
instance state (up/down)::

  $ gnt-cluster verify-disks

Note that this command will show no output when disks are healthy.
The last command is used to repair any discrepancies in Ganeti's
recorded disk size and the actual disk size (disk size information is
needed for proper activation and growth of DRBD-based disks)::

  $ gnt-cluster repair-disk-sizes
  Sun Oct 25 23:13:16 2009  - INFO: Disk 0 of instance instance1 has mismatched size, correcting: recorded 512, actual 2048
  Sun Oct 25 23:13:17 2009  - WARNING: Invalid result from node node4, ignoring node results

The above shows one instance having a wrong disk size, and a node
which returned invalid data, and thus we ignored all primary
instances of that node.

Configuration redistribution
++++++++++++++++++++++++++++

If the verify command complains about file mismatches between the
master and other nodes, due to some node problems or if you manually
modified configuration files, you can force a push of the master
configuration to all other nodes via the ``redist-conf`` command::

  $ gnt-cluster redist-conf

This command will be silent unless there are problems sending updates
to the other nodes.

Cluster renaming
++++++++++++++++

It is possible to rename a cluster, or to change its IP address, via
the ``rename`` command. If only the IP has changed, you need to pass
the current name and Ganeti will realise its IP has changed::

  $ gnt-cluster rename %cluster.example.com%
  This will rename the cluster to 'cluster.example.com'. If
  you are connected over the network to the cluster name, the
  operation is very dangerous as the IP address will be removed from
  the node and the change may not go through. Continue?
  y/[n]/?: %y%
  Failure: prerequisites not met for this operation:
  Neither the name nor the IP address of the cluster has changed

In the above output, neither value has changed since the cluster
initialisation so the operation is not completed.

Queue operations
++++++++++++++++

The job queue execution in Ganeti 2.0 and higher can be inspected,
suspended and resumed via the ``queue`` command::

  $ gnt-cluster queue info
  The drain flag is unset
  $ gnt-cluster queue drain
  $ gnt-instance stop %instance1%
  Failed to submit job for instance1: Job queue is drained, refusing job
  $ gnt-cluster queue info
  The drain flag is set
  $ gnt-cluster queue undrain

This is most useful if you have an active cluster and you need to
upgrade the Ganeti software, or simply restart the software on any
node:

#. suspend the queue via ``queue drain``
#. wait until there are no more running jobs via ``gnt-job list``
#. restart the master or another node, or upgrade the software
#. resume the queue via ``queue undrain``

.. note:: this command only stores a local flag file, and if you fail
   over the master, it will have no effect on the new master.

Watcher control
+++++++++++++++

The :manpage:`ganeti-watcher(8)` is a program, usually scheduled via
``cron``, that takes care of cluster maintenance operations
(restarting downed instances, activating down DRBD disks, etc.).

However, during maintenance and troubleshooting, this can get in your
way; disabling it by commenting out the cron job is not ideal, as
this can be forgotten. Thus there are some commands for automated
control of the watcher: ``pause``, ``info`` and ``continue``::

  $ gnt-cluster watcher info
  The watcher is not paused.
  $ gnt-cluster watcher pause %1h%
  The watcher is paused until Mon Oct 26 00:30:37 2009.
  $ gnt-cluster watcher info
  The watcher is paused until Mon Oct 26 00:30:37 2009.
  $ ganeti-watcher -d
  2009-10-25 23:30:47,984: pid=28867 ganeti-watcher:486 DEBUG Pause has been set, exiting
  $ gnt-cluster watcher continue
  The watcher is no longer paused.
  $ ganeti-watcher -d
  2009-10-25 23:31:04,789: pid=28976 ganeti-watcher:345 DEBUG Archived 0 jobs, left 0
  2009-10-25 23:31:05,884: pid=28976 ganeti-watcher:280 DEBUG Got data from cluster, writing instance status file
  2009-10-25 23:31:06,061: pid=28976 ganeti-watcher:150 DEBUG Data didn't change, just touching status file
  $ gnt-cluster watcher info
  The watcher is not paused.

The exact details of the argument to the ``pause`` command are
available in the manpage.

.. note:: this command only stores a local flag file, and if you fail
   over the master, it will have no effect on the new master.

Node auto-maintenance
+++++++++++++++++++++

If the cluster parameter ``maintain_node_health`` is enabled (see the
manpage for :command:`gnt-cluster`, the init and modify subcommands),
then the following will happen automatically:

- the watcher will shut down any instances running on offline nodes
- the watcher will deactivate any DRBD devices on offline nodes

In the future, more actions are planned, so only enable this
parameter if the nodes are completely dedicated to Ganeti; otherwise
it might be possible to lose data due to auto-maintenance actions.

Removing a cluster entirely
+++++++++++++++++++++++++++

The usual method to clean up a cluster is to run ``gnt-cluster
destroy``; however, if the Ganeti installation is broken in any way
then this will not run. It is possible in such a case to clean up
manually most, if not all, traces of a cluster installation by
following these steps on all of the nodes:

1. Shutdown all instances. This depends on the virtualisation method
   used (Xen, KVM, etc.):

   - Xen: run ``xm list`` and ``xm destroy`` on all the non-Domain-0
     instances
   - KVM: kill all the KVM processes
   - chroot: kill all processes under the chroot mountpoints

2. If using DRBD, shut down all DRBD minors (which should by this
   time no longer be in use by instances); on each node, run
   ``drbdsetup /dev/drbdN down`` for each active DRBD minor.

3. If using LVM, clean up the Ganeti volume group; if only Ganeti
   created logical volumes (and you are not sharing the volume group
   with the OS, for example), then simply running ``lvremove -f
   xenvg`` (replace 'xenvg' with your volume group name) should do
   the required cleanup.

4. If using file-based storage, remove recursively all files and
   directories under your file-storage directory: ``rm -rf
   /srv/ganeti/file-storage/*`` replacing the path with the correct
   path for your cluster.

5. Stop the ganeti daemons (``/etc/init.d/ganeti stop``) and kill any
   that remain alive (``pgrep ganeti`` and ``pkill ganeti``).

6. Remove the ganeti state directory (``rm -rf /var/lib/ganeti/*``),
   replacing the path with the correct path for your installation.

7. If using RBD, run ``rbd unmap /dev/rbdN`` to unmap the RBD disks.
   Then remove the RBD disk images used by Ganeti, identified by
   their UUIDs (``rbd rm uuid.rbd.diskN``).

On the master node, remove the cluster from the master-netdev
(usually ``xen-br0`` for bridged mode, otherwise ``eth0`` or
similar), by running ``ip a del $clusterip/32 dev xen-br0`` (use the
correct cluster ip and network device name).
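Condensed, the per-node part of the above procedure might look like
the following sketch (instance, DRBD minor, volume group and paths
are illustrative and must be adapted; the full steps above remain
authoritative)::

  $ xm destroy %INSTANCE%          # repeat per non-Domain-0 instance (Xen)
  $ drbdsetup /dev/drbd%N% down    # repeat per active DRBD minor
  $ lvremove -f xenvg
  $ /etc/init.d/ganeti stop
  $ pkill ganeti
  $ rm -rf /var/lib/ganeti/*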
At this point, the machines are ready for a cluster creation; in case
you want to remove Ganeti completely, you need to also undo some of
the SSH changes and log directories:

- ``rm -rf /var/log/ganeti /srv/ganeti`` (replace with the correct
  paths)
- remove from ``/root/.ssh`` the keys that Ganeti added (check the
  ``authorized_keys`` and ``id_dsa`` files)
- regenerate the host's SSH keys (check the OpenSSH startup scripts)
- uninstall Ganeti

Otherwise, if you plan to re-create the cluster, you can just go
ahead and rerun ``gnt-cluster init``.

Monitoring the cluster
----------------------

Starting with Ganeti 2.8, a monitoring daemon is available, providing
information about the status and the performance of the system.

The monitoring daemon runs on every node, listening on TCP port 1815.
Each instance of the daemon provides information related to the node
it is running on.

.. include:: monitoring-query-format.rst

Tags handling
-------------

The tags handling (addition, removal, listing) is similar for all the
objects that support it (instances, nodes, and the cluster).

Limitations
+++++++++++

Note that the set of characters present in a tag and the maximum tag
length are restricted. Currently the maximum length is 128
characters, there can be at most 4096 tags per object, and the set of
allowed characters consists of alphanumeric characters plus
``.+*/:@-``.

Operations
++++++++++

Tags can be added via ``add-tags``::

  $ gnt-instance add-tags %INSTANCE% %a% %b% %c%
  $ gnt-node add-tags %NODE% %a% %b% %c%
  $ gnt-cluster add-tags %a% %b% %c%

The above commands add three tags to an instance, to a node and to
the cluster. Note that the cluster command only takes tags as
arguments, whereas the node and instance commands first require the
node or instance name.

Tags can also be added from a file, via the ``--from=FILENAME``
argument. The file is expected to contain one tag per line.

Tags can also be removed via a syntax very similar to the add one::

  $ gnt-instance remove-tags %INSTANCE% %a% %b% %c%

And listed via::

  $ gnt-instance list-tags
  $ gnt-node list-tags
  $ gnt-cluster list-tags

Global tag search
+++++++++++++++++

It is also possible to execute a global search on all the tags
defined in the cluster configuration, via a cluster command::

  $ gnt-cluster search-tags %REGEXP%

The parameter expected is a regular expression (see
:manpage:`regex(7)`). This will return all tags that match the
search, together with the object they are defined in (the names being
shown in a hierarchical kind of way)::

  $ gnt-cluster search-tags %o%
  /cluster foo
  /instances/instance1 owner:bar

Autorepair
----------

The tool ``harep`` can be used to automatically fix some problems
that are present in the cluster.

It is mainly meant to be regularly and automatically executed as a
cron job. When executed, it does not immediately fix all the issues
of the instances of the cluster; instead, it cycles the instances
through a series of states, one at every ``harep`` execution. Every
state performs a step towards the resolution of the problem. This
process goes on until the instance is brought back to the healthy
state, or the tool realizes that it is not able to fix the instance,
and therefore marks it as in failure state.

Allowing harep to act on the cluster
++++++++++++++++++++++++++++++++++++

By default, ``harep`` checks the status of the cluster but it is not
allowed to perform any modification. Modification must be explicitly
allowed by an appropriate use of tags.
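For example, to authorize the least destructive kind of repair
cluster-wide (the tag kinds are described next), one could run::

  $ gnt-cluster add-tags ganeti:watcher:autorepair:fix-storage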
Tagging can be applied at various levels, and can enable different
kinds of autorepair, as hereafter described.

All the tags that authorize ``harep`` to perform modifications follow
this syntax::

  ganeti:watcher:autorepair:<type>

where ``<type>`` indicates the kind of intervention that can be
performed. Every possible value of ``<type>`` includes at least all
the authorization of the previous one, plus its own. The possible
values, in increasing order of severity, are:

- ``fix-storage`` allows a disk replacement or another operation that
  fixes the instance backend storage without affecting the instance
  itself. This can for example recover from a broken drbd secondary,
  but risks data loss if something is wrong on the primary but the
  secondary was somehow recoverable.
- ``migrate`` allows an instance migration. This can recover from a
  drained primary, but can cause an instance crash in some cases
  (bugs).
- ``failover`` allows instance reboot on the secondary. This can
  recover from an offline primary, but the instance will lose its
  running state.
- ``reinstall`` allows disks to be recreated and an instance to be
  reinstalled. This can recover from primary&secondary both being
  offline, or from an offline primary in the case of non-redundant
  instances. It causes data loss.

These autorepair tags can be applied to a cluster, a nodegroup or an
instance, and will act where they are applied and to everything in
the entity's sub-tree (e.g. a tag applied to a nodegroup will apply
to all the instances contained in that nodegroup, but not to the rest
of the cluster).

If there are multiple ``ganeti:watcher:autorepair:<type>`` tags in an
object (cluster, node group or instance), the least destructive tag
takes precedence. When multiplicity happens across objects, the
nearest tag wins. For example, if in a cluster with two instances,
*I1* and *I2*, *I1* has ``failover``, and the cluster itself has both
``fix-storage`` and ``reinstall``, *I1* will end up with ``failover``
and *I2* with ``fix-storage``.

Limiting harep
++++++++++++++

Sometimes it is useful to stop harep from performing its task
temporarily, and it is useful to be able to do so without disrupting
its configuration, that is, without removing the authorization tags.
In order to do this, suspend tags are provided.

Suspend tags can be added to cluster, nodegroup or instances, and act
on the entire entity's sub-tree. No operation will be performed by
``harep`` on the instances protected by a suspend tag. Their syntax
is as follows::

  ganeti:watcher:autorepair:suspend[:<timestamp>]

If there are multiple suspend tags in an object, the form without
timestamp takes precedence (permanent suspension); or, if all object
tags have a timestamp, the one with the highest timestamp.

Tags with a timestamp will be automatically removed when the time
indicated by the timestamp is passed. Indefinite suspension tags have
to be removed manually.

Result reporting
++++++++++++++++

Harep will report the result of its actions both through its CLI, and
by adding tags to the instances it operated on. Such tags will follow
the syntax described below::

  ganeti:watcher:autorepair:result:<type>:<id>:<timestamp>:<result>:<jobs>

If this tag is present, a repair of type ``<type>`` has been
performed on the instance and has been completed by ``<timestamp>``.
The result is either ``success``, ``failure`` or ``enoperm``, and
``<jobs>`` is a *+*-separated list of jobs that were executed for
this repair.

An ``enoperm`` result is an error state due to permission problems.
It is returned when the repair cannot proceed because it would
require performing an operation that is not allowed by the
``ganeti:watcher:autorepair:<type>`` tag that is defining the
instance autorepair permissions.

NB: if an instance repair ends up in a failure state, it will not be
touched again by ``harep`` until it has been manually fixed by the
system administrator and the
``ganeti:watcher:autorepair:result:failure:*`` tag has been manually
removed.

Job operations
--------------

The various jobs submitted by the instance/node/cluster commands can
be examined, canceled and archived by various invocations of the
``gnt-job`` command.

First is the job list command::

  $ gnt-job list
  17771 success INSTANCE_QUERY_DATA
  17773 success CLUSTER_VERIFY_DISKS
  17775 success CLUSTER_REPAIR_DISK_SIZES
  17776 error   CLUSTER_RENAME(cluster.example.com)
  17780 success CLUSTER_REDIST_CONF
  17792 success INSTANCE_REBOOT(instance1.example.com)

More detailed information about a job can be found via the ``info``
command::

  $ gnt-job info %17776%
  Job ID: 17776
    Status: error
    Received:         2009-10-25 23:18:02.180569
    Processing start: 2009-10-25 23:18:02.200335 (delta 0.019766s)
    Processing end:   2009-10-25 23:18:02.279743 (delta 0.079408s)
    Total processing time: 0.099174 seconds
    Opcodes:
      OP_CLUSTER_RENAME
        Status: error
        Processing start: 2009-10-25 23:18:02.200335
        Processing end:   2009-10-25 23:18:02.252282
        Input fields:
          name: cluster.example.com
        Result:
          OpPrereqError
          [Neither the name nor the IP address of the cluster has changed]
        Execution log:

During the execution of a job, it's possible to follow the output of
a job, similar to the log that one gets from the ``gnt-`` commands,
via the watch command::

  $ gnt-instance add --submit … %instance1%
  JobID: 17818
  $ gnt-job watch %17818%
  Output from job 17818 follows
  -----------------------------
  Mon Oct 26 00:22:48 2009  - INFO: Selected nodes for instance instance1 via iallocator dumb: node1, node2
  Mon Oct 26 00:22:49 2009 * creating instance disks...
  Mon Oct 26 00:22:52 2009 adding instance instance1 to cluster config
  Mon Oct 26 00:22:52 2009  - INFO: Waiting for instance instance1 to sync disks.
  …
  Mon Oct 26 00:23:03 2009 creating os for instance instance1 on node node1
  Mon Oct 26 00:23:03 2009 * running the instance OS create scripts...
  Mon Oct 26 00:23:13 2009 * starting instance...
  $

This is useful if you need to follow a job's progress from multiple
terminals.

A job that has not yet started to run can be canceled::

  $ gnt-job cancel %17810%

But not one that has already started execution::

  $ gnt-job cancel %17805%
  Job 17805 is no longer waiting in the queue

There are two queues for jobs: the *current* and the *archive* queue.
Jobs are initially submitted to the current queue, and they stay in
that queue until they have finished execution (either successfully or
not). At that point, they can be moved into the archive queue using
e.g. ``gnt-job autoarchive all``. The ``ganeti-watcher`` script will
do this automatically 6 hours after a job is finished. The
``ganeti-cleaner`` script will then remove the archived jobs from the
archive directory after three weeks.

Note that ``gnt-job list`` only shows jobs in the current queue.
Archived jobs can be viewed using ``gnt-job info <id>``.

Special Ganeti deployments
--------------------------

Since Ganeti 2.4, it is possible to extend the Ganeti deployment with
two custom scenarios: Ganeti inside Ganeti and multi-site model.
Running Ganeti under Ganeti
+++++++++++++++++++++++++++

It is sometimes useful to be able to use a Ganeti instance as a
Ganeti node (part of another cluster, usually). One example scenario
is two small clusters, where we want to have an additional master
candidate that holds the cluster configuration and can be used for
helping with the master voting process.

However, these Ganeti instances should not host instances themselves,
and should not be considered in the normal capacity planning,
evacuation strategies, etc. In order to accomplish this, mark these
nodes as non-``vm_capable``::

  $ gnt-node modify --vm-capable=no %node3%

The vm_capable status can be listed as usual via ``gnt-node list``::

  $ gnt-node list -oname,vm_capable
  Node  VMCapable
  node1 Y
  node2 Y
  node3 N

When this flag is set, the cluster will not do any operations that
relate to instances on such nodes, e.g. hypervisor operations,
disk-related operations, etc. Basically they will just keep the
ssconf files and, if they are master candidates, the full
configuration.

Multi-site model
++++++++++++++++

If Ganeti is deployed in multi-site model, with each site being a
node group (so that instances are not relocated across the WAN by
mistake), it is conceivable that either the WAN latency is high or
that some sites have a lower reliability than others. In this case,
it doesn't make sense to replicate the job information across all
sites (or even outside of a "central" node group), so it should be
possible to restrict which nodes can become master candidates via the
auto-promotion algorithm.

Ganeti 2.4 introduces for this purpose a new ``master_capable`` flag,
which (when unset) prevents nodes from being marked as master
candidates, either manually or automatically.

As usual, the node modify operation can change this flag::

  $ gnt-node modify --auto-promote --master-capable=no %node3%
  Fri Jan  7 06:23:07 2011  - INFO: Demoting from master candidate
  Fri Jan  7 06:23:08 2011  - INFO: Promoted nodes to master candidate role: node4
  Modified node node3
   - master_capable -> False
   - master_candidate -> False

And the node list operation will list this flag::

  $ gnt-node list -oname,master_capable %node1% %node2% %node3%
  Node  MasterCapable
  node1 Y
  node2 Y
  node3 N

Note that marking a node both not ``vm_capable`` and not
``master_capable`` makes the node practically unusable from Ganeti's
point of view. Hence these two flags should probably be used in
contrast: some nodes will be only master candidates (master_capable
but not vm_capable), and other nodes will only hold instances
(vm_capable but not master_capable).

Ganeti tools
------------

Besides the usual ``gnt-`` and ``ganeti-`` commands which are
provided and installed in ``$prefix/sbin`` at install time, there are
a couple of other tools installed which are seldom used but can be
helpful in some cases.

lvmstrap
++++++++

The ``lvmstrap`` tool, introduced in the :ref:`configure-lvm-label`
section, has two modes of operation:

- ``diskinfo`` shows the discovered disks on the system and their
  status
- ``create`` takes all not-in-use disks and creates a volume group
  out of them

.. warning:: The ``create`` argument to this command causes
   data-loss!

cfgupgrade
++++++++++

The ``cfgupgrade`` tool is used to upgrade between major (and minor)
Ganeti versions, and to roll back. Point-releases are usually
transparent for the admin.

More information about the upgrade procedure is listed on the wiki at
http://code.google.com/p/ganeti/wiki/UpgradeNotes.
There is also a script designed to upgrade from Ganeti 1.2 to 2.0,
called ``cfgupgrade12``.

cfgshell
++++++++

.. note:: This command is not actively maintained; make sure you
   backup your configuration before using it

This can be used as an alternative to direct editing of the main
configuration file if a Ganeti bug prevents you, for example, from
removing an instance or a node from the configuration file.

.. _burnin-label:

burnin
++++++

.. warning:: This command will erase existing instances if given as
   arguments!

This tool is used to exercise either the hardware of machines or
alternatively the Ganeti software. It is safe to run on an existing
cluster **as long as you don't pass it existing instance names**.

The command will, by default, execute a comprehensive set of
operations against a list of instances, these being:

- creation
- disk replacement (for redundant instances)
- failover and migration (for redundant instances)
- move (for non-redundant instances)
- disk growth
- add disks, remove disk
- add NICs, remove NICs
- export and then import
- rename
- reboot
- shutdown/startup
- and finally removal of the test instances

Executing all these operations will test that the hardware performs
well: the creation, disk replace, disk add and disk growth will
exercise the storage and network; the migrate command will test the
memory of the systems. Depending on the passed options, it can also
test that the instance OS definitions correctly execute the rename,
import and export operations.

sanitize-config
+++++++++++++++

This tool takes the Ganeti configuration and outputs a "sanitized"
version, by randomizing or clearing:

- DRBD secrets and cluster public key (always)
- host names (optional)
- IPs (optional)
- OS names (optional)
- LV names (optional, only useful for very old clusters which still
  have instances whose LVs are based on the instance name)

By default, all optional items are activated except the LV name
randomization. When passing ``--no-randomization``, which disables
the optional items (i.e. just the DRBD secrets and cluster public
keys are randomized), the resulting file can be used as a safety copy
of the cluster config - while not trivial, the layout of the cluster
can be recreated from it and if the instance disks have not been lost
it permits recovery from the loss of all master candidates.

move-instance
+++++++++++++

See :doc:`separate documentation for move-instance `.

users-setup
+++++++++++

Ganeti can either be run entirely as root, or with every daemon
running as its own specific user (if the parameters
``--with-user-prefix`` and/or ``--with-group-prefix`` have been
specified at ``./configure``-time).

In case split users are activated, they are required to exist on the
system, and they need to belong to the proper groups in order for the
access permissions to files and programs to be correct.

The ``users-setup`` tool, when run, takes care of setting up the
proper users and groups.

When invoked without parameters, the tool runs in interactive mode,
showing the list of actions it will perform and asking for
confirmation before proceeding.

Providing the ``--yes-do-it`` parameter to the tool prevents the
confirmation from being asked, and the users and groups will be
created immediately.

.. TODO: document cluster-merge tool

Other Ganeti projects
---------------------

Below is a list (which might not be up-to-date) of additional
projects that can be useful in a Ganeti deployment.
They can be downloaded from the project site
(http://code.google.com/p/ganeti/) and the repositories are also on
the project git site (http://git.ganeti.org).

NBMA tools
++++++++++

The ``ganeti-nbma`` software is designed to allow instances to live
on a separate, virtual network from the nodes, and in an environment
where nodes are not guaranteed to be able to reach each other via
multicasting or broadcasting. For more information see the README in
the source archive.

ganeti-htools
+++++++++++++

Before Ganeti version 2.5, this was a standalone project; since that
version it is integrated into the Ganeti codebase (see
:doc:`install-quick` for instructions on how to enable it). If you
run an older Ganeti version, you will have to download and build it
separately.

For more information and installation instructions, see the README
file in the source archive.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/design-monitoring-agent.rst0000644000000000000000000007231012271422343021024 0ustar00rootroot00000000000000
=======================
Ganeti monitoring agent
=======================

.. contents::
   :depth: 4

This is a design document detailing the implementation of a Ganeti
monitoring agent report system that can be queried by a monitoring
system to calculate health information for a Ganeti cluster.

Current state and shortcomings
==============================

There is currently no monitoring support in Ganeti. While we don't
want to build something like Nagios or Pacemaker as part of Ganeti,
it would be useful if such tools could easily extract information
from a Ganeti machine in order to take actions (example actions
include logging an outage for future reporting or alerting a person
or system about it).

Proposed changes
================

Each Ganeti node should export a status page that can be queried by a
monitoring system. Such a status page will be exported on a network
port and will be encoded in JSON (simple text) over HTTP.

The choice of JSON is obvious as we already depend on it in Ganeti
and thus we don't need to add extra libraries to use it, as opposed
to what would happen for XML or some other markup format.

Location of agent report
------------------------

The report will be available from all nodes, and will cover all
node-local resources. This allows more real-time information to be
available, at the cost of querying all nodes.

Information reported
--------------------

The monitoring agent system will report on the following basic
information:

- Instance status
- Instance disk status
- Status of storage for instances
- Ganeti daemons status, CPU usage, memory footprint
- Hypervisor resources report (memory, CPU, network interfaces)
- Node OS resources report (memory, CPU, network interfaces)
- Node OS CPU load average report
- Information from a plugin system

Format of the report
--------------------

The report will be in JSON format, and it will present an array of
report objects. Each report object will be produced by a specific
data collector. Each report object includes some mandatory fields, to
be provided by all the data collectors:

``name``
  The name of the data collector that produced this part of the
  report. It is supposed to be unique inside a report.

``version``
  The version of the data collector that produces this part of the
  report. Built-in data collectors (as opposed to those implemented
  as plugins) should have "B" as the version number.
``format_version``
  The format of what is represented in the "data" field for each data
  collector might change over time. Every time this happens, the
  format_version should be changed, so that whoever reads the report
  knows what format to expect, and how to correctly interpret it.

``timestamp``
  The time when the reported data were gathered. It has to be
  expressed in nanoseconds since the unix epoch (0:00:00 January 01,
  1970). If not enough precision is available (or needed) it can be
  padded with zeroes. If a report object needs multiple timestamps,
  it can add more and/or override this one inside its own "data"
  section.

``category``
  A collector can belong to a given category of collectors (e.g.:
  storage collectors, daemon collector). This means that it will have
  to provide a minimum set of prescribed fields, as documented for
  each category. This field will contain the name of the category the
  collector belongs to, if any, or just the ``null`` value.

``kind``
  Two kinds of collectors are possible: `Performance reporting
  collectors`_ and `Status reporting collectors`_. The respective
  paragraphs will describe them and the value of this field.

``data``
  This field contains all the data generated by the specific data
  collector, in its own independently defined format. The monitoring
  agent could check this syntactically (according to the JSON
  specifications) but not semantically.

Here follows a minimal example of a report::

  [
  { "name" : "TheCollectorIdentifier",
    "version" : "1.2",
    "format_version" : 1,
    "timestamp" : 1351607182000000000,
    "category" : null,
    "kind" : 0,
    "data" : { "plugin_specific_data" : "go_here" }
  },
  { "name" : "AnotherDataCollector",
    "version" : "B",
    "format_version" : 7,
    "timestamp" : 1351609526123854000,
    "category" : "storage",
    "kind" : 1,
    "data" : { "status" : { "code" : 1,
                            "message" : "Error on disk 2"
                          },
               "plugin_specific" : "data",
               "some_late_data" : { "timestamp" : 1351609526123942720,
                                    ...
                                  }
             }
  }
  ]

Performance reporting collectors
++++++++++++++++++++++++++++++++

These collectors only provide data about some component of the
system, without giving any interpretation over their meaning. The
value of the ``kind`` field of the report will be ``0``.

Status reporting collectors
+++++++++++++++++++++++++++

These collectors will provide information about the status of some
component of Ganeti, or of some component managed by Ganeti. The
value of their ``kind`` field will be ``1``.

The rationale behind this kind of collector is that there are some
situations where exporting data about the underlying subsystems would
expose potential issues. But if Ganeti itself is able (and going) to
fix the problem, conflicts might arise between Ganeti and
something/somebody else trying to fix the same problem.

Also, some external monitoring systems might not be aware of the
internals of a particular subsystem (e.g.: DRBD) and might only
exploit the high level response of its data collector, alerting an
administrator if anything is wrong. Still, completely hiding the
underlying data is not a good idea, as they might still be of use in
some cases. So status reporting plugins will provide two output
modes: one just exporting high-level information about the status,
and one also exporting all the data they gathered. The default output
mode will be the status-only one. Through a command line parameter
(for stand-alone data collectors) or through the HTTP request to the
monitoring agent (when collectors are executed as part of it) the
verbose output mode providing all the data can be selected.
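As a sketch of how a monitoring system might fetch the report: the
design fixes JSON over HTTP on a network port (the admin guide
mentions TCP port 1815 for the monitoring daemon), but no URL scheme
is defined here, so the path and query parameter below are purely
hypothetical placeholders::

  $ curl http://node1.example.com:1815/report?verbose=1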
When exporting just the status, each status reporting collector will
provide, in its ``data`` section, at least the following field:

``status``
  summarizes the status of the component being monitored and consists
  of two subfields:

  ``code``
    It assumes a numeric value, encoded in such a way to allow using
    a bitset to easily distinguish which states are currently present
    in the whole cluster. If the bitwise OR of all the ``status``
    fields is 0, the cluster is completely healthy. The status codes
    are as follows:

    ``0``
      The collector can determine that everything is working as
      intended.

    ``1``
      Something is temporarily wrong but it is being automatically
      fixed by Ganeti. There is no need of external intervention.

    ``2``
      The collector has failed to understand whether the status is
      good or bad. Further analysis is required. Interpret this
      status as a potentially dangerous situation.

    ``4``
      The collector can determine that something is wrong and Ganeti
      has no way to fix it autonomously. External intervention is
      required.

  ``message``
    A message to better explain the reason of the status. The exact
    format of the message string is data collector dependent. The
    field is mandatory, but the content can be an empty string if the
    ``code`` is ``0`` (working as intended) or ``1`` (being fixed
    automatically). If the status code is ``2``, the message should
    specify what has gone wrong. If the status code is ``4``, the
    message should explain why it was not possible to determine a
    proper status.

The ``data`` section will also contain all the fields describing the
gathered data, according to a collector-specific format.

Instance status
+++++++++++++++

At the moment each node knows which instances are running on it,
which instances it is primary for, but not the cause why an instance
might not be running. On the other hand we don't want to distribute
full instance "admin" status information to all nodes, because of the
performance impact this would have.

As such we propose that:

- Any operation that can affect instance status will have an optional
  "reason" attached to it (at opcode level). This can be used for
  example to distinguish an admin request, from a scheduled
  maintenance or an automated tool's work. If this reason is not
  passed, Ganeti will just use the information it has about the
  source of the request. This reason information will be structured
  according to the :doc:`Ganeti reason trail ` design document.
- RPCs that affect the instance status will be changed so that the
  "reason" and the version of the config object they ran on is passed
  to them. They will then export the new expected instance status,
  together with the associated reason and object version to the
  status report system, which then will export those themselves.

Monitoring and auditing systems can then use the reason to understand
the cause of an instance status, and they can use the timestamp to
understand the freshness of their data even in the absence of an
atomic cross-node reporting: for example if they see an instance "up"
on a node after seeing it running on a previous one, they can compare
these values to understand which data is freshest, and repoll the
"older" node. Of course if they keep seeing this status this
represents an error (either an instance continuously "flapping"
between nodes, or an instance is constantly up on more than one),
which should be reported and acted upon.
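As a brief aside on the ``status`` codes defined above: since they
are distinct powers of two, a monitoring system can aggregate the
codes of all reports with a bitwise OR; a minimal shell sketch::

  $ echo $(( 0 | 1 | 4 ))
  5

A combined value of 5 would indicate that somewhere something is
being fixed automatically (``1``) while something else requires
external intervention (``4``); only a total of 0 means the cluster is
completely healthy.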
The instance status will be on each node, for the instances it is
primary for, and its ``data`` section of the report will contain a
list of instances, named ``instances``, with at least the following
fields for each instance:

``name``
  The name of the instance.

``uuid``
  The UUID of the instance (stable on name change).

``admin_state``
  The status of the instance (up/down/offline) as requested by the
  admin.

``actual_state``
  The actual status of the instance. It can be ``up``, ``down``, or
  ``hung`` if the instance is up but it appears to be completely
  stuck.

``uptime``
  The uptime of the instance (if it is up, "null" otherwise).

``mtime``
  The timestamp of the last known change to the instance state.

``state_reason``
  The last known reason for state change of the instance, described
  according to the JSON representation of a reason trail, as detailed
  in the :doc:`reason trail design document `.

``status``
  It represents the status of the instance, and its format is the
  same as that of the ``status`` field of `Status reporting
  collectors`_.

Each hypervisor should provide its own instance status data
collector, possibly with the addition of more, specific, fields. The
``category`` field of all of them will be ``instance``. The ``kind``
field will be ``1``.

Note that as soon as a node knows it's not the primary anymore for an
instance it will stop reporting status for it: this means the
instance will either disappear, if it has been deleted, or appear on
another node, if it's been moved.

The ``code`` of the ``status`` field of the report of the Instance
status data collector will be:

``0``
  if ``status`` is ``0`` for all the instances it is reporting about.

``1``
  otherwise.

Storage collectors
++++++++++++++++++

The storage collectors will be a series of data collectors that will
gather data about storage for the current node. The collection will
be performed at different granularity and abstraction levels, from
the physical disks, to partitions, logical volumes and to the
specific storage types used by Ganeti itself (drbd, rbd, plain,
file).

The ``name`` of each of these collectors will reflect what storage
type each of them refers to.

The ``category`` field of these collectors will be ``storage``.

The ``kind`` field will depend on the specific collector.

Each ``storage`` collector's ``data`` section will provide
collector-specific fields.

The various storage collectors will provide keys to join the data
they provide, in order to allow the user to get a better
understanding of the system. E.g.: through device names, or instance
names.

Diskstats collector
*******************

This storage data collector will gather information about the status
of the disks installed in the system, as listed in the
/proc/diskstats file. This means that not only physical hard drives,
but also ramdisks and loopback devices will be listed.

Its ``kind`` in the report will be ``0`` (`Performance reporting
collectors`_).

Its ``category`` field in the report will contain the value
``storage``.

When executed in verbose mode, the ``data`` section of the report of
this collector will be a list of items, each representing one disk,
each providing the following fields:

``major``
  The major number of the device.

``minor``
  The minor number of the device.

``name``
  The name of the device.

``readsNum``
  This is the total number of reads completed successfully.

``mergedReads``
  Reads which are adjacent to each other may be merged for
  efficiency.
  Thus two 4K reads may become one 8K read before it is ultimately
  handed to the disk, and so it will be counted (and queued) as only
  one I/O. This field specifies how often this was done.

``secRead``
  This is the total number of sectors read successfully.

``timeRead``
  This is the total number of milliseconds spent by all reads.

``writes``
  This is the total number of writes completed successfully.

``mergedWrites``
  Writes which are adjacent to each other may be merged for
  efficiency. Thus two 4K writes may become one 8K write before it is
  ultimately handed to the disk, and so it will be counted (and
  queued) as only one I/O. This field specifies how often this was
  done.

``secWritten``
  This is the total number of sectors written successfully.

``timeWrite``
  This is the total number of milliseconds spent by all writes.

``ios``
  The number of I/Os currently in progress. The only field that
  should go to zero, it is incremented as requests are given to the
  appropriate struct request_queue and decremented as they finish.

``timeIO``
  The number of milliseconds spent doing I/Os. This field increases
  so long as field ``ios`` is nonzero.

``wIOmillis``
  The weighted number of milliseconds spent doing I/Os. This field is
  incremented at each I/O start, I/O completion, I/O merge, or read
  of these stats by the number of I/Os in progress (field ``ios``)
  times the number of milliseconds spent doing I/O since the last
  update of this field. This can provide an easy measure of both I/O
  completion time and the backlog that may be accumulating.

Logical Volume collector
************************

This data collector will gather information about the attributes of
logical volumes present in the system.

Its ``kind`` in the report will be ``0`` (`Performance reporting
collectors`_).

Its ``category`` field in the report will contain the value
``storage``.

The ``data`` section of the report of this collector will be a list
of items, each representing one logical volume and providing the
following fields:

``uuid``
  The UUID of the logical volume.

``name``
  The name of the logical volume.

``attr``
  The attributes of the logical volume.

``major``
  Persistent major number or -1 if not persistent.

``minor``
  Persistent minor number or -1 if not persistent.

``kernel_major``
  Currently assigned major number or -1 if LV is not active.

``kernel_minor``
  Currently assigned minor number or -1 if LV is not active.

``size``
  Size of LV in bytes.

``seg_count``
  Number of segments in LV.

``tags``
  Tags, if any.

``modules``
  Kernel device-mapper modules required for this LV, if any.

``vg_uuid``
  Unique identifier of the volume group.

``vg_name``
  Name of the volume group.

``segtype``
  Type of LV segment.

``seg_start``
  Offset within the LV to the start of the segment in bytes.

``seg_start_pe``
  Offset within the LV to the start of the segment in physical
  extents.

``seg_size``
  Size of the segment in bytes.

``seg_tags``
  Tags for the segment, if any.

``seg_pe_ranges``
  Ranges of Physical Extents of underlying devices in lvs command
  line format.

``devices``
  Underlying devices used with starting extent numbers.

``instance``
  The name of the instance this LV is used by, or ``null`` if it was
  not possible to determine it.

DRBD status
***********

This data collector will run only on nodes where DRBD is actually
present and it will gather information about DRBD devices.

Its ``kind`` in the report will be ``1`` (`Status reporting
collectors`_).

Its ``category`` field in the report will contain the value
``storage``.
When executed in verbose mode, the ``data`` section of the report of this collector will provide the following fields: ``versionInfo`` Information about the DRBD version number, given by a combination of any (but at least one) of the following fields: ``version`` The DRBD driver version. ``api`` The API version number. ``proto`` The protocol version. ``srcversion`` The version of the source files. ``gitHash`` Git hash of the source files. ``buildBy`` Who built the binary, and, optionally, when. ``device`` A list of structures, each describing a DRBD device (a minor) and containing the following fields: ``minor`` The device minor number. ``connectionState`` The state of the connection. If it is "Unconfigured", none of the following fields are present. ``localRole`` The role of the local resource. ``remoteRole`` The role of the remote resource. ``localState`` The status of the local disk. ``remoteState`` The status of the remote disk. ``replicationProtocol`` The replication protocol being used. ``ioFlags`` The input/output flags. ``perfIndicators`` The performance indicators. This field will contain the following sub-fields: ``networkSend`` KiB of data sent on the network. ``networkReceive`` KiB of data received from the network. ``diskWrite`` KiB of data written on the local disk. ``diskRead`` KiB of data read from the local disk. ``activityLog`` Number of updates of the activity log. ``bitMap`` Number of updates to the bitmap area of the metadata. ``localCount`` Number of open requests to the local I/O subsystem. ``pending`` Number of requests sent to the partner but not yet answered. ``unacknowledged`` Number of requests received by the partner but still to be answered. ``applicationPending`` Number of block input/output requests forwarded to DRBD but not yet answered. ``epochs`` (Optional) Number of epoch objects. Not provided by all DRBD versions. ``writeOrder`` (Optional) Currently used write ordering method. Not provided by all DRBD versions. ``outOfSync`` (Optional) KiB of storage currently out of sync. Not provided by all DRBD versions. ``syncStatus`` (Optional) The status of the synchronization of the disk. This is present only if the disk is being synchronized, and includes the following fields: ``percentage`` The percentage of synchronized data. ``progress`` How far the synchronization is. Written as "x/y", where x and y are integer numbers expressed in the measurement unit stated in ``progressUnit``. ``progressUnit`` The measurement unit for the progress indicator. ``timeToFinish`` The expected time before finishing the synchronization. ``speed`` The speed of the synchronization. ``want`` The desired speed of the synchronization. ``speedUnit`` The measurement unit of the ``speed`` and ``want`` values. Expressed as "size/time". ``instance`` The name of the Ganeti instance this disk is associated with. Ganeti daemons status +++++++++++++++++++++ Ganeti will report what information it has about its own daemons. This should allow identifying possible problems with the Ganeti system itself: for example memory leaks, crashes and high resource utilization should be evident by analyzing this information. The ``kind`` field will be ``1`` (`Status reporting collectors`_). Each daemon will have its own data collector, and each of them will have a ``category`` field valued ``daemon``. When executed in verbose mode, their ``data`` section will include at least: ``memory`` The amount of used memory. ``size_unit`` The measurement unit used for the memory. ``uptime`` The uptime of the daemon. ``CPU usage`` How much CPU the daemon is using (percentage). Any other daemon-specific information can be included as well in the ``data`` section.
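For illustration only, the ``data`` section of such a daemon report might look like the following (a hypothetical example with invented values, expressed here as a Python literal)::

  {"memory": 4190,
   "size_unit": "KiB",
   "uptime": 42124.95,
   "CPU usage": 0.1}
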
Hypervisor resources report +++++++++++++++++++++++++++ Each hypervisor has a view of system resources that is sometimes different from the one the OS sees (for example in Xen the Node OS, running as Dom0, has access to only part of those resources). In this section we'll report all the information we can in a "non hypervisor specific" way. Each hypervisor can then add extra specific information that is not generic enough to be abstracted. The ``kind`` field will be ``0`` (`Performance reporting collectors`_). Each of the hypervisor data collectors will have ``category`` set to ``hypervisor``. Node OS resources report ++++++++++++++++++++++++ Since Ganeti assumes it's running on Linux, it's useful to export some basic information as seen by the host system. The ``category`` field of the report will be ``null``. The ``kind`` field will be ``0`` (`Performance reporting collectors`_). The ``data`` section will include: ``cpu_number`` The number of available cpus. ``cpus`` A list with one element per cpu, showing its average load. ``memory`` The current view of memory (free, used, cached, etc.) ``filesystem`` A list with one element per filesystem, showing a summary of the total/available space. ``NICs`` A list with one element per network interface, showing the amount of sent/received data, error rate, IP address of the interface, etc. ``versions`` A map using the name of a component Ganeti interacts with (Linux, drbd, hypervisor, etc) as the key and its version number as the value. Note that we won't go into any hardware specific details (e.g. querying a node's RAID is outside the scope of this, and can be implemented as a plugin) but we can easily just report the information above, since it's standard enough across all systems. Node OS CPU load average report +++++++++++++++++++++++++++++++ This data collector will export CPU load statistics as seen by the host system. Apart from using the data from an external monitoring system we can also use the data to improve instance allocation and/or the Ganeti cluster balance. To compute the CPU load average we will use a number of values collected inside a time window. The collection process will be done by an independent thread (see `Mode of Operation`_). This report is a subset of the previous report (`Node OS resources report`_) and they might eventually get merged, once reporting for the other fields (memory, filesystem, NICs) gets implemented too. Specifically: The ``category`` field of the report will be ``null``. The ``kind`` field will be ``0`` (`Performance reporting collectors`_). The ``data`` section will include: ``cpu_number`` The number of available cpus. ``cpus`` A list with one element per cpu, showing its average load. ``cpu_total`` The total CPU load average as the sum of all the separate cpus. The CPU load report function will take the last N values collected by the CPU load collection function and calculate the above averages. Please see the section `Mode of Operation`_ for more information on how the two functions of the data collector interact. Format of the query ------------------- .. include:: monitoring-query-format.rst Instance disk status propagation -------------------------------- As with the instance status, Ganeti currently has only partial information about its instance disks: in particular each node is unaware of the disk-to-instance mapping, which exists only on the master.
For this design doc we plan to fix this by changing all RPCs that create backend storage or that put already existing storage in use, so that they also pass the relevant instance to the node. The node can then export this mapping to the status reporting tool. While we haven't implemented these RPC changes yet, we'll use Confd to fetch this information in the data collectors. Plugin system ------------- The monitoring system will be equipped with a plugin system that can export specific local information through it. The plugin system is expected to be used by local installations to export any installation specific information that they want to be monitored, about either hardware or software on their systems. The plugin system will be in the form of either scripts or binaries whose output will be inserted in the report. Eventually support for other kinds of plugins might be added as well, such as plain text files which will be inserted into the report, or local unix or network sockets from which the information has to be read. This should allow the most flexibility for implementing an efficient system, while being able to keep it as simple as possible. Data collectors --------------- In order to ease testing, as well as to make it simple to reuse this subsystem, it will be possible to run just the "data collectors" on each node without passing through the agent daemon. If a data collector is run independently, it should print on stdout its report, according to the format corresponding to a single data collector report object, as described in the previous paragraphs. Mode of operation ----------------- In order to be able to report information fast the monitoring agent daemon will keep an in-memory or on-disk cache of the status, which will be returned when queries are made. The status system will then periodically check resources to make sure the status is up to date. Different parts of the report will be queried at different speeds. These will depend on: - how often they vary (or we expect them to vary) - how fast they are to query - how important their freshness is Of course the last parameter is installation specific, and while we'll try to have defaults, it will be configurable. The first two instead we can use adaptively to query a certain resource faster or slower depending on those two parameters. When run as stand-alone binaries, the data collectors will not use any caching system, but will just fetch and return the data immediately. Since some performance collectors have to operate on a number of values collected at previous times, we need a mechanism independent of the data collector which will trigger the collection of those values and also store them, so that they are available for calculation by the data collectors. To collect data periodically, a thread will be created by the monitoring agent which will run the collection function of every data collector that provides one. The values returned by the collection function of the data collector will be saved in an appropriate map, associating each value with the corresponding collector, using the collector's name as the key of the map. This map will be stored in mond's memory. For example: the collection function of the CPU load collector will collect a CPU load value and save it in the map mentioned above. The collection function will be called by the collector thread every t milliseconds. When the report function of the collector is called, it will process the last N values of the map and calculate the corresponding average.
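The interaction just described can be sketched as follows (a purely illustrative Python sketch: all names are invented here, and the actual daemon will be implemented in Haskell, as described in the next section)::

  import threading
  import time

  class ValueStore(object):
    """Holds the values gathered by the collection functions."""
    def __init__(self, max_values):
      self._lock = threading.Lock()
      self._max = max_values
      self._values = {}  # collector name -> list of collected values

    def Append(self, name, value):
      with self._lock:
        values = self._values.setdefault(name, [])
        values.append(value)
        del values[:-self._max]  # keep only the most recent values

    def LastN(self, name, n):
      with self._lock:
        return list(self._values.get(name, [])[-n:])

  def CollectionThread(store, collection_fns, interval_seconds):
    """Periodically run every collection function and store the results."""
    while True:
      for name, fn in collection_fns.items():
        store.Append(name, fn())
      time.sleep(interval_seconds)

The report function of the CPU load collector would then fetch the last N values with something like ``store.LastN("cpu-avg-load", n)`` (again a hypothetical name) and compute their average.
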
Implementation place -------------------- The status daemon will be implemented as a standalone Haskell daemon. In the future it should be easy to merge multiple daemons into one with multiple entry points, should we find out it saves resources and doesn't impact functionality. The libekg library should be looked at for easily providing metrics in json format. Implementation order -------------------- We will implement the agent system in this order: - initial example data collectors (eg. for drbd and instance status). - initial daemon for exporting data, integrating the existing collectors - plugin system - RPC updates for instance status reasons and disk to instance mapping - cache layer for the daemon - more data collectors Future work =========== As a future step it can be useful to "centralize" all this reporting data in a single place. This for example can be just the master node, or all the master candidates. We will evaluate doing this after the first node-local version has been developed and tested. Another possible change is replacing the "read-only" RPCs with queries to the agent system, thus having only one way of collecting information from the nodes from a monitoring system and for Ganeti itself. One extra feature we may need is a way to query for only sub-parts of the report (eg. instances status only). This can be done by passing arguments to the HTTP GET, which will be defined when we get to this functionality. Finally the autorepair system (see its design document) can be expanded to use the monitoring agent system as a source of information to decide which repairs it can perform. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/examples/0000755000000000000000000000000012271445544015365 5ustar00rootroot00000000000000ganeti-2.9.3/doc/examples/ganeti-kvm-poweroff.initd.in0000644000000000000000000000361712230001635022701 0ustar00rootroot00000000000000#!/bin/bash # ganeti kvm instance poweroff # based on skeleton from Debian GNU/Linux ### BEGIN INIT INFO # Provides: ganeti-kvm-poweroff # Required-Start: # Required-Stop: drbd qemu-kvm $local_fs # Default-Start: # Default-Stop: 0 1 6 # Short-Description: Poweroff Ganeti KVM instances # Description: Sends system_powerdown command to Ganeti instances, otherwise # they will be killed. ### END INIT INFO shopt -s nullglob PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin DESC="Ganeti KVM instance poweroff" . /lib/lsb/init-functions CONTROL_PATH="@LOCALSTATEDIR@/run/ganeti/kvm-hypervisor/ctrl" SCRIPTNAME="@SYSCONFDIR@/init.d/ganeti-kvm-poweroff" TIMEOUT=60 do_kvm_poweroff () { # shutdown VMs and remove sockets of those not running for vm_monitor in $CONTROL_PATH/*.monitor; do if ! echo system_powerdown | \ socat -U UNIX:$vm_monitor STDIO > /dev/null 2>&1; then # remove disconnected socket rm -f $vm_monitor fi done log_action_begin_msg "Waiting for VMs to power off" waiting=true remaining=$TIMEOUT while $waiting && [ $remaining -ne 0 ]; do if [[ -z "$(find $CONTROL_PATH -name '*.monitor')" ]]; then break fi echo -n "." for vm_monitor in $CONTROL_PATH/*.monitor; do if !
echo | socat -U UNIX:$vm_monitor STDIO > /dev/null 2>&1; then rm -rf $vm_monitor fi done sleep 5 let remaining-=5 done if [[ -z "$(find $CONTROL_PATH -name '*.monitor')" ]]; then log_action_end_msg 0 else log_action_end_msg 1 "some VMs did not shut down" fi } case "$1" in start) # No-op ;; restart|reload|force-reload) echo "Error: argument '$1' not supported" >&2 exit 3 ;; stop) do_kvm_poweroff ;; *) echo "Usage: $0 start|stop" >&2 exit 3 ;; esac ganeti-2.9.3/doc/examples/gnt-debug/0000755000000000000000000000000012271445544017241 5ustar00rootroot00000000000000ganeti-2.9.3/doc/examples/gnt-debug/README0000644000000000000000000000157712230001635020113 0ustar00rootroot00000000000000In order to submit arbitrary jobs to ganeti one can call gnt-debug submit-job, passing a suitably formatted json file. A few examples of those files are included here. Using delay0.json and delay50.json in conjunction with submit-job for example allows one to rapidly submit many short delay jobs (using --job-repeat), repeating the sleep opcode any number of times (using --op-repeat), either all at the same time or one at a time (with --each). This can be used to check the performance of the job queue. Examples: # Run 40 jobs with 10 opcodes each: gnt-debug submit-job --op-repeat 10 --job-repeat 40 --timing-stats delay0.json # Run 40 jobs with 1 opcode each: gnt-debug submit-job --op-repeat 1 --job-repeat 40 --timing-stats delay0.json # Run 40 jobs with 10 opcodes each and submit one at a time: gnt-debug submit-job --op-repeat 10 --job-repeat 40 --timing-stats --each delay0.json ganeti-2.9.3/doc/examples/gnt-debug/delay50.json0000644000000000000000000000017012230001635021355 0ustar00rootroot00000000000000[ {"OP_ID": "OP_TEST_DELAY", "debug_level": 0, "dry_run": false, "duration": 0.05, "on_master": true, "on_nodes": []} ] ganeti-2.9.3/doc/examples/gnt-debug/delay0.json0000644000000000000000000000016712230001635021276 0ustar00rootroot00000000000000[ {"OP_ID": "OP_TEST_DELAY", "debug_level": 0, "dry_run": false, "duration": 0.0, "on_master": true, "on_nodes": []} ] ganeti-2.9.3/doc/examples/ganeti-master-role.ocf.in0000644000000000000000000000563012244641676022172 0ustar00rootroot00000000000000#!/bin/bash # ganeti master role OCF resource # See http://linux-ha.org/wiki/OCF_Resource_Agents set -e -u @SHELL_ENV_INIT@ PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin SCRIPTNAME="@LIBDIR@/ocf/resource.d/ganeti/ganeti-master-role" # Master candidates list file MCFILE="$DATA_DIR/ssconf_master_candidates" # We'll need the hostname in a few places, so we'll get it once, now. MYHOSTNAME=$(hostname --fqdn) is_master() { local -r master=$(gnt-cluster getmaster) [[ "$MYHOSTNAME" == "$master" ]] } is_candidate() { grep -Fx $MYHOSTNAME $MCFILE } start_action() { if is_master; then exit 0 elif is_candidate; then gnt-cluster master-failover || exit 1 # OCF_ERR_GENERIC else exit 5 # OCF_ERR_INSTALLED (vital component missing) fi } stop_action() { # We can't really "stop" being a master. # TODO: investigate whether a fake approach will do.
exit 1 # OCF_ERR_GENERIC } recover_action() { if is_master; then gnt-cluster redist-conf || exit 1 # OCF_ERR_GENERIC elif is_candidate; then gnt-cluster master-failover || exit 1 # OCF_ERR_GENERIC else exit 5 # OCF_ERR_INSTALLED (vital component missing) fi } monitor_action() { # monitor should exit: # 7 if the resource is not running # 1 if it failed # 0 if it's running if is_master; then exit 0 elif is_candidate; then exit 7 # OCF_NOT_RUNNING else exit 5 # OCF_ERR_INSTALLED (vital component missing) fi } return_meta() { cat <<END <?xml version="1.0"?> <resource-agent name="ganeti-master-role" version="0.1"> <version>0.1</version> <longdesc lang="en"> OCF script to manage the ganeti master role in a cluster. Can be used to failover the ganeti master between master candidate nodes. </longdesc> <shortdesc lang="en">Manages the ganeti cluster master</shortdesc> </resource-agent> END exit 0 } case "$1" in # Mandatory OCF commands start) start_action ;; stop) stop_action ;; monitor) monitor_action ;; meta-data) return_meta ;; # Optional OCF commands recover) recover_action ;; reload) # The ganeti master role has no "configuration" that is reloadable on # the pacemaker side. We declare the operation anyway to make sure # pacemaker doesn't decide to stop and start the service needlessly. exit 0 ;; promote|demote|migrate_to|migrate_from|validate-all) # Not implemented (nor declared by meta-data) exit 3 # OCF_ERR_UNIMPLEMENTED ;; *) log_success_msg "Usage: $SCRIPTNAME {start|stop|monitor|meta-data|recover|reload}" exit 1 ;; esac exit 0 ganeti-2.9.3/doc/examples/rapi_testutils.py0000744000000000000000000000401612230001635020776 0ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. """Example for using L{ganeti.rapi.testutils}""" import logging from ganeti import rapi import ganeti.rapi.testutils def main(): # Disable log output logging.getLogger("").setLevel(logging.CRITICAL) cl = rapi.testutils.InputTestClient() print "Testing features ..." assert isinstance(cl.GetFeatures(), list) print "Testing node evacuation ..." result = cl.EvacuateNode("inst1.example.com", mode=rapi.client.NODE_EVAC_PRI) assert result is NotImplemented print "Testing listing instances ..." for bulk in [False, True]: result = cl.GetInstances(bulk=bulk) assert result is NotImplemented print "Testing renaming instance ..." result = cl.RenameInstance("inst1.example.com", "inst2.example.com") assert result is NotImplemented print "Testing renaming instance with error ..." try: # This test deliberately uses an invalid value for the boolean parameter # "ip_check" result = cl.RenameInstance("inst1.example.com", "inst2.example.com", ip_check=["non-boolean", "value"]) except rapi.testutils.VerificationError: # Verification failed as expected pass else: raise Exception("This test should have failed") print "Success!"
if __name__ == "__main__": main() ganeti-2.9.3/doc/examples/ganeti.logrotate.in0000644000000000000000000000041112244641676021163 0ustar00rootroot00000000000000/var/log/ganeti/*.log { weekly missingok rotate 52 notifempty compress delaycompress create 640 root root sharedscripts postrotate @PKGLIBDIR@/daemon-util rotate-all-logs endscript } ganeti-2.9.3/doc/examples/ganeti.cron.in0000644000000000000000000000067412271422343020123 0ustar00rootroot00000000000000PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin # Restart failed instances (every 5 minutes) */5 * * * * root [ -x @SBINDIR@/ganeti-watcher ] && @SBINDIR@/ganeti-watcher # Clean job archive (at 01:45 AM) 45 1 * * * @GNTMASTERUSER@ [ -x @SBINDIR@/ganeti-cleaner ] && @SBINDIR@/ganeti-cleaner master # Clean job archive (at 02:45 AM) 45 2 * * * @GNTNODEDUSER@ [ -x @SBINDIR@/ganeti-cleaner ] && @SBINDIR@/ganeti-cleaner node ganeti-2.9.3/doc/examples/ganeti.default-debug0000644000000000000000000000016512271422343021260 0ustar00rootroot00000000000000# Default arguments for Ganeti daemons (debug mode) NODED_ARGS="-d" MASTERD_ARGS="-d" RAPI_ARGS="-d" CONFD_ARGS="-d" ganeti-2.9.3/doc/examples/ganeti.default0000644000000000000000000000014012271422343020165 0ustar00rootroot00000000000000# Default arguments for Ganeti daemons NODED_ARGS="" MASTERD_ARGS="" RAPI_ARGS="" CONFD_ARGS="" ganeti-2.9.3/doc/examples/ganeti-node-role.ocf.in0000644000000000000000000000757512244641676021636 0ustar00rootroot00000000000000#!/bin/bash # ganeti node role OCF resource # See http://linux-ha.org/wiki/OCF_Resource_Agents set -e -u @SHELL_ENV_INIT@ PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin SCRIPTNAME="@LIBDIR@/ocf/resource.d/ganeti/ganeti-node-role" # If this file exists don't act on notifications, thus allowing them to happen # during the service configuration. NORUNFILE="$DATA_DIR/ha_node_role_config" # Where to grep for tags TAGSFILE="$DATA_DIR/ssconf_cluster_tags" # If this tag is set we won't try to powercycle nodes POWERCYCLETAG="ocf:node-offline:use-powercycle" # If this tag is set will use IPMI to power off an offline node POWEROFFTAG="ocf:node-offline:use-poweroff" # We'll need the hostname in a few places, so we'll get it once, now. MYHOSTNAME=$(hostname --fqdn) is_master() { local -r master=$(gnt-cluster getmaster) [[ "$MYHOSTNAME" == "$master" ]] } start_action() { # If we're alive we consider ourselves a node, without starting anything. # TODO: improve on this exit 0 } stop_action() { # We can't "really" stop the service locally. # TODO: investigate whether a "fake" stop will work. exit 1 } recover_action() { # Nothing to recover, as long as we're alive. exit 0 } monitor_action() { # If we're alive we consider ourselves a working node. 
# TODO: improve on this exit 0 } offline_node() { local -r node=$1 grep -Fx $POWERCYCLETAG $TAGSFILE && gnt-node powercycle $node grep -Fx $POWEROFFTAG $TAGSFILE && gnt-node power off $node # TODO: do better than just --auto-promote # (or make sure auto-promote gets better in Ganeti) gnt-node modify -O yes --auto-promote $node } drain_node() { node=$1 # TODO: do better than just --auto-promote # (or make sure auto-promote gets better in Ganeti) gnt-node modify -D yes --auto-promote $node || return 1 } notify_action() { is_master || exit 0 [[ -f $NORUNFILE ]] && exit 0 # TODO: also implement the "start" operation for readding a node [[ $OCF_RESKEY_CRM_meta_notify_operation == "stop" ]] || exit 0 [[ $OCF_RESKEY_CRM_meta_notify_type == "post" ]] || exit 0 local -r target=$OCF_RESKEY_CRM_meta_notify_stop_uname local -r node=$(gnt-node list --no-headers -o name $target) # TODO: use drain_node when we can offline_node $node exit 0 } return_meta() { cat <<END <?xml version="1.0"?> <resource-agent name="ganeti-node-role" version="0.1"> <version>0.1</version> <longdesc lang="en"> OCF script to manage the ganeti node role in a cluster. Can be used to online and offline nodes. Should be cloned on all nodes of the cluster, with notification enabled. </longdesc> <shortdesc lang="en">Manages the ganeti cluster nodes</shortdesc> </resource-agent> END exit 0 } case "$1" in # Mandatory OCF commands start) start_action ;; stop) stop_action ;; monitor) monitor_action ;; meta-data) return_meta ;; # Optional OCF commands recover) recover_action ;; reload) # The ganeti node role has no "configuration" that is reloadable on # the pacemaker side. We declare the operation anyway to make sure # pacemaker doesn't decide to stop and start the service needlessly. exit 0 ;; notify) # Notification of a change to the ganeti node role notify_action exit 0 ;; promote|demote|migrate_to|migrate_from|validate-all) # Not implemented (nor declared by meta-data) exit 3 # OCF_ERR_UNIMPLEMENTED ;; *) log_success_msg "Usage: $SCRIPTNAME {start|stop|monitor|meta-data|recover|reload}" exit 1 ;; esac exit 0 ganeti-2.9.3/doc/examples/ganeti.initd.in0000644000000000000000000000554012230001635020260 0ustar00rootroot00000000000000#!/bin/sh # ganeti daemons init script # # chkconfig: 2345 99 01 # description: Ganeti Cluster Manager ### BEGIN INIT INFO # Provides: ganeti # Required-Start: $syslog $remote_fs # Required-Stop: $syslog $remote_fs # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Ganeti Cluster Manager # Description: Ganeti Cluster Manager ### END INIT INFO PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin DESC="Ganeti cluster" DAEMON_UTIL=@PKGLIBDIR@/daemon-util SCRIPTNAME="@SYSCONFDIR@/init.d/ganeti" test -f "$DAEMON_UTIL" || exit 0 if [ -r /lib/lsb/init-functions ]; then . /lib/lsb/init-functions elif [ -r /etc/rc.d/init.d/functions ]; then . /etc/rc.d/init.d/functions else echo "Unable to find init functions" exit 1 fi check_exitcode() { RC=$1 if errmsg=$($DAEMON_UTIL check-exitcode $RC) then log_action_end_msg 0 "$errmsg" else log_action_end_msg 1 "$errmsg" fi } start_action() { # called as start_action daemon-name local daemon="$1" log_action_begin_msg "$daemon" $DAEMON_UTIL start "$@" check_exitcode $? } stop_action() { # called as stop_action daemon-name local daemon="$1" log_action_begin_msg "$daemon" $DAEMON_UTIL stop "$@" check_exitcode $? } maybe_do() { requested="$1"; shift action="$1"; shift target="$1" if [ -z "$requested" -o "$requested" = "$target" ]; then $action "$@" fi } start_all() { if ! $DAEMON_UTIL check-config; then log_warning_msg "Incomplete configuration, will not run."
exit 0 fi for i in $($DAEMON_UTIL list-start-daemons); do maybe_do "$1" start_action $i done } stop_all() { for i in $($DAEMON_UTIL list-stop-daemons); do maybe_do "$1" stop_action $i done } status_all() { local daemons="$1" status ret if [ -z "$daemons" ]; then daemons=$($DAEMON_UTIL list-start-daemons) fi status=0 for i in $daemons; do if status_of_proc $($DAEMON_UTIL daemon-executable $i) $i; then ret=0 else ret=$? # Use exit code from first failed call if [ "$status" -eq 0 ]; then status=$ret fi fi done exit $status } if [ -n "$2" ] && ! errmsg=$($DAEMON_UTIL is-daemon-name "$2" 2>&1); then log_failure_msg "$errmsg" exit 1 fi case "$1" in start) log_daemon_msg "Starting $DESC" "$2" start_all "$2" ;; stop) log_daemon_msg "Stopping $DESC" "$2" stop_all "$2" ;; restart|force-reload) log_daemon_msg "Restarting $DESC" "$2" stop_all "$2" start_all "$2" ;; status) status_all "$2" ;; *) log_success_msg "Usage: $SCRIPTNAME {start|stop|force-reload|restart|status}" exit 1 ;; esac exit 0 ganeti-2.9.3/doc/examples/gnt-config-backup.in0000644000000000000000000000422512230001635021202 0ustar00rootroot00000000000000#!/bin/bash # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is an example ganeti script that should be run from cron on all # nodes; it will archive the ganeti configuration into a separate # directory tree via GIT, so that it is possible to restore the # history of cluster configuration changes if needed # The script requires the lockfile-progs package and the git software # Note that since Ganeti 2.0, config.data is the authoritative source # of configuration; as such, we don't need to back up the ssconf files, # and the other files (server.pem, rapi.pem, hmac.key, known_hosts, # etc.) do not hold critical data (they can be regenerated at will, as # long as they are synchronised).
set -e LOCALSTATEDIR=@LOCALSTATEDIR@ SYSCONFDIR=@SYSCONFDIR@ GANETIDIR=${LOCALSTATEDIR}/lib/ganeti CONFIGDATA=${GANETIDIR}/config.data GNTBKDIR=${LOCALSTATEDIR}/lib/gnt-config-backup LOCKFILE=${LOCALSTATEDIR}/lock/gnt-config-backup # exit if no ganeti config file (no cluster configured, or not M/MC) test -f $CONFIGDATA || exit 0 # We use a simple lock method, since our script should be fast enough # (no network, not talking to ganeti-masterd) that we don't expect to # run over 5 minutes if the system is healthy lockfile-create "$LOCKFILE" || exit 1 trap 'lockfile-remove $LOCKFILE' EXIT test -d $GNTBKDIR || mkdir $GNTBKDIR cd $GNTBKDIR test -d .git || git init cp -f $CONFIGDATA config.data git add config.data git commit -q -m "Automatic commit by gnt-config-backup" touch last_run ganeti-2.9.3/doc/examples/hooks/0000755000000000000000000000000012271445544016510 5ustar00rootroot00000000000000ganeti-2.9.3/doc/examples/hooks/ethers0000744000000000000000000000646312230001635017717 0ustar00rootroot00000000000000#!/bin/bash # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is an example ganeti hook that writes the instance mac addresses in the # node's /etc/ethers file. It will pick up the first nic connected to the # TARGET_BRIDGE bridge, and write it down with the syntax "MAC INSTANCE_NAME". # The hook will also send a HUP signal to the daemon whose PID is in # DAEMON_PID_FILE, so that it can load the new /etc/ethers file and use it. # This has been tested in conjunction with dnsmasq's dhcp implementation. # It will also remove any other occurrences for the same instance in the # aforementioned file. This hook supports the "instance-add", "instance-modify", # "instance-remove", and "instance-mirror-replace" ganeti post hook paths. To # install it add a symlink from those hooks' directories to where this file is # installed (with a mode which permits execution). # TARGET_BRIDGE: We'll only add the first nic which gets connected to this # bridge to /etc/ethers. TARGET_BRIDGE="br0" DAEMON_PID_FILES="/var/run/dnsmasq.pid /var/run/dnsmasq/dnsmasq.pid" # In order to handle concurrent execution of this hook, we use the $LOCKFILE. # LOCKFILE_CREATE and LOCKFILE_REMOVE are the path names for the lockfile-progs # programs which we use as helpers.
LOCKFILE="/var/lock/ganeti_ethers" LOCKFILE_CREATE="/usr/bin/lockfile-create" LOCKFILE_REMOVE="/usr/bin/lockfile-remove" hooks_path=$GANETI_HOOKS_PATH [ -n "$hooks_path" ] || exit 1 instance=$GANETI_INSTANCE_NAME [ -n "$instance" ] || exit 1 nic_count=$GANETI_INSTANCE_NIC_COUNT acquire_lockfile() { $LOCKFILE_CREATE $LOCKFILE || exit 1 trap "$LOCKFILE_REMOVE $LOCKFILE" EXIT } update_ethers_from_new() { chmod 644 /etc/ethers.new mv /etc/ethers.new /etc/ethers for file in $DAEMON_PID_FILES; do [ -f "$file" ] && kill -HUP $(< $file) done } if [ "$hooks_path" = "instance-add" -o \ "$hooks_path" = "instance-modify" -o \ "$hooks_path" = "instance-mirror-replace" ] then for i in $(seq 0 $((nic_count - 1)) ); do bridge_var="GANETI_INSTANCE_NIC${i}_BRIDGE" bridge=${!bridge_var} if [ -n "$bridge" -a "$bridge" = "$TARGET_BRIDGE" ]; then mac_var="GANETI_INSTANCE_NIC${i}_MAC" mac=${!mac_var} acquire_lockfile cat /etc/ethers | awk -- "! /^([[:xdigit:]:]*)[[:blank:]]+$instance\>/; END {print \"$mac\t$instance\"}" > /etc/ethers.new update_ethers_from_new break fi done fi if [ "$hooks_path" = "instance-remove" -o \ \( "$hooks_path" = "instance-modify" -a "$nic_count" -eq 0 \) ]; then acquire_lockfile cat /etc/ethers | awk -- "! /^([[:xdigit:]:]*)[[:blank:]]+$instance\>/" \ > /etc/ethers.new update_ethers_from_new fi ganeti-2.9.3/doc/examples/hooks/ipsec.in0000644000000000000000000001662012230001635020130 0ustar00rootroot00000000000000#!/bin/bash # Copyright (C) 2009 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # This is an example ganeti hook that sets up an IPsec ESP link between all the # nodes of a cluster for a given list of protocols. # When run on cluster initialization it will create the shared key to be used # for all the links. When run on node add/removal it will reconfigure IPsec # on each node of the cluster. set -e LOCALSTATEDIR=@LOCALSTATEDIR@ SYSCONFDIR=@SYSCONFDIR@ GNTDATA=${LOCALSTATEDIR}/lib/ganeti LOCKFILE=${LOCALSTATEDIR}/lock/ganeti_ipsec CRYPTALGO=rijndael-cbc KEYPATH=${GNTDATA}/ipsec.key KEYSIZE=24 PROTOSTOSEC="icmp tcp" TCPTOIGNORE="22 1811" # On debian/ubuntu this file is automatically reloaded on boot SETKEYCONF=${SYSCONFDIR}/ipsec-tools.conf SETKEYCUSTOMCONF=${SYSCONFDIR}/ipsec-tools-custom.conf AUTOMATIC_MARKER="# Automatically generated rules" REGEN_KEY_WAIT=2 NODES=${GNTDATA}/ssconf_node_secondary_ips MASTERNAME_FILE=${GNTDATA}/ssconf_master_node MASTERIP_FILE=${GNTDATA}/ssconf_master_ip SSHOPTS="-q -oUserKnownHostsFile=/dev/null -oStrictHostKeyChecking=no \ -oGlobalKnownHostsFile=${GNTDATA}/known_hosts" SCPOPTS="-p $SSHOPTS" CLEANUP=( ) cleanup() { # Perform all registered cleanup operations local i for (( i=${#CLEANUP[@]}-1; i >= 0 ; --i )); do ${CLEANUP[$i]} done } acquire_lockfile() { # Acquire the lockfile associated with system ipsec configuration.
lockfile-create "$LOCKFILE" || exit 1 CLEANUP+=("lockfile-remove $LOCKFILE") } update_system_ipsec() { # Update system ipsec configuration. # $1 : temporary location of a working configuration local TMPCONF="$1" acquire_lockfile mv "$TMPCONF" "$SETKEYCONF" setkey -f "$SETKEYCONF" } update_keyfile() { # Obtain the IPsec keyfile from the master. local MASTERIP=$(< "$MASTERIP_FILE") scp $SCPOPTS "$MASTERIP":"$KEYPATH" "$KEYPATH" } gather_key() { # Output IPsec key, if no key is present on the node # obtain it from master. if [[ ! -f "$KEYPATH" ]]; then update_keyfile fi cut -d ' ' -f2 "$KEYPATH" } gather_key_seqno() { # Output IPsec key sequence number, if no key is present # on the node exit with error. if [[ ! -f "$KEYPATH" ]]; then echo 'Cannot obtain key timestamp, no key file.' >&2 exit 1 fi cut -d ' ' -f1 "$KEYPATH" } update_ipsec_conf() { # Generate a new IPsec configuration and update the system. local TMPCONF=$(mktemp) CLEANUP+=("rm -f $TMPCONF") ESCAPED_HOSTNAME=$(sed 's/\./\\./g' <<< "$HOSTNAME") local MYADDR=$(grep -E "^$ESCAPED_HOSTNAME\\>" "$NODES" | cut -d ' ' -f2) local KEY=$(gather_key) local SETKEYPATH=$(which setkey) { echo "#!$SETKEYPATH -f" echo echo "# Configuration for $MYADDR" echo echo '# This file has been automatically generated. Do not modify by hand,' echo "# add your own rules to $SETKEYCUSTOMCONF instead." echo echo '# Flush SAD and SPD' echo 'flush;' echo 'spdflush;' echo if [[ -f "$SETKEYCUSTOMCONF" ]]; then echo "# Begin custom rules from $SETKEYCUSTOMCONF" cat "$SETKEYCUSTOMCONF" echo "# End custom rules from $SETKEYCUSTOMCONF" echo fi echo "$AUTOMATIC_MARKER" for node in $(cut -d ' ' -f2 "$NODES") ; do if [[ "$node" != "$MYADDR" ]]; then # Traffic to ignore for port in $TCPTOIGNORE ; do echo "spdadd $MYADDR[$port] $node tcp -P out none;" echo "spdadd $node $MYADDR[$port] tcp -P in none;" echo "spdadd $MYADDR $node[$port] tcp -P out none;" echo "spdadd $node[$port] $MYADDR tcp -P in none;" done # IPsec ESP rules echo "add $MYADDR $node esp 0x201 -E $CRYPTALGO $KEY;" echo "add $node $MYADDR esp 0x201 -E $CRYPTALGO $KEY;" for proto in $PROTOSTOSEC ; do echo "spdadd $MYADDR $node $proto -P out ipsec esp/transport//require;" echo "spdadd $node $MYADDR $proto -P in ipsec esp/transport//require;" done echo fi done } > "$TMPCONF" chmod 400 "$TMPCONF" update_system_ipsec "$TMPCONF" } regen_ipsec_conf() { # Reconfigure IPsec on the system when a new key is generated # on the master (assuming the current configuration is working # and a new key is about to be generated on the master). if [[ ! -f "$KEYPATH" ]]; then echo 'Asking to regenerate with new key, but no old key.' >&2 exit 1 fi local CURSEQNO=$(gather_key_seqno) update_keyfile local NEWSEQNO=$(gather_key_seqno) while [[ $NEWSEQNO -le $CURSEQNO ]]; do # Master did not update yet, wait.. sleep $REGEN_KEY_WAIT update_keyfile NEWSEQNO=$(gather_key_seqno) done update_ipsec_conf } clean_ipsec_conf() { # Unconfigure IPsec on the system, removing the key and # the rules previously generated. rm -f "$KEYPATH" local TMPCONF=$(mktemp) CLEANUP+=("rm -f $TMPCONF") # Remove all auto-generated rules sed "/$AUTOMATIC_MARKER/q" "$SETKEYCONF" > "$TMPCONF" chmod 400 "$TMPCONF" update_system_ipsec "$TMPCONF" } generate_secret() { # Generate a random HEX string (length specified by global variable KEYSIZE) python -c "from ganeti import utils; print utils.GenerateSecret($KEYSIZE)" } gen_key() { # Generate a new random key to be used for IPsec, the key is associated with # a sequence number. local KEY=$(generate_secret) if [[ ! 
-f "$KEYPATH" ]]; then # New environment/cluster, let's start from scratch local SEQNO="0" else local SEQNO=$(( $(gather_key_seqno) + 1 )) fi local TMPKEYPATH=$(mktemp) CLEANUP+=("rm -f $TMPKEYPATH") echo -n "$SEQNO 0x$KEY" > "$TMPKEYPATH" chmod 400 "$TMPKEYPATH" mv "$TMPKEYPATH" "$KEYPATH" } trap cleanup EXIT hooks_path="$GANETI_HOOKS_PATH" if [[ ! -n "$hooks_path" ]]; then echo '\$GANETI_HOOKS_PATH not specified.' >&2 exit 1 fi hooks_phase="$GANETI_HOOKS_PHASE" if [[ ! -n "$hooks_phase" ]]; then echo '\$GANETI_HOOKS_PHASE not specified.' >&2 exit 1 fi if [[ "$hooks_phase" = post ]]; then case "$hooks_path" in cluster-init) gen_key ;; cluster-destroy) clean_ipsec_conf ;; cluster-regenkey) # This hook path is not yet implemented in Ganeti, here we suppose it # runs on all the nodes. MASTERNAME=$(< "$MASTERNAME_FILE") if [[ "$MASTERNAME" = "$HOSTNAME" ]]; then gen_key update_ipsec_conf else regen_ipsec_conf fi ;; node-add) update_ipsec_conf ;; node-remove) node_name="$GANETI_NODE_NAME" if [[ ! -n "$node_name" ]]; then echo '\$GANETI_NODE_NAME not specified.' >&2 exit 1 fi if [[ "$node_name" = "$HOSTNAME" ]]; then clean_ipsec_conf else update_ipsec_conf fi ;; *) echo "Hooks path $hooks_path is not for us." >&2 ;; esac else echo "Hooks phase $hooks_phase is not for us." >&2 fi ganeti-2.9.3/doc/design-ovf-support.rst0000644000000000000000000004501412230001635020040 0ustar00rootroot00000000000000============================================================== Ganeti Instance Import/Export using Open Virtualization Format ============================================================== Background ========== Open Virtualization Format is an open standard for packaging information regarding virtual machines. It is used, among others, by VMWare, VirtualBox and XenServer. OVF allows users to migrate between virtualization software without the need to reconfigure hardware, network or operating system. Currently, exporting an instance in Ganeti results in a configuration file that is readable only by Ganeti. This prevents users from changing the platform they use without losing all the machine's configuration. The import function in Ganeti is also currently limited to previously prepared instances. Implementing OVF support allows users to migrate to Ganeti from other platforms, thus potentially increasing the usage. It also enables virtual machine end-users to create their own machines (e.g. in VirtualBox or SUSE Studio) and then add them to a Ganeti cluster, thus providing better personalization. Overview ======== Open Virtualization Format description -------------------------------------- According to the DMTF document introducing the standard: "The Open Virtualization Format (OVF) Specification describes an open, secure, portable, efficient and extensible format for the packaging and distribution of software to be run in virtual machines." OVF supports both single- and multiple-VM configurations in one package, is host- and virtualization platform-independent and optimized for distribution (e.g. by allowing usage of public key infrastructure and providing tools for management of basic software licensing). There are no limitations regarding disk images used, as long as the description is provided. Any hardware described in a proper format (i.e. CIM - Common Information Model) is accepted, although there is no guarantee that every virtualization software will support all types of hardware.
An OVF package should contain exactly one file with the ``.ovf`` extension, which is an XML file specifying the following (per virtual machine): - virtual disks - network description - list of virtual hardware - operating system, if any Each of the elements in the ``.ovf`` file may, if desired, contain a human-readable description of every piece of information given. Additionally, the package may have some disk image files and other additional resources (e.g. ISO images). In order to provide secure means of distribution for OVF packages, the manifest and certificate are provided. The manifest (``.mf`` file) contains checksums for all the files in the OVF package, whereas the certificate (``.cert`` file) contains an X.509 certificate and a checksum of the manifest file. Neither file is compulsory, but the certificate requires the manifest to be present. Supported disk formats ---------------------- Although OVF is claimed to support 'any disk format', what we are interested in is which formats are supported by VM managers that currently use OVF. - VMWare: ``.vmdk`` (which comes in at least 3 different flavours: ``sparse``, ``compressed`` and ``streamOptimized``) - VirtualBox: ``.vdi`` (VirtualBox's format), ``.vmdk``, ``.vhd`` (Microsoft and XenServer); export disk format is always ``.vmdk`` - XenServer: ``.vmdk``, ``.vhd``; export disk format is always ``.vhd`` - Red Hat Enterprise Virtualization: ``.raw`` (raw disk format), ``.cow`` (qemu's ``QCOW2``) - other: AbiCloud, OpenNode Cloud, SUSE Studio, Morfeo Claudia, OpenStack: mostly ``.vmdk`` In our implementation of OVF we allow a choice between raw, cow and vmdk disk formats for both import and export. Other formats convertible using ``qemu-img`` are allowed in import mode, but not tested. The justification is the following: - Raw format is supported as it is the main format of disk images used in Ganeti, thus it is effortless to provide support for this format - Cow is used in Qemu - Vmdk is most commonly supported in virtualization software, and it also has the advantage of producing relatively small disk images, which is an extremely important advantage when moving instances.
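To make the role of ``qemu-img`` concrete, the following sketch (purely illustrative; the converter's actual code may differ) shows the two invocations involved in detecting a disk format and converting it to ``raw``::

  import subprocess

  def GetDiskFormat(path):
    """Detect the format of a disk image using "qemu-img info"."""
    output = subprocess.check_output(["qemu-img", "info", path])
    for line in output.splitlines():
      if line.startswith("file format:"):
        return line.split(":", 1)[1].strip()
    raise ValueError("qemu-img reported no format for %s" % path)

  def ConvertToRaw(source, target):
    """Convert any qemu-img supported image into a raw image."""
    subprocess.check_call(["qemu-img", "convert", "-O", "raw",
                           source, target])
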
Import and export - the closer look =================================== This section contains an overview of how different parts of Ganeti's export info are included in the ``.ovf`` configuration file. It also explains how import is designed to work with incomplete information. Ganeti's backup format vs OVF ----------------------------- .. highlight:: xml The basic structure of the Ganeti ``.ovf`` file is the following:: .. note:: Tags with the ``gnt:`` prefix are Ganeti-specific and are not a part of the OVF standard. .. highlight:: text Whereas Ganeti's export info is of the following form, ``=>`` showing where the data will be in the OVF format:: [instance] disk0_dump = filename => File in References disk0_ivname = name => generated automatically disk0_size = size_in_mb => calculated after disk conversion disk_count = number => generated automatically disk_template = disk_type => gnt:DiskTemplate hypervisor = hyp-name => gnt:Name in gnt:Hypervisor name = inst-name => Name in VirtualSystem nic0_ip = ip => gnt:IPAddress in gnt:Network nic0_link = link => gnt:Link in gnt:Network nic0_mac = mac => gnt:MACAddress in gnt:Network or Item in VirtualHardwareSection nic0_mode = mode => gnt:Mode in gnt:Network nic_count = number => generated automatically tags => gnt:Tags [backend] auto_balanced => gnt:AutoBalance memory = mem_in_mb => Item in VirtualHardwareSection vcpus = number => Item in VirtualHardwareSection [export] compression => ignored os => gnt:Name in gnt:OperatingSystem source => ignored timestamp => ignored version => gnt:VersionId or constants.EXPORT_VERSION [os] => gnt:Parameters in gnt:OperatingSystem [hypervisor] => gnt:Parameters in gnt:Hypervisor In case of multiple networks/disks used by an instance, they will all be saved in appropriate sections as specified above for the first network/disk. Import from other virtualization software ----------------------------------------- In case of importing to Ganeti an OVF package generated by other software, e.g. VirtualBox, some fields required for Ganeti to properly handle the import may be missing. Most often it will happen that such an OVF package will lack the ``gnt:GanetiSection``. If this happens you can specify all the missing parameters in the command line. Please refer to the `Command Line`_ section. In the :doc:`ovfconverter` we provide examples of options when converting from VirtualBox, VMWare and OpenSuseStudio. Export to other virtualization software --------------------------------------- When exporting to other virtualization software, you may notice that there is a section ``gnt:GanetiSection``, containing Ganeti-specific information. This may in **rare** cases cause trouble in importing your instance. If that is the case please do one of the two: 1. Export from Ganeti to OVF with the ``--external`` option - this will skip the non-standard information. 2. Manually remove the gnt:GanetiSection from the ``.ovf`` file. You will also have to recompute the sha1 sum (``sha1sum`` command) of the .ovf file and update your ``.mf`` file with the new value. .. note:: The manual change option is only recommended when you have exported your instance with the ``--format`` option set to something other than ``raw``, or selected ``--compress``. It saves you the time of converting or compressing the disk image. Planned limitations =================== The limitations regarding import of OVF instances generated outside Ganeti will be (in general) the same as the limitations for Ganeti itself. The desired behavior in case of encountering an unsupported element will be to ignore this element's tag without interruption of the import process. Package ------- There are no limitations regarding support for multiple files in a package or packing the OVF package into one OVA (Open Virtual Appliance) file. As for certificates and licenses in the package, their support will be under discussion after completion of the basic features implementation. Multiple Virtual Systems ------------------------ At first only singular instances (i.e. VirtualSystem, not VirtualSystemCollection) will be supported.
In the future multi-tiered appliances containing whole nodes (or even clusters) are considered an option. Disks ----- As mentioned, Ganeti will allow export in ``raw``, ``cow`` and ``vmdk`` formats. This means, for example, that the appropriate ``ovf:format`` will be provided. As for import, we will support all formats that ``qemu-img`` can convert to ``raw``. At this point this means ``raw``, ``cow``, ``qcow``, ``qcow2``, ``vmdk`` and ``cloop``. We do not plan for now to support ``vdi`` or ``vhd`` unless they become part of the qemu-img supported formats. We plan to support compression both for import and export - in gzip format. There is also a possibility to provide the virtual disk in chunks of equal size. The latter will not be implemented in the first version, but we do plan to support it eventually. The ``ovf:format`` tag is not used in our case when importing. Instead we use ``qemu-img info``, which provides enough information for our purposes and is better standardized. Please note that, due to security reasons, we require the disk image to be in the same directory as the ``.ovf`` description file for both import and export. In order to completely ignore disk-related information in the resulting config file, please use the ``--disk-template=diskless`` option. Network ------- Ganeti provides support for routed and bridged mode for the networks. Since the standard OVF format does not contain any information regarding the network type used, we add our own source of such information in the ``gnt:GanetiSection``. In case this additional information is not present, we perform a simple check - if the network name specified in ``NetworkSection`` contains the words ``bridged`` or ``routed``, we consider this to be the network type. Otherwise the option ``auto`` is chosen, in which case the cluster's default value for that field will be used when importing. This provides a safe fallback in case NAT networks are used, as is common e.g. in VirtualBox. Hardware -------- The supported hardware is limited to virtual CPUs, RAM memory, disks and networks. In particular, no USB support is currently provided, as Ganeti does not support USB devices. Operating Systems ----------------- Support for different operating systems depends solely on their accessibility for Ganeti instances. The list of installed OSes can be checked using the ``gnt-os list`` command. References ---------- Files listed in the ``ovf:References`` section cannot be hyperlinks. Other ----- The instance name (``gnt:VirtualSystem\gnt:Name`` or the command line's ``--name`` option) has to be resolvable in order for a successful import using ``gnt-backup import``. _`Command Line` =============== The basic usage of the ovf tool is one of the following:: ovfconverter import filename ovfconverter export --format= filename This will result in a conversion based solely on the content of the provided file. In case some information required to make the conversion is missing, an error will occur. If the output directory should be different from the standard Ganeti export directory (usually ``/srv/ganeti/export``), the ``--output-dir`` option can be used. If the name of the resulting entity should be different from the one read from the file, use the ``--name`` option. Import options -------------- Import options that ``ovfconverter`` supports include options for backend, disks, hypervisor, networks and operating system. If an option is given, it overrides the values provided in the OVF file. Backend ^^^^^^^ ``--backend=option=value`` can be used to set auto balance, number of vcpus and amount of RAM memory.
Please note that when you do not provide a full set of options, the omitted ones will be set to cluster defaults (``auto``). Disks ^^^^^ ``--disk-template=diskless`` causes the converter to ignore all other disk options - both from the .ovf file and the command line. Other disk template options include ``plain``, ``drbd``, ``file``, ``sharedfile`` and ``blockdev``. ``--disk=number:size=value`` causes the converter to create disks instead of converting them from the OVF package; numbers should start with ``0`` and be consecutive. Hypervisor ^^^^^^^^^^ ``-H hypervisor_name`` and ``-H hypervisor_name:option=value`` provide options for the hypervisor. Network ^^^^^^^ The ``--no-nics`` option causes the converter to ignore any network information provided. ``--network=number:option=value`` sets network information according to the provided data, ignoring the OVF package configuration. Operating System ^^^^^^^^^^^^^^^^ ``--os-type=type`` sets the OS type accordingly; this option is **required** when importing an OVF instance not created from a Ganeti config file. ``--os-parameters`` provides options for the chosen operating system. Tags ^^^^ ``--tags=tag1,tag2,tag3`` is a means of providing tags specific to the instance. After the conversion is completed, you may use ``gnt-backup import`` to import the instance into Ganeti. Example:: ovfconverter import file.ovf --disk-template=diskless \ --os-type=lenny-image \ --backend=vcpus=1,memory=512,auto_balance \ -H:xen-pvm \ --net=0:mode=bridged,link=xen-br0 \ --name=xen.i1 [...] gnt-backup import xen.i1 [...] gnt-instance list Export options -------------- Export options include the choice of disk format to convert the disk image (``--format``) and compression of the disk into gzip format (``--compress``). The user can also choose to skip the Ganeti-specific part of the OVF document (``--external``). By default, the exported OVF package will not be contained in an OVA package, but this may be changed by adding the ``--ova`` option. Please note that in order to create an OVF package, it is first required that you export your VM using ``gnt-backup export``. Example:: gnt-backup export -n node1.xen xen.i1 [...] ovfconverter export --format=vmdk --ova --external \ --output-dir=~/xen.i1 \ /srv/ganeti/export/xen.i1.node1.xen/config.ini Implementation details ====================== Disk conversion --------------- Disk conversion for both import and export is done using the external tool called ``qemu-img``. The same tool is used to determine the type of disk, as well as its virtual size. Import ------ Import functionality is implemented using two classes - OVFReader and OVFImporter. The OVFReader class is used to read the contents of the ``.ovf`` file. Every action that requires ``.ovf`` file access is done through that class. It also performs validation of the manifest, if one is present. The result of reading some part of the file is typically a dictionary or a string, containing options which correspond to the ones in the ``config.ini`` file. Only in the case of disks is the resulting value different - it is then a list of disk names. The reason for that is the need for conversion. The OVFImporter class performs all the command-line-like tasks, such as unpacking the OVA package, removing the temporary directory, converting the disk file to raw format or saving the configuration file on disk. It also contains a set of functions that read the options provided in the command line.
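The precedence rule used throughout the import - command line first, ``.ovf`` content as a fallback - can be summarised by a sketch like the following (names are hypothetical, for illustration only)::

  def GatherValue(cmdline_value, read_from_ovf_fn):
    """Return the command line value if given, else read the .ovf file."""
    if cmdline_value is not None:
      return cmdline_value
    return read_from_ovf_fn()
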
The typical workflow for the import is very simple: - read the ``.ovf`` file into memory - verify the manifest - parse each element of the configuration file: name, disk template, hypervisor, operating system, backend parameters, network and disks - check if the option for the element can be read from the command line options - if yes: parse options from the command line - otherwise: read the appropriate portion of the ``.ovf`` file - save the gathered information in the ``config.ini`` file Export ------ Similar to import, export functionality also uses two classes - OVFWriter and OVFExporter. The OVFWriter class produces XML output based on the information given. Its sole role is to separate the creation of the ``.ovf`` file content. The OVFExporter class gathers information from the ``config.ini`` file or the command line and performs necessary operations like disk conversion, disk compression, manifest creation and OVA package creation. The typical workflow for the export is even simpler than for the import: - read the ``config.ini`` file into memory - gather information about certain parts of the instance, convert and compress disks if desired - save each of these elements as a fragment of the XML tree - save the XML tree as the ``.ovf`` file - create the manifest file and fill it with appropriate checksums - if the ``--ova`` option was chosen, pack the results into an ``.ova`` tarfile Work in progress ---------------- - conversion to/from raw disk should be quicker - add graphic card memory to export information (12 MB of memory) - space requirements for conversion + compression + ova are currently enormous - add support for disks in chunks - add support for certificates - investigate why VMWare's ovftool does not work with ovfconverter's compression and ova packaging -- maybe noteworthy: if the OVA archive does not have a disk (i.e. in the OVA package there is only the .ovf and .mf file), then the ovftool works - investigate why new versions of VirtualBox have problems with OVF created by ovfconverter (everything works fine with version 3.16, but not with 4.0) .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-2.6.rst0000644000000000000000000000041412230001635016044 0ustar00rootroot00000000000000================= Ganeti 2.6 design ================= The following design documents have been implemented in Ganeti 2.6: - :doc:`design-cpu-pinning` - :doc:`design-ovf-support` .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/devnotes.rst0000644000000000000000000002011312271422343016117 0ustar00rootroot00000000000000Developer notes =============== .. highlight:: shell-example Build dependencies ------------------ Most dependencies from :doc:`install-quick`, including ``qemu-img`` (marked there as optional) plus (for Python): - `GNU make `_ - `GNU tar `_ - `Gzip `_ - `pandoc `_ - `python-epydoc `_ - `python-sphinx `_ (tested with version 1.1.3) - `python-mock `_ - `graphviz `_ - the `en_US.UTF-8` locale must be enabled on the system - `pylint `_ and its associated dependencies - `pep8 `_ - `PyYAML `_ For older development (Ganeti < 2.4) ``docbook`` was used instead of ``pandoc``. Note that for pylint, at the current moment the following versions must be used:: $ pylint --version pylint 0.26.0, astng 0.24.1, common 0.58.3 The same applies to pep8; other versions may give you errors:: $ pep8 --version 1.3.3 Both these versions are the ones shipped with Ubuntu 13.04. To generate unittest coverage reports (``make coverage``), `coverage `_ needs to be installed.
Installation of all dependencies listed here:: $ apt-get install python-setuptools automake git fakeroot $ apt-get install pandoc python-epydoc graphviz $ apt-get install python-yaml python-mock $ cd / && sudo easy_install \ sphinx \ logilab-astng==0.24.1 \ logilab-common==0.58.3 \ pylint==0.26.0 \ pep8==1.3.3 \ coverage For Haskell development, again all things from the quick install document, plus: - `haddock `_, documentation generator (equivalent to epydoc for Python) - `HsColour `_, again used for documentation (it does source-code pretty-printing) - `hlint `_, a source code linter (equivalent to pylint for Python), recommended version 1.8 or above (tested with 1.8.43) - the `QuickCheck `_ library, version 2.x - the `HUnit `_ library (tested with 1.2.x) - the `test-framework `_ libraries, tested versions: ``test-framework``: 0.6, ``test-framework-hunit``: 0.2.7, ``test-framework-quickcheck2``: 0.2.12.1 - ``hpc``, which comes with the compiler, so you should already have it - `shelltestrunner `_, used for running shell-based unit-tests - `temporary `_ library, tested with version 1.1.2.3 Under Debian Wheezy or later, these can be installed (on top of the required ones from the quick install document) via:: $ apt-get install libghc-quickcheck2-dev libghc-hunit-dev \ libghc-test-framework-dev \ libghc-test-framework-quickcheck2-dev \ libghc-test-framework-hunit-dev \ libghc-temporary-dev \ hscolour hlint Or alternatively via ``cabal``:: $ cabal install QuickCheck HUnit \ test-framework test-framework-quickcheck2 test-framework-hunit \ temporary hscolour hlint shelltestrunner Configuring for development --------------------------- Run the following command (only use ``PYTHON=...`` if you need to use a different python version):: $ ./autogen.sh && \ ./configure --prefix=/usr/local --sysconfdir=/etc --localstatedir=/var Note that doing development on a machine which already has Ganeti installed is problematic, as ``PYTHONPATH`` behaviour can be confusing (see Issue 170 for a bit of history/details; in general it works if the installed and developed versions are very similar, and/or if PYTHONPATH is customised correctly). As such, in general it's recommended to use a "clean" machine for Ganeti development. Haskell development notes ------------------------- There are a few things which can help with writing or debugging the Haskell code. You can run the Haskell linter :command:`hlint` via:: $ make hlint This is not enabled by default (as the htools component is optional). The above command will generate both output on the terminal and, if any warnings are found, also an HTML report at ``doc/hs-lint.html``. When writing or debugging TemplateHaskell code, it's useful to see what the splices are converted to. This can be done via:: $ make HEXTRA="-ddump-splices" Or, more interactively:: $ ghci λ> :set -ddump-splices λ> :l src/Ganeti/Objects.hs And you will get the spliced code as the module is loaded. To build profiling code you must install the ``ghc-prof`` (or ``ghc6-prof``) package, and all the relevant libraries with their ``-prof`` counterparts. If installing libraries through cabal, the config file should include ``library-profiling: True`` or the ``-p`` flag should be used. Any library already installed can be updated by passing ``--reinstall`` as well. Due to the way TemplateHaskell works, it's not straightforward to build profiling code.
The recommended way is to run ``make hs-prof``, or alternatively the manual sequence is:: $ make clean $ make src/htools HEXTRA="-osuf .o" $ rm src/htools $ make src/htools HEXTRA="-osuf .prof_o -prof -auto-all" This will build the binary twice, as required by the TemplateHaskell documentation, the second time with profiling enabled. The binary files generated by compilation and the profiling/coverage files can "break" tab-completion in the sources; they can be ignored, for example, in bash via ``.bashrc``:: FIGNORE='.o:.hi:.prof_o:.tix' or in emacs via ``completion-ignored-extensions`` (run ``M-x customize-var completion-ignored-extensions``). Running individual tests ~~~~~~~~~~~~~~~~~~~~~~~~ When developing code, running the entire test suite can be slow. Running individual tests is possible. There are different Makefile targets for running individual Python and Haskell tests. For Python tests:: $ export PYTHONPATH=$PWD $ python ./test/py/ganeti.%mytest% For Haskell tests:: $ make hs-test-%pattern% Where ``pattern`` can be a simple test pattern (e.g. ``comma``, matching any test whose name contains ``comma``), a test pattern denoting a group (ending with a slash, e.g. ``Utils/``), or a more complex glob pattern. For more details, search for glob patterns in the documentation of `test-framework `_. For individual Haskell shelltests:: $ make hs-shell-%name% which runs the test ``test/hs/shelltests/htools-%name%.test``. For example, to run the test ``test/hs/shelltests/htools-balancing.test``, use:: $ make hs-shell-balancing For combined Haskell shelltests:: $ make hs-shell-{%name1%,%name2%,...} for example:: $ make hs-shell-{balancing,basic} Packaging notes =============== Ganeti is mostly developed and tested on `Debian `_-based distributions, while still keeping adaptability to other Linux distributions in mind. The ``doc/examples/`` directory contains a number of potentially useful scripts and configuration files. Some of them might need adjustment before use. ``daemon-util`` --------------- This script, in the source code as ``daemons/daemon-util.in``, is used to start/stop Ganeti and do a few other things related to system daemons. It is recommended to also use ``daemon-util`` from the system's init scripts. That way the code starting and stopping daemons is shared and future changes have to be made in only one place. ``daemon-util`` reads extra arguments from variables (``*_ARGS``) in ``/etc/default/ganeti``. When modifying ``daemon-util``, take care not to remove support for the ``EXTRA_*_ARGS`` variables for starting daemons. Some parts of Ganeti use them to pass additional arguments when starting a daemon. The ``reload_ssh_keys`` function can be adjusted to use another command for reloading the OpenSSH daemon's host keys. .. vim: set textwidth=72 : ganeti-2.9.3/doc/design-shared-storage.rst0000644000000000000000000003022212244641676020463 0ustar00rootroot00000000000000============================= Ganeti shared storage support ============================= This document describes the changes in Ganeti 2.3+ compared to the Ganeti 2.3 storage model. It also documents the ExtStorage Interface. .. contents:: :depth: 4 .. highlight:: shell-example Objective ========= The aim is to introduce support for externally mirrored, shared storage. This includes two distinct disk templates: - A shared filesystem containing instance disks as regular files typically residing on a networked or cluster filesystem (e.g. NFS, AFS, Ceph, OCFS2, etc.).
- Instance images being shared block devices, typically LUNs residing on a SAN appliance. Background ========== DRBD is currently the only shared storage backend supported by Ganeti. DRBD offers the advantages of high availability while running on commodity hardware at the cost of high network I/O for block-level synchronization between hosts. DRBD's master-slave model has greatly influenced Ganeti's design, primarily by introducing the concept of primary and secondary nodes and thus defining an instance's "mobility domain". Although DRBD has many advantages, many sites choose to use networked storage appliances for Virtual Machine hosting, such as SAN and/or NAS, which provide shared storage without the administrative overhead of DRBD or the limitation of a 1:1 master-slave setup. Furthermore, new distributed filesystems such as Ceph are becoming viable alternatives to expensive storage appliances. Support for both modes of operation, i.e. a shared block storage and a shared file storage backend, would make Ganeti a robust choice for high-availability virtualization clusters. Throughout this document, the term "externally mirrored storage" will refer to both modes of shared storage, suggesting that Ganeti does not need to take care of the mirroring process from one host to another. Use cases ========= We consider the following use cases: - A virtualization cluster with FibreChannel shared storage, mapping at least one LUN per instance, accessible by the whole cluster. - A virtualization cluster with instance images stored as files on an NFS server. - A virtualization cluster storing instance images on a Ceph volume. Design Overview =============== The design addresses the following procedures: - Refactoring of all code referring to constants.DTS_NET_MIRROR. - Obsolescence of the primary-secondary concept for externally mirrored storage. - Introduction of a shared file storage disk template for use with networked filesystems. - Introduction of a shared block device disk template with device adoption. - Introduction of the External Storage Interface. Additionally, mid- to long-term goals include: - Support for external "storage pools". Refactoring of all code referring to constants.DTS_NET_MIRROR ============================================================= Currently, all storage-related decision-making depends on a number of frozensets in lib/constants.py, typically constants.DTS_NET_MIRROR. However, constants.DTS_NET_MIRROR is used to signify two different attributes: - A storage device that is shared - A storage device whose mirroring is supervised by Ganeti We propose the introduction of two new frozensets to ease decision-making: - constants.DTS_EXT_MIRROR, holding externally mirrored disk templates - constants.DTS_MIRRORED, being a union of constants.DTS_EXT_MIRROR and DTS_NET_MIRROR. Additionally, DTS_NET_MIRROR will be renamed to DTS_INT_MIRROR to reflect the status of the storage as internally mirrored by Ganeti. Thus, checks could be grouped into the following categories: - Mobility checks, like whether an instance failover or migration is possible should check against constants.DTS_MIRRORED - Syncing actions should be performed only for templates in constants.DTS_NET_MIRROR Obsolescence of the primary-secondary node model ================================================ The primary-secondary node concept has primarily evolved through the use of DRBD. In a globally shared storage framework without need for external sync (e.g.
SAN, NAS, etc.), such a notion does not apply for the following reasons: 1. Access to the storage does not necessarily imply different roles for the nodes (e.g. primary vs secondary). 2. The same storage is available to potentially more than 2 nodes. Thus, an instance backed by a SAN LUN for example may actually migrate to any of the other nodes and not just a pre-designated failover node. The proposed solution is to use the iallocator framework for run-time decision making during migration and failover, for nodes with disk templates in constants.DTS_EXT_MIRROR. Modifications to gnt-instance and gnt-node will be required to accept target node and/or iallocator specification for these operations. Modifications of the iallocator protocol will be required to address at least the following needs: - Allocation tools must be able to distinguish between internal and external storage - Migration/failover decisions must take into account shared storage availability Introduction of a shared file disk template =========================================== Basic shared file storage support can be implemented by creating a new disk template based on the existing FileStorage class, with only minor modifications in lib/bdev.py. The shared file disk template relies on a shared filesystem (e.g. NFS, AFS, Ceph, OCFS2 over SAN or DRBD) being mounted on all nodes under the same path, where instance images will be saved. A new cluster initialization option is added to specify the mountpoint of the shared filesystem. The remainder of this document deals with shared block storage. Introduction of a shared block device template ============================================== Basic shared block device support will be implemented with an additional disk template. This disk template will not feature any kind of storage control (provisioning, removal, resizing, etc.), but will instead rely on the adoption of already-existing block devices (e.g. SAN LUNs, NBD devices, remote iSCSI targets, etc.). The shared block device template will make the following assumptions: - The adopted block device has a consistent name across all nodes, enforced e.g. via udev rules. - The device will be available with the same path under all nodes in the node group. Introduction of the External Storage Interface ============================================== Overview -------- To extend the shared block storage template and give Ganeti the ability to control and manipulate external storage (provisioning, removal, growing, etc.) we need a more generic approach. The generic method for supporting external shared storage in Ganeti will be to have an ExtStorage provider for each external shared storage hardware type. The ExtStorage provider will be a set of files (executable scripts and text files), contained inside a directory which will be named after the provider. This directory must be present across all nodes of a nodegroup (Ganeti doesn't replicate it), in order for the provider to be usable by Ganeti for this nodegroup (valid). The external shared storage hardware should also be accessible by all nodes of this nodegroup. An "ExtStorage provider" will have to provide the following methods: - Create a disk - Remove a disk - Grow a disk - Attach a disk to a given node - Detach a disk from a given node - SetInfo to a disk (add metadata) - Verify its supported parameters The proposed ExtStorage interface borrows heavily from the OS interface and follows a one-script-per-function approach.
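To give a flavour of this one-script-per-function approach before the exact interface is specified below, a hypothetical ``attach`` script for a provider that simply maps volumes to pre-existing device nodes could look like the following sketch (the ``/dev/myextstorage`` path scheme is made up for illustration)::

  #!/bin/sh
  # Hypothetical ExtStorage "attach" sketch; VOL_NAME is exported by
  # Ganeti as described below, the /dev/myextstorage path is made up.
  set -e
  path="/dev/myextstorage/$VOL_NAME"
  test -b "$path" || { echo "volume $VOL_NAME not found" >&2; exit 1; }
  # On success the block device's full path must be printed on stdout.
  echo "$path"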
An ExtStorage provider is expected to provide the following scripts: - ``create`` - ``remove`` - ``grow`` - ``attach`` - ``detach`` - ``setinfo`` - ``verify`` All scripts will be called with no arguments and get their input via environment variables. A common set of variables will be exported for all commands, and some commands might get extra ones. ``VOL_NAME`` The name of the volume. This is unique within Ganeti, which uses it to refer to a specific volume inside the external storage. ``VOL_SIZE`` The volume's size in mebibytes. ``VOL_NEW_SIZE`` Available only to the `grow` script. It declares the new size of the volume after grow (in mebibytes). ``EXTP_name`` ExtStorage parameter, where `name` is the parameter in upper-case (same as OS interface's ``OSP_*`` parameters). ``VOL_METADATA`` A string containing metadata to be set for the volume. This is exported only to the ``setinfo`` script. All scripts except `attach` should return 0 on success and non-zero on error, accompanied by an appropriate error message on stderr. The `attach` script should return a string on stdout on success, which is the block device's full path, after it has been successfully attached to the host node. On error it should return non-zero. Implementation -------------- To support the ExtStorage interface, we will introduce a new disk template called `ext`. This template will implement the existing Ganeti disk interface in `lib/bdev.py` (create, remove, attach, assemble, shutdown, grow, setinfo), and will simultaneously pass control to the external scripts to actually handle the above actions. The `ext` disk template will act as a translation layer between the current Ganeti disk interface and the ExtStorage providers. We will also introduce a new IDISK_PARAM called `IDISK_PROVIDER = provider`, which will be used at the command line to select the desired ExtStorage provider. This parameter will be valid only for template `ext`, e.g.:: $ gnt-instance add -t ext --disk=0:size=2G,provider=sample_provider1 The ExtStorage interface will allow different disks to be created by different providers, e.g.:: $ gnt-instance add -t ext --disk=0:size=2G,provider=sample_provider1 \ --disk=1:size=1G,provider=sample_provider2 \ --disk=2:size=3G,provider=sample_provider1 Finally, the ExtStorage interface will support passing of parameters to the ExtStorage provider. This will also be done per disk, from the command line:: $ gnt-instance add -t ext --disk=0:size=1G,provider=sample_provider1,\ param1=value1,param2=value2 The above parameters will be exported to the ExtStorage provider's scripts as the environment variables: - `EXTP_PARAM1 = str(value1)` - `EXTP_PARAM2 = str(value2)` We will also introduce a new Ganeti client called `gnt-storage` which will be used to diagnose ExtStorage providers and show information about them, similarly to the way `gnt-os diagnose` and `gnt-os info` handle OS definitions. Long-term shared storage goals ============================== Storage pool handling --------------------- A new cluster configuration attribute will be introduced, named "storage_pools", modeled as a dictionary mapping storage pools to external storage providers (see below), e.g.:: { "nas1": "foostore", "nas2": "foostore", "cloud1": "barcloud", } Ganeti will not interpret the contents of this dictionary, although it will provide methods for manipulating them under some basic constraints (pool identifier uniqueness, driver existence).
The manipulation of storage pools will be performed by implementing new options to the `gnt-cluster` command:: $ gnt-cluster modify --add-pool nas1 foostore $ gnt-cluster modify --remove-pool nas1 # There must be no instances using # the pool to remove it Furthermore, the storage pools will be used to indicate the availability of storage pools to different node groups, thus specifying the instances' "mobility domain". The pool in which to put the new instance's disk will be defined on the command line during `instance add`. This will become possible by replacing the IDISK_PROVIDER parameter with a new one, called `IDISK_POOL = pool`. The cmdlib logic will then look at the cluster-level mapping dictionary to determine the ExtStorage provider for the given pool. gnt-storage ----------- The ``gnt-storage`` client can be extended to support pool management (creation/modification/deletion of pools, connection/disconnection of pools to nodegroups, etc.). It can also be extended to diagnose and provide information for internal disk templates too, such as lvm and drbd. .. vim: set textwidth=72 : ganeti-2.9.3/doc/ovfconverter.rst0000644000000000000000000001573012230001635017011 0ustar00rootroot00000000000000============= OVF converter ============= Using ``ovfconverter`` from the ``tools`` directory, one can easily convert a previously exported Ganeti instance into an OVF package, supported by VMWare, VirtualBox and some other virtualization software. It is also possible to take an instance exported from such a tool and convert it to a Ganeti config file, used by the ``gnt-backup import`` command. For the internal design of the converter and a more detailed description, including a listing of the available command line options, please refer to :doc:`design-ovf-support`. As the amount of Ganeti-specific detail that needs to be provided in order to import an external instance is rather large, we present here some examples of importing instances from different sources. It is also worth noting that there are some limitations regarding support for different hardware. Limitations on import ===================== Network ------- Available modes for the network include ``bridged`` and ``routed``. There is no ``NAT`` mode, which is typically used e.g. by VirtualBox. For most use cases this should have no effect, since if ``NetworkSection`` contains any networks which are not discovered as ``bridged`` or ``routed``, the network mode is assigned automatically, using Ganeti's cluster defaults. Backend ------- The only values that are taken into account regarding Virtual Hardware (described in ``VirtualHardwareSection`` of the ``.ovf`` file) are: - number of virtual CPUs - RAM memory - hard disks - networks Neither USB nor CD-ROM drives are used in Ganeti. We decided to simply ignore unused elements of this section, so their presence won't raise any warnings. Operating System ---------------- The list of operating systems available on a cluster can be viewed using the ``gnt-os list`` command. When importing from an external source, providing the OS type on the command line (``--os-type=...``) is **required**. This is because even if the type is given in the OVF description, it is not detailed enough for Ganeti to know which os-specific scripts to use. Please note that an instance containing disks may only be imported using an OS script that supports raw disk images. References ---------- Files listed in ``ovf:References`` section cannot be hyperlinks.
Limitations on export ===================== Disk content ------------ Most Ganeti instances do not contain grub. This results in some problems when importing to virtualization software that does expect it. Examples of such software include VirtualBox and VMWare. To avoid trouble, please install grub inside the instance before exporting it. Import to VirtualBox -------------------- The ``format`` option should be set to ``vmdk`` in order for the instance to be importable by VirtualBox. Tests using existing versions of VirtualBox (3.16) suggest that VirtualBox does not support disk compression or OVA packaging. In future versions this might change. Import to VMWare ---------------- Importing a Ganeti instance to VMWare was tested using ``ovftool``. The ``format`` option should be set to ``vmdk`` in order for the instance to be importable by VMWare. The presence of the Ganeti section does seem to cause some problems, and therefore it is recommended to use the ``--external`` option on export. Import of compressed disks generated by ovfconverter was impossible in the current version of ``ovftool`` (2.1.0). This seems to be related to the old ``vmdk`` version. Since the conversion to ``vmdk`` format is done using ``qemu-img``, it is possible, and in fact expected, that future versions of the latter tool will resolve this problem. Import examples =============== Ganeti's OVF ------------ If you are importing an instance created using ``ovfconverter export``, you most probably will not have to provide any additional information. In that case, the following is all you need (unless you wish to change some configuration options):: ovfconverter import ganeti.ovf [...] gnt-backup import -n Virtualbox, VMWare and other external sources --------------------------------------------- In case of importing from an external source, you will most likely have to provide the following details: - ``os-type`` can be any operating system listed by ``gnt-os list`` - ``name`` that has to be resolvable, as it will be used as the instance name (even if your external instance has a name, it most probably is not resolvable to an IP address) These are not the only options, but the recommended ones. For the complete list of available options please refer to `Command Line description ` A minimal but complete example of importing a VirtualBox OVF instance may look like:: ovfconverter virtualbox.ovf --os-type=lenny-image \ --name=xen.test.i1 --disk-template=diskless [...] gnt-backup import -n node1.xen xen.test.i1 Export example ============== Exporting an instance into ``.ovf`` format is pretty straightforward and requires little, if any, explanation. The only compulsory detail is the required disk format, provided using the ``--format`` option. Export to another Ganeti instance --------------------------------- If for some reason it is convenient for you to use ``ovfconverter`` to move an instance between clusters (e.g. because of the disk compression), the complete example of export may look like this:: gnt-backup export -n node1.xen xen.test.i1 [...] ovfconverter export --format=vmdk --ova \ /srv/ganeti/export/xen.i1.node1.xen/config.ini [...] The result is then in ``/srv/ganeti/export/xen.i1.node1.xen/xen.test.i1.ova`` Export to Virtualbox/VMWare/other external tool ----------------------------------------------- Typically, when exporting to an external tool we do not want the Ganeti-specific configuration to be saved. In that case, simply use the ``--external`` option:: gnt-backup export -n node1.xen xen.test.i1 [...]
ovfconverter export --external --output-dir ~/ganeti-instance/ \ /srv/ganeti/export/xen.i1.node1.xen/config.ini Known issues ============ Conversion errors ----------------- If you are encountering trouble when converting the disk, please ensure that you have the newest ``qemu-img`` version. OVA and compression ------------------- The compressed disks and OVA packaging do not work correctly in either VirtualBox (old version) or VMWare. VirtualBox (3.16 OSE) does not seem to support those two, so there is very little we can do about this. As for VMWare, the reason behind it not accepting compressed or packed instances created by ovfconverter seems to be related to the old vmdk version. Problems on newest VirtualBox ----------------------------- In Oracle VM Virtualbox 4.0+ there seems to be a problem when importing any OVF instance created by ovfconverter. The reasons are again unknown; this will be investigated. Disk space ---------- The disk space requirements for both import and export are at the moment very large - we require free space up to about 3-4 times the size of the disks. This will most likely be changed in future versions. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-hroller.rst0000644000000000000000000001631512267470014017220 0ustar00rootroot00000000000000============ HRoller tool ============ .. contents:: :depth: 4 This is a design document detailing the cluster maintenance scheduler, HRoller. Current state and shortcomings ============================== To enable automating cluster-wide reboots a new htool, called HRoller, was added to Ganeti starting from version 2.7. This tool helps parallelize offline cluster maintenances by calculating which nodes are not both primary and secondary for a DRBD instance, and thus can be rebooted at the same time, when all instances are down. The way this is done is documented in the :manpage:`hroller(1)` manpage. We would now like to perform online maintenance on the cluster by rebooting nodes after evacuating their primary instances (rolling reboots). Proposed changes ================ New options ----------- - HRoller should be able to operate on single nodegroups (-G flag) or select its target nodes through some other means (e.g. via a tag or a regexp). (Note that individual node selection is already possible via the -O flag, which makes hroller ignore a node altogether). - HRoller should handle non-redundant instances: currently these are ignored but there should be a way to select its behavior between "it's ok to reboot a node when a non-redundant instance is on it" or "skip nodes with non-redundant instances". This will only be selectable globally, and not per instance. - Hroller will make sure to keep any instance which is up in its current state, via live migrations, unless explicitly overridden. The algorithm that will be used to calculate the rolling reboot with live migrations is described below, and any override on considering the instance status will only be possible on the whole run, and not per-instance. Calculating rolling maintenances -------------------------------- In order to perform rolling maintenance we need to migrate instances off the nodes before a reboot.
How this can be done depends on the instance's disk template and status: Down instances ++++++++++++++ If an instance was shut down when the maintenance started, it will be considered for avoiding a simultaneous reboot of its primary and secondary nodes, but will *not* be considered as a target for the node evacuation. This avoids needlessly moving its primary around, since it won't suffer downtime anyway. Note that a node with non-redundant instances will only ever be considered good for rolling-reboot if these are down (or the checking of status is overridden) *and* an explicit option to allow it is set. DRBD ++++ Each node must migrate all instances off to their secondaries, and then can either be rebooted, or the secondaries can be evacuated as well. Since currently doing a ``replace-disks`` on DRBD breaks redundancy, it's not any safer than temporarily rebooting a node with secondaries on them (citation needed). As such we'll implement for now just the "migrate+reboot" mode, and focus later on replace-disks as well. In order to do that we can use the following algorithm: 1) Compute node sets that don't contain both the primary and the secondary of any instance, and also don't contain the primary nodes of two instances that have the same node as secondary. These can be obtained by computing a coloring of the graph with nodes as vertices and an edge between two nodes if either condition prevents simultaneous maintenance. (This is the current algorithm of :manpage:`hroller(1)` with the extension that the graph to be colored has additional edges between the primary nodes of two instances sharing their secondary node.) 2) It is then possible to migrate in parallel all nodes in a set created at step 1, and then reboot/perform maintenance on them, and migrate back their original primaries, which allows the computation above to be reused for each following set without N+1 failures being triggered, if none were present before. See below about the actual execution of the maintenance. Non-DRBD ++++++++ All non-DRBD disk templates that can be migrated have no "secondary" concept. As such instances can be migrated to any node (in the same nodegroup). In order to do the job we can do one of the following: - Perform migrations on one node at a time, perform the maintenance on that node, and proceed (the node will then be targeted again to host instances automatically, as hail chooses targets for the instances between all nodes in a group). Nodes in different nodegroups can be handled in parallel. - Perform migrations on one node at a time, but without waiting for the first node to come back before proceeding. This allows us to continue, at reduced cluster capacity, until no more capacity is available in the nodegroup, and then to wait for some nodes to come back so that capacity is available again for the last few nodes. - Pre-calculate sets of nodes that can be migrated together (probably with a greedy algorithm) and parallelize between them, with the migrate-back approach discussed for DRBD to perform the calculation only once. Note that for non-DRBD disks that still use local storage (e.g. RBD and plain) redundancy might break anyway, and nothing except the first algorithm might be safe. This would perhaps be a good reason to consider better management of RBD pools, if those are implemented on top of the nodes' storage, rather than on dedicated storage machines. Full-Evacuation +++++++++++++++ If full evacuation of the nodes to be rebooted is desired, a simple migration is not enough for the DRBD instances.
To keep the number of disk operations small, we restrict moves to ``migrate, replace-secondary``. That is, after migrating instances out of the nodes to be rebooted, replacement secondaries are searched for, for all instances that have their then secondary on one of the rebooted nodes. This is done by a greedy algorithm, refining the initial reboot partition, if necessary. Future work =========== Hroller should become able to execute rolling maintenances, rather than just calculate them. For this to succeed properly one of the following must happen: - HRoller handles rolling maintenances that happen at the same time as unrelated cluster jobs, and thus recalculates the maintenance at each step - HRoller can selectively drain the cluster so it's sure that only the rolling maintenance can be going on The ``replace-disks`` functionality for DRBD nodes should be implemented. Note that once we support a DRBD version that allows multiple secondaries, this can be done safely, without losing replication at any time, by adding a temporary secondary and dropping the previous one only when the sync is finished. Non-redundant (plain or file) instances should have a way to be moved off as well via plain storage live migration or ``gnt-instance move`` (which requires downtime). If/when RBD pools can be managed inside Ganeti, care can be taken so that the pool is also evacuated from a node before it's put into maintenance. This is equivalent to evacuating DRBD secondaries. Master failovers during the maintenance should be performed by hroller. This requires RPC/RAPI support for master failover. Hroller should also be modified to better support running on the master itself and continuing on the new master. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/users/0000755000000000000000000000000012271445544014710 5ustar00rootroot00000000000000ganeti-2.9.3/doc/users/groupmemberships.in0000644000000000000000000000054212244641676020640 0ustar00rootroot00000000000000@GNTMASTERUSER@ @GNTDAEMONSGROUP@ @GNTCONFDUSER@ @GNTDAEMONSGROUP@ @GNTLUXIDUSER@ @GNTDAEMONSGROUP@ @GNTRAPIUSER@ @GNTDAEMONSGROUP@ @GNTMONDUSER@ @GNTDAEMONSGROUP@ @GNTMASTERUSER@ @GNTADMINGROUP@ @GNTRAPIUSER@ @GNTADMINGROUP@ @GNTMASTERUSER@ @GNTCONFDGROUP@ @GNTMONDUSER@ @GNTMASTERDGROUP@ @GNTLUXIDUSER@ @GNTMASTERDGROUP@ @GNTLUXIDUSER@ @GNTCONFDGROUP@ ganeti-2.9.3/doc/users/groups.in0000644000000000000000000000015412244641676016563 0ustar00rootroot00000000000000@GNTDAEMONSGROUP@ @GNTADMINGROUP@ @GNTMASTERUSER@ @GNTRAPIUSER@ @GNTCONFDUSER@ @GNTLUXIDUSER@ @GNTMONDUSER@ ganeti-2.9.3/doc/users/users.in0000644000000000000000000000025112244641676016403 0ustar00rootroot00000000000000@GNTMASTERUSER@ @GNTMASTERDGROUP@ @GNTRAPIUSER@ @GNTRAPIGROUP@ @GNTCONFDUSER@ @GNTCONFDGROUP@ @GNTLUXIDUSER@ @GNTLUXIDGROUP@ @GNTMONDUSER@ @GNTMONDGROUP@ @GNTNODEDUSER@ ganeti-2.9.3/doc/design-query-splitting.rst0000644000000000000000000001474012271422343020726 0ustar00rootroot00000000000000=========================================== Splitting the query and job execution paths =========================================== Introduction ============ Currently, the master daemon has two main roles: - execute jobs that change the cluster state - respond to queries Due to the technical details of the implementation, the job execution and query paths interact with each other, and for example the "masterd hang" issue that we had late in the 2.5 release cycle was due to the interaction between job queries and job execution.
Furthermore, also because of technical implementation details (Python lacking read-only variables being one example), we can't share internal data structures for jobs; instead, in the query path, we read them from disk in order to not block job execution due to locks. All these point to the fact that the integration of both queries and job execution in the same process (multi-threaded) creates more problems than advantages, and hence we should look into separating them. Proposed design =============== In Ganeti 2.7, we will introduce a separate, optional daemon to handle queries (note: whether this is an actual "new" daemon, or its functionality is folded into confd, remains to be seen). This daemon will expose exactly the same Luxi interface as masterd, except that job submission will be disabled. If so configured (at build time), clients will be changed to: - keep sending REQ_SUBMIT_JOB, REQ_SUBMIT_MANY_JOBS, and all requests except REQ_QUERY_* to the masterd socket (but also QR_LOCK) - redirect all REQ_QUERY_* requests to the new Luxi socket of the new daemon (except generic query with QR_LOCK) This new daemon will serve both pure configuration queries (which confd can already serve), and run-time queries (which currently only masterd can serve). Since the RPC can be done from any node to any node, the new daemon can run on all master candidates, not only on the master node. This means that all gnt-* list options can now be run on nodes other than the master node. If we implement this as a separate daemon that talks to confd, then we could actually run this on all nodes of the cluster (to be decided). During the 2.7 release, masterd will still respond to queries itself, but it will log all such queries for identification of "misbehaving" clients. Advantages ---------- As far as I can see, this will bring some significant advantages. First, we remove any interaction between the job execution and cluster query state. This means that bugs in the locking code (job execution) will not impact the query of the cluster state, nor the query of the job execution itself. Furthermore, we will be able to have different tuning parameters between job execution (e.g. 25 threads for job execution) versus query (since these are transient, we could practically have unlimited numbers of query threads). As a result of the above split, we move from the current model, where shutdown of the master daemon practically "breaks" the entire Ganeti functionality (no job execution nor queries, not even connecting to the instance console), to a split model: - if just masterd is stopped, then other cluster functionality remains available: listing instances, connecting to the console of an instance, etc. - if just "luxid" is stopped, masterd can still process jobs, and one can furthermore run queries from other nodes (MCs) - only if both are stopped, we end up with the previous state This will help, for example, in the case where the master node has crashed and we haven't failed it over yet: querying and investigating the cluster state will still be possible from other master candidates (on small clusters, this will mean from all nodes). A last advantage is that we will finally be able to reduce the footprint of masterd; instead of the previously discussed splitting of individual jobs, which requires duplication of all the base functionality, this will just split the queries, a more trivial piece of code than job execution.
This should be a reasonable work effort, with a much smaller impact in case of failure (we can still run masterd as before). Disadvantages ------------- We might get increased inconsistency during queries, as there will be a delay between masterd saving an updated configuration and confd/query loading and parsing it. However, this could be compensated by the fact that queries will only look at "snapshots" of the configuration, whereas before they could also look at "in-progress" modifications (due to the non-atomic updates). I think these will cancel each other out; we will have to see how it works in practice. Another disadvantage *might* be that we have a more complex setup, due to the introduction of a new daemon. However, the query path will be much simpler, and when we remove the query functionality from masterd we should have a more robust system. Finally, we have QR_LOCK, which is an internal query related to the master daemon, using the same infrastructure as the other queries (related to cluster state). This is unfortunate, and will require untangling in order to keep code duplication low. Long-term plans =============== If this works well, the plan would be (tentatively) to disable the query functionality in masterd completely in Ganeti 2.8, in order to remove the duplication. This might change based on how/if we split the configuration/locking daemon out, or not. Once we split this out, there is no technical reason why we can't execute any query from any node; except maybe practical reasons (network topology, remote nodes, etc.) or security reasons (if/whether we want to change the cluster security model). In any case, it should be possible to do this in a reliable way from all master candidates. Some implementation details --------------------------- We will fold this into confd, at least initially, to reduce the proliferation of daemons. Haskell will limit (if used properly) any too-deep integration between the old "confd" functionality and the new query one. As advantages, we'll have a single daemon that handles configuration queries. The redirection of Luxi requests can be easily done based on the request type, if we have both sockets open, or if we open on demand. We don't want the masterd to talk to the luxid itself (hidden redirection), since we want to be able to run queries while masterd is down. During the 2.7 release cycle, we can test all queries against both masterd and luxid in QA, so we know we have exactly the same interface and it is consistent. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/security.rst0000644000000000000000000002416612271422343016151 0ustar00rootroot00000000000000Security in Ganeti ================== Documents Ganeti version 2.9 Ganeti was developed to run on internal, trusted systems. As such, the security model is all-or-nothing. Up to version 2.3 all Ganeti code ran as root. Since version 2.4 it is possible to run all daemons except the node daemon and the monitoring daemon as non-root users by specifying user names and groups at build time. The node daemon continues to require root privileges to create logical volumes, DRBD devices, start instances, etc. Cluster commands can be run as root or by users in a group specified at build time. The monitoring daemon requires root privileges in order to be able to access and present information that is only available to root (such as the output of the ``xm`` command of Xen).
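As a sketch of how such a split-user build might be configured (the prefix values below are illustrative only; see ``./configure --help`` for the authoritative option names and defaults)::

  $ ./configure --prefix=/usr/local --sysconfdir=/etc \
      --localstatedir=/var \
      --with-user-prefix=gnt- --with-group-prefix=gnt-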
Host issues ----------- For a host on which the Ganeti software has been installed, but not joined to a cluster, there are no changes to the system. For a host that has been joined to the cluster, there are very important changes: - The host will have its SSH host key replaced with the one of the cluster (which is the one the initial node had at the cluster creation) - A new public key will be added to root's ``authorized_keys`` file, granting root access to all nodes of the cluster. The private part of the key is also distributed to all nodes. Old files are renamed. - Communication between nodes is encrypted using SSL/TLS. A common key and certificate combo is shared between all nodes of the cluster. At this time, no CA is used. - The Ganeti node daemon will accept RPC requests from any host within the cluster with the correct certificate, and the operations it will do as a result of these requests are: - running commands under the ``/etc/ganeti/hooks`` directory - creating DRBD disks between it and the IP it has been told - overwriting a defined list of files on the host As you can see, as soon as a node is joined, it becomes equal to all other nodes in the cluster, and the security of the cluster is determined by the weakest node. Note that only the SSH key will allow other machines to run any command on this node; the RPC method will run only: - well-defined commands to create, remove, activate logical volumes, DRBD devices, start/stop instances, etc.; - run well-defined SSH commands on other nodes in the cluster - scripts under the ``/etc/ganeti/hooks`` directory - scripts under the ``/etc/ganeti/restricted-commands`` directory, if this feature has been enabled at build time (see below) It is therefore important to make sure that the contents of the ``/etc/ganeti/hooks`` and ``/etc/ganeti/restricted-commands`` directories are supervised and only trusted sources can populate them. Restricted commands ~~~~~~~~~~~~~~~~~~~ The restricted commands feature is new in Ganeti 2.7. It enables the administrator to run any commands in the ``/etc/ganeti/restricted-commands`` directory, if the feature has been enabled at build time, subject to the following restrictions: - No parameters may be passed - No absolute or relative path may be passed, only a filename - The ``/etc/ganeti/restricted-commands`` directory must be owned by root:root and have mode 0755 or stricter - Executables must be regular files or symlinks, and must be executable by root:root Note that it's not possible to list the contents of the directory, and there is an intentional delay when trying to execute a non-existing command (to slow down dictionary attacks). Since for Ganeti itself this functionality is not needed, and is only provided as a way to help administer or recover nodes, it is a local site decision whether or not to enable the restricted commands feature. By default, this feature is disabled. Cluster issues -------------- As mentioned above, there are multiple ways of communication between cluster nodes: - SSH-based, for high-volume traffic like image dumps or for low-level commands, e.g. restarting the Ganeti node daemon - RPC communication between master and nodes - DRBD real-time disk replication traffic The SSH traffic is protected (after the initial login to a new node) by the cluster-wide shared SSH key. RPC communication between the master and nodes is protected using SSL/TLS encryption.
Both the client and the server must have the cluster-wide shared SSL/TLS certificate and verify it when establishing the connection by comparing fingerprints. We decided not to use a CA to simplify the key handling. The DRBD traffic is not protected by encryption, as DRBD does not support this. It's therefore recommended to implement host-level firewalling or to use a separate range of IP addresses for the DRBD traffic (this is supported in Ganeti through the use of a secondary interface) which is not routed outside the cluster. DRBD connections are protected from erroneous connections to other machines (as may happen due to software issues), and from accepting connections from other machines, by using a shared secret, exchanged via RPC requests from the master to the nodes when configuring the device. Master daemon ------------- Communication between the command-line tools and the master daemon is done via a UNIX socket, whose permissions are reset to ``0660`` after listening but before serving requests. This permission-based protection is documented and works on Linux, but is not portable; however, Ganeti doesn't work on non-Linux systems at the moment. Luxi daemon ----------- The ``luxid`` daemon (automatically enabled if ``confd`` is enabled at build time) serves local (UNIX socket) queries about the run-time configuration. Answering these means talking to other cluster nodes, exactly as ``masterd`` does. See the notes for ``masterd`` regarding permission-based protection. Conf daemon ----------- In Ganeti 2.8, the ``confd`` daemon (if enabled at build time) serves network-originated queries about parts of the static cluster configuration. If Ganeti is not configured (at build time) to use separate users, ``confd`` has access to all Ganeti-related files (including internal RPC SSL certificates). This makes it a bit more sensitive to bugs (a remote attacker could get direct access to the intra-cluster RPC), so to harden security it's recommended to: - disable confd at build time if it (and ``luxid``) is not needed in your setup. - configure Ganeti (at build time) to use separate users, so that the confd daemon doesn't also have access to the server SSL/TLS certificates. - add firewall rules to protect the ``confd`` port or bind it to a trusted address. Make sure that all nodes can access the daemon, as the monitoring daemon requires it. Monitoring daemon ----------------- The monitoring daemon provides information about the status and the performance of the cluster over HTTP. It is currently unencrypted and unauthenticated; therefore it is strongly advised to set proper firewalling rules to prevent unwanted access. The monitoring daemon runs as root, because it needs to be able to access privileged information (such as the state of the instances as provided by the Xen hypervisor). Nevertheless, the security implications are mitigated by the fact that the agent only provides reporting functionalities, without the ability to actually modify the state of the cluster. Remote API ---------- Starting with Ganeti 2.0, Remote API traffic is encrypted using SSL/TLS by default. It supports Basic authentication as per :rfc:`2617`. Users can be granted different capabilities. Details can be found in the :ref:`RAPI documentation `. Paths for certificate, private key and CA files required for SSL/TLS will be set at source configure time. Symlinks or command line parameters may be used to use different files.
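As an illustration, a RAPI user entry granting write access could look like the following (the user name and password are made up; the exact file location and full syntax, including support for hashed passwords, are described in the RAPI documentation)::

  # /var/lib/ganeti/rapi/users
  jack abc123 write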
Inter-cluster instance moves ---------------------------- To move instances between clusters, different clusters must be able to communicate with each other over a secure channel. Up to and including Ganeti 2.1, clusters were self-contained entities and had no knowledge of other clusters. With Ganeti 2.2, clusters can exchange data if tokens (an encryption certificate) were exchanged via a trusted third party beforehand. KVM Security ------------ When running KVM instances under Ganeti three security models are available: "none", "user" and "pool". Under security model "none" instances run by default as root. This means that, if an instance gets jailbroken, it will be able to own the host node, and thus the Ganeti cluster. This is the default model, and the only one available before Ganeti 2.1.2. Under security model "user" an instance is run as the user specified by the hypervisor parameter "security_domain". This makes it easy to run all instances as non-privileged users, and allows one to manually allocate specific users to specific instances or sets of instances. If the specified user doesn't have permissions, a jailbroken instance will need some local privilege escalation before being able to take over the node and the cluster. It's possible though for a jailbroken instance to affect other ones running under the same user. Under security model "pool" a global cluster-level uid pool is used to start each instance on the same node under a different user. The uids in the cluster pool can be set with ``gnt-cluster init`` and ``gnt-cluster modify``, and must correspond to existing users on all nodes. Ganeti will then allocate one to each instance, as needed. This way a jailbroken instance won't be able to affect any other. Since the users are handed out by Ganeti in a per-node randomized way, in this mode there is no way to make sure a particular instance is always run as a certain user. Use mode "user" for that. In addition to these precautions, if you want to avoid instances sending traffic on your node network, you can use an iptables rule such as:: iptables -A OUTPUT -m owner --uid-owner [-] -j LOG \ --log-prefix "ganeti uid pool user network traffic" iptables -A OUTPUT -m owner --uid-owner [-] -j DROP This won't affect regular instance traffic (that comes out of the tapX allocated to the instance, and can be filtered or subject to appropriate policy routes) but will stop any user-generated traffic that might come from a jailbroken instance. .. vim: set textwidth=72 : .. Local Variables: .. mode: rst .. fill-column: 72 .. End: ganeti-2.9.3/doc/design-oob.rst0000644000000000000000000004005312230001635016313 0ustar00rootroot00000000000000Ganeti Node OOB Management Framework ==================================== Objective --------- Extend Ganeti with Out of Band (:term:`OOB`) Cluster Node Management Capabilities. Background ---------- Ganeti currently has no support for Out of Band management of the nodes in a cluster. It relies on the OS running on the nodes and has therefore limited possibilities when the OS is not responding. The command ``gnt-node powercycle`` can be issued to attempt a reboot of a node that crashed, but there are no means to power a node off and power it back on. Supporting this is very handy in the following situations: * **Emergency Power Off**: During emergencies, time is critical and manual tasks just add latency which can be avoided through automation. If a server room overheats, halting the OS on the nodes is not enough.
The nodes need to be powered off cleanly to prevent damage to equipment. * **Repairs**: In most cases, repairing a node means that the node has to be powered off. * **Crashes**: Software bugs may crash a node. Having an OS-independent way to power-cycle a node helps to recover the node without human intervention. Overview -------- Ganeti will be extended with OOB capabilities through the addition of a new **Cluster Parameter** (``--oob-program``), a new **Node Property** (``--oob-program``), a new **Node State (powered)** and support in ``gnt-node`` for invoking an **External Helper Command** which executes the actual OOB command (``gnt-node nodename ...``). The supported commands are: ``power on``, ``power off``, ``power cycle``, ``power status`` and ``health``. .. note:: The new **Node State (powered)** is a **State of Record** (:term:`SoR`), not a **State of World** (:term:`SoW`). The maximum execution time of the **External Helper Command** will be limited to 60s to prevent the cluster from getting locked for an undefined amount of time. Detailed Design --------------- New ``gnt-cluster`` Parameter +++++++++++++++++++++++++++++ | Program: ``gnt-cluster`` | Command: ``modify|init`` | Parameters: ``--oob-program`` | Options: ``--oob-program``: executable OOB program (absolute path) New ``gnt-cluster epo`` Command +++++++++++++++++++++++++++++++ | Program: ``gnt-cluster`` | Command: ``epo`` | Parameter: ``--on`` ``--force`` ``--groups`` ``--all`` | Options: ``--on``: By default epo turns off, with ``--on`` it tries to get the | cluster back online | ``--force``: To force the operation without asking for confirmation | ``--groups``: To operate on groups instead of nodes | ``--all``: To operate on the whole cluster This is a convenience command to allow easy emergency power off of a whole cluster or part of it. It takes care of all steps needed to get the cluster into a sane state to turn off the nodes. With ``--on`` it does the reverse and tries to bring the rest of the cluster back to life. .. note:: The master node is not able to cleanly shut itself down. Therefore, this command will not do all the work on single node clusters. On multi node clusters the command tries to find another master or, if that is not possible, prepares everything up to the point where the user has to shut down the master node themselves; this applies also to the single node cluster configuration. New ``gnt-node`` Property +++++++++++++++++++++++++ | Program: ``gnt-node`` | Command: ``modify|add`` | Parameters: ``--oob-program`` | Options: ``--oob-program``: executable OOB program (absolute path) .. note:: If ``--oob-program`` is set to ``!`` then the node has no OOB capabilities. Otherwise, we will inherit the node group or, respectively, the cluster-wide value. I.e. the nodes have to opt out from OOB capabilities. Addition to ``gnt-cluster verify`` ++++++++++++++++++++++++++++++++++ | Program: ``gnt-cluster`` | Command: ``verify`` | Parameter: None | Option: None | Additional Checks: 1. existence and execution flag of the OOB program on all Master Candidates if the cluster parameter ``--oob-program`` is set or at least one node has the property ``--oob-program`` set. The OOB helper is just invoked on the master 2.
check if node state powered matches the actual power state of the machine for those nodes where ``--oob-program`` is set New Node State ++++++++++++++ Ganeti supports the following two boolean states related to the nodes: **drained** The cluster still communicates with drained nodes but excludes them from allocation operations **offline** if offline, the cluster does not communicate with offline nodes; useful for nodes that are not reachable in order to avoid delays And will extend this list with the following boolean state: **powered** if not powered, the cluster does not communicate with non-powered nodes if the node property ``--oob-program`` is not set, the state powered is not displayed Additionally modify the meaning of the offline state as follows: **offline** if offline, the cluster does not communicate with offline nodes (**with the exception of OOB commands for nodes where** ``--oob-program`` **is set**); useful for nodes that are not reachable in order to avoid delays The corresponding command extensions are: | Program: ``gnt-node`` | Command: ``info`` | Parameter: [ ``nodename`` ... ] | Option: None Additional Output (:term:`SoR`, omitted if node property ``--oob-program`` is not set): powered: ``[True|False]`` | Program: ``gnt-node`` | Command: ``modify`` | Parameter: nodename | Option: [ ``--powered=yes|no`` ] | Reasoning: sometimes you will need to sync the :term:`SoR` with the :term:`SoW` manually | Caveat: ``--powered`` can only be modified if ``--oob-program`` is set for | the node in question New ``gnt-node`` commands: ``power [on|off|cycle|status]`` ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | Program: ``gnt-node`` | Command: ``power [on|off|cycle|status]`` | Parameters: [ ``nodename`` ... ] | Options: None | Caveats: * If no nodenames are passed to ``power [on|off|cycle]``, the user will be prompted with ``"Do you really want to power [on|off|cycle] the following nodes: `. The instance move tool talks to the Ganeti clusters via RAPI and can run on any machine which can connect to the cluster's RAPI. Despite the similar name, the instance move tool should not be confused with the ``gnt-instance move`` command, which is used to move an instance within the cluster without changes (instead of export/import plus rename). Configuring clusters for instance moves --------------------------------------- To prevent third parties from accessing the instance data, all data exchanged between the clusters is signed using a secret key, the "cluster domain secret". It is recommended to assign the same domain secret to all clusters of the same security domain, so that instances can be easily moved between them. By checking the signatures, the destination cluster can be sure the third party (e.g. this tool) didn't modify the received crypto keys and connection information. .. highlight:: shell-example To create a new, random cluster domain secret, run the following command on the master node:: $ gnt-cluster renew-crypto --new-cluster-domain-secret To read and set the cluster domain secret from the contents of a file, run the following command on the master node:: $ gnt-cluster renew-crypto --cluster-domain-secret=%/.../ganeti.cds% More information about the ``renew-crypto`` command can be found in :manpage:`gnt-cluster(8)`. Moving instances ---------------- As soon as the clusters share a cluster domain secret, instances can be moved.
The tool usage is as follows::

  $ move-instance %[options]% %source-cluster% %destination-cluster% %instance-name...%

Multiple instances can be moved with one invocation of the instance move tool, though a few options are only available when moving a single instance.

The most important options are listed below. Unless specified otherwise, destination-related options default to the source value (e.g. setting ``--src-rapi-port=1234`` will make ``--dest-rapi-port``'s default 1234).

``--src-rapi-port``/``--dest-rapi-port``
  RAPI server TCP port, defaults to 5080.

``--src-ca-file``/``--dest-ca-file``
  Path to file containing source cluster Certificate Authority (CA) in PEM format. For self-signed certificates, this is the certificate itself (see more details below in :ref:`instance-move-certificates`). For certificates signed by a third party CA, the complete chain must be in the file (see documentation for :manpage:`SSL_CTX_load_verify_locations(3)`).

``--src-username``/``--dest-username``
  RAPI username, must have write access to cluster.

``--src-password-file``/``--dest-password-file``
  Path to file containing RAPI password (make sure to restrict access to this file).

``--dest-instance-name``
  When moving a single instance: Change name of instance on destination cluster.

``--dest-primary-node``
  When moving a single instance: Primary node on destination cluster.

``--dest-secondary-node``
  When moving a single instance: Secondary node on destination cluster.

``--iallocator``
  Iallocator for creating instance on destination cluster.

``--hypervisor-parameters``/``--backend-parameters``/``--os-parameters``/``--net``
  When moving a single instance: Override instances' parameters.

``--parallel``
  Number of instance moves to run in parallel.

``--verbose``/``--debug``
  Increase output verbosity.

The exit value of the tool is zero if and only if all instance moves were successful.

.. _instance-move-certificates:

Certificates
------------

If using certificates signed by a CA, then you need to pass the same CA certificate via both ``--src-ca-file`` and ``--dest-ca-file``. However, if you're using self-signed certificates, this has a few (security) implications:

- the certificates of both the source and destination clusters (``rapi.pem`` from the Ganeti configuration directory, usually ``/var/lib/ganeti/rapi.pem``) must be available to the tool
- by default, the certificates include the private key as well, so simply copying them to a third machine means that machine can now impersonate both the source and destination clusters' RAPI endpoints

It is therefore recommended to copy only the certificate from the ``rapi.pem`` files, and pass these to ``--src-ca-file`` and ``--dest-ca-file`` appropriately.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/cluster-merge.rst0000644000000000000000000000472112230001635017043 0ustar00rootroot00000000000000
================
Merging clusters
================

With ``cluster-merge`` from the ``tools`` directory it is possible to merge two or more clusters into one single cluster. If anything goes wrong at any point, the script suggests rollback steps which you have to perform *manually*, if there are any. The point of no return is when the master daemon is started for the first time after merging the configuration files. A rollback at this point would involve a lot of manual work.

For the internal design of this tool have a look at the `Automated Ganeti Cluster Merger ` document.
Merge Clusters
==============

The tool has to be invoked on the cluster you would like to merge the other clusters into. The usage of ``cluster-merge`` is as follows::

  cluster-merge [--debug|--verbose] [--watcher-pause-period SECONDS] \
    [--groups [merge|rename]] <cluster> [<cluster>...]

You can provide multiple clusters. The tool will then go over every cluster one at a time and perform the steps to merge it into the invoking cluster.

These options can be used to control the behaviour of the tool:

``--debug``/``--verbose``
  These options are mutually exclusive and increase the level of output to either debug output or just more verbose output, such as the action currently being performed.

``--watcher-pause-period``
  Define the period of time in seconds the watcher shall be disabled; default is 1800 seconds (30 minutes).

``--groups``
  This option controls how ``cluster-merge`` handles duplicate node group names on the merging clusters. If ``merge`` is specified then all node groups with the same name will be merged into one. If ``rename`` is specified, then conflicting node groups on the remote clusters will have their cluster name appended to the group name. If this option is not specified, then ``cluster-merge`` will refuse to continue if it finds conflicting group names; otherwise it will proceed as normal.

Rollback
========

If for any reason something in the merge doesn't work the way it should, ``cluster-merge`` will abort, provide an error message and optionally rollback steps. Please be aware that after a certain point there's no easy way to roll the cluster back to its previous state. If you've reached that point the tool will not provide any rollback steps. If you end up with rollback steps, please perform them before invoking the tool again. It doesn't keep state over invocations.

.. vim: set textwidth=72 :
.. Local Variables:
.. mode: rst
.. fill-column: 72
.. End:
ganeti-2.9.3/doc/html/0000755000000000000000000000000012271443673014514 5ustar00rootroot00000000000000ganeti-2.9.3/doc/html/design-multi-reloc.html0000644000000000000000000003200312271443666021105 0ustar00rootroot00000000000000 Moving instances across node groups — Ganeti 2.9.3 documentation

Moving instances across node groups¶

This design document explains the changes needed in Ganeti to perform instance moves across node groups. Reader familiarity with the following existing documents is advised:

Motivation and design proposal¶

At the moment, moving instances away from their primary or secondary nodes with the relocate and multi-evacuate IAllocator calls restricts target nodes to those on the same node group. This ensures a mobility domain is never crossed, and allows normal operation of each node group to be confined within itself.

It is desirable, however, to have a way of moving instances across node groups so that, for example, it is possible to move a set of instances to another group for policy reasons, or completely empty a given group to perform maintenance operations.

To implement this, we propose the addition of new IAllocator calls to compute inter-group instance moves and group-aware node evacuation, taking into account mobility domains as appropriate. The interface proposed below should be enough to cover the use cases mentioned above.

With the implementation of this design proposal, the previous multi-evacuate mode will be deprecated.

Detailed design¶

All requests honor the groups’ alloc_policy attribute.

Changing instance’s groups¶

Takes a list of instances and a list of node group UUIDs; the instances will be moved away from their current group, to any of the groups in the target list. All instances need to have their primary node in the same group, which may not be a target group. If the target group list is empty, the request is simply “change group” and the instances are placed in any group but their original one.

Node evacuation¶

Evacuates instances off their primary nodes. The evacuation mode can be given as primary-only, secondary-only or all. The call is given a list of instances whose primary nodes need to be in the same node group. The returned nodes need to be in the same group as the original primary node.

Result¶

In all storage models, an inter-group move can be modeled as a sequence of replace secondary, migration and failover operations (when shared storage is used, they will all be failover or migration operations within the corresponding mobility domain).

The result of the operations described above must contain two lists of instances and a list of jobs (each of which is a list of serialized opcodes) to actually execute the operation. Job dependencies can be used to force jobs to run in a certain order while still making use of parallelism.

The two lists of instances describe which instances could be moved/migrated and which couldn’t for some reason (“unsuccessful”). The union of the instances in the two lists must be equal to the set of instances given in the original request. The successful list of instances contains elements as follows:

(instance name, target group name, [chosen node names])
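
For example, a successful entry might look like this (the instance, group and node names are purely illustrative):

("inst1.example.com", "group2", ["node4.example.com", "node5.example.com"])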

The choice of names is simply for readability reasons (for example, Ganeti could log the computed solution in the job information) and for being able to check (manually) for consistency that the generated opcodes match the intended target groups/nodes. Note that for the node-evacuate operation, the group is not changed, but it should still be returned as such (as it’s easier to have the same return type for both operations).

The unsuccessful list of instances contains elements as follows:

(instance name, explanation)

where explanation is a string describing why the plugin was not able to relocate the instance.
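
A corresponding unsuccessful entry could look like this (again with made-up values):

("inst5.example.com", "no target group with enough capacity found")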

The client is given a list of job IDs (see the design for LU-generated jobs) which it can watch. Failures should be reported to the user.

Example job list (negative values in the depends field are job IDs relative to the current job; e.g. -1 refers to the job immediately preceding this one in the list):

[
  # First job
  [
    { "OP_ID": "OP_INSTANCE_MIGRATE",
      "instance_name": "inst1.example.com",
    },
    { "OP_ID": "OP_INSTANCE_MIGRATE",
      "instance_name": "inst2.example.com",
    },
  ],
  # Second job
  [
    { "OP_ID": "OP_INSTANCE_REPLACE_DISKS",
      "depends": [
        [-1, ["success"]],
        ],
      "instance_name": "inst2.example.com",
      "mode": "replace_new_secondary",
      "remote_node": "node4.example.com",
    },
  ],
  # Third job
  [
    { "OP_ID": "OP_INSTANCE_FAILOVER",
      "depends": [
        [-2, []],
        ],
      "instance_name": "inst8.example.com",
    },
  ],
]

Accepted opcodes:

  • OP_INSTANCE_FAILOVER
  • OP_INSTANCE_MIGRATE
  • OP_INSTANCE_REPLACE_DISKS

ganeti-2.9.3/doc/html/design-virtual-clusters.html0000644000000000000000000004520212271443670022201 0ustar00rootroot00000000000000 Design for virtual clusters support — Ganeti 2.9.3 documentation

Design for virtual clusters support¶

Introduction¶

Currently there are two ways to test the Ganeti (including HTools) code base:

  • unittests, which run using mocks as normal user and test small bits of the code
  • QA/burnin/live-test, which require actual hardware (either physical or virtual) and will build an actual cluster, with one machine to one node correspondence

The difference in time between these two is significant:

  • the unittests run in about 1-2 minutes
  • a so-called ‘quick’ QA (without burnin) runs in about an hour, and a full QA could be double that time

On one hand, the unittests have a clear advantage: quick to run, not requiring many machines, but on the other hand QA is actually able to run end-to-end tests (including HTools, for example).

Ideally, we would have an intermediate step between these two extremes: be able to test most, if not all, of Ganeti’s functionality but without requiring actual hardware, full machine ownership or root access.

Current situation¶

Ganeti¶

It is possible, given a manually built config.data and _autoconf.py, to run the masterd under the current user as a single-node cluster master. However, the node daemon and related functionality (cluster initialisation, master failover, etc.) are not directly runnable in this model.

Also, masterd only works as a master of a single node cluster, due to our current “hostname” method of identifying nodes, which results in a limit of at most one node daemon per machine, unless we use multiple name and IP aliases.

HTools¶

In HTools the situation is better, since it doesn’t have to deal with actual machine management: all tools can use a custom LUXI path, and can even load RAPI data from the filesystem (so the RAPI backend can be tested), and both the ‘text’ backend for hbal/hspace and the input files for hail are text-based, loaded from the file-system.

Proposed changes¶

The end-goal is to have full support for “virtual clusters”, i.e. to be able to run a “big” cluster (hundreds of virtual nodes and towards thousands of virtual instances) on a reasonably powerful but single machine, under a single user account and without any special privileges.

This would have significant advantages:

  • being able to test end-to-end certain changes, without requiring a complicated setup
  • better able to estimate Ganeti’s behaviour and performance as the cluster size grows; this is something that we haven’t been able to test reliably yet, and as such we still have not yet diagnosed scaling problems
  • easier integration with external tools (and even with HTools)

masterd¶

As described above, masterd already works reasonably well in a virtual setup, as it won’t execute external programs and it shouldn’t directly read files from the local filesystem (or at least not virtualisation-related, as the master node can be a non-vm_capable node).

noded¶

The node daemon executes many privileged operations, but they can be split in a few general categories:

Category Description Solution
disk operations Disk creation and removal Use only diskless or file-based instances
disk query Node disk total/free, used in node listing and htools Not supported currently, could use file-based
hypervisor operations Instance start, stop and query Use the fake hypervisor
instance networking Bridge existence query Unprivileged operation, can be used with an existing bridge at system level or use NIC-less instances
instance OS operations OS add, OS rename, export and import Only used with non-diskless instances; could work with custom OS scripts that just dd without mounting filesystems
node networking IP address management (master ip), IP query, etc. Not supported; Ganeti will need to work without a master IP; for the IP query operations the test machine would need externally-configured IPs
node add SSH command must be adjusted
node setup ssh, /etc/hosts, so on Can already be disabled from the cluster config
master failover start/stop the master daemon Doable (as long as we use a single user), might get tricky w.r.t. paths to executables
file upload Uploading of system files, job queue files and ganeti config The only issue could be with system files, which are not owned by the current user; internal ganeti files should be working fine
node oob Out-of-band commands Since these are user-defined, we can mock them easily
node OS discovery List the existing OSes and their properties No special privileges needed, so works fine as-is
hooks Running hooks for given operations No special privileges needed
iallocator Calling an iallocator script No special privileges needed
export/import Exporting and importing instances When exporting/importing file-based instances, this should work, as the listening ports are dynamically chosen
hypervisor validation The validation of hypervisor parameters As long as the hypervisors don’t call to privileged commands, it should work
node powercycle The ability to power cycle a node remotely Privileged, so not supported, but anyway not very interesting for testing

It seems that much of the functionality works as is, or could work with small adjustments, even in a non-privileged setup. The bigger problem is the actual use of multiple node daemons per machine.

Multiple noded per machine¶

Currently Ganeti identifies nodes simply by their hostname. Since changing this method would imply significant changes to tracking the nodes, the proposal is to simply give the (single) machine that is used for tests as many IPs as there are virtual nodes, and have each IP correspond to a different name, so that no changes are needed to the core RPC library. Unfortunately this has the downside of requiring root rights for setting up the extra IPs and hostnames.
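
As a sketch of this first option, the extra addresses and names might be set up by hand along these lines (all values illustrative; root rights required):

$ ip address add 192.0.2.11/32 dev lo
$ ip address add 192.0.2.12/32 dev lo
$ echo "192.0.2.11 vnode1.example.com vnode1" >> /etc/hosts
$ echo "192.0.2.12 vnode2.example.com vnode2" >> /etc/hosts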

An alternative option is to implement per-node IP/port support in Ganeti (especially in the RPC layer), which would eliminate the need for root rights. We expect that this will get implemented as a second step of this design, but as the port is currently static, it will require changes in many places.

The only remaining problem is with sharing the localstatedir structure (lib, run, log) amongst the daemons, for which we propose to introduce an environment variable (GANETI_ROOTDIR) acting as a prefix for essentially all paths. An environment variable is easier to transport through several levels of programs (shell scripts, Python, etc.) than a command line parameter. In Python code this prefix will be applied to all paths in constants.py. Every virtual node will get its own root directory. The rationale for this is two-fold:

  • having two or more node daemons writing to the same directory might introduce artificial scenarios not existent in real life; currently noded either owns the entire /var/lib/ganeti directory or shares it with masterd, but never with another noded
  • having separate directories allows cluster verify to correctly check the consistency of file upload operations; otherwise, as long as one node daemon wrote a file successfully, the results from all others are “lost”

In case the use of an environment variable turns out to be too difficult, a compile-time prefix path could be used. This would then require one Ganeti installation per virtual node, but it might be good enough.
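
A minimal sketch of how the environment variable approach could look, assuming a per-node directory layout and that the node daemon is started with a per-node bind address via its -b option:

$ export GANETI_ROOTDIR=$HOME/vcluster/vnode1.example.com
$ ganeti-noded -b 192.0.2.11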

rapi¶

The RAPI daemon is not privileged and furthermore we only need one per cluster, so it presents no issues.

confd¶

confd has somewhat the same issues as the node daemon regarding multiple daemons per machine, but the per-address binding still works.

ganeti-watcher¶

Since the startup of daemons will be customised with per-IP binds, the watcher either has to be modified to not activate the daemons, or the start-stop tool has to take this into account. Due to watcher’s use of the hostname, it’s recommended that the master node is set to the machine hostname (also a requirement for the master daemon).

CLI scripts¶

As long as the master node is set to the machine hostname, these should work fine.

Cluster initialisation¶

The cluster initialisation procedure might be a bit more involved than usual (this has not been tried yet). A script will be used to set up all necessary IP addresses and hostnames, as well as creating the initial directory structure. Building config.data manually should not be necessary.

Needed tools¶

With the above investigation results in mind, the only things we need are:

  • a tool to set up the per-virtual-node tree structure of localstatedir (with the help of ensure-dirs) and to correctly set up the extra IPs/hostnames
  • changes to the daemon startup tools to correctly launch the daemons for each virtual node
  • changes to constants.py to override the localstatedir path
  • documentation for running such a virtual cluster
  • and eventual small fixes to the node daemon backend functionality, to better separate privileged and non-privileged code
ganeti-2.9.3/doc/html/install.html0000644000000000000000000016611012271443671017053 0ustar00rootroot00000000000000 Ganeti installation tutorial — Ganeti 2.9.3 documentation

Ganeti installation tutorial¶

Documents Ganeti version 2.9

Introduction¶

Ganeti is a cluster virtualization management system based on Xen or KVM. This document explains how to bootstrap a Ganeti node (Xen dom0, the host Linux system for KVM), create a running cluster and install virtual instances (Xen domUs, KVM guests). You need to repeat most of the steps in this document for every node you want to install, but of course we recommend creating some semi-automatic procedure if you plan to deploy Ganeti on a medium/large scale.

A basic Ganeti terminology glossary is provided in the introductory section of the Ganeti administrator’s guide. Please refer to that document if you are uncertain about the terms we are using.

Ganeti has been developed for Linux and should be distribution-agnostic. This documentation will use Debian Squeeze as an example system but the examples can be translated to any other distribution. You are expected to be familiar with your distribution, its package management system, and Xen or KVM before trying to use Ganeti.

This document is divided into two main sections:

  • Installation of the base system and base components
  • Configuration of the environment for Ganeti

Each of these is divided into sub-sections. While a full Ganeti system will need all of the steps specified, some are not strictly required for every environment. Which ones they are, and why, is specified in the corresponding sections.

Installing the base system and base components¶

Hardware requirements¶

Any system supported by your Linux distribution is fine. 64-bit systems are better as they can support more memory.

Any disk drive recognized by Linux (IDE/SCSI/SATA/etc.) is supported in Ganeti. Note that no shared storage (e.g. SAN) is needed to get high-availability features (but of course, one can be used to store the images). While it is highly recommended to use more than one disk drive in order to improve speed, Ganeti also works with one disk per machine.

Installing the base system¶

Mandatory on all nodes.

It is advised to start with a clean, minimal install of the operating system. The only requirement you need to be aware of at this stage is to partition leaving enough space for a big (minimum 20GiB) LVM volume group which will then host your instance filesystems, if you want to use all Ganeti features. The volume group name Ganeti uses (by default) is xenvg.

You can also use file-based storage only, without LVM, but this setup is not detailed in this document.

If you choose to use RBD-based instances, there’s no need for LVM provisioning. However, this feature is experimental, and is not yet recommended for production clusters.

While you can use an existing system, please note that the Ganeti installation is intrusive in terms of changes to the system configuration, and it’s best to use a newly-installed system without important data on it.

Also, for best results, it’s advised that the nodes have as much as possible the same hardware and software configuration. This will make administration much easier.

Hostname issues¶

Note that Ganeti requires the hostnames of the systems (i.e. what the hostname command outputs) to be a fully-qualified name, not a short name. In other words, you should use node1.example.com as a hostname and not just node1.

Debian

Debian usually configures the hostname differently than you need it for Ganeti. For example, this is what it puts in /etc/hosts in certain situations:

127.0.0.1       localhost
127.0.1.1       node1.example.com node1

but for Ganeti you need to have:

127.0.0.1       localhost
192.0.2.1       node1.example.com node1

replacing 192.0.2.1 with your node’s address. Also, the file /etc/hostname which configures the hostname of the system should contain node1.example.com and not just node1 (you need to run the command /etc/init.d/hostname.sh start after changing the file).

Why a fully qualified host name

Although most distributions use only the short name in the /etc/hostname file, we still think Ganeti nodes should use the full name. The reason for this is that calling ‘hostname --fqdn’ requires the resolver library to work and is a ‘guess’ via heuristics at what your domain name is. Since Ganeti can be used among other things to host DNS servers, we want to depend on them as little as possible, and we’d rather have the uname() syscall return the full node name.

We haven’t ever found any breakage in using a full hostname on a Linux system, and anyway we recommend to have only a minimal installation on Ganeti nodes, and to use instances (or other dedicated machines) to run the rest of your network services. By doing this you can change the /etc/hostname file to contain an FQDN without the fear of breaking anything unrelated.

Installing The Hypervisor¶

Mandatory on all nodes.

While Ganeti is developed with the ability to modularly run on different virtualization environments in mind, the only two currently usable on a live system are Xen and KVM. Supported Xen versions are: 3.0.3 and later 3.x versions, and 4.x (tested up to 4.1). Supported KVM versions are 72 and above.

Please follow your distribution’s recommended way to install and set up Xen, or install Xen from the upstream source, if you wish, following their manual. For KVM, make sure you have a KVM-enabled kernel and the KVM tools.

After installing Xen, you need to reboot into your new system. On some distributions this might involve configuring GRUB appropriately, whereas others will configure it automatically when you install the respective kernels. For KVM no reboot should be necessary.

Xen on Debian

Under Debian you can install the relevant xen-linux-system package, which will pull in both the hypervisor and the relevant kernel. Also, if you are installing a 32-bit system, you should install the libc6-xen package (run apt-get install libc6-xen).

Xen settings¶

It’s recommended that dom0 is restricted to a low amount of memory (512MiB or 1GiB is reasonable) and that memory ballooning is disabled in the file /etc/xen/xend-config.sxp by setting the value dom0-min-mem to 0, like this:

(dom0-min-mem 0)

For optimum performance when running both CPU and I/O intensive instances, it’s also recommended that the dom0 is restricted to one CPU only. For example you can add dom0_max_vcpus=1,dom0_vcpus_pin to your kernel’s boot command line and set dom0-cpus in /etc/xen/xend-config.sxp like this:

(dom0-cpus 1)

It is recommended that you disable Xen’s automatic save of virtual machines at system shutdown and their subsequent restore at reboot. To achieve this, make sure the variable XENDOMAINS_SAVE in the file /etc/default/xendomains is set to an empty value.
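
For example, the relevant line in /etc/default/xendomains would then simply read:

XENDOMAINS_SAVE=""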

If you want to use live migration make sure you have, in the xen config file, something that allows the nodes to migrate instances between each other. For example:

(xend-relocation-server yes)
(xend-relocation-port 8002)
(xend-relocation-address '')
(xend-relocation-hosts-allow '^192\\.0\\.2\\.[0-9]+$')

The second line assumes that the hypervisor parameter migration_port is set to 8002, otherwise modify it to match. The last line assumes that all your nodes have secondary IPs in the 192.0.2.0/24 network, adjust it accordingly to your setup.

If you want to run HVM instances too with Ganeti and want VNC access to the console of your instances, set the following two entries in /etc/xen/xend-config.sxp:

(vnc-listen '0.0.0.0')
(vncpasswd '')

You need to restart the Xen daemon for these settings to take effect:

$ /etc/init.d/xend restart

Selecting the instance kernel¶

After you have installed Xen, you need to tell Ganeti exactly what kernel to use for the instances it will create. This is done by creating a symlink from your actual kernel to /boot/vmlinuz-3-xenU, and one from your initrd to /boot/initrd-3-xenU [1]. Note that if you don’t use an initrd for the domU kernel, you don’t need to create the initrd symlink.

Debian

After installation of the xen-linux-system package, you need to run (replace the exact version number with the one you have):

$ cd /boot
$ ln -s vmlinuz-2.6.26-1-xen-amd64 vmlinuz-3-xenU
$ ln -s initrd.img-2.6.26-1-xen-amd64 initrd-3-xenU

By default, the initrd doesn’t contain the Xen block drivers needed to mount the root device, so it is recommended to update the initrd by following these two steps:

  • edit /etc/initramfs-tools/modules and add xen_blkfront
  • run update-initramfs -u

Installing DRBD¶

Recommended on all nodes: DRBD is required if you want to use the high availability (HA) features of Ganeti, but optional if you don’t require them or only run Ganeti on single-node clusters. You can upgrade a non-HA cluster to an HA one later, but you might need to convert all your instances to DRBD to take advantage of the new features.

Supported DRBD versions: 8.0-8.3. It’s recommended to have at least version 8.0.12. Note that for version 8.2 and newer you need to pass the usermode_helper=/bin/true parameter to the module, either by configuring /etc/modules or when inserting it manually.

Now the bad news: unless your distribution already provides it, installing DRBD might involve recompiling your kernel or at least fiddling with it. Hopefully at least the Xen-ified kernel source to start from will be provided (if you intend to use Xen).

The good news is that you don’t need to configure DRBD at all. Ganeti will do it for you for every instance you set up. If you have the DRBD utils installed and the module in your kernel you’re fine. Please check that your system is configured to load the module at every boot, and that it passes the following option to the module: minor_count=NUMBER. We recommend that you use 128 as the value of the minor_count - this will allow you to use up to 64 instances in total per node (both primary and secondary, when using only one disk per instance). You can increase the number up to 255 if you need more instances on a node.

Debian

On Debian, you can just install (build) the DRBD module with the following commands, making sure you are running the target (Xen or KVM) kernel:

$ apt-get install drbd8-source drbd8-utils
$ m-a update
$ m-a a-i drbd8

Or on newer versions, if the kernel already has modules:

$ apt-get install drbd8-utils

Then to configure it for Ganeti:

$ echo drbd minor_count=128 usermode_helper=/bin/true >> /etc/modules
$ depmod -a
$ modprobe drbd minor_count=128 usermode_helper=/bin/true

It is also recommended that you comment out the default resources (if any) in the /etc/drbd.conf file, so that the init script doesn’t try to configure any drbd devices. You can do this by prefixing all resource lines in the file with the keyword skip, like this:

skip {
  resource r0 {
    ...
  }
}

skip {
  resource "r1" {
    ...
  }
}

Installing RBD¶

Recommended on all nodes: RBD is required if you want to create instances with RBD disks residing inside a RADOS cluster (make use of the rbd disk template). RBD-based instances can fail over or migrate to any other node in the ganeti cluster, enabling you to exploit all of Ganeti’s high availability (HA) features.

Attention

Be careful though: rbd is still experimental! For now it is recommended only for testing purposes. No sensitive data should be stored there.

You will need the rbd and libceph kernel modules, the RBD/Ceph userspace utils (ceph-common Debian package) and an appropriate Ceph/RADOS configuration file on every VM-capable node.

You will also need a working RADOS Cluster accessible by the above nodes.

RADOS Cluster¶

You will need a working RADOS Cluster accessible by all VM-capable nodes to use the RBD template. For more information on setting up a RADOS Cluster, refer to the official docs.

If you want to use a pool for storing RBD disk images other than the default (rbd), you should first create the pool in the RADOS Cluster, and then set the corresponding rbd disk parameter named pool.
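
For example, to create a dedicated pool and point Ganeti at it, something along the following lines should work (the pool name and placement-group count are arbitrary, and the -D disk-parameter syntax assumes a Ganeti version with disk parameter support):

$ ceph osd pool create ganeti-rbd 128
$ gnt-cluster modify -D rbd:pool=ganeti-rbd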

Kernel Modules¶

Unless your distribution already provides it, you might need to compile the rbd and libceph modules from source. You will need Linux Kernel 3.2 or above for the kernel modules. Alternatively you will have to build them as external modules (from Linux Kernel source 3.2 or above), if you want to run a less recent kernel, or your kernel doesn’t include them.

Userspace Utils¶

The RBD template has been tested with ceph-common v0.38 and above. We recommend using the latest version of ceph-common.

Debian

On Debian, you can just install the RBD/Ceph userspace utils with the following command:

$ apt-get install ceph-common

Configuration file¶

You should also provide an appropriate configuration file (ceph.conf) in /etc/ceph. For the rbd userspace utils, you’ll only need to specify the IP addresses of the RADOS Cluster monitors.

ceph.conf

Sample configuration file:

[mon.a]
       host = example_monitor_host1
       mon addr = 1.2.3.4:6789
[mon.b]
       host = example_monitor_host2
       mon addr = 1.2.3.5:6789
[mon.c]
       host = example_monitor_host3
       mon addr = 1.2.3.6:6789

For more information, please see the Ceph Docs

Other required software¶

Please install all software requirements mentioned in Ganeti quick installation guide. If you want to build Ganeti from source, don’t forget to follow the steps required for that as well.

Setting up the environment for Ganeti¶

Configuring the network¶

Mandatory on all nodes.

You can run Ganeti either in “bridged mode”, “routed mode” or “openvswitch mode”. In bridged mode, the default, the instances’ network interfaces will be attached to a software bridge running in dom0. Xen by default creates such a bridge at startup, but your distribution might have a different way to do things, and you’ll definitely need to manually set it up under KVM.

Beware that the default name Ganeti uses is xen-br0 (which was used in Xen 2.0) while Xen 3.0 uses xenbr0 by default. See the Initializing the cluster section to learn how to choose a different bridge, or not to use one at all and use “routed mode”.

In order to use “routed mode” under Xen, you’ll need to change the relevant parameters in the Xen config file. Under KVM instead, no config change is necessary, but you still need to set up your network interfaces correctly.

By default, under KVM, the “link” parameter you specify per-nic will represent, if non-empty, a different routing table name or number to use for your instances. This allows isolation between different instance groups, and different routing policies between node traffic and instance traffic.

You will need to configure your routing table basic routes and rules outside of ganeti. The vif scripts will only add /32 routes to your instances, through their interface, in the table you specified (under KVM, and in the main table under Xen).

Also for “openvswitch mode” under Xen a custom network script is needed. Under KVM everything should work, but you’ll need to configure your switches outside of Ganeti (as for bridges).

Bridging issues with certain kernels

Some kernel versions (e.g. 2.6.32) have an issue where the bridge will automatically change its MAC address to the lower-numbered slave on port addition and removal. This means that, depending on the MAC address of the actual NIC on the node and the addresses of the instances, it could be that starting, stopping or migrating instances will lead to timeouts due to the address of the bridge (and thus node itself) changing.

To prevent this, it’s enough to set the bridge manually to a specific MAC address, which will disable this automatic address change. In Debian, this can be done as follows in the bridge configuration snippet:

up ip link set addr $(cat /sys/class/net/$IFACE/address) dev $IFACE

which will “set” the bridge address to the initial one, disallowing changes.

Bridging under Debian

The recommended way to configure the Xen bridge is to edit your /etc/network/interfaces file and substitute your normal Ethernet stanza with the following snippet:

auto xen-br0
iface xen-br0 inet static
   address YOUR_IP_ADDRESS
   netmask YOUR_NETMASK
   network YOUR_NETWORK
   broadcast YOUR_BROADCAST_ADDRESS
   gateway YOUR_GATEWAY
   bridge_ports eth0
   bridge_stp off
   bridge_fd 0
   # example for setting manually the bridge address to the eth0 NIC
   up ip link set addr $(cat /sys/class/net/eth0/address) dev $IFACE

The following commands need to be executed on the local console:

$ ifdown eth0
$ ifup xen-br0

To check if the bridge is set up, use the ip and brctl show commands:

$ ip a show xen-br0
9: xen-br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
    link/ether 00:20:fc:1e:d5:5d brd ff:ff:ff:ff:ff:ff
    inet 10.1.1.200/24 brd 10.1.1.255 scope global xen-br0
    inet6 fe80::220:fcff:fe1e:d55d/64 scope link
       valid_lft forever preferred_lft forever

$ brctl show xen-br0
bridge name     bridge id               STP enabled     interfaces
xen-br0         8000.0020fc1ed55d       no              eth0

In order to have a custom and more advanced networking configuration in Xen which can vary among instances, after having successfully installed Ganeti you have to create a symbolic link to the vif-script provided by Ganeti inside /etc/xen/scripts (assuming you installed Ganeti under /usr/lib):

$ ln -s /usr/lib/ganeti/vif-ganeti /etc/xen/scripts/vif-ganeti

This has to be done on all nodes. Afterwards you can set the vif_script hypervisor parameter to point to that script by:

$ gnt-cluster modify -H xen-pvm:vif_script=/etc/xen/scripts/vif-ganeti

With this hypervisor parameter in place, you are able to create your own scripts and create instances with different networking configurations.

Configuring LVM¶

Mandatory on all nodes.

The volume group is required to be at least 20GiB.

If you haven’t configured your LVM volume group at install time you need to do it before trying to initialize the Ganeti cluster. This is done by formatting the devices/partitions you want to use for it and then adding them to the relevant volume group:

$ pvcreate /dev/sda3
$ vgcreate xenvg /dev/sda3

or:

$ pvcreate /dev/sdb1
$ pvcreate /dev/sdc1
$ vgcreate xenvg /dev/sdb1 /dev/sdc1

If you want to add a device later you can do so with the vgextend command:

$ pvcreate /dev/sdd1
$ vgextend xenvg /dev/sdd1

Optional: it is recommended to configure LVM not to scan the DRBD devices for physical volumes. This can be accomplished by editing /etc/lvm/lvm.conf and adding the /dev/drbd[0-9]+ regular expression to the filter variable, like this:

filter = ["r|/dev/cdrom|", "r|/dev/drbd[0-9]+|" ]

Note that a helper script, lvmstrap, is provided with Ganeti; it will erase and configure as LVM any disk on your system that is not in use. This is dangerous and it’s recommended to read its --help output if you want to use it.

Installing Ganeti¶

Mandatory on all nodes.

It’s now time to install the Ganeti software itself. Download the source from the project page at http://code.google.com/p/ganeti/, and install it (replace 2.6.0 with the latest version):

$ tar xvzf ganeti-2.6.0.tar.gz
$ cd ganeti-2.6.0
$ ./configure --localstatedir=/var --sysconfdir=/etc
$ make
$ make install
$ mkdir /srv/ganeti/ /srv/ganeti/os /srv/ganeti/export

You also need to copy the file doc/examples/ganeti.initd from the source archive to /etc/init.d/ganeti and register it with your distribution’s startup scripts, for example in Debian:

$ chmod +x /etc/init.d/ganeti
$ update-rc.d ganeti defaults 20 80

In order to automatically restart failed instances, you need to set up a cron job to run the ganeti-watcher command. A sample cron file is provided in the source at doc/examples/ganeti.cron and you can copy that (altering the path if necessary) to /etc/cron.d/ganeti. Finally, a sample logrotate snippet is provided in the source at doc/examples/ganeti.logrotate and you can copy it to /etc/logrotate.d/ganeti to have Ganeti’s logs rotated automatically.

What gets installed¶

The above make install invocation, or installing via your distribution mechanisms, will install on the system:

  • a set of python libraries under the ganeti namespace (depending on the python version this can be located in either lib/python-$ver/site-packages or various other locations)
  • a set of programs under /usr/local/sbin or /usr/sbin
  • if the htools component was enabled, a set of programs under /usr/local/bin or /usr/bin/
  • man pages for the above programs
  • a set of tools under the lib/ganeti/tools directory
  • an example iallocator script (see the admin guide for details) under lib/ganeti/iallocators
  • a cron job that is needed for cluster maintenance
  • an init script for automatic startup of Ganeti daemons
  • provided but not installed automatically by make install is a bash completion script that hopefully will ease working with the many cluster commands

Installing the Operating System support packages¶

Mandatory on all nodes.

To be able to install instances you need to have an Operating System installation script. An example OS that works under Debian and can install Debian and Ubuntu instance OSes is provided on the project web site. Download it from the project page and follow the instructions in the README file. Here is the installation procedure (replace 0.12 with the latest version that is compatible with your ganeti version):

$ cd /usr/local/src/
$ wget http://ganeti.googlecode.com/files/ganeti-instance-debootstrap-0.12.tar.gz
$ tar xzf ganeti-instance-debootstrap-0.12.tar.gz
$ cd ganeti-instance-debootstrap-0.12
$ ./configure --with-os-dir=/srv/ganeti/os
$ make
$ make install

In order to use this OS definition, you need to have internet access from your nodes and have the debootstrap, dump and restore commands installed on all nodes. Also, if the OS is configured to partition the instance’s disk in /etc/default/ganeti-instance-debootstrap, you will need kpartx installed.

Debian

Use this command on all nodes to install the required packages:

$ apt-get install debootstrap dump kpartx

Or alternatively install the OS definition from the Debian package:

$ apt-get install ganeti-instance-debootstrap

KVM

In order for debootstrap instances to be able to shut down cleanly, they must have basic ACPI support installed inside the instance. Which packages are needed depends on the exact flavor of Debian or Ubuntu which you’re installing, but the example defaults file has a commented out configuration line that works for Debian Lenny and Squeeze:

EXTRA_PKGS="acpi-support-base,console-tools,udev"

kbd can be used instead of console-tools, and more packages can be added, of course, if needed.

Please refer to the README file of ganeti-instance-debootstrap for further documentation.

Alternatively, you can create your own OS definitions. See the manpage ganeti-os-interface(7).

Initializing the cluster¶

Mandatory once per cluster, on the first node.

The last step is to initialize the cluster. After you have repeated the above process on all of your nodes, choose one of them as the master. Make sure there is an SSH key pair on the master node (optionally generating one using ssh-keygen). Finally execute:

$ gnt-cluster init CLUSTERNAME

The CLUSTERNAME is a hostname, which must be resolvable (e.g. it must exist in DNS or in /etc/hosts) by all the nodes in the cluster. You must choose a name different from any of the node names for a multi-node cluster. In general the best choice is to have a unique name for a cluster, even if it consists of only one machine, as you will be able to expand it later without any problems. Please note that the hostname used for this must resolve to an IP address reserved exclusively for this purpose, and cannot be the name of the first (master) node.

If you want to use a bridge which is not xen-br0, or no bridge at all, change it with the --nic-parameters option. For example to bridge on br0 you can add:

--nic-parameters link=br0

Or to not bridge at all, and use a separate routing table:

--nic-parameters mode=routed,link=100

If you don’t have a xen-br0 interface you also have to specify a different network interface which will get the cluster IP, on the master node, by using the --master-netdev <device> option.

You can use a different name than xenvg for the volume group (but note that the name must be identical on all nodes). In this case you need to specify it by passing the --vg-name <VGNAME> option to gnt-cluster init.

To set up the cluster as a Xen HVM cluster, use the --enabled-hypervisors=xen-hvm option to enable the HVM hypervisor (you can also add ,xen-pvm to enable the PVM one too). You will also need to create the VNC cluster password file /etc/ganeti/vnc-cluster-password which contains one line with the default VNC password for the cluster.

To set up the cluster for KVM-only usage (KVM and Xen cannot be mixed), pass --enabled-hypervisors=kvm to the init command.

You can also invoke the command with the --help option in order to see all the possibilities.

Hypervisor/Network/Cluster parameters¶

Please note that the default hypervisor/network/cluster parameters may not be the correct one for your environment. Carefully check them, and change them either at cluster init time, or later with gnt-cluster modify.

Your instance types, networking environment, hypervisor type and version may all affect what kind of parameters should be used on your cluster.

KVM

Instances are by default configured to use a host kernel, and to be reached via serial console, which works nicely for Linux paravirtualized instances. If you want fully virtualized instances you may want to handle their kernel inside the instance, and to use VNC.

Some versions of KVM have a bug that will make an instance hang when configured to use the serial console (which is the default) unless a connection is made to it within about 2 seconds of the instance’s startup. In such cases it’s recommended to disable the serial_console option.
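
For example, assuming your Ganeti version accepts the usual boolean value syntax for hypervisor parameters, the option could be disabled cluster-wide with:

$ gnt-cluster modify -H kvm:serial_console=false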

Joining the nodes to the cluster¶

Mandatory for all the other nodes.

After you have initialized your cluster you need to join the other nodes to it. You can do so by executing the following command on the master node:

$ gnt-node add NODENAME

Separate replication network¶

Optional

Ganeti uses DRBD to mirror the disk of the virtual instances between nodes. To use a dedicated network interface for this (in order to improve performance or to enhance security) you need to configure an additional interface for each node. Use the -s option with gnt-cluster init and gnt-node add to specify the IP address of this secondary interface to use for each node. Note that if you specified this option at cluster setup time, you must afterwards use it for every node add operation.
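
For example (the addresses below are placeholders from the documentation’s 192.0.2.0/24 example range):

$ gnt-cluster init -s 192.0.2.1 CLUSTERNAME
$ gnt-node add -s 192.0.2.2 NODENAME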

Testing the setup¶

Execute the gnt-node list command to see all nodes in the cluster:

$ gnt-node list
Node              DTotal  DFree MTotal MNode MFree Pinst Sinst
node1.example.com 197404 197404   2047  1896   125     0     0

The above shows a couple of things:

  • The various Ganeti daemons can talk to each other
  • Ganeti can examine the storage of the node (DTotal/DFree)
  • Ganeti can talk to the selected hypervisor (MTotal/MNode/MFree)

Cluster burnin¶

With Ganeti a tool called burnin is provided that can test most of the Ganeti functionality. The tool is installed under the lib/ganeti/tools directory (either under /usr or /usr/local based on the installation method). See more details under burnin.

Further steps¶

You can now proceed either to the Ganeti administrator’s guide, or read the manpages of the various commands (ganeti(7), gnt-cluster(8), gnt-node(8), gnt-instance(8), gnt-job(8)).

Footnotes

[1] The kernel and initrd paths can be changed at either cluster level (which changes the default for all instances) or at instance level.
ganeti-2.9.3/doc/html/move-instance.html0000644000000000000000000002774212271443671020164 0ustar00rootroot00000000000000 Moving instances between clusters — Ganeti 2.9.3 documentation

Moving instances between clusters¶

Starting with Ganeti 2.2, instances can be moved between separate Ganeti clusters using a new tool, move-instance. The tool has a number of features:

  • Moving a single or multiple instances
  • Moving instances in parallel (--parallel option)
  • Renaming instance (only when moving a single instance)
  • SSL certificate verification for RAPI connections

The design of the inter-cluster instance moves is described in detail in the Ganeti 2.2 design document. The instance move tool talks to the Ganeti clusters via RAPI and can run on any machine which can connect to the cluster’s RAPI. Despite their similar name, the instance move tool should not be confused with the gnt-instance move command, which is used to move an instance within the cluster without changes (instead of export/import plus rename).

Configuring clusters for instance moves¶

To prevent third parties from accessing the instance data, all data exchanged between the clusters is signed using a secret key, the “cluster domain secret”. It is recommended to assign the same domain secret to all clusters of the same security domain, so that instances can be easily moved between them. By checking the signatures, the destination cluster can be sure the third party (e.g. this tool) didn’t modify the received crypto keys and connection information.

To create a new, random cluster domain secret, run the following command on the master node:

$ gnt-cluster renew-crypto --new-cluster-domain-secret

To read and set the cluster domain secret from the contents of a file, run the following command on the master node:

$ gnt-cluster renew-crypto --cluster-domain-secret=/.../ganeti.cds

More information about the renew-crypto command can be found in gnt-cluster(8).

Moving instances¶

As soon as the clusters share a cluster domain secret, instances can be moved. The tool usage is as follows:

$ move-instance [options] source-cluster destination-cluster instance-name...

Multiple instances can be moved with one invocation of the instance move tool, though a few options are only available when moving a single instance.

The most important options are listed below. Unless specified otherwise, destination-related options default to the source value (e.g. setting --src-rapi-port=1234 will make --dest-rapi-port’s default 1234).

--src-rapi-port/--dest-rapi-port
RAPI server TCP port, defaults to 5080.
--src-ca-file/--dest-ca-file
Path to file containing source cluster Certificate Authority (CA) in PEM format. For self-signed certificates, this is the certificate itself (see more details below in Certificates). For certificates signed by a third party CA, the complete chain must be in the file (see documentation for SSL_CTX_load_verify_locations(3)).
--src-username/--dest-username
RAPI username, must have write access to cluster.
--src-password-file/--dest-password-file
Path to file containing RAPI password (make sure to restrict access to this file).
--dest-instance-name
When moving a single instance: Change name of instance on destination cluster.
--dest-primary-node
When moving a single instance: Primary node on destination cluster.
--dest-secondary-node
When moving a single instance: Secondary node on destination cluster.
--iallocator
Iallocator for creating instance on destination cluster.
--hypervisor-parameters/--backend-parameters/--os-parameters/--net
When moving a single instance: Override instances’ parameters.
--parallel
Number of instance moves to run in parallel.
--verbose/--debug
Increase output verbosity.

The exit value of the tool is zero if and only if all instance moves were successful.
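
As a worked example, a single-instance move with a rename could look something like this (cluster names, paths and the new instance name are placeholders; destination options not given fall back to the source values as described above):

$ move-instance --verbose \
    --src-ca-file=/var/lib/ganeti/rapi.pem \
    --src-username=rapiuser \
    --src-password-file=/root/rapi-password \
    --dest-instance-name=inst1-new.example.com \
    cluster1.example.com cluster2.example.com inst1.example.com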

Certificates¶

If using certificates signed by a CA, then you need to pass the same CA certificate via both --src-ca-file and --dest-ca-file.

However, if you’re using self-signed certificates, this has a few (security) implications:

  • the certificates of both the source and destination clusters (rapi.pem from the Ganeti configuration directory, usually /var/lib/ganeti/rapi.pem) must be available to the tool
  • by default, the certificates include the private key as well, so simply copying them to a third machine means that machine can now impersonate both the source and destination clusters’ RAPI endpoints

It is therefore recommended to copy only the certificate from the rapi.pem files, and pass these to --src-ca-file and --dest-ca-file appropriately.
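
One possible way to do this extraction is to keep only the certificate block of the PEM file, for example (a sketch; any equivalent method is fine):

$ sed -n '/BEGIN CERTIFICATE/,/END CERTIFICATE/p' \
    /var/lib/ganeti/rapi.pem > rapi-cert.pem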

ganeti-2.9.3/doc/html/security.html0000644000000000000000000004341712271443673017262 0ustar00rootroot00000000000000 Security in Ganeti — Ganeti 2.9.3 documentation

Security in Ganeti¶

Documents Ganeti version 2.9

Ganeti was developed to run on internal, trusted systems. As such, the security model is all-or-nothing.

Up to version 2.3 all Ganeti code ran as root. Since version 2.4 it is possible to run all daemons except the node daemon and the monitoring daemon as non-root users by specifying user names and groups at build time. The node daemon continues to require root privileges to create logical volumes, DRBD devices, start instances, etc. Cluster commands can be run as root or by users in a group specified at build time. The monitoring daemon requires root privileges in order to be able to access and present information that is only available to root (such as the output of the xm command of Xen).

Host issues¶

For a host on which the Ganeti software has been installed, but not joined to a cluster, there are no changes to the system.

For a host that has been joined to the cluster, there are very important changes:

  • The host will have its SSH host key replaced with the one of the cluster (which is the one the initial node had at the cluster creation)
  • A new public key will be added to root’s authorized_keys file, granting root access to all nodes of the cluster. The private part of the key is also distributed to all nodes. Old files are renamed.
  • Communication between nodes is encrypted using SSL/TLS. A common key and certificate combo is shared between all nodes of the cluster. At this time, no CA is used.
  • The Ganeti node daemon will accept RPC requests from any host within the cluster with the correct certificate, and the operations it will do as a result of these requests are:
    • running commands under the /etc/ganeti/hooks directory
    • creating DRBD disks between it and the IP it has been told
    • overwrite a defined list of files on the host

As you can see, as soon as a node is joined, it becomes equal to all other nodes in the cluster, and the security of the cluster is determined by the weakest node.

Note that only the SSH key will allow other machines to run any command on this node; the RPC method will run only:

  • well defined commands to create, remove, activate logical volumes, drbd devices, start/stop instances, etc;
  • run well-defined SSH commands on other nodes in the cluster
  • scripts under the /etc/ganeti/hooks directory
  • scripts under the /etc/ganeti/restricted-commands directory, if this feature has been enabled at build time (see below)

It is therefore important to make sure that the contents of the /etc/ganeti/hooks and /etc/ganeti/restricted-commands directories are supervised and only trusted sources can populate them.

Restricted commands¶

The restricted commands feature is new in Ganeti 2.7. It enables the administrator to run any commands in the /etc/ganeti/restricted-commands directory, if the feature has been enabled at build time, subject to the following restrictions:

  • No parameters may be passed
  • No absolute or relative path may be passed, only a filename
  • The /etc/ganeti/restricted-commands directory must be owned by root:root and have mode 0755 or stricter
  • Executables must be regular files or symlinks, and must be executable by root:root

Note that it’s not possible to list the contents of the directory, and there is an intentional delay when trying to execute a non-existing command (to slow-down dictionary attacks).
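
A command could therefore be deployed along the following lines, respecting the ownership and permission restrictions above (the script name is hypothetical):

$ install -m 0755 -o root -g root restart-networking \
    /etc/ganeti/restricted-commands/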

Since for Ganeti itself this functionality is not needed, and it is only provided as a way to help administrate or recover nodes, it is a local site decision whether or not to enable the restricted commands feature.

By default, this feature is disabled.

Cluster issues¶

As mentioned above, there are multiple ways of communication between cluster nodes:

  • SSH-based, for high-volume traffic like image dumps or for low-level commands, e.g. restarting the Ganeti node daemon
  • RPC communication between master and nodes
  • DRBD real-time disk replication traffic

The SSH traffic is protected (after the initial login to a new node) by the cluster-wide shared SSH key.

RPC communication between the master and nodes is protected using SSL/TLS encryption. Both the client and the server must have the cluster-wide shared SSL/TLS certificate and verify it when establishing the connection by comparing fingerprints. We decided not to use a CA to simplify the key handling.

The DRBD traffic is not protected by encryption, as DRBD does not support this. It’s therefore recommended to implement host-level firewalling or to use a separate range of IP addresses for the DRBD traffic (this is supported in Ganeti through the use of a secondary interface) which is not routed outside the cluster. DRBD connections are protected from erroneous connections to other machines (as may happen due to software issues), and from accepting connections from other machines, by using a shared secret, exchanged via RPC requests from the master to the nodes when configuring the device.

Master daemon¶

Communication between the command-line tools and the master daemon is done via a UNIX socket, whose permissions are reset to 0660 after listening but before serving requests. This permission-based protection is documented and works on Linux, but is not portable; however, Ganeti doesn’t work on non-Linux systems at the moment.

Luxi daemon¶

The luxid daemon (automatically enabled if confd is enabled at build time) serves local (UNIX socket) queries about the run-time configuration. Answering these means talking to other cluster nodes, exactly as masterd does. See the notes for masterd regarding permission-based protection.

Conf daemon¶

In Ganeti 2.8, the confd daemon (if enabled at build time) serves network-originated queries about parts of the static cluster configuration.

If Ganeti is not configured (at build time) to use separate users, confd has access to all Ganeti related files (including internal RPC SSL certificates). This makes it a bit more sensitive to bugs (a remote attacker could get direct access to the intra-cluster RPC), so to harden security it’s recommended to:

  • disable confd at build time if it (and luxid) is not needed in your setup.
  • configure Ganeti (at build time) to use separate users, so that the confd daemon doesn’t also have access to the server SSL/TLS certificates.
  • add firewall rules to protect the confd port or bind it to a trusted address. Make sure that all nodes can access the daemon, as the monitoring daemon requires it.

Monitoring daemon¶

The monitoring daemon provides information about the status and the performance of the cluster over HTTP. It is currently unencrypted and non-authenticated, therefore it is strongly advised to set proper firewalling rules to prevent unwanted access.

The monitoring daemon runs as root, because it needs to be able to access privileged information (such as the state of the instances as provided by the Xen hypervisor). Nevertheless, the security implications are mitigated by the fact that the agent only provides reporting functionalities, without the ability to actually modify the state of the cluster.

Remote API¶

Starting with Ganeti 2.0, Remote API traffic is encrypted using SSL/TLS by default. It supports Basic authentication as per RFC 2617. Users can be granted different capabilities. Details can be found in the RAPI documentation.

Paths for the certificate, private key and CA files required for SSL/TLS are set at source configure time. Symlinks or command line parameters may be used to point to different files.
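
For instance, assuming a default build where the RAPI certificate lives in Ganeti’s data directory, pointing it at a different certificate could be done with a symlink (both paths here are assumptions that depend on how your installation was configured):

$ ln -sf /etc/ssl/certs/my-rapi.pem /var/lib/ganeti/rapi.pem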

Inter-cluster instance moves¶

To move instances between clusters, different clusters must be able to communicate with each other over a secure channel. Up to and including Ganeti 2.1, clusters were self-contained entities and had no knowledge of other clusters. With Ganeti 2.2, clusters can exchange data if a token (an encryption certificate) has been exchanged beforehand via a trusted third party.

KVM Security¶

When running KVM instances under Ganeti, three security models are available: “none”, “user” and “pool”.

Under security model “none”, instances run by default as root. This means that, if an instance gets jailbroken, it will be able to own the host node, and thus the Ganeti cluster. This is the default model, and the only one available before Ganeti 2.1.2.

Under security model “user”, an instance is run as the user specified by the hypervisor parameter “security_domain”. This makes it easy to run all instances as non-privileged users, and allows one to manually allocate specific users to specific instances or sets of instances. If the specified user doesn’t have permissions, a jailbroken instance will need some local privilege escalation before being able to take over the node and the cluster. It is possible, though, for a jailbroken instance to affect other instances running under the same user.

Under security model “pool”, a global cluster-level uid pool is used to start each instance on the same node under a different user. The uids in the cluster pool can be set with gnt-cluster init and gnt-cluster modify, and must correspond to existing users on all nodes. Ganeti will then allocate one to each instance, as needed. This way a jailbroken instance won’t be able to affect any other. Since the users are handed out by Ganeti in a per-node randomized way, in this mode there is no way to make sure a particular instance is always run as a certain user. Use mode “user” for that.
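
For example, a pool of twenty uids could be configured cluster-wide as follows (the range is arbitrary, and the corresponding users must already exist on every node):

$ gnt-cluster modify --uid-pool 4000-4019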

In addition to these precautions, if you want to avoid instances sending traffic on your node network, you can use an iptables rule such as:

iptables -A OUTPUT -m owner --uid-owner <uid>[-<uid>] -j LOG \
  --log-prefix "ganeti uid pool user network traffic"
iptables -A OUTPUT -m owner --uid-owner <uid>[-<uid>] -j DROP

This won’t affect regular instance traffic (which comes out of the tapX interface allocated to the instance, and can be filtered or subjected to appropriate policy routes) but will stop any user-generated traffic that might come from a jailbroken instance.

Ganeti walk-through — Ganeti 2.9.3 documentation

Ganeti walk-through¶

Documents Ganeti version 2.9

Introduction¶

This document serves as a more example-oriented guide to Ganeti; while the administration guide takes a conceptual approach, here you will find a step-by-step example of managing instances and the cluster.

Our simulated, example cluster will have three machines, named node1, node2, node3. Note that in real life machines will usually have FQDNs but here we use short names for brevity. We will use a secondary network for replication data, 192.0.2.0/24, with nodes having the last octet the same as their index. The cluster name will be example-cluster. All nodes have the same simulated hardware configuration, two disks of 750GB, 32GB of memory and 4 CPUs.

On this cluster, we will create up to seven instances, named instance1 to instance7.

Cluster creation¶

Follow the Ganeti installation tutorial document and prepare the nodes. Then it’s time to initialise the cluster:

$ gnt-cluster init -s 192.0.2.1 --enabled-hypervisors=xen-pvm example-cluster
$

The creation was fine. Let’s check that the one node we have is functioning correctly:

$ gnt-node list
Node  DTotal DFree MTotal MNode MFree Pinst Sinst
node1   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
$ gnt-cluster verify
Mon Oct 26 02:08:51 2009 * Verifying global settings
Mon Oct 26 02:08:51 2009 * Gathering data (1 nodes)
Mon Oct 26 02:08:52 2009 * Verifying node status
Mon Oct 26 02:08:52 2009 * Verifying instance status
Mon Oct 26 02:08:52 2009 * Verifying orphan volumes
Mon Oct 26 02:08:52 2009 * Verifying remaining instances
Mon Oct 26 02:08:52 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 02:08:52 2009 * Other Notes
Mon Oct 26 02:08:52 2009 * Hooks Results
$

Since this proceeded correctly, let’s add the other two nodes:

$ gnt-node add -s 192.0.2.2 node2
-- WARNING --
Performing this operation is going to replace the ssh daemon keypair
on the target machine (node2) with the ones of the current one
and grant full intra-cluster ssh root access to/from it

Unable to verify hostkey of host xen-devi-5.fra.corp.google.com:
f7:…. Do you want to accept it?
y/[n]/?: y
Mon Oct 26 02:11:53 2009  Authentication to node2 via public key failed, trying password
root password:
Mon Oct 26 02:11:54 2009  - INFO: Node will be a master candidate
$ gnt-node add -s 192.0.2.3 node3
-- WARNING --
Performing this operation is going to replace the ssh daemon keypair
on the target machine (node3) with the ones of the current one
and grant full intra-cluster ssh root access to/from it

…
Mon Oct 26 02:12:43 2009  - INFO: Node will be a master candidate

Checking the cluster status again:

$ gnt-node list
Node  DTotal DFree MTotal MNode MFree Pinst Sinst
node1   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
node2   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
node3   1.3T  1.3T  32.0G  1.0G 30.5G     0     0
$ gnt-cluster verify
Mon Oct 26 02:15:14 2009 * Verifying global settings
Mon Oct 26 02:15:14 2009 * Gathering data (3 nodes)
Mon Oct 26 02:15:16 2009 * Verifying node status
Mon Oct 26 02:15:16 2009 * Verifying instance status
Mon Oct 26 02:15:16 2009 * Verifying orphan volumes
Mon Oct 26 02:15:16 2009 * Verifying remaining instances
Mon Oct 26 02:15:16 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 02:15:16 2009 * Other Notes
Mon Oct 26 02:15:16 2009 * Hooks Results
$

And let’s check that we have a valid OS:

$ gnt-os list
Name
debootstrap
$

Running a burn-in¶

Now that the cluster is created, it is time to check that the hardware works correctly, that the hypervisor can actually create instances, etc. This is done via the burnin tool (here using the debootstrap OS) as described in the admin guide. Similar output lines are replaced with … in the below log:

$ /usr/lib/ganeti/tools/burnin -o debootstrap -p instance{1..5}
- Testing global parameters
- Creating instances
  * instance instance1
    on node1, node2
  * instance instance2
    on node2, node3
  …
  * instance instance5
    on node2, node3
  * Submitted job ID(s) 157, 158, 159, 160, 161
    waiting for job 157 for instance1
    …
    waiting for job 161 for instance5
- Replacing disks on the same nodes
  * instance instance1
    run replace_on_secondary
    run replace_on_primary
  …
  * instance instance5
    run replace_on_secondary
    run replace_on_primary
  * Submitted job ID(s) 162, 163, 164, 165, 166
    waiting for job 162 for instance1
    …
- Changing the secondary node
  * instance instance1
    run replace_new_secondary node3
  * instance instance2
    run replace_new_secondary node1
  …
  * instance instance5
    run replace_new_secondary node1
  * Submitted job ID(s) 167, 168, 169, 170, 171
    waiting for job 167 for instance1
    …
- Growing disks
  * instance instance1
    increase disk/0 by 128 MB
  …
  * instance instance5
    increase disk/0 by 128 MB
  * Submitted job ID(s) 173, 174, 175, 176, 177
    waiting for job 173 for instance1
    …
- Failing over instances
  * instance instance1
  …
  * instance instance5
  * Submitted job ID(s) 179, 180, 181, 182, 183
    waiting for job 179 for instance1
    …
- Migrating instances
  * instance instance1
    migration and migration cleanup
  …
  * instance instance5
    migration and migration cleanup
  * Submitted job ID(s) 184, 185, 186, 187, 188
    waiting for job 184 for instance1
    …
- Exporting and re-importing instances
  * instance instance1
    export to node node3
    remove instance
    import from node3 to node1, node2
    remove export
  …
  * instance instance5
    export to node node1
    remove instance
    import from node1 to node2, node3
    remove export
  * Submitted job ID(s) 196, 197, 198, 199, 200
    waiting for job 196 for instance1
    …
- Reinstalling instances
  * instance instance1
    reinstall without passing the OS
    reinstall specifying the OS
  …
  * instance instance5
    reinstall without passing the OS
    reinstall specifying the OS
  * Submitted job ID(s) 203, 204, 205, 206, 207
    waiting for job 203 for instance1
    …
- Rebooting instances
  * instance instance1
    reboot with type 'hard'
    reboot with type 'soft'
    reboot with type 'full'
  …
  * instance instance5
    reboot with type 'hard'
    reboot with type 'soft'
    reboot with type 'full'
  * Submitted job ID(s) 208, 209, 210, 211, 212
    waiting for job 208 for instance1
  …
- Adding and removing disks
  * instance instance1
    adding a disk
    removing last disk
  …
  * instance instance5
    adding a disk
    removing last disk
  * Submitted job ID(s) 213, 214, 215, 216, 217
    waiting for job 213 for instance1
    …
- Adding and removing NICs
  * instance instance1
    adding a NIC
    removing last NIC
  …
  * instance instance5
    adding a NIC
    removing last NIC
  * Submitted job ID(s) 218, 219, 220, 221, 222
    waiting for job 218 for instance1
    …
- Activating/deactivating disks
  * instance instance1
    activate disks when online
    activate disks when offline
    deactivate disks (when offline)
  …
  * instance instance5
    activate disks when online
    activate disks when offline
    deactivate disks (when offline)
  * Submitted job ID(s) 223, 224, 225, 226, 227
    waiting for job 223 for instance1
    …
- Stopping and starting instances
  * instance instance1
  …
  * instance instance5
  * Submitted job ID(s) 230, 231, 232, 233, 234
    waiting for job 230 for instance1
    …
- Removing instances
  * instance instance1
  …
  * instance instance5
  * Submitted job ID(s) 235, 236, 237, 238, 239
    waiting for job 235 for instance1
    …
$

You can see in the above what operations the burn-in does. Ideally, the burn-in log would proceed successfully through all the steps and end cleanly, without throwing errors.

Instance operations¶

Creation¶

At this point, Ganeti and the hardware seem to be functioning correctly, so we’ll follow up with creating the instances manually:

$ gnt-instance add -t drbd -o debootstrap -s 256m instance1
Mon Oct 26 04:06:52 2009  - INFO: Selected nodes for instance instance1 via iallocator hail: node2, node3
Mon Oct 26 04:06:53 2009 * creating instance disks...
Mon Oct 26 04:06:57 2009 adding instance instance1 to cluster config
Mon Oct 26 04:06:57 2009  - INFO: Waiting for instance instance1 to sync disks.
Mon Oct 26 04:06:57 2009  - INFO: - device disk/0: 20.00% done, 4 estimated seconds remaining
Mon Oct 26 04:07:01 2009  - INFO: Instance instance1's disks are in sync.
Mon Oct 26 04:07:01 2009 creating os for instance instance1 on node node2
Mon Oct 26 04:07:01 2009 * running the instance OS create scripts...
Mon Oct 26 04:07:14 2009 * starting instance...
$ gnt-instance add -t drbd -o debootstrap -s 256m -n node1:node2 instance2
Mon Oct 26 04:11:37 2009 * creating instance disks...
Mon Oct 26 04:11:40 2009 adding instance instance2 to cluster config
Mon Oct 26 04:11:41 2009  - INFO: Waiting for instance instance2 to sync disks.
Mon Oct 26 04:11:41 2009  - INFO: - device disk/0: 35.40% done, 1 estimated seconds remaining
Mon Oct 26 04:11:42 2009  - INFO: - device disk/0: 58.50% done, 1 estimated seconds remaining
Mon Oct 26 04:11:43 2009  - INFO: - device disk/0: 86.20% done, 0 estimated seconds remaining
Mon Oct 26 04:11:44 2009  - INFO: - device disk/0: 92.40% done, 0 estimated seconds remaining
Mon Oct 26 04:11:44 2009  - INFO: - device disk/0: 97.00% done, 0 estimated seconds remaining
Mon Oct 26 04:11:44 2009  - INFO: Instance instance2's disks are in sync.
Mon Oct 26 04:11:44 2009 creating os for instance instance2 on node node1
Mon Oct 26 04:11:44 2009 * running the instance OS create scripts...
Mon Oct 26 04:11:57 2009 * starting instance...
$

The above shows one instance created via an iallocator script, and one being created with manual node assignment. The other three instances were also created and now it’s time to check them:

$ gnt-instance list
Instance  Hypervisor OS          Primary_node Status  Memory
instance1 xen-pvm    debootstrap node2        running   128M
instance2 xen-pvm    debootstrap node1        running   128M
instance3 xen-pvm    debootstrap node1        running   128M
instance4 xen-pvm    debootstrap node3        running   128M
instance5 xen-pvm    debootstrap node2        running   128M

Accessing instances¶

Accessing an instance’s console is easy:

$ gnt-instance console instance2
[    0.000000] Bootdata ok (command line is root=/dev/sda1 ro)
[    0.000000] Linux version 2.6…
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  Xen: 0000000000000000 - 0000000008800000 (usable)
[13138176.018071] Built 1 zonelists.  Total pages: 34816
[13138176.018074] Kernel command line: root=/dev/sda1 ro
[13138176.018694] Initializing CPU#0
…
Checking file systems...fsck 1.41.3 (12-Oct-2008)
done.
Setting kernel variables (/etc/sysctl.conf)...done.
Mounting local filesystems...done.
Activating swapfile swap...done.
Setting up networking....
Configuring network interfaces...done.
Setting console screen modes and fonts.
INIT: Entering runlevel: 2
Starting enhanced syslogd: rsyslogd.
Starting periodic command scheduler: crond.

Debian GNU/Linux 5.0 instance2 tty1

instance2 login:

At this moment you can log in to the instance and, after configuring the network (and doing this on all instances), we can check their connectivity:

$ fping instance{1..5}
instance1 is alive
instance2 is alive
instance3 is alive
instance4 is alive
instance5 is alive
$

Removal¶

Removing unwanted instances is also easy:

$ gnt-instance remove instance5
This will remove the volumes of the instance instance5 (including
mirrors), thus removing all the data of the instance. Continue?
y/[n]/?: y
$

Recovering from hardware failures¶

Recovering from node failure¶

We are now left with four instances. Assume that at this point, node3, which has one primary and one secondary instance, crashes:

$ gnt-node info node3
Node name: node3
  primary ip: 198.51.100.1
  secondary ip: 192.0.2.3
  master candidate: True
  drained: False
  offline: False
  primary for instances:
    - instance4
  secondary for instances:
    - instance1
$ fping node3
node3 is unreachable

At this point, the primary instance of that node (instance4) is down, but the secondary instance (instance1) is not affected except that it has lost disk redundancy:

$ fping instance{1,4}
instance1 is alive
instance4 is unreachable
$

If we try to check the status of instance4 via the instance info command, it fails because it tries to contact node3 which is down:

$ gnt-instance info instance4
Failure: command execution error:
Error checking node node3: Connection failed (113: No route to host)
$

So we need to mark node3 as offline, so that Ganeti won’t talk to it anymore:

$ gnt-node modify -O yes -f node3
Mon Oct 26 04:34:12 2009  - WARNING: Not enough master candidates (desired 10, new value will be 2)
Mon Oct 26 04:34:15 2009  - WARNING: Communication failure to node node3: Connection failed (113: No route to host)
Modified node node3
 - offline -> True
 - master_candidate -> auto-demotion due to offline
$

And now we can failover the instance:

$ gnt-instance failover instance4
Failover will happen to image instance4. This requires a shutdown of
the instance. Continue?
y/[n]/?: y
Mon Oct 26 04:35:34 2009 * checking disk consistency between source and target
Failure: command execution error:
Disk disk/0 is degraded on target node, aborting failover.
$ gnt-instance failover --ignore-consistency instance4
Failover will happen to image instance4. This requires a shutdown of
the instance. Continue?
y/[n]/?: y
Mon Oct 26 04:35:47 2009 * checking disk consistency between source and target
Mon Oct 26 04:35:47 2009 * shutting down instance on source node
Mon Oct 26 04:35:47 2009  - WARNING: Could not shutdown instance instance4 on node node3. Proceeding anyway. Please make sure node node3 is down. Error details: Node is marked offline
Mon Oct 26 04:35:47 2009 * deactivating the instance's disks on source node
Mon Oct 26 04:35:47 2009  - WARNING: Could not shutdown block device disk/0 on node node3: Node is marked offline
Mon Oct 26 04:35:47 2009 * activating the instance's disks on target node
Mon Oct 26 04:35:47 2009  - WARNING: Could not prepare block device disk/0 on node node3 (is_primary=False, pass=1): Node is marked offline
Mon Oct 26 04:35:48 2009 * starting the instance on the target node
$

Note that in our first attempt, Ganeti refused to do the failover since it wasn’t sure what the status of the instance’s disks was. Once we pass the --ignore-consistency flag, the failover proceeds:

$ gnt-instance list
Instance  Hypervisor OS          Primary_node Status  Memory
instance1 xen-pvm    debootstrap node2        running   128M
instance2 xen-pvm    debootstrap node1        running   128M
instance3 xen-pvm    debootstrap node1        running   128M
instance4 xen-pvm    debootstrap node1        running   128M
$

But at this point, both instance1 and instance4 are without disk redundancy:

$ gnt-instance info instance1
Instance name: instance1
UUID: 45173e82-d1fa-417c-8758-7d582ab7eef4
Serial number: 2
Creation time: 2009-10-26 04:06:57
Modification time: 2009-10-26 04:07:14
State: configured to be up, actual state is up
  Nodes:
    - primary: node2
    - secondaries: node3
  Operating system: debootstrap
  Allocated network port: None
  Hypervisor: xen-pvm
    - root_path: default (/dev/sda1)
    - kernel_args: default (ro)
    - use_bootloader: default (False)
    - bootloader_args: default ()
    - bootloader_path: default ()
    - kernel_path: default (/boot/vmlinuz-2.6-xenU)
    - initrd_path: default ()
  Hardware:
    - VCPUs: 1
    - maxmem: 256MiB
    - minmem: 512MiB
    - NICs:
      - nic/0: MAC: aa:00:00:78:da:63, IP: None, mode: bridged, link: xen-br0
  Disks:
    - disk/0: drbd8, size 256M
      access mode: rw
      nodeA:       node2, minor=0
      nodeB:       node3, minor=0
      port:        11035
      auth key:    8e950e3cec6854b0181fbc3a6058657701f2d458
      on primary:  /dev/drbd0 (147:0) in sync, status *DEGRADED*
      child devices:
        - child 0: lvm, size 256M
          logical_id: xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_data
          on primary: /dev/xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_data (254:0)
        - child 1: lvm, size 128M
          logical_id: xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta
          on primary: /dev/xenvg/22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta (254:1)

The output is similar for instance4. In order to recover this, we need to run the node evacuate command, which will change the current secondary node to a new one (in this case, we only have two working nodes, so all instances will end up on nodes one and two):

$ gnt-node evacuate -I hail node3
Relocate instance(s) 'instance1','instance4' from node
 node3 using iallocator hail?
y/[n]/?: y
Mon Oct 26 05:05:39 2009  - INFO: Selected new secondary for instance 'instance1': node1
Mon Oct 26 05:05:40 2009  - INFO: Selected new secondary for instance 'instance4': node2
Mon Oct 26 05:05:40 2009 Replacing disk(s) 0 for instance1
Mon Oct 26 05:05:40 2009 STEP 1/6 Check device existence
Mon Oct 26 05:05:40 2009  - INFO: Checking disk/0 on node2
Mon Oct 26 05:05:40 2009  - INFO: Checking volume groups
Mon Oct 26 05:05:40 2009 STEP 2/6 Check peer consistency
Mon Oct 26 05:05:40 2009  - INFO: Checking disk/0 consistency on node node2
Mon Oct 26 05:05:40 2009 STEP 3/6 Allocate new storage
Mon Oct 26 05:05:40 2009  - INFO: Adding new local storage on node1 for disk/0
Mon Oct 26 05:05:41 2009 STEP 4/6 Changing drbd configuration
Mon Oct 26 05:05:41 2009  - INFO: activating a new drbd on node1 for disk/0
Mon Oct 26 05:05:42 2009  - INFO: Shutting down drbd for disk/0 on old node
Mon Oct 26 05:05:42 2009  - WARNING: Failed to shutdown drbd for disk/0 on oldnode: Node is marked offline
Mon Oct 26 05:05:42 2009       Hint: Please cleanup this device manually as soon as possible
Mon Oct 26 05:05:42 2009  - INFO: Detaching primary drbds from the network (=> standalone)
Mon Oct 26 05:05:42 2009  - INFO: Updating instance configuration
Mon Oct 26 05:05:45 2009  - INFO: Attaching primary drbds to new secondary (standalone => connected)
Mon Oct 26 05:05:46 2009 STEP 5/6 Sync devices
Mon Oct 26 05:05:46 2009  - INFO: Waiting for instance instance1 to sync disks.
Mon Oct 26 05:05:46 2009  - INFO: - device disk/0: 13.90% done, 7 estimated seconds remaining
Mon Oct 26 05:05:53 2009  - INFO: Instance instance1's disks are in sync.
Mon Oct 26 05:05:53 2009 STEP 6/6 Removing old storage
Mon Oct 26 05:05:53 2009  - INFO: Remove logical volumes for 0
Mon Oct 26 05:05:53 2009  - WARNING: Can't remove old LV: Node is marked offline
Mon Oct 26 05:05:53 2009       Hint: remove unused LVs manually
Mon Oct 26 05:05:53 2009  - WARNING: Can't remove old LV: Node is marked offline
Mon Oct 26 05:05:53 2009       Hint: remove unused LVs manually
Mon Oct 26 05:05:53 2009 Replacing disk(s) 0 for instance4
Mon Oct 26 05:05:53 2009 STEP 1/6 Check device existence
Mon Oct 26 05:05:53 2009  - INFO: Checking disk/0 on node1
Mon Oct 26 05:05:53 2009  - INFO: Checking volume groups
Mon Oct 26 05:05:53 2009 STEP 2/6 Check peer consistency
Mon Oct 26 05:05:53 2009  - INFO: Checking disk/0 consistency on node node1
Mon Oct 26 05:05:54 2009 STEP 3/6 Allocate new storage
Mon Oct 26 05:05:54 2009  - INFO: Adding new local storage on node2 for disk/0
Mon Oct 26 05:05:54 2009 STEP 4/6 Changing drbd configuration
Mon Oct 26 05:05:54 2009  - INFO: activating a new drbd on node2 for disk/0
Mon Oct 26 05:05:55 2009  - INFO: Shutting down drbd for disk/0 on old node
Mon Oct 26 05:05:55 2009  - WARNING: Failed to shutdown drbd for disk/0 on oldnode: Node is marked offline
Mon Oct 26 05:05:55 2009       Hint: Please cleanup this device manually as soon as possible
Mon Oct 26 05:05:55 2009  - INFO: Detaching primary drbds from the network (=> standalone)
Mon Oct 26 05:05:55 2009  - INFO: Updating instance configuration
Mon Oct 26 05:05:55 2009  - INFO: Attaching primary drbds to new secondary (standalone => connected)
Mon Oct 26 05:05:56 2009 STEP 5/6 Sync devices
Mon Oct 26 05:05:56 2009  - INFO: Waiting for instance instance4 to sync disks.
Mon Oct 26 05:05:56 2009  - INFO: - device disk/0: 12.40% done, 8 estimated seconds remaining
Mon Oct 26 05:06:04 2009  - INFO: Instance instance4's disks are in sync.
Mon Oct 26 05:06:04 2009 STEP 6/6 Removing old storage
Mon Oct 26 05:06:04 2009  - INFO: Remove logical volumes for 0
Mon Oct 26 05:06:04 2009  - WARNING: Can't remove old LV: Node is marked offline
Mon Oct 26 05:06:04 2009       Hint: remove unused LVs manually
Mon Oct 26 05:06:04 2009  - WARNING: Can't remove old LV: Node is marked offline
Mon Oct 26 05:06:04 2009       Hint: remove unused LVs manually
$

And now node3 is completely free of instances and can be repaired:

$ gnt-node list
Node  DTotal DFree MTotal MNode MFree Pinst Sinst
node1   1.3T  1.3T  32.0G  1.0G 30.2G     3     1
node2   1.3T  1.3T  32.0G  1.0G 30.4G     1     3
node3      ?     ?      ?     ?     ?     0     0

Re-adding a node to the cluster¶

Let’s say node3 has been repaired and is now ready to be reused. Re-adding it is simple:

$ gnt-node add --readd node3
The authenticity of host 'node3 (198.51.100.1)' can't be established.
RSA key fingerprint is 9f:2e:5a:2e:e0:bd:00:09:e4:5c:32:f2:27:57:7a:f4.
Are you sure you want to continue connecting (yes/no)? yes
Mon Oct 26 05:27:39 2009  - INFO: Readding a node, the offline/drained flags were reset
Mon Oct 26 05:27:39 2009  - INFO: Node will be a master candidate

And it is now working again:

$ gnt-node list
Node  DTotal DFree MTotal MNode MFree Pinst Sinst
node1   1.3T  1.3T  32.0G  1.0G 30.2G     3     1
node2   1.3T  1.3T  32.0G  1.0G 30.4G     1     3
node3   1.3T  1.3T  32.0G  1.0G 30.4G     0     0

Note

If Ganeti has been built with the htools component enabled, you can shuffle the instances around to make better use of the nodes.

Disk failures¶

A disk failure is simpler than a full node failure. First, a single disk failure should not cause data loss for any redundant instance; only the performance of some instances might be reduced due to more network traffic.

Let’s take the cluster status in the above listing, and check what volumes are in use:

$ gnt-node volumes -o phys,instance node2
PhysDev   Instance
/dev/sdb1 instance4
/dev/sdb1 instance4
/dev/sdb1 instance1
/dev/sdb1 instance1
/dev/sdb1 instance3
/dev/sdb1 instance3
/dev/sdb1 instance2
/dev/sdb1 instance2
$

You can see that all instances on node2 have logical volumes on /dev/sdb1. Let’s simulate a disk failure on that disk:

$ ssh node2
# on node2
$ echo offline > /sys/block/sdb/device/state
$ vgs
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  /dev/sdb1: read failed after 0 of 4096 at 750153695232: Input/output error
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  Couldn't find device with uuid '954bJA-mNL0-7ydj-sdpW-nc2C-ZrCi-zFp91c'.
  Couldn't find all physical volumes for volume group xenvg.
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  /dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
  Couldn't find device with uuid '954bJA-mNL0-7ydj-sdpW-nc2C-ZrCi-zFp91c'.
  Couldn't find all physical volumes for volume group xenvg.
  Volume group xenvg not found
$

At this point, the node is broken and if we are to examine instance2 we get (simplified output shown):

$ gnt-instance info instance2
Instance name: instance2
State: configured to be up, actual state is up
  Nodes:
    - primary: node1
    - secondaries: node2
  Disks:
    - disk/0: drbd8, size 256M
      on primary:   /dev/drbd0 (147:0) in sync, status ok
      on secondary: /dev/drbd1 (147:1) in sync, status *DEGRADED* *MISSING DISK*

This instance has a secondary only on node2. Let’s verify a primary instance of node2:

$ gnt-instance info instance1
Instance name: instance1
State: configured to be up, actual state is up
  Nodes:
    - primary: node2
    - secondaries: node1
  Disks:
    - disk/0: drbd8, size 256M
      on primary:   /dev/drbd0 (147:0) in sync, status *DEGRADED* *MISSING DISK*
      on secondary: /dev/drbd3 (147:3) in sync, status ok
$ gnt-instance console instance1

Debian GNU/Linux 5.0 instance1 tty1

instance1 login: root
Last login: Tue Oct 27 01:24:09 UTC 2009 on tty1
instance1:~# date > test
instance1:~# sync
instance1:~# cat test
Tue Oct 27 01:25:20 UTC 2009
instance1:~# dmesg|tail
[5439785.235448] NET: Registered protocol family 15
[5439785.235489] 802.1Q VLAN Support v1.8 Ben Greear <greearb@candelatech.com>
[5439785.235495] All bugs added by David S. Miller <davem@redhat.com>
[5439785.235517] XENBUS: Device with no driver: device/console/0
[5439785.236576] kjournald starting.  Commit interval 5 seconds
[5439785.236588] EXT3-fs: mounted filesystem with ordered data mode.
[5439785.236625] VFS: Mounted root (ext3 filesystem) readonly.
[5439785.236663] Freeing unused kernel memory: 172k freed
[5439787.533779] EXT3 FS on sda1, internal journal
[5440655.065431] eth0: no IPv6 routers present
instance1:~#

As you can see, the instance is running fine and doesn’t see any disk issues. It is now time to fix node2 and re-establish redundancy for the involved instances.

Note

For Ganeti 2.0 we need to manually fix the volume group on node2 by running vgreduce --removemissing xenvg

$ gnt-node repair-storage node2 lvm-vg xenvg
Mon Oct 26 18:14:03 2009 Repairing storage unit 'xenvg' on node2 ...
$ ssh node2 vgs
VG    #PV #LV #SN Attr   VSize   VFree
xenvg   1   8   0 wz--n- 673.84G 673.84G
$

This has removed the ‘bad’ disk from the volume group, which is now left with only one PV. We can now replace the disks for the involved instances:

$ for i in instance{1..4}; do gnt-instance replace-disks -a $i; done
Mon Oct 26 18:15:38 2009 Replacing disk(s) 0 for instance1
Mon Oct 26 18:15:38 2009 STEP 1/6 Check device existence
Mon Oct 26 18:15:38 2009  - INFO: Checking disk/0 on node1
Mon Oct 26 18:15:38 2009  - INFO: Checking disk/0 on node2
Mon Oct 26 18:15:38 2009  - INFO: Checking volume groups
Mon Oct 26 18:15:38 2009 STEP 2/6 Check peer consistency
Mon Oct 26 18:15:38 2009  - INFO: Checking disk/0 consistency on node node1
Mon Oct 26 18:15:39 2009 STEP 3/6 Allocate new storage
Mon Oct 26 18:15:39 2009  - INFO: Adding storage on node2 for disk/0
Mon Oct 26 18:15:39 2009 STEP 4/6 Changing drbd configuration
Mon Oct 26 18:15:39 2009  - INFO: Detaching disk/0 drbd from local storage
Mon Oct 26 18:15:40 2009  - INFO: Renaming the old LVs on the target node
Mon Oct 26 18:15:40 2009  - INFO: Renaming the new LVs on the target node
Mon Oct 26 18:15:40 2009  - INFO: Adding new mirror component on node2
Mon Oct 26 18:15:41 2009 STEP 5/6 Sync devices
Mon Oct 26 18:15:41 2009  - INFO: Waiting for instance instance1 to sync disks.
Mon Oct 26 18:15:41 2009  - INFO: - device disk/0: 12.40% done, 9 estimated seconds remaining
Mon Oct 26 18:15:50 2009  - INFO: Instance instance1's disks are in sync.
Mon Oct 26 18:15:50 2009 STEP 6/6 Removing old storage
Mon Oct 26 18:15:50 2009  - INFO: Remove logical volumes for disk/0
Mon Oct 26 18:15:52 2009 Replacing disk(s) 0 for instance2
Mon Oct 26 18:15:52 2009 STEP 1/6 Check device existence
…
Mon Oct 26 18:16:01 2009 STEP 6/6 Removing old storage
Mon Oct 26 18:16:01 2009  - INFO: Remove logical volumes for disk/0
Mon Oct 26 18:16:02 2009 Replacing disk(s) 0 for instance3
Mon Oct 26 18:16:02 2009 STEP 1/6 Check device existence
…
Mon Oct 26 18:16:09 2009 STEP 6/6 Removing old storage
Mon Oct 26 18:16:09 2009  - INFO: Remove logical volumes for disk/0
Mon Oct 26 18:16:10 2009 Replacing disk(s) 0 for instance4
Mon Oct 26 18:16:10 2009 STEP 1/6 Check device existence
…
Mon Oct 26 18:16:18 2009 STEP 6/6 Removing old storage
Mon Oct 26 18:16:18 2009  - INFO: Remove logical volumes for disk/0
$

At this point, all instances should be healthy again.

Note

Ganeti 2.0 doesn’t have the -a option to replace-disks, so for it you have to run the loop twice, once over primary instances with argument -p and once over secondary instances with argument -s, but otherwise the operations are similar:

$ gnt-instance replace-disks -p instance1
…
$ for i in instance{2..4}; do gnt-instance replace-disks -s $i; done

Common cluster problems¶

There are a number of small issues that might appear on a cluster that can be solved easily as long as the issue is properly identified. For this exercise we will consider the case of node3, which was broken previously and re-added to the cluster without reinstallation. Running cluster verify on the cluster reports:

$ gnt-cluster verify
Mon Oct 26 18:30:08 2009 * Verifying global settings
Mon Oct 26 18:30:08 2009 * Gathering data (3 nodes)
Mon Oct 26 18:30:10 2009 * Verifying node status
Mon Oct 26 18:30:10 2009   - ERROR: node node3: unallocated drbd minor 0 is in use
Mon Oct 26 18:30:10 2009   - ERROR: node node3: unallocated drbd minor 1 is in use
Mon Oct 26 18:30:10 2009 * Verifying instance status
Mon Oct 26 18:30:10 2009   - ERROR: instance instance4: instance should not run on node node3
Mon Oct 26 18:30:10 2009 * Verifying orphan volumes
Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 22459cf8-117d-4bea-a1aa-791667d07800.disk0_data is unknown
Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data is unknown
Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta is unknown
Mon Oct 26 18:30:10 2009   - ERROR: node node3: volume 22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta is unknown
Mon Oct 26 18:30:10 2009 * Verifying remaining instances
Mon Oct 26 18:30:10 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 18:30:10 2009 * Other Notes
Mon Oct 26 18:30:10 2009 * Hooks Results
$

Instance status¶

As you can see, instance4 has a copy running on node3, because we forced the failover when node3 failed. This case is dangerous, as the two copies of the instance will have the same IP and MAC address, wreaking havoc on the network environment and on anyone who tries to use it.

Ganeti doesn’t directly handle this case. It is recommended to log on to node3 and run:

$ xm destroy instance4

Unallocated DRBD minors¶

There are still unallocated DRBD minors on node3. Again, these are not handled by Ganeti directly and need to be cleaned up via DRBD commands:

$ ssh node3
# on node 3
$ drbdsetup /dev/drbd0 down
$ drbdsetup /dev/drbd1 down
$

Orphan volumes¶

At this point, the only remaining problem should be the so-called orphan volumes. These can also appear after an aborted disk replacement, or in similar situations where Ganeti was not able to recover automatically. Here you need to remove them manually via LVM commands:

$ ssh node3
# on node3
$ lvremove xenvg
Do you really want to remove active logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_data"? [y/n]: y
  Logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_data" successfully removed
Do you really want to remove active logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta"? [y/n]: y
  Logical volume "22459cf8-117d-4bea-a1aa-791667d07800.disk0_meta" successfully removed
Do you really want to remove active logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data"? [y/n]: y
  Logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_data" successfully removed
Do you really want to remove active logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta"? [y/n]: y
  Logical volume "1aaf4716-e57f-4101-a8d6-03af5da9dc50.disk0_meta" successfully removed
$

At this point cluster verify shouldn’t complain anymore:

$ gnt-cluster verify
Mon Oct 26 18:37:51 2009 * Verifying global settings
Mon Oct 26 18:37:51 2009 * Gathering data (3 nodes)
Mon Oct 26 18:37:53 2009 * Verifying node status
Mon Oct 26 18:37:53 2009 * Verifying instance status
Mon Oct 26 18:37:53 2009 * Verifying orphan volumes
Mon Oct 26 18:37:53 2009 * Verifying remaining instances
Mon Oct 26 18:37:53 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 18:37:53 2009 * Other Notes
Mon Oct 26 18:37:53 2009 * Hooks Results
$

N+1 errors¶

Since redundant instances in Ganeti have a primary/secondary model, each node needs to set aside enough memory so that if one of its peer nodes fails, all the secondary instances that have that node as primary can be relocated. More specifically, if instance2 has node1 as primary and node2 as secondary (and node1 and node2 do not have any other instances in this layout), then node2 must have enough free memory so that if node1 fails, we can fail over instance2 without any other operations (to reduce the downtime window). Let’s increase the memory of the current instances to 4G, and add three new instances: two on node2:node3 with 8GB of RAM and one on node1:node2 with 12GB of RAM (numbers chosen so that we run out of memory):

$ gnt-instance modify -B memory=4G instance1
Modified instance instance1
 - be/maxmem -> 4096
 - be/minmem -> 4096
Please don't forget that these parameters take effect only at the next start of the instance.
$ gnt-instance modify …

$ gnt-instance add -t drbd -n node2:node3 -s 512m -B memory=8G -o debootstrap instance5
…
$ gnt-instance add -t drbd -n node2:node3 -s 512m -B memory=8G -o debootstrap instance6
…
$ gnt-instance add -t drbd -n node1:node2 -s 512m -B memory=8G -o debootstrap instance7
$ gnt-instance reboot --all
The reboot will operate on 7 instances.
Do you want to continue?
Affected instances:
  instance1
  instance2
  instance3
  instance4
  instance5
  instance6
  instance7
y/[n]/?: y
Submitted jobs 677, 678, 679, 680, 681, 682, 683
Waiting for job 677 for instance1...
Waiting for job 678 for instance2...
Waiting for job 679 for instance3...
Waiting for job 680 for instance4...
Waiting for job 681 for instance5...
Waiting for job 682 for instance6...
Waiting for job 683 for instance7...
$

We rebooted the instances for the memory changes to take effect. Now the cluster looks like:

$ gnt-node list
Node  DTotal DFree MTotal MNode MFree Pinst Sinst
node1   1.3T  1.3T  32.0G  1.0G  6.5G     4     1
node2   1.3T  1.3T  32.0G  1.0G 10.5G     3     4
node3   1.3T  1.3T  32.0G  1.0G 30.5G     0     2
$ gnt-cluster verify
Mon Oct 26 18:59:36 2009 * Verifying global settings
Mon Oct 26 18:59:36 2009 * Gathering data (3 nodes)
Mon Oct 26 18:59:37 2009 * Verifying node status
Mon Oct 26 18:59:37 2009 * Verifying instance status
Mon Oct 26 18:59:37 2009 * Verifying orphan volumes
Mon Oct 26 18:59:37 2009 * Verifying remaining instances
Mon Oct 26 18:59:37 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 18:59:37 2009   - ERROR: node node2: not enough memory to accommodate instance failovers should node node1 fail
Mon Oct 26 18:59:37 2009 * Other Notes
Mon Oct 26 18:59:37 2009 * Hooks Results
$

The cluster verify error above shows that if node1 fails, node2 will not have enough memory to failover all primary instances on node1 to it. To solve this, you have a number of options:

  • try to manually move instances around (but this can become complicated for any non-trivial cluster)
  • try to reduce the minimum memory of some instances on the source node of the N+1 failure (in the example above node1): this will allow it to start and be failed over/migrated with less than its maximum memory
  • try to reduce the runtime/maximum memory of some instances on the destination node of the N+1 failure (in the example above node2) to create additional available node memory (check the Ganeti administrator’s guide for what Ganeti will and won’t automatically do in regards to instance runtime memory modification)
  • if Ganeti has been built with the htools package enabled, you can run the hbal tool, which will try to compute an automated cluster solution that complies with the N+1 rule, as sketched below
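
For that last option, a minimal hbal session could look like the following sketch, where -L talks to the master daemon over LUXI and -X executes the proposed job set:

$ hbal -L
$ hbal -L -X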

Network issues¶

In case a node has problems with the network (usually the secondary network, as problems with the primary network will render the node unusable for ganeti commands), it will show up in cluster verify as:

$ gnt-cluster verify
Mon Oct 26 19:07:19 2009 * Verifying global settings
Mon Oct 26 19:07:19 2009 * Gathering data (3 nodes)
Mon Oct 26 19:07:23 2009 * Verifying node status
Mon Oct 26 19:07:23 2009   - ERROR: node node1: tcp communication with node 'node3': failure using the secondary interface(s)
Mon Oct 26 19:07:23 2009   - ERROR: node node2: tcp communication with node 'node3': failure using the secondary interface(s)
Mon Oct 26 19:07:23 2009   - ERROR: node node3: tcp communication with node 'node1': failure using the secondary interface(s)
Mon Oct 26 19:07:23 2009   - ERROR: node node3: tcp communication with node 'node2': failure using the secondary interface(s)
Mon Oct 26 19:07:23 2009   - ERROR: node node3: tcp communication with node 'node3': failure using the secondary interface(s)
Mon Oct 26 19:07:23 2009 * Verifying instance status
Mon Oct 26 19:07:23 2009 * Verifying orphan volumes
Mon Oct 26 19:07:23 2009 * Verifying remaining instances
Mon Oct 26 19:07:23 2009 * Verifying N+1 Memory redundancy
Mon Oct 26 19:07:23 2009 * Other Notes
Mon Oct 26 19:07:23 2009 * Hooks Results
$

This shows that both node1 and node2 have problems contacting node3 over the secondary network, and node3 has problems contacting them. From this output it can be deduced that, since node1 and node2 can communicate between themselves, node3 is the one having problems, and you need to investigate its network settings/connection.
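
A quick way to confirm this by hand is to probe the secondary addresses directly, for example (addresses taken from our example setup):

$ fping 192.0.2.1 192.0.2.2 192.0.2.3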

Migration problems¶

Since live migration can sometimes fail and leave the instance in an inconsistent state, Ganeti provides a --cleanup argument to the migrate command that does:

  • check on which node the instance is actually running (has the command failed before or after the actual migration?)
  • reconfigure the DRBD disks accordingly

It is always safe to run this command as long as the instance has good data on its primary node (i.e. not showing as degraded). If so, you can simply run:

$ gnt-instance migrate --cleanup instance1
Instance instance1 will be recovered from a failed migration. Note
that the migration procedure (including cleanup) is **experimental**
in this version. This might impact the instance if anything goes
wrong. Continue?
y/[n]/?: y
Mon Oct 26 19:13:49 2009 Migrating instance instance1
Mon Oct 26 19:13:49 2009 * checking where the instance actually runs (if this hangs, the hypervisor might be in a bad state)
Mon Oct 26 19:13:49 2009 * instance confirmed to be running on its primary node (node2)
Mon Oct 26 19:13:49 2009 * switching node node1 to secondary mode
Mon Oct 26 19:13:50 2009 * wait until resync is done
Mon Oct 26 19:13:50 2009 * changing into standalone mode
Mon Oct 26 19:13:50 2009 * changing disks into single-master mode
Mon Oct 26 19:13:50 2009 * wait until resync is done
Mon Oct 26 19:13:51 2009 * done
$

In use disks at instance shutdown¶

If you see something like the following when trying to shutdown or deactivate disks for an instance:

$ gnt-instance shutdown instance1
Mon Oct 26 19:16:23 2009  - WARNING: Could not shutdown block device disk/0 on node node2: drbd0: can't shutdown drbd device: /dev/drbd0: State change failed: (-12) Device is held open by someone\n

It most likely means something is holding open the underlying DRBD device. This can be bad if the instance is not running, as it might mean there was concurrent access to the disks from both the node and the instance, but not always (e.g. you could merely have had the partitions activated via kpartx).

To troubleshoot this issue you need to follow standard Linux practices, and pay attention to the hypervisor being used:

  • check if (in the above example) /dev/drbd0 on node2 is being mounted somewhere (cat /proc/mounts)
  • check if the device is not being used by device mapper itself: dmsetup ls and look for entries of the form drbd0pX, and if so remove them with either kpartx -d or dmsetup remove, as in the session sketched below
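
Putting these checks together, a troubleshooting session on node2 might look like the following sketch (drbd0 taken from the warning above; output will differ per system):

$ grep drbd0 /proc/mounts
$ dmsetup ls | grep drbd0
$ kpartx -d /dev/drbd0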

For Xen, check if it’s not using the disks itself:

$ xenstore-ls /local/domain/0/backend/vbd|grep -e "domain =" -e physical-device
domain = "instance2"
physical-device = "93:0"
domain = "instance3"
physical-device = "93:1"
domain = "instance4"
physical-device = "93:2"
$

You can see in the above output that the node exports three disks to three instances. The physical-device key is in major:minor format in hexadecimal, and 0x93 represents DRBD’s major number. Thus we can see from the above that instance2 has /dev/drbd0, instance3 /dev/drbd1, and instance4 /dev/drbd2.
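
To double-check the major number conversion, 0x93 is indeed 147, the DRBD device major seen earlier in the instance disk listings:

$ printf '%d\n' 0x93
147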

LUXI version mismatch¶

LUXI is the protocol used for communication between clients and the master daemon. Starting in Ganeti 2.3, the peers exchange their version in each message. When they don’t match, an error is raised:

$ gnt-node modify -O yes node3
Unhandled Ganeti error: LUXI version mismatch, server 2020000, request 2030000

Usually this means that server and client are from different Ganeti versions, or import their libraries from different, inconsistent paths (e.g. an older version installed in another place). You can print the import path for Ganeti’s modules using the following command (note that depending on your setup you might have to use an explicit version in the Python command, e.g. python2.6):

python -c 'import ganeti; print ganeti.__file__'
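
The version integers themselves can be decoded by hand. Assuming they follow Ganeti’s usual packing of major*1000000 + minor*10000 + revision (an assumption inferred from the values above), the mismatch in the example decodes to a 2.2 server talking to a 2.3 client:

python -c 'v = 2020000; print "%d.%d.%d" % (v // 1000000, v % 1000000 // 10000, v % 10000)'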
Ganeti administrator’s guide — Ganeti 2.9.3 documentation

Ganeti administrator’s guide¶

Documents Ganeti version 2.9


Introduction¶

Ganeti is virtualization cluster management software. You are expected to be a system administrator familiar with your Linux distribution and the Xen or KVM virtualization environments before using it.

The various components of Ganeti all have man pages and interactive help. This manual, though, will help you get familiar with the system by explaining the most common operations, grouped by related use.

After a terminology glossary and a section on the prerequisites needed to use this manual, the rest of this document is divided into sections for the different targets that a command affects: instances, nodes, etc.

Ganeti terminology¶

This section provides a small introduction to Ganeti terminology, which might be useful when reading the rest of the document.

Cluster¶

A set of machines (nodes) that cooperate to offer a coherent, highly available virtualization service under a single administration domain.

Node¶

A physical machine which is member of a cluster. Nodes are the basic cluster infrastructure, and they don’t need to be fault tolerant in order to achieve high availability for instances.

Nodes can be added and removed (if they host no instances) at will from the cluster. In an HA cluster and only with HA instances, the loss of any single node will not cause disk data loss for any instance; of course, a node crash will cause the crash of its primary instances.

A node belonging to a cluster can be in one of the following roles at a given time:

  • master node, which is the node from which the cluster is controlled
  • master candidate node, only nodes in this role have the full cluster configuration and knowledge, and only master candidates can become the master node
  • regular node, which is the state in which most nodes will be on bigger clusters (>20 nodes)
  • drained node, nodes in this state are functioning normally but cannot receive new instances; the intention is that nodes in this role have some issue and are being evacuated for hardware repairs
  • offline node, in which there is a record in the cluster configuration about the node, but the daemons on the master node will not talk to this node; any instances declared as having an offline node as either primary or secondary will be flagged as an error in the cluster verify operation

Depending on the role, each node will run a set of daemons:

  • the ganeti-noded daemon, which controls the manipulation of this node’s hardware resources; it runs on all nodes which are in a cluster
  • the ganeti-confd daemon (Ganeti 2.1+) which runs on all nodes, but is only functional on master candidate nodes; this daemon can be disabled at configuration time if you don’t need its functionality
  • the ganeti-rapi daemon which runs on the master node and offers an HTTP-based API for the cluster
  • the ganeti-masterd daemon which runs on the master node and allows control of the cluster

Besides the node role, there are other node flags that influence its behaviour:

  • the master_capable flag denotes whether the node can ever become a master candidate; setting this to ‘no’ means that auto-promotion will never make this node a master candidate; this flag can be useful for a remote node that only runs local instances, where having it become a master is impractical due to networking or other constraints
  • the vm_capable flag denotes whether the node can host instances or not; for example, one might use a non-vm_capable node just as a master candidate, for configuration backups; setting this flag to no disallows placement of instances on this node, deactivates hypervisor and related checks on it (e.g. bridge checks, LVM check, etc.), and removes it from cluster capacity computations

Instance¶

A virtual machine which runs on a cluster. It can be a fault tolerant, highly available entity.

An instance has various parameters, which are classified in three categories: hypervisor related-parameters (called hvparams), general parameters (called beparams) and per network-card parameters (called nicparams). All these parameters can be modified either at instance level or via defaults at cluster level.

Disk template¶

There are multiple options for the storage provided to an instance; while the instance sees the same virtual drive in all cases, the node-level configuration varies between them.

There are five disk templates you can choose from:

diskless
The instance has no disks. Only used for special purpose operating systems or for testing.
file
The instance will use plain files as backend for its disks. No redundancy is provided, and this is somewhat more difficult to configure for high performance. Note that for security reasons the file storage directory must be listed under /etc/ganeti/file-storage-paths, and that file is not copied automatically to all nodes by Ganeti.
sharedfile
The instance will use plain files as backend, but Ganeti assumes that those files will be available and in sync automatically on all nodes. This allows live migration and failover of instances using this method. As for file, the file storage directory must be listed under /etc/ganeti/file-storage-paths or Ganeti will refuse to create instances under it.
plain
The instance will use LVM devices as backend for its disks. No redundancy is provided.
drbd

Note

This is only valid for multi-node clusters using DRBD 8.0+

A mirror is set up between the local node and a remote one, which must be specified with the second value of the --node option. Use this option to obtain a highly available instance that can be failed over to a remote node should the primary one fail.

Note

Ganeti does not support DRBD stacked devices: DRBD stacked setup is not fully symmetric and as such it is not working with live migration.

rbd
The instance will use Volumes inside a RADOS cluster as backend for its disks. It will access them using the RADOS block device (RBD).
ext
The instance will use an external storage provider. See ganeti-extstorage-interface(7) for how to implement one.

IAllocator¶

A framework for using external (user-provided) scripts to compute the placement of instances on the cluster nodes. This eliminates the need to manually specify nodes in instance add, instance moves, node evacuate, etc.

In order for Ganeti to be able to use these scripts, they must be placed in the iallocator directory (usually lib/ganeti/iallocators under the installation prefix, e.g. /usr/local).

“Primary” and “secondary” concepts¶

An instance has a primary and, depending on the disk configuration, might also have a secondary node. The instance always runs on the primary node and only uses its secondary node for disk replication.

Similarly, the terms primary and secondary instances, when talking about a node, refer to the set of instances having the given node as primary, respectively as secondary.

Tags¶

Tags are short strings that can be attached either to the cluster itself, or to nodes or instances. They are useful as a very simplistic information store for helping with cluster administration, for example by attaching owner information to each instance after it’s created:

$ gnt-instance add … instance1
$ gnt-instance add-tags instance1 owner:user2

And then by listing each instance and its tags, this information could be used for contacting the users of each instance.
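
For example, with the owner tag attached above:

$ gnt-instance list-tags instance1
owner:user2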

Jobs and OpCodes¶

While not directly visible by an end-user, it’s useful to know that a basic cluster operation (e.g. starting an instance) is represented internally by Ganeti as an OpCode (abbreviation from operation code). These OpCodes are executed as part of a Job. The OpCodes in a single Job are processed serially by Ganeti, but different Jobs will be processed (depending on resource availability) in parallel. They will not be executed in the submission order, but depending on resource availability, locks and (starting with Ganeti 2.3) priority. An earlier job may have to wait for a lock while a newer job doesn’t need any locks and can be executed right away. Operations requiring a certain order need to be submitted as a single job, or the client must submit one job at a time and wait for it to finish before continuing.

For example, shutting down the entire cluster can be done by running the command gnt-instance shutdown --all, which will submit for each instance a separate job containing the “shutdown instance” OpCode.
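
Jobs submitted this way can be followed from the command line; a short sketch (the job ID is hypothetical):

$ gnt-instance shutdown --all
$ gnt-job list
$ gnt-job watch 1234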

Prerequisites¶

You need to have your Ganeti cluster installed and configured before you try any of the commands in this document. Please follow the Ganeti installation tutorial for instructions on how to do that.

Instance management¶

Adding an instance¶

The add operation might seem complex due to the many parameters it accepts, but once you have understood the (few) required parameters and the customisation capabilities you will see it is an easy operation.

The add operation requires at minimum five parameters:

  • the OS for the instance
  • the disk template
  • the disk count and size
  • the node specification or alternatively the iallocator to use
  • and finally the instance name

The OS for the instance must be visible in the output of the command gnt-os list and specifies which guest OS to install on the instance.

The disk template specifies what kind of storage to use as backend for the (virtual) disks presented to the instance; note that for instances with multiple virtual disks, they all must be of the same type.

The node(s) on which the instance will run can be given either manually, via the -n option, or computed automatically by Ganeti, if you have installed any iallocator script.

With the above parameters in mind, the command is:

$ gnt-instance add \
  -n TARGET_NODE:SECONDARY_NODE \
  -o OS_TYPE \
  -t DISK_TEMPLATE -s DISK_SIZE \
  INSTANCE_NAME

The instance name must be resolvable (e.g. exist in DNS) and usually points to an address in the same subnet as the cluster itself.

The above command has the minimum required options; other options you can give include, among others:

  • The maximum/minimum memory size (-B maxmem, -B minmem) (-B memory can be used to specify only one size)
  • The number of virtual CPUs (-B vcpus)
  • Arguments for the NICs of the instance; by default, a single-NIC instance is created. The IP and/or bridge of the NIC can be changed via --net 0:ip=IP,link=BRIDGE

See gnt-instance(8) for the detailed option list; a combined example of these options is shown after the basic example below.

For example, if you want to create a highly available instance, with a single disk of 50GB and the default memory size, having primary node node1 and secondary node node3, use the following command:

$ gnt-instance add -n node1:node3 -o debootstrap -t drbd -s 50G \
  instance1
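
Building on that example, the memory and NIC options described above could be combined as follows (the values here are purely illustrative; memory sizes default to MiB):

$ gnt-instance add -n node1:node3 -o debootstrap -t drbd -s 50G \
  -B maxmem=1024,minmem=512,vcpus=2 \
  --net 0:ip=192.0.2.10,link=xen-br0 \
  instance2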

There is also a command for batch instance creation from a specification file; see the batch-create operation in the gnt-instance manual page.

Regular instance operations¶

Removal¶

Removing an instance is even easier than creating one. This operation is irreversible and destroys all the contents of your instance. Use with care:

$ gnt-instance remove INSTANCE_NAME

Startup/shutdown¶

Instances are automatically started at instance creation time. To manually start one which is currently stopped you can run:

$ gnt-instance startup INSTANCE_NAME

Ganeti will start an instance with up to its maximum instance memory. If not enough memory is available Ganeti will use all the available memory down to the instance minimum memory. If not even that amount of memory is free Ganeti will refuse to start the instance.

Note that this will not work when an instance is in the permanently stopped offline state. In this case, you will first have to put it back to online mode by running:

$ gnt-instance modify --online INSTANCE_NAME

The command to stop the running instance is:

$ gnt-instance shutdown INSTANCE_NAME

If you want to shut the instance down more permanently, so that it does not require dynamically allocated resources (memory and vcpus), execute the following after shutting it down:

$ gnt-instance modify --offline INSTANCE_NAME

Warning

Do not use the Xen or KVM commands directly to stop instances. If you run for example xm shutdown or xm destroy on an instance Ganeti will automatically restart it (via the ganeti-watcher(8) command which is launched via cron).

Querying instances¶

There are two ways to get information about instances: listing instances, which does a tabular output containing a given set of fields about each instance, and querying detailed information about a set of instances.

The command to see all the instances configured and their status is:

$ gnt-instance list

The command can return a custom set of information when using the -o option (as always, check the manpage for a detailed specification). Each instance will be represented on a line, thus making it easy to parse this output via the usual shell utilities (grep, sed, etc.).
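
For example, a custom field list could look like this (an illustrative invocation; see the manpage for the full set of fields):

$ gnt-instance list -o name,os,pnode,status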

To get more detailed information about an instance, you can run:

$ gnt-instance info INSTANCE

which will give a multi-line block of information about the instance, its hardware resources (especially its disks and their redundancy status), etc. This is harder to parse and is more expensive than the list operation, but returns much more detailed information.

Changing an instance’s runtime memory¶

Ganeti will always make sure an instance has a value between its maximum and its minimum memory available as runtime memory. As of version 2.6 Ganeti will only choose a size different than the maximum size when starting up, failing over, or migrating an instance on a node with less than the maximum memory available. It won’t resize other instances in order to free up space for an instance.

If you find that you need more memory on a node, any instance can be manually resized without downtime, with the command:

$ gnt-instance modify -m SIZE INSTANCE_NAME

The same command can also be used to increase the memory available on an instance, provided that enough free memory is available on its node, and the specified size is not larger than the maximum memory size the instance had when it was first booted (an instance will be unable to see new memory above the maximum that was specified to the hypervisor at its boot time; if it needs to grow further, a reboot becomes necessary).

Export/Import¶

You can create a snapshot of an instance disk and its Ganeti configuration, which then you can backup, or import into another cluster. The way to export an instance is:

$ gnt-backup export -n TARGET_NODE INSTANCE_NAME

The target node can be any node in the cluster with enough space under /srv/ganeti to hold the instance image. Use the --noshutdown option to snapshot an instance without rebooting it. Note that Ganeti only keeps one snapshot per instance: any previous snapshots of the same instance existing cluster-wide under /srv/ganeti will be removed by this operation; if you want to keep them, you need to move them out of the Ganeti exports directory.

Importing an instance is similar to creating a new one, but additionally one must specify the location of the snapshot. The command is:

$ gnt-backup import -n TARGET_NODE \
  --src-node=NODE --src-dir=DIR INSTANCE_NAME

By default, parameters will be read from the export information, but you can of course pass them in via the command line - most of the options available for the command gnt-instance add are supported here too.

Import of foreign instances¶

It is possible to import a foreign instance whose disk data is already stored as LVM volumes, without copying it, via the disk adoption mode.

For this, ensure that the original, non-managed instance is stopped, then create a Ganeti instance in the usual way, except that instead of passing the disk information you specify the current volumes:

$ gnt-instance add -t plain -n HOME_NODE ... \
  --disk 0:adopt=lv_name[,vg=vg_name] INSTANCE_NAME

This will take over the given logical volumes, rename them to the Ganeti standard (UUID-based), and start the instance directly, without installing the OS on them. If you configure the hypervisor similarly to the non-managed configuration that the instance had, the transition should be seamless for the instance. For more than one disk, just pass another disk parameter (e.g. --disk 1:adopt=...).

Instance kernel selection¶

The kernel that instances use to boot can come either from the node, or from the instances themselves, depending on the setup.

Xen-PVM¶

With Xen PVM, there are three options.

First, you can use a kernel from the node, by setting the hypervisor parameters as such:

  • kernel_path to a valid file on the node (and appropriately initrd_path)
  • kernel_args optionally set to a valid Linux setting (e.g. ro)
  • root_path to a valid setting (e.g. /dev/xvda1)
  • bootloader_path and bootloader_args to empty

Alternatively, you can delegate the kernel management to instances, and use either pvgrub or the deprecated pygrub. For this, you must install the kernels and initrds in the instance and create a valid GRUB v1 configuration file.

For pvgrub (new in version 2.4.2), you need to set:

  • kernel_path to point to the pvgrub loader present on the node (e.g. /usr/lib/xen/boot/pv-grub-x86_32.gz)
  • kernel_args to the path to the GRUB config file, relative to the instance (e.g. (hd0,0)/grub/menu.lst)
  • root_path must be empty
  • bootloader_path and bootloader_args to empty

While pygrub is deprecated, here is how you can configure it:

  • bootloader_path to the pygrub binary (e.g. /usr/bin/pygrub)
  • the other settings are not important

More information can be found in the Xen wiki pages for pvgrub and pygrub.
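
As an illustration, the node-based kernel settings from the first list above could be applied to an existing instance via gnt-instance modify -H (the paths are examples and depend on your installation):

$ gnt-instance modify \
  -H kernel_path=/boot/vmlinuz-3-xenU,root_path=/dev/xvda1 \
  instance1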

KVM¶

For KVM also the kernel can be loaded either way.

For loading the kernels from the node, you need to set:

  • kernel_path to a valid value
  • initrd_path optionally set if you use an initrd
  • kernel_args optionally set to a valid value (e.g. ro)

If you want instead to have the instance boot from its disk (and execute its bootloader), simply set the kernel_path parameter to an empty string, and all the others will be ignored.
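
For example, switching an existing instance to boot from its own disk could look like this (illustrative; it assumes an empty value can be passed directly on the command line, and the change takes effect at the next restart of the instance):

$ gnt-instance modify -H kernel_path= instance1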

Instance HA features¶

Note

This section only applies to multi-node clusters

Changing the primary node¶

There are three ways to exchange an instance’s primary and secondary nodes; the right one to choose depends on how the instance has been created and the status of its current primary node. See Restoring redundancy for DRBD-based instances for information on changing the secondary node. Note that it’s only possible to change the primary node to the secondary and vice-versa; a direct change of the primary node with a third node, while keeping the current secondary is not possible in a single step, only via multiple operations as detailed in Instance relocation.

Failing over an instance¶

If an instance is built in highly available mode you can at any time fail it over to its secondary node, even if the primary has somehow failed and it’s not up anymore. Doing it is really easy, on the master node you can just run:

$ gnt-instance failover INSTANCE_NAME

That’s it. After the command completes the secondary node is now the primary, and vice-versa.

The instance will be started with an amount of memory between its maxmem and its minmem value, depending on the free memory on its target node, or the operation will fail if that’s not possible. See Startup/shutdown for details.

If the instance’s disk template is of type rbd, then you can specify the target node (which can be any node) explicitly, or specify an iallocator plugin. If you omit both, the default iallocator will be used to determine the target node:

$ gnt-instance failover -n TARGET_NODE INSTANCE_NAME

Live migrating an instance¶

If an instance is built in highly available mode, it currently runs and both its nodes are running fine, you can migrate it over to its secondary node, without downtime. On the master node you need to run:

$ gnt-instance migrate INSTANCE_NAME

The current load on the instance and its memory size will influence how long the migration will take. In any case, for both KVM and Xen hypervisors, the migration will be transparent to the instance.

If the destination node has less memory than the instance’s current runtime memory, but at least the instance’s minimum memory available Ganeti will automatically reduce the instance runtime memory before migrating it, unless the --no-runtime-changes option is passed, in which case the target node should have at least the instance’s current runtime memory free.

If the instance’s disk template is of type rbd, then you can specify the target node (which can be any node) explicitly, or specify an iallocator plugin. If you omit both, the default iallocator will be used to determine the target node:

$ gnt-instance migrate -n TARGET_NODE INSTANCE_NAME

Moving an instance (offline)¶

If an instance has not been created as mirrored, then the only way to change its primary node is to execute the move command:

$ gnt-instance move -n NEW_NODE INSTANCE

This has a few prerequisites:

  • the instance must be stopped
  • its current primary node must be on-line and healthy
  • the disks of the instance must not have any errors

Since this operation actually copies the data from the old node to the new node, expect it to take time proportional to the size of the instance’s disks and the speed of both the nodes’ I/O systems and their networking.

Disk operations¶

Disk failures are a common cause of errors in any server deployment. Ganeti offers protection from single-node failure if your instances were created in HA mode, and it also offers ways to restore redundancy after a failure.

Preparing for disk operations¶

It is important to note that for Ganeti to be able to do any disk operation, the Linux machines on top of which Ganeti runs must be consistent; for LVM, this means that the LVM commands must not return failures; it is common that after a complete disk failure, any LVM command aborts with an error similar to:

$ vgs
/dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
/dev/sdb1: read failed after 0 of 4096 at 750153695232: Input/output error
/dev/sdb1: read failed after 0 of 4096 at 0: Input/output error
Couldn't find device with uuid 't30jmN-4Rcf-Fr5e-CURS-pawt-z0jU-m1TgeJ'.
Couldn't find all physical volumes for volume group xenvg.

Before restoring an instance’s disks to healthy status, you need to fix the volume group used by Ganeti so that logical volumes can actually be created and managed again. This is usually done in a multi-step process:

  1. first, if the disk is completely gone and LVM commands exit with “Couldn’t find device with uuid…” then you need to run the command:

    $ vgreduce --removemissing VOLUME_GROUP
    
  2. after the above command, the LVM commands should be executing normally (warnings are normal, but the commands will not fail completely).

  3. if the failed disk is still visible in the output of the pvs command, you need to deactivate it from allocations by running:

    $ pvchange -x n /dev/DISK
    

At this point, the volume group should be consistent and any bad physical volumes should no longer be available for allocation.

Note that since version 2.1 Ganeti provides some commands to automate these two operations, see Generalized storage handling.

Restoring redundancy for DRBD-based instances¶

Suppose a DRBD instance has two nodes, and the storage on one of them has failed. Depending on which node (primary or secondary) has failed, you have three options at hand:

  • if the storage on the primary node has failed, you need to re-create the disks on it
  • if the storage on the secondary node has failed, you can either re-create the disks on it or change the secondary and recreate redundancy on the new secondary node

Of course, at any point it’s possible to force re-creation of disks even though everything is already fine.

For all three cases, the replace-disks operation can be used:

# re-create disks on the primary node
$ gnt-instance replace-disks -p INSTANCE_NAME
# re-create disks on the current secondary
$ gnt-instance replace-disks -s INSTANCE_NAME
# change the secondary node, via manual specification
$ gnt-instance replace-disks -n NODE INSTANCE_NAME
# change the secondary node, via an iallocator script
$ gnt-instance replace-disks -I SCRIPT INSTANCE_NAME
# since Ganeti 2.1: automatically fix the primary or secondary node
$ gnt-instance replace-disks -a INSTANCE_NAME

Since the process involves copying all data from the working node to the target node, it will take a while, depending on the instance’s disk size, node I/O system and network speed. But it is (barring any network interruption) completely transparent for the instance.

Re-creating disks for non-redundant instances¶

New in version 2.1.

For non-redundant instances, there isn’t a copy (except backups) from which to re-create the disks. But it’s possible to at least re-create empty disks, after which a reinstall can be run, via the recreate-disks command:

$ gnt-instance recreate-disks INSTANCE

Note that this will fail if the disks already exist. The instance can be assigned to new nodes automatically by specifying an iallocator through the --iallocator option.

Conversion of an instance’s disk type¶

It is possible to convert between a non-redundant instance of type plain (LVM storage) and redundant drbd via the gnt-instance modify command:

# start with a non-redundant instance
$ gnt-instance add -t plain ... INSTANCE

# later convert it to redundant
$ gnt-instance stop INSTANCE
$ gnt-instance modify -t drbd -n NEW_SECONDARY INSTANCE
$ gnt-instance start INSTANCE

# and convert it back
$ gnt-instance stop INSTANCE
$ gnt-instance modify -t plain INSTANCE
$ gnt-instance start INSTANCE

The conversion must be done while the instance is stopped, and converting from the plain to the drbd template presents a small risk, especially if the instance has multiple disks and/or if one node fails during the conversion procedure. As such, it’s recommended (as always) to make sure that downtime for manual recovery is acceptable and that the instance has up-to-date backups.

Debugging instances¶

Accessing an instance’s disks¶

From an instance’s primary node you can have access to its disks. Never ever mount the underlying logical volume manually on a fault-tolerant instance, or you will break replication and your data will be inconsistent. The correct way to access an instance’s disks is to run (on the master node, as usual) the command:

$ gnt-instance activate-disks INSTANCE

And then, on the primary node of the instance, access the device that gets created. For example, you could mount the given disks, then edit files on the filesystem, etc.

Note that with partitioned disks (as opposed to whole-disk filesystems), you will need to use a tool like kpartx(8):

# on node1
$ gnt-instance activate-disks instance1
node3:disk/0:…
$ ssh node3
# on node 3
$ kpartx -l /dev/…
$ kpartx -a /dev/…
$ mount /dev/mapper/… /mnt/
# edit files under mnt as desired
$ umount /mnt/
$ kpartx -d /dev/…
$ exit
# back to node 1

After you’ve finished you can deactivate them with the deactivate-disks command, which works in the same way:

$ gnt-instance deactivate-disks INSTANCE

Note that if any process started by you is still using the disks, the above command will error out; you must clean up and ensure that the command runs successfully before you start the instance, otherwise the instance will suffer corruption.

Accessing an instance’s console¶

The command to access a running instance’s console is:

$ gnt-instance console INSTANCE_NAME

Use the console normally and then type ^] when done, to exit.

Other instance operations¶

Reboot¶

There is a wrapper command for rebooting instances:

$ gnt-instance reboot instance2

By default, this does the equivalent of shutting down and then starting the instance, but it accepts parameters to perform a soft reboot (via the hypervisor), a hard reboot (hypervisor shutdown and then startup) or a full one (the default, which also de-configures and then re-configures the disks of the instance).

Instance OS definitions debugging¶

Should you have any problems with instance operating systems the command to see a complete status for all your nodes is:

$ gnt-os diagnose

Instance relocation¶

While it is not possible to move an instance from nodes (A, B) to nodes (C, D) in a single move, it is possible to do so in a few steps:

# instance is located on A, B
$ gnt-instance replace-disks -n nodeC instance1
# instance has moved from (A, B) to (A, C)
# we now flip the primary/secondary nodes
$ gnt-instance migrate instance1
# instance lives on (C, A)
# we can then change A to D via:
$ gnt-instance replace-disks -n nodeD instance1

This brings it into the final configuration of (C, D). Note that we needed to do two replace-disks operations (two copies of the instance disks), because we needed to get rid of both the original nodes (A and B).

Node operations¶

There are far fewer node operations available than for instances, but they are equally important for maintaining a healthy cluster.

Add/readd¶

It is at any time possible to extend the cluster with one more node, by using the node add operation:

$ gnt-node add NEW_NODE

If the cluster has a replication network defined, then you need to pass the -s REPLICATION_IP parameter to this command.

A variation of this command can be used to re-configure a node if its Ganeti configuration is broken, for example if it has been reinstalled by mistake:

$ gnt-node add --readd EXISTING_NODE

This will reinitialise the node as if it had been newly added, while keeping its existing configuration in the cluster (primary/secondary IP, etc.); in other words, you won’t need to use -s here.

Changing the node role¶

A node can be in different roles, as explained in the Ganeti terminology section. Promoting a node to the master role is special, while the other roles are handled all via a single command.

Failing over the master node¶

If you want to promote a different node to the master role (for whatever reason), run on any other master-candidate node the command:

$ gnt-cluster master-failover

and the node you ran it on is now the new master. In case you try to run this on a non master-candidate node, you will get an error telling you which nodes are valid.

Changing between the other roles¶

The gnt-node modify command can be used to select a new role:

# change to master candidate
$ gnt-node modify -C yes NODE
# change to drained status
$ gnt-node modify -D yes NODE
# change to offline status
$ gnt-node modify -O yes NODE
# change to regular mode (reset all flags)
$ gnt-node modify -O no -D no -C no NODE

Note that the cluster requires that at any point in time, a certain number of nodes are master candidates, so changing from master candidate to other roles might fail. It is recommended to either force the operation (via the --force option) or first change the number of master candidates in the cluster - see Standard operations.
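
For example, demoting a node might require first shrinking the candidate pool (illustrative values):

$ gnt-cluster modify --candidate-pool-size=3
$ gnt-node modify -C no node4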

Evacuating nodes¶

There are two steps of moving instances off a node:

  • moving the primary instances (actually converting them into secondary instances)
  • moving the secondary instances (including any instances converted in the step above)

Primary instance conversion¶

For this step, you can use either individual instance move commands (as seen in Changing the primary node) or the bulk per-node versions; these are:

$ gnt-node migrate NODE
$ gnt-node evacuate -s NODE

Note that the instance “move” command doesn’t currently have a node equivalent.

Both these commands, or the equivalent per-instance command, will make this node the secondary node for the respective instances, whereas their current secondary node will become primary. Note that it is not possible to change in one step the primary node to another node as primary, while keeping the same secondary node.

Secondary instance evacuation¶

For the evacuation of secondary instances, a command called gnt-node evacuate is provided and its syntax is:

$ gnt-node evacuate -I IALLOCATOR_SCRIPT NODE
$ gnt-node evacuate -n DESTINATION_NODE NODE

The first version will compute the new secondary for each instance in turn using the given iallocator script, whereas the second one will simply move all instances to DESTINATION_NODE.

Removal¶

Once a node no longer has any instances (neither primary nor secondary), it’s easy to remove it from the cluster:

$ gnt-node remove NODE_NAME

This will deconfigure the node, stop the Ganeti daemons on it and leave it, hopefully, as it was before it joined the cluster.

Replication network changes¶

The gnt-node modify -s command can be used to change the secondary IP of a node. This operation can only be performed if:

  • No instance is active on the target node
  • The new target IP is reachable from the master’s secondary IP

Also, this operation will not allow changing a node from single-homed (same primary and secondary IP) to multi-homed (separate replication network) or vice versa, unless:

  • The target node is the master node and --force is passed.
  • The target cluster is single-homed and the new primary IP is a change to single-homed for a particular node.
  • The target cluster is multi-homed and the new primary IP is a change to multi-homed for a particular node.

For example, to do a single-homed to multi-homed conversion:

$ gnt-node modify --force -s SECONDARY_IP MASTER_NAME
$ gnt-node modify -s SECONDARY_IP NODE1_NAME
$ gnt-node modify -s SECONDARY_IP NODE2_NAME
$ gnt-node modify -s SECONDARY_IP NODE3_NAME
...

The same commands can be used for a multi-homed to single-homed conversion, except that in that case the secondary IPs should be the same as the primaries for each node.

Storage handling¶

When using LVM (either standalone or with DRBD), it can become tedious to debug and fix it in case of errors. Furthermore, even file-based storage can become complicated to handle manually on many hosts. Ganeti provides a couple of commands to help with automation.

Logical volumes¶

This is a command specific to LVM handling. It allows listing the logical volumes on a given node or on all nodes and their association to instances via the volumes command:

$ gnt-node volumes
Node  PhysDev   VG    Name             Size Instance
node1 /dev/sdb1 xenvg e61fbc97-….disk0 512M instance17
node1 /dev/sdb1 xenvg ebd1a7d1-….disk0 512M instance19
node2 /dev/sdb1 xenvg 0af08a3d-….disk0 512M instance20
node2 /dev/sdb1 xenvg cc012285-….disk0 512M instance16
node2 /dev/sdb1 xenvg f0fac192-….disk0 512M instance18

The above command maps each logical volume to a volume group and underlying physical volume and (possibly) to an instance.

Generalized storage handling¶

New in version 2.1.

Starting with Ganeti 2.1, a new storage framework has been implemented that tries to abstract the handling of the storage type the cluster uses.

First is listing the backend storage and their space situation:

$ gnt-node list-storage
Node  Name        Size Used   Free
node1 /dev/sda7 673.8G   0M 673.8G
node1 /dev/sdb1 698.6G 1.5G 697.1G
node2 /dev/sda7 673.8G   0M 673.8G
node2 /dev/sdb1 698.6G 1.0G 697.6G

The default is to list LVM physical volumes. It’s also possible to list the LVM volume groups:

$ gnt-node list-storage -t lvm-vg
Node  Name  Size
node1 xenvg 1.3T
node2 xenvg 1.3T

Next is repairing storage units, which is currently only implemented for volume groups and does the equivalent of vgreduce --removemissing:

$ gnt-node repair-storage node2 lvm-vg xenvg
Sun Oct 25 22:21:45 2009 Repairing storage unit 'xenvg' on node2 ...

Last is the modification of volume properties, which is (again) only implemented for LVM physical volumes and allows toggling the allocatable value:

$ gnt-node modify-storage --allocatable=no node2 lvm-pv /dev/sdb1

Use of the storage commands¶

All these commands are needed when recovering a node from a disk failure (a combined example follows the list):

  • first, we need to recover from complete LVM failure (due to missing disk), by running the repair-storage command
  • second, we need to change allocation on any partially-broken disk (i.e. LVM still sees it, but it has bad blocks) by running modify-storage
  • then we can evacuate the instances as needed
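
A combined session for a failed /dev/sdb1 on node2 might look like this (device and node names are illustrative):

# recover the volume group from the missing disk
$ gnt-node repair-storage node2 lvm-vg xenvg
# stop allocations on the partially-broken physical volume
$ gnt-node modify-storage --allocatable=no node2 lvm-pv /dev/sdb1
# move the secondary instances away
$ gnt-node evacuate -s node2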

Cluster operations¶

Besides the cluster initialisation command (which is detailed in the Ganeti installation tutorial document) and the master failover command, which is explained under node handling, there are a couple of other cluster operations available.

Standard operations¶

One of the few commands that can be run on any node (not only the master) is the getmaster command:

# on node2
$ gnt-cluster getmaster
node1.example.com

It is possible to query and change global cluster parameters via the info and modify commands:

$ gnt-cluster info
Cluster name: cluster.example.com
Cluster UUID: 07805e6f-f0af-4310-95f1-572862ee939c
Creation time: 2009-09-25 05:04:15
Modification time: 2009-10-18 22:11:47
Master node: node1.example.com
Architecture (this node): 64bit (x86_64)
…
Tags: foo
Default hypervisor: xen-pvm
Enabled hypervisors: xen-pvm
Hypervisor parameters:
  - xen-pvm:
      root_path: /dev/sda1
      …
Cluster parameters:
  - candidate pool size: 10
    …
Default instance parameters:
  - default:
      memory: 128
      …
Default nic parameters:
  - default:
      link: xen-br0
      …

The various parameters above can be changed via the modify commands as follows:

  • the hypervisor parameters can be changed via modify -H xen-pvm:root_path=…, and so on for other hypervisors/key/values
  • the “default instance parameters” are changeable via modify -B parameter=value… syntax
  • the cluster parameters are changeable via separate options to the modify command (e.g. --candidate-pool-size, etc.)

For detailed option list see the gnt-cluster(8) man page.

The cluster version can be obtained via the version command:

$ gnt-cluster version
Software version: 2.1.0
Internode protocol: 20
Configuration format: 2010000
OS api version: 15
Export interface: 0

This is not very useful except when debugging Ganeti.

Global node commands¶

There are two commands provided for replicating files to all nodes of a cluster and for running commands on all the nodes:

$ gnt-cluster copyfile /path/to/file
$ gnt-cluster command ls -l /path/to/file

These are simple wrappers over scp/ssh and more advanced usage can be obtained using dsh(1) and similar commands. But they are useful to update an OS script from the master node, for example.

Cluster verification¶

There are three commands that relate to global cluster checks. The first one is verify which gives an overview on the cluster state, highlighting any issues. In normal operation, this command should return no ERROR messages:

$ gnt-cluster verify
Sun Oct 25 23:08:58 2009 * Verifying global settings
Sun Oct 25 23:08:58 2009 * Gathering data (2 nodes)
Sun Oct 25 23:09:00 2009 * Verifying node status
Sun Oct 25 23:09:00 2009 * Verifying instance status
Sun Oct 25 23:09:00 2009 * Verifying orphan volumes
Sun Oct 25 23:09:00 2009 * Verifying remaining instances
Sun Oct 25 23:09:00 2009 * Verifying N+1 Memory redundancy
Sun Oct 25 23:09:00 2009 * Other Notes
Sun Oct 25 23:09:00 2009   - NOTICE: 5 non-redundant instance(s) found.
Sun Oct 25 23:09:00 2009 * Hooks Results

The second command is verify-disks, which checks that the instance’s disks have the correct status based on the desired instance state (up/down):

$ gnt-cluster verify-disks

Note that this command will show no output when disks are healthy.

The last command is used to repair any discrepancies in Ganeti’s recorded disk size and the actual disk size (disk size information is needed for proper activation and growth of DRBD-based disks):

$ gnt-cluster repair-disk-sizes
Sun Oct 25 23:13:16 2009  - INFO: Disk 0 of instance instance1 has mismatched size, correcting: recorded 512, actual 2048
Sun Oct 25 23:13:17 2009  - WARNING: Invalid result from node node4, ignoring node results

The above shows one instance having a wrong disk size, and a node which returned invalid data, and thus we ignored all primary instances of that node.

Configuration redistribution¶

If the verify command complains about file mismatches between the master and other nodes, due to some node problems or because you manually modified configuration files, you can force a push of the master configuration to all other nodes via the redist-conf command:

$ gnt-cluster redist-conf

This command will be silent unless there are problems sending updates to the other nodes.

Cluster renaming¶

It is possible to rename a cluster, or to change its IP address, via the rename command. If only the IP has changed, you need to pass the current name and Ganeti will realise its IP has changed:

$ gnt-cluster rename cluster.example.com
This will rename the cluster to 'cluster.example.com'. If
you are connected over the network to the cluster name, the operation
is very dangerous as the IP address will be removed from the node and
the change may not go through. Continue?
y/[n]/?: y
Failure: prerequisites not met for this operation:
Neither the name nor the IP address of the cluster has changed

In the above output, neither value has changed since the cluster initialisation so the operation is not completed.

Queue operations¶

The job queue execution in Ganeti 2.0 and higher can be inspected, suspended and resumed via the queue command:

$ gnt-cluster queue info
The drain flag is unset
$ gnt-cluster queue drain
$ gnt-instance stop instance1
Failed to submit job for instance1: Job queue is drained, refusing job
$ gnt-cluster queue info
The drain flag is set
$ gnt-cluster queue undrain

This is most useful if you have an active cluster and you need to upgrade the Ganeti software, or simply restart the software on any node (a combined session sketch follows these steps):

  1. suspend the queue via queue drain
  2. wait until there are no more running jobs via gnt-job list
  3. restart the master or another node, or upgrade the software
  4. resume the queue via queue undrain
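
A minimal session following these steps could look like this (the restart command is an example and depends on your init system):

$ gnt-cluster queue drain
$ gnt-job list
# ... wait until no jobs are running ...
$ /etc/init.d/ganeti restart
$ gnt-cluster queue undrain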

Note

this command only stores a local flag file, and if you fail over the master, it will have no effect on the new master.

Watcher control¶

The ganeti-watcher(8) program, usually scheduled via cron, takes care of cluster maintenance operations (restarting downed instances, activating down DRBD disks, etc.). However, during maintenance and troubleshooting, this can get in your way; disabling it by commenting out the cron job is not a good solution, as this can be forgotten. Thus there are some commands for automated control of the watcher: pause, info and continue:

$ gnt-cluster watcher info
The watcher is not paused.
$ gnt-cluster watcher pause 1h
The watcher is paused until Mon Oct 26 00:30:37 2009.
$ gnt-cluster watcher info
The watcher is paused until Mon Oct 26 00:30:37 2009.
$ ganeti-watcher -d
2009-10-25 23:30:47,984:  pid=28867 ganeti-watcher:486 DEBUG Pause has been set, exiting
$ gnt-cluster watcher continue
The watcher is no longer paused.
$ ganeti-watcher -d
2009-10-25 23:31:04,789:  pid=28976 ganeti-watcher:345 DEBUG Archived 0 jobs, left 0
2009-10-25 23:31:05,884:  pid=28976 ganeti-watcher:280 DEBUG Got data from cluster, writing instance status file
2009-10-25 23:31:06,061:  pid=28976 ganeti-watcher:150 DEBUG Data didn't change, just touching status file
$ gnt-cluster watcher info
The watcher is not paused.

The exact details of the argument to the pause command are available in the manpage.

Note

this command only stores a local flag file, and if you fail over the master, it will have no effect on the new master.

Node auto-maintenance¶

If the cluster parameter maintain_node_health is enabled (see the manpage for gnt-cluster, the init and modify subcommands), then the following will happen automatically:

  • the watcher will shutdown any instances running on offline nodes
  • the watcher will deactivate any DRBD devices on offline nodes

In the future, more actions are planned, so only enable this parameter if the nodes are completely dedicated to Ganeti; otherwise it might be possible to lose data due to auto-maintenance actions.

Removing a cluster entirely¶

The usual method to clean up a cluster is to run gnt-cluster destroy; however, if the Ganeti installation is broken in any way, this will not run.

It is possible in such a case to manually clean up most, if not all, traces of a cluster installation by following these steps on all of the nodes:

  1. Shutdown all instances. This depends on the virtualisation method used (Xen, KVM, etc.):
  • Xen: run xm list and xm destroy on all the non-Domain-0 instances
  • KVM: kill all the KVM processes
  • chroot: kill all processes under the chroot mountpoints
  2. If using DRBD, shutdown all DRBD minors (which should at this time no longer be in use by instances); on each node, run drbdsetup /dev/drbdN down for each active DRBD minor.
  3. If using LVM, cleanup the Ganeti volume group; if only Ganeti created logical volumes (and you are not sharing the volume group with the OS, for example), then simply running lvremove -f xenvg (replace ‘xenvg’ with your volume group name) should do the required cleanup.
  4. If using file-based storage, remove recursively all files and directories under your file-storage directory: rm -rf /srv/ganeti/file-storage/*, replacing the path with the correct path for your cluster.
  5. Stop the ganeti daemons (/etc/init.d/ganeti stop) and kill any that remain alive (pgrep ganeti and pkill ganeti).
  6. Remove the ganeti state directory (rm -rf /var/lib/ganeti/*), replacing the path with the correct path for your installation.
  7. If using RBD, run rbd unmap /dev/rbdN to unmap the RBD disks. Then remove the RBD disk images used by Ganeti, identified by their UUIDs (rbd rm uuid.rbd.diskN).

On the master node, remove the cluster from the master-netdev (usually xen-br0 for bridged mode, otherwise eth0 or similar), by running ip a del $clusterip/32 dev xen-br0 (use the correct cluster ip and network device name).

At this point, the machines are ready for a cluster creation; in case you want to remove Ganeti completely, you need to also undo some of the SSH changes and log directories:

  • rm -rf /var/log/ganeti /srv/ganeti (replace with the correct paths)
  • remove from /root/.ssh the keys that Ganeti added (check the authorized_keys and id_dsa files)
  • regenerate the host’s SSH keys (check the OpenSSH startup scripts)
  • uninstall Ganeti

Otherwise, if you plan to re-create the cluster, you can just go ahead and rerun gnt-cluster init.

Monitoring the cluster¶

Starting with Ganeti 2.8, a monitoring daemon is available, providing information about the status and the performance of the system.

The monitoring daemon runs on every node, listening on TCP port 1815. Each instance of the daemon provides information related to the node it is running on.

The queries to the monitoring agent will be HTTP GET requests on port 1815. The answer will be encoded in JSON format and will depend on the specific accessed resource.

If a request is sent to a non-existing resource, a 404 error will be returned by the HTTP server.

The following paragraphs will present the existing resources supported by the current protocol version, that is version 1.

/¶

The root resource. It will return the list of the supported protocol version numbers.

Currently, this will include only version 1.

/1¶

Not an actual resource per se, it is the root of all the resources of protocol version 1.

If requested through GET, the null JSON value will be returned.

/1/list/collectors¶

Returns a list of tuples (kind, category, name) showing all the collectors available in the system.

/1/report/all¶

A list of the reports of all the data collectors, as a JSON list.

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter verbose=1 to the request.

/1/report/[category]/[collector_name]¶

Returns the report of the collector [collector_name] that belongs to the specified [category].

The category has to be written in lowercase.

If a collector does not belong to any category, default will have to be used as the value for [category].

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter verbose=1 to the request.
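
The reports can be fetched with any HTTP client, for example curl (the node name is illustrative; the answers are JSON, as described above):

$ curl http://node1.example.com:1815/
$ curl http://node1.example.com:1815/1/list/collectors
$ curl "http://node1.example.com:1815/1/report/all?verbose=1"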

Tags handling¶

The tags handling (addition, removal, listing) is similar for all the objects that support it (instances, nodes, and the cluster).

Limitations¶

Note that the set of characters allowed in a tag and the maximum tag length are restricted. Currently the maximum length is 128 characters, there can be at most 4096 tags per object, and the allowed character set consists of alphanumeric characters plus .+*/:@-.

Operations¶

Tags can be added via add-tags:

$ gnt-instance add-tags INSTANCE a b c
$ gnt-node add-tags NODE a b c
$ gnt-cluster add-tags a b c

The above commands add three tags to an instance, to a node and to the cluster. Note that the cluster command only takes tags as arguments, whereas the node and instance commands first require the node or instance name.

Tags can also be added from a file, via the --from=FILENAME argument. The file is expected to contain one tag per line.
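
For example (a hypothetical invocation, assuming a file named tags.txt):

$ printf 'owner:user2\nenv:production\n' > tags.txt
$ gnt-instance add-tags --from=tags.txt instance1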

Tags can also be removed via a syntax very similar to the add one:

$ gnt-instance remove-tags INSTANCE a b c

And listed via:

$ gnt-instance list-tags
$ gnt-node list-tags
$ gnt-cluster list-tags

Autorepair¶

The tool harep can be used to automatically fix some problems that are present in the cluster.

It is mainly meant to be regularly and automatically executed as a cron job. This is evident from the fact that, when executed, it does not immediately fix all the issues of the instances of the cluster, but cycles the instances through a series of states, one at every harep execution. Every state performs a step towards the resolution of the problem. This process goes on until the instance is brought back to the healthy state, or the tool realizes that it is not able to fix the instance, and therefore marks it as being in the failure state.

Allowing harep to act on the cluster¶

By default, harep checks the status of the cluster but it is not allowed to perform any modification. Modification must be explicitly allowed by an appropriate use of tags. Tagging can be applied at various levels, and can enable different kinds of autorepair, as hereafter described.

All the tags that authorize harep to perform modifications follow this syntax:

ganeti:watcher:autorepair:<type>

where <type> indicates the kind of intervention that can be performed. Every possible value of <type> includes at least all the authorizations of the previous one, plus its own. The possible values, in increasing order of severity, are:

  • fix-storage allows a disk replacement or another operation that fixes the instance backend storage without affecting the instance itself. This can for example recover from a broken drbd secondary, but risks data loss if something is wrong on the primary but the secondary was somehow recoverable.
  • migrate allows an instance migration. This can recover from a drained primary, but can cause an instance crash in some cases (bugs).
  • failover allows instance reboot on the secondary. This can recover from an offline primary, but the instance will lose its running state.
  • reinstall allows disks to be recreated and an instance to be reinstalled. This can recover from primary&secondary both being offline, or from an offline primary in the case of non-redundant instances. It causes data loss.

These autorepair tags can be applied to a cluster, a nodegroup or an instance, and will act where they are applied and on everything in the entity’s sub-tree (e.g. a tag applied to a nodegroup will apply to all the instances contained in that nodegroup, but not to the rest of the cluster).

If there are multiple ganeti:watcher:autorepair:<type> tags in an object (cluster, node group or instance), the least destructive tag takes precedence. When multiplicity happens across objects, the nearest tag wins. For example, if in a cluster with two instances, I1 and I2, I1 has failover, and the cluster itself has both fix-storage and reinstall, I1 will end up with failover and I2 with fix-storage.
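
Expressed as commands, the example just described corresponds to (instance names and tag placement are illustrative):

$ gnt-cluster add-tags ganeti:watcher:autorepair:fix-storage \
  ganeti:watcher:autorepair:reinstall
$ gnt-instance add-tags I1 ganeti:watcher:autorepair:failover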

Limiting harep¶

Sometimes it is useful to stop harep from performing its task temporarily, and it is useful to be able to do so without disrupting its configuration, that is, without removing the authorization tags. For this purpose, suspend tags are provided.

Suspend tags can be added to the cluster, a nodegroup or instances, and act on the entire entity’s sub-tree. No operation will be performed by harep on the instances protected by a suspend tag. Their syntax is as follows:

ganeti:watcher:autorepair:suspend[:<timestamp>]

If there are multiple suspend tags in an object, the form without timestamp takes precedence (permanent suspension); or, if all object tags have a timestamp, the one with the highest timestamp.

Tags with a timestamp will be automatically removed when the time indicated by the timestamp is passed. Indefinite suspension tags have to be removed manually.
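
For example (illustrative; the timestamp here is assumed to be a Unix epoch value, as its exact format is not restated above):

$ gnt-instance add-tags instance1 ganeti:watcher:autorepair:suspend:1388534400
# permanent suspension, at cluster level
$ gnt-cluster add-tags ganeti:watcher:autorepair:suspend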

Result reporting¶

Harep will report about the result of its actions both through its CLI, and by adding tags to the instances it operated on. Such tags follow the syntax described below:

ganeti:watcher:autorepair:result:<type>:<id>:<timestamp>:<result>:<jobs>

If this tag is present, a repair of type <type> has been performed on the instance and has been completed by <timestamp>. The <result> is either success, failure or enoperm, and <jobs> is a +-separated list of jobs that were executed for this repair.

An enoperm result is an error state due to permission problems. It is returned when the repair cannot proceed because it would require performing an operation that is not allowed by the ganeti:watcher:autorepair:<type> tag defining the instance’s autorepair permissions.

NB: if an instance repair ends up in a failure state, it will not be touched again by harep until it has been manually fixed by the system administrator and the ganeti:watcher:autorepair:result:failure:* tag has been manually removed.

Job operations¶

The various jobs submitted by the instance/node/cluster commands can be examined, canceled and archived by various invocations of the gnt-job command.

First is the job list command:

$ gnt-job list
17771 success INSTANCE_QUERY_DATA
17773 success CLUSTER_VERIFY_DISKS
17775 success CLUSTER_REPAIR_DISK_SIZES
17776 error   CLUSTER_RENAME(cluster.example.com)
17780 success CLUSTER_REDIST_CONF
17792 success INSTANCE_REBOOT(instance1.example.com)

More detailed information about a job can be found via the info command:

$ gnt-job info 17776
Job ID: 17776
  Status: error
  Received:         2009-10-25 23:18:02.180569
  Processing start: 2009-10-25 23:18:02.200335 (delta 0.019766s)
  Processing end:   2009-10-25 23:18:02.279743 (delta 0.079408s)
  Total processing time: 0.099174 seconds
  Opcodes:
    OP_CLUSTER_RENAME
      Status: error
      Processing start: 2009-10-25 23:18:02.200335
      Processing end:   2009-10-25 23:18:02.252282
      Input fields:
        name: cluster.example.com
      Result:
        OpPrereqError
        [Neither the name nor the IP address of the cluster has changed]
      Execution log:

During the execution of a job, it’s possible to follow its output, similar to the log one gets from the gnt- commands, via the watch command:

$ gnt-instance add --submit … instance1
JobID: 17818
$ gnt-job watch 17818
Output from job 17818 follows
-----------------------------
Mon Oct 26 00:22:48 2009  - INFO: Selected nodes for instance instance1 via iallocator dumb: node1, node2
Mon Oct 26 00:22:49 2009 * creating instance disks...
Mon Oct 26 00:22:52 2009 adding instance instance1 to cluster config
Mon Oct 26 00:22:52 2009  - INFO: Waiting for instance instance1 to sync disks.
…
Mon Oct 26 00:23:03 2009 creating os for instance instance1 on node node1
Mon Oct 26 00:23:03 2009 * running the instance OS create scripts...
Mon Oct 26 00:23:13 2009 * starting instance...
$

This is useful if you need to follow a job’s progress from multiple terminals.

A job that has not yet started to run can be canceled:

$ gnt-job cancel 17810

But not one that has already started execution:

$ gnt-job cancel 17805
Job 17805 is no longer waiting in the queue

There are two queues for jobs: the current and the archive queue. Jobs are initially submitted to the current queue, and they stay in that queue until they have finished execution (either successfully or not). At that point, they can be moved into the archive queue using e.g. gnt-job autoarchive all. The ganeti-watcher script will do this automatically 6 hours after a job is finished. The ganeti-cleaner script will then remove the archived jobs from the archive directory after three weeks.

Note that gnt-job list only shows jobs in the current queue. Archived jobs can be viewed using gnt-job info <id>.

Special Ganeti deployments¶

Since Ganeti 2.4, it is possible to extend the Ganeti deployment with two custom scenarios: Ganeti inside Ganeti and multi-site model.

Running Ganeti under Ganeti¶

It is sometimes useful to be able to use a Ganeti instance as a Ganeti node (part of another cluster, usually). One example scenario is two small clusters, where we want to have an additional master candidate that holds the cluster configuration and can be used for helping with the master voting process.

However, these Ganeti instances should not host instances themselves, and should not be considered in the normal capacity planning, evacuation strategies, etc. In order to accomplish this, mark these nodes as non-vm_capable:

$ gnt-node modify --vm-capable=no node3

The vm_capable status can be listed as usual via gnt-node list:

$ gnt-node list -oname,vm_capable
Node  VMCapable
node1 Y
node2 Y
node3 N

When this flag is set, the cluster will not do any operations that relate to instances on such nodes, e.g. hypervisor operations, disk-related operations, etc. Basically they will just keep the ssconf files and, if master candidates, the full configuration.

Multi-site model¶

If Ganeti is deployed in multi-site model, with each site being a node group (so that instances are not relocated across the WAN by mistake), it is conceivable that either the WAN latency is high or that some sites have a lower reliability than others. In this case, it doesn’t make sense to replicate the job information across all sites (or even outside of a “central” node group), so it should be possible to restrict which nodes can become master candidates via the auto-promotion algorithm.

Ganeti 2.4 introduces for this purpose a new master_capable flag, which (when unset) prevents nodes from being marked as master candidates, either manually or automatically.

As usual, the node modify operation can change this flag:

$ gnt-node modify --auto-promote --master-capable=no node3
Fri Jan  7 06:23:07 2011  - INFO: Demoting from master candidate
Fri Jan  7 06:23:08 2011  - INFO: Promoted nodes to master candidate role: node4
Modified node node3
 - master_capable -> False
 - master_candidate -> False

And the node list operation will list this flag:

$ gnt-node list -oname,master_capable node1 node2 node3
Node  MasterCapable
node1 Y
node2 Y
node3 N

Note that marking a node both not vm_capable and not master_capable makes the node practically unusable from Ganeti’s point of view. Hence these two flags should probably be used in contrast: some nodes will be only master candidates (master_capable but not vm_capable), and other nodes will only hold instances (vm_capable but not master_capable).

Ganeti tools¶

Besides the usual gnt- and ganeti- commands which are provided and installed in $prefix/sbin at install time, there are a couple of other tools installed which are seldom used but can be helpful in some cases.

lvmstrap¶

The lvmstrap tool, introduced in the Configuring LVM section, has two modes of operation:

  • diskinfo shows the discovered disks on the system and their status
  • create takes all not-in-use disks and creates a volume group out of them

Warning

The create argument to this command causes data loss!

cfgupgrade¶

The cfgupgrade tool is used to upgrade between major (and minor) Ganeti versions, and to roll back. Point releases are usually transparent for the admin.

More information about the upgrade procedure is listed on the wiki at http://code.google.com/p/ganeti/wiki/UpgradeNotes.

There is also a script designed to upgrade from Ganeti 1.2 to 2.0, called cfgupgrade12.

cfgshell¶

Note

This command is not actively maintained; make sure you back up your configuration before using it

This can be used as an alternative to direct editing of the main configuration file if Ganeti has a bug and prevents you, for example, from removing an instance or a node from the configuration file.

burnin¶

Warning

This command will erase existing instances if given as arguments!

This tool is used to exercise either the hardware of machines or alternatively the Ganeti software. It is safe to run on an existing cluster as long as you don’t pass it existing instance names.

The command will, by default, execute a comprehensive set of operations against a list of instances, these being:

  • creation
  • disk replacement (for redundant instances)
  • failover and migration (for redundant instances)
  • move (for non-redundant instances)
  • disk growth
  • add disks, remove disk
  • add NICs, remove NICs
  • export and then import
  • rename
  • reboot
  • shutdown/startup
  • and finally removal of the test instances

Executing all these operations will test that the hardware performs well: the creation, disk replace, disk add and disk growth will exercise the storage and network; the migrate command will test the memory of the systems. Depending on the passed options, it can also test that the instance OS definitions properly execute the rename, import and export operations.

sanitize-config¶

This tool takes the Ganeti configuration and outputs a “sanitized” version, by randomizing or clearing:

  • DRBD secrets and cluster public key (always)
  • host names (optional)
  • IPs (optional)
  • OS names (optional)
  • LV names (optional, only useful for very old clusters which still have instances whose LVs are based on the instance name)

By default, all optional items are activated except the LV name randomization. When passing --no-randomization, which disables the optional items (i.e. just the DRBD secrets and cluster public keys are randomized), the resulting file can be used as a safety copy of the cluster config: while not trivial, the layout of the cluster can be recreated from it, and if the instance disks have not been lost, it permits recovery from the loss of all master candidates.

users-setup¶

Ganeti can either be run entirely as root, or with every daemon running as its own specific user (if the parameters --with-user-prefix and/or --with-group-prefix have been specified at ./configure-time).

In case split users are activated, they are required to exist on the system, and they need to belong to the proper groups in order for the access permissions to files and programs to be correct.

The users-setup tool, when run, takes care of setting up the proper users and groups.

When invoked without parameters, the tool runs in interactive mode, showing the list of actions it will perform and asking for confirmation before proceeding.

Providing the --yes-do-it parameter to the tool prevents the confirmation from being asked, and the users and groups will be created immediately.

Other Ganeti projects¶

Below is a list (which might not be up-to-date) of additional projects that can be useful in a Ganeti deployment. They can be downloaded from the project site (http://code.google.com/p/ganeti/) and the repositories are also on the project git site (http://git.ganeti.org).

NBMA tools¶

The ganeti-nbma software is designed to allow instances to live on a separate, virtual network from the nodes, and in an environment where nodes are not guaranteed to be able to reach each other via multicasting or broadcasting. For more information see the README in the source archive.

ganeti-htools¶

Before Ganeti version 2.5, this was a standalone project; since that version it is integrated into the Ganeti codebase (see Ganeti quick installation guide for instructions on how to enable it). If you run an older Ganeti version, you will have to download and build it separately.

For more information and installation instructions, see the README file in the source archive.


Design for a X509 Certificate Authority¶

Current state and shortcomings¶

Import/export in Ganeti have a need for many unique X509 certificates. So far these were all self-signed, but with the new design for import/export they need to be signed by a Certificate Authority (CA).

Proposed changes¶

The plan is to implement a simple CA in Ganeti.

Interacting with an external CA is too difficult or impossible for automated processes like exporting instances, so each Ganeti cluster will have its own CA. The public key will be stored in …/lib/ganeti/ca/cert.pem, the private key (only readable by the master daemon) in …/lib/ganeti/ca/key.pem.

Similar to the RAPI certificate, a new CA certificate can be installed using the gnt-cluster renew-crypto command. Such a CA could be an intermediate of a third-party CA. By default a self-signed CA is generated and used.

Each certificate signed by the CA is required to have a unique serial number. The serial number is stored in the file …/lib/ganeti/ca/serial, replicated to all master candidates and never reset, even when a new CA is installed.
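
A minimal sketch of how such a persistent counter could be maintained follows (the path and helper name are assumptions for illustration, not Ganeti’s actual implementation):

import os

SERIAL_FILE = "/var/lib/ganeti/ca/serial"  # assumed location

def next_serial(path=SERIAL_FILE):
  # Read the last issued serial, increment it and persist it atomically.
  try:
    serial = int(open(path).read().strip())
  except (IOError, ValueError):
    serial = 0
  serial += 1
  tmp = path + ".new"
  f = open(tmp, "w")
  f.write("%d\n" % serial)
  f.close()
  os.rename(tmp, path)  # atomic replace on POSIX
  return serial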

The threat model is expected to be the same as with self-signed certificates. To reinforce this, all certificates signed by the CA must be valid for less than one week (168 hours).

Implementing support for Certificate Revocation Lists (CRL) using OpenSSL is non-trivial. Lighttpd doesn’t support them at all and apparently never will in version 1.4.x. Some CRL-related parts have only been added in the most recent version of pyOpenSSL (0.11). Instead of a CRL, Ganeti will gain a new cluster configuration property defining the minimum accepted serial number. In case of a lost or compromised private key this property can be set to the most recently generated serial number.
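
A sketch of how this minimum-serial check could stand in for a CRL (function and parameter names are assumptions):

def serial_acceptable(cert, min_accepted_serial):
  # Certificates with a serial number below the configured minimum are
  # treated as revoked, emulating a CRL without OpenSSL CRL support.
  return cert.get_serial_number() >= min_accepted_serial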

While possible to implement in the future, other X509 certificates used by the cluster (e.g. RAPI or inter-node communication) will not be automatically signed by the per-cluster CA.

The commonName attribute of signed certificates must be set to the cluster name or the name of a node in the cluster.

Software requirements¶

  • pyOpenSSL 0.10 or above (lower versions can’t set the X509v3 extension subjectKeyIdentifier recommended for certificate authority certificates by RFC 3280, section 4.2.1.2)

Code samples¶

Generating X509 CA using pyOpenSSL¶

The following code sample shows how to generate a CA certificate using pyOpenSSL:

import OpenSSL  # needed by all the samples below

key = OpenSSL.crypto.PKey()
key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

ca = OpenSSL.crypto.X509()
# The version value is zero-based, so 2 means an X509v3 certificate
ca.set_version(2)
ca.set_serial_number(1)
ca.get_subject().CN = "ca.example.com"
ca.gmtime_adj_notBefore(0)
ca.gmtime_adj_notAfter(24 * 60 * 60)
# Self-signed: the issuer is the subject itself
ca.set_issuer(ca.get_subject())
ca.set_pubkey(key)
ca.add_extensions([
  OpenSSL.crypto.X509Extension("basicConstraints", True,
                               "CA:TRUE, pathlen:0"),
  OpenSSL.crypto.X509Extension("keyUsage", True,
                               "keyCertSign, cRLSign"),
  OpenSSL.crypto.X509Extension("subjectKeyIdentifier", False, "hash",
                               subject=ca),
  ])
ca.sign(key, "sha1")

Signing X509 certificate using CA¶

The following code sample shows how to sign an X509 certificate using a CA:

# load_certificate/load_privatekey expect the PEM data itself, not a
# file name (here both are assumed to be stored in "ca.pem")
ca_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                          open("ca.pem").read())
ca_key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
                                        open("ca.pem").read())

key = OpenSSL.crypto.PKey()
key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

cert = OpenSSL.crypto.X509()
cert.get_subject().CN = "node1.example.com"
cert.set_serial_number(1)
cert.gmtime_adj_notBefore(0)
cert.gmtime_adj_notAfter(24 * 60 * 60)
# Issued (and, below, signed) by the CA instead of being self-signed
cert.set_issuer(ca_cert.get_subject())
cert.set_pubkey(key)
cert.sign(ca_key, "sha1")

How to generate Certificate Signing Request¶

The following code sample shows how to generate an X509 Certificate Request (CSR):

key = OpenSSL.crypto.PKey()
key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)

req = OpenSSL.crypto.X509Req()
req.get_subject().CN = "node1.example.com"
req.set_pubkey(key)
req.sign(key, "sha1")

# Write private key
print OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, key)

# Write request
print OpenSSL.crypto.dump_certificate_request(OpenSSL.crypto.FILETYPE_PEM, req)

X509 certificate from Certificate Signing Request¶

The following code sample shows how to create an X509 certificate from a Certificate Signing Request and sign it with a CA:

ca_cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                          open("ca.pem").read())
ca_key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM,
                                        open("ca.pem").read())
req = OpenSSL.crypto.load_certificate_request(OpenSSL.crypto.FILETYPE_PEM,
                                              open("req.csr").read())

cert = OpenSSL.crypto.X509()
cert.set_subject(req.get_subject())
cert.set_serial_number(1)
cert.gmtime_adj_notBefore(0)
cert.gmtime_adj_notAfter(24 * 60 * 60)
cert.set_issuer(ca_cert.get_subject())
cert.set_pubkey(req.get_pubkey())
cert.sign(ca_key, "sha1")

print OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)

Verify whether X509 certificate matches private key¶

The code sample below shows how to check whether a certificate matches a certain private key. OpenSSL has a function for this, X509_check_private_key, but pyOpenSSL provides no access to it.

ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_METHOD)
ctx.use_privatekey(key)
ctx.use_certificate(cert)
try:
  ctx.check_privatekey()
except OpenSSL.SSL.Error:
  print "Incorrect key"
else:
  print "Key matches certificate"
ganeti-2.9.3/doc/html/design-2.7.html

Ganeti 2.7 design¶

The following design documents have been implemented in Ganeti 2.7:

The following designs have been partially implemented in Ganeti 2.7:

ganeti-2.9.3/doc/html/design-2.8.html

Ganeti 2.8 design¶

The following design documents have been implemented in Ganeti 2.8:

The following designs have been partially implemented in Ganeti 2.8:

ganeti-2.9.3/doc/html/design-monitoring-agent.html

Ganeti monitoring agent¶

This is a design document detailing the implementation of a Ganeti monitoring agent report system that can be queried by a monitoring system to calculate health information for a Ganeti cluster.

Current state and shortcomings¶

There is currently no monitoring support in Ganeti. While we don’t want to build something like Nagios or Pacemaker as part of Ganeti, it would be useful if such tools could easily extract information from a Ganeti machine in order to take actions (example actions include logging an outage for future reporting or alerting a person or system about it).

Proposed changes¶

Each Ganeti node should export a status page that can be queried by a monitoring system. Such a status page will be exported on a network port and will be encoded in JSON (simple text) over HTTP.

The choice of JSON is obvious as we already depend on it in Ganeti and thus we don’t need to add extra libraries to use it, as opposed to what would happen for XML or some other markup format.

Location of agent report¶

The report will be available from all nodes, and will cover all node-local resources. This allows more real-time information to be available, at the cost of querying all nodes.

Information reported¶

The monitoring agent system will report on the following basic information:

  • Instance status
  • Instance disk status
  • Status of storage for instances
  • Ganeti daemons status, CPU usage, memory footprint
  • Hypervisor resources report (memory, CPU, network interfaces)
  • Node OS resources report (memory, CPU, network interfaces)
  • Node OS CPU load average report
  • Information from a plugin system

Format of the report¶

The report will be in JSON format, and it will present an array of report objects. Each report object will be produced by a specific data collector. Each report object includes some mandatory fields, to be provided by all the data collectors:

name
The name of the data collector that produced this part of the report. It is supposed to be unique inside a report.
version
The version of the data collector that produces this part of the report. Built-in data collectors (as opposed to those implemented as plugins) should have “B” as the version number.
format_version
The format of what is represented in the “data” field for each data collector might change over time. Every time this happens, the format_version should be changed, so that who reads the report knows what format to expect, and how to correctly interpret it.
timestamp
The time when the reported data were gathered. It has to be expressed in nanoseconds since the unix epoch (0:00:00 January 01, 1970). If not enough precision is available (or needed) it can be padded with zeroes. If a report object needs multiple timestamps, it can add more and/or override this one inside its own “data” section.
category
A collector can belong to a given category of collectors (e.g.: storage collectors, daemon collector). This means that it will have to provide a minimum set of prescribed fields, as documented for each category. This field will contain the name of the category the collector belongs to, if any, or just the null value.
kind
Two kinds of collectors are possible: Performance reporting collectors and Status reporting collectors. The respective paragraphs will describe them and the value of this field.
data
This field contains all the data generated by the specific data collector, in its own independently defined format. The monitoring agent could check this syntactically (according to the JSON specifications) but not semantically.

Here follows a minimal example of a report:

[
{
    "name" : "TheCollectorIdentifier",
    "version" : "1.2",
    "format_version" : 1,
    "timestamp" : 1351607182000000000,
    "category" : null,
    "kind" : 0,
    "data" : { "plugin_specific_data" : "go_here" }
},
{
    "name" : "AnotherDataCollector",
    "version" : "B",
    "format_version" : 7,
    "timestamp" : 1351609526123854000,
    "category" : "storage",
    "kind" : 1,
    "data" : { "status" : { "code" : 1,
                            "message" : "Error on disk 2"
                          },
               "plugin_specific" : "data",
               "some_late_data" : { "timestamp" : 1351609526123942720,
                                    ...
                                  }
             }
}
]

Performance reporting collectors¶

These collectors only provide data about some component of the system, without giving any interpretation over their meaning.

The value of the kind field of the report will be 0.

Status reporting collectors¶

These collectors will provide information about the status of some component of ganeti, or managed by ganeti.

The value of their kind field will be 1.

The rationale behind this kind of collectors is that there are some situations where exporting data about the underlying subsystems would expose potential issues. But if Ganeti itself is able (and going) to fix the problem, conflicts might arise between Ganeti and something/somebody else trying to fix the same problem. Also, some external monitoring systems might not be aware of the internals of a particular subsystem (e.g.: DRBD) and might only exploit the high level response of its data collector, alerting an administrator if anything is wrong. Still, completely hiding the underlying data is not a good idea, as they might still be of use in some cases. So status reporting plugins will provide two output modes: one just exporting a high level information about the status, and one also exporting all the data they gathered. The default output mode will be the status-only one. Through a command line parameter (for stand-alone data collectors) or through the HTTP request to the monitoring agent (when collectors are executed as part of it) the verbose output mode providing all the data can be selected.

When exporting just the status each status reporting collector will provide, in its data section, at least the following field:

status

summarizes the status of the component being monitored and consists of two subfields:

code

It assumes a numeric value, encoded in such a way as to allow using a bitset to easily distinguish which states are currently present in the whole cluster. If the bitwise OR of all the status fields is 0, the cluster is completely healthy (a small aggregation sketch follows this list). The status codes are as follows:

0
The collector can determine that everything is working as intended.
1
Something is temporarily wrong but it is being automatically fixed by Ganeti. There is no need of external intervention.
2
The collector has failed to understand whether the status is good or bad. Further analysis is required. Interpret this status as a potentially dangerous situation.
4
The collector can determine that something is wrong and Ganeti has no way to fix it autonomously. External intervention is required.
message

A message to better explain the reason of the status. The exact format of the message string is data collector dependent.

The field is mandatory, but the content can be an empty string if the code is 0 (working as intended) or 1 (being fixed automatically).

If the status code is 2, the message should explain why it was not possible to determine a proper status. If the status code is 4, the message should specify what has gone wrong.
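
As an illustration, a monitoring system could compute the overall health of a cluster by OR-ing the status codes of all collected reports; a minimal sketch:

def cluster_health(status_codes):
  # Aggregate per-collector status codes into one bitset. A result of 0
  # means everything is healthy; the individual bits (1, 2, 4) tell
  # which non-healthy states are present somewhere in the cluster.
  result = 0
  for code in status_codes:
    result |= code
  return result

# Example: reports with codes [0, 1, 4] yield 5, i.e. both a condition
# being fixed automatically (1) and one needing intervention (4).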

The data section will also contain all the fields describing the gathered data, according to a collector-specific format.

Instance status¶

At the moment each node knows which instances are running on it and which instances it is primary for, but not why an instance might not be running. On the other hand we don’t want to distribute full instance “admin” status information to all nodes, because of the performance impact this would have.

As such we propose that:

  • Any operation that can affect instance status will have an optional “reason” attached to it (at opcode level). This can be used for example to distinguish an admin request from a scheduled maintenance or an automated tool’s work. If this reason is not passed, Ganeti will just use the information it has about the source of the request. This reason information will be structured according to the Ganeti reason trail design document.
  • RPCs that affect the instance status will be changed so that the “reason” and the version of the config object they ran on is passed to them. They will then export the new expected instance status, together with the associated reason and object version to the status report system, which then will export those themselves.

Monitoring and auditing systems can then use the reason to understand the cause of an instance status, and they can use the timestamp to understand the freshness of their data even in the absence of an atomic cross-node reporting: for example if they see an instance “up” on a node after seeing it running on a previous one, they can compare these values to understand which data is freshest, and repoll the “older” node. Of course if they keep seeing this status this represents an error (either an instance continuously “flapping” between nodes, or an instance constantly up on more than one node), which should be reported and acted upon.

The instance status will be reported by each node, for the instances it is primary for, and the data section of the report will contain a list of instances, named instances, with at least the following fields for each instance:

name
The name of the instance.
uuid
The UUID of the instance (stable on name change).
admin_state
The status of the instance (up/down/offline) as requested by the admin.
actual_state
The actual status of the instance. It can be up, down, or hung if the instance is up but it appears to be completely stuck.
uptime
The uptime of the instance (if it is up, “null” otherwise).
mtime
The timestamp of the last known change to the instance state.
state_reason
The last known reason for state change of the instance, described according to the JSON representation of a reason trail, as detailed in the reason trail design document.
status
It represents the status of the instance, and its format is the same as that of the status field of Status reporting collectors.

Each hypervisor should provide its own instance status data collector, possibly with the addition of more, specific, fields. The category field of all of them will be instance. The kind field will be 1.

Note that as soon as a node knows it’s not the primary anymore for an instance it will stop reporting status for it: this means the instance will either disappear, if it has been deleted, or appear on another node, if it’s been moved.

The code of the status field of the report of the Instance status data collector will be:

0
if status is 0 for all the instances it is reporting about.
1
otherwise.

Storage collectors¶

The storage collectors will be a series of data collectors that will gather data about storage for the current node. The collection will be performed at different granularity and abstraction levels, from the physical disks, to partitions, logical volumes and to the specific storage types used by Ganeti itself (drbd, rbd, plain, file).

The name of each of these collectors will reflect the storage type it refers to.

The category field of these collectors will be storage.

The kind field will depend on the specific collector.

Each storage collector’s data section will provide collector-specific fields.

The various storage collectors will provide keys to join the data they provide, in order to allow the user to get a better understanding of the system. E.g.: through device names, or instance names.

Diskstats collector¶

This storage data collector will gather information about the status of the disks installed in the system, as listed in the /proc/diskstats file. This means that not only physical hard drives, but also ramdisks and loopback devices will be listed.

Its kind in the report will be 0 (Performance reporting collectors).

Its category field in the report will contain the value storage.

When executed in verbose mode, the data section of the report of this collector will be a list of items, each representing one disk and providing the following fields (a parsing sketch follows the list):

major
The major number of the device.
minor
The minor number of the device.
name
The name of the device.
readsNum
This is the total number of reads completed successfully.
mergedReads
Reads which are adjacent to each other may be merged for efficiency. Thus two 4K reads may become one 8K read before it is ultimately handed to the disk, and so it will be counted (and queued) as only one I/O. This field specifies how often this was done.
secRead
This is the total number of sectors read successfully.
timeRead
This is the total number of milliseconds spent by all reads.
writes
This is the total number of writes completed successfully.
mergedWrites
Writes which are adjacent to each other may be merged for efficiency. Thus two 4K writes may become one 8K write before it is ultimately handed to the disk, and so it will be counted (and queued) as only one I/O. This field specifies how often this was done.
secWritten
This is the total number of sectors written successfully.
timeWrite
This is the total number of milliseconds spent by all writes.
ios
The number of I/Os currently in progress. This is the only field that should go to zero; it is incremented as requests are given to the appropriate struct request_queue and decremented as they finish.
timeIO
The number of milliseconds spent doing I/Os. This field increases so long as field IOs is nonzero.
wIOmillis
The weighted number of milliseconds spent doing I/Os. This field is incremented at each I/O start, I/O completion, I/O merge, or read of these stats by the number of I/Os in progress (field IOs) times the number of milliseconds spent doing I/O since the last update of this field. This can provide an easy measure of both I/O completion time and the backlog that may be accumulating.
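
For illustration only, the mapping between /proc/diskstats and the fields above could be sketched as follows (the real collector is part of the Haskell monitoring daemon):

FIELDS = ["major", "minor", "name",
          "readsNum", "mergedReads", "secRead", "timeRead",
          "writes", "mergedWrites", "secWritten", "timeWrite",
          "ios", "timeIO", "wIOmillis"]

def parse_diskstats():
  devices = []
  for line in open("/proc/diskstats"):
    values = line.split()
    if len(values) < len(FIELDS):
      continue  # skip malformed or truncated lines
    item = dict(zip(FIELDS, values))
    for key in FIELDS:
      if key != "name":  # everything except the device name is numeric
        item[key] = int(item[key])
    devices.append(item)
  return devices
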
Logical Volume collector¶

This data collector will gather information about the attributes of logical volumes present in the system.

Its kind in the report will be 0 (Performance reporting collectors).

Its category field in the report will contain the value storage.

The data section of the report of this collector will be a list of items, each representing one logical volume and providing the following fields:

uuid
The UUID of the logical volume.
name
The name of the logical volume.
attr
The attributes of the logical volume.
major
Persistent major number or -1 if not persistent.
minor
Persistent minor number or -1 if not persistent.
kernel_major
Currently assigned major number or -1 if LV is not active.
kernel_minor
Currently assigned minor number or -1 if LV is not active.
size
Size of LV in bytes.
seg_count
Number of segments in LV.
tags
Tags, if any.
modules
Kernel device-mapper modules required for this LV, if any.
vg_uuid
Unique identifier of the volume group.
vg_name
Name of the volume group.
segtype
Type of LV segment.
seg_start
Offset within the LV to the start of the segment in bytes.
seg_start_pe
Offset within the LV to the start of the segment in physical extents.
seg_size
Size of the segment in bytes.
seg_tags
Tags for the segment, if any.
seg_pe_ranges
Ranges of Physical Extents of underlying devices in lvs command line format.
devices
Underlying devices used with starting extent numbers.
instance
The name of the instance this LV is used by, or null if it was not possible to determine it.

DRBD status¶

This data collector will run only on nodes where DRBD is actually present and it will gather information about DRBD devices.

Its kind in the report will be 1 (Status reporting collectors).

Its category field in the report will contain the value storage.

When executed in verbose mode, the data section of the report of this collector will provide the following fields:

versionInfo

Information about the DRBD version number, given by a combination of any (but at least one) of the following fields:

version
The DRBD driver version.
api
The API version number.
proto
The protocol version.
srcversion
The version of the source files.
gitHash
Git hash of the source files.
buildBy
Who built the binary, and, optionally, when.
device

A list of structures, each describing a DRBD device (a minor) and containing the following fields:

minor
The device minor number.
connectionState
The state of the connection. If it is “Unconfigured”, all the following fields are not present.
localRole
The role of the local resource.
remoteRole
The role of the remote resource.
localState
The status of the local disk.
remoteState
The status of the remote disk.
replicationProtocol
The replication protocol being used.
ioFlags
The input/output flags.
perfIndicators

The performance indicators. This field will contain the following sub-fields:

networkSend
KiB of data sent on the network.
networkReceive
KiB of data received from the network.
diskWrite
KiB of data written on local disk.
diskRead
KiB of data read from the local disk.
activityLog
Number of updates of the activity log.
bitMap
Number of updates to the bitmap area of the metadata.
localCount
Number of open requests to the local I/O subsystem.
pending
Number of requests sent to the partner but not yet answered.
unacknowledged
Number of requests received by the partner but still to be answered.
applicationPending
Number of block input/output requests forwarded to DRBD but not yet answered.
epochs
(Optional) Number of epoch objects. Not provided by all DRBD versions.
writeOrder
(Optional) Currently used write ordering method. Not provided by all DRBD versions.
outOfSync
(Optional) KiB of storage currently out of sync. Not provided by all DRBD versions.
syncStatus

(Optional) The status of the synchronization of the disk. This is present only if the disk is being synchronized, and includes the following fields:

percentage
The percentage of synchronized data.
progress
How far the synchronization has progressed. Written as “x/y”, where x and y are integers expressed in the measurement unit stated in progressUnit.
progressUnit
The measurement unit for the progress indicator.
timeToFinish
The expected time before finishing the synchronization.
speed
The speed of the synchronization.
want
The desired speed of the synchronization.
speedUnit
The measurement unit of the speed and want values. Expressed as “size/time”.
instance
The name of the Ganeti instance this disk is associated to.

Ganeti daemons status¶

Ganeti will report what information it has about its own daemons. This should allow identifying possible problems with the Ganeti system itself: for example memory leaks, crashes and high resource utilization should be evident by analyzing this information.

The kind field will be 1 (Status reporting collectors).

Each daemon will have its own data collector, and each of them will have a category field valued daemon.

When executed in verbose mode, their data section will include at least:

memory
The amount of used memory.
size_unit
The measurement unit used for the memory.
uptime
The uptime of the daemon.
CPU usage
How much CPU the daemon is using (percentage).

Any other daemon-specific information can be included as well in the data section.

Hypervisor resources report¶

Each hypervisor has a view of system resources that sometimes is different than the one the OS sees (for example in Xen the Node OS, running as Dom0, has access to only part of those resources). In this section we’ll report all information we can in a “non hypervisor specific” way. Each hypervisor can then add extra specific information that is not generic enough to be abstracted.

The kind field will be 0 (Performance reporting collectors).

Each of the hypervisor data collectors will be of category: hypervisor.

Node OS resources report¶

Since Ganeti assumes it’s running on Linux, it’s useful to export some basic information as seen by the host system.

The category field of the report will be null.

The kind field will be 0 (Performance reporting collectors).

The data section will include:

cpu_number
The number of available cpus.
cpus
A list with one element per cpu, showing its average load.
memory
The current view of memory (free, used, cached, etc.)
filesystem
A list with one element per filesystem, showing a summary of the total/available space.
NICs
A list with one element per network interface, showing the amount of sent/received data, error rate, IP address of the interface, etc.
versions
A map using the name of a component Ganeti interacts with (Linux, drbd, hypervisor, etc.) as the key and its version number as the value.

Note that we won’t go into any hardware specific details (e.g. querying a node RAID is outside the scope of this, and can be implemented as a plugin) but we can easily just report the information above, since it’s standard enough across all systems.

Node OS CPU load average report¶

This data collector will export CPU load statistics as seen by the host system. Apart from using the data from an external monitoring system we can also use the data to improve instance allocation and/or the Ganeti cluster balance. To compute the CPU load average we will use a number of values collected inside a time window. The collection process will be done by an independent thread (see Mode of Operation).

This report is a subset of the previous report (Node OS resources report) and they might eventually get merged, once reporting for the other fields (memory, filesystem, NICs) gets implemented too.

Specifically:

The category field of the report will be null.

The kind field will be 0 (Performance reporting collectors).

The data section will include:

cpu_number
The number of available cpus.
cpus
A list with one element per cpu, showing its average load.
cpu_total
The total CPU load average as the sum over all separate cpus.

The CPU load report function will get N values, collected by the CPU load collection function, and calculate the above averages. Please see the section Mode of Operation for more information on how the two functions of the data collector interact.

Format of the query¶

The queries to the monitoring agent will be HTTP GET requests on port 1815. The answer will be encoded in JSON format and will depend on the specific accessed resource.

If a request is sent to a non-existing resource, a 404 error will be returned by the HTTP server.

The following paragraphs will present the existing resources supported by the current protocol version, that is version 1.

/¶

The root resource. It will return the list of the supported protocol version numbers.

Currently, this will include only version 1.

/1¶

Not an actual resource per se, it is the root of all the resources of protocol version 1.

If requested through GET, the null JSON value will be returned.

/1/list/collectors¶

Returns a list of tuples (kind, category, name) showing all the collectors available in the system.

/1/report/all¶

A list of the reports of all the data collectors, as a JSON list.

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter verbose=1 to the request.

/1/report/[category]/[collector_name]¶

Returns the report of the collector [collector_name] that belongs to the specified [category].

The category has to be written in lowercase.

If a collector does not belong to any category, default will have to be used as the value for [category].

Status reporting collectors will provide their output in non-verbose format. The verbose format can be requested by adding the parameter verbose=1 to the request.
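
For illustration, a client could fetch the verbose report list from a node like this (a sketch; node1.example.com is a placeholder, while the port and paths are those defined above):

import json
import urllib2

url = "http://node1.example.com:1815/1/report/all?verbose=1"
reports = json.load(urllib2.urlopen(url))

for report in reports:
  # Print the mandatory identification fields of every report object
  print report["name"], report["category"], report["kind"]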

Instance disk status propagation¶

As for the instance status, Ganeti currently has only partial information about its instance disks: in particular, each node is unaware of the disk-to-instance mapping, which exists only on the master.

For this design doc we plan to fix this by changing all RPCs that create backend storage, or that put an already existing one into use, to pass the relevant instance to the node. The node can then export these to the status reporting tool.

While we haven’t implemented these RPC changes yet, we’ll use Confd to fetch this information in the data collectors.

Plugin system¶

The monitoring system will be equipped with a plugin system that can export specific local information through it.

The plugin system is expected to be used by local installations to export any installation specific information that they want to be monitored, about either hardware or software on their systems.

The plugin system will be in the form of either scripts or binaries whose output will be inserted in the report.

Eventually support for other kinds of plugins might be added as well, such as plain text files which will be inserted into the report, or local unix or network sockets from which the information has to be read. This should allow most flexibility for implementing an efficient system, while being able to keep it as simple as possible.

Data collectors¶

In order to ease testing as well as to make it simple to reuse this subsystem it will be possible to run just the “data collectors” on each node without passing through the agent daemon.

If a data collector is run independently, it should print on stdout its report, according to the format corresponding to a single data collector report object, as described in the previous paragraphs.

Mode of operation¶

In order to be able to report information fast the monitoring agent daemon will keep an in-memory or on-disk cache of the status, which will be returned when queries are made. The status system will then periodically check resources to make sure the status is up to date.

Different parts of the report will be queried at different speeds. These will depend on:

  • how often they vary (or we expect them to vary)
  • how fast they are to query
  • how important their freshness is

Of course the last parameter is installation specific, and while we’ll try to have defaults, it will be configurable. The first two instead we can use adaptively to query a certain resource faster or slower depending on those two parameters.

When run as stand-alone binaries, the data collectors will not use any caching system, and will just fetch and return the data immediately.

Since some performance collectors have to operate on a number of values collected in previous times, we need a mechanism independent of the data collector which will trigger the collection of those values and also store them, so that they are available for calculation by the data collectors.

To collect data periodically, a thread will be created by the monitoring agent which will run the collection function of every data collector that provides one. The values returned by the collection function of the data collector will be saved in an appropriate map, associating each value to the corresponding collector, using the collector’s name as the key of the map. This map will be stored in mond’s memory.

For example: the collection function of the CPU load collector will collect a CPU load value and save it in the map mentioned above. The collection function will be called by the collector thread every t milliseconds. When the report function of the collector is called, it will process the last N values of the map and calculate the corresponding average.
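
A minimal sketch of this interaction follows; all names and parameters here are illustrative (the actual daemon is written in Haskell):

import threading
import time
from collections import deque

N = 60          # number of samples kept per collector
INTERVAL = 1.0  # seconds between two collection runs

values = {}     # collector name -> deque of the most recent samples

def collection_thread(collectors):
  # Periodically run every collector's collection function and store
  # the result under the collector's name, keeping only the last N
  while True:
    for name, collect in collectors.items():
      values.setdefault(name, deque(maxlen=N)).append(collect())
    time.sleep(INTERVAL)

def cpu_load_report(name="cpu-load"):
  # The report function averages the stored samples on demand
  samples = values.get(name, [])
  if not samples:
    return None
  return sum(samples) / float(len(samples))

# The agent would start the collection loop in its own thread, e.g.:
#   threading.Thread(target=collection_thread, args=(collectors,)).start()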

Implementation place¶

The status daemon will be implemented as a standalone Haskell daemon. In the future it should be easy to merge multiple daemons into one with multiple entry points, should we find out it saves resources and doesn’t impact functionality.

The libekg library should be looked at for easily providing metrics in json format.

Implementation order¶

We will implement the agent system in this order:

  • initial example data collectors (eg. for drbd and instance status).
  • initial daemon for exporting data, integrating the existing collectors
  • plugin system
  • RPC updates for instance status reasons and disk to instance mapping
  • cache layer for the daemon
  • more data collectors

Future work¶

As a future step it can be useful to “centralize” all this reporting data on a single place. This for example can be just the master node, or all the master candidates. We will evaluate doing this after the first node-local version has been developed and tested.

Another possible change is replacing the “read-only” RPCs with queries to the agent system, thus having only one way of collecting information from the nodes from a monitoring system and for Ganeti itself.

One extra feature we may need is a way to query for only sub-parts of the report (eg. instances status only). This can be done by passing arguments to the HTTP GET, which will be defined when we get to this functionality.

Finally, the autorepair system (see its design) can be expanded to use the monitoring agent system as a source of information to decide which repairs it can perform.

ganeti-2.9.3/doc/html/design-autorepair.html

Instance auto-repair¶

This is a design document detailing the implementation of self-repair and recreation of instances in Ganeti. It also discusses ideas that might be useful for more future self-repair situations.

Current state and shortcomings¶

Ganeti currently doesn’t do any sort of self-repair or self-recreate of instances:

  • If a drbd instance is broken (its primary or secondary node goes offline or needs to be drained) an admin or an external tool must fail it over if necessary, and then trigger a disk replacement.
  • If a plain instance is broken (or both nodes of a drbd instance are) an admin or an external tool must recreate its disk and reinstall it.

Moreover in an oversubscribed cluster operations mentioned above might fail for lack of capacity until a node is repaired or a new one added. In this case an external tool would also need to go through any “pending-recreate” or “pending-repair” instances and fix them.

Proposed changes¶

We’d like to increase the self-repair capabilities of Ganeti, at least with regards to instances. In order to do so we plan to add mechanisms to mark an instance as “due for being repaired” and then the relevant repair to be performed as soon as it’s possible, on the cluster.

The self-repair tool will be written as part of ganeti-watcher or as an extra watcher component that is called less often.

As the first version we’ll only handle the case in which an instance lives on an offline or drained node. In the future we may add more self-repair capabilities for errors ganeti can detect.

New attributes (or tags)¶

In order to know when to perform a self-repair operation we need to know whether they are allowed by the cluster administrator.

This can be implemented as either new attributes or tags. Tags could be acceptable as they would only be read and interpreted by the self-repair tool (part of the watcher), and not by the ganeti core opcodes and node rpcs. The following tags would be needed:

ganeti:watcher:autorepair:<type>¶

(instance/nodegroup/cluster) Allow repairs to happen on an instance that has the tag, or that lives in a cluster or nodegroup which does. Types of repair are in order of perceived risk, lower to higher, and each type includes allowing the operations in the lower ones:

  • fix-storage allows a disk replacement or another operation that fixes the instance backend storage without affecting the instance itself. This can for example recover from a broken drbd secondary, but risks data loss if something is wrong on the primary but the secondary was somehow recoverable.
  • migrate allows an instance migration. This can recover from a drained primary, but can cause an instance crash in some cases (bugs).
  • failover allows instance reboot on the secondary. This can recover from an offline primary, but the instance will lose its running state.
  • reinstall allows disks to be recreated and an instance to be reinstalled. This can recover from primary&secondary both being offline, or from an offline primary in the case of non-redundant instances. It causes data loss.

Each repair type allows all the operations in the previous types, in the order above, in order to ensure a repair can be completed fully. As such a repair of a lower type might not be able to proceed if it detects an error condition that requires a more risky or drastic solution, but never vice versa (if a worse solution is allowed then so is a better one).

If there are multiple ganeti:watcher:autorepair:<type> tags in an object (cluster, node group or instance), the least destructive tag takes precedence. When multiplicity happens across objects, the nearest tag wins. For example, if in a cluster with two instances, I1 and I2, I1 has failover, and the cluster itself has both fix-storage and reinstall, I1 will end up with failover and I2 with fix-storage.
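
A sketch of this precedence logic (the ordering mirrors the risk ordering above; the function name is illustrative):

# Repair types ordered from least to most destructive
ORDER = ["fix-storage", "migrate", "failover", "reinstall"]
PREFIX = "ganeti:watcher:autorepair:"

def effective_repair_type(instance_tags, group_tags, cluster_tags):
  # The nearest object carrying any autorepair type tag wins...
  for tags in (instance_tags, group_tags, cluster_tags):
    types = [t[len(PREFIX):] for t in tags
             if t.startswith(PREFIX) and t[len(PREFIX):] in ORDER]
    if types:
      # ...and among several tags on the same object the least
      # destructive type takes precedence
      return min(types, key=ORDER.index)
  return None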

ganeti:watcher:autorepair:suspend[:<timestamp>]¶

(instance/nodegroup/cluster) If this tag is encountered no autorepair operations will start for the instance (or for any instance, if present at the cluster or group level). Any job which already started will be allowed to finish, but then the autorepair system will not proceed further until this tag is removed, or the timestamp passes (in which case the tag will be removed automatically by the watcher).

Note that depending on how this tag is used there might still be race conditions related to it for an external tool that uses it programmatically, as no “lock tag” or tag “test-and-set” operation is present at this time. While this is known we won’t solve these race conditions in the first version.

It might also be useful to easily have an operation that tags all instances matching a filter on some characteristic. But again, this wouldn’t be specific to this tag.

If there are multiple ganeti:watcher:autorepair:suspend[:<timestamp>] tags in an object, the form without timestamp takes precedence (permanent suspension); or, if all object tags have a timestamp, the one with the highest timestamp. When multiplicity happens across objects, the nearest tag wins, as above. This makes it possible to suspend cluster-enabled repairs with a single tag in the cluster object; or to suspend them only for a certain node group or instance. At the same time, it is possible to re-enable cluster-suspended repairs in a particular instance or group by applying an enable tag to them.

ganeti:watcher:autorepair:pending:<type>:<id>:<timestamp>:<jobs>¶

(instance) If this tag is present a repair of type type is pending on the target instance. This means that either jobs are being run, or it’s waiting for resource availability. id is the unique id identifying this repair, timestamp is the time when this tag was first applied to this instance for this id (we will “update” the tag by adding a “new copy” of it and removing the old version as we run more jobs, but the timestamp will never change for the same repair).

jobs is the list of jobs already run or being run to repair the instance (separated by a plus sign, +). If the instance has just been put in pending state but no job has run yet, this list is empty.

This tag will be set by ganeti if an equivalent autorepair tag is present and a repair is needed, or it can be set by an external tool to request a repair as a “once off”.

If multiple instances of this tag are present they will be handled in order of timestamp.
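
A sketch of how such a tag could be decomposed into its parts (the function name is illustrative):

PENDING_PREFIX = "ganeti:watcher:autorepair:pending:"

def parse_pending_tag(tag):
  # The value after the fixed prefix is "<type>:<id>:<timestamp>:<jobs>",
  # with the job IDs separated by "+"; an empty jobs part means no job
  # has been run yet
  rtype, rid, timestamp, jobs = tag[len(PENDING_PREFIX):].split(":", 3)
  joblist = jobs.split("+") if jobs else []
  return rtype, rid, timestamp, joblist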

ganeti:watcher:autorepair:result:<type>:<id>:<timestamp>:<result>:<jobs>¶

(instance) If this tag is present a repair of type type has been performed on the instance and has been completed by timestamp. The result is either success, failure or enoperm, and jobs is a +-separated list of jobs that were executed for this repair.

An enoperm result is returned when the repair was carried on as far as possible, but the repair type doesn’t allow proceeding any further.

Possible states, and transitions¶

At any point an instance can be in one of the following health states:

Healthy¶

The instance lives on only online nodes. The autorepair system will never touch these instances. Any repair:pending tags will be removed and marked success with no jobs attached to them.

This state can transition to:

  • Needs-repair, repair disallowed (node offlined or drained, no autorepair tag)
  • Needs-repair, autorepair allowed (node offlined or drained, autorepair tag present)
  • Suspended (a suspend tag is added)

Suspended¶

Whenever a repair:suspend tag is added the autorepair code won’t touch the instance until the timestamp on the tag has passed, if present. The tag will be removed afterwards (and the instance will transition to its correct state, depending on its health and other tags).

Note that when an instance is suspended any pending repair is interrupted, but jobs which were submitted before the suspension are allowed to finish.

Needs-repair, repair disallowed¶

The instance lives on an offline or drained node, but no autorepair tag is set, or the autorepair tag set is of a type not powerful enough to finish the repair. The autorepair system will never touch these instances, and they can transition to:

  • Healthy (manual repair)
  • Pending repair (a repair:pending tag is added)
  • Needs-repair, repair allowed always (an autorepair always tag is added)
  • Suspended (a suspend tag is added)

Needs-repair, repair allowed always¶

A repair:pending tag is added, and the instance transitions to the Pending Repair state. The autorepair tag is preserved.

Of course if a repair:suspended tag is found no pending tag will be added, and the instance will instead transition to the Suspended state.

Pending repair¶

When an instance is in this stage the following will happen:

If a repair:suspended tag is found the instance won’t be touched and moved to the Suspended state. Any jobs which were already running will be left untouched.

If there are still jobs running related to the instance and scheduled by this repair they will be given more time to run, and the instance will be checked again later. The state transitions to itself.

If no jobs are running and the instance is detected to be healthy, the repair:result tag will be added, and the current active repair:pending tag will be removed. It will then transition to the Healthy state if there are no repair:pending tags, or to the Pending state otherwise: there, the instance being healthy, those tags will be resolved without any operation as well (note that this is the same as transitioning to the Healthy state, where repair:pending tags would also be resolved).

If no jobs are running and the instance still has issues:

  • if the last job(s) failed it can either be retried a few times, if deemed to be safe, or the repair can transition to the Failed state. The repair:result tag will be added, and the active repair:pending tag will be removed (further repair:pending tags will not be able to proceed, as explained by the Failed state, until the failure state is cleared)
  • if the last job(s) succeeded but there are not enough resources to proceed, the state will transition to itself and no jobs are scheduled. The tag is left untouched (and later checked again). This basically just delays any repairs: the current pending tag stays active, and any others are untouched.
  • if the last job(s) succeeded but the repair type cannot allow to proceed any further the repair:result tag is added with an enoperm result, and the current repair:pending tag is removed. The instance is now back to “Needs-repair, repair disallowed”, “Needs-repair, autorepair allowed”, or “Pending” if there is already a future tag that can repair the instance.
  • if the last job(s) succeeded and the repair can continue new job(s) can be submitted, and the repair:pending tag can be updated.

Failed¶

If repairing an instance has failed a repair:result:failure tag is added. The presence of this tag is used to detect that an instance is in this state, and it will not be touched until the failure is investigated and the tag is removed.

An external tool or person needs to investigate the state of the instance and remove this tag when they are sure the instance is repaired and safe to turn back over to the normal autorepair system.

(Alternatively we can use the suspended state (indefinitely or temporarily) to mark the instance as “not touch” when we think a human needs to look at it. To be decided).

A graph with the possible transitions follows; note that in the graph, following the implementation, the two Needs repair states have been coalesced into one, and the Suspended state disappears, for it becomes an attribute of the instance object (its auto-repair policy).

digraph "auto-repair-states" {
node     [shape=circle, style=filled, fillcolor="#BEDEF1",
          width=2, fixedsize=true];
healthy  [label="Healthy"];
needsrep [label="Needs repair"];
pendrep  [label="Pending repair"];
failed   [label="Failed repair"];
disabled [label="(no state)", width=1.25];

{rank=same; needsrep}
{rank=same; healthy}
{rank=same; pendrep}
{rank=same; failed}
{rank=same; disabled}

// These nodes are needed to be the "origin" of the "initial state" arrows.
node [width=.5, label="", style=invis];
inih;
inin;
inip;
inif;
inix;

edge [fontsize=10, fontname="Arial Bold", fontcolor=blue]

inih -> healthy  [label="No tags or\nresult:success"];
inip -> pendrep  [label="Tag:\nautorepair:pending"];
inif -> failed   [label="Tag:\nresult:failure"];
inix -> disabled [fontcolor=black, label="ArNotEnabled"];

edge [fontcolor="orange"];

healthy -> healthy [label="No problems\ndetected"];

healthy -> needsrep [
           label="Brokenness\ndetected in\nfirst half of\nthe tool run"];

pendrep -> healthy [
           label="All jobs\ncompleted\nsuccessfully /\ninstance healthy"];

pendrep -> failed [label="Some job(s)\nfailed"];

edge [fontcolor="red"];

needsrep -> pendrep [
            label="Repair\nallowed and\ninitial job(s)\nsubmitted"];

needsrep -> needsrep [
            label="Repairs suspended\n(no-op) or enabled\nbut not powerful enough\n(result: enoperm)"];

pendrep -> pendrep [label="More jobs\nsubmitted"];
}

Repair operation¶

Possible repairs are:

  • Replace-disks (drbd, if the secondary is down), (or other storage specific fixes)
  • Migrate (shared storage, rbd, drbd, if the primary is drained)
  • Failover (shared storage, rbd, drbd, if the primary is down)
  • Recreate disks + reinstall (all nodes down, plain, files or drbd)

Note that more than one of these operations may need to happen before a full repair is completed (eg. if a drbd primary goes offline, first a failover will happen, then a replace-disks).

The self-repair tool will first take care of all needs-repair instances that can be brought into pending state, and transition them as described above.

Then it will go through any repair:pending instances and handle them as described above.

Note that the repair tool MAY “group” instances by performing common repair jobs for them (eg: node evacuate).

Staging of work¶

First version: recreate-disks + reinstall (2.6.1)
Second version: failover and migrate repairs (2.7)
Third version: replace disks repair (2.7 or 2.8)

Future work¶

One important piece of work will be reporting what the autorepair system is “thinking” and exporting this in a form that can be read by an outside user or system. In order to do this we need a better communication system than embedding this information into tags. This should be thought out in an extensible way that can be used in general for Ganeti to provide “advisory” information about entities it manages, and for an external system to “advise” ganeti over what it can do, but in a less direct manner than submitting individual jobs.

Note that cluster verify checks some errors that are actually instance specific (eg. a missing backend disk on a drbd node) or node-specific (eg. an extra lvm device). If we were to split these into “instance verify”, “node verify” and “cluster verify”, then we could easily use this tool to perform some of those repairs as well.

Finally self-repairs could also be extended to the cluster level, for example concepts like “N+1 failures”, missing master candidates, etc. or node level for some specific types of errors.

ganeti-2.9.3/doc/html/design-2.9.html

Ganeti 2.9 design¶

The following design documents have been implemented in Ganeti 2.9.

The following designs have been partially implemented in Ganeti 2.9.

ganeti-2.9.3/doc/html/design-lu-generated-jobs.html

Submitting jobs from logical units¶

This is a design document about the innards of Ganeti’s job processing. Readers are advised to study previous design documents on the topic:

Current state and shortcomings¶

Some Ganeti operations want to execute as many operations in parallel as possible. Examples are evacuating or failing over a node (gnt-node evacuate/gnt-node failover). Without changing large parts of the code, e.g. the RPC layer, to be asynchronous, or using threads inside a logical unit, only a single operation can be executed at a time per job.

Currently clients work around this limitation by retrieving the list of desired targets and then re-submitting a number of jobs. This requires logic to be kept in the client, in some cases leading to duplication (e.g. CLI and RAPI).

Proposed changes¶

The job queue lock is guaranteed to be released while executing an opcode/logical unit. This means an opcode can talk to the job queue and submit more jobs. It then receives the job IDs, like any job submitter using the LUXI interface would. These job IDs are returned to the client, who will then proceed to wait for the jobs to finish.

Technically, the job queue already passes a number of callbacks to the opcode processor. These are used for giving user feedback, notifying the job queue of an opcode having gotten its locks, and checking whether the opcode has been cancelled. A new callback function is added to submit jobs. Its signature and result will be equivalent to the job queue’s existing SubmitManyJobs function.

Logical units can submit jobs by returning an instance of a special container class with a list of jobs, each of which is a list of opcodes (e.g. [[op1, op2], [op3]]). The opcode processor will recognize instances of the special class when used as a return value and will submit the contained jobs. The submission status and job IDs returned by the submission callback are used as the opcode’s result. It should be encapsulated in a dictionary allowing for future extensions.

Example:

{
  "jobs": [
    (True, "8149"),
    (True, "21019"),
    (False, "Submission failed"),
    (True, "31594"),
    ],
}
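
For illustration, a logical unit could use this mechanism roughly as follows (a sketch: ResultWithJobs stands in for the special container class described above, and the opcode names and helper method are placeholders):

class ResultWithJobs(object):
  # Stand-in for the special container class recognized by the opcode
  # processor
  def __init__(self, jobs):
    self.jobs = jobs  # list of jobs, each a list of opcodes

def Exec(self, feedback_fn):
  # Hypothetical logical unit body: build one job of two opcodes per
  # target instance and hand them over for submission; the submission
  # status and job IDs become this opcode's result
  jobs = [[OpInstanceMigrate(instance_name=name),
           OpInstanceStartup(instance_name=name)]
          for name in self._DetermineInstances()]
  return ResultWithJobs(jobs)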

Job submissions can fail for a variety of reasons, e.g. a full or drained job queue. Lists of jobs cannot be submitted atomically, meaning some might fail while others succeed. The client is responsible for handling such cases.

Other discussed solutions¶

Instead of requiring the client to wait for the returned jobs, another idea was to do so from within the submitting opcode in the master daemon. While technically possible, doing so would have two major drawbacks:

  • Opcodes waiting for other jobs to finish block one job queue worker thread
  • All locks must be released before starting the waiting process, failure to do so can lead to deadlocks

Instead of returning the job IDs as part of the normal opcode result, introducing a new opcode field, e.g. op_jobids, was discussed and dismissed. A new field would touch many areas and possibly break some assumptions. There were also questions about the semantics.

ganeti-2.9.3/doc/html/design-openvswitch.html

Support for Open vSwitch¶

This is a design document detailing the implementation of support for Open vSwitch in the Ganeti tool chain.

Current state and shortcomings¶

At the moment Ganeti’s support for Open vSwitch is very basic and limited to connecting instances to an existing vSwitch.

The shortcomings of this approach are:

  1. The full functionality (VLANs, QoS and trunking) of Open vSwitch is not used.
  2. Open vSwitch cannot be managed centrally.

Proposed changes¶

  1. Implement functions in gnt-network to manage Open vSwitch through Ganeti. gnt-network should be able to create, modify and delete vSwitches. The resulting configuration shall automatically be done on all members of the node group. Connecting Ethernet devices to vSwitches should be managed through this interface as well.
  2. Implement VLAN-capabilities: Instances shall have additional information for every NIC: VLAN-ID and port type. These are used to determine their type of connection to Open vSwitch. This will require modifying the methods for instance creation and modification.
  3. Implement NIC bonding: Functions to bond NICs for performance improvement, load-balancing and failover should be added. It is preferable to have a configuration option to determine the type of the trunk, as there are different types of trunks (LACP dynamic and static, different failover and load-balancing mechanisms)
  4. Set QoS level on per instance basis: Instances shall have an additional information: maximum bandwidth and maximum burst. This helps to balance the bandwidth needs between the VMs and to ensure fair sharing of the bandwidth.

Configuration changes for VLANs¶

nicparams shall be extended by a value “vlan” that will store the VLAN information for each NIC. This parameter will only be used if nicparams[constants.NIC_MODE] == constants.NIC_MODE_OVS, since it doesn’t make sense in other modes.

Each VLAN the NIC belongs to shall be stored in this single value. The format of storing this information is the same as the one used in Xen 4.3, since Xen 4.3 comes with functionality to support Open vSwitch.

This parameter will, at first, only be implemented for Xen and will have no effects on other hypervisors. Support for KVM will be added in the future.

Example: switch1 will connect the VM to the default VLAN of switch1. switch1.3 means that the VM is connected to an access port of VLAN 3. switch1.2:10:20 means that the VM is connected to a trunk port on switch1, carrying VLANs 2, 10 and 20.

This configuration string is split at the dot and stored in nicparams[constants.NIC_LINK] and nicparams[constants.NIC_VLAN] respectively.
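
A sketch of this split (the function name is illustrative):

def split_ovs_link(value):
  # "switch1"          -> ("switch1", "")        default VLAN
  # "switch1.3"        -> ("switch1", "3")       access port, VLAN 3
  # "switch1.2:10:20"  -> ("switch1", "2:10:20") trunk port
  if "." in value:
    return tuple(value.split(".", 1))
  return value, ""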

For Xen hypervisors, this information can be concatenated again and stored in the vif config as the bridge parameter and will be fully compatible with vif-openvswitch as of Xen 4.3.

Users of older Xen versions should be able to grab vif-openvswitch from the Xen repo and use it (tested in 4.2).

The differentiation between access port and trunk port is given by the number of VLANs that are specified.

gnt-instance modify shall be able to add or remove single VLANs from the vlan string without users needing to specify the complete new string.

Configuration changes for QoS¶

Instances shall be extended with configuration options for

  • maximum bandwidth
  • maximum burst rate

New configuration objects need to be created for the Open vSwitch configuration.

All these configuration changes need to be made available on the whole node group.

ganeti-2.9.3/doc/html/design-glusterfs-ganeti-support.html

GlusterFS Ganeti support¶

This document describes the plan for adding GlusterFS support inside Ganeti.

Objective¶

The aim is to let Ganeti support GlusterFS as one of its backend storage options. This includes three aspects:

  • Add Gluster as a storage backend.
  • Make sure Ganeti VMs can use GlusterFS backends in userspace mode (for newer QEMU/KVM which has this support) and otherwise, if possible, through some kernel exported block device.
  • Make sure Ganeti can configure GlusterFS by itself, by just joining storage space on new nodes to a GlusterFS node pool. Note that this may need another design document that explains how it interacts with storage pools, and that the node might or might not host VMs as well.

Background¶

There are two possible ways to implement “GlusterFS Ganeti Support”. One is to treat GlusterFS as an external storage backend; the other is to implement GlusterFS inside Ganeti, that is, as a new disk type for Ganeti. The benefit of the latter is that it would not be opaque but fully supported and integrated in Ganeti, without the need for extra infrastructure for testing/QA and such. Having it internal we can also provide a monitoring agent for it and more visibility into what’s going on. For these reasons, GlusterFS support will be added directly inside Ganeti.

Implementation Plan¶

Ganeti Side¶

To implement an internal storage backend for Ganeti, one has to subclass the BlockDev class defined in ganeti/lib/storage/base.py, which specifies operations such as create and remove. These methods are to be implemented in ganeti/lib/storage/bdev.py. In essence, the difference between implementing a backend inside Ganeti and outside it (externally) lies in how these BlockDev methods are provided and how they integrate with Ganeti itself: the internal implementation is not based on external scripts and integrates with Ganeti more tightly. The RBD patches may be a good reference here. The steps for adding a backend storage are as follows (a schematic skeleton of the new class is sketched after the list):

  • Implement the BlockDev interface in bdev.py.
  • Add the logic in cmdlib (eg, migration, verify).
  • Add the new storage type name to constants.
  • Modify objects.Disk to support GlusterFS storage type.
  • The implementation will be performed similarly to the RBD one (see commit 7181fba).
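
A hypothetical skeleton of such a class (class name, import path and method set follow the description above, not the final code):

# Sketch only: the module path and method signatures are assumptions.
from ganeti.storage import base

class GlusterStorage(base.BlockDev):
    """Gluster-backed block device (schematic skeleton)."""

    def Create(self):
        # Create the backing file for this disk on the Gluster volume.
        raise NotImplementedError

    def Remove(self):
        # Delete the backing file.
        raise NotImplementedError

    def Attach(self):
        # Make the device available on the current node.
        raise NotImplementedError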

GlusterFS side¶

GlusterFS is a distributed file system implemented in user space. Apart from NFS and CIFS, the GlusterFS namespace is accessed via the FUSE-based Gluster native client. This access path is less efficient, because the data has to pass through kernel space before reaching user space. Two specific enhancements address this:

  • A new library called libgfapi is now available as part of GlusterFS that provides POSIX-like C APIs for accessing Gluster volumes. libgfapi support will be available from GlusterFS-3.4 release.
  • QEMU/KVM (starting from QEMU-1.3) will have GlusterFS block driver that uses libgfapi and hence there is no FUSE overhead any longer when QEMU/KVM works with VM images on Gluster volumes.

Proposed implementation¶

QEMU/KVM includes support for GlusterFS, so Ganeti could support GlusterFS through QEMU/KVM. However, that would only let QEMU/KVM VMs use GlusterFS backend storage, and not other VMs such as Xen ones. Two parts therefore need to be implemented for supporting GlusterFS inside Ganeti, so that it can serve not only QEMU/KVM VMs but also Xen and others. One part is GlusterFS for Xen VMs, which is similar to the sharedfile disk template. The other part is GlusterFS for QEMU/KVM VMs, which is supported by the GlusterFS driver for QEMU/KVM. After a gnt-instance add -t gluster instance.example.com command is executed, the added instance should be checked: if it is a Xen VM, it will use the GlusterFS sharedfile path; if it is a QEMU/KVM VM, it will use the QEMU/KVM + GlusterFS path. For the first part (GlusterFS for Xen VMs), the sharedfile disk template is a good reference; for the second part (GlusterFS for QEMU/KVM VMs), the RBD disk template is. The first part will be finished first, and the second part, which builds on it, will be completed afterwards.

ganeti-2.9.3/doc/html/devnotes.html0000644000000000000000000005123712271443670017236 0ustar00rootroot00000000000000 Developer notes — Ganeti 2.9.3 documentation

Developer notes¶

Build dependencies¶

Most dependencies from the Ganeti quick installation guide apply, including qemu-img (marked there as optional), plus a number of Python tools (see the installation commands below).

For older development (Ganeti < 2.4) docbook was used instead of pandoc.

Note that for pylint, at the current moment the following versions must be used:

$ pylint --version
pylint 0.26.0,
astng 0.24.1, common 0.58.3

The same applies to pep8; other versions may give you errors:

$ pep8 --version
1.3.3

Both these versions are the ones shipped with Ubuntu 13.04.

To generate unittest coverage reports (make coverage), coverage needs to be installed.

Installation of all dependencies listed here:

$ apt-get install python-setuptools automake git fakeroot
$ apt-get install pandoc python-epydoc graphviz
$ apt-get install python-yaml python-mock
$ cd / && sudo easy_install \
          sphinx \
          logilab-astng==0.24.1 \
          logilab-common==0.58.3 \
          pylint==0.26.0 \
          pep8==1.3.3 \
          coverage

For Haskell development, again all things from the quick install document, plus:

  • haddock, documentation generator (equivalent to epydoc for Python)
  • HsColour, again used for documentation (it’s source-code pretty-printing)
  • hlint, a source code linter (equivalent to pylint for Python), recommended version 1.8 or above (tested with 1.8.43)
  • the QuickCheck library, version 2.x
  • the HUnit library (tested with 1.2.x)
  • the test-framework libraries, tested versions: test-framework: 0.6, test-framework-hunit: 0.2.7, test-framework-quickcheck2: 0.2.12.1
  • hpc, which comes with the compiler, so you should already have it
  • shelltestrunner, used for running shell-based unit-tests
  • temporary library, tested with version 1.1.2.3

Under Debian Wheezy or later, these can be installed (on top of the required ones from the quick install document) via:

$ apt-get install libghc-quickcheck2-dev libghc-hunit-dev \
      libghc-test-framework-dev \
      libghc-test-framework-quickcheck2-dev \
      libghc-test-framework-hunit-dev \
      libghc-temporary-dev \
      hscolour hlint

Or alternatively via cabal:

$ cabal install QuickCheck HUnit \
        test-framework test-framework-quickcheck2 test-framework-hunit \
        temporary hscolour hlint shelltestrunner

Configuring for development¶

Run the following command (only use PYTHON=... if you need to use a different python version):

$ ./autogen.sh && \
  ./configure --prefix=/usr/local --sysconfdir=/etc --localstatedir=/var

Note that doing development on a machine which already has Ganeti installed is problematic, as PYTHONPATH behaviour can be confusing (see Issue 170 for a bit of history/details; in general it works if the installed and developed versions are very similar, and/or if PYTHONPATH is customised correctly). As such, in general it’s recommended to use a “clean” machine for ganeti development.

Haskell development notes¶

There are a few things which can help writing or debugging the Haskell code.

You can run the Haskell linter hlint via:

$ make hlint

This is not enabled by default (as the htools component is optional). The above command will generate both output on the terminal and, if any warnings are found, also an HTML report at doc/hs-lint.html.

When writing or debugging TemplateHaskell code, it’s useful to see what the splices are converted to. This can be done via:

$ make HEXTRA="-ddump-splices"

Or, more interactively:

$ ghci
λ> :set -ddump-splices
λ> :l src/Ganeti/Objects.hs

And you will get the spliced code as the module is loaded.

To build profiling code you must install the ghc-prof (or ghc6-prof) package, and all the relevant libraries with their -prof counterparts. If installing libraries through cabal, the config file should include library-profiling: True, or the -p flag should be used. Any library already installed can be updated by passing --reinstall as well.

Due to the way TemplateHaskell works, it’s not straightforward to build profiling code. The recommended way is to run make hs-prof, or alternatively the manual sequence is:

$ make clean
$ make src/htools HEXTRA="-osuf .o"
$ rm src/htools
$ make src/htools HEXTRA="-osuf .prof_o -prof -auto-all"

This will build the binary twice, per the TemplateHaskell documentation, the second one with profiling enabled.

The binary files generated by compilation and the profiling/coverage files can “break” tab-completion in the sources; they can be ignored, for example, in bash via .bashrc:

FIGNORE='.o:.hi:.prof_o:.tix'

or in emacs via completion-ignored-extensions (run M-x customize-var completion-ignored-extensions).

Running individual tests¶

When developing code, running the entire test suite can be slow. Running individual tests is possible. There are different Makefile targets for running individual Python and Haskell tests.

For Python tests:

$ export PYTHONPATH=$PWD
$ python ./test/py/ganeti.mytest

For Haskell tests:

$ make hs-test-pattern

Where pattern can be a simple test pattern (e.g. comma, matching any test whose name contains comma), a test pattern denoting a group (ending with a slash, e.g. Utils/), or a more complex glob pattern. For more details, search for glob patterns in the documentation of test-framework.

For individual Haskell shelltests:

$ make hs-shell-name

which runs the test test/hs/shelltests/htools-%name%.test. For example, to run the test test/hs/shelltests/htools-balancing.test, use:

$ make hs-shell-balancing

For combined Haskell shelltests:

$ make hs-shell-{name1,name2,...}

for example:

$ make hs-shell-{balancing,basic}

Packaging notes¶

Ganeti is mostly developed and tested on Debian-based distributions, while still keeping adaptability to other Linux distributions in mind.

The doc/examples/ directory contains a number of potentially useful scripts and configuration files. Some of them might need adjustment before use.

daemon-util¶

This script, in the source code as daemons/daemon-util.in, is used to start/stop Ganeti and do a few other things related to system daemons. It is recommended to use daemon-util also from the system’s init scripts. That way the code starting and stopping daemons is shared and future changes have to be made in only one place.

daemon-util reads extra arguments from variables (*_ARGS) in /etc/default/ganeti. When modifying daemon-util, keep in mind to not remove support for the EXTRA_*_ARGS variables for starting daemons. Some parts of Ganeti use them to pass additional arguments when starting a daemon.

The reload_ssh_keys function can be adjusted to use another command for reloading the OpenSSH daemon’s host keys.


ganeti-2.9.3/doc/html/design-node-add.html0000644000000000000000000003437212271443667020340 0ustar00rootroot00000000000000 Design for adding a node to a cluster — Ganeti 2.9.3 documentation

Design for adding a node to a cluster¶

Current state and shortcomings¶

Before a node can be added to a cluster, its SSH daemon must be re-configured to use the cluster-wide SSH host key. Ganeti 2.3.0 changed the way this is done by moving all related code to a separate script, tools/setup-ssh, using Paramiko. Previously, all such configuration was done from lib/bootstrap.py using the system’s own SSH client and a shell script given to said client through parameters.

Both solutions controlled all actions on the connecting machine; the newly added node was merely executing commands. This implies and requires a tight coupling and equality between nodes (e.g. paths to files being the same). Most of the logic and error handling is also done on the connecting machine.

Once a node’s SSH daemon has been configured, more than 25 files need to be copied using scp before the node daemon can be started. No verification is being done before files are copied. Once the node daemon is started, an opcode is submitted to the master daemon, which will then copy more files, such as the configuration and job queue for master candidates, using RPC. This process is somewhat fragile and requires initiating many SSH connections.

Proposed changes¶

SSH¶

The main goal is to move more logic to the newly added node. Instead of having a relatively large script executed on the master node, most of it is moved over to the added node.

A new script named prepare-node-join is added. It receives a JSON data structure (defined below) on its standard input. Once the data has been successfully decoded, it proceeds to configure the local node’s SSH daemon and root’s SSH settings, after which the SSH daemon is restarted.

All the master node has to do to add a new node is to gather all required data, build the data structure, and invoke the script on the node to be added. This will enable us to once again use the system’s own SSH client and to drop the dependency on Paramiko for Ganeti itself (ganeti-listrunner is going to continue using Paramiko).

Eventually setup-ssh can be removed.

Node daemon¶

Similar to SSH setup changes, the process of copying files and starting the node daemon will be moved into a dedicated program. On its standard input it will receive a standardized JSON structure (defined below). Once the input data has been successfully decoded and the received values were verified for sanity, the program proceeds to write the values to files and then starts the node daemon (ganeti-noded).

To add a new node to the cluster, the master node will have to gather all values, build the data structure, and then invoke the newly added node-daemon-setup program via SSH. In this way only a single SSH connection is needed and the values can be verified before being written to files.
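
A minimal sketch of this pattern (host name, program name and helper function are illustrative assumptions, not the actual implementation):

import json
import subprocess

def invoke_setup(node, program, data):
    # A single SSH connection; the JSON structure goes to standard input.
    proc = subprocess.Popen(["ssh", "root@%s" % node, program],
                            stdin=subprocess.PIPE)
    proc.communicate(json.dumps(data).encode("utf-8"))
    if proc.returncode != 0:
        raise RuntimeError("node setup on %s failed" % node)

invoke_setup("node4.example.com", "node-daemon-setup",
             {"cluster_name": "cluster.example.com"})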

If the program exits successfully, the node is ready to be added to the master daemon’s configuration. The node daemon will be running, but OpNodeAdd needs to be run before it becomes a full node. The opcode will copy more files, such as the RAPI certificate.

Data structures¶

JSON structure for SSH setup¶

The data is given in an object containing the keys described below. Unless specified otherwise, all entries are optional.

cluster_name
Required string with the cluster name. If a local cluster name is found, the join process is aborted unless the passed cluster name matches the local name.
node_daemon_certificate
Public part of cluster’s node daemon certificate in PEM format. If a local node certificate and key is found, the join process is aborted unless this passed public part can be verified with the local key.
ssh_host_key
List containing public and private parts of SSH host key. See below for definition.
ssh_root_key
List containing public and private parts of root’s key for SSH authorization. See below for definition.

Lists of SSH keys use a tuple with three values. The first describes the key variant (rsa or dsa). The second and third are the private and public part of the key. Example:

[
  ("rsa", "-----BEGIN RSA PRIVATE KEY-----...", "ssh-rss AAAA..."),
  ("dsa", "-----BEGIN DSA PRIVATE KEY-----...", "ssh-dss AAAA..."),
]

JSON structure for node daemon setup¶

The data is given in an object containing the keys described below. Unless specified otherwise, all entries are optional.

cluster_name
Required string with the cluster name. If a local cluster name is found, the join process is aborted unless the passed cluster name matches the local name. The cluster name is also included in the dictionary given via the ssconf entry.
node_daemon_certificate
Public and private part of cluster’s node daemon certificate in PEM format. If a local node certificate is found, the process is aborted unless it matches.
ssconf

Dictionary with ssconf names and their values. Both are strings. Example:

{
  "cluster_name": "cluster.example.com",
  "master_ip": "192.168.2.1",
  "master_netdev": "br0",
  # …
}
start_node_daemon
Boolean denoting whether the node daemon should be started (or restarted if it was running for some reason).
ganeti-2.9.3/doc/html/locking.html0000644000000000000000000001773012271443671017036 0ustar00rootroot00000000000000 Ganeti locking — Ganeti 2.9.3 documentation

Ganeti locking¶

Introduction¶

This document describes lock order dependencies in Ganeti. It is divided into functional sections.

Opcode Execution Locking¶

These locks are declared by Logical Units (LUs) (in cmdlib.py) and acquired by the Processor (in mcpu.py) with the aid of the Ganeti Locking Library (locking.py). They are acquired in the following order:

  • BGL: this is the Big Ganeti Lock; it exists for retrocompatibility. New LUs acquire it in a shared fashion and are able to execute all together (barring other lock waits), while old LUs acquire it exclusively, can only execute one at a time, and cannot run at the same time as new LUs.
  • Instance locks: can be declared in ExpandNames() or DeclareLocks() by an LU, and have the same name as the instance itself. They are acquired as a set. Internally the locking library acquires them in alphabetical order.
  • Node locks: can be declared in ExpandNames() or DeclareLocks() by an LU, and have the same name as the node itself. They are acquired as a set. Internally the locking library acquires them in alphabetical order. Given this order it’s possible to safely acquire a set of instances, and then the nodes they reside on (a schematic declaration is sketched below).
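
A schematic example of such declarations (illustrative only; names and import paths are assumptions, not copied from the actual LUs):

from ganeti import locking
from ganeti.cmdlib import LogicalUnit

class LUInstanceExample(LogicalUnit):
    def ExpandNames(self):
        self.needed_locks = {
            locking.LEVEL_INSTANCE: ["inst1.example.com"],
            locking.LEVEL_NODE: [],  # filled in DeclareLocks()
        }

    def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
            # Safe: the instance locks are already held at this point.
            self._LockInstancesNodes()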

The ConfigWriter (in config.py) is also protected by a SharedLock, which is shared by functions that read the config and acquired exclusively by functions that modify it. Since the ConfigWriter calls rpc.call_upload_file to all nodes to distribute the config without holding the node locks, this call must be able to execute on the nodes in parallel with other operations (but not necessarily concurrently with itself on the same file, as inside the ConfigWriter this is called with the internal config lock held).

Job Queue Locking¶

The job queue is designed to be thread-safe. This means that its public functions can be called from any thread. The job queue can be called from functions called by the queue itself (e.g. logical units), but special attention must be paid not to create deadlocks or an invalid state.

The single queue lock is used from all classes involved in the queue handling. During development we tried to split locks, but deemed it to be too dangerous and difficult at the time. Job queue functions acquiring the lock can be safely called from all the rest of the code, as the lock is released before leaving the job queue again. Unlocked functions should only be called from job queue related classes (e.g. in jqueue.py) and the lock must be acquired beforehand.

In the job queue worker (_JobQueueWorker), the lock must be released before calling the LU processor. Otherwise a deadlock can occur when log messages are added to opcode results.

Node Daemon Locking¶

The node daemon contains a lock for the job queue. In order to avoid conflicts and/or corruption when an eventual master daemon or another node daemon is running, it must be held for all job queue operations.

There’s one special case for the node daemon running on the master node. If grabbing the lock in exclusive mode fails on startup, the code assumes all checks have been done by the process keeping the lock.


ganeti-2.9.3/doc/html/design-reason-trail.html0000644000000000000000000003027512271443667021263 0ustar00rootroot00000000000000 Ganeti reason trail — Ganeti 2.9.3 documentation

Ganeti reason trail¶

This is a design document detailing the implementation of a way for Ganeti to track the origin and the reason of every executed command, from its starting point (command line, remote API, some htool, etc.) to its actual execution time.

Current state and shortcomings¶

There is currently no way to track why a job and all the operations part of it were executed, and who or what triggered the execution. This is an inconvenience in general, and it also makes it impossible to obtain certain information, such as the reason why an instance last changed its status (i.e.: why it was started/stopped/rebooted/etc.), or to distinguish an admin request from a scheduled maintenance or an automated tool’s work.

Proposed changes¶

We propose to introduce a new piece of information, that will be called “reason trail”, to track the path from the issuing of a command to its execution.

The reason trail will be a list of 3-tuples (source, reason, timestamp), with:

source
The entity deciding to perform (or forward) a command. It is represented by an arbitrary string, but strings prepended by “gnt:” are reserved for Ganeti components, and they will be refused by the interfaces towards the external world.
reason
The reason why the entity decided to perform the operation. It is represented by an arbitrary string. The string might possibly be empty, because certain components of the system might just “pass on” the operation (therefore wanting to be recorded in the trail) but without an explicit reason.
timestamp
The time when the element was added to the reason trail. It has to be expressed in nanoseconds since the unix epoch (0:00:00 January 01, 1970). If not enough precision is available (or needed) it can be padded with zeroes.

The reason trail will be attached at the OpCode level. When it has to be serialized externally (such as on the RAPI interface), it will be serialized in JSON format. Specifically, it will be serialized as a list of elements. Each element will be a list with two strings (for source and reason) and one integer number (the timestamp).

Any component the operation goes through is allowed (but not required) to append its own reason to the list. Other than this, the list shouldn’t be modified.
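
A component could therefore extend the trail with a helper along these lines (a hypothetical function; time.time() seconds are scaled to the nanoseconds required above):

import time

def extend_reason_trail(trail, source, reason):
    # Append this component's (source, reason, timestamp) element.
    trail.append((source, reason, int(time.time() * 1e9)))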

As an example here is the reason trail for a shutdown operation invoked from the command line through the gnt-instance tool:

[("user", "Cleanup of unused instances", 1363088484000000000),
 ("gnt:client:gnt-instance", "stop", 1363088484020000000),
 ("gnt:opcode:shutdown", "job=1234;index=0", 1363088484026000000),
 ("gnt:daemon:noded:shutdown", "", 1363088484135000000)]

where the first 3-tuple is determined by a user-specified message, passed to gnt-instance through a command line parameter.

The same operation, launched by an external GUI tool, and executed through the remote API, would have a reason trail like:

[("user", "Cleanup of unused instances", 1363088484000000000),
 ("other-app:tool-name", "gui:stop", 1363088484000300000),
 ("gnt:client:rapi:shutdown", "", 1363088484020000000),
 ("gnt:library:rlib2:shutdown", "", 1363088484023000000),
 ("gnt:opcode:shutdown", "job=1234;index=0", 1363088484026000000),
 ("gnt:daemon:noded:shutdown", "", 1363088484135000000)]

Implementation¶

The OpCode base class will be modified to include a new parameter, “reason”. This will receive the reason trail as built by all the previous steps.

When an OpCode is added to a job (in jqueue.py) the job number and the opcode index will be recorded as the reason for the existence of that opcode.

From the command line tools down to the opcodes, the implementation of this design will be shared by all the components of the system. After the opcodes have been enqueued in a job queue and are dispatched for execution, the implementation will have to be OpCode specific because of the current structure of the Ganeti backend.

The implementation of opcode-specific parts will start from the operations that affect the instance status (as required by the design document about the monitoring daemon, for the instance status data collector). Such opcodes will be changed so that the “reason” is passed to them and they will then export the reason trail on a file.

The implementation for other opcodes will follow when required.


ganeti-2.9.3/doc/html/design-network.html0000644000000000000000000006000512271443667020346 0ustar00rootroot00000000000000 Network management — Ganeti 2.9.3 documentation

Network management¶

This is a design document detailing the implementation of network resource management in Ganeti.

Current state and shortcomings¶

Currently Ganeti supports two configuration modes for instance NICs: routed and bridged mode. The ip NIC parameter, which is mandatory for routed NICs and optional for bridged ones, holds the given NIC’s IP address and may be filled either manually, or via a DNS lookup for the instance’s hostname.

This approach presents some shortcomings:

  1. It relies on external systems to perform network resource management. Although large organizations may already have IP pool management software in place, this is not usually the case with stand-alone deployments. For smaller installations it makes sense to allocate a pool of IP addresses to Ganeti and let it transparently assign these IPs to instances as appropriate.

  2. The NIC network information is incomplete, lacking netmask and gateway. Operating system providers could for example use the complete network information to fully configure an instance’s network parameters upon its creation.

    Furthermore, having full network configuration information would enable Ganeti nodes to become more self-contained and be able to infer system configuration (e.g. /etc/network/interfaces content) from Ganeti configuration. This should make configuration of newly-added nodes a lot easier and less dependent on external tools/procedures.

  3. Instance placement must explicitly take network availability in different node groups into account; the same link is implicitly expected to connect to the same network across the whole cluster, which may not always be the case with large clusters with multiple node groups.

Proposed changes¶

In order to deal with the above shortcomings, we propose to extend Ganeti with high-level network management logic, which consists of a new NIC slot called network, a new Network configuration object (cluster level) and logic to perform IP address pool management, i.e. maintain a set of available and occupied IP addresses.

Configuration changes¶

We propose the introduction of a new high-level Network object, containing (at least) the following data:

  • Symbolic name
  • UUID
  • Network in CIDR notation (IPv4 + IPv6)
  • Default gateway, if one exists (IPv4 + IPv6)
  • IP pool management data (reservations)
  • Default NIC connectivity mode (bridged, routed). This is the functional equivalent of the current NIC mode.
  • Default host interface (e.g. br0). This is the functional equivalent of the current NIC link.
  • Tags

Each network will be connected to any number of node groups. During the connection of a network to a nodegroup, we define the corresponding connectivity mode (bridged or routed) and the host interface (br100 or routing_table_200). This is achieved by adding a networks slot to the NodeGroup object and using the networks’ UUIDs as keys. The value for each key is a dictionary containing the network’s mode and link (netparams). Every NIC assigned to the network will eventually inherit the network’s netparams, as its nicparams.

IP pool management¶

A new helper library is introduced, wrapping around Network objects to give IP pool management capabilities. A network’s pool is defined by two bitfields, each as long as the number of addresses in the network:

reservations
This field holds all IP addresses reserved by Ganeti instances, as well as cluster IP addresses (node addresses + cluster master)
external reservations
This field holds all IP addresses that are manually reserved by the administrator, because some other equipment is using them outside the scope of Ganeti.

The bitfields are implemented using the python-bitarray package for space efficiency and their binary value stored base64-encoded for JSON compatibility. This approach gives relatively compact representations even for large IPv4 networks (e.g. /20).
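
As a rough illustration of this scheme (assuming the python-bitarray API; a /28 network has 16 addresses):

import base64
from bitarray import bitarray

reservations = bitarray(16)  # one bit per address in a /28
reservations.setall(False)
reservations[0] = True       # e.g. the network address is reserved

# Stored base64-encoded for JSON compatibility:
encoded = base64.b64encode(reservations.tobytes())

restored = bitarray()
restored.frombytes(base64.b64decode(encoded))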

Ganeti-owned IP addresses (node + master IPs) are reserved automatically if the cluster’s data network itself is placed under pool management.

Helper ConfigWriter methods provide free IP address generation and reservation, using a TemporaryReservationManager.

It should be noted that IP pool management is performed only for IPv4 networks, as they are expected to be densely populated. IPv6 networks can use different approaches, e.g. sequential address assignment or EUI-64 addresses.

New NIC parameter: network¶

In order to be able to use the new network facility while maintaining compatibility with the current networking model, a new NIC parameter is introduced, called network to reflect the fact that the given NIC belongs to the given network and its configuration is managed by Ganeti itself. To keep backwards compatibility, existing code is executed if the network value is ‘none’ or omitted during NIC creation. If we want our NIC to be assigned to a network, then only the ip (optional) and the network parameters should be passed. Mode and link are inherited from the network-nodegroup mapping configuration (netparams). This provides the desired abstraction between the VM’s network and the node-specific underlying infrastructure.

We also introduce a new ip address value, constants.NIC_IP_POOL, that specifies that a given NIC’s IP address should be obtained using the IP address pool of the specified network. This value is only valid for NICs belonging to a network. A NIC’s IP address can also be specified manually, as long as it is contained in the network the NIC is connected to.
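
For example, a NIC drawing its address from a network's pool could be requested roughly as follows (illustrative option syntax and OS name):

gnt-instance add -t plain -o debian-image \
  --net 0:ip=pool,network=net100 test1.example.com
 (The NIC inherits mode and link from net100's netparams)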

Hooks¶

Introduce new hooks concerning network operations:

OP_NETWORK_ADD

Add a network to Ganeti

directory:network-add
pre-execution:master node
post-execution:master node
OP_NETWORK_REMOVE

Remove a network from Ganeti

directory:network-remove
pre-execution:master node
post-execution:master node
OP_NETWORK_SET_PARAMS

Modify a network

directory:network-modify
pre-execution:master node
post-execution:master node

For connect/disconnect operations use existing:

OP_GROUP_SET_PARAMS

Modify a nodegroup

directory:group-modify
pre-execution:master node
post-execution:master node
Hook variables¶

During instance related operations:

INSTANCE_NICn_NETWORK
The friendly name of the network

During network related operations:

NETWORK_NAME
The friendly name of the network
NETWORK_SUBNET
The ip range of the network
NETWORK_GATEWAY
The gateway of the network

During nodegroup related operations:

GROUP_NETWORK
The friendly name of the network
GROUP_NETWORK_MODE
The mode (bridged or routed) of the netparams
GROUP_NETWORK_LINK
The link of the netparams

Backend changes¶

To keep the hypervisor-visible changes to a minimum, and maintain compatibility with the existing network configuration scripts, the instance’s hypervisor configuration will have host-level mode and link replaced by the connectivity mode and host interface (netparams) of the given network on the current node group.

Network configuration scripts detect if a NIC is assigned to a Network by the presence of the new environment variable:

Network configuration script variables¶
NETWORK
The friendly name of the network

Conflicting IPs¶

To ensure IP uniqueness inside a nodegroup, we introduce the term conflicting ips. Conflicting IPs occur: (a) when creating a networkless NIC with an IP contained in a network already connected to the instance’s nodegroup; (b) when connecting/disconnecting a network to/from a nodegroup while instances with IPs inside the network’s range still exist. Conflicting IPs produce prereq errors.

Handling of conflicting IPs with the --force option:

For case (a), reserve the IP and assign the NIC to the network. For case (b), during connect do the same as in (a); during disconnect, release the IP and reset the NIC’s network parameter to None.

Userland interface¶

A new client script is introduced, gnt-network, which handles network-related configuration in Ganeti.

Network addition/deletion¶
gnt-network add --network=192.168.100.0/28 --gateway=192.168.100.1 \
                --network6=2001:db8:2ffc::/64 --gateway6=2001:db8:2ffc::1 \
                --add-reserved-ips=192.168.100.10,192.168.100.11 net100
 (Checks for an already existing name and for valid IP values)
gnt-network remove network_name
 (Checks if not connected to any nodegroup)
Network modification¶
gnt-network modify --gateway=192.168.100.5 net100
 (Changes the gateway only if ip is available)
gnt-network modify --add-reserved-ips=192.168.100.11 net100
 (Adds externally reserved ip)
gnt-network modify --remove-reserved-ips=192.168.100.11 net100
 (Removes externally reserved ip)
Assignment to node groups¶
gnt-network connect net100 nodegroup1 bridged br100
 (Checks for existing bridge among nodegroup)
gnt-network connect net100 nodegroup2 routed rt_table
 (Checks for conflicting IPs)
gnt-network disconnect net101 nodegroup1
 (Checks for conflicting IPs)
Network listing¶
gnt-network list

Network      Subnet           Gateway       NodeGroups GroupList
net100       192.168.100.0/28 192.168.100.1          1 default(bridged, br100)
net101       192.168.101.0/28 192.168.101.1          1 default(routed, rt_tab)
Network information¶
gnt-network info testnet1

Network name: testnet1
 subnet: 192.168.100.0/28
 gateway: 192.168.100.1
 size: 16
 free: 10 (62.50%)
 usage map:
       0 XXXXX..........X                                                 63
         (X) used    (.) free
 externally reserved IPs:
   192.168.100.0, 192.168.100.1, 192.168.100.15
 connected to node groups:
   default(bridged, br100)
 used by 3 instances:
   test1 : 0:192.168.100.4
   test2 : 0:192.168.100.2
   test3 : 0:192.168.100.3

IAllocator changes¶

The IAllocator protocol can be made network-aware, i.e. also consider network availability for node group selection. Networks, as well as future shared storage pools, can be seen as constraints used to rule out the placement on certain node groups.

ganeti-2.9.3/doc/html/design-htools-2.3.html0000644000000000000000000005250712271443666020474 0ustar00rootroot00000000000000 Synchronising htools to Ganeti 2.3 — Ganeti 2.9.3 documentation

Synchronising htools to Ganeti 2.3¶

Ganeti 2.3 introduces a number of new features that change the cluster internals significantly enough that the htools suite needs to be updated accordingly in order to function correctly.

Shared storage support¶

Currently, the htools algorithms presume a model where all of an instance’s resources are served from within the cluster, more specifically from the nodes comprising the cluster. While this is usual for memory and CPU, deployments which use shared storage will invalidate this assumption for storage.

To account for this, we need to move some assumptions from being implicit (and hardcoded) to being explicitly exported from Ganeti.

New instance parameters¶

It is presumed that Ganeti will export for all instances a new storage_type parameter, that will denote either internal storage (e.g. plain or drbd), or external storage.

Furthermore, a new storage_pool parameter will classify, for both internal and external storage, the pool out of which the storage is allocated. For internal storage, this will be either lvm (the pool that provides space to both plain and drbd instances) or file (for file-storage-based instances). For external storage, this will be the respective NAS/SAN/cloud storage that backs the instance. Note that for htools, external storage pools are opaque; we only care that they have an identifier, so that we can distinguish between two different pools.

If these two parameters are not present, the instances will be presumed to be internal/lvm.

New node parameters¶

For each node, it is expected that Ganeti will export what storage types it supports and pools it has access to. So a classic 2.2 cluster will have all nodes supporting internal/lvm and/or internal/file, whereas a new shared-storage-only 2.3 cluster could have external/my-nas storage.

Whatever the mechanism that Ganeti will use internally to configure the associations between nodes and storage pools, we consider that we’ll have available two node attributes inside htools: the list of internal and external storage pools.

External storage and instances¶

Currently, for an instance we allow one cheap move type: failover to the current secondary, if it is a healthy node, and four other “expensive” (as in, including data copies) moves that involve changing either the secondary or the primary node or both.

In presence of an external storage type, the following things will change:

  • the disk-based moves will be disallowed; this is already a feature in the algorithm, controlled by a boolean switch, so adapting external storage here will be trivial
  • instead of the current one secondary node, the secondaries will become a list of potential secondaries, based on access to the instance’s storage pool

Except for this, the basic move algorithm remains unchanged.

External storage and nodes¶

Two separate areas will have to change for nodes and external storage.

First, when allocating instances (either as part of a move or a new allocation), if the instance is using external storage, then the internal disk metrics should be ignored (for both the primary and secondary cases).

Second, the per-node metrics used in the cluster scoring must take into account that nodes might not have internal storage at all, and handle this as a well-balanced case (score 0).

N+1 status¶

Currently, computing the N+1 status of a node is simple (a sketch follows the list):

  • group the current secondary instances by their primary node, and compute the sum of each instance group memory
  • choose the maximum sum, and check if it’s smaller than the current available memory on this node
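
A schematic version of this per-node check (names are illustrative; instances are assumed to carry their primary node and memory size):

from collections import defaultdict

def n_plus_one_ok(node, secondary_instances):
    # Group this node's secondary instances by their primary node and
    # require that the worst-case failover still fits in free memory.
    per_primary = defaultdict(int)
    for inst in secondary_instances:
        per_primary[inst.primary] += inst.memory
    worst = max(per_primary.values()) if per_primary else 0
    return worst <= node.free_memory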

In effect, computing the N+1 status is a per-node matter. However, with shared storage, we don’t have secondary nodes, just potential secondaries. Thus computing the N+1 status will be a cluster-level matter, and much more expensive.

A simple version of the N+1 checks would be that for each instance having said node as primary, we have enough memory in the cluster for relocation. This means we would actually need to run allocation checks, and update the cluster status from within allocation on one node, while being careful that we don’t recursively check N+1 status during this relocation, which is too expensive.

However, the shared storage model has some properties that changes the rules of the computation. Speaking broadly (and ignoring hard restrictions like tag based exclusion and CPU limits), the exact location of an instance in the cluster doesn’t matter as long as memory is available. This results in two changes:

  • simply tracking the amount of free memory buckets is enough, cluster-wide
  • moving an instance from one node to another would not change the N+1 status of any node, and only allocation needs to deal with N+1 checks

Unfortunately, this very cheap solution fails in case of any other exclusion or prevention factors.

TODO: find a solution for N+1 checks.

Node groups support¶

The addition of node groups has a small impact on the actual algorithms, which will simply operate at node group level instead of cluster level, but it requires the addition of new algorithms for inter-node group operations.

The following two definitions will be used in the following paragraphs:

local group
The local group refers to a node’s own node group, or when speaking about an instance, the node group of its primary node
regular cluster
A cluster composed of a single node group, or pre-2.3 cluster
super cluster
This term refers to a cluster which comprises multiple node groups, as opposed to a 2.2 and earlier cluster with a single node group

In all the below operations, it’s assumed that Ganeti can gather the entire super cluster state cheaply.

Balancing changes¶

Balancing will move from cluster-level balancing to group balancing. In order to achieve a reasonable improvement in a super cluster, without needing to keep state of what groups have been already balanced previously, the balancing algorithm will run as follows:

  1. the cluster data is gathered
  2. if this is a regular cluster, as opposed to a super cluster, balancing will proceed normally as previously
  3. otherwise, compute the cluster scores for all groups
  4. choose the group with the worst score and see if we can improve it; if not, choose the next-worst group, and so on
  5. once a group has been identified, run the balancing for it

Of course, explicit selection of a group will be allowed.

Super cluster operations¶

Beside the regular group balancing, in a super cluster we have more operations.

Redistribution¶

In a regular cluster, once we run out of resources (offline nodes which can’t be fully evacuated, N+1 failures, etc.) there is nothing we can do unless nodes are added or instances are removed.

In a super cluster however, there might be resources available in another group, so there is the possibility of relocating instances between groups to re-establish N+1 success within each group.

One difficulty in the presence of both super clusters and shared storage is that the move paths of instances are quite complicated; basically an instance can move inside its local group, and to any other groups which have access to the same storage type and storage pool pair. In effect, the super cluster is composed of multiple ‘partitions’, each containing one or more groups, but a node is simultaneously present in multiple partitions, one for each storage type and storage pool it supports. As such, the interactions between the individual partitions are too complex for non-trivial clusters to assume we can compute a perfect solution: we might need to move some instances using shared storage pool ‘A’ in order to clear some more memory to accept an instance using local storage, which will further clear more VCPUs in a third partition, etc. As such, we’ll limit ourselves at simple relocation steps within a single partition.

Algorithm:

  1. read super cluster data, and exit if cluster doesn’t allow inter-group moves

  2. filter out any groups that are “alone” in their partition (i.e. no other group sharing at least one storage method)

  3. determine list of healthy versus unhealthy groups:

    1. a group which contains offline nodes still hosting instances is definitely not healthy
    2. a group which has nodes failing N+1 is ‘weakly’ unhealthy
  4. if either list is empty, exit (no work to do, or no way to fix problems)

  5. for each unhealthy group:

    1. compute the instances that are causing the problems: all instances living on offline nodes, all instances living as secondary on N+1 failing nodes, all instances living as primaries on N+1 failing nodes (in this order)
    2. remove instances, one by one, until the source group is healthy again
    3. try to run a standard allocation procedure for each instance on all potential groups in its partition
    4. if all instances were relocated successfully, it means we have a solution for repairing the original group
Compression¶

In a super cluster which has had many instance reclamations, it is possible that while none of the groups is empty, overall there is enough empty capacity that an entire group could be removed.

The algorithm for “compressing” the super cluster is as follows:

  1. read super cluster data

  2. compute total (memory, disk, cpu), and free (memory, disk, cpu) for the super-cluster

  3. compute per-group used and free (memory, disk, cpu)

  4. select candidate groups for evacuation:

    1. they must be connected to other groups via a common storage type and pool
    2. they must have fewer used resources than the global free resources (minus their own free resources)
  5. for each of these groups, try to relocate all its instances to connected peer groups

  6. report the list of groups that could be evacuated, or if instructed so, perform the evacuation of the group with the largest free resources (i.e. in order to reclaim the most capacity)

Load balancing¶

Assuming a super cluster using shared storage, where instance failover is cheap, it should be possible to do a load-based balancing across groups.

As opposed to the normal balancing, where we want to balance on all node attributes, here we should look only at the load attributes; in other words, compare the available (total) node capacity with the (total) load generated by instances in a given group, and computing such scores for all groups, trying to see if we have any outliers.

Once a reliable load-weighting method for groups exists, we can apply a modified version of the cluster scoring method to score not imbalances across nodes, but imbalances across groups which result in a super cluster load-related score.

Allocation changes¶

It is important to keep the allocation method across groups internal (in the Ganeti/Iallocator combination), instead of delegating it to an external party (e.g. a RAPI client). For this, the IAllocator protocol should be extended to provide proper group support.

For htools, the new algorithm will work as follows:

  1. read/receive cluster data from Ganeti
  2. filter out any groups that do not support the requested storage method
  3. for remaining groups, try allocation and compute scores after allocation
  4. sort valid allocation solutions accordingly and return the entire list to Ganeti

The rationale for returning the entire group list, and not only the best choice, is that we anyway have the list, and Ganeti might have other criteria (e.g. the best group might be busy/locked down, etc.) so even if from the point of view of resources it is the best choice, it might not be the overall best one.

Node evacuation changes¶

While the basic concept in the multi-evac iallocator mode remains unchanged (it’s a simple local group issue), when failing to evacuate and running in a super cluster, we could have resources available elsewhere in the cluster for evacuation.

The algorithm for computing this will be the same as the one for super cluster compression and redistribution, except that the list of instances is fixed to the ones living on the nodes to-be-evacuated.

If the inter-group relocation is successful, the result to Ganeti will not be a local group evacuation target, but instead (for each instance) a pair (remote group, nodes). Ganeti itself will have to decide (based on user input) whether to continue with inter-group evacuation or not.

In case that Ganeti doesn’t provide complete cluster data, just the local group, the inter-group relocation won’t be attempted.

ganeti-2.9.3/doc/html/design-draft.html0000644000000000000000000002332212271443666017755 0ustar00rootroot00000000000000 Design document drafts — Ganeti 2.9.3 documentation ganeti-2.9.3/doc/html/install-quick.html0000644000000000000000000006446512271443671020177 0ustar00rootroot00000000000000 Ganeti quick installation guide — Ganeti 2.9.3 documentation

Ganeti quick installation guide¶

Please note that a more detailed installation procedure is described in the Ganeti installation tutorial. Refer to it if you are setting up Ganeti the first time. This quick installation guide is mainly meant as reference for experienced users. A glossary of terms can be found in the Glossary.

Software Requirements¶

Before installing, please verify that you have the following programs:

These programs are supplied as part of most Linux distributions, so usually they can be installed via the standard package manager. Also many of them will already be installed on a standard machine. On Debian/Ubuntu, you can use this command line to install all required packages, except for RBD, DRBD and Xen:

$ apt-get install lvm2 ssh bridge-utils iproute iputils-arping make \
                  ndisc6 python python-openssl openssl \
                  python-pyparsing python-simplejson python-bitarray \
                  python-pyinotify python-pycurl python-ipaddr socat fping

For older distributions (e.g. Debian Squeeze) the package names are different:

$ apt-get install lvm2 ssh bridge-utils iproute iputils-arping make \
                  ndisc6 python python-pyopenssl openssl \
                  python-pyparsing python-simplejson python-bitarray \
                  python-pyinotify python-pycurl python-ipaddr socat fping

If bitarray is missing it can be installed via easy_install:

$ easy_install bitarray

Note that this does not install the optional packages:

$ apt-get install python-paramiko python-affinity qemu-img

If some of the python packages are not available in your system, you can try installing them using easy_install command. For example:

$ apt-get install python-setuptools python-dev
$ cd / && sudo easy_install \
          affinity \
          bitarray \
          ipaddr

On Fedora to install all required packages except RBD, DRBD and Xen:

$ yum install openssh openssh-clients bridge-utils iproute ndisc6 make \
              pyOpenSSL pyparsing python-simplejson python-inotify \
              python-lxml socat fping python-bitarray python-ipaddr

For optional packages use the command:

$ yum install python-paramiko python-affinity qemu-img

If you want to build from source, please see doc/devnotes.rst for more dependencies.

Note

Ganeti’s import/export functionality uses socat with OpenSSL for transferring data between nodes. By default, OpenSSL 0.9.8 and above employ transparent compression of all data using zlib if supported by both sides of a connection. In cases where a lot of data is transferred, this can lead to an increased CPU usage. Additionally, Ganeti already compresses all data using gzip where it makes sense (for inter-cluster instance moves).

To remedy this situation, patches implementing a new socat option for disabling OpenSSL compression have been contributed and will likely be included in the next feature release. Until then, users or distributions need to apply the patches on their own.

Ganeti will use the option if it’s detected by the configure script; auto-detection can be disabled by explicitly passing --enable-socat-compress (use the option to disable compression) or --disable-socat-compress (don’t use the option).

The patches and more information can be found on http://www.dest-unreach.org/socat/contrib/socat-opensslcompress.html.

Haskell requirements¶

Starting with Ganeti 2.7, the Haskell GHC compiler and a few base libraries are required in order to build Ganeti (but not to run and deploy Ganeti on production machines). More specifically:

  • GHC version 6.12 or higher
  • or even better, The Haskell Platform which gives you a simple way to bootstrap Haskell
  • json, a JSON library
  • network, a basic network library
  • parallel, a parallel programming library (note: tested with up to version 3.x)
  • bytestring and utf8-string libraries; these usually come with the GHC compiler
  • deepseq
  • curl, tested with versions 1.3.4 and above
  • hslogger, version 1.1 and above (note that Debian Squeeze only has version 1.0.9)

Some of these are also available as package in Debian/Ubuntu:

$ apt-get install ghc libghc-json-dev libghc-network-dev \
                  libghc-parallel-dev libghc-deepseq-dev \
                  libghc-utf8-string-dev libghc-curl-dev \
                  libghc-hslogger-dev

Or in older versions of these distributions (using GHC 6.x):

$ apt-get install ghc6 libghc6-json-dev libghc6-network-dev \
                  libghc6-parallel-dev libghc6-deepseq-dev \
                  libghc6-curl-dev

In Fedora, some of them are available via packages as well:

$ yum install ghc ghc-json-devel ghc-network-devel \
                  ghc-parallel-devel ghc-deepseq-devel

If using a distribution which does not provide them, first install the Haskell platform. You can also install cabal manually:

$ apt-get install cabal-install
$ cabal update

Then install the additional libraries (only the ones not available in your distribution packages) via cabal:

$ cabal install json network parallel utf8-string curl hslogger

Haskell optional features¶

Optionally, more functionality can be enabled if your build machine has a few more Haskell libraries enabled: the ganeti-confd and ganeti-luxid daemon (--enable-confd) and the monitoring daemon (--enable-mond). The list of extra dependencies for these is:

These libraries are available in Debian Wheezy (but not in Squeeze), so you can use either apt:

$ apt-get install libghc-crypto-dev libghc-text-dev \
                  libghc-hinotify-dev libghc-regex-pcre-dev \
                  libghc-attoparsec-dev libghc-vector-dev \
                  libghc-snap-server-dev

or cabal, after installing a required non-Haskell dependency:

$ apt-get install libpcre3-dev libcurl4-openssl-dev
$ cabal install Crypto text hinotify==0.3.2 regex-pcre \
                attoparsec vector snap-server

to install them.

In case you still use ghc-6.12, note that cabal would automatically try to install newer versions of some of the libraries that snap-server depends on, which cannot be compiled with ghc-6.12; so you have to install snap-server on its own, explicitly forcing the installation of compatible versions:

$ cabal install MonadCatchIO-transformers==0.2.2.0 mtl==2.0.1.0 \
                hashable==1.1.2.0 case-insensitive==0.3 parsec==3.0.1 \
                network==2.3 snap-server==0.8.1

The most recent Fedora doesn’t provide crypto or inotify, so these need to be installed using cabal, if desired. The other packages can be installed via yum:

$ yum install ghc-hslogger-devel ghc-text-devel \
              ghc-regex-pcre-devel

Note

If one of the cabal packages fails to install due to unfulfilled dependencies, you can try enabling symlinks in ~/.cabal/config.

Make sure that your ~/.cabal/bin directory (or whatever else is defined as bindir) is in your PATH.

Installation of the software¶

To install, simply run the following command:

$ ./configure --localstatedir=/var --sysconfdir=/etc && \
  make && \
  make install

This will install the software under /usr/local. You then need to copy doc/examples/ganeti.initd to /etc/init.d/ganeti and integrate it into your boot sequence (chkconfig, update-rc.d, etc.).

Cluster initialisation¶

Before initialising the cluster, on each node you need to create the following directories:

  • /etc/ganeti
  • /var/lib/ganeti
  • /var/log/ganeti
  • /srv/ganeti
  • /srv/ganeti/os
  • /srv/ganeti/export

After this, use gnt-cluster init.

ganeti-2.9.3/doc/html/index.html0000644000000000000000000002670112271443673016517 0ustar00rootroot00000000000000 Welcome to Ganeti’s documentation! — Ganeti 2.9.3 documentation

Welcome to Ganeti’s documentation!¶

This page is the starting point for browsing the Ganeti documentation. Below, the corpus of Ganeti documentation is grouped by topic.

A few quick references:

  • Glossary: Provides explanations of basic Ganeti terminology.
  • News file: Lists changes between Ganeti versions.
  • Search Page: Allows you to search for key terms across Ganeti documentation.

Installing Ganeti¶

Use the following resources to install and/or upgrade Ganeti:

Using Ganeti¶

The following resources provide guidance on how to use Ganeti:

  • Ganeti administrator’s guide: Information about how to manage a Ganeti cluster after it is installed (including management of nodes and instances, and information about Ganeti’s tools and monitoring agent).
  • Ganeti walk-through: An example-oriented guide to Ganeti.
  • Man pages: Descriptions of the various tools that are part of Ganeti.
  • Security in Ganeti: A description of the security model underlying a Ganeti cluster.
  • Ganeti customisation using hooks: Information on hooking scripts, which extend Ganeti functionalities by automatically activating when certain events occur.
  • Ganeti automatic instance allocation: Description of the API for external tools, which can allocate instances either manually or automatically.
  • Ganeti remote API: Description of the Ganeti remote API, which allows programmatic access to most of the functionalities of Ganeti.
  • OVF converter: Description of a tool that provides compatibility with the standard OVF virtual machine interchange format.
  • Virtual cluster support: Explanation of how to use virtual cluster support, which is utilized mainly for testing reasons.

Some features are explicitly targeted for large Ganeti installations, in which multiple clusters are present:

Developing Ganeti¶

There are a few documents particularly useful for developers who want to modify Ganeti:

  • Ganeti locking: Describes Ganeti’s locking strategy and lock order dependencies.
  • Developer notes: Details build dependencies and other useful development-related information.

Implemented designs¶

Before actual implementation, all Ganeti features are described in a design document. Designs fall into two categories: released versions and draft versions (which are either incomplete or not implemented).


ganeti-2.9.3/doc/html/design-internal-shutdown.html0000644000000000000000000003372412271443666022351 0ustar00rootroot00000000000000 Detection of user-initiated shutdown from inside an instance — Ganeti 2.9.3 documentation

Detection of user-initiated shutdown from inside an instance¶

This is a design document detailing the implementation of a way for Ganeti to detect whether a machine marked as up but not running was shutdown gracefully by the user from inside the machine itself.

Current state and shortcomings¶

Ganeti keeps track of the desired status of instances in order to be able to take proper actions (e.g. reboot) on the ones that happen to crash. Currently, the only way to properly shut down a machine is through Ganeti’s own commands, which mark the instance as ADMIN_down. If a user shuts down an instance from inside, through the proper command of the operating system it is running, the instance will be shut down gracefully, but Ganeti is not aware of that: the desired status of the instance will still be marked as running, so when the watcher realises that the instance is down, it will restart it. This behaviour is usually not what the user expects.

Proposed changes¶

We propose to modify Ganeti in such a way that it will detect when an instance was shut down because of an explicit user request. When such a situation is detected, instead of presenting an error as happens now, either the state of the instance will be set to ADMIN_down, or the instance will be automatically rebooted, depending on an instance-specific configuration value. If no such parameter is found, the default behaviour will be to follow the apparent will of the user, and to set to ADMIN_down an instance that was shut down correctly from inside.

This design document applies to the Xen backend of Ganeti, because it uses features specific to that hypervisor. Initial analysis suggests that a similar approach might be used for KVM as well, so this design document will later be extended with more details about it.

Implementation¶

Xen knows why a domain is being shut down (a crash or an explicit shutdown or poweroff request), but such information is not usually readily available externally, because all such cases lead to the virtual machine being destroyed immediately after the event is detected.

Still, Xen allows the instance configuration file to define what action should be taken in all those cases through the on_poweroff, on_shutdown and on_crash variables. By setting them to preserve, Xen will avoid destroying the domains automatically.
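
With these settings, the relevant part of an instance’s Xen configuration file would look like this:

on_poweroff = "preserve"
on_shutdown = "preserve"
on_crash = "preserve"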

When the domain is not destroyed, it can be viewed by using xm list (or xl list in newer Xen versions), and the State field of the output will provide useful information.

If the state is ----c- it means the instance has crashed.

If the state is ---s-- it means the instance was properly shutdown.
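
For illustration, the output for a cleanly shut down domain might look like this (the exact column layout varies between Xen versions):

$ xm list
Name         ID   Mem VCPUs      State   Time(s)
instance1     3  1024     1     ---s--      12.4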

If the instance was properly shut down but is still marked as running by Ganeti, it means that it was shut down from inside by the user, and the Ganeti status of the instance needs to be changed to ADMIN_down.

This will be done at regular intervals by the group watcher, just before deciding which instances to reboot.

On top of that, at the same time, the watcher will also need to issue xm destroy commands for all the domains that are in the crashed or shutdown state, since Xen will no longer do this automatically because of the preserve setting in their config files.

This behavior will be limited to the domains shut down from inside, because the preserve setting keeps the resources of the domain busy until the watcher performs the cleanup (which, with the default setting, may take up to 5 minutes). Still, this is considered acceptable, because it is not frequent for a domain to be shut down this way. The cleanup function will also be run automatically just before performing any job that requires resources to be available (such as when creating a new instance), in order to ensure that the new resource allocation starts from a clean state. Functionalities that only query the state of instances will not run the cleanup function.

The cleanup operation includes both node-specific operations (the actual destruction of the stopped domains) and configuration changes, to be performed on the master node (marking as offline an instance that was shut down internally). The watcher, on the master node, will fetch the list of instances that have been shut down from inside (recognizable by their oper_state, as described below). It will then submit a series of InstanceShutdown jobs that will mark such instances as ADMIN_down and clean them up (after the functionality of InstanceShutdown has been extended as specified in the rest of this design document).

LUs performing operations other than an explicit cleanup will have to be modified to perform the cleanup as well, either by submitting a job to perform the cleanup (to be completed before actually performing the task at hand) or by explicitly performing the cleanup themselves through the RPC calls.

Other required changes¶

The implementation of this design document will require some commands to be changed in order to cope with the new shutdown procedure.

With the default shutdown action in Xen set to preserve, the Ganeti command for shutting down instances would leave them in a shutdown but preserved state. Therefore, it will have to be changed to immediately perform the cleanup of the instance after verifying its correct shutdown. It will also have to correctly deal with instances that have been shut down from inside but are still active according to Ganeti, by detecting this situation, destroying the instance and carrying out the rest of the Ganeti shutdown procedure as usual.

The gnt-instance list command will need to be able to handle the situation where an instance was shutdown internally but not yet cleaned up. The admin_state field will maintain the current meaning unchanged. The oper_state field will get a new possible state, S, meaning that the instance was shutdown internally.

The gnt-instance info command State field, in such case, will show a message stating that the instance was supposed to be run but was shut down internally.

ganeti-2.9.3/doc/html/design-http-server.html0000644000000000000000000003465612271443666021154 0ustar00rootroot00000000000000 Design for replacing Ganeti’s HTTP server — Ganeti 2.9.3 documentation

Design for replacing Ganeti’s HTTP server¶

Current state and shortcomings¶

The new design for import/export depends on an HTTP server. Ganeti includes a home-grown HTTP server based on Python’s BaseHTTPServer. While it has served us well so far, it only implements the very basics of the HTTP protocol. It is, for example, not structured well enough to support chunked transfers (RFC 2616, section 3.6.1), which would have some advantages. In addition, it has not been designed for sending large responses.

In the case of the node daemon, the HTTP server cannot easily be separated from the actual backend code and therefore must run as “root”. The RAPI daemon does request parsing in the same process as talking to the master daemon via LUXI.

Proposed changes¶

The proposal is to start using a full-fledged HTTP server in Ganeti and to run Ganeti’s code as FastCGI applications. Reasons:

  • Simplify Ganeti’s code by delegating the details of HTTP and SSL to another piece of software
  • Run HTTP frontend and handler backend as separate processes and users (esp. useful for node daemon, but also import/export and Remote API)
  • Allows implementation of RPC feedback

Software choice¶

Theoretically, any server capable of speaking FastCGI to a backend process could be used. However, to keep the number of steps required for setting up a new cluster at roughly the same level, the implementation will be geared towards one specific HTTP server at the beginning. Support for other HTTP servers can still be implemented later.

After a rough selection of available HTTP servers lighttpd and nginx were the most likely candidates. Both are widely used and tested.

Nginx’s original documentation is in Russian; translations are available in a wiki. Nginx does not support old-style CGI programs.

The author found lighttpd’s documentation easier to understand and was able to configure a test server quickly. This, together with the support for more technologies, made deciding easier.

With its use as a public-facing web server on a large number of websites (and possibly more behind proxies), lighttpd should be a safe choice. Unlike other webservers, such as the Apache HTTP Server, lighttpd’s codebase is of manageable size.

Initially the HTTP server would only be used for import/export transfers, but its use can be expanded to the Remote API and node daemon (see RPC feedback).

To reduce the attack surface, an option will be provided to configure services (e.g. import/export) to only listen on certain network interfaces.

RPC feedback¶

HTTP/1.1 supports chunked transfers (RFC 2616, section 3.6.1). They could be used to provide feedback from node daemons to the master, similar to the feedback from jobs. A good use would be to provide feedback to the user during long-running operations, e.g. downloading an instance’s data from another cluster.

WSGI 1.0 (PEP 333) includes the following requirement:

WSGI servers, gateways, and middleware must not delay the transmission of any block; they must either fully transmit the block to the client, or guarantee that they will continue transmission even while the application is producing its next block

This behaviour was confirmed to work with lighttpd and the flup library. FastCGI by itself has no such guarantee; webservers with buffering might require artificial padding to force the message to be transmitted.

The node daemon can send JSON-encoded messages back to the master daemon by separating them using a predefined character (see LUXI). The final message contains the method’s result. pycURL passes each received chunk to the callback set as CURLOPT_WRITEFUNCTION. Once a message is complete, the master daemon can pass it to a callback function inside the job, which then decides on what to do (e.g. forward it as job feedback to the user).
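
A minimal sketch of the receiving side, assuming "\x03" as the message separator (as in LUXI); the URL and the callback are hypothetical:

import pycurl

EOM = "\x03"  # assumed end-of-message separator, as used by LUXI

class FeedbackBuffer(object):
  """Reassembles separator-delimited JSON messages from HTTP chunks."""
  def __init__(self, on_message):
    self.buf = ""
    self.on_message = on_message

  def write(self, chunk):
    # Called by pycURL for every received chunk, however small
    self.buf += chunk
    while EOM in self.buf:
      (msg, self.buf) = self.buf.split(EOM, 1)
      self.on_message(msg)

def handle_feedback(msg):
  pass  # e.g. decode the JSON message and forward it as job feedback

buf = FeedbackBuffer(handle_feedback)
curl = pycurl.Curl()
curl.setopt(pycurl.URL, "https://node1.example.com:1811/")  # hypothetical
curl.setopt(pycurl.WRITEFUNCTION, buf.write)
# curl.perform() would then stream each chunk into buf.write as it arrives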

A more detailed design may have to be written before deciding whether to implement RPC feedback.

Software requirements¶

Lighttpd SSL configuration¶

The following sample shows how to configure SSL with client certificates in Lighttpd:

$SERVER["socket"] == ":443" {
  ssl.engine = "enable"
  ssl.pemfile = "server.pem"
  ssl.ca-file = "ca.pem"
  ssl.use-sslv2  = "disable"
  ssl.cipher-list = "HIGH:-DES:-3DES:-EXPORT:-ADH"
  ssl.verifyclient.activate = "enable"
  ssl.verifyclient.enforce = "enable"
  ssl.verifyclient.exportcert = "enable"
  ssl.verifyclient.username = "SSL_CLIENT_S_DN_CN"
}
ganeti-2.9.3/doc/html/search.html0000644000000000000000000000544212271443673016654 0ustar00rootroot00000000000000 Search — Ganeti 2.9.3 documentation


ganeti-2.9.3/doc/html/design-hroller.html0000644000000000000000000003572412271443666020335 0ustar00rootroot00000000000000 HRoller tool — Ganeti 2.9.3 documentation

HRoller tool¶

This is a design document detailing the cluster maintenance scheduler, HRoller.

Current state and shortcomings¶

To enable automating cluster-wide reboots, a new htool called HRoller was added to Ganeti starting from version 2.7. This tool helps parallelize offline cluster maintenance by calculating which nodes are not both primary and secondary for a DRBD instance, and thus can be rebooted at the same time, when all instances are down.

The way this is done is documented in the hroller(1) manpage.

We would now like to perform online maintenance on the cluster by rebooting nodes after evacuating their primary instances (rolling reboots).

Proposed changes¶

New options¶

  • HRoller should be able to operate on single nodegroups (-G flag) or select its target nodes through some other means (e.g. via a tag or a regexp). (Note that individual node selection is already possible via the -O flag, which makes hroller ignore a node altogether.)
  • HRoller should handle non-redundant instances: currently these are ignored, but there should be a way to select between “it’s ok to reboot a node when a non-redundant instance is on it” and “skip nodes with non-redundant instances”. This will only be selectable globally, not per instance.
  • HRoller will make sure to keep any instance which is up in its current state, via live migrations, unless explicitly overridden. The algorithm that will be used to calculate the rolling reboot with live migrations is described below, and any override of the instance status check will only be possible for the whole run, not per instance.

Calculating rolling maintenances¶

In order to perform rolling maintenance we need to migrate instances off the nodes before a reboot. How this can be done depends on the instance’s disk template and status:

Down instances¶

If an instance was shutdown when the maintenance started it will be considered for avoiding contemporary reboot of its primary and secondary nodes, but will not be considered as a target for the node evacuation. This allows avoiding needlessly moving its primary around, since it won’t suffer a downtime anyway.

Note that a node with non-redundant instances will only ever be considered good for rolling-reboot if these are down (or the checking of status is overridden) and an explicit option to allow it is set.

DRBD¶

Each node must migrate all instances off to their secondaries, and then can either be rebooted, or the secondaries can be evacuated as well.

Since currently doing a replace-disks on DRBD breaks redundancy, it’s not any safer than temporarily rebooting a node with secondaries on it (citation needed). As such, we’ll implement just the “migrate+reboot” mode for now, and focus on replace-disks later.

In order to do that we can use the following algorithm:

  1. Compute node sets that don’t contain both the primary and the secondary of any instance, and also don’t contain the primary nodes of two instances that have the same node as secondary. These can be obtained by computing a coloring of the graph with nodes as vertices and an edge between two nodes if either condition prevents simultaneous maintenance. (This is the current algorithm of hroller(1), with the extension that the graph to be colored has additional edges between the primary nodes of two instances sharing their secondary node.) A sketch of such a coloring is given after this list.
  2. It is then possible to migrate in parallel all nodes in a set created at step 1, and then reboot/perform maintenance on them, and migrate back their original primaries, which allows the computation above to be reused for each following set without N+1 failures being triggered, if none were present before. See below about the actual execution of the maintenance.
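
A minimal sketch of such a greedy coloring (illustrative only; hroller’s actual implementation is in Haskell and may differ):

def color_nodes(nodes, edges):
  """Greedy graph coloring for step 1 above.

  'nodes' is a list of node names; 'edges' is a set of frozensets
  {node_a, node_b}, one per pair of nodes that must not be rebooted
  together.  Returns a dict mapping each node to a color; all nodes
  of one color form a set that can be maintained simultaneously.
  """
  colors = {}
  for node in sorted(nodes):
    used = set(colors[other] for other in colors
               if frozenset([node, other]) in edges)
    color = 0
    while color in used:
      color += 1
    colors[node] = color
  return colors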

Non-DRBD¶

All non-DRBD disk templates that can be migrated have no “secondary” concept. As such instances can be migrated to any node (in the same nodegroup). In order to do the job we can either:

  • Perform migrations on one node at a time, perform the maintenance on that node, and proceed (the node will then be targeted again to host instances automatically, as hail chooses targets for the instances between all nodes in a group). Nodes in different nodegroups can be handled in parallel.
  • Perform migrations on one node at a time, but without waiting for the first node to come back before proceeding. This allows us to continue, restricting the cluster’s capacity, until no more capacity is available in the nodegroup, at which point we have to wait for some nodes to come back so that capacity is available again for the last few nodes.
  • Pre-Calculate sets of nodes that can be migrated together (probably with a greedy algorithm) and parallelize between them, with the migrate-back approach discussed for DRBD to perform the calculation only once.

Note that for non-DRBD disks that still use local storage (e.g. RBD and plain), redundancy might break anyway, and nothing except the first algorithm might be safe. This would perhaps be a good reason to consider managing RBD pools better, if those are implemented on top of node storage rather than on dedicated storage machines.

Full-Evacuation¶

If full evacuation of the nodes to be rebooted is desired, a simple migration is not enough for the DRBD instances. To keep the number of disk operations small, we restrict moves to migrate, replace-secondary. That is, after migrating instances out of the nodes to be rebooted, replacement secondaries are searched for, for all instances that have their then secondary on one of the rebooted nodes. This is done by a greedy algorithm, refining the initial reboot partition, if necessary.

Future work¶

Hroller should become able to execute rolling maintenances, rather than just calculate them. For this to succeed properly one of the following must happen:

  • HRoller handles rolling maintenances that happen at the same time as unrelated cluster jobs, and thus recalculates the maintenance at each step
  • HRoller can selectively drain the cluster so it’s sure that only the rolling maintenance can be going on

The replace-disks functionality for DRBD nodes should be implemented. Note that once we support a DRBD version that allows multi-secondary, this can be done safely, without losing replication at any time, by adding a temporary secondary and dropping the previous one only when the sync is finished.

Non-redundant (plain or file) instances should have a way to be moved off as well via plain storage live migration or gnt-instance move (which requires downtime).

If/when RBD pools can be managed inside Ganeti, care can be taken so that the pool is evacuated as well from a node before it’s put into maintenance. This is equivalent to evacuating DRBD secondaries.

Master failovers during the maintenance should be performed by hroller. This requires RPC/RAPI support for master failover. Hroller should also be modified to better support running on the master itself and continuing on the new master.

ganeti-2.9.3/doc/html/design-daemons.html0000644000000000000000000005312312271443665020304 0ustar00rootroot00000000000000 Ganeti daemons refactoring — Ganeti 2.9.3 documentation

Ganeti daemons refactoring¶

This is a design document detailing the plan for refactoring the internal structure of Ganeti, and particularly the set of daemons it is divided into.

Current state and shortcomings¶

Ganeti is comprised of a growing number of daemons, each dealing with part of the tasks the cluster has to face, and communicating with the other daemons using a variety of protocols.

Specifically, as of Ganeti 2.8, the situation is as follows:

Master daemon (MasterD)

It is responsible for managing the entire cluster, and it’s written in Python. It is executed on a single node (the master node). It receives the commands given by the cluster administrator (through the remote API daemon or the command line tools) over the LUXI protocol. The master daemon is responsible for creating and managing the jobs that will execute such commands, and for managing the locks that ensure the cluster will not incur race conditions.

Each job is managed by a separate Python thread, that interacts with the node daemons via RPC calls.

The master daemon is also responsible for managing the configuration of the cluster, changing it when required by some job. It is also responsible for copying the configuration to the other master candidates after updating it.

RAPI daemon (RapiD)
It is written in Python and runs on the master node only. It waits for requests issued remotely through the remote API protocol. Then, it forwards them, using the LUXI protocol, to the master daemon (if they are commands) or to the query daemon if they are queries about the configuration (including live status) of the cluster.
Node daemon (NodeD)
It is written in Python. It runs on all the nodes. It is responsible for receiving the master requests over RPC and execute them, using the appropriate backend (hypervisors, DRBD, LVM, etc.). It also receives requests over RPC for the execution of queries gathering live data on behalf of the query daemon.
Configuration daemon (ConfD)
It is written in Haskell. It runs on all the master candidates. Since the configuration is replicated only on the master node, this daemon exists in order to provide information about the configuration to nodes needing it. The requests are done through ConfD’s own protocol, HMAC signed, implemented over UDP, and meant to be used by querying all the master candidates (or a subset thereof) in parallel and getting the most up-to-date answer. This is meant as a way to provide a robust service even in case the master is temporarily unavailable.
Query daemon (QueryD)
It is written in Haskell. It runs on all the master candidates. It replies to Luxi queries about the current status of the system, including live data it obtains by querying the node daemons through RPCs.
Monitoring daemon (MonD)
It is written in Haskell. It runs on all nodes, including the ones that are not vm-capable. It is meant to provide information on the status of the system. Such information is related only to the specific node the daemon is running on, and it is provided as JSON encoded data over HTTP, to be easily readable by external tools. The monitoring daemon communicates with ConfD to get information about the configuration of the cluster. The choice of communicating with ConfD instead of MasterD allows it to obtain configuration information even when the cluster is heavily degraded (e.g.: when master and some, but not all, of the master candidates are unreachable).

The current structure of the Ganeti daemons is inefficient because there are many different protocols involved, and each daemon needs to be able to use multiple ones and has to deal with doing different things, thus sometimes making it unclear which daemon is responsible for performing a specific task.

Also, with the current configuration, jobs are managed by the master daemon using python threads. This makes terminating a job after it has started a difficult operation, and it is the main reason why this is not possible yet.

The master daemon currently has too many different tasks, which could be handled better if split among several daemons.

Proposed changes¶

In order to improve on the current situation, a new daemon subdivision is proposed, and presented hereafter.

digraph "new-daemons-structure" {
{rank=same; RConfD LuxiD;}
{rank=same; Jobs rconfigdata;}
node [shape=box]
RapiD [label="RapiD [M]"]
LuxiD [label="LuxiD [M]"]
WConfD [label="WConfD [M]"]
Jobs [label="Jobs [M]"]
RConfD [label="RConfD [MC]"]
MonD [label="MonD [All]"]
NodeD [label="NodeD [All]"]
Clients [label="gnt-*\nclients [M]"]
p1 [shape=none, label=""]
p2 [shape=none, label=""]
p3 [shape=none, label=""]
p4 [shape=none, label=""]
configdata [shape=none, label="config.data"]
rconfigdata [shape=none, label="config.data\n[MC copy]"]
locksdata [shape=none, label="locks.data"]

RapiD -> LuxiD [label="LUXI"]
LuxiD -> WConfD [label="WConfD\nproto"]
LuxiD -> Jobs [label="fork/exec"]
Jobs -> WConfD [label="WConfD\nproto"]
Jobs -> NodeD [label="RPC"]
LuxiD -> NodeD [label="RPC"]
rconfigdata -> RConfD
configdata -> rconfigdata [label="sync via\nNodeD RPC"]
WConfD -> NodeD [label="RPC"]
WConfD -> configdata
WConfD -> locksdata
MonD -> RConfD [label="RConfD\nproto"]
Clients -> LuxiD [label="LUXI"]
p1 -> MonD [label="MonD proto"]
p2 -> RapiD [label="RAPI"]
p3 -> RConfD [label="RConfD\nproto"]
p4 -> Clients [label="CLI"]
}

LUXI daemon (LuxiD)
It will be written in Haskell. It will run on the master node and it will be the only LUXI server, replying to all the LUXI queries. These include both the queries about the live configuration of the cluster, previously served by QueryD, and the commands actually changing the status of the cluster by submitting jobs. Therefore, this daemon will also be the one responsible for managing the job queue. When a job needs to be executed, LuxiD will spawn a separate process tasked with the execution of that specific job, thus making it easier to terminate the job itself, if needed. When a job requires locks, LuxiD will request them from WConfD. In order to keep availability of the cluster in case of failure of the master node, LuxiD will replicate the job queue to the other master candidates, by RPCs to the NodeD running there (the choice of RPCs for this task might be reviewed at a later time, after implementing this design).
Configuration management daemon (WConfD)
It will run on the master node and it will be responsible for the management of the authoritative copy of the cluster configuration (that is, it will be the daemon actually modifying the config.data file). All requests for configuration changes will have to pass through this daemon, and will be performed using a LUXI-like protocol (“WConfD proto” in the graph; the exact protocol will be defined in the separate design document that will detail the WConfD separation). A toy sketch of such an IPC request is given after this list. Having a single point of configuration management will also allow Ganeti to get rid of possible race conditions due to concurrent modifications of the configuration. When the configuration is updated, it will have to push the received changes to the other master candidates, via RPCs, so that RConfD daemons and (in case of a failure on the master node) the WConfD daemon on the new master can access an up-to-date version of it (the choice of RPCs for this task might be reviewed at a later time). This daemon will also be the one responsible for managing the locks, granting them to the jobs requesting them, and taking care of freeing them up if the jobs holding them crash or are terminated before releasing them. In order to do this, each job, after being spawned by LuxiD, will open a local unix socket that will be used to communicate with it, and will be destroyed when the job terminates. LuxiD will be able to check, after a timeout, whether the job is still running by connecting there, and to ask WConfD to forcefully remove the locks if the socket is closed. Also, WConfD should hold a serialized list of the locks and their owners in a file (locks.data), so that it can keep track of their status in case it crashes and needs to be restarted (by asking LuxiD which of them are still running). Interaction with this daemon will be performed using Unix sockets.
Configuration query daemon (RConfD)
It is written in Haskell, and it corresponds to the old ConfD. It will run on all the master candidates and it will serve information about the static configuration of the cluster (the one contained in config.data). The provided information will be highly available (as in: a response will be available as long as a stable-enough connection between the client and at least one working master candidate is available) and its freshness will be best effort (the most recent reply from any of the master candidates will be returned, but it might still be older than the one available through WConfD). The information will be served through the ConfD protocol.
Rapi daemon (RapiD)
It remains basically unchanged, with the only difference that all of its LUXI queries are directed towards LuxiD instead of being split between MasterD and QueryD.
Monitoring daemon (MonD)
It remains unaffected by the changes in this design document. It will just get some of the data it needs from RConfD instead of the old ConfD, but the interfaces of the two are identical.
Node daemon (NodeD)
It remains unaffected by the changes proposed in this design document, the only difference being that it will receive its RPCs from LuxiD (for job queue replication), from WConfD (for configuration replication) and from the processes executing single jobs (for all the operations to be performed by nodes), instead of receiving them just from MasterD.
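
For concreteness, a toy sketch of what a LUXI-like IPC request over a Unix socket could look like (the actual WConfD protocol is yet to be specified in its own design document; the function, the message framing and the separator below are all assumptions):

import json
import socket

def wconfd_request(sock_path, method, args):
  # Toy sketch only; not the real WConfD protocol
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.connect(sock_path)
  try:
    sock.sendall(json.dumps({"method": method, "args": args}) + "\x03")
    data = ""
    while not data.endswith("\x03"):
      chunk = sock.recv(4096)
      if not chunk:
        break
      data += chunk
    return json.loads(data.rstrip("\x03"))
  finally:
    sock.close()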

This restructuring will allow us to reorganize and improve the codebase, introducing cleaner interfaces and giving well defined and more restricted tasks to each daemon.

Furthermore, having more well-defined interfaces will allow us to have easier upgrade procedures, and to work towards the possibility of upgrading single components of a cluster one at a time, without the need for immediately upgrading the entire cluster in a single step.

Implementation¶

While performing this refactoring, we aim to increase the amount of Haskell code, thus benefiting from the additional type safety provided by its wide compile-time checks. In particular, all the job queue management and the configuration management daemon will be written in Haskell, taking over the role currently fulfilled by Python code executed as part of MasterD.

The changes described by this design document are quite extensive, therefore they will not be implemented all at the same time, but through a sequence of steps, leaving the codebase in a consistent and usable state.

  1. Rename QueryD to LuxiD. A part of LuxiD, the one replying to configuration queries including live information about the system, already exists in the form of QueryD. This is being renamed to LuxiD, and will form the first part of the new daemon. NB: this is happening starting from Ganeti 2.8. At the beginning, only the already existing queries will be replied to by LuxiD. More queries will be implemented in the next versions.
  2. Let LuxiD be the interface for the queries and MasterD be their executor. Currently, MasterD is the only one responsible for receiving and executing LUXI queries, and for managing the jobs they create. Receiving the queries and managing the job queue will be extracted from MasterD into LuxiD. Actually executing jobs will still be done by MasterD, which contains all the logic for doing that and for properly managing locks and the configuration. A separate design document will detail how the system will decide which jobs to send over for execution, and how to rate-limit them.
  3. Extract WConfD from MasterD. The logic for managing the configuration file is factored out to the dedicated WConfD daemon. All configuration changes, currently executed directly by MasterD, will be changed to be IPC requests sent to the new daemon.
  4. Extract locking management from MasterD. The logic for managing and granting locks is extracted to WConfD as well. Locks will not be taken directly anymore, but asked via IPC to WConfD. This step can be executed on its own or at the same time as the previous one.
  5. Jobs are executed as processes. The logic for running jobs is rewritten so that each job can be managed by an independent process. LuxiD will spawn a new (Python) process for every single job. The RPCs will remain unchanged, and the LU code will stay as is as much as possible. MasterD will cease to exist as a daemon on its own at this point, but not before.

Further considerations¶

There is a possibility that a job will finish performing its task while LuxiD and/or WConfD are not available. In order to deal with this situation, each job will write the results of its execution to a file. The name of this file will be known to LuxiD before starting the job, and will be stored together with the job ID and the name of the job-unique socket.

The job, upon ending its execution, will signal LuxiD (through the socket), so that it can read the result of the execution and release the locks as needed.

In case LuxiD is not available at that time, the job will just terminate without signalling it, writing the results to its file as usual. When a new LuxiD becomes available, it will have the most up-to-date list of running jobs (received via replication from the former LuxiD), and will go through it, cleaning up all the terminated jobs.


ganeti-2.9.3/doc/html/design-cpu-pinning.html0000644000000000000000000003775712271443665021124 0ustar00rootroot00000000000000 Ganeti CPU Pinning — Ganeti 2.9.3 documentation

Ganeti CPU Pinning¶

Objective¶

This document defines Ganeti’s support for CPU pinning (aka CPU affinity).

CPU pinning enables the mapping and unmapping of entire virtual machines, or of specific virtual CPUs (vCPUs), to a physical CPU or a range of CPUs.

At this stage, pinning will be implemented for Xen and KVM.

Command Line¶

Suggested command line parameters for controlling CPU pinning are as follows:

gnt-instance modify -H cpu_mask=<cpu-pinning-info> <instance>

cpu-pinning-info can be any of the following:

  • One vCPU mapping, which can be the word “all” or a combination of CPU numbers and ranges separated by commas. In this case, all vCPUs will be mapped to the indicated list.

  • A list of vCPU mappings, separated by a colon ‘:’. In this case each vCPU is mapped to an entry in the list, and the size of the list must match the number of vCPUs defined for the instance. This is enforced when setting CPU pinning or when setting the number of vCPUs using -B vcpus=#.

    The mapping list is matched to consecutive virtual CPUs, so the first entry would be the CPU pinning information for vCPU 0, the second entry for vCPU 1, etc.

The default setting for new instances is “all”, which maps the entire instance to all CPUs, thus effectively turning off CPU pinning.

Here are some usage examples:

# Map vCPU 0 to physical CPU 1 and vCPU 1 to CPU 3 (assuming 2 vCPUs)
gnt-instance modify -H cpu_mask=1:3 my-inst

# Pin vCPU 0 to CPUs 1 or 2, and vCPU 1 to any CPU
gnt-instance modify -H cpu_mask=1-2:all my-inst

# Pin vCPU 0 to any CPU, vCPU 1 to CPUs 1, 3, 4 or 5, and CPU 2 to
# CPU 0
gnt-instance modify -H cpu_mask=all:1\\,3-5:0 my-inst

# Pin entire VM to CPU 0
gnt-instance modify -H cpu_mask=0 my-inst

# Turn off CPU pinning (default setting)
gnt-instance modify -H cpu_mask=all my-inst

Assuming an instance has 3 vCPUs, the following commands will fail:

# not enough mappings
gnt-instance modify -H cpu_mask=0:1 my-inst

# too many
gnt-instance modify -H cpu_mask=2:1:1:all my-inst

Validation¶

CPU pinning information is validated by making sure it matches the number of vCPUs. This validation happens when changing either the cpu_mask or vcpus parameters. Changing either parameter in a way that conflicts with the other will fail with a proper error message. To make such a change, both parameters should be modified at the same time. For example: gnt-instance modify -B vcpus=4 -H cpu_mask=1:1:2-3:4\\,6 my-inst

Besides validating the CPU configuration, i.e. that the number of vCPUs matches the requested CPU pinning, Ganeti will also verify that the number of physical CPUs is enough to support the required configuration. For example, trying to run a configuration of vcpus=2,cpu_mask=0:4 on a node with 4 cores will fail (note: CPU numbers are 0-based).

This validation should repeat every time an instance is started or migrated live. See more details under Migration below.

Cluster verification should also test the compatibility of the other nodes in the cluster with the required configuration, and alert if a minimum requirement is not met.

Failover¶

CPU pinning configuration can be transferred from node to node, unless the number of physical CPUs is smaller than what the configuration calls for. It is suggested that unless this is the case, all transfers and migrations will succeed.

In case the number of physical CPUs is smaller than the numbers indicated by CPU pinning information, instance failover will fail.

In case of emergency, to force failover to ignore mismatching CPU information, the following switch can be used: gnt-instance failover --fix-cpu-mismatch my-inst. This command will try to failover the instance with the current cpu mask, but if that fails, it will change the mask to be “all”.

Migration¶

In case of live migration, and in addition to failover considerations, it is required to remap CPU pinning after migration. This can be done in real time for instances on both Xen and KVM, and only depends on the number of physical CPUs being sufficient to support the migrated instance.

Data¶

Pinning information will be kept as a list of integers per vCPU. To mark a mapping of any CPU, we will use (-1). A single entry, no matter what the number of vCPUs is, will always mean that all vCPUs have the same mapping.
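
A minimal sketch of turning a cpu_mask string into that representation (illustrative only; Ganeti’s actual parsing code may differ):

def parse_cpu_mask(cpu_mask):
  """Parse a cpu_mask string like "1:2,4-7:0-1" into per-vCPU CPU lists.

  "all" is represented as [-1], matching the "any CPU" marker above.
  """
  result = []
  for vcpu in cpu_mask.split(":"):
    if vcpu == "all":
      result.append([-1])
      continue
    cpus = []
    for part in vcpu.split(","):
      if "-" in part:
        (lo, hi) = part.split("-")
        cpus.extend(range(int(lo), int(hi) + 1))
      else:
        cpus.append(int(part))
    result.append(cpus)
  return result

# parse_cpu_mask("1:2,4-7:0-1") == [[1], [2, 4, 5, 6, 7], [0, 1]]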

Configuration file¶

The pinning information is kept, as the original string, in the hypervisor parameters section of each instance in the configuration file.

Xen¶

There are 2 ways to control pinning in Xen, either via the command line or through the configuration file.

The commands to make direct pinning changes are the following:

# To pin a vCPU to a specific CPU
xm vcpu-pin <domain> <vcpu> <cpu>

# To unpin a vCPU
xm vcpu-pin <domain> <vcpu> all

# To get the current pinning status
xm vcpu-list <domain>

Since controlling Xen in Ganeti is currently done through the configuration file, it is straightforward to use the same method for CPU pinning. There are two different parameters that control Xen’s CPU pinning and configuration:

vcpus
controls the number of vCPUs
cpus
maps vCPUs to physical CPUs

When no pinning is required (pinning information is “all”), the “cpus” entry is removed from the configuration file.

For all other cases, the configuration is “translated” to Xen, which expects either cpus = "a" or cpus = [ "a", "b", "c", ...], where each of a, b or c is a physical CPU number, a CPU range, or a combination thereof; the number of entries (if a list is used) must match the number of vCPUs, and they are mapped in order.

For example, CPU pinning information of 1:2,4-7:0-1 is translated to this entry in Xen’s configuration cpus = [ "1", "2,4-7", "0-1" ]

KVM¶

Controlling pinning in KVM is a little more complicated as there is no configuration to control pinning before instances are started.

The way to change or assign CPU pinning under KVM is to use taskset or its underlying system call sched_setaffinity. Setting the affinity for the VM process will change CPU pinning for the entire VM, and setting it for specific vCPU threads will control specific vCPUs.

The sequence of commands to control pinning is this: start the instance with the -S switch, so it halts before starting execution; get the process ID or identify the thread IDs of each vCPU by sending info cpus to the monitor; map vCPUs as required by the cpu-pinning information; and issue a cont command on the KVM monitor to allow the instance to start execution.

For example, a sequence of commands to control CPU affinity under KVM may be:

  • Start KVM: /usr/bin/kvm … <kvm-command-line-options> … -S
  • Use socat to connect to monitor
  • send info cpus to monitor to get thread/vCPU information
  • call sched_setaffinity for each thread with the CPU mask
  • send cont to KVM’s monitor

A CPU mask is a hexadecimal bit mask where each bit represents one physical CPU. See man page for sched_setaffinity(2) for more details.

For example, to run a specific thread-id on CPUs 1 or 3 the mask is 0x0000000A.

We will control process and thread affinity using the python affinity package (http://pypi.python.org/pypi/affinity). This package is a Python wrapper around the two affinity system calls, and has no other requirements.
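
A minimal sketch of using that package (the thread ID would come from the “info cpus” monitor command described above):

import affinity  # wrapper around the sched_(get|set)affinity calls

def pin_vcpu_thread(tid, cpus):
  # Build a bit mask with one bit set per allowed physical CPU;
  # e.g. cpus=[1, 3] gives 0x0000000A, as in the example above
  mask = 0
  for cpu in cpus:
    mask |= 1 << cpu
  affinity.set_process_affinity_mask(tid, mask)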

Alternative Design Options¶

  1. There’s an option to ignore the limitations of the underlying hypervisor and, instead of requiring explicit pinning information for all vCPUs, assume a mapping of “all” for any vCPU not mentioned. This can lead to inadvertently missing information, but either way, since using cpu-pinning options is probably not going to be frequent, there’s no real advantage.


ganeti-2.9.3/doc/html/design-device-uuid-name.html0000644000000000000000000002246212271443666022002 0ustar00rootroot00000000000000 Design for adding UUID and name to devices — Ganeti 2.9.3 documentation

Design for adding UUID and name to devices¶

This is a design document about adding UUID and name to instance devices (Disks/NICs) and the ability to reference them by those identifiers.

Current state and shortcomings¶

Currently, the only way to refer to a device (Disk/NIC) is by its index inside the VM (e.g. gnt-instance modify –disk 2:remove).

Using indices as identifiers has the drawback that the addition/removal of a device changes the identifiers (indices) of other devices and makes the net effect of commands depend on their strict ordering. A device reference is not absolute, meaning an external entity controlling Ganeti, e.g. over RAPI, cannot keep permanent identifiers for referring to devices, nor can it have more than one outstanding command, since their order of execution is not guaranteed.

Proposed Changes¶

To be able to reference a device in a unique way, we propose to extend Disks and NICs by assigning to them a UUID and a name. The UUID will be assigned by Ganeti upon creation, while the name will be an optional user parameter. Renaming a device will also be supported.

Commands (e.g. gnt-instance modify) will be able to reference each device by its index, UUID, or name. To be able to refer to devices by name, we must guarantee that device names are unique. Unlike other objects (instances, networks, nodegroups, etc.), NIC and Disk objects will not have unique names across the cluster, since they are still not independent entities, but rather part of the instance object. This makes global uniqueness of names hard to achieve at this point. Instead their names will be unique at instance level.

Apart from unique device names, we must also guarantee that a device name cannot be the UUID of another device. Also, to remove ambiguity while supporting both indices and names as identifiers, we forbid purely numeric device names.

Implementation Details¶

Modify OpInstanceSetParams to accept not only indexes, but also device names and UUIDs. So, the accepted NIC and disk modifications will have the following format:

identifier:action,key=value

where, from now on, identifier can be an index (-1 for the last device), UUID, or name and action should be add, modify, or remove.
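
For example, the following commands would all remove the same disk, assuming it is the third disk of the instance, carries the (hypothetical) UUID shown, and was named data-disk:

# By index, UUID, or name, respectively
gnt-instance modify --disk 2:remove my-inst
gnt-instance modify --disk 6bd2a6a4-2b4c-4b45-b92d-ea78172821f5:remove my-inst
gnt-instance modify --disk data-disk:remove my-inst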

Configuration Changes¶

Disk and NIC config objects get two extra slots:

  • uuid
  • name

Instance Queries¶

We will extend the query mechanism to expose names and UUIDs of NICs and Disks.

Hook Variables¶

We will expose the name of NICs and Disks to the hook environment of instance-related operations:

INSTANCE_NIC%d_NAME INSTANCE_DISK%d_NAME

ganeti-2.9.3/doc/html/design-2.6.html0000644000000000000000000001000712271443665017155 0ustar00rootroot00000000000000 Ganeti 2.6 design — Ganeti 2.9.3 documentation

Ganeti 2.6 design¶

The following design documents have been implemented in Ganeti 2.6:


ganeti-2.9.3/doc/html/design-2.1.html0000644000000000000000000025362412271443664017165 0ustar00rootroot00000000000000 Ganeti 2.1 design — Ganeti 2.9.3 documentation

Ganeti 2.1 design¶

This document describes the major changes in Ganeti 2.1 compared to the 2.0 version.

The 2.1 version will be a relatively small release. Its main aim is to avoid changing too much of the core code, while addressing issues and adding new features and improvements over 2.0, in a timely fashion.

Objective¶

Ganeti 2.1 will add features to help further automation of cluster operations, further improve scalability to even bigger clusters, and make it easier to debug the Ganeti core.

Detailed design¶

As for 2.0 we divide the 2.1 design into three areas:

  • core changes, which affect the master daemon/job queue/locking or all/most logical units
  • logical unit/feature changes
  • external interface changes (eg. command line, os api, hooks, ...)

Core changes¶

Storage units modelling¶

Currently, Ganeti has a good model of the block devices for instances (e.g. LVM logical volumes, files, DRBD devices, etc.) but none of the storage pools that are providing the space for these front-end devices. For example, there are hardcoded inter-node RPC calls for volume group listing, file storage creation/deletion, etc.

The storage units framework will implement a generic handling for all kinds of storage backends:

  • LVM physical volumes
  • LVM volume groups
  • File-based storage directories
  • any other future storage method

There will be a generic list of methods that each storage unit type will provide, like:

  • list of storage units of this type
  • check status of the storage unit

Additionally, there will be specific methods for each method, for example:

  • enable/disable allocations on a specific PV
  • file storage directory creation/deletion
  • VG consistency fixing

This will allow much better modeling and unification of the various RPC calls related to backend storage pools in the future. Ganeti 2.1 is intended to add the basics of the framework, and not necessarily move all the current VG/file-based operations to it.

Note that while we model both LVM PVs and LVM VGs, the framework will not model any relationship between the different types. In other words, we model neither inheritance nor stacking, since this is too complex for our needs. While a vgreduce operation on a LVM VG could actually remove a PV from it, this will not be handled at the framework level, but at the individual operation level. The goal is that this is a lightweight framework for abstracting the different storage operations, not for modelling the storage hierarchy.

Locking improvements¶

Current State and shortcomings¶

The class LockSet (see lib/locking.py) is a container for one or many SharedLock instances. It provides an interface to add/remove locks and to acquire and subsequently release any number of those locks contained in it.

Locks in a LockSet are always acquired in alphabetic order. Due to the way we’re using locks for nodes and instances (the single cluster lock isn’t affected by this issue) this can lead to long delays when acquiring locks if another operation tries to acquire multiple locks but has to wait for yet another operation.

In the following demonstration we assume to have the instance locks inst1, inst2, inst3 and inst4.

  1. Operation A grabs lock for instance inst4.
  2. Operation B wants to acquire all instance locks in alphabetic order, but it has to wait for inst4.
  3. Operation C tries to lock inst1, but it has to wait until Operation B (which is trying to acquire all locks) releases the lock again.
  4. Operation A finishes and releases lock on inst4. Operation B can continue and eventually releases all locks.
  5. Operation C can get inst1 lock and finishes.

Technically there’s no need for Operation C to wait for Operation A, and subsequently Operation B, to finish. Operation B can’t continue until Operation A is done (it has to wait for inst4), anyway.

Proposed changes¶
Non-blocking lock acquiring¶

Acquiring locks for OpCode execution is always done in blocking mode. They won’t return until the lock has successfully been acquired (or an error occurred, although we won’t cover that case here).

SharedLock and LockSet must be able to be acquired in a non-blocking way. They must support a timeout and abort trying to acquire the lock(s) after the specified amount of time.

Retry acquiring locks¶

To prevent other operations from waiting for a long time, such as described in the demonstration before, LockSet must not keep locks for a prolonged period of time when trying to acquire two or more locks. Instead it should, with an increasing timeout for acquiring all locks, release all locks again and sleep some time if it fails to acquire all requested locks.

A good timeout value needs to be determined. In any case, LockSet should proceed to acquire locks in blocking mode after a few (unsuccessful) attempts to acquire all requested locks.

One proposal for the timeout is to use 2**tries seconds, where tries is the number of unsuccessful tries.
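
A minimal sketch of this retry strategy (the LockSet interface shown here, with a truthy return value on success and nothing held after a failed attempt, is an assumption for illustration):

import time

def acquire_with_backoff(lockset, names, max_tries=5):
  # Try to acquire all requested locks with an increasing timeout;
  # on failure everything is released and we try again, falling
  # back to blocking mode after max_tries attempts
  for tries in range(max_tries):
    if lockset.acquire(names, timeout=2 ** tries):
      return True
    time.sleep(1)
  return lockset.acquire(names)  # final attempt in blocking mode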

In the demonstration before this would allow Operation C to continue after Operation B unsuccessfully tried to acquire all locks and released all acquired locks (inst1, inst2 and inst3) again.

Other solutions discussed¶

There was also some discussion on going one step further and extend the job queue (see lib/jqueue.py) to select the next task for a worker depending on whether it can acquire the necessary locks. While this may reduce the number of necessary worker threads and/or increase throughput on large clusters with many jobs, it also brings many potential problems, such as contention and increased memory usage, with it. As this would be an extension of the changes proposed before it could be implemented at a later point in time, but we decided to stay with the simpler solution for now.

Implementation details¶
SharedLock redesign¶

The current design of SharedLock is not good for supporting timeouts when acquiring a lock and there are also minor fairness issues in it. We plan to address both with a redesign. A proof of concept implementation was written and resulted in significantly simpler code.

Currently SharedLock uses two separate queues for shared and exclusive acquires and waiters get to run in turns. This means if an exclusive acquire is released, the lock will allow shared waiters to run and vice versa. Although it’s still fair in the end there is a slight bias towards shared waiters in the current implementation. The same implementation with two shared queues can not support timeouts without adding a lot of complexity.

Our proposed redesign changes SharedLock to have only one single queue. There will be one condition (see Condition for a note about performance) in the queue per exclusive acquire and two for all shared acquires (see below for an explanation). The maximum queue length will always be 2 + (number of exclusive acquires waiting). The number of queue entries for shared acquires can vary from 0 to 2.

The two conditions for shared acquires are a bit special. They will be used in turn. When the lock is instantiated, no conditions are in the queue. As soon as the first shared acquire arrives (and there are holder(s) or waiting acquires; see Acquire), the active condition is added to the queue. Until it becomes the topmost condition in the queue and has been notified, any shared acquire is added to this active condition. When the active condition is notified, the conditions are swapped and further shared acquires are added to the previously inactive condition (which has now become the active condition). After all waiters on the previously active (now inactive) and now notified condition received the notification, it is removed from the queue of pending acquires.

This means shared acquires will skip any exclusive acquire in the queue. We believe it’s better to improve parallelization on operations only asking for shared (or read-only) locks. Exclusive operations holding the same lock can not be parallelized.

Acquire¶

For exclusive acquires a new condition is created and appended to the queue. Shared acquires are added to the active condition for shared acquires and if the condition is not yet on the queue, it’s appended.

The next step is to wait for our condition to be on the top of the queue (to guarantee fairness). If the timeout expires, we return to the caller without acquiring the lock. On every notification we check whether the lock has been deleted, in which case an error is returned to the caller.

The lock can be acquired if we’re on top of the queue (there is no one else ahead of us). For an exclusive acquire, there must not be other exclusive or shared holders. For a shared acquire, there must not be an exclusive holder. If these conditions are all true, the lock is acquired and we return to the caller. In any other case we wait again on the condition.

If it was the last waiter on a condition, the condition is removed from the queue.

Optimization: There’s no need to touch the queue if there are no pending acquires and no current holders. The caller can have the lock immediately.

digraph "design-2.1-lock-acquire" {
graph[fontsize=8, fontname="Helvetica"]
node[fontsize=8, fontname="Helvetica", width="0", height="0"]
edge[fontsize=8, fontname="Helvetica"]

/* Actions */
abort[label="Abort\n(couldn't acquire)"]
acquire[label="Acquire lock"]
add_to_queue[label="Add condition to queue"]
wait[label="Wait for notification"]
remove_from_queue[label="Remove from queue"]

/* Conditions */
alone[label="Empty queue\nand can acquire?", shape=diamond]
have_timeout[label="Do I have\ntimeout?", shape=diamond]
top_of_queue_and_can_acquire[
  label="On top of queue and\ncan acquire lock?",
  shape=diamond,
  ]

/* Lines */
alone->acquire[label="Yes"]
alone->add_to_queue[label="No"]

have_timeout->abort[label="Yes"]
have_timeout->wait[label="No"]

top_of_queue_and_can_acquire->acquire[label="Yes"]
top_of_queue_and_can_acquire->have_timeout[label="No"]

add_to_queue->wait
wait->top_of_queue_and_can_acquire
acquire->remove_from_queue
}

Release¶

First the lock removes the caller from the internal owner list. If there are pending acquires in the queue, the first (the oldest) condition is notified.

If the first condition was the active condition for shared acquires, the inactive condition will be made active. This ensures fairness with exclusive locks by forcing consecutive shared acquires to wait in the queue.

digraph "design-2.1-lock-release" {
graph[fontsize=8, fontname="Helvetica"]
node[fontsize=8, fontname="Helvetica", width="0", height="0"]
edge[fontsize=8, fontname="Helvetica"]

/* Actions */
remove_from_owners[label="Remove from owner list"]
notify[label="Notify topmost"]
swap_shared[label="Swap shared conditions"]
success[label="Success"]

/* Conditions */
have_pending[label="Any pending\nacquires?", shape=diamond]
was_active_queue[
  label="Was active condition\nfor shared acquires?",
  shape=diamond,
  ]

/* Lines */
remove_from_owners->have_pending

have_pending->notify[label="Yes"]
have_pending->success[label="No"]

notify->was_active_queue

was_active_queue->swap_shared[label="Yes"]
was_active_queue->success[label="No"]

swap_shared->success
}

Delete¶

The caller must either hold the lock in exclusive mode already or the lock must be acquired in exclusive mode. Trying to delete a lock while it’s held in shared mode must fail.

After ensuring the lock is held in exclusive mode, the lock will mark itself as deleted and continue to notify all pending acquires. They will wake up, notice the deleted lock and return an error to the caller.

Condition¶

Note: This is not necessary for the locking changes above, but it may be a good optimization (pending performance tests).

The existing locking code in Ganeti 2.0 uses Python’s built-in threading.Condition class. Unfortunately Condition implements timeouts by sleeping 1ms to 20ms between tries to acquire the condition lock in non-blocking mode. This requires unnecessary context switches and contention on the CPython GIL (Global Interpreter Lock).

By using POSIX pipes (see pipe(2)) we can use the operating system’s support for timeouts on file descriptors (see select(2)). A custom condition class will have to be written for this.

On instantiation the class creates a pipe. After each notification the previous pipe is abandoned and re-created (technically the old pipe needs to stay around until all notifications have been delivered).

All waiting clients of the condition use select(2) or poll(2) to wait for notifications, optionally with a timeout. A notification will be signalled to the waiting clients by closing the pipe. If the pipe wasn’t closed during the timeout, the waiting function returns to its caller nonetheless.
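
A minimal sketch of such a pipe-based condition (lock handling, pipe re-creation after notification, and error cases are omitted):

import os
import select

class PipeCondition(object):
  def __init__(self):
    (self._read_fd, self._write_fd) = os.pipe()

  def wait(self, timeout=None):
    # select(2) returns as soon as the write end has been closed;
    # an empty list means the timeout expired first
    (readable, _, _) = select.select([self._read_fd], [], [], timeout)
    return bool(readable)

  def notify_all(self):
    # Closing the pipe wakes up all waiters at once; a complete
    # implementation would re-create the pipe for the next round
    os.close(self._write_fd)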

Node daemon availability¶

Current State and shortcomings¶

Currently, when a Ganeti node suffers serious system disk damage, the migration/failover of an instance may not correctly shut down the virtual machine on the broken node, causing instance duplication. The gnt-node powercycle command can be used to force a node reboot and thus avoid duplicated instances. This command relies on node daemon availability, though, and thus can fail if the node daemon has some pages swapped out of RAM, for example.

Proposed changes¶

The proposed solution forces the node daemon to run exclusively in RAM. It uses Python ctypes to call mlockall(MCL_CURRENT | MCL_FUTURE) on the node daemon process and all its children. In addition, another log handler has been implemented for the node daemon to redirect to /dev/console messages that cannot be written to the logfile.
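
A minimal sketch of such a ctypes call (constant values as defined in <sys/mman.h> on Linux):

import ctypes

MCL_CURRENT = 1
MCL_FUTURE = 2

libc = ctypes.cdll.LoadLibrary("libc.so.6")
if libc.mlockall(MCL_CURRENT | MCL_FUTURE) != 0:
  raise OSError("mlockall(2) failed")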

With these changes the node daemon can successfully run basic tasks such as a powercycle request even when the system disk is heavily damaged and reading/writing to disk fails constantly.
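
The ctypes part of this change can be sketched as follows (the flag values are as in the Linux <sys/mman.h> headers; error handling is simplified):

import ctypes
import ctypes.util
import os

# Flag values from the Linux <sys/mman.h> headers
MCL_CURRENT = 1
MCL_FUTURE = 2

def Mlockall():
  """Lock all current and future pages of this process into RAM."""
  libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
  if libc.mlockall(MCL_CURRENT | MCL_FUTURE) != 0:
    err = ctypes.get_errno()
    raise OSError(err, os.strerror(err), "mlockall")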

New Features¶

Automated Ganeti Cluster Merger¶

Current situation¶

Currently there’s no easy way to merge two or more clusters together, but in order to optimize resources this is a needed missing piece. The goal of this design doc is to come up with an easy-to-use solution which allows you to merge two or more clusters together.

Initial contact¶

As the design of Ganeti is based on an autonomous system, Ganeti by itself has no way to reach nodes outside of its cluster. To overcome this situation we’re required to prepare the cluster before we can go ahead with the actual merge: we have to replace at least the ssh keys on the affected nodes before we can run any gnt- commands.

To make this an automated process we’ll ask the user to provide us with the root password of every cluster we have to merge. We use the password to grab the current id_dsa key and then rely on that ssh key for any further communication until the cluster is fully merged.

Cluster merge¶

After initial contact we do the cluster merge:

  1. Grab the list of nodes
  2. On all nodes add our own id_dsa.pub key to authorized_keys
  3. Stop all instances running on the merging cluster
  4. Disable ganeti-watcher as it tries to restart Ganeti daemons
  5. Stop all Ganeti daemons on all merging nodes
  6. Grab the config.data from the master of the merging cluster
  7. Stop local ganeti-masterd
  8. Merge the config:
    1. Open our own cluster config.data
    2. Open cluster config.data of the merging cluster
    3. Grab all nodes of the merging cluster
    4. Set master_candidate to false on all merging nodes
    5. Add the nodes to our own cluster config.data
    6. Grab all the instances on the merging cluster
    7. Adjust the port if the instance has drbd layout:
      1. In logical_id (index 2)
      2. In physical_id (index 1 and 3)
    8. Add the instances to our own cluster config.data
  9. Start ganeti-masterd with --no-voting --yes-do-it
  10. gnt-node add --readd on all merging nodes
  11. gnt-cluster redist-conf
  12. Restart ganeti-masterd normally
  13. Enable ganeti-watcher again
  14. Start all merging instances again
Rollback¶

Until we actually (re)add any nodes we can abort and rollback the merge at any point. After merging the config, though, we have to restore the backup copy of config.data (from another master candidate node). For security reasons it’s also a good idea to undo the id_dsa.pub distribution by going to every affected node and removing the id_dsa.pub key again. Finally, we have to keep in mind that the Ganeti daemons and the instances have to be started again.

Verification¶

Last but not least we should verify that the merge was successful. Therefore we run gnt-cluster verify, which ensures that the cluster overall is in a healthy state. Additionally, it’s possible to compare the list of instances/nodes with a list made prior to the merge, to make sure we didn’t lose any data/instance/node.

Appendix¶
cluster-merge.py¶

Used to merge the cluster config. This is a POC and might differ from actual production code.

#!/usr/bin/python

import sys
from ganeti import config
from ganeti import constants

# Open our own config and the config of the merging cluster
c_mine = config.ConfigWriter(offline=True)
c_other = config.ConfigWriter(sys.argv[1])

# Import all nodes of the merging cluster, demoted from master candidate
fake_id = 0
for node in c_other.GetNodeList():
  node_info = c_other.GetNodeInfo(node)
  node_info.master_candidate = False
  c_mine.AddNode(node_info, str(fake_id))
  fake_id += 1

# Import all instances, re-allocating DRBD ports in our own port range
for instance in c_other.GetInstanceList():
  instance_info = c_other.GetInstanceInfo(instance)
  for dsk in instance_info.disks:
    if dsk.dev_type in constants.LDS_DRBD:
      port = c_mine.AllocatePort()
      # Adjust the port in logical_id (index 2) ...
      logical_id = list(dsk.logical_id)
      logical_id[2] = port
      dsk.logical_id = tuple(logical_id)
      # ... and in physical_id (index 1 and 3)
      physical_id = list(dsk.physical_id)
      physical_id[1] = physical_id[3] = port
      dsk.physical_id = tuple(physical_id)
  c_mine.AddInstance(instance_info, str(fake_id))
  fake_id += 1

Feature changes¶

Ganeti Confd¶

Current State and shortcomings¶

In Ganeti 2.0 all nodes are equal, but some are more equal than others. In particular they are divided between “master”, “master candidates” and “normal”. (Moreover they can be offline or drained, but this is not important for the current discussion). In general the whole configuration is only replicated to master candidates, and some partial information is spread to all nodes via ssconf.

This change was done so that the most frequent Ganeti operations didn’t need to contact all nodes, and so clusters could become bigger. If we want more information to be available on all nodes, we either need to add more ssconf values, which counter-balances that change, or to talk with the master node, which the current design doesn’t allow for and which requires the master’s availability.

Information such as the instance->primary_node mapping will be needed on all nodes, and we also want to make sure services external to the cluster can query this information as well. This information must be available at all times, so we can’t query it through RAPI, which would be a single point of failure, as it’s only available on the master.

Proposed changes¶

In order to allow fast and highly available read-only access to some configuration values, we’ll create a new ganeti-confd daemon, which will run on master candidates. This daemon will talk via UDP, and authenticate messages using HMAC with a cluster-wide shared key. This key will be generated at cluster init time, and stored on the cluster alongside the ganeti SSL keys, and readable only by root.

An interested client can query a value by making a request to a subset of the cluster master candidates. It will then wait to get a few responses, and use the one with the highest configuration serial number. Since the configuration serial number is increased each time the ganeti config is updated, and the serial number is included in all answers, this can be used to make sure to use the most recent answer, in case some master candidates are stale or in the middle of a configuration update.

In order to prevent replay attacks, queries will contain the current unix timestamp according to the client, and the server will verify that the timestamp is within a 5-minute range of its own (this requires synchronized clocks, which is a good idea anyway). Queries will also contain a “salt” which they expect the answers to be sent with, and clients are supposed to accept only answers which contain a salt generated by them.

The configuration daemon will be able to answer simple queries such as:

  • master candidates list
  • master node
  • offline nodes
  • instance list
  • instance primary nodes
Wire protocol¶

A confd query will look like this, on the wire:

plj0{
  "msg": "{\"type\": 1,
           \"rsalt\": \"9aa6ce92-8336-11de-af38-001d093e835f\",
           \"protocol\": 1,
           \"query\": \"node1.example.com\"}\n",
  "salt": "1249637704",
  "hmac": "4a4139b2c3c5921f7e439469a0a45ad200aead0f"
}

plj0 is a fourcc that details the message content. It stands for plain json 0, and can be changed as we move on to different types of protocols (for example protocol buffers, or encrypted json). What follows is a json encoded string, with the following fields:

  • msg contains a JSON-encoded query, its fields are:
    • protocol, integer, is the confd protocol version (initially just constants.CONFD_PROTOCOL_VERSION, with a value of 1)
    • type, integer, is the query type. For example “node role by name” or “node primary ip by instance ip”. Constants will be provided for the actual available query types
    • query is a multi-type field (depending on the type field):
      • it can be missing, when the request is fully determined by the type field
      • it can contain a string which denotes the search key: for example an IP, or a node name
      • it can contain a dictionary, in which case the actual details vary further per request type
    • rsalt, string, is the required response salt; the client must use it to recognize which answer it’s getting.
  • salt must be the current unix timestamp, according to the client; servers should refuse messages which have a wrong timing, according to their configuration and clock
  • hmac is an hmac signature of salt+msg, with the cluster hmac key

If an answer comes back (which is optional, since confd works over UDP) it will be in this format:

plj0{
  "msg": "{\"status\": 0,
           \"answer\": 0,
           \"serial\": 42,
           \"protocol\": 1}\n",
  "salt": "9aa6ce92-8336-11de-af38-001d093e835f",
  "hmac": "aaeccc0dff9328fdf7967cb600b6a80a6a9332af"
}

Where:

  • plj0 the message type magic fourcc, as discussed above
  • msg contains a JSON-encoded answer, its fields are:
    • protocol, integer, is the confd protocol version (initially just constants.CONFD_PROTOCOL_VERSION, with a value of 1)
    • status, integer, is the error code; initially just 0 for ‘ok’ or 1 for ‘error’ (in which case answer contains an error detail, rather than an answer), but in the future it may be expanded to have more meanings (e.g. 2 if the answer is compressed)
    • answer, is the actual answer; its type and meaning is query specific: for example for “node primary ip by instance ip” queries it will be a string containing an IP address, for “node role by name” queries it will be an integer which encodes the role (master, candidate, drained, offline) according to constants
  • salt is the requested salt from the query; a client can use it to recognize what query the answer is answering.
  • hmac is an hmac signature of salt+msg, with the cluster hmac key
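
Putting the pieces above together, the client side of query serialization could be sketched as follows (PackConfdQuery is an illustrative name, not the actual library function; the shared cluster HMAC key is assumed to be available as bytes):

import hashlib
import hmac
import json
import time
import uuid

FOURCC = "plj0"

def PackConfdQuery(query_type, query, hmac_key):
  """Serialize and sign a confd query in the format described above."""
  msg = json.dumps({
    "protocol": 1,               # constants.CONFD_PROTOCOL_VERSION
    "type": query_type,          # integer query type constant
    "query": query,              # string, dict or None, depending on type
    "rsalt": str(uuid.uuid4()),  # response salt, echoed back in answers
    })
  salt = str(int(time.time()))   # timestamp salt, checked by the servers
  signature = hmac.new(hmac_key, (salt + msg).encode("utf-8"),
                       hashlib.sha1).hexdigest()
  return FOURCC + json.dumps({"msg": msg, "salt": salt, "hmac": signature})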

Redistribute Config¶

Current State and shortcomings¶

Currently LUClusterRedistConf triggers a copy of the updated configuration file to all master candidates and of the ssconf files to all nodes. There are other files which are maintained manually but which are important to keep in sync. These are:

  • rapi SSL key certificate file (rapi.pem) (on master candidates)
  • rapi user/password file rapi_users (on master candidates)

Furthermore there are some files which are hypervisor specific but we may want to keep in sync:

  • the xen-hvm hypervisor uses one shared file for all vnc passwords, and copies the file once, during node add. This design is subject to revision to be able to have different passwords for different groups of instances via the use of hypervisor parameters, and to allow xen-hvm and kvm to use an equal system to provide password-protected vnc sessions. In general, though, it would be useful if the vnc password files were copied as well, to avoid unwanted vnc password changes on instance failover/migrate.

Optionally the admin may want to also ship files such as the global xend.conf file, and the network scripts to all nodes.

Proposed changes¶

RedistributeConfig will be changed to copy also the rapi files, and to call every enabled hypervisor asking for a list of additional files to copy. Users will have the possibility to populate a file containing a list of files to be distributed; this file will be propagated as well. Such a solution is simple to implement and easily usable by scripts.

This code will be also shared (via tasklets or by other means, if tasklets are not ready for 2.1) with the AddNode and SetNodeParams LUs (so that the relevant files will be automatically shipped to new master candidates as they are set).

VNC Console Password¶

Current State and shortcomings¶

Currently just the xen-hvm hypervisor supports setting a password to connect to the instances’ VNC console, and has one common password stored in a file.

This doesn’t allow different passwords for different instances/groups of instances, and makes it necessary to remember to copy the file around the cluster when the password changes.

Proposed changes¶

We’ll change the VNC password file to a vnc_password_file hypervisor parameter. This way it can have a cluster default, but also a different value for each instance. The VNC enabled hypervisors (xen and kvm) will publish all the password files in use through the cluster so that a redistribute-config will ship them to all nodes (see the Redistribute Config proposed changes above).

The current VNC_PASSWORD_FILE constant will be removed, but its value will be used as the default HV_VNC_PASSWORD_FILE value, thus retaining backwards compatibility with 2.0.

The code to export the list of VNC password files from the hypervisors to RedistributeConfig will be shared between the KVM and xen-hvm hypervisors.

Disk/Net parameters¶

Current State and shortcomings¶

Currently disks and network interfaces have a few tweakable options and all the rest is left to a default we chose. We’re finding that we need more and more to tweak some of these parameters, for example to disable barriers for DRBD devices, or allow striping for the LVM volumes.

Moreover for many of these parameters it will be nice to have cluster-wide defaults, and then be able to change them per disk/interface.

Proposed changes¶

We will add new cluster level diskparams and netparams, which will contain all the tweakable parameters. All values which have a sensible cluster-wide default will go into this new structure while parameters which have unique values will not.

Example of network parameters:
  • mode: bridge/route
  • link: for mode “bridge” the bridge to connect to, for mode route it can contain the routing table, or the destination interface
Example of disk parameters:
  • stripe: lvm stripes
  • stripe_size: lvm stripe size
  • meta_flushes: drbd, enable/disable metadata “barriers”
  • data_flushes: drbd, enable/disable data “barriers”

Some parameters are bound to be disk-type specific (drbd, vs lvm, vs files) or hypervisor specific (nic models for example), but for now they will all live in the same structure. Each component is supposed to validate only the parameters it knows about, and ganeti itself will make sure that no “globally unknown” parameters are added, and that no parameters have overridden meanings for different components.

The parameters will be kept, as for BEPARAMS, in a “default” category, which will allow us to expand on them by creating instance “classes” in the future. Instance classes are not a feature we plan to implement in 2.1, though.
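
As an illustration, the new cluster-level structures could look along these lines (a sketch; the parameter names follow the examples above, the values are made up):

# Hypothetical cluster defaults, kept under the "default" category
diskparams = {
  "default": {
    "stripe": 1,           # lvm stripes
    "stripe_size": 64,     # lvm stripe size
    "meta_flushes": True,  # drbd metadata "barriers"
    "data_flushes": True,  # drbd data "barriers"
    },
  }

netparams = {
  "default": {
    "mode": "bridge",      # or "route"
    "link": "xen-br0",     # bridge for "bridge", routing table for "route"
    },
  }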

Global hypervisor parameters¶

Current State and shortcomings¶

Currently all hypervisor parameters are modifiable both globally (cluster level) and at instance level. However, there is no other framework to hold hypervisor-specific parameters, so if we want to add a new class of hypervisor parameters that only makes sense on a global level, we have to change the hvparams framework.

Proposed changes¶

We add a new (global, not per-hypervisor) list of parameters which are not changeable on a per-instance level. The create, modify and query instance operations are changed to not allow/show these parameters.

Furthermore, to allow transition of parameters to the global list, and to allow cleanup of inadvertently-customised parameters, the UpgradeConfig() method of instances will drop any such parameters from their list of hvparams, such that a restart of the master daemon is all that is needed for cleaning these up.

Also, the framework is simple enough that if we need to replicate it at beparams level we can do so easily.

Non bridged instances support¶

Current State and shortcomings¶

Currently each instance NIC must be connected to a bridge, and if the bridge is not specified the default cluster one is used. This makes it impossible to use the vif-route xen network scripts, or other alternative mechanisms that don’t need a bridge to work.

Proposed changes¶

The new “mode” network parameter will distinguish between bridged interfaces and routed ones.

When mode is “bridge” the “link” parameter will contain the bridge the instance should be connected to, effectively keeping things as they are today. The value has been migrated from a nic field to a parameter to allow for easier manipulation of the cluster default.

When mode is “route” the ip field of the interface will become mandatory, to allow for a route to be set. In the future we may also want to accept multiple IPs or IP/mask values for this purpose. We will evaluate possible meanings of the link parameter to signify a routing table to be used, which would allow for insulation between instance groups (as happens today for different bridges).

For now we won’t add a parameter to specify which network script gets called for which instance, so in a mixed cluster the network script must be able to handle both cases. The default kvm vif script will be changed to do so. (Xen doesn’t have a ganeti provided script, so nothing will be done for that hypervisor)

Introducing persistent UUIDs¶

Current state and shortcomings¶

Some objects in the Ganeti configuration are tracked by their name while also supporting renames. This creates an extra difficulty, because neither Ganeti nor external management tools can then track the actual entity, and due to the name change it behaves like a new one.

Proposed changes part 1¶

We will change Ganeti to use UUIDs for entity tracking, but in a staggered way. In 2.1, we will simply add a “uuid” attribute to each of the instances, nodes and cluster itself. This will be reported on instance creation for instances, and on node add for nodes. It will of course be available for querying via the OpNodeQuery/Instance and cluster information, and via RAPI as well.

Note that Ganeti will not provide any way to change this attribute.

Upgrading from Ganeti 2.0 will automatically add an ‘uuid’ attribute to all entities missing it.

Proposed changes part 2¶

In the next release (e.g. 2.2), the tracking of objects will change from the name to the UUID internally, and externally Ganeti will accept both forms of identification; e.g. an RAPI call would be made either against /2/instances/foo.bar or against /2/instances/bb3b2e42…. Since an FQDN must have at least a dot, and dots are not valid characters in UUIDs, we will not have namespace issues.
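
The disambiguation argument translates into a one-line check; a sketch:

def LooksLikeUuid(identifier):
  """Distinguish a UUID from an FQDN.

  Every FQDN contains at least one dot, while dots are not valid
  characters in UUIDs, so the two namespaces never overlap.
  """
  return "." not in identifier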

Another change here is that node identification (during cluster operations/queries like master startup, “am I the master?” and similar) could be done via UUIDs, which is more stable than the current hostname-based scheme.

Internal tracking refers to the way the configuration is stored; a DRBD disk of an instance refers to the node name (so that IPs can be changed easily), but this is still a problem for name changes; thus these will be changed to point to the node UUID to ease renames.

The advantage of this change (after the second round of changes) is that node rename becomes trivial, whereas today node rename would require a complete lock of all instances.

Automated disk repairs infrastructure¶

Replacing defective disks in an automated fashion is quite difficult with the current version of Ganeti. These changes will introduce additional functionality and interfaces to simplify automating disk replacements on a Ganeti node.

Fix node volume group¶

This is the most difficult addition, as it can lead to data loss if it’s not properly safeguarded.

The operation must be done only when all the other nodes that have instances in common with the target node are fine, i.e. this is the only node with problems, and also we have to double-check that all instances on this node have at least a good copy of the data.

This might mean that we have to enhance the GetMirrorStatus calls, and introduce a smarter version that can tell us more about the status of an instance.

Stop allocation on a given PV¶

This is somewhat simple. First we need a “list PVs” opcode (and its associated logical unit) and then a set PV status opcode/LU. These in combination should allow both checking and changing the disk/PV status.

Instance disk status¶

This new opcode or opcode change must list the instance-disk-index and node combinations of the instance together with their status. This will allow determining what part of the instance is broken (if any).

Repair instance¶

This new opcode/LU/RAPI call will run replace-disks -p as needed, in order to fix the instance status. It only affects primary instances; secondaries can just be moved away.

Migrate node¶

This new opcode/LU/RAPI call will take over the current gnt-node migrate code and run migrate for all instances on the node.

Evacuate node¶

This new opcode/LU/RAPI call will take over the current gnt-node evacuate code and run replace-secondary with an iallocator script for all instances on the node.

User-id pool¶

In order to allow running different processes under unique user-ids on a node, we introduce the user-id pool concept.

The user-id pool is a cluster-wide configuration parameter. It is a list of user-ids and/or user-id ranges that are reserved for running Ganeti processes (including KVM instances). The code guarantees that on a given node a given user-id is only handed out if there is no other process running with that user-id.

Please note that this can only be guaranteed if all processes in the system - that run under a user-id belonging to the pool - are started by reserving a user-id first. That can be accomplished either by using the RequestUnusedUid() function to get an unused user-id or by implementing the same locking mechanism.

Implementation¶

The functions that are specific to the user-id pool feature are located in a separate module: lib/uidpool.py.

Storage¶

The user-id pool is a single cluster parameter. It is stored in the Cluster object under the uid_pool name as a list of integer tuples. These tuples represent the boundaries of user-id ranges. For single user-ids, the boundaries are equal.

The internal user-id pool representation is converted into a string: a newline separated list of user-ids or user-id ranges. This string representation is distributed to all the nodes via the ssconf mechanism. This means that the user-id pool can be accessed in a read-only way on any node without consulting the master node or master candidate nodes.

Initial value¶

The value of the user-id pool cluster parameter can be initialized at cluster initialization time using the

gnt-cluster init --uid-pool <uid-pool definition> ...

command.

As there is no sensible default value for the user-id pool parameter, it is initialized to an empty list if no --uid-pool option is supplied at cluster init time.

If the user-id pool is empty, the user-id pool feature is considered to be disabled.

Manipulation¶

The user-id pool cluster parameter can be modified from the command-line with the following commands:

  • gnt-cluster modify --uid-pool <uid-pool definition>
  • gnt-cluster modify --add-uids <uid-pool definition>
  • gnt-cluster modify --remove-uids <uid-pool definition>

The --uid-pool option overwrites the current setting with the supplied <uid-pool definition>, while --add-uids/--remove-uids adds/removes the listed uids or uid-ranges from the pool.

The <uid-pool definition> should be a comma-separated list of user-ids or user-id ranges. A range should be defined by a lower and a higher boundary. The boundaries should be separated with a dash. The boundaries are inclusive.

The <uid-pool definition> is parsed into the internal representation, sanity-checked and stored in the uid_pool attribute of the Cluster object.

It is also immediately converted into a string (formatted in the input format) and distributed to all nodes via the ssconf mechanism.
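
A sketch of the parsing step (the function name is illustrative; the actual code lives in lib/uidpool.py):

def ParseUidPoolDefinition(definition, separator=","):
  """Parse e.g. "1000-1999,2500" into [(1000, 1999), (2500, 2500)]."""
  ranges = []
  for item in definition.split(separator):
    item = item.strip()
    if not item:
      continue
    if "-" in item:
      # A range: inclusive lower and upper boundaries, dash-separated
      (lower, upper) = item.split("-", 1)
      ranges.append((int(lower), int(upper)))
    else:
      # A single user-id is stored with equal boundaries
      uid = int(item)
      ranges.append((uid, uid))
  return ranges

The same routine, called with separator="\n", would also cover the ssconf string representation described earlier.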

Inspection¶

The current value of the user-id pool cluster parameter is printed by the gnt-cluster info command.

The output format is accepted by the gnt-cluster modify --uid-pool command.

Locking¶

The uidpool.py module provides a function (RequestUnusedUid) for requesting an unused user-id from the pool.

This will try to find a random user-id that is not currently in use. The algorithm is the following:

  1. Randomize the list of user-ids in the user-id pool
  2. Iterate over this randomized UID list
  3. Create a lock file (it doesn’t matter if it already exists)
  4. Acquire an exclusive POSIX lock on the file, to provide mutual exclusion for the following non-atomic operations
  5. Check if there is a process in the system with the given UID
  6. If there isn’t, return the UID, otherwise unlock the file and continue the iteration over the user-ids

The user can then start a new process with this user-id. Once a process is successfully started, the exclusive POSIX lock can be released, but the lock file will remain in the filesystem. The presence of such a lock file means that the given user-id is most probably in use. The lack of a uid lock file does not guarantee that there are no processes with that user-id.

After acquiring the exclusive POSIX lock, RequestUnusedUid always performs a check to see if there is a process running with the given uid.
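
Steps 3 to 5 of the algorithm map directly onto POSIX file locking; a condensed sketch (the lock directory and the process-check helper are illustrative, not the actual lib/uidpool.py code):

import fcntl
import os

def TryReserveUid(uid, lockdir="/var/run/ganeti/uid-pool"):
  """Try to reserve a single user-id via an exclusive lock file."""
  fd = os.open(os.path.join(lockdir, str(uid)),
               os.O_CREAT | os.O_RDWR, 0o600)
  try:
    # Non-blocking: if another process holds the lock, skip this uid
    fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
  except IOError:
    os.close(fd)
    return None
  if ProcessExistsWithUid(uid):  # hypothetical helper, e.g. scans /proc
    fcntl.flock(fd, fcntl.LOCK_UN)
    os.close(fd)
    return None
  # The caller keeps fd (and thus the lock) open until its process has
  # been started, then unlocks it; the lock file itself remains
  return fd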

A user-id can be returned to the pool, by calling the ReleaseUid function. This will remove the corresponding lock file. Note, that it doesn’t check if there is any process still running with that user-id. The removal of the lock file only means that there are most probably no processes with the given user-id. This helps in speeding up the process of finding a user-id that is guaranteed to be unused.

There is a convenience function, called ExecWithUnusedUid that wraps the execution of a function (or any callable) that requires a unique user-id. ExecWithUnusedUid takes care of requesting an unused user-id and unlocking the lock file. It also automatically returns the user-id to the pool if the callable raises an exception.

Code examples¶

Requesting a user-id from the pool:

from ganeti import ssconf
from ganeti import uidpool

# Get list of all user-ids in the uid-pool from ssconf
ss = ssconf.SimpleStore()
uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n")
all_uids = set(uidpool.ExpandUidPool(uid_pool))

uid = uidpool.RequestUnusedUid(all_uids)
try:
  <start a process with the UID>
  # Once the process is started, we can release the file lock
  uid.Unlock()
except ..., err:
  # Return the UID to the pool
  uidpool.ReleaseUid(uid)

Releasing a user-id:

from ganeti import uidpool

uid = <get the UID the process is running under>
<stop the process>
uidpool.ReleaseUid(uid)

External interface changes¶

OS API¶

The OS API of Ganeti 2.0 has been built with extensibility in mind. Since we pass everything as environment variables it’s a lot easier to send new information to the OSes without breaking backwards compatibility. This section of the design outlines the proposed extensions to the API and their implementation.

API Version Compatibility Handling¶

In 2.1 there will be a new OS API version (e.g. 15), which should be mostly compatible with API 10, except for some newly added variables. Since it’s easy not to pass some variables, we’ll be able to handle Ganeti 2.0 OSes by just filtering out the newly added pieces of information. We will still encourage OSes to declare support for the new API after checking that the new variables don’t provide any conflict for them, and we will drop API 10 support after Ganeti 2.1 has been released.

New Environment variables¶

Some variables have never been added to the OS API but would definitely be useful for the OSes. We plan to add an INSTANCE_HYPERVISOR variable to allow the OS to make changes relevant to the virtualization the instance is going to use. Since this field is immutable for each instance, the OS can tailor the install to that virtualization technology, without having to make sure the instance can run under any virtualization technology.

We also want the OS to know the particular hypervisor parameters, to be able to customize the install even more. Since the parameters can change, though, we will pass them only as an “FYI”: if an OS ties some instance functionality to the value of a particular hypervisor parameter, manual changes or a reinstall may be needed to adapt the instance to the new environment. This is not a regression as of today, because even if the OSes are left blind about this information, sometimes they still need to make compromises and cannot satisfy all possible parameter values.

OS Variants¶

Currently we are witnessing some degree of “os proliferation” just to change a simple installation behavior. This means that the same OS gets installed on the cluster multiple times, with different names, to customize just one installation behavior. Usually such OSes try to share as much as possible through symlinks, but this still causes complications on the user side, especially when multiple parameters must be cross-matched.

For example today if you want to install debian etch, lenny or squeeze you probably need to install the debootstrap OS multiple times, changing its configuration file, and calling it debootstrap-etch, debootstrap-lenny or debootstrap-squeeze. Furthermore if you have for example a “server” and a “development” environment which installs different packages/configuration files and must be available for all installs you’ll probably end up with debootstrap-etch-server, debootstrap-etch-dev, debootstrap-lenny-server, debootstrap-lenny-dev, etc. Crossing more than two parameters quickly becomes unmanageable.

In order to avoid this we plan to make OSes more customizable, by allowing each OS to declare a list of variants which can be used to customize it. The variants list is mandatory and must be written, one variant per line, in the new “variants.list” file inside the main os dir. At least one variant must be supported. When choosing the OS exactly one variant will have to be specified, and will be encoded in the os name as <OS-name>+<variant>. As for today, it will be possible to change an instance’s OS at creation or install time.

The 2.1 OS list will be the combination of each OS, plus its supported variants. This will cause the name proliferation to remain, but at least the internal OS code will be simplified to just parsing the passed variant, without the need for symlinks or code duplication.
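
Since the variant is encoded in the OS name, splitting the two back apart is trivial; a sketch:

def SplitOsVariant(name):
  """Split "<OS-name>+<variant>" into its two components.

  >>> SplitOsVariant("debootstrap+lenny")
  ('debootstrap', 'lenny')
  """
  if "+" in name:
    (os_name, variant) = name.split("+", 1)
    return (os_name, variant)
  return (name, None)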

Also we expect the OSes to declare only “interesting” variants, but to accept some non-declared ones which a user will be able to pass in by overriding the checks ganeti does. This will be useful for allowing some variations to be used without polluting the OS list (per-OS documentation should list all supported variants). If a variant which is not internally supported is forced through, the OS scripts should abort.

In the future (post 2.1) we may want to move to full fledged parameters all orthogonal to each other (for example “architecture” (i386, amd64), “suite” (lenny, squeeze, ...), etc). (As opposed to the variant, which is a single parameter, and you need a different variant for all the set of combinations you want to support). In this case we envision the variants to be moved inside of Ganeti and be associated with lists parameter->values associations, which will then be passed to the OS.

IAllocator changes¶

Current State and shortcomings¶

The iallocator interface allows creation of instances without manually specifying nodes, but instead by specifying plugins which will do the required computations and produce a valid node list.

However, the interface is quite awkward to use:

  • one cannot set a ‘default’ iallocator script
  • one cannot use it to easily test if allocation would succeed
  • some new functionality, such as rebalancing clusters and calculating capacity estimates is needed
Proposed changes¶

There are two areas of improvement proposed:

  • improving the use of the current interface
  • extending the IAllocator API to cover more automation
Default iallocator names¶

The cluster will hold, for each type of iallocator, a (possibly empty) list of modules that will be used automatically.

If the list is empty, the behaviour will remain the same.

If the list has one entry, then ganeti will behave as if --iallocator was specified on the command line, i.e. use this allocator by default. If the user however passed nodes, those will be used in preference.

If the list has multiple entries, they will be tried in order until one gives a successful answer.
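
The multiple-entry behaviour boils down to a simple fallback loop; a sketch (run_allocator and IAllocatorError are illustrative names):

class IAllocatorError(Exception):
  """Raised when an allocator cannot produce a valid placement."""

def RunDefaultAllocators(allocators, request, run_allocator):
  """Try the configured default allocators in order.

  run_allocator(name, request) is assumed to return a node list on
  success and to raise IAllocatorError on failure.
  """
  for name in allocators:
    try:
      return run_allocator(name, request)
    except IAllocatorError:
      continue  # fall through to the next configured allocator
  raise IAllocatorError("no configured iallocator gave a valid answer")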

Dry-run allocation¶

The create instance LU will get a new ‘dry-run’ option that will just simulate the placement, and return the chosen node-lists after running all the usual checks.

Cluster balancing¶

Instance adds/removals/moves can create a situation where load on the nodes is not spread equally. For this, a new iallocator mode will be implemented called balance, in which the plugin, given the current cluster state and a maximum number of operations, will need to compute the instance relocations needed in order to achieve a “better” (for whatever definition of “better” the script uses) cluster.

Cluster capacity calculation¶

In this mode, called capacity, given an instance specification and the current cluster state (similar to the allocate mode), the plugin needs to return:

  • how many instances can be allocated on the cluster with that specification
  • on which nodes these will be allocated (in order)

ganeti-2.9.3/doc/html/design-2.2.html0000644000000000000000000021262712271443665017165 0ustar00rootroot00000000000000 Ganeti 2.2 design — Ganeti 2.9.3 documentation

Ganeti 2.2 design¶

This document describes the major changes in Ganeti 2.2 compared to the 2.1 version.

The 2.2 version will be a relatively small release. Its main aim is to avoid changing too much of the core code, while addressing issues and adding new features and improvements over 2.1, in a timely fashion.

As for 2.1 we divide the 2.2 design into three areas:

  • core changes, which affect the master daemon/job queue/locking or all/most logical units
  • logical unit/feature changes
  • external interface changes (e.g. command line, OS API, hooks, ...)

Core changes¶

Master Daemon Scaling improvements¶

Current state and shortcomings¶

Currently the Ganeti master daemon is based on four sets of threads:

  • The main thread (1 thread) just accepts connections on the master socket
  • The client worker pool (16 threads) handles those connections, one thread per connected socket, parses luxi requests, and sends data back to the clients
  • The job queue worker pool (25 threads) executes the actual jobs submitted by the clients
  • The rpc worker pool (10 threads) interacts with the nodes via http-based-rpc

This means that every masterd currently runs 52 threads to do its job. Being able to reduce the number of thread sets would make the master’s architecture a lot simpler. Moreover having fewer threads can help decrease lock contention, log pollution and memory usage. Also, with the current architecture, masterd suffers from quite a few scalability issues:

Core daemon connection handling¶

Since the 16 client worker threads handle one connection each, it’s very easy to exhaust them, by just connecting to masterd 16 times and not sending any data. While we could perhaps make those pools resizable, increasing the number of threads won’t help with lock contention, nor with better handling of long-running operations, where we must make sure the client is informed that everything is proceeding so that it doesn’t time out.

Wait for job change¶

The REQ_WAIT_FOR_JOB_CHANGE luxi operation makes the relevant client thread block on its job for a relatively long time. This is another easy way to exhaust the 16 client threads, and a place where clients often time out; moreover this operation is negative for the job queue lock contention (see below).

Job Queue lock¶

The job queue lock is quite heavily contended, and certain easily reproducible workloads show that it’s very easy to put masterd in trouble: for example, running ~15 background instance reinstall jobs results in a master daemon that, even without having exhausted the client worker threads, can’t answer simple job list requests, or submit more jobs.

Currently the job queue lock is an exclusive non-fair lock insulating the following job queue methods (called by the client workers).

  • AddNode
  • RemoveNode
  • SubmitJob
  • SubmitManyJobs
  • WaitForJobChanges
  • CancelJob
  • ArchiveJob
  • AutoArchiveJobs
  • QueryJobs
  • Shutdown

Moreover the job queue lock is acquired outside of the job queue in two other classes:

  • jqueue._JobQueueWorker (in RunTask) before executing the opcode, after finishing its execution and when handling an exception.
  • jqueue._OpExecCallbacks (in NotifyStart and Feedback) when the processor (mcpu.Processor) is about to start working on the opcode (after acquiring the necessary locks) and when any data is sent back via the feedback function.

Of those the major critical points are:

  • Submit[Many]Job, QueryJobs, WaitForJobChanges, which can easily slow down and block client threads up to making the respective clients time out.
  • The code paths in NotifyStart, Feedback, and RunTask, which slow down job processing between clients and otherwise non-related jobs.

To increase the pain:

  • WaitForJobChanges is a bad offender because it’s implemented with a notified condition which awakens waiting threads, which then try to acquire the global lock again
  • Many should-be-fast code paths are slowed down by replicating the change to remote nodes, and thus waiting, with the lock held, on remote rpcs to complete (starting, finishing, and submitting jobs)

Proposed changes¶

In order to be able to interact with the master daemon even when it’s under heavy load, and to make it simpler to add core functionality (such as an asynchronous rpc client) we propose three subsequent levels of changes to the master core architecture.

After making this change we’ll be able to re-evaluate the size of our thread pool, if we see that we can make most threads in the client worker pool always idle. In the future we should also investigate making the rpc client asynchronous as well, so that we can make masterd a lot smaller in number of threads, and memory size, and thus also easier to understand, debug, and scale.

Connection handling¶

We’ll move the main thread of ganeti-masterd to asyncore, so that it can share the mainloop code with all other Ganeti daemons. Then all luxi clients will be asyncore clients, and I/O to/from them will be handled by the master thread asynchronously. Data will be read from the client sockets as it becomes available, and kept in a buffer, then when a complete message is found, it’s passed to a client worker thread for parsing and processing. The client worker thread is responsible for serializing the reply, which can then be sent asynchronously by the main thread on the socket.

Wait for job change¶

The REQ_WAIT_FOR_JOB_CHANGE luxi request is changed to be subscription-based, so that the executing thread doesn’t have to be waiting for the changes to arrive. Threads producing messages (job queue executors) will make sure that when there is a change, another thread is awakened and delivers it to the waiting clients. This can be either a dedicated “wait for job changes” thread or pool, or one of the client workers, depending on what’s easier to implement. In either case the main asyncore thread will only be involved in pushing of the actual data, and not in fetching/serializing it.

Other features to look at, when implementing this code are:

  • Possibility not to need the job lock to know which updates to push: if the thread producing the data pushed a copy of the update for the waiting clients, the thread sending it won’t need to acquire the lock again to fetch the actual data.
  • Possibility to signal clients about to time out, when no update has been received, not to despair and to keep waiting (luxi level keepalive).
  • Possibility to defer updates if they are too frequent, providing them at a maximum rate (lower priority).
Job Queue lock¶

In order to decrease the job queue lock contention, we will change the code paths in the following ways, initially:

  • A per-job lock will be introduced. All operations affecting only one job (for example feedback, starting/finishing notifications, subscribing to or watching a job) will only require the job lock. This should be a leaf lock, but if a situation arises in which it must be acquired together with the global job queue lock the global one must always be acquired last (for the global section).
  • The locks will be converted to a sharedlock. Any read-only operation will be able to proceed in parallel.
  • During remote update (which happens already per-job) we’ll drop the job lock level to shared mode, so that activities reading the lock (for example job change notifications or QueryJobs calls) will be able to proceed in parallel.
  • The wait for job changes improvements proposed above will be implemented.

In the future other improvements may include splitting off some of the work (eg replication of a job to remote nodes) to a separate thread pool or asynchronous thread, not tied with the code path for answering client requests or the one executing the “real” work. This can be discussed again after we used the more granular job queue in production and tested its benefits.

Inter-cluster instance moves¶

Current state and shortcomings¶

With the current design of Ganeti, moving whole instances between different clusters involves a lot of manual work. There are several ways to move instances, one of them being to export the instance, manually copying all data to the new cluster before importing it again. Manual changes to the instances configuration, such as the IP address, may be necessary in the new environment. The goal is to improve and automate this process in Ganeti 2.2.

Proposed changes¶

Authorization, Authentication and Security¶

Until now, each Ganeti cluster was a self-contained entity and wouldn’t talk to other Ganeti clusters. Nodes within clusters only had to trust the other nodes in the same cluster and the network used for replication was trusted, too (hence the ability to use a separate, local network for replication).

For inter-cluster instance transfers this model must be weakened. Nodes in one cluster will have to talk to nodes in other clusters, sometimes in other locations and, very important, via untrusted network connections.

Various options have been considered for securing and authenticating the data transfer from one machine to another. To reduce the risk of accidentally overwriting data due to software bugs, authenticating the arriving data was considered critical. Eventually we decided to use socat’s OpenSSL options (OPENSSL:, OPENSSL-LISTEN: et al), which provide us with encryption, authentication and authorization when used with separate keys and certificates.

Combinations of OpenSSH, GnuPG and Netcat were deemed too complex to set up from within Ganeti. Any solution involving OpenSSH would require a dedicated user with a home directory and likely automated modifications to the user’s $HOME/.ssh/authorized_keys file. When using Netcat, GnuPG or another encryption method would be necessary to transfer the data over an untrusted network. socat combines both in one program and is already a dependency.

Each of the two clusters will have to generate an RSA key. The public parts are exchanged between the clusters by a third party, such as an administrator or a system interacting with Ganeti via the remote API (“third party” from here on). After receiving each other’s public key, the clusters can start talking to each other.

All encrypted connections must be verified on both sides. Neither side may accept unverified certificates. The generated certificate should only be valid for the time necessary to move the instance.

For additional protection of the instance data, the two clusters can verify the certificates and destination information exchanged via the third party by checking an HMAC signature using a key shared among the involved clusters. By default this secret key will be a random string unique to the cluster, generated by running SHA1 over 20 bytes read from /dev/urandom and the administrator must synchronize the secrets between clusters before instances can be moved. If the third party does not know the secret, it can’t forge the certificates or redirect the data. Unless disabled by a new cluster parameter, verifying the HMAC signatures must be mandatory. The HMAC signature for X509 certificates will be prepended to the certificate similar to an RFC 822 header and only covers the certificate (from -----BEGIN CERTIFICATE----- to -----END CERTIFICATE-----). The header name will be X-Ganeti-Signature and its value will have the format $salt/$hash (salt and hash separated by slash). The salt may only contain characters in the range [a-zA-Z0-9].

On the web, the destination cluster would be equivalent to an HTTPS server requiring verifiable client certificates. The browser would be equivalent to the source cluster and must verify the server’s certificate while providing a client certificate to the server.
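
A sketch of the signing side, following the header format described above (exactly how the salt enters the MAC is simplified here; the key and the certificate are assumed to be available as bytes and PEM text respectively):

import hashlib
import hmac
import random
import string

SIGNATURE_HEADER = "X-Ganeti-Signature"

def SignCertificate(cert_pem, secret_key):
  """Prepend an HMAC signature header to a PEM certificate.

  Only the certificate itself (from -----BEGIN CERTIFICATE----- to
  -----END CERTIFICATE-----) is covered by the signature.
  """
  # The salt may only contain characters in the range [a-zA-Z0-9]
  salt = "".join(random.choice(string.ascii_letters + string.digits)
                 for _ in range(16))
  digest = hmac.new(secret_key, (salt + cert_pem).encode("utf-8"),
                    hashlib.sha1).hexdigest()
  return "%s: %s/%s\n%s" % (SIGNATURE_HEADER, salt, digest, cert_pem)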

Copying data¶

To simplify the implementation, we decided to operate at a block-device level only, allowing us to easily support non-DRBD instance moves.

Inter-cluster instance moves will re-use the existing export and import scripts supplied by instance OS definitions. Unlike simply copying the raw data, this allows one to use filesystem-specific utilities to dump only used parts of the disk and to exclude certain disks from the move. Compression should be used to further reduce the amount of data transferred.

The export script writes all data to stdout and the import script reads it from stdin again. To avoid copying data and reduce disk space consumption, everything is read from the disk and sent over the network directly, where it’ll be written to the new block device directly again.

Workflow¶
  1. Third party tells source cluster to shut down instance, asks for the instance specification and for the public part of an encryption key
    • Instance information can already be retrieved using an existing API (OpInstanceQueryData).
    • An RSA encryption key and a corresponding self-signed X509 certificate is generated using the “openssl” command. This key will be used to encrypt the data sent to the destination cluster.
      • Private keys never leave the cluster.
      • The public part (the X509 certificate) is signed using HMAC with salting and a secret shared between Ganeti clusters.
  2. Third party tells destination cluster to create an instance with the same specifications as on source cluster and to prepare for an instance move with the key received from the source cluster and receives the public part of the destination’s encryption key
    • The current API to create instances (OpInstanceCreate) will be extended to support an import from a remote cluster.
    • A valid, unexpired X509 certificate signed with the destination cluster’s secret will be required. By verifying the signature, we know the third party didn’t modify the certificate.
      • The private keys never leave their cluster, hence the third party can not decrypt or intercept the instance’s data by modifying the IP address or port sent by the destination cluster.
    • The destination cluster generates another key and certificate, signs and sends it to the third party, who will have to pass it to the API for exporting an instance (OpBackupExport). This certificate is used to ensure we’re sending the disk data to the correct destination cluster.
    • Once a disk can be imported, the API sends the destination information (IP address and TCP port) together with an HMAC signature to the third party.
  3. Third party hands public part of the destination’s encryption key together with all necessary information to source cluster and tells it to start the move
    • The existing API for exporting instances (OpBackupExport) will be extended to export instances to remote clusters.
  4. Source cluster connects to destination cluster for each disk and transfers its data using the instance OS definition’s export and import scripts
    • Before starting, the source cluster must verify the HMAC signature of the certificate and destination information (IP address and TCP port).
    • When connecting to the remote machine, strong certificate checks must be employed.
  5. Due to the asynchronous nature of the whole process, the destination cluster checks whether all disks have been transferred every time after transferring a single disk; if so, it destroys the encryption key
  6. After sending all disks, the source cluster destroys its key
  7. Destination cluster runs OS definition’s rename script to adjust instance settings if needed (e.g. IP address)
  8. Destination cluster starts the instance if requested at the beginning by the third party
  9. Source cluster removes the instance if requested
Instance move in pseudo code¶

The following pseudo code describes a script moving instances between clusters and what happens on both clusters.

  1. Script is started, gets the instance name and destination cluster:

    (instance_name, dest_cluster_name) = sys.argv[1:]
    
    # Get destination cluster object
    dest_cluster = db.FindCluster(dest_cluster_name)
    
    # Use database to find source cluster
    src_cluster = db.FindClusterByInstance(instance_name)
    
  2. Script tells source cluster to stop instance:

    # Stop instance
    src_cluster.StopInstance(instance_name)
    
    # Get instance specification (memory, disk, etc.)
    inst_spec = src_cluster.GetInstanceInfo(instance_name)
    
    (src_key_name, src_cert) = src_cluster.CreateX509Certificate()
    
  3. CreateX509Certificate on source cluster:

    key_file = mkstemp()
    cert_file = "%s.cert" % key_file
    RunCmd(["/usr/bin/openssl", "req", "-new",
             "-newkey", "rsa:1024", "-days", "1",
             "-nodes", "-x509", "-batch",
             "-keyout", key_file, "-out", cert_file])
    
    plain_cert = utils.ReadFile(cert_file)
    
    # HMAC sign using secret key, this adds a "X-Ganeti-Signature"
    # header to the beginning of the certificate
    signed_cert = utils.SignX509Certificate(plain_cert,
      utils.ReadFile(constants.X509_SIGNKEY_FILE))
    
    # The certificate now looks like the following:
    #
    #   X-Ganeti-Signature: $1234$28676f0516c6ab68062b[…]
    #   -----BEGIN CERTIFICATE-----
    #   MIICsDCCAhmgAwIBAgI[…]
    #   -----END CERTIFICATE-----
    
    # Return name of key file and signed certificate in PEM format
    return (os.path.basename(key_file), signed_cert)
    
  4. Script creates instance on destination cluster and waits for move to finish:

    dest_cluster.CreateInstance(mode=constants.REMOTE_IMPORT,
                                spec=inst_spec,
                                source_cert=src_cert)
    
    # Wait until destination cluster gives us its certificate
    dest_cert = None
    disk_info = {}
    while not (dest_cert and len(disk_info) == len(inst_spec.disks)):
      tmp = dest_cluster.WaitOutput()
      if tmp is Certificate:
        dest_cert = tmp
      elif tmp is DiskInfo:
        # DiskInfo contains destination address and port
        disk_info[tmp.index] = tmp
    
    # Tell source cluster to export disks
    for disk in disk_info.values():
      src_cluster.ExportDisk(instance_name, disk=disk,
                             key_name=src_key_name,
                             dest_cert=dest_cert)
    
    print ("Instance %s sucessfully moved to %s" %
           (instance_name, dest_cluster.name))
    
  5. CreateInstance on destination cluster:

    # …
    
    if mode == constants.REMOTE_IMPORT:
      # Make sure certificate was not modified since it was generated by
      # source cluster (which must use the same secret)
      if (not utils.VerifySignedX509Cert(source_cert,
            utils.ReadFile(constants.X509_SIGNKEY_FILE))):
        raise Error("Certificate not signed with this cluster's secret")
    
      if utils.CheckExpiredX509Cert(source_cert):
        raise Error("X509 certificate is expired")
    
      source_cert_file = utils.WriteTempFile(source_cert)
    
      # See above for X509 certificate generation and signing
      (key_name, signed_cert) = CreateSignedX509Certificate()
    
      SendToClient("x509-cert", signed_cert)
    
      for disk in instance.disks:
        # Start socat
        RunCmd(("socat"
                " OPENSSL-LISTEN:%s,…,key=%s,cert=%s,cafile=%s,verify=1"
                " stdout > /dev/disk…") %
               (port, GetRsaKeyPath(key_name, private=True),
                GetRsaKeyPath(key_name, private=False), src_cert_file))
        SendToClient("send-disk-to", disk, ip_address, port)
    
      DestroyX509Cert(key_name)
    
      RunRenameScript(instance_name)
    
  6. ExportDisk on source cluster:

    # Make sure certificate was not modified since it was generated by
    # destination cluster (which must use the same secret)
    if (not utils.VerifySignedX509Cert(cert_pem,
          utils.ReadFile(constants.X509_SIGNKEY_FILE))):
      raise Error("Certificate not signed with this cluster's secret")
    
    if utils.CheckExpiredX509Cert(cert_pem):
      raise Error("X509 certificate is expired")
    
    dest_cert_file = utils.WriteTempFile(cert_pem)
    
    # Start socat
    RunCmd(("socat stdin"
            " OPENSSL:%s:%s,…,key=%s,cert=%s,cafile=%s,verify=1"
            " < /dev/disk…") %
           (disk.host, disk.port,
            GetRsaKeyPath(key_name, private=True),
            GetRsaKeyPath(key_name, private=False), dest_cert_file))
    
    if instance.all_disks_done:
      DestroyX509Cert(key_name)
    
Miscellaneous notes¶
  • A very similar system could also be used for instance exports within the same cluster. Currently OpenSSH is being used, but could be replaced by socat and SSL/TLS.
  • During the design of inter-cluster instance moves we also discussed encrypting instance exports using GnuPG.
  • While most instances should have exactly the same configuration as on the source cluster, setting them up with a different disk layout might be helpful in some use-cases.
  • A cleanup operation, similar to the one available for failed instance migrations, should be provided.
  • ganeti-watcher should remove instances pending a move from another cluster after a certain amount of time. This takes care of failures somewhere in the process.
  • RSA keys can be generated using the existing bootstrap.GenerateSelfSignedSslCert function, though it might be useful to not write both parts into a single file, requiring small changes to the function. The public part always starts with -----BEGIN CERTIFICATE----- and ends with -----END CERTIFICATE-----.
  • The source and destination cluster might be different when it comes to available hypervisors, kernels, etc. The destination cluster should refuse to accept an instance move if it can’t fulfill an instance’s requirements.

Privilege separation¶

Current state and shortcomings¶

All Ganeti daemons are run under the user root. This is not ideal from a security perspective: if any daemon is exploited, the attacker gains full access to the system.

In order to overcome this situation we’ll allow Ganeti to run its daemons under different users and a dedicated group. This will also allow for some useful side effects, such as letting users run some gnt-* commands if they are in the same group.

Implementation¶

For Ganeti 2.2 the implementation will be focused on the RAPI daemon only. This involves changes to daemons.py so it’s possible to drop privileges when daemonizing the process. This will be a short-term solution, though, which will be replaced by a privilege drop already at daemon startup in Ganeti 2.3.

It also needs changes in the master daemon to create the socket with new permissions/owners to allow RAPI access. There will be no other permission/owner changes in the file structure, as the RAPI daemon is started with root permission. During that time it will read all needed files and then drop privileges before contacting the master daemon.

Feature changes¶

KVM Security¶

Current state and shortcomings¶

Currently all kvm processes run as root. Taking ownership of the hypervisor process, from inside a virtual machine, would mean a full compromise of the whole Ganeti cluster, knowledge of all Ganeti authentication secrets, full access to all running instances, and the option of subverting other basic services on the cluster (eg: ssh).

Proposed changes¶

We would like to decrease the attack surface available if an hypervisor is compromised. We can do so by adding different features to Ganeti, which will restrict what a broken hypervisor can do to subvert the node, in the absence of a local privilege escalation attack.

Dropping privileges in kvm to a single user (easy)

By passing the -runas option to kvm, we can make it drop privileges. The user can be chosen via a hypervisor parameter, so that each instance can have its own user, but by default they will all run under the same one. This should be very easy to implement, and can easily be backported to 2.1.X.
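
A minimal sketch of what this could look like when building the kvm command line; the parameter name security_user and the helper are illustrative assumptions, not the actual hypervisor code:

def AddRunAsUser(kvm_cmd, hvparams):
  # If a user was configured for this instance, make kvm drop
  # privileges to it via the -runas option
  username = hvparams.get("security_user")
  if username:
    kvm_cmd.extend(["-runas", username])
  return kvm_cmd

# e.g. AddRunAsUser(["kvm", "-m", "512"], {"security_user": "ganeti-kvm"})
# -> ["kvm", "-m", "512", "-runas", "ganeti-kvm"]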

This mode protects the Ganeti cluster from a subverted hypervisor, but doesn't protect the instances from each other, unless care is taken to specify a different user for each. It would prevent the worst attacks, including:

  • logging in to other nodes
  • administering the Ganeti cluster
  • subverting other services

But the following would still be possible:

  • terminate other VMs (though not start them again, as that requires root privileges to set up networking), unless different users are used
  • trace other VMs, and probably subvert them and access their data, unless different users are used
  • send network traffic from the node
  • read unprotected data on the node filesystem

Running kvm in a chroot (slightly harder)

By passing the -chroot option to kvm, we can restrict the kvm process to its own (possibly empty) root directory. We need to set this area up so that the instance disks and control sockets are accessible, which requires slightly more work at the Ganeti level.

Breaking into a hypervisor running in such a chroot would mean:

  • far fewer options for finding a local privilege escalation vector
  • no way to write local data, if the chroot is set up correctly
  • no way to read filesystem data on the host

It would still be possible though to:

  • terminate other VMs
  • trace other VMs, and possibly subvert them (if a tracer can be installed in the chroot)
  • send network traffic from the node

Running kvm with a pool of users (slightly harder)

If, rather than passing a single user as a hypervisor parameter, we have a pool of usable ones, we can dynamically choose a free one and thus guarantee that each machine will be separate from the others, without putting the burden of this on the cluster administrator.

This would make interference between machines impossible, and can still be combined with the chroot benefits.
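
A naive sketch of the selection logic, ignoring the locking and persistence a real implementation needs, and with illustrative names:

def ChooseFreeUser(user_pool, users_in_use):
  # Pick the first pool member not used by any running instance,
  # guaranteeing that no two instances share a uid
  for username in user_pool:
    if username not in users_in_use:
      return username
  raise RuntimeError("all users in the pool are in use")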

Running iptables rules to limit network interaction (easy)

These don't need to be handled by Ganeti, but we can ship examples. If the users used to run VMs were blocked from sending some or all network traffic, it would become impossible for a compromised hypervisor to send arbitrary data on the node network. This is especially useful when the instance network and the node network are separated (using ganeti-nbma or a separate set of network interfaces), or when a separate replication network is maintained. We need to experiment to see how much restriction we can apply without limiting the instances' legitimate traffic.

Running kvm inside a container (even harder)

Recent Linux kernels support separate process namespaces through control groups: PIDs, users, filesystems and even network interfaces can be isolated. If we can set up Ganeti to run kvm in a separate container, we could prevent host processes from even being visible should the hypervisor be broken into. Most probably, separating the network namespace would require one extra hop in the host through a veth interface, thus reducing performance, so we may want to avoid that and just rely on iptables.

Implementation plan

We will first implement dropping privileges for kvm processes to a single user, and most probably backport it to 2.1. Then we'll ship example iptables rules to show how a user's network activities can be limited. After that we'll implement chroot restriction for kvm processes, and extend the user limitation to use a user pool.

Finally we’ll look into namespaces and containers, although that might slip after the 2.2 release.

New OS states

Separate from the external OS changes described below, we'll add some internal changes to the handling of OSes.

Current state and shortcomings

There are two issues related to the handling of the OSes.

First, it’s impossible to disable an OS for new instances, since that will also break reinstallations and renames of existing instances. To phase out an OS definition, without actually having to modify the OS scripts, it would be ideal to be able to restrict new installations but keep the rest of the functionality available.

Second, gnt-instance reinstall --select-os shows all the OSes available on the cluster. Some OSes might exist only for debugging and diagnosis, not for end-user availability. For this, it would be useful to "hide" a set of OSes while keeping them otherwise functional.

Proposed changes

Two new cluster-level attributes will be added: one holding the list of OSes hidden from the user, and one holding the list of OSes blacklisted for new installations.

These lists will be modifiable via gnt-os modify (implemented via OpClusterSetParams), such that even not-yet-existing OSes can be preseeded into a given state.

Hidden OSes remain fully functional, except that they are not returned in the default OS list (as computed via OpOsDiagnose) unless the hidden state is explicitly requested.

Blacklisted OSes are likewise not shown (unless the blacklisted state is requested), and are additionally prevented from being used for installation via OpInstanceCreate (in create mode).

Both these attributes are per-OS, not per-variant. Thus they apply to all of an OS’ variants, and it’s impossible to blacklist or hide just one variant. Further improvements might allow a given OS variant to be blacklisted, as opposed to whole OSes.
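
To make the intended semantics concrete, here is a hedged sketch of the two checks; the function names and the plain ValueError are illustrative, not the actual Ganeti code (which would raise OpPrereqError from its errors module):

def VisibleOses(all_oses, hidden_os, blacklisted_os,
                show_hidden=False, show_blacklisted=False):
  # Hidden and blacklisted OSes are omitted from the default listing
  # unless their state is explicitly requested
  return [name for name in all_oses
          if (show_hidden or name not in hidden_os)
          and (show_blacklisted or name not in blacklisted_os)]

def CheckOsForCreate(os_name, blacklisted_os):
  # Blacklisted OSes remain usable for reinstalls and renames,
  # but are refused for new installations
  if os_name in blacklisted_os:
    raise ValueError("OS '%s' is blacklisted for new installations" % os_name)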

External interface changes

OS API

The OS variants implementation in Ganeti 2.1 didn’t prove to be useful enough to alleviate the need to hack around the Ganeti API in order to provide flexible OS parameters.

As such, for Ganeti 2.2 we will provide support for arbitrary OS parameters. However, since OSes are not registered in Ganeti, but instead discovered at runtime, the interface is not entirely straightforward.

Furthermore, to support the system administrator in keeping OSes properly in sync across the nodes of a cluster, Ganeti will also verify the consistency of the new os_version file, if it exists.

These changes to the OS API will bump the API version to 20.

OS version

A new os_version file will be supported by Ganeti. This file is not required, but if it exists, its contents will be checked for consistency across nodes. The file should hold only one line of text (any extra data will be discarded), and its contents will be shown in the OS information and diagnose commands.

It is recommended that OS authors update the contents of this file for any change; at a minimum, modifications that change the behaviour of the import/export scripts must increase the version, since they break intra-cluster migration.
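
A minimal sketch of reading such a file, assuming only the first line is meaningful (the helper name is hypothetical):

import os.path

def ReadOsVersion(os_dir):
  # The file is optional; its absence simply means "no version declared"
  path = os.path.join(os_dir, "os_version")
  if not os.path.exists(path):
    return None
  # Only the first line counts; any extra data is discarded
  f = open(path)
  try:
    return f.readline().strip()
  finally:
    f.close()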

Parameters

The interface between Ganeti and the OS scripts will be based on environment variables, and as such the parameters and their values will need to be valid in this context.

Names

The parameter names will be declared in a new file, parameters.list, together with one-line documentation (whitespace-separated). Example:

$ cat parameters.list
ns1    Specifies the first name server to add to /etc/resolv.conf
extra_packages  Specifies additional packages to install
rootfs_size     Specifies the root filesystem size (the rest will be left unallocated)
track  Specifies the distribution track, one of 'stable', 'testing' or 'unstable'

As seen above, the documentation can be separated from the names by multiple spaces or tabs.

The parameter names as read from the file will be used in lowercased form for the command line interface; as such, no two parameter names should differ only in case.

Values

The values of the parameters are, from Ganeti’s point of view, completely freeform. If a given parameter has, from the OS’ point of view, a fixed set of valid values, these should be documented as such and verified by the OS, but Ganeti will not handle such parameters specially.

An empty value must be handled identically to a missing parameter. In other words, the validation script should only test for non-empty values, and not for declared versus undeclared parameters.

Furthermore, each parameter should have a default value (internal to the OS) that will be used if the parameter is not passed from Ganeti. More precisely, it should be possible for any parameter to specify a value that has the same effect as not passing the parameter at all, and in no case should the absence of a parameter be treated as an exceptional case (outside the value space).

Environment variables

The parameters will be exposed in the environment in upper case, prefixed with the string OSP_. For example, a parameter declared in parameters.list as ns1 will appear in the environment as the variable OSP_NS1.
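
Expressed as code, the transformation is a simple mapping; the helper name below is hypothetical:

def OsParamsToEnv(osparams):
  # {"ns1": "192.0.2.1"} -> {"OSP_NS1": "192.0.2.1"}
  return dict(("OSP_" + name.upper(), value)
              for (name, value) in osparams.items())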

Validation

For the purpose of parameter name/value validation, the OS scripts must provide an additional script, named verify. This script will be called with the argument parameters, and all the parameters will be passed in via environment variables, as described above.

The script should signal success or failure via its exit code, and show explanatory messages either on its standard output or standard error. These messages will be passed on to the master, and stored in the OpCode result/error message.

The parameters must be constructed to be independent of the instance specifications. In general, the validation script will be called with only the parameter variables set, but not with the normal per-instance variables, so that Ganeti can also validate default parameters when they change. Validation will only be performed on one cluster node, and it will be up to the Ganeti administrator to keep the OS scripts in sync between all nodes.
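
Putting the pieces together, invoking the validation could look roughly like the following sketch; it uses the standard subprocess module rather than Ganeti's own command-running helpers, and OsParamsToEnv is the hypothetical helper sketched earlier:

import os
import subprocess

def ValidateOsParams(verify_script, osparams):
  env = dict(os.environ)
  env.update(OsParamsToEnv(osparams))
  # The script signals success/failure via its exit code; its output
  # is passed on to the master as the OpCode result/error message
  proc = subprocess.Popen([verify_script, "parameters"], env=env,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                          universal_newlines=True)
  output = proc.communicate()[0]
  if proc.returncode != 0:
    raise ValueError("OS parameter validation failed: %s" % output)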

Instance operations

The parameters will be passed, as described above, to all the other instance operations (creation, import, export). Ideally, these scripts should not abort with parameter validation errors if the verify script has already validated the parameters.

Note: when changing an instance’s OS type, any OS parameters defined at instance level will be kept as-is. If the parameters differ between the new and the old OS, the user should manually remove/update them as needed.

Declaration and modification

Since the OSes are not registered in Ganeti, we will only make a ‘weak’ link between the parameters as declared in Ganeti and the actual OSes existing on the cluster.

It will be possible to declare parameters either globally, per cluster (where they are indexed per OS/variant), or individually, per instance. The declaration of parameters will not be tied to current existing OSes. When specifying a parameter, if the OS exists, it will be validated; if not, then it will simply be stored as-is.

A special note is that it will not be possible to ‘unset’ at instance level a parameter that is declared globally. Instead, at instance level the parameter should be given an explicit value, or the default value as explained above.

CLI interface

The modification of global (default) parameters will be done via the gnt-os command, and of the per-instance parameters via the gnt-instance command. Both commands will take an additional --os-parameters or -O option that specifies the parameters in the familiar comma-separated, key=value format. For removing a parameter, a -key syntax will be used, e.g.:

# initial modification
$ gnt-instance modify -O use_dhcp=true instance1
# later revert (to the cluster default, or the OS default if not
# defined at cluster level)
$ gnt-instance modify -O -use_dhcp instance1

Internal storage

Internally, the OS parameters will be stored in a new osparams attribute. The global parameters will be stored on the cluster object, where the value of this attribute is a dictionary indexed by OS name (an OS+variant name is also accepted and overrides the plain OS name, see below), whose values are in turn the parameter name/value dictionaries. For instances, the attribute holds the name/value dictionary directly.
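
For illustration, under this scheme the stored data might look as follows (OS names and values are made up):

# Cluster object: indexed by OS (or OS+variant) name
cluster_osparams = {
  "debootstrap": {"ns1": "192.0.2.1", "track": "stable"},
  "debootstrap+testing": {"track": "testing"},  # overrides plain "debootstrap"
}

# Instance object: directly the name/value dictionary
instance_osparams = {"rootfs_size": "10G"}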

Overriding rules

Any instance-specific parameters will override any variant-specific parameters, which in turn will override any global parameters. The global parameters, in turn, override the built-in defaults (of the OS scripts).
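
In code, this override chain amounts to a sequence of dictionary updates; the following is a minimal sketch with illustrative argument names:

def EffectiveOsParams(cluster_osparams, os_name, variant, instance_osparams):
  # Later updates win: global < variant-specific < instance-specific.
  # The OS scripts' own built-in defaults cover anything left unset.
  params = {}
  params.update(cluster_osparams.get(os_name, {}))
  if variant:
    params.update(cluster_osparams.get("%s+%s" % (os_name, variant), {}))
  params.update(instance_osparams)
  return params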

s:[35,24],refer:[17,27,40,44,3,29,6,50,51,15,8,24,56,35,57,58,12,13,55,36,16],extra_pkg:56,kvm:[1,15,49,48,21,50,51,8,55,56,57,35,59,13,38,24],compris:[15,29,2],power:[5,8,31,9,35,60],sixth:35,scarc:24,inspect:[15,19,50],broker:13,remote_nod:[8,62],unsuccessfulli:50,broken:[15,19,49,50,7,35,59,60],streightforward:58,fulli:[41,13,15,29,5,21,50,30,55,56,57,35,49,60,24],regexp:[34,3,15,39],op_backup_export:3,appli:[40,1,15,29,5,49,13,51,31,9,35,53,60,55],"throw":7,earlier:[18,15,29,30,8,35],comparison:[3,13,35],reservemac:35,opnodequeri:50,coupl:[15,4,22,56,35,55],tailor:[19,13],device_model:35,sinst:[7,19,56],ndparam:8,degre:50,srv:[15,13,51,56,58,16],stand:[44,35,36,41,50],act:[40,25,15,48,5,13,31,36],instance_n:12,luck:5,backup:[18,15,50,35,58,16],processor:[20,48,49,13,37,55],effici:[41,13,8,57,36,14,16],synchron:[40,6,50,49,8,34,35,36,13,53],seg_start:36,lastli:35,ignore_s:8,nic0_mod:16,contrib:51,surviv:55,seg_siz:36,runrenamescript:49,multinod:48,strip:[35,55],"import":[11,3,6,7,8,14,23,17,18,51,16,55,29,30,31,35,36,15,45,48,49,50,13,54,56,58,59,60],remoterol:36,your:[18,15,3,5,51,7,8,55,56,35,58,59,16],complianc:35,area:[29,48,49,50,13,35,36,37],aren:24,overwrit:[59,48,49,50],start:[1,2,3,28,7,8,10,14,16,17,18,19,51,22,24,25,5,30,31,33,34,35,36,37,38,39,44,48,49,50,13,15,53,54,55,56,57,59,60],instance_be_x:3,interfac:[2,3,4,6,7,8,62,14,15,21,0,24,25,27,30,33,34,35,36,37,53,40,41,44,48,49,50,13,52,55,56,57,59],low:[52,48,13,56,35,59],lot:[41,4,49,6,50,51,34,8,10,35,53,13],ipv6:[41,48,51,7,8,35,14],clusterip:15,submiss:[52,48,6,13,15,35,37],ganeti_data_dir:3,old_primari:3,machin:[1,28,7,8,9,14,16,17,51,22,24,25,27,5,31,35,36,38,39,40,44,48,49,50,15,54,55,56,59],apollon:35,unam:56,disk_count:[55,16,13],media:14,from:[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,28,29,30,31,33,34,35,36,37,38,39,40,41,44,46,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62],hard:[15,44,29,13,7,8,35,58,12,36,55],tupl:[26,15,19,48,22,50,6,8,27,33,34,35,36],hroller:[35,43,42,39],jun:35,level_nod:55,amongst:[31,48],state_reason:36,tspec:48,htpasswd:13,diffus:48,crontab:35,ganeti_instance_disk0_s:3,categor:13,untag:35,faster:[48,13,35,53,14,36],example_monitor_host3:56,notat:41,example_monitor_host1:56,node_uuid:8,algorithm:[15,29,50,13,8,55,27,35,53,24,39],possibl:[1,2,28,6,7,8,9,10,62,14,23,19,20,16,24,29,30,31,33,34,35,36,37,53,39,40,52,48,49,50,13,15,55,56,57,58,59,60],"default":[1,28,7,8,9,10,23,53,18,19,21,51,16,55,26,27,34,35,36,38,41,15,48,49,50,13,54,56,58,59],network_link:8,master_netdev:[3,8,22],bucket:29,jailbroken:59,fake_id:50,oblivi:44,vhd:16,embed:60,deadlock:[37,20,13],expect:[1,3,6,9,14,23,55,56,29,31,34,35,36,38,40,41,15,48,50,13,27,58],gone:[15,35,36],nic1:34,findmatch:34,creat:[0,1,2,3,4,7,8,9,14,23,18,19,20,21,51,16,24,25,27,31,35,36,53,39,40,41,52,48,49,50,13,15,54,55,56,57,58,59],certain:[17,25,62,41,15,60,48,49,16,30,31,23,56,33,10,59,13,55,36,24],lint:28,cpu_count:55,deep:[52,13],watcher:[18,25,44,48,49,50,13,15,31,1,56,10,35,60],strongli:59,deem:[60,20,49],decreas:[35,48,49,13],file:[1,2,3,4,28,6,7,8,10,14,23,17,18,19,20,22,16,55,25,5,29,30,31,33,35,36,38,39,40,15,48,49,50,13,54,56,57,58,59,60,61],proport:15,intra:[48,49,13,7,59,14],fill:[55,41,16,13],incorrect:23,again:[3,4,28,7,8,10,18,20,21,22,29,34,35,39,15,48,49,50,13,27,58,60],googl:[15,7,14,56],op_cluster_destroi:3,extract:[35,36,2],hybrid:[48,55],prepend:[49,33],field:[1,41,15,4,48,50,13,8,9,34,35,36,37,16],wai:[1,2,3,28,6,9,10,62,12,14,19,22,51,24,25,5,29,31,33,34,35,36,38,39,40,52,48,49,50,13,15,55,56,57,59,60
,61],compet:[53,24],hvm_nic_typ:13,export_devic:13,writabl:27,you:[17,28,18,15,3,4,5,51,50,13,7,8,54,55,56,9,10,58,59,35,16],architectur:[11,15,6,50,49,8,13,55],poor:13,collector_nam:[15,36,26],prereq:41,sequenc:[2,62,48,28,13,51,35,38,55],call_:13,on_poweroff:1,vertex:39,fsck:[3,7],disk_siz:[15,55],monadcatchio:51,networkless:41,reproduc:49,"_bridg":13,unless:[55,15,29,48,22,6,8,31,54,24,56,35,58,49,14,38,16,39],pool:[40,41,15,19,29,48,49,50,13,8,55,56,57,35,59,24,39],drbdadmin:55,reduc:[52,49,48,6,50,30,7,35,53,13,15,55],bulk:[15,4,0,8,27,35],set_pubkei:23,escal:[59,49],directori:[3,28,8,10,16,18,19,51,25,27,31,35,40,41,15,48,49,50,13,54,56,58,59],descript:[17,25,48,13,8,31,55,27,35,34,58,16],stopinst:49,vcpu:[15,44,29,48,13,7,8,16,27,55,35,38,24],week:[15,23],potenti:[40,5,29,28,6,50,8,48,55,35,36,16],escap:35,degrad:[7,2,35],gmtime_adj_notbefor:23,unset:[15,35,49],internod:15,represent:[34,36,41,50,13],all:[1,2,3,4,5,6,7,8,9,10,62,13,14,15,23,17,18,19,20,21,22,16,24,25,26,27,28,29,31,33,34,35,36,37,38,39,40,41,43,44,48,49,50,51,52,53,54,55,56,58,59,60],mountpoint:[15,40],dist:[35,18],consider:[11,14,2,38,55],op_master_failov:3,disk_:13,ssconf_master_candidates_ip:35,forbidden:13,instance_new_nam:3,mnt:15,fragment:16,lacp:21,correl:48,abil:[40,5,6,49,31,56,34,35,59,13,12],follow:[1,2,3,5,6,7,8,9,10,62,12,14,23,17,18,19,20,51,16,24,26,27,28,29,30,33,34,35,36,38,39,40,41,42,43,15,45,46,47,48,49,50,13,0,54,55,56,57,58,59,60],disk:[11,3,5,6,7,8,12,14,16,17,18,19,0,24,25,27,29,31,34,35,36,53,39,40,42,43,52,48,49,50,13,15,55,56,57,58,59,60],children:[48,50],abid:35,batcher:35,testnet1:41,rewrot:35,nodea:7,dest_cert:49,nodec:15,nodeb:7,tail:7,program:[25,27,15,49,22,30,8,31,51,56,9,34,35,14],optimis:[35,13],reload_ssh_kei:28,queri:[1,2,6,8,9,11,12,17,19,0,24,26,31,34,35,36,42,43,52,47,48,50,13,15,55,59],megabyt:[34,8,14,13],neglig:14,introduc:[2,4,6,14,16,19,24,25,29,31,33,34,35,37,40,41,52,48,49,50,13,15,55],getrsakeypath:49,queryfield:[34,35],liter:26,straightforward:[48,28,49,13,55,14,24],determinist:48,fals:[15,3,49,50,13,7,8,27,9,34,35,53,37,23],opinstancequerydata:49,ganeti_instance_be_auto_bal:3,offlin:[1,15,3,29,5,6,50,13,7,8,27,9,34,35,36,60,53,55,39],util:[17,15,48,28,49,51,56,34,35,36,14],rapi_us:[35,48,50],candid:[2,52,48,29,5,22,50,30,7,8,27,9,35,36,13,60,15,23],worst:[29,49],fall:[17,13,35],esplicitli:51,bottleneck:13,strang:55,enviroment:40,bridge_stp:56,ssl_client_s_dn_cn:30,list:[1,2,3,4,28,6,7,8,9,62,14,23,17,18,19,51,22,16,24,25,26,27,29,30,31,33,34,35,36,37,38,41,52,48,49,50,13,15,54,55,56,58,59,60,61],brctl:56,hyp:16,emul:[44,35,5],sane:9,stderr:[3,35,40,13,9],node1:[25,27,15,3,50,7,8,23,56,9,34,58,16],node3:[15,3,7,8,27,9,34],parallelis:13,node5:[3,9],mcl_current:50,enterpris:16,dfree:[7,8,19,56],tag1:[8,16],infinitesim:55,tag3:[8,16],drbd:[2,3,7,8,18,19,51,0,24,27,29,35,36,53,39,40,15,48,49,50,13,44,55,56,59,60],sync:[40,15,4,48,49,50,13,7,8,9,35,36,39],disconnect:[3,8,48,41,40],syslog:35,zero:[40,3,8,54,27,33,34,35,36,14,53,55],pressur:24,design:[0,1,2,4,5,6,8,9,10,11,12,14,23,17,19,20,21,22,16,24,25,30,31,33,34,35,36,37,38,39,40,41,42,43,52,45,46,47,48,49,50,13,15,54,55,27,57,58,53,60,61,62],pass:[2,3,28,7,8,9,14,19,51,22,24,27,30,33,34,35,36,37,53,40,41,15,48,49,50,13,54,55,56,57,59,60,61],extp_param1:40,extp_param2:40,suboptim:[35,55],nbd:40,deleg:[15,29,55,30],default_ialloc:8,sub:[15,3,13,8,56,35,36,61],"11de":50,sun:15,sum:[29,48,16,35,36,55],abl:[1,2,5,7,9,62,12,20,21,24,25,30,31,34,35,36,53,39,40,41,15,48,49,50,13,52,55,56,59,60],overload:55,ssconf_node_secondary
_ip:35,pnode:[35,4,8],abbrevi:15,version:[1,2,3,4,28,7,8,11,14,23,17,18,19,21,51,16,24,25,26,27,29,30,34,35,36,39,15,47,48,49,50,13,55,56,58,59,60],consecut:[50,38,16],ioflag:36,method:[8,14,19,21,55,29,30,31,34,35,36,38,40,41,44,48,49,50,13,15,56,59],contrast:[15,13],movement:48,millisecond:36,hasn:[3,48],fping:[7,51,35],hash:[35,8,36,23,49],a8d6:7,variat:[15,50],unmodifi:34,nic_vlan:21,lack:[41,52,4,50,13,55,35,53,60,16],modular:13,shouldn:[3,6,49,7,8,31,55,33,53,24],middl:50,morfeo:16,trunk:21,eval:8,ver:56,standard:[17,22,13,15,3,29,6,51,7,8,24,55,35,36,49,14,16],modifi:[1,2,3,28,7,8,9,12,17,18,19,20,21,24,25,5,29,31,33,35,38,39,40,41,15,48,49,50,13,54,55,56,57,59],valu:[1,3,6,7,8,9,12,16,18,19,21,22,24,25,26,27,34,35,36,37,41,44,48,49,50,13,15,54,55,56,58],evaltotru:8,search:[17,15,28,50,27,35,39],tweakabl:50,ahead:[15,48,55,50],nth:13,reason:[2,5,6,8,9,10,62,14,16,17,18,19,22,24,25,29,30,31,33,34,35,36,37,39,40,42,43,52,48,50,13,15,55,56,57,58,61],oper_vcpu:8,mac:[27,3,13,7,8,55,56,35,16],prior:[35,50],amount:[2,3,8,9,14,16,19,55,29,34,35,36,44,48,49,50,13,15,24,56,58,53],base:[4,28,6,8,9,14,16,19,51,55,27,29,30,31,52,33,35,38,40,15,49,50,13,44,56,57,59,61],isn:[15,50],reserved_lv:[35,8,55],pick:[35,19,13],action:[40,1,15,3,48,22,9,10,35,12,36,16],introductori:56,narrow:19,spfree:8,believ:50,via:[2,3,28,7,8,14,18,51,22,0,24,25,27,5,29,30,35,38,39,40,41,44,48,49,50,13,15,54,55,56,57,59,61],shorthand:53,beparam:[15,35,8,50,13],root_path:[15,7,18,35],intermedi:[31,48,23],transit:[15,60,19,48,6,50,13,35,14,55],instance_os_typ:3,network_gateway6:3,readili:1,cafil:49,filenam:[15,27,59,14,61,16],identify_default:8,famili:[7,35],heurist:[19,56],id_dsa:[15,50],decrement:36,establish:[29,48,13,7,35,59],dangl:35,select:[40,41,15,27,29,48,49,50,30,7,8,56,34,35,36,13,16,39],cluster_repair_disk_s:15,aggress:35,hexadecim:[7,38],proceed:[15,7,13,49,39],distinct:40,rt_tabl:41,ctrl:35,regist:[7,49,56],two:[2,3,4,6,7,8,9,10,62,12,14,16,17,18,19,55,27,29,31,33,34,35,36,37,38,39,40,41,52,48,49,50,13,15,56,57,58,53,60],coverag:[35,28,55],zfp91c:7,ctotal:8,autonom:[36,50],taken:[1,2,49,13,55,35,58,61,24,39],initrd_path:[15,7,18,13],metalock:13,network_mac_prefix:3,reachabl:[15,35,9],desir:[40,1,62,41,15,48,51,13,7,8,55,34,35,37,14,16,39],flap:[36,5],hundr:[35,31,14,48,55],iallocator_script:15,problem:[4,5,7,8,9,14,16,18,19,55,29,31,35,36,15,48,50,13,52,24,56,58],site:[40,27,15,3,56,35,59,55],runtime_memori:3,adapt:[4,29,28,50,8,35,36,55],flag:[15,3,19,28,48,49,7,27,9,35,36,13,24,39],worri:18,broke:35,particular:[2,15,5,50,13,16,35,59,60,36,24],op_group_set_param:[3,41],build_chroot:35,nic_count:[55,16,13],offload:35,cach:[35,36,14,48],sinst_list:8,none:[41,3,29,5,48,50,49,7,8,9,34,35,59,13,53,55,39],endpoint:[35,54],nodegroup1:41,pycurl:[35,51,14,30],hour:[18,15,48,31,35,53,23],nodegroup2:41,outlin:50,dev:[18,15,3,28,49,50,7,51,56,14],histori:[35,19,28,13],blockdev:[57,8,19,16],remain:[2,52,29,49,50,13,7,31,9,35,14,15,55],paragraph:[15,48,36,29,26],opennod:16,edit:[15,35,18,56],caveat:[13,55,9],learn:56,abandon:50,dec:35,obtain:[2,41,15,48,13,56,35,61,39],wrong_input:[34,8],transcend:[44,24],omit:[15,13,41,16,9],prompt:9,dual:[3,13,14,48,35],greaterthanzero:8,scan:[56,13],ddump:28,share:[62,28,6,8,15,18,19,20,21,0,24,27,29,31,33,35,39,40,41,44,46,48,49,50,13,52,54,55,56,59,60],templat:[17,18,11,27,42,40,15,19,43,48,13,8,25,24,56,35,57,58,55,16,39],dts_net_mirror:40,tabular:15,minimum:[41,15,48,49,7,23,56,55,35,38,24],resync:[7,13,55,35],luxid:[52,35,59,2,51],routing_table_200:41,huge:14,mfree:[34,7,8,19,56]
,"128m":7,newlin:[34,50],secur:[17,18,52,19,48,49,50,13,15,54,56,35,59,16],programmat:[34,17,60],export_do_shutdown:3,mcl_futur:50,snippet:56,neede:2,reject:[35,55],dest_clust:49,simpl:[18,13,15,48,29,28,6,50,49,7,51,16,27,55,34,35,36,24,14,23,39],plain_cert:49,regener:15,job_status_waitlock:6,threat:23,op_network_disconnect:3,overcommit:24,vlan:[7,21],variant:[19,22,50,49,8,35],sneak:4,reflect:[40,41,0,6,35,36],applicationpend:36,buffer:[55,49,50,30],numa:24,heartbeat:5,data_dir:3,associ:[44,3,29,28,6,50,13,15,48,34,35,36,14,24],std_ispec:55,set_serial_numb:23,stabil:[5,13],circumst:55,"short":[15,48,49,13,7,8,56,35,53],cabal:[51,28],ani:[1,2,3,4,28,6,7,8,9,10,62,14,15,16,53,18,19,20,24,26,5,29,30,31,33,34,35,36,37,38,39,40,41,44,48,49,50,13,52,54,55,56,57,58,59,60],davem:7,proto:[36,2],ambigu:[34,12],caus:[18,15,3,19,29,48,50,13,7,8,60,27,35,34,58,36,14,53,16],callback:[37,13,30],opinstancemultialloc:53,call_upload_fil:20,rebalanc:[19,50],scsi:[56,13],basehttpserv:30,egg:19,iscsi:40,logrot:[35,56],group_network_link:[3,41],sysctl:7,blktap2:[35,8],burn:7,check_privatekei:23,soon:[48,50,13,7,8,54,35,59,60,36],cross:[36,62,50],new_secondari:[15,3],held:[20,48,49,50,6,7,35,53,13],i386:50,through:[1,2,28,7,8,9,53,14,16,17,19,21,22,24,26,5,31,33,35,36,38,39,40,15,48,49,50,13,0,55,56,57,59,60,61],reconnect:35,minmax:[8,27],suffer:[15,13,49,50,39],paramet:[1,3,7,8,9,12,14,15,16,53,18,19,21,22,24,26,29,31,33,34,35,36,38,40,41,52,48,49,50,13,44,54,55,56,59,61],src_node:[3,8],style:[13,8,34,35,30,14],job_id:[8,6,13],undrain:[15,35],exact:[2,27,15,29,13,56,34,35,36,14,55],epoch:[34,36,33],instance_reinstal:35,late:[52,13],resort:13,html:[28,51,14],rapidli:5,op_init_clust:3,curlopt_writefunct:30,pad:[36,33,30],might:[1,2,4,28,6,7,8,14,15,18,19,24,27,5,29,30,31,33,34,35,36,37,39,40,44,48,49,50,13,52,56,57,58,59,60],alter:[48,56,13],pae:[35,13],wouldn:[35,60,14,6,49],good:[18,15,48,5,50,30,7,51,31,56,57,36,13,14,55,39],"return":[2,3,4,5,6,8,9,10,62,14,26,27,29,34,35,36,37,40,15,48,49,50,13,56,60,61],vg_uuid:36,no_instal:[35,8],"22459cf8":7,timestamp:[15,48,50,33,34,35,36,60,16],pollut:[49,50],chuid:48,configobject:34,oldnod:7,op_cluster_ip_turndown:3,somebodi:36,instance_primari:3,detach:[7,40],complain:[15,7,35,9],export_nod:3,gethostbynam:48,turndown:3,eventu:[41,20,49,50,22,31,56,36,16],ignore_offline_nod:8,"3de":30,troubleshoot:[15,7,13],instruct:[17,25,15,29,56,35],authent:[49,50,13,7,8,35,59,14],tear:13,token:59,instance_nicn_network_mac_prefix:3,libghc:[51,28],vg_name:[15,8,36],ceas:2,found:[1,22,27,15,48,19,49,28,6,30,7,8,51,56,34,35,59,13,60,54,55],intervent:[15,36,18,9],add_mod:3,ifi:56,pcre:51,subsystem:[36,5,55],status:13,instancesetparam:35,weight:[36,29],a5ad:27,serial_no:[8,13],lugroupassignnod:35,idea:[5,19,48,50,36,37,60],group_list:8,realli:[15,7,13,50,9],heavi:[49,55],initrd:[15,35,18,56],connect:[2,3,7,8,13,14,21,22,27,29,52,35,36,38,40,41,15,48,49,50,51,44,54,56,59,61],stabl:[18,2,49,50,13,35,36],http:[17,26,11,44,49,51,30,38,15,8,56,35,2,59,13,14,36],thing:[2,29,28,50,13,8,31,56,55],todo:[19,29,48],orient:[7,48,17],hostnam:[25,41,50,31,56,35],archivejob:[49,13],voidspac:35,safeti:[15,35,18,2,13],oper_ram:8,shortcom:[1,2,4,5,6,11,12,14,23,21,22,24,30,33,34,36,37,39,41,48,49,50,53,60,61],publish:50,footnot:56,recoomend:18,health:[35,8,60,36,9],healti:36,req_submit_many_job:52,uidpool:[48,50],print:[25,28,49,50,7,8,34,35,36,23],occurr:[24,13],file_nam:13,difficulti:[34,35,29,50,13],qualifi:56,assist:50,sheepdog:48,proxi:[14,30],advanc:[15,51,55,56,13],upon:[53,2,41,44,4,5,6,13,35,12,1
4,36],pcpu:[35,48],src_path:[3,8],gntconfd:48,pub:50,snode_uuid:8,quick:[17,27,15,28,13,51,31,56,35],ovfconvert:[58,51,16,35],source_cert_fil:49,rmem:55,ask:[18,2,15,19,49,50,9,35],opinstancebulkadd:4,bash:[28,14,56,35],basi:[35,48,21],instance_uuid:8,luclusterverifygroup:[35,53],pyyaml:28,bring:[18,52,50,13,15,9,35,55],thread:[53,2,52,48,20,49,50,13,35,36,37,38],capabl:[2,41,15,3,48,21,56,9,35,59,60],exponenti:14,perhap:[49,39],perman:[15,35,12,60,55],unknownfield:34,virtualsystemcollect:16,relocate_from:27,instanceshutdown:1,ganeti_instance_os_typ:3,name2:28,feed:48,pleas:[18,15,5,50,7,51,55,56,35,10,58,36,16],major:[13,15,48,6,50,49,7,35,36,37],notifi:[48,37,14,49,50],obviou:[19,5,36,24],lvm2:51,diskless:[25,15,19,13,8,31,35,58,16],exchang:[15,48,49,7,54,59],misc:35,number:[2,3,28,7,8,14,23,18,21,16,24,25,26,27,29,30,33,34,35,36,37,38,39,40,41,52,48,49,50,13,15,54,55,56,58,53],sptotal:8,mcpu:[53,20,49],extern:[1,2,8,9,12,14,23,17,16,27,29,31,33,35,36,40,41,15,48,49,50,13,56,57,58,60],dsa:[35,22],smaller:[41,52,29,48,49,13,35,38,55],done:[1,2,3,4,28,7,8,14,16,53,18,20,21,22,24,5,34,35,36,38,39,40,52,48,49,50,13,15,56,58,59,61],unhealthi:29,preferred_lft:56,"262e":8,unconf:18,miss:[18,15,3,5,49,50,13,7,51,55,34,35,60,38,16],differ:[2,3,28,6,7,8,14,16,53,19,21,51,24,27,5,29,31,35,36,38,39,40,41,52,48,49,50,13,15,55,56,57,58,59],master_netmask:[3,8],guest:[15,35,56,24],pandoc:[35,28],interact:[2,52,5,19,29,28,48,49,15,55,34,35,36,13,57,23],construct:[49,55,13],all_disks_don:49,zealou:35,expand:[50,30,56,34,35,36,13,55],accept:[1,62,6,7,8,12,14,23,19,16,55,29,35,59,40,15,48,49,50,13,27,58,53,60],mlock:[35,61],gpt:35,tasklet:[50,13],idisk_provid:40,store:[40,2,41,15,48,21,50,49,8,55,56,34,35,36,13,14,23],assign:[41,15,48,38,7,8,54,55,27,35,58,12,13,36,24],option:[3,28,6,7,8,9,10,12,13,14,16,18,19,21,22,24,25,27,30,31,34,35,36,38,39,40,41,52,48,49,50,51,15,54,55,56,58,53],para:44,blindli:13,compens:[52,55],instance2:[15,3,27,55,7],illeg:24,appropri:[40,2,41,15,13,8,54,16,56,62,59,36,24],pars:[52,49,50,30,15,8,27,35,13,14,16],foo555:8,instance4:[7,27],consult:[48,27,50],fred:8,reinstal:[15,3,28,49,50,7,8,35,60],anoth:[62,3,28,6,7,8,9,12,14,20,24,25,29,30,31,34,35,36,37,40,15,48,49,50,13,52,55,57,58,53,60],kind:[40,26,15,3,6,50,13,8,56,34,36,55],scheme:[8,50],grep:[15,7,18],whenev:[60,18,48,24,13],remot:[17,2,15,40,44,3,29,48,6,30,52,8,31,61,33,35,59,49,14,36,13],gotten:[37,48],basenam:49,riski:[18,60],onam:15,ca_kei:23,perfind:36,stp:56,str:[40,50],consumpt:[35,48,49,55],stale:[35,50,13],toward:[2,15,50,31,33,55],opstatu:8,randomli:13,cleaner:[15,35,19,2],comput:[25,62,15,19,29,48,49,50,13,7,8,55,27,36,24,39],disk1:34,group2:8,beforehand:20,defect:50,ceph:[35,51,48,56,40],packag:[18,41,28,49,50,7,51,55,56,35,58,38,16],opupdatenodegroupcach:48,expir:[35,49,50,13],f0af:15,opensusestudio:16,dedic:[2,15,49,22,56,14,24,39],"null":[26,15,3,8,27,36],max_cpu_ratio:48,"2ffc":41,unintend:35,relationship:50,lib:[40,18,31,27,15,3,19,48,22,50,51,7,8,25,54,56,57,35,13,23],hunit:28,hspace:[19,48,31,24,35,55],self:[41,60,5,49,13,54,35,59,14,23],violat:[8,24],"10f3e114232d":27,oper_st:[8,1],qr_lock:[52,35],undeclar:49,also:[1,2,3,4,5,6,7,8,9,12,15,16,53,18,19,20,51,22,24,25,27,28,30,31,33,34,35,36,37,38,39,40,41,44,48,49,50,13,52,55,56,57,58,59,60],build:[17,15,27,44,28,51,52,22,31,32,56,35,59,36],swapfil:7,localst:36,brace:8,secondary_nod:15,recurs:[15,29],vnc:[35,8,48,50,56],distribut:[40,18,15,20,28,49,50,13,51,55,56,57,35,59,36,16],exec:13,op_network_add:[3,41],previou:[18,2,62,52,48,6,50,13,34,33,10,35,36,3
7,60,15,55,39],reach:[10,15,50,56,13],most:[2,3,28,7,8,9,14,23,17,51,22,16,24,29,30,31,34,35,36,15,48,49,50,13,54,55,56,58],ifup:[35,56],prolifer:[52,50],plan:[11,52,60,48,49,50,13,15,16,56,35,34,2,36,55,57,23],disk_tot:55,alpha:4,addr:56,chkconfig:51,hereaft:[15,2],quorum:[5,13],bug:[52,60,19,49,7,56,9,35,59,14,15],instance3:[7,27],op_group_evacu:3,clear:[15,3,29,48,13,8,31,55,35,60,24],evacu:[62,15,3,29,48,50,13,7,8,27,35,37,60,55,39],cover:[62,19,48,49,50,13,55],equalorgreaterthanzero:8,destruct:[15,35,1,60],"2nd":35,ext:[15,35,8,19,40],instance1:[25,15,49,7,27,55],quickcheck2:28,clean:[1,2,48,28,50,13,7,8,56,35],dt_:35,x86_32:15,usual:[1,2,41,44,3,29,51,50,13,7,8,18,54,55,56,35,25,14,15,16],microsoft:16,respect:[15,3,19,29,48,21,49,56,9,35,36,13,14,55],instance7:7,sector:[36,55],test1:41,wsgi:30,sample_provider2:40,canceljob:[49,13],sample_provider1:40,carefulli:[56,13],finer:35,alphanumer:15,progressunit:36,session:[35,50],particularli:[17,2],checksum:[14,16],ganeti_instance_nic0_mac:3,jobqueue_upd:13,font:7,fine:[15,48,50,13,7,31,55,56,14,16],find:[18,15,29,49,50,13,7,9,27,33,10,36],impact:[52,29,48,13,7,27,35,36,53,55],access:[2,6,7,8,14,23,17,21,16,26,29,31,35,36,40,15,48,49,50,13,44,54,56,57,59],firewal:[59,13],libgfapi:57,pretti:[58,28,14],bfd:55,merger:[10,35,50],solut:[11,5,6,7,8,62,14,22,55,29,31,34,35,37,40,48,49,50,13,24,27,60,61],"1aaf4716":7,notifystart:49,couldn:[15,7,53,62],queu:[35,36,48,6,13],factor:[14,29,2,6,55],curlopt_proxi:14,disksiz:13,std:[35,8,27],remedi:13,key_nam:49,hit:48,unus:[15,33,50,13,7,35,58],exposur:55,no_rememb:[35,8],"__file__":7,express:[15,56,33,34,36,55],cheaper:[35,55],drbd_helper:8,nativ:57,simplist:15,"3400m":27,network6:[8,41],asyncor:[35,49],desiderd:36,fillhv:13,network2:8,restart:[1,2,44,3,48,50,13,25,15,22,18,56,35,59,14,55],network1:8,instance_secondari:3,pnode_uuid:8,syslogd:7,irrevers:15,instancenam:13,umount:15,rfc:[49,30,8,35,59,14,23],matchobject:34,common:[40,27,15,3,19,29,28,48,50,13,7,51,56,59,60,16],userspac:[57,51,56],srcversion:36,remov:[2,3,5,6,7,8,10,12,14,16,18,19,21,22,55,28,29,31,35,38,40,41,52,48,49,50,13,15,56,57,59,60],wrote:31,commod:40,arp:51,certif:[17,18,11,49,50,30,22,54,23,35,59,14,61,16],set:[1,2,3,4,28,7,8,9,62,13,14,23,53,18,19,20,21,22,16,24,25,27,30,31,34,35,36,38,39,40,41,15,48,49,50,51,54,55,56,58,59,60],dump:[59,14,49,56,13],taggabl:48,ganeti:[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62],startup:[44,3,20,49,50,13,15,8,31,56,35,55],"256mib":7,ara:59,see:[62,28,6,7,8,9,14,15,53,18,19,51,22,24,25,27,5,29,30,34,35,36,38,39,40,44,48,49,50,13,52,54,56,57,59],sed:15,dumb:[15,35],disk0_meta:7,arg:[35,13],reserv:[41,48,50,13,8,55,56,33,35,14,24],pvm:[18,27,44,13,7,8,56,35,15,16],set_vers:23,hvm_pae:[35,13],instance_nicn_network_uuid:3,inconveni:33,op_instance_migr:[3,62,6],someth:[15,60,19,6,13,7,31,56,10,35,36,14,24],equalis:55,topmost:50,stall:[48,13],forcefulli:2,kpartx:[15,7,56],drbd2:7,readsnum:36,subscript:[44,35,49,55],experi:[49,13],altern:[40,15,48,28,50,13,8,31,60,56,14,38],signatur:[54,37,14,49,50],latin:35,syntact:[36,13],numer:[34,35,12,36],"9aa6ce92":50,javascript:8,isol:[56,24],disallow:[15,60,29,56,35,14,16],lowercas:[15,36,26,49],succeed:[60,27,9],frozenset:40,op_instance_recreate_disk:3,opportunistic_lock:[35,8],ssh_root_kei:22,both:[1,2,28,7,8,62,12,16,19,51,22,24,27,29,30,31,35,36,38,39,40,52,48,49,50,13,15,54,55,56,58,59,60],last:[15,25,52,48,49,50,13,7,8,56,33,35,12,
60,36,39],delimit:[3,13],getnodeinfo:50,incooper:4,alon:[41,29,13,9,35,36],event:[17,1,5,13,35],foreign:15,priority_opt:35,speicif:10,alow:55,disksect:16,forgotten:[15,48,13],bond:21,whole:[40,25,41,15,48,21,50,49,16,9,34,35,36,13,55,53,24,39],physdev:[15,7],load:[52,4,29,28,21,50,49,15,31,55,56,34,35,36,13,24],old_instance_nam:13,troubl:[58,8,49,16],simpli:[62,15,48,29,5,49,50,13,7,51,31,54,35,58,14,55],figur:24,instanti:50,schedul:[15,13,7,33,36,60,24,39],i_pri_memori:27,uptod:35,header:[18,49,13,8,35,14],node_group:48,littl:[58,38,61],shutdown:[17,1,11,40,52,3,33,48,49,50,13,7,8,56,9,35,15,39],linux:[15,5,28,49,7,51,0,56,55,35,59,36,24],mistak:[15,35,13],bridg:[41,27,15,3,21,50,51,7,8,31,24,56,35,58,13,16],sever:[25,15,19,48,6,49,8,31,34,35,53,13,61],simpler:[18,52,6,50,49,7,34,13,16],backend:[1,2,7,8,14,16,19,55,30,31,33,35,36,40,41,15,50,13,44,54,57,58,60],mynode1:19,mynode3:19,unsuccess:[62,50],"30mb":35,help:[18,52,28,5,21,50,49,15,31,48,56,9,35,59,39],overlook:13,devic:[3,7,8,12,14,18,19,21,55,35,36,40,42,43,15,49,50,13,44,56,57,59,60],due:[2,4,28,6,7,8,14,16,51,24,25,31,35,53,52,48,49,50,13,15,55,56,59,60,61],empti:[62,27,15,3,19,29,48,6,50,49,8,56,33,34,35,36,13,60],x509_signkey_fil:49,"7181fba":57,unlock:[20,50,13],instance_disk_templ:3,devis:13,luinstanceactivatedisk:48,strategi:[15,17],autogen:28,setopt:14,etc:[2,3,28,7,8,12,15,18,51,24,25,29,31,33,34,35,36,38,40,41,44,48,49,50,13,52,55,56,59,60,61],fire:13,imag:[40,15,3,13,7,56,35,57,58,59,14,16],shuffl:7,gap:35,coordin:13,"44d3":8,understand:[5,49,13,36,30,55],demand:[52,48,13],ganeti_instance_primari:3,bitfield:41,opbackupprepar:14,cfgshell:15,dom0_vcpus_pin:56,e61fbc97:15,whilt:56,uncertain:56,look:[40,52,19,29,6,50,49,7,8,55,27,35,10,58,36,13,60,16],evac_mod:27,straight:[8,38,55],erron:[59,13],batch:[15,4,49,6,13,35],durat:[53,48,13],"while":[2,3,28,6,7,8,62,12,14,15,23,18,20,24,25,27,29,30,34,35,36,37,40,41,52,48,49,50,13,44,55,56,60],runtask:49,"256m":7,match:[60,27,62,15,3,28,22,50,13,7,8,55,48,56,9,34,35,14,38,23],behavior:[1,50,51,16,35,24,39],error:[1,3,4,28,6,7,8,9,10,14,23,18,22,16,24,26,5,34,35,36,38,40,41,15,48,49,50,13,58,53,60,61],cleanup:[1,15,49,50,13,7,8,33,35],loop:[7,8,13,35],pack:[58,16],propag:[8,48,36,50,13],xvzf:56,readd:[15,7,50,35],cpu_numb:36,zrci:7,ctx:23,demon:18,activ:[17,1,31,15,3,49,5,48,50,30,7,8,18,60,55,35,59,13,14,36,24],unitari:55,costli:8,demot:[15,7,13,5,35],rid:[15,2,13],deamon:[35,2],availabilili:56,churn:6,fedora:[35,51],obsolet:[3,18,13],belong:[26,41,15,48,21,50,27,35,36],nanosecond:[36,33],node_list:8,shorter:48,destination_x509_ca:8,lengthi:55,decod:[8,22,13],nic0_ip:16,curle_range_error:14,custom_diskparam:8,disk0_siz:16,agnost:56,conflict:[41,48,20,50,8,10,35,36,38,55],inotifi:[51,6],htool:[17,27,15,48,29,28,6,7,31,56,33,35,55,39],concaten:21,ganeti_instance_nic_count:3,max_disk_usag:48,messsag:35,tmem:[44,55],wherea:[27,52,3,29,50,15,55,56,16],alert:[8,36,38],moment:[62,28,21,13,7,58,36,59,55],ganeti_hooks_path:3,lxc:[35,8,55,24],stripe:[35,50,24,55],libc6:56,impedi:13,robust:[52,35,13,2,40],lxm:51,specialis:3,stack:[15,48,50,13,14,55],en_u:[35,28],stateless:[5,13],travers:35,sha1:[23,16,49],equival:[41,13,15,28,6,49,35,37,60,39],discourag:35,eleg:14,entri:[25,27,48,50,7,22,56,9,34,35,36,38],spent:[36,14],honor:[35,48,62],pickl:35,person:[36,60,16],reservelv:35,commonli:[48,16],elev:14,miicsdccahmgawibagi:49,iproute2:51,traffic:[49,7,56,35,59,55],fibrechannel:40,ourselv:29,nic_ip_pool:41,weakest:59,explan:[58,62,50,17],op_instance_remove_mddrbd:3,dts_ext_mirror:40,reduct:55,opi
nstancesetparam:12,debug_level:13,luinstancequeri:35,administ:49,openstack:16,poller:35,coul:5,priorit:[48,13],move:[11,3,7,62,14,16,17,22,51,24,29,35,36,39,15,46,48,49,50,13,52,54,55,27,58,59,60],password:[50,13,7,8,54,56,35,61],cur:15,received_t:8,disk0:[34,15],scenario:[15,31,48,13],disk2:34,restructuredtext:35,pubkei:35,theoret:[48,14,55,30],stabilis:[35,55],forbid:12,"0x0000000a":38,realloc:27,snap:[35,51],input:[40,15,4,29,48,8,50,13,7,22,31,27,35,36,14],prealloc_wipe_disk:[35,8],subsequ:[50,49,56,13],ld_:35,fcgiwrap:14,bin:[15,3,49,50,51,56,9,35,38],bio:7,disk_space_tot:27,transpar:[41,15,48,13,51,14,24],ipmi:[44,9],subfield:36,distrupt:15,unexpir:49,bia:50,custom_beparam:8,nginx:30,alias:[35,31],bit:[48,28,6,50,31,56,35,59,38],awaken:49,like:[2,3,5,7,8,10,14,23,19,51,16,24,29,30,33,34,35,36,37,39,40,15,48,49,50,13,55,56,57,58,59,60],resolver_error:8,lost:[15,13,7,31,55,35,23],semi:[55,56],justif:16,docutil:26,signal:[2,48,49,50,8,35],resolv:[15,60,48,49,8,56,35,58,14,16],extra_packag:49,manifest:16,collect:[35,36],fake:[8,25,31],api:[17,2,15,49,48,6,50,30,57,8,61,27,33,34,35,59,13,14,36],diskn:15,flup:[14,30],modprob:56,encount:[60,48,58,53,14,61,16],versa:[15,60,50],often:[49,8,36,60,61,16],t30jmn:15,dest_cluster_nam:49,creation:[40,0,41,15,4,48,21,50,49,7,8,31,24,55,35,59,13,53,12,16,19],some:[1,2,3,4,28,6,7,8,53,62,14,23,17,18,19,51,22,16,24,25,29,30,33,34,35,36,37,38,39,40,41,52,48,49,50,13,15,55,56,57,58,59,60,61],back:[40,25,15,60,29,48,49,50,30,8,18,9,35,13,14,24,39],understood:15,unspecifi:35,sampl:[14,11,56,23,30],remove_uid:8,sight:[48,13],mirror:[40,15,3,7,56,35],server:[2,7,8,9,11,14,17,51,26,30,34,35,36,40,15,48,49,50,13,44,54,56,59,61],collectori:36,insepar:13,scale:[31,48,49,56,13],plugin_specif:36,go_her:36,shall:[10,21,61],pep:30,per:[3,28,8,14,23,19,21,16,55,25,26,27,29,31,35,36,37,38,39,40,15,48,49,50,13,56,59],pem:[18,49,48,22,50,30,8,54,61,23],substitut:56,retri:[60,48,50,13,8,35,14],larg:[17,41,50,30,22,56,58,53,37,14],dts_mirror:40,recognis:35,buildbi:36,prof:28,proc:[7,36,18,35],recogniz:1,cgi:[14,30],gntnode:48,run:[1,2,3,4,5,6,7,8,9,62,14,15,16,18,20,51,22,24,25,27,28,29,30,31,35,36,38,39,40,44,48,49,50,13,52,54,55,56,57,59,60],reserved_count:8,pyopenssl:[51,23],instance6:7,unlik:[8,49,14,12,30],viabl:40,sacrif:53,noteworti:16,step:[18,2,15,29,50,30,7,33,31,56,9,10,36,13,14,57,55,39],squeez:[35,51,50,56],prerequisit:[15,18,27],meantim:[4,48,6,8,35,14,24],wget:[8,14,56],shorten:[17,27,35],subtract:24,crond:7,constraint:[40,41,15,3,19,55,24],mem_siz:55,bgl:[48,53,20,55],idl:[49,55],ganeti_instance_disk_templ:3,bash_complet:35,block:[40,18,13,44,53,19,48,49,50,30,7,8,52,56,57,35,36,37,14,15,24],timewrit:36,instance_nicn_network_subnet6:3,repair:[18,43,15,48,29,5,50,13,7,8,9,35,36,60],instance_hv_x:3,primarili:40,lifetim:[48,13],pythonpath:28,curlopt_maxconnect:14,within:[25,62,27,29,48,49,50,13,54,56,35,59,37,36,24],seamless:15,network_mod:8,import_index:13,tdsk:55,protocol_vers:8,chang:[1,2,3,4,5,6,7,8,9,53,11,12,14,15,23,17,18,19,21,22,16,24,27,28,29,30,31,33,34,35,36,37,38,39,40,41,44,48,49,50,13,52,54,55,56,58,59,60,61,62],artifici:[31,13,30],characht:35,occupi:41,inclus:[8,32,50],span:26,mask:[38,50],spam:35,declarelock:[20,13],fledg:[50,30],question:[53,37,55,9],submit:[1,2,13,15,22,4,48,6,49,7,8,34,35,53,37,60,46],custom:[27,15,5,28,50,13,31,56,35,14,55],adjac:36,doubt:14,pocoo:35,sslv2:30,forward:[2,19,30,8,33,36,38,55],etch:[8,18,50],usr:[25,27,15,3,28,49,50,7,51,18,56,9,38],jobexecutor:6,properli:[1,2,52,5,49,50,13,7,35,15,16,39],reorgan:2,lockless:4
8,writeord:36,"417c":7,admin_st:[35,8,1,36,27],pwd:28,state:[1,2,4,5,6,7,8,9,10,11,12,14,15,23,19,20,21,22,24,29,30,33,34,35,36,37,53,39,41,44,48,49,50,13,52,55,59,60,61],link:[41,15,49,50,7,8,56,35,16],translat:[40,13,56,30,38,55],sdb:[7,13],delta:[15,35],russian:30,ns1:49,line:[2,3,7,8,14,16,51,55,27,31,33,34,35,36,38,40,15,48,49,50,13,56,58,59,61],mitig:59,xendomains_sav:56,already_exist:8,info:[40,1,41,15,19,48,50,13,7,8,18,27,9,35,25,38,16],utc:7,"03af5da9dc50":7,utf:[35,28],nic_link:21,consist:[2,3,7,8,62,19,55,25,31,34,35,36,40,41,52,48,49,50,13,15,24,56],simplif:55,free_memori:27,cim:16,growth:15,"_mac":13,inet:56,redistribut:[18,15,29,48,50,8,35],"4bea":7,highlight:15,similar:[1,3,28,7,8,14,23,18,22,16,55,30,35,15,48,49,50,13,54,24,27,57,53],signed_cert:49,memtot:34,constant:[40,41,19,49,48,21,50,6,8,31,55,57,35,13,16],nic_:13,backrefer:34,allocateport:50,new_primari:3,doesn:[3,4,5,6,7,8,10,14,23,19,21,51,24,29,31,35,36,40,15,48,49,50,13,55,56,59,60],repres:[15,3,19,33,48,50,13,7,55,56,9,34,36,38,24],sxp:56,proper:[1,15,29,48,38,16,35,59,13,36,24],incomplet:[17,41,16,35],"1400m":27,home:[15,3,49,30,35,14],disktempl:16,nas2:40,ignore_secondari:[3,8],unifi:[55,13],titl:[34,35],sequenti:[41,13],exportdisk:49,nal:53,accross:[46,62],bracket:55,librari:[41,28,20,6,30,7,8,31,51,48,56,33,57,35,36,13,14,53],nat:[8,16],instance_memori:3,mock:[35,31,28],nice:[50,56,13],deseri:6,node2_nam:15,gigabyt:[14,55,13],shared_file_storage_dir:8,spice_image_compress:35,elsewher:29,meaning:[19,53],force_vari:8,evac:[8,29],score:[29,55],unaffect:2,svn:35,infrequ:35,sparringli:55,noqueu:56,vice:[15,35,60,50],nic2:34,nic3:34,nic0:34,confirm:[18,15,30,7,9,13,14],notion:40,depth:[35,48],leak:[36,61],far:[52,13,8,55,36,30,23],fresh:[36,2],vsize:7,fr5e:15,vnc_console_port:13,getaddrinfo:48,code:[2,3,4,28,6,8,9,11,14,15,23,19,20,22,55,27,5,30,31,34,35,36,37,53,40,41,44,48,49,50,13,52,56,59,60,61],req_wait_for_job_chang:49,edg:39,scratch:3,confd:[2,52,50,15,51,31,0,35,59,36],issue10882:14,vgextend:56,ip_address:49,urandom:49,untangl:52,unfulfil:51,cognit:13,op_group_remov:3,compact:[57,41],privat:[48,49,22,54,59,14,23],ganeti_api_vers:13,last_resort:[8,48],src_cert_fil:49,sensit:[59,27,56,13],base64:41,friendli:41,simliar:8,granular:[36,49,13],evolv:40,becam:[60,6,61],diskinfo:[15,49],networksect:[58,16],execwithunuseduid:50,inst_list:8,xenbr0:56,sent:[26,15,49,50,13,8,35,2,36,14],inject:35,deactiv:[15,3,8,35,7],unverifi:49,cheapli:[29,48],rollback:[10,18,50,13],whichev:55,rc5:35,querygroup:35,account:[40,62,41,29,48,13,8,31,55,27,35,58,24],tue:[7,35],graphic:16,volum:[40,15,19,50,13,7,8,55,56,57,35,59,36,24],removenod:49,rubi:8,untouch:60,implicitli:41,lv_attr:35,runnabl:31,tri:[15,19,20,50,13,7,8,31,9,35,55],magic:[50,13],opt:9,scalabl:[35,48,49,50,13],fewer:[15,35,19,29,6],queryinst:35,replication_ip:15,race:[35,2,60,13],cgroup:24,dealt:13,freed:7,claudia:16,malici:35,impli:[40,8,31,22,24],x509v3:23,luclusterredistconf:50,clusternam:[8,56,13],natur:[48,49,24,55],get_subject:23,add_uid:8,cron:[44,15,18,56],node_tags_:3,debootrap:50,instance_nicn_network_gateway6:3,accomod:35,ifdown:56,valid_lft:56,secwritten:36,odd:55,append:[10,13,55,50,33],osp_:[40,49],compat:[17,18,11,41,27,19,6,50,21,8,51,56,34,35,53,13,14,38,24],index:[3,48,49,50,13,7,8,27,33,35,12,55],copyfil:[15,35,18,48,13],undetect:35,compar:[40,29,48,49,50,13,35,59,36,24],outsmart:24,affin:[51,38],henceforth:[53,14],experiment:[7,5,56,35],hostkei:7,bridge_port:56,obvious:[19,5],journal:[3,7],deduc:[7,27],can:[1,2,3,4,5,6,7,8,9,10,62,12,13,14,15,23,17,
18,19,20,21,22,16,24,25,26,27,28,29,30,31,33,34,35,36,37,38,39,40,41,44,48,49,50,51,52,53,54,55,56,57,58,59,60,61],spice_password_fil:35,shelltestrunn:28,ration:35,runtime_mem:8,chose:50,acceler:35,despit:54,syncstatu:36,len:49,bodi:[35,8,14,13],intercept:49,let:[25,2,41,48,49,13,7,57],ubuntu:[35,51,28,56],extglob:35,vcluster:25,safer:39,bdev:[57,40],win:[15,60],implicit:[35,29,48],obsolesc:40,libnss:35,convert:[17,15,28,49,50,13,56,35,34,58,16],convers:[15,58,13,16,35],vbd:7,overwritten:35,hypervisor:[1,2,3,7,8,16,21,51,24,25,27,31,35,36,38,41,15,48,49,50,13,44,54,55,56,59],conceiv:[15,13],larger:[15,35],reus:[13,7,35,36,14,55,39],later:[18,15,48,4,61,28,49,50,13,8,1,56,35,53,14,60,55,39],glusterf:[57,17,11],bootdata:7,cert:[23,16,49],temporaryreservationmanag:41,ssconf_node_primary_ip:35,rdb:[35,19],earli:[35,4,14],opinion:14,typic:[58,13,16,40],problemat:28,explain:[62,27,15,48,49,56,57,36,60,16],chanc:48,revoc:23,danger:[15,5,20,7,56,36,61],apr:35,"_sre":34,app:33,offset:[36,14],gatewai:[35,8,41,56,30],apt:[51,28,56],"boolean":[29,48,22,13,8,27,9,34,35,53],"512mib":[7,56],immut:[48,50],opcod:[62,3,4,6,8,14,20,22,55,33,34,35,36,37,44,48,49,50,13,15,27,53,60],sharedlock:[20,48,49,50,13,53],ocf:5,cloud:[29,16],dump_certif:23,fed:48,feb:35,usb:[58,16],commun:[2,3,48,50,13,7,27,9,59,14,60,23],doubl:[35,31,18,48,50],querytag:35,upgrad:[17,18,2,15,19,50,13,56,35],mydir:19,next:[2,15,29,48,6,50,30,7,51,35,13],websit:30,few:[17,15,48,28,51,50,49,8,31,54,55,34,35,13,60,24,39],import_devic:13,"791667d07800":7,checker:35,releaseuid:50,ganeti_rootdir:[31,25],remaind:40,sort:[35,29,48,60],masterfailov:[35,48],sudo:[51,28],src:[15,35,28,54,56],ssh_host_kei:22,mismatch:[15,7,38,35],sbin:[15,3,56],balanc:[4,29,28,21,50,16,55,35,36,53,24],backslash:35,some_late_data:36,pkei:23,"transient":[52,35,13],"32gb":7,insensit:[35,8,51],name1:28,central:[34,15,36,21],osuf:28,focu:[48,39],greatli:[40,13],fan:9,"5fd6b69479c0":6,unacknowledg:36,reboot_typ:3,retriev:[49,13,8,34,35,37,61],vswitch:[17,11,21],augment:48,perceiv:[4,60],alia:35,keyusag:23,autorepair:[15,60,36,5],timescal:48,executor:[35,2,6,49],meet:9,exc:48,fetch:[8,1,36,49],"7ydj":7,proof:[35,50],control:[40,53,15,3,29,22,49,8,10,35,12,13,14,38,55],all_set:55,tap:35,slip:49,tar:[18,28,56],process:[2,6,8,14,23,18,20,51,22,16,24,30,35,36,37,38,40,52,48,49,50,13,15,56,53],lock:[17,2,13,52,48,29,20,6,50,49,15,8,0,9,34,35,53,37,60,61,55],migrate_l:3,candidate_pool_s:[8,13],src_imag:3,ganeti_op_target:3,pictur:[14,11],rapi:[2,4,8,12,14,23,18,22,55,29,30,31,33,34,35,37,39,44,48,49,50,13,15,54,59],tag:[41,15,3,29,5,13,8,27,35,36,60,16,39],sligthli:3,ganeti_object_typ:3,tab:[28,49],"_path":13,instance_nicn_mac:3,onlin:[15,19,5,7,9,60,55,39],serial:[2,27,62,44,4,34,49,50,13,7,8,56,33,10,35,53,14,15,23],pawt:15,hvm_disk_typ:13,delai:[52,60,48,50,30,9,35,59,14,61,55],kib:[36,55],sip:8,forese:13,sit:[35,55],qcow2:16,vnc_password_fil:50,subdirectori:35,instead:[1,2,28,8,9,12,14,23,19,22,16,24,27,29,34,35,36,37,38,40,52,48,49,50,13,15,54,55,56,53,60],ovftool:[58,16],weaken:49,panic:13,source_handshak:8,migrateinst:35,everywher:[35,13],workerpool:48,nph:14,op_cluster_renam:[15,3],myvg:19,stop:[15,28,1,52,3,5,48,50,49,25,7,31,18,56,33,35,59,13,14,36,55],realoc:27,surfac:[49,30],walk:[7,17],metavg:[35,19,55],inst:[35,38,16],attent:[7,53,20,56,35],op_cod:3,netcat:49,redund:[15,7,55,9,35,60,24,39],physic:[15,25,27,44,61,48,50,13,7,8,31,55,56,35,36,14,38,24],alloc:[1,3,4,7,8,9,17,19,55,29,35,36,53,40,41,15,48,50,13,44,24,27,59],drop:[49,50,22,34,35,59,14,39],essenti:[53,31],
use_localtim:35,syscal:56,seriou:50,op_cluster_ip_turnup:3,correspond:[2,41,3,49,50,8,31,56,9,62,59,14,36,16],element:[62,33,48,13,8,27,35,58,36,16],issu:[1,2,28,6,7,9,14,18,55,29,31,33,34,35,36,38,15,48,49,50,13,52,56,58,59,60],mainten:[62,15,48,56,33,35,36,39],allow:[1,2,3,4,5,6,7,8,9,62,14,16,17,19,24,25,27,29,30,31,33,34,35,36,37,38,39,15,48,49,50,13,55,56,59,60,61],fallback:[8,16],jobqueue_renam:13,elif:49,least:[2,3,5,6,8,9,14,16,55,29,31,35,36,40,41,52,48,50,13,15,56,60],ouput:35,setuptool:[51,28],export_index:13,imperson:54,kbd:56,templatehaskel:28,comma:[28,49,50,13,8,27,35,38],expanduidpool:50,blktap:[8,13],avaibl:50,bunch:4,perfect:29,job_info:8,eui:41,ispec:[35,55],libghc6:51,send:[2,52,49,50,30,15,8,9,35,59,13,14,38],certificateerror:35,early_releas:[35,8],infrastructur:[41,52,50,13,15,57,35,14,16],instance_disk_count:3,purg:13,therefor:[1,2,3,6,9,14,19,55,30,33,34,35,53,44,48,50,13,15,54,24,58,59],submitjob:[49,13],onreadystatechang:8,pinst_cnt:8,crash:[1,2,52,48,7,9,35,36,60,15,55],fourth:35,admin_down:[1,55],drbd0px:7,auto:[27,43,15,48,28,13,7,51,16,56,55,35,60,24],overal:[29,48,50],guess:[55,56],dai:[35,14,49],auth:7,modify_etc_host:[35,8],mention:[53,62,60,48,38,55,56,35,36,14,59,16],op_node_add:3,terminolog:[15,17,56],luinstancemov:35,fingerprint:[7,59,14],front:[35,48,50],bootload:[15,35],ganeti_hooks_phas:3,align:13,strive:24,getclient:34,gnutl:35,writefil:48,somewher:[7,49],xvda1:15,anyth:[13,7,8,56,10,36],mytest:28,unlimit:52,epip:35,masterd:[18,2,52,19,48,49,50,15,31,35,59],c_other:50,disk0_data:7,cluster1:[3,27],tracer:49,subset:[2,19,48,50,13,35,36,55],bump:[35,49],chunk:[48,14,16,30],"try":[13,15,48,19,29,5,51,50,38,7,8,24,56,9,35,59,49,55,36,23],ignore_consist:[3,8],remap:38,networksend:36,consum:[34,35,48],meta:[35,19,55],"static":[2,21,13,8,31,55,56,34,35,59,24],opbackupexport:[14,49],our:[41,48,19,5,49,50,13,7,31,55,35,16],patch:[57,35,51],special:[5,13,15,3,20,6,50,49,8,31,48,34,35,53,37,14],out:[1,2,4,6,7,9,14,24,29,31,52,34,35,36,53,39,41,15,48,49,50,13,44,55,56,59],variabl:[40,25,41,52,3,28,49,50,13,7,31,1,55,56,35,12,24],lock_statu:35,soften:35,reload:[35,28],influenc:[15,40,13],insserv:35,hconfd:35,reboot:[1,44,3,33,50,7,8,56,9,35,60,15,39],master_ip:[3,22],stub:34,suitabl:27,libceph:[51,56],max:[35,8,27,55],hardwar:[40,44,13,7,31,16,56,35,53,58,36,55,15,24],saniti:[4,22,50],name_check:8,red:16,statist:[36,48],clarifi:35,shut:[1,15,49,13,7,8,9,35,14],footprint:[52,35,36],jessica:8,insid:[1,11,12,14,17,19,20,24,29,30,36,37,39,40,41,15,48,49,50,13,55,56,57,58],workflow:[35,55,49,16,13],stripe_s:50,cleartext:8,source_shutdown_timeout:8,undo:[15,50],standalon:[15,7,8,0,35,36],devnot:51,dictionari:[40,41,13,48,8,50,49,22,55,27,34,59,37,61,16],tempt:13,shun:5,thecollectoridentifi:36,afterward:[60,56,13],greedi:39,log:[15,18,62,52,3,61,20,51,50,49,7,8,31,48,56,9,35,59,13,36],indent:[35,13],hidden_o:8,logilab:28,unwant:[7,59,50],could:[2,3,6,7,8,9,62,14,23,19,24,29,30,31,34,35,36,53,40,41,15,48,49,50,13,52,55,56,57,59,60,61],diskread:36,put:[40,15,3,6,49,8,56,35,36,13,60,39],segreg:24,"10gib":35,david:7,counterpart:[19,28],length:[41,15,50,13,8,27,34,14],custom_ndparam:8,organis:3,crl:23,outsid:[41,15,19,48,49,50,13,55,56,57,35,59,60,36,16],serial_consol:56,retain:50,timezon:35,distinguish:[40,4,29,50,13,33,36,19],addnod:[49,50],oob_program:35,softwar:[18,11,41,15,5,49,35,30,51,55,23,56,9,58,59,13,14,36,16],kernel_major:36,st_lvm_pv:8,qualiti:14,echo:[7,8,56],tier:[35,48,24,16],instance_diskn_:3,fignor:28,owner:[2,15,48,6,50,49,27,35,59],minmem:[15,7,35],getmirr
orstatu:50,facil:[34,41],cluster_renam:15,prioriti:[13,15,48,6,49,35,37,24],"long":[2,5,7,8,9,11,14,16,24,29,30,31,35,36,40,41,15,48,49,50,13,52,55],singular:16,strict:[35,8,12,13],export_mod:3,licens:16,sep:35,mkdir:56,capac:[15,29,48,50,55,35,60,24,39],messag:[40,1,15,33,20,49,50,30,7,8,18,61,27,9,10,35,36,13,38],basicconstraint:23,togheth:20,attack:[49,50,30,35,59,61],disk_index:[35,8],op_instance_shutdown:3,volume_group:15,termin:[2,15,28,49,13,8,35],"\u03bb":28,"final":[40,52,49,6,30,15,8,56,35,36,13,60,55],tag2:[8,16],ipv4:[35,8,48,14,41],abicloud:16,udp:[35,2,50],shell:[18,15,28,22,8,31,35],gear:[55,30],format_vers:36,rsa:[7,22,14,49,35],simplestor:50,biggest:[24,13],accompani:40,be_:13,enqueu:33,rst:[10,58,51,13,35],repect:3,exactli:[52,3,48,49,50,13,55,56,34,59,14,16],strictli:[48,56,24],blind:50,dt_plain:55,mechan:[44,60,19,29,48,21,50,13,55,56,35,12,14,36,24],dev_typ:50,ben:7,i_pri_up:27,distro:35,bloat:35,pyassert:8,op_instance_renam:3,emac:28,structur:[2,44,3,49,48,6,50,30,52,22,31,55,27,33,34,36,13,16],querynod:[34,35],claim:16,src_key_nam:49,sens:[41,15,19,48,51,50,21,8,55,13,24],becom:[40,2,41,15,29,48,22,50,49,7,8,34,35,59,13,14,36,16,39],bind:[48,51,31,35,59,14],bare:20,stricter:[35,59,61],greater:[35,55,13],querylock:35,unencrypt:59,counter:50,network_nam:[3,8,41],pvcreat:56,deprec:[62,15,4,8,27,35],fail:[3,4,5,6,7,8,9,14,20,51,55,27,29,52,34,35,36,37,38,15,48,49,50,13,44,56,53,60,61],ocfs2:40,interf:49,have:[0,1,2,3,4,5,6,7,8,9,10,62,12,13,14,23,53,18,19,20,21,22,16,24,25,26,27,28,29,30,31,33,34,35,36,37,38,39,40,41,42,43,52,45,46,47,48,49,50,51,15,54,55,56,57,58,59,60,61],tabl:[34,35,48,50,56],close:[48,2,50,13],border:14,paramiko:[35,22,51],instance5:[7,27],rebuild:35,pemfil:30,simplejson:51,min:[27,35,8,55,56],rout:[41,50,7,8,16,56,35,58,59,24],mib:[3,8],ipaddr:51,mid:40,innard:[37,6],contact:[15,48,49,50,13,7],mix:[35,55,50,56,13],unawar:36,disk_usag:8,best:[3,29,2,56,24],which:[1,2,3,4,5,6,7,8,9,53,62,13,14,15,16,17,18,19,20,21,22,24,25,27,28,29,30,31,34,35,36,37,38,39,40,41,44,48,49,50,51,52,54,55,56,57,58,59,60,61],discrep:15,jail:59,viridian:35,singl:[2,3,6,7,9,10,14,16,20,21,22,55,25,27,29,31,34,35,36,37,38,39,52,48,49,50,13,15,54,56,53,60],uppercas:34,nfdhcpd:8,regard:[3,19,5,48,13,7,31,55,35,58,59,60,16],led:[35,48],principl:[35,8],x509extens:23,ghc:[35,51,28],oracl:58,discov:[15,58,49,55,13],admin_up:35,insufficient_resourc:8,size_in_mb:16,gntadmin:48,cipher:30,jul:35,deploi:[15,51,56,13],segment:36,why:[1,2,62,52,6,56,33,35,36,14,61,16],group_network_mod:[3,41],minor_count:56,prove:49,op_node_set_param:[3,6],placement:[41,15,4,48,50,27,35],won:[15,60,4,29,48,6,50,49,7,8,31,36,55,35,58,59,13,14,19,24,39],dens:41,gather:[2,15,29,7,22,35,36,14,16],stronger:[19,48,55],uri:[8,13],node_pip:3,face:[35,19,2,30],inde:35,deni:3,file_driv:[35,8],snapshot:[52,35,14,15,13],yum:51,determin:[40,15,53,19,29,48,6,50,21,33,8,55,9,34,35,59,13,14,36,16],built:[15,19,49,50,30,7,31,33,34,35,36,14,55],constrain:[35,13,24,43],modularli:56,fact:[41,44,3,48,13,52,55,58,59,24],gain:[53,23,13],epydoc:28,"45173e82":7,assert:35,text:[40,51,49,8,31,34,35,36],verbos:[18,26,15,34,54,10,35,36],prof_o:28,ioemu:35,waitforchang:13,ecode_temp_nor:53,op_instance_reboot:3,envisag:48,new_vg_nam:3,requested_nod:27,chicken:19,debat:55,trivial:[52,29,50,13,7,14,15,23],anywai:[29,48,50,13,7,31,56,14,24,39],nonemptystr:8,subjectkeyidentifi:23,redirect:[52,49,50],textual:8,locat:[25,27,15,29,48,49,50,13,56,35,36],nois:35,tapx:59,dom0_max_vcpu:56,wreak:7,threateningli:48,prolong:50,restructur:2,initramf:5
6,combo:59,ksm:24,spice:[35,8],akward:50,hope:19,d_name:12,devnum:13,mtu:[35,56],contribut:51,example_monitor_host2:56,familiar:[15,62,49,56],pypi:[14,38],pave:13,autom:[15,9,5,49,50,13,7,55,33,10,35,36,23,39],ec018ffe72b8e75bb4d508ed5b6d079c:8,db8:41,hinotifi:51,bandwith:35,increas:[2,52,48,5,51,50,49,7,8,54,55,56,10,35,36,60,15,16],f0fac192:15,op_jobid:37,ganeti_instance_nic0_bridg:3,tbd:5,endless:35,shr:48,enabl:[28,7,8,16,53,18,19,51,22,24,25,56,30,32,35,38,39,41,15,48,50,55,27,59,60],organ:41,twice:[7,28,35],upper:[3,48,49,55,40],approach:[40,1,41,21,13,7,55,39],osp_ns1:49,storage_pool:[40,29],whether:[1,2,3,8,14,23,19,22,55,29,30,34,35,36,37,53,40,52,48,49,50,13,15,27,59,60],cluster_tag:[3,27],integr:[52,48,5,13,15,51,31,0,57,35,36],partit:[42,43,15,19,29,7,0,56,35,36,14,24,39],usb_mous:35,grab:[48,20,21,50,13],view:[1,15,29,48,49,13,35,36,53],conform:[3,8,48],legaci:8,bridge_fd:56,"_total":55,googlecod:56,frame:48,knowledg:[15,59,49,13],orphan:[15,7,55,35],group_nam:[3,8,35],packet:[35,14],displai:[34,35,8,9],malfunct:35,"954bja":7,subresourc:8,full:[3,5,7,8,16,18,21,22,55,27,30,31,35,36,37,39,40,41,15,48,49,50,56,60],multipart:14,paravirtu:[35,56],bytestr:51,rc3:35,xxxxx:41,ssconf_:48,master_candid:[15,3,50,7,8,27],statu:[1,2,3,5,6,7,8,9,14,24,26,29,33,34,35,36,37,38,39,40,43,15,48,50,13,55,27,59],wire:50,extend:[17,1,41,40,15,60,4,29,6,50,21,8,55,9,35,12,49,53,19,24,13],"881a":8,pattern:[28,13],boundari:[48,55,50],cours:[18,15,48,19,29,5,50,13,56,35,36,60,55],tend:[5,55],favor:35,written:[26,13,15,60,49,48,22,50,6,8,35,2,36,30,14],crude:55,progress:[52,6,13,15,16,36,24],neither:[15,49,50,58,14,55],lun:40,tent:52,sole:16,amem:55,secondary_ip:[15,8,27],extstorag:[15,35,8,0,40],parseabl:35,reinforc:23,syncer:[35,55],sow:[44,55,9],entir:[2,52,48,29,28,49,13,15,31,55,38,24],rare:[35,55,16,13],previous_log_seri:8,group:[1,62,3,28,6,7,8,9,10,17,18,19,21,24,27,29,34,35,36,53,39,40,41,15,46,48,49,50,13,55,56,59,60],xmlreq:8,dmz:13,haddock:28,ipolici:[19,8,55,27,35,24],revers:9,succss:14,plugin:[62,15,48,50,53,36,55],admin:[27,15,50,13,7,56,33,35,36,60],goal:[40,48,49,50,13,22,31,61,55],etx:13,equal:[25,62,3,48,8,50,13,22,55,27,34,59,14,16],vgcreat:56,thereaft:35,chosen:[62,48,49,50,13,7,31,55,35,14,16],admit:35,instanc:[0,1,11,3,4,5,7,8,9,53,62,12,14,15,23,17,18,19,20,21,51,16,24,25,27,29,30,31,33,34,35,36,37,38,39,40,41,43,44,45,46,48,49,50,13,52,54,55,56,57,58,59,60],grain:[35,13],equat:55,ialloc:[40,62,41,15,27,4,29,48,50,13,7,8,31,54,56,35,53,19,55],vol_nam:40,freeli:48,sigterm:35,minumum:36,noshutdown:15,comment:[15,8,56,9,35,55],localcount:36,job_status_wait:6,primaryinst:34,hyphen:3,arriv:[14,49,50],chmod:56,vgreduc:[15,7,13,50,35],dest_cert_fil:49,solv:[19,13,7,35,60,24],rpc:[1,2,13,52,48,19,61,20,49,50,30,0,22,31,60,35,59,37,14,36,39],free_disk:27,pkgdatadir:35,diskwrit:36,rpm:9,uid:[35,59,8,50],quit:[2,15,29,6,50,49,61,24],vgname:56,evalu:[48,6,50,49,36,24],yaml:[35,28],addition:[40,15,4,48,8,50,13,51,55,9,34,35,16],libdir:[35,18,27],compos:[29,13],insuffici:24,compon:[25,2,27,15,3,28,48,50,7,56,33,36,60,55],json:[18,2,41,15,4,33,8,50,30,22,51,27,9,26,35,13,36,55],besid:[15,29,13,27,35,38,55],treat:[49,6,8,9,35,13,55],vm_capabl:[15,3,48,8,31,27,35],unconfigur:36,remove_reserved_ip:8,doabl:[31,55],partial:[25,42,43,15,48,5,50,13,0,55,35,36,14,53,24],runlevel:7,wiomilli:36,presenc:[41,29,5,50,8,35,58,60],instance_nicn_:3,vmware:[58,16],deliber:6,behalf:2,togeth:[25,2,15,48,5,49,50,30,55,35,36,13,14,24,39],minim:[35,36,56,13],criterion:55,skew:35,veth:49,hbal:[19,7,31,35,53,24],repli
c:[40,2,27,15,3,49,50,13,7,56,35,59,36,23,39],multi:[62,27,52,4,29,48,50,15,8,55,56,9,35,14,16,39],novel:48,autoarchivejob:49,usermode_help:[35,56],plain:[15,60,19,29,48,50,8,16,27,55,35,36,14,24,39],"12gb":7,hyperlink:[58,16],harder:[15,49,24,13],migrate_cleanup:3,implic:[59,54],harden:59,defin:[1,2,3,6,8,9,10,14,23,19,51,22,24,31,35,36,38,40,41,15,48,49,50,13,55,27,59],ganeti_:3,suffix:13,disk_typ:16,charaterist:60,glossari:[17,44,15,51,56,35],load_certificate_request:23,rbd:[15,19,51,8,56,57,35,36,60,39],redistributeconfig:50,layer:[40,48,31,35,36,37,24],customiz:[35,50],helper:[25,41,8,56,9,35,53],almost:[35,14,48],mac_prefix:8,mtime:[8,36],customis:[17,15,3,28,50,31,35,55],unclean:35,pip:[34,8],middlewar:30,archiv:[15,48,6,13,56,35,16],motiv:62,substanti:24,lightweight:50,fiddl:56,incom:[48,14],revis:[18,50],unneed:[35,13],halt:[38,9],dts_int_mirror:40,ing:8,welcom:[17,35],parti:[29,49,54,35,59,14,23],balloon:[44,8,55,56,35,24],roman:35,member:[15,53,48,21],python:[18,2,41,52,3,28,6,50,30,7,8,31,51,56,34,35,13,14,38],ganetiapierror:35,largest:29,fourcc:50,codepath:[35,13],"__version__":8,incl:[17,19,11,42,43],infer:[48,41],hv_kernel_arg:35,difficult:[2,15,20,6,50,13,31,23,35,14,24],bes_paramet:13,oneof:8,immedi:[1,2,15,6,50,13,36,24],slave:[40,56],context:[35,55,49,23,50],denot:[15,3,29,28,22,50,13,8,48,27,35],vpcu_ratio:24,keepal:[35,49],target_group:[3,27],file_storage_dir:[35,8],struct:36,handi:9,phy:7,nic_typ:18,sooner:35,usecas:58,auto_bal:[35,8,18,16,13],decoupl:35,login:[7,59],reclam:29,cosmet:35,audit:36,dmesg:7,off:[62,27,15,48,5,49,13,8,56,9,35,60,38,39],disapear:60,nevertheless:59,sre_pattern:34,colour:34,reboot_behavior:35,well:[1,2,28,6,8,14,16,19,21,51,55,5,29,30,31,35,36,53,39,41,52,48,49,50,13,15,54,56,57,59,60],versatil:6,thought:60,versionid:16,exampl:[0,62,3,28,6,7,8,9,13,14,15,23,17,18,19,21,22,16,24,25,27,5,30,31,33,34,35,36,37,38,40,41,44,48,49,50,51,52,55,56,57,58,60],command:[1,2,3,4,28,7,8,9,12,14,23,18,51,22,16,55,25,27,31,33,34,35,36,38,40,15,48,49,50,13,0,54,56,57,58,59,61],achiev:[41,15,29,50,35,12,55],choos:[40,15,48,29,5,49,50,13,56,35,39],undefin:9,is_primari:7,stripecount:[35,55],piec:[52,50,13,33,30,60,16],latest:[17,4,13,56,35],regularli:[44,15],requestunuseduid:50,test3:41,test2:41,newest:58,paus:[10,35,18,15],export_vers:[8,16],less:[41,15,27,48,6,49,7,8,31,55,56,34,35,13,14,60,23],xen:[1,3,7,8,16,18,21,51,24,56,35,36,38,15,48,50,13,44,55,27,57,58,59],e90739d625b:35,tcp:[15,49,7,8,54,35,14],detail:[1,2,28,6,7,8,9,62,12,14,15,16,17,18,21,51,55,27,5,30,33,35,36,38,39,41,44,48,50,13,52,54,56,58,59,60],should:[2,3,28,6,7,8,9,10,62,12,14,15,16,18,19,20,21,22,24,25,27,5,29,30,31,34,35,36,37,38,39,40,41,44,48,49,50,13,52,54,55,56,57,58,53,60],trust:[35,59,49],interim:19,heavili:[40,2,48,5,49,50,55],simultan:[40,29,14,39],drdb:16,ganetisect:16,web:[8,49,56,30],rapid:2,amd64:[50,56],wee:13,tight:[22,50],cert_fil:49,point:[2,3,4,7,8,9,10,12,14,16,17,18,19,29,33,35,36,15,48,49,50,13,52,56,60,61],makefil:[35,28],aris:[36,14,49],script:[3,28,7,8,10,14,17,18,51,22,25,27,5,31,35,36,40,41,15,48,49,50,13,56,57,58,59,61],add:[1,3,4,5,6,7,8,9,12,16,18,19,21,22,24,25,27,31,34,35,36,40,41,44,48,49,50,13,15,55,56,57,59,60],src_cert:49,remote_raid1:[35,18,13],newnam:48,sweet:13,freshest:36,iputil:51,suit:[48,29,28,50,13,27,35,55],adh:30,vdi:16,always_failov:35,ganetilockmanag:53,unprivileg:31,rememb:[35,8,18,55,50],minimalist:58,dest:[51,54],os_vari:35,fyi:[50,9],os_nam:8,arguabl:48,effortless:16,punctuat:34,realiz:[15,57],five:[15,35,48],know:[1,52,4,6,50,49,15,27,35,34,
ganeti-2.9.3/doc/html/design-partitioned.html0000644000000000000000000005735212271443667021202 0ustar00rootroot00000000000000 Partitioned Ganeti — Ganeti 2.9.3 documentation

Partitioned Ganeti

Current state and shortcomings

Currently Ganeti can be used to easily share a node between multiple virtual instances. While it’s easy to do completely “best effort” sharing, it’s considerably harder to fully reserve resources for a particular instance. In particular, this has to be done manually for CPUs and disk, is implemented for RAM under Xen but not under KVM, and there’s no provision for network-level QoS.

Proposed changes

We want to make it easy to partition a node between machines with exclusive use of hardware resources. While some sharing will need to happen anyway (e.g. for operations that use the host domain, or use resources, like buses, which are unique or very scarce on host systems), we’ll strive to keep contention to a minimum, but won’t try to avoid all possible sources of it.

Exclusive use of disks

exclusive_storage is a new node parameter. When it’s enabled, Ganeti will allocate entire disks to instances. Though it’s possible to think of ways of doing something similar for other storage back-ends, this design targets only plain and drbd. The name is generic enough in case the feature is later extended to other back-ends. The flag value should be homogeneous within a node-group; cluster-verify will report any violation of this condition.

Ganeti will consider each physical volume in the destination volume group as a host disk (for proper isolation, an administrator should make sure that there aren’t multiple PVs on the same physical disk). When exclusive_storage is enabled in a node group, all PVs in the node group must have the same size (within a certain margin, say 1%, defined through a new parameter). Ganeti will check this condition when the exclusive_storage flag is set, whenever a new node is added and as part of cluster-verify.
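As an illustration, the homogeneity check could boil down to a comparison like the following sketch (the helper name and the exact interpretation of the margin are assumptions, not the actual cluster-verify code):

  def check_pv_sizes(pv_sizes, margin=0.01):
    """Check that all PV sizes in a node group are (almost) the same.

    @param pv_sizes: list of PV sizes, e.g. in MiB
    @param margin: maximum allowed relative deviation (default 1%)

    """
    if not pv_sizes:
      return True
    smallest = min(pv_sizes)
    # All sizes must fall inside a band of relative width ``margin``
    return max(pv_sizes) - smallest <= margin * smallest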

When creating a new disk for an instance, Ganeti will allocate the minimum number of PVs to hold the disk, and those PVs will be excluded from the pool of available PVs for further disk creations. The underlying LV will be striped, when striping is allowed by the current configuration. Ganeti will continue to track only the LVs, and query the LVM layer to figure out which PVs are available and how much space is free. Yet, creation, disk growing, and free-space reporting will ignore any partially allocated PVs, so that PVs won’t be shared between instance disks.

For compatibility with the DRBD template and to take into account disk variability, Ganeti will always subtract 2% (this will be a parameter) from the PV space when calculating how many PVs are needed to allocate an instance and when nodes report free space.
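To make the calculation concrete, a minimal sketch could look like this (PART_MARGIN and pvs_needed are hypothetical names standing in for the 2% parameter and the allocation helper):

  import math

  # Stand-in for the 2% reserve described above; the real parameter
  # name and default may differ.
  PART_MARGIN = 0.02

  def pvs_needed(disk_size, pv_size):
    """Minimum number of PVs required to hold a disk.

    Both sizes in MiB; only 98% of each PV counts as usable space.

    """
    usable = pv_size * (1.0 - PART_MARGIN)
    return int(math.ceil(float(disk_size) / usable))

For example, with 100 GiB PVs a 200 GiB disk needs pvs_needed(204800, 102400) == 3 PVs rather than 2, because each PV only provides about 98 GiB of usable space.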

The obvious target for this option is plain disk template, which doesn’t provide redundancy. An administrator can still provide resilience against disk failures by setting up RAID under PVs, but this is transparent to Ganeti.

Spindles as a resource

When resources are dedicated and there are more spindles than instances on a node, it is natural to assign more spindles to instances than what is strictly needed. For this reason, we introduce a new resource: spindles. A spindle is a PV in LVM. The number of spindles required for a disk of an instance is specified together with the size. Specifying the number of spindles is possible only when exclusive_storage is enabled. It is an error to specify a number of spindles insufficient to contain the requested disk size.
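Under these rules, validating a spindle count reduces to the PV computation from the previous section; a sketch, reusing the hypothetical pvs_needed() helper from above:

  def check_spindles(requested, disk_size, pv_size):
    """Reject a spindle count that cannot hold the requested disk size.

    Reuses the hypothetical pvs_needed() helper sketched above.

    """
    minimum = pvs_needed(disk_size, pv_size)
    if requested < minimum:
      raise ValueError("%d spindle(s) cannot hold a %d MiB disk;"
                       " at least %d needed" %
                       (requested, disk_size, minimum))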

When exclusive_storage is not enabled, spindles are not used in free space calculation, in allocation algorithms, and policies. When it’s enabled, hspace, hbal, and allocators will use spindles instead of disk size for their computation. For each node, the number of all the spindles in every LVM group is recorded, and different LVM groups are accounted separately in allocation and balancing.

There is already a concept of spindles in Ganeti. It’s not related to any actual spindle or volume count, but it’s used in spindle_use to measure the pressure of an instance on the storage system and in spindle_ratio to balance the I/O load on the nodes. When exclusive_storage is enabled, these parameters as currently defined won’t make any sense, so their meaning will be changed in this way:

  • spindle_use refers to the resource, hence to the actual spindles (PVs in LVM), used by an instance. The values specified in the instance policy specifications are compared to the run-time number of spindles used by an instance. The spindle_use back-end parameter will be ignored.
  • spindle_ratio in instance policies and spindle_count in node parameters are ignored, as the exclusive assignment of PVs already implies a value of 1.0 for the first, and the second is replaced by the actual number of spindles.

When exclusive_storage is disabled, the existing spindle parameters behave as before.

Dedicated CPUs

vcpu_ratio can be used to tie the number of VCPUs to the number of CPUs provided by the hardware. We need to take into account the CPU usage of the hypervisor. For Xen, this means counting the number of VCPUs assigned to Domain-0.

For KVM, it’s more difficult to limit the number of CPUs used by the node OS. cgroups could be a solution to restrict the node OS to use some of the CPUs, leaving the other ones to instances and KVM processes. For KVM, the number of CPUs for the host system should also be a hypervisor parameter (set at the node group level).
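A sketch of the resulting capacity computation, where reserved_cpus models Domain-0’s VCPUs under Xen, or the proposed host-CPU hypervisor parameter under KVM (the function and parameter names are illustrative only):

  def vcpus_available(node_cpus, reserved_cpus, vcpu_ratio=1.0):
    """VCPUs that can still be dedicated to instances on a node.

    With dedicated CPUs the ratio would typically be 1.0, so every
    instance VCPU maps to a physical CPU not used by the host.

    """
    return int((node_cpus - reserved_cpus) * vcpu_ratio)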

Dedicated RAM

Instances should not compete for RAM. This is easily done on Xen, but it is tricky on KVM.

Xen

Memory is already fully segregated under Xen, if sharing mechanisms (transcendent memory, auto ballooning, etc.) are not in use.

KVM

Under KVM or LXC memory is fully shared between the host system and all the guests, and instances can even be swapped out by the host OS.

It’s not clear if the problem can be solved by limiting the size of the instances, so that there is plenty of room for the host OS.

We could implement segregation using cgroups to limit the memory used by the host OS. This requires finishing the implementation of the memory hypervisor status (set at the node group level) that changes how free memory is computed under KVM systems. Then we have to add a way to enforce this limit on the host system itself, rather than leaving it as a calculation tool only.

Another problem for KVM is that we need to decide about the size of the cgroup versus the size of the VM: in particular, some overhead will exist, because an instance and its encapsulating KVM process share the same address space. For KVM systems, the physical memory allocatable to instances should be computed by subtracting an overhead for the KVM processes, whose value can be either statically configured or set in a hypervisor status parameter.
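The computation described above could be sketched as follows; host_mem_limit and kvm_overhead are hypothetical stand-ins for the cgroup limit and the per-process overhead parameter:

  def kvm_allocatable_memory(total_mem, host_mem_limit, instance_mems,
                             kvm_overhead):
    """Memory (MiB) still allocatable to new instances on a KVM node.

    @param total_mem: physical memory of the node
    @param host_mem_limit: cgroup limit reserved for the host OS
    @param instance_mems: memory sizes of the instances already running
    @param kvm_overhead: estimated per-instance overhead of a KVM process

    """
    used = sum(mem + kvm_overhead for mem in instance_mems)
    return total_mem - host_mem_limit - used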

NUMA

If instances are pinned to CPUs, and the amount of memory used for every instance is proportionate to the number of VCPUs, NUMA shouldn’t be a problem, as the hypervisors allocate memory in the appropriate NUMA node. Work is in progress in Xen and the Linux kernel to always allocate memory correctly even without pinning. Therefore, we don’t need to address this problem specifically; it will be solved by future versions of the hypervisors or by implementing CPU pinning.

Constrained instance sizes

In order to simplify allocation and resource provisioning we want to limit the possible sizes of instances to a finite set of specifications, defined at node-group level.

Currently it’s possible to define an instance policy that limits the minimum and maximum value for CPU, memory, and disk usage (and spindles and any other resource, when implemented), independently from each other. We extend the policy by allowing it to contain multiple occurrences of these specification pairs for the limits on instance resources. Each specification pair (minimum and maximum) has a unique priority associated to it (in other words, specifications are ordered), which is used by hspace (see below). The standard specification doesn’t change: there is one for the whole cluster.

For example, a policy could be set up to allow instances with these constraints:

  • between 1 and 2 CPUs, 2 GB of RAM, and between 10 GB and 400 GB of disk space;
  • 4 CPUs, 4 GB of RAM, and between 10 GB and 800 GB of disk space.

Then, an instance using 1 CPU, 2 GB of RAM and 50 GB of disk would be legal, as would an instance using 4 CPUs, 4 GB of RAM, and 20 GB of disk, while an instance using 2 CPUs, 4 GB of RAM and 40 GB of disk would be illegal.
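The legality check amounts to matching the instance against at least one specification pair. A minimal sketch, encoding the example policy above (the data layout is illustrative, not Ganeti’s actual ipolicy format):

  # The example policy above; sizes in GB
  POLICY = [
    {"min": {"cpu": 1, "mem": 2, "disk": 10},
     "max": {"cpu": 2, "mem": 2, "disk": 400}},
    {"min": {"cpu": 4, "mem": 4, "disk": 10},
     "max": {"cpu": 4, "mem": 4, "disk": 800}},
  ]

  def is_legal(instance, policy=POLICY):
    """True if the instance fits inside at least one (min, max) pair."""
    return any(all(spec["min"][r] <= instance[r] <= spec["max"][r]
                   for r in spec["min"])
               for spec in policy)

  assert is_legal({"cpu": 1, "mem": 2, "disk": 50})      # first spec
  assert is_legal({"cpu": 4, "mem": 4, "disk": 20})      # second spec
  assert not is_legal({"cpu": 2, "mem": 4, "disk": 40})  # matches neither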

Ganeti will refuse to create (or modify) instances that violate instance policy constraints, unless the flag --ignore-ipolicy is passed.

While the changes needed to check constraint violations are straightforward, hspace behavior needs some adjustments for tiered allocation. hspace will start to allocate instances using the maximum specification with the highest priority, then it will try to lower the most constrained resources (without breaking the policy) before moving to the second highest priority, and so on.
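The loop structure could look like the following sketch, where try_allocate and shrink are hypothetical stand-ins for hspace’s internal primitives:

  def tiered_allocation(policy, try_allocate, shrink):
    """Tiered allocation, spec by spec, in decreasing priority order.

    @param policy: specification pairs, sorted by decreasing priority
    @param try_allocate: callable(size) -> bool; places one instance of
      the given size if the cluster still has room for it
    @param shrink: callable(size, minimum) -> bool; lowers the most
      constrained resource in size without going below minimum

    """
    for spec in policy:
      size = dict(spec["max"])      # start from the maximum specification
      while True:
        while try_allocate(size):
          pass                      # keep placing instances of this size
        if not shrink(size, spec["min"]):
          break                     # nothing left to shrink: next spec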

For consistent results in capacity calculation, the specifications inside a policy should be ordered so that the biggest specifications have the highest priorities. Also, specifications should not overlap. Ganeti won’t check nor enforce such constraints, though.

Implementation order

We will implement this design in the following order:

  • Exclusive use of disks (without spindles as a resource)
  • Constrained instance sizes
  • Spindles as a resource
  • Dedicated CPU and memory

In this way we always have new features that are immediately useful. Spindles as a resource are not needed for correct capacity calculation, as long as allowed disk sizes are multiples of the spindle size, so they have been moved after constrained instance sizes. If it turns out that it’s easier to implement dedicated disks with spindles as a resource, then we will do that.

Possible future enhancements

This section briefly describes some enhancements to the current design. They may require their own design document, and must be re-evaluated when considered for implementation, as Ganeti and the hypervisors may change substantially in the meantime.

Network bandwidth

A new resource is introduced: network bandwidth. An administrator must be able to assign some network bandwidth to the virtual interfaces of an instance, and set limits in instance policies. Also, a list of the physical network interfaces available for Ganeti use and their maximum bandwidth must be kept at node-group or node level. This information will be taken into account for allocation, balancing, and free-space calculation.

An additional enhancement is Ganeti enforcing the values set in the bandwidth resource. This can be done by configuring limits, for example via openvswitch or normal QoS for bridging or routing. The bandwidth resource represents the average bandwidth usage, so a few new back-end parameters are needed to configure how to deal with bursts (they depend on the actual mechanism used to enforce the limit).

CPU pinning

In order to avoid unwarranted migrations between CPUs and to deal with NUMA effectively we may need CPU pinning. CPU scheduling is a complex topic and still under active development in Xen and the Linux kernel, so we won’t try to outsmart their developers. If we need pinning, it’s more to get predictable performance than maximum performance (which is best left to the hypervisor), so we’ll implement a very simple algorithm that allocates CPUs when an instance is assigned to a node (either when it’s created or when it’s moved) and takes into account NUMA and maybe CPU multithreading. A more refined version might also run when an instance is deleted, but that would involve reassigning CPUs, which could be bad with NUMA.
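A possible shape for such a simple algorithm (the data structures are assumptions; multithreading is left out):

  def pin_instance(numa_free_cpus, vcpus):
    """Pick physical CPU ids for a new instance.

    @param numa_free_cpus: dict of NUMA node id -> list of free CPU ids
    @param vcpus: number of VCPUs requested

    """
    # Prefer keeping the whole instance inside a single NUMA node,
    # trying the nodes with the most free CPUs first
    for _, cpus in sorted(numa_free_cpus.items(),
                          key=lambda kv: len(kv[1]), reverse=True):
      if len(cpus) >= vcpus:
        chosen = cpus[:vcpus]
        del cpus[:vcpus]
        return chosen
    raise ValueError("no NUMA node has %d free CPUs" % vcpus)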

Overcommit for RAM and disks

Right now it is possible to assign more VCPUs to the instances running on a node than there are CPUs available. This works because CPU usage is normally well below 100% on average. There are ways to share memory pages (e.g. KSM, transcendent memory) and disk blocks, so we could add new parameters to overcommit memory and disks, similar to vcpu_ratio.
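Capacity accounting would then scale the physical resource by the ratio, just as vcpu_ratio does today; a one-line sketch with a hypothetical memory_ratio parameter:

  def effective_capacity(physical, overcommit_ratio):
    """Resource amount exposed to the allocator under overcommit."""
    return int(physical * overcommit_ratio)

  # e.g. 64 GiB of physical RAM with memory_ratio=1.5 is accounted
  # as 96 GiB
  assert effective_capacity(65536, 1.5) == 98304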

ganeti-2.9.3/doc/html/_static/0000755000000000000000000000000012271443673016142 5ustar00rootroot00000000000000ganeti-2.9.3/doc/html/_static/up-pressed.png0000644000000000000000000000056411644410761020737 0ustar00rootroot00000000000000