backupchecker-1.9/0000755000000000000000000000000013073706716014125 5ustar rootroot00000000000000backupchecker-1.9/man/0000755000000000000000000000000013073706716014700 5ustar rootroot00000000000000backupchecker-1.9/man/backupchecker.10000664000000000000000000001056113073704736017561 0ustar rootroot00000000000000'\" t .\" Title: backupchecker .\" Author: Carl Chenet .\" Generator: DocBook XSL Stylesheets v1.78.1 .\" Date: 02/27/2015 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" .TH "BACKUPCHECKER" "1" "02/27/2015" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" backupchecker \- fully automated backup checker .SH "SYNOPSIS" .sp backupchecker [\-c DIR] [\-l FILE] [\-G] ARG1 ARG2 .SH "DESCRIPTION" .sp Backup Checker parses backups (archives and file tree) to perform several different checks in order to verify your backup integrity and its associated content\&. 
.SH "OPTIONS" .PP \fB\-c \fR\fB\fIDIR\fR\fR\fB, \-\-configpath \fR\fB\fIDIR\fR\fR .RS 4 the path to the configurations .RE .PP \fB\-C \fR\fB\fIDIR\fR\fR\fB, \-\-output\-conf\-dir \fR\fB\fIDIR\fR\fR .RS 4 the directory to store the configuration file .RE .PP \fB\-d \fR\fB\fIDELIMITER\fR\fR\fB, \-\-delimiter \fR\fB\fIDELIMITER\fR\fR .RS 4 delimiter of the fields for the list of files .RE .PP \fB\-E \fR\fB\fIFILE\fR\fR\fB, \-\-exceptions\-file \fR\fB\fIFILE\fR\fR .RS 4 delimiter of the fields for the list of files .RE .PP \fB\-g, \-\-gen\-list\fR .RS 4 generate a list of files inside a backup .RE .PP \fB\-G, \-\-gen\-full\fR .RS 4 generate the configuration file and the list of files for the backup .RE .PP \fB\-H, \-\-hashes\fR .RS 4 generate the hash sum of each encountered file in the backup .RE .PP \fB\-\-hashtype \fR\fB\fIHASHTYPE\fR\fR .RS 4 the type of the hash sum to use while generating configurations for the archive .RE .PP \fB\-l \fR\fB\fIFILE\fR\fR\fB, \-\-log \fR\fB\fIFILE\fR\fR .RS 4 the log file .RE .PP \fB\-L \fR\fB\fIDIR\fR\fR\fB, \-\-output\-list\-dir \fR\fB\fIDIR\fR\fR .RS 4 the directory to store the list of files inside an archive or tree .RE .PP \fB\-n \fR\fB\fINAME\fR\fR\fB, \-\-configuration\-name \fR\fB\fINAME\fR\fR .RS 4 the name to use to name the \&.conf and \&.list generated by \-g or \-G options (default is the name of the archive) .RE .PP \fB\-O \fR\fB\fIDIR\fR\fR\fB, \-\-output\-list\-and\-conf\-dir \fR\fB\fIDIR\fR\fR .RS 4 the directory to store the configuration file and the list of files inside an archive or tree .RE .PP \fB\-v, \-\-version\fR .RS 4 the version of Backup Checker .RE .SH "EXAMPLES" .sp Generate the list of files (\&.conf and \&.list files) and their attributes inside a backup .sp $ backupchecker \-G /backups/monthly\-backup\&.tar\&.gz .sp Launch Backup Checker indicating your configuration directory and your log file .sp $ backupchecker \-c /etc/brebis/ \-l /var/log/brebis\&.log .sp Verify a remote backup by FTP .sp $ 
wget \-\-quiet \-O \- ftp://user:pass@server/backup\&.tar\&.gz | backupchecker \-c /etc/brebis/ \-l /var/log/brebis\&.log .SH "AUTHOR" .sp Carl Chenet .SH "RESSOURCES" .sp Backup Checkup project homepage : https://github\&.com/backupchecker/backupchecker .SH "LICENSE" .sp Copyright \(co 2015-2017 Carl Chenet This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License\&. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE\&. See the GNU General Public License for more details\&. You should have received a copy of the GNU General Public License along with this program\&. If not, see http://www\&.gnu\&.org/licenses/\&. .RE backupchecker-1.9/backupchecker/0000755000000000000000000000000013073706716016717 5ustar rootroot00000000000000backupchecker-1.9/backupchecker/generatelist/0000755000000000000000000000000013073706716021405 5ustar rootroot00000000000000backupchecker-1.9/backupchecker/generatelist/generatelistforzip.py0000664000000000000000000003345713073704736025715 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
# Generate a list of files from a zip archive '''Generate a list of files from a zip archive''' import datetime import fnmatch import logging import os import os.path import stat import sys import zipfile from backupchecker.checkhashes import get_hash from backupchecker.generatelist.generatelist import GenerateList class GenerateListForZip(GenerateList): '''Generate a list of files from a zip archive''' def __init__(self, __genparams): '''The constructor for the GenerateListForZip class''' self.__arcpath = __genparams['arcpath'] self.__delimiter = __genparams['delimiter'] self._genfull = __genparams['genfull'] self.__listoutput = __genparams['listoutput'] self.__confoutput = __genparams['confoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__confname = __genparams['confname'] try: __zip = zipfile.ZipFile(self.__arcpath, 'r', allowZip64=True) self.__main(__zip) except zipfile.BadZipfile as _msg: __warn = '. You should investigate for a data corruption.' 
logging.warning('{}: {}{}'.format(__self.arcpath, str(__msg), __warn)) def __main(self, __zip): '''Main of the GenerateListForZip class''' __arcstat = os.stat(self.__arcpath) __listoffiles = ['[archive]\nmtime{} {}\n\n[files]\n'.format(self.__delimiter,__arcstat.st_mtime)] __oneline = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) if self.__getallhashes: if not self.__hashtype: __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) else: __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=self.__delimiter) else: __onelinewithouthash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) __onelinenoexternalattr = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) __crcerror = __zip.testzip() if __crcerror: logging.warning('{} has at least one file corrupted:{}'.format(self.__arcpath, __crcerror)) else: __zipinfo = __zip.infolist() for __fileinfo in __zipinfo: __fileinfo.filename = self._normalize_path(__fileinfo.filename) __uid, __gid = self.__extract_uid_gid(__fileinfo) # check if external_attr is available if __fileinfo.external_attr != 0: __type = self.__translate_type(__fileinfo.external_attr >> 16) __mode = oct(stat.S_IMODE((__fileinfo.external_attr >> 16))).split('o')[-1] # Prepare a timestamp for the ctime object 
__dt = __fileinfo.date_time try: __mtime = float(datetime.datetime(__dt[0], __dt[1], __dt[2], __dt[3], __dt[4], __dt[5]).timestamp()) except ValueError as __msg: __warn = 'Issue with timestamp while controlling {} in {}'.format(_fileinfo.filename,_cfgvalues['path']) logging.warning(__warn) if __fileinfo.external_attr != 0 and __type == 'f': if self.__getallhashes: if not self.__hashtype: __hash = get_hash(__zip.open(__fileinfo.filename, 'r'), 'md5') else: __hash = get_hash(__zip.open(__fileinfo.filename, 'r'), self.__hashtype) __listoffiles.append(__onelinewithhash.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mode, __type, __mtime, __hash)) else: # check if there are exceptions while parsing if self.__parsingexceptions: for __file in self.__parsingexceptions: if fnmatch.fnmatch(__fileinfo.filename, __file): __hash = get_hash(__zip.open(__fileinfo.filename, 'r'), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=self.__delimiter) __listoffiles.append(__onelinewithhash.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mode, __type, __mtime, __hash)) else: # we use exceptions-file option but the file is not concerned by an exception __listoffiles.append(__onelinewithouthash.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mode, __type, __mtime)) else: # we don't use the --exceptions-file option __listoffiles.append(__onelinewithouthash.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mode, __type, __mtime)) elif __fileinfo.external_attr != 0 and __type == 'd': __listoffiles.append(__oneline.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mode, __type, __mtime)) 
else: __listoffiles.append(__onelinenoexternalattr.format(__fileinfo.filename, str(__fileinfo.file_size), str(__uid), str(__gid), __mtime)) # define the flexible file list path __arcwithext = os.path.split(''.join([self.__arcpath[:-3], 'list']))[1] if self.__listoutput: if self.__confname: # --gen-list and --output-list-dir and --configuration-name __arclistpath = os.path.join(self.__listoutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-list-dir __arclistpath = os.path.join(self.__listoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-list and --output-conf-and-list-dir and --configuration-name __arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-conf-and-list-dir __arclistpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-list if self.__confname: __arc = os.path.dirname(self.__arcpath) __arclistpath = os.path.join(__arc, '.'.join([self.__confname, 'list'])) else: __arclistpath = ''.join([self.__arcpath[:-3], 'list']) __listconfinfo = {'arclistpath': __arclistpath, 'listoffiles': __listoffiles} # call the method to write the list of files inside the archive self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: # generate the hash sum of the list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) # define the flexible configuration file path __arcwithext = os.path.split(''.join([self.__arcpath[:-3], 'conf']))[1] if self.__confoutput: if self.__confname: # --gen-full and --output-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__confoutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-conf-dir __arcconfpath = os.path.join(self.__confoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-full and --output-conf-and-list-dir and --configuration-name __arcconfpath = 
os.path.join(self.__fulloutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-conf-and-list-dir __arcconfpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-full only if self.__confname: __arc = os.path.dirname(self.__arcpath) __arcconfpath = os.path.join(__arc, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = ''.join([self.__arcpath[:-3], 'conf']) # name of the archive in the configuration file if self.__confname: __arcname = self.__confname else: __arcname = os.path.basename(self.__arcpath[:-4]) __confinfo = {'arcname': __arcname, 'arcpath': self.__arcpath, 'arcconfpath': __arcconfpath, 'arclistpath': __listconfinfo['arclistpath'], 'arctype': 'archive', 'sha512': __listhashsum} self._generate_conf(__confinfo) def __extract_uid_gid(self, __binary): '''Extract uid and gid from a zipinfo.extra object (platform dependant)''' __uid, __gid = int.from_bytes(__binary.extra[15:17], sys.byteorder), \ int.from_bytes(__binary.extra[20:22], sys.byteorder) return (__uid, __gid) def __translate_type(self, __mode): '''Translate the type of the file to a generic name''' if stat.S_ISREG(__mode): return 'f' elif stat.S_ISDIR(__mode): return 'd' backupchecker-1.9/backupchecker/generatelist/generatelistfortar.py0000664000000000000000000004643013073704736025674 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
# Generate a list of files from a tar archive '''Generate a list of files from a tar archive''' import fnmatch import logging import os import os.path import sys import tarfile from backupchecker.generatelist.generatelist import GenerateList from backupchecker.checkhashes import get_hash class GenerateListForTar(GenerateList): '''Generate a list of files from a tar archive''' def __init__(self, __genparams): '''The constructor for the GenerateListForTar class''' self.__arcpath = __genparams['arcpath'] self.__delimiter = __genparams['delimiter'] self._genlist = __genparams['genlist'] self._genfull = __genparams['genfull'] self.__listoutput = __genparams['listoutput'] self.__confoutput = __genparams['confoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__isastream = __genparams['isastream'] self.__confname = __genparams['confname'] try: if self.__isastream: self.__tarstreamname = 'tarstream' self.__streampath = os.path.join(self.__arcpath, self.__tarstreamname) __tar = tarfile.open(mode='r|*', fileobj=sys.stdin.buffer) else: __tar = tarfile.open(self.__arcpath, 'r') self.__main(__tar) except (tarfile.TarError, EOFError) as _msg: __warn = '. You should investigate for a data corruption.' 
logging.warning('{}: {}{}'.format(self.__arcpath, str(_msg), __warn)) def __main(self, __tar): '''Main for the GenerateListForTar class''' # extract mtime of the archive if not self.__isastream: __arcstat = os.stat(self.__arcpath) __listoffiles = ['[archive]\nmtime{} {}\n\n[files]\n'.format(self.__delimiter,__arcstat.st_mtime)] else: __listoffiles = ['[files]\n'] __oneline = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) if self.__getallhashes: # we get all the hash sums of files inside the backup if not self.__hashtype: __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) else: # we switch the default hash sum __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=self.__delimiter) else: __onelinewithouthash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) __onelinewithtarget = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} target{delimiter}{value}\n'.format(value='{}', delimiter=self.__delimiter) for __tarinfo in __tar: # Pick up tar information __tarinfo.name = 
self._normalize_path(__tarinfo.name) __type = self.__translate_type(__tarinfo.type) __mode = oct(__tarinfo.mode).split('o')[-1] # if the file has no right, need to manipulate the output - solving #15 if __mode == '0': __mode = '000' if __type == 'f': if self.__getallhashes: # extract all hash sums from the archive if not self.__hashtype: # extract hash sum of the file inside the archive __hash = get_hash(__tar.extractfile(__tarinfo.name), 'md5') else: # switch the default hash sum type __hash = get_hash(__tar.extractfile(__tarinfo.name), self.__hashtype) # format the retrieved information __listoffiles.append(__onelinewithhash.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime), __hash, __tarinfo.linkname)) else: # check if there are exceptions while parsing if self.__parsingexceptions: for __file in self.__parsingexceptions: if fnmatch.fnmatch(__tarinfo.name, __file): __hash = get_hash(__tar.extractfile(__tarinfo.name), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=self.__delimiter) __listoffiles.append(__onelinewithhash.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime), __hash, __tarinfo.linkname)) else: # we use exceptions-file option but the file is not concerned by an exception __listoffiles.append(__onelinewithouthash.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime), __tarinfo.linkname)) else: # we don't use the 
--exceptions-file option __listoffiles.append(__onelinewithouthash.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime), __tarinfo.linkname)) elif __type == 'l' or __type == 's': # format the retrieved information __listoffiles.append(__onelinewithtarget.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime), __tarinfo.linkname)) else: # if file is not regular file, ignoring its hash sum __listoffiles.append(__oneline.format(__tarinfo.name, str(__tarinfo.size), str(__tarinfo.uid), str(__tarinfo.gid), str(__tarinfo.uname), str(__tarinfo.gname), __mode, __type, float(__tarinfo.mtime))) # Compose the name of the generated list ### for tar archive if self.__arcpath.lower().endswith('.tar'): self.__make_conf_and_list_paths('.tar') ### for tar.gz archive elif self.__arcpath.lower().endswith('.tar.gz'): self.__make_conf_and_list_paths('.tar.gz') ### for tar.bz2 archive elif self.__arcpath.lower().endswith('.tar.bz2'): self.__make_conf_and_list_paths('.tar.bz2') ### for tar.xz archive elif self.__arcpath.lower().endswith('.tar.xz'): self.__make_conf_and_list_paths('.tar.xz') ### for tgz archive elif self.__arcpath.lower().endswith('.tgz'): self.__make_conf_and_list_paths('.tgz') ### for tbz archive elif self.__arcpath.lower().endswith('.tbz'): self.__make_conf_and_list_paths('.tbz') ### for tbz2 archive elif self.__arcpath.lower().endswith('.tbz2'): self.__make_conf_and_list_paths('.tbz2') ### for tar stream elif self.__isastream: #if self._genfull: # self.__arcname = self.__tarstreamname #if self.__confname: # self.__arcname = self.__confname # self.__arcconfpath = ''.join([self.__confname, '.conf']) # self.__arclistpath = ''.join([self.__confname, '.list']) #else: # self.__arcconfpath = ''.join([self.__streampath, '.conf']) # self.__arclistpath = 
''.join([self.__streampath, '.list']) self.__make_conf_and_list_paths('') # call the method to write information in a file __listconfinfo = {'arclistpath': self.__arclistpath, 'listoffiles':__listoffiles} self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: # generate the hash sum of the list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) if self.__isastream: __confinfo = {'arcname':self.__arcname, 'arcconfpath': self.__arcconfpath, 'arclistpath': self.__arclistpath, 'arctype': 'archive', 'sha512': __listhashsum} else: __confinfo = {'arcname':self.__arcname, 'arcpath':self.__arcpath, 'arcconfpath': self.__arcconfpath, 'arclistpath': self.__arclistpath, 'arctype': 'archive', 'sha512': __listhashsum} self._generate_conf(__confinfo,self.__isastream) def __translate_type(self, __arctype): '''Translate the type of the file inside the tar by a generic name ''' __types = {tarfile.REGTYPE: 'f', tarfile.AREGTYPE: 'a', tarfile.CHRTYPE: 'c', tarfile.DIRTYPE: 'd', tarfile.LNKTYPE: 'l', tarfile.SYMTYPE: 's', tarfile.CONTTYPE: 'n', tarfile.BLKTYPE: 'b', tarfile.GNUTYPE_SPARSE: 'g', tarfile.FIFOTYPE: 'o'} return __types[__arctype] def __make_conf_and_list_paths(self, __tartype): '''Make conf file path and list file paths''' if not self.__isastream: __arcwithext = os.path.split(self.__arcpath[:-(len(__tartype)-1)])[1] # behaviour for --gen-list option # define custom path for the filelist or use the default one if self.__listoutput: # --gen-list and --output-list-dir and --configuration-name if self.__confname: self.__arclistpath = os.path.join(self.__listoutput, ''.join([self.__confname, '.', 'list'])) # --gen-list and --output-list-dir else: if self.__isastream: self.__arclistpath = os.path.join(self.__listoutput, '.'.join([self.__tarstreamname, 'list'])) else: self.__arclistpath = os.path.join(self.__listoutput, ''.join([__arcwithext, 'list'])) # define custom path for 
both filelist and conflist elif self.__fulloutput: # --gen-list and --output-list-and-conf-dir and --configuration-name if self.__confname: self.__arclistpath = os.path.join(self.__fulloutput, ''.join([self.__confname, '.', 'list'])) else: # --gen-list and --ouput-list-and-conf-dir if self.__isastream: self.__arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__tarstreamname, 'list'])) else: self.__arclistpath = os.path.join(self.__fulloutput, ''.join([__arcwithext, 'list'])) else: # only --configuration-name if self.__confname: __arcpath = os.path.dirname(self.__arcpath) __arcpath = os.path.join(__arcpath, self.__confname) self.__arclistpath = ''.join([__arcpath, '.', 'list']) # default behaviour else: if self.__isastream: __arcdir = os.path.dirname(self.__arcpath) self.__arclistpath = os.path.join(__arcdir, '.'.join([self.__tarstreamname, 'list'])) else: self.__arclistpath = ''.join([self.__arcpath[:-(len(__tartype)-1)], 'list']) # behaviour for --gen-full option if self._genfull: # define custom path for the conf file if self.__confoutput: if self.__confname: # --gen-full and --output-conf-dir and --configuration-name self.__arcconfpath = os.path.join(self.__confoutput, ''.join([self.__confname, '.', 'conf'])) else: # --gen-full and --output-conf-dir if self.__isastream: self.__arcconfpath = os.path.join(self.__confoutput, '.'.join([self.__tarstreamname, 'conf'])) else: self.__arcconfpath = os.path.join(self.__confoutput, ''.join([__arcwithext, 'conf'])) elif self.__fulloutput: # --gen-full and --output-list-and-conf-dir and --configuration-name if self.__confname: self.__arcconfpath = os.path.join(self.__fulloutput, ''.join([self.__confname, '.', 'conf'])) else: # --gen-full and --output-list-and-conf-dir if self.__isastream: self.__arcconfpath = os.path.join(self.__fulloutput, '.'.join([self.__tarstreamname, 'conf'])) else: self.__arcconfpath = os.path.join(self.__fulloutput, ''.join([__arcwithext, 'conf'])) else: # --gen-full and 
--configuration-name if self.__confname: __arcpath = os.path.dirname(self.__arcpath) __arcpath = os.path.join(__arcpath, self.__confname) self.__arcconfpath = ''.join([__arcpath, '.', 'conf']) else: # only --gen-full if self.__isastream: __arcdir = os.path.dirname(self.__arcpath) self.__arcconfpath = os.path.join(__arcdir, '.'.join([self.__tarstreamname, 'conf'])) else: self.__arcconfpath = ''.join([self.__arcpath[:-(len(__tartype)-1)], 'conf']) # user-defined name of the archive/stream if self.__confname: self.__arcname = self.__confname else: if self.__isastream: self.__arcname = self.__tarstreamname else: self.__arcname = os.path.basename(self.__arcpath[:-len(__tartype)]) backupchecker-1.9/backupchecker/generatelist/__init__.py0000664000000000000000000000000013073704736023506 0ustar rootroot00000000000000backupchecker-1.9/backupchecker/generatelist/generatelistforgzip.py0000664000000000000000000002264513073704736026061 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import fnmatch import gzip import os import os.path from backupchecker.checkhashes import get_hash from backupchecker.generatelist.generatelist import GenerateList # Generate a list of files from a gzip archive '''Generate a list of files from a gzip archive''' class GenerateListForGzip(GenerateList): '''Generate a list of files from a gzip archive''' def __init__(self, __genparams): '''The constructor for the GenerateListForGzip class''' __arcpath = __genparams['arcpath'] __delimiter = __genparams['delimiter'] self._genfull = __genparams['genfull'] self.__confoutput = __genparams['confoutput'] self.__listoutput = __genparams['listoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__confname = __genparams['confname'] __arcstat = os.stat(__arcpath) __listoffiles = ['[archive]\nmtime{} {}\n\n[files]\n'.format(__delimiter,__arcstat.st_mtime)] __fileinfo = os.lstat(__arcpath) __filetype = 'f' if not self.__hashtype: __filehash = get_hash(gzip.open(__arcpath, 'rb'), 'md5') else: __filehash = get_hash(gzip.open(__arcpath, 'rb'), self.__hashtype) with open(__arcpath, 'rb') as __gzip: __filesize = self.__extract_size(__gzip) __filename = self.__extract_initial_filename(__gzip, os.path.split(__arcpath)[-1][:-2]) if self.__getallhashes: if not self.__hashtype: __onelinewithhash = '{value}{delimiter} ={value} type{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) else: __onelinewithhash = '{value}{delimiter} ={value} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( __filename, str(__filesize), __filetype, __filehash)) else: if self.__parsingexceptions : for __file in self.__parsingexceptions: if fnmatch.fnmatch(__filename, __file): __filehash = 
get_hash(gzip.open(__arcpath, 'rb'), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} ={value} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( __filename, str(__filesize), __filetype, __filehash)) else: __onelinewithouthash = '{value}{delimiter} ={value} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( __filename, str(__filesize), __filetype)) else: __onelinewithouthash = '{value}{delimiter} ={value} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( __filename, str(__filesize), __filetype)) # define the flexible file list path __arcwithext = os.path.split(''.join([__arcpath[:-2], 'list']))[1] if self.__listoutput: if self.__confname: # --gen-list and --list-output-dir and --configuration-name __arclistpath = os.path.join(self.__listoutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --list-output-dir __arclistpath = os.path.join(self.__listoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-list and --output-list-and-conf-dir and --configuration-name __arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-list-and-conf-dir __arclistpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-list only if self.__confname: __arc = os.path.dirname(__arcpath) __arclistpath = os.path.join(__arc, '.'.join([self.__confname, 'list'])) else: __arclistpath = ''.join([__arcpath[:-2], 'list']) # call the method to write information in a file __listconfinfo = {'arclistpath': __arclistpath, 'listoffiles': __listoffiles} self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: # generate the hash sum of the 
list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) # define the flexible configuration file path __arcwithext = os.path.split(''.join([__arcpath[:-2], 'conf']))[1] if self.__confoutput: if self.__confname: # --gen-full and --output-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__confoutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-conf-dir __arcconfpath = os.path.join(self.__confoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-full and --output-list-and-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-list-and-conf-dir __arcconfpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-full only if self.__confname: __arc = os.path.dirname(__arcpath) __arcconfpath = os.path.join(__arc, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = ''.join([__arcpath[:-2], 'conf']) # the name of the backup inside the configuration file if self.__confname: __arcname = self.__confname else: __arcname = os.path.basename(__arcpath[:-3]) __confinfo = {'arcname': __arcname, 'arcpath': __arcpath, 'arcconfpath': __arcconfpath, 'arclistpath': __listconfinfo['arclistpath'], 'arctype': 'archive', 'sha512': __listhashsum} self._generate_conf(__confinfo) def __extract_size(self, __binary): '''Extract the size of the uncompressed file inside the archive - 4 last bytes of the archive ''' __binary.seek(-4, 2) return int.from_bytes(__binary.read(), 'little') def __extract_initial_filename(self, __binary, __arcname): '''Extract initial filename of the uncompressed file''' # We move the cursor on the 4th byte __binary.seek(3,0) # Read a byte __flag = __binary.read(1) # Store flag byte __intflag = int.from_bytes(__flag,'little') # If the extra field flag is on, extract the size of its data field __extralen = 0 if __intflag & 4 != 0: __binary.seek(9,0) __extralenbyte = 
__binary.read(2) __extralen = int.from_byte(__extralenbyte,'little') + 2 # If the flag "name" is on, skip to it and read the associated content __binaryname = b'' if __intflag & 8 != 0: __binary.seek(10 + __extralen) # until zero byte is found, read the initial filename in bytes while True: __newbyte = __binary.read(1) if __newbyte != b'\x00': __binaryname += __newbyte else: break return __binaryname.decode('latin1') else: return __arcname backupchecker-1.9/backupchecker/generatelist/generatelistforlzma.py0000664000000000000000000001554513073704736026054 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import fnmatch import lzma import os import os.path import stat from backupchecker.checkhashes import get_hash from backupchecker.generatelist.generatelist import GenerateList # Generate a list of files from a lzma archive '''Generate a list of files from a lzma archive''' class GenerateListForLzma(GenerateList): '''Generate a list of files from a lzma archive''' def __init__(self, __genparams): '''The constructor for the GenerateListForlzma class''' __arcpath = __genparams['arcpath'] __delimiter = __genparams['delimiter'] self._genfull = __genparams['genfull'] self.__confoutput = __genparams['confoutput'] self.__listoutput = __genparams['listoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__confname = __genparams['confname'] __listoffiles = ['[files]\n'] __filetype = 'f' __filehash = get_hash(lzma.LZMAFile(__arcpath, 'r'), 'md5') if self.__getallhashes: if not self.__hashtype: __onelinewithhash = '{value}{delimiter} type{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) else: __onelinewithhash = '{value}{delimiter} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( os.path.split(__arcpath)[-1][:-3], __filetype, __filehash)) else: if self.__parsingexceptions: for __file in self.__parsingexceptions: if fnmatch.fnmatch(os.path.split(__arcpath)[-1][:-3], __file): __filehash = get_hash(lzma.LZMAFile(__arcpath, 'r'), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( os.path.split(__arcpath)[-1][:-3], __filetype, __filehash)) else: __onelinewithouthash 
= '{value}{delimiter} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( os.path.split(__arcpath)[-1][:-3], __filetype)) else: __onelinewithouthash = '{value}{delimiter} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( os.path.split(__arcpath)[-1][:-3], __filetype)) # define the flexible file list path __arcwithext = os.path.split(''.join([__arcpath[:-2], 'list']))[1] if self.__listoutput: if self.__confname: __arclistpath = os.path.join(self.__listoutput, '.'.join([self.__confname, 'list'])) else: __arclistpath = os.path.join(self.__listoutput, __arcwithext) elif self.__fulloutput: if self.__confname: __arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'list']) ) else: __arclistpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-list only if self.__confname: __arc = os.path.dirname(__arcpath) __arclistpath = os.path.join(__arc, '.'.join([self.__confname, 'list']) ) else: __arclistpath = ''.join([__arcpath[:-2], 'list']) # call the method to write information in a file __listconfinfo = {'arclistpath': __arclistpath, 'listoffiles': __listoffiles} self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: # generate the hash sum of the list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) # define the flexible configuration file path __arcwithext = os.path.split(''.join([__arcpath[:-2], 'conf']))[1] if self.__confoutput: if self.__confname: __arcconfpath = os.path.join(self.__confoutput, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = os.path.join(self.__confoutput, __arcwithext) elif self.__fulloutput: if self.__confname: __arcconfpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = os.path.join(self.__fulloutput, __arcwithext) else: # 
--gen-full only if self.__confname: __arc = os.path.dirname(__arcpath) __arcconfpath = os.path.join(__arc, '.'.join([self.__confname, 'conf']) ) else: __arcconfpath = ''.join([__arcpath[:-2], 'conf']) # user-define change of the name of the archive if self.__confname: __arcname = self.__confname else: __arcname = os.path.basename(__arcpath[:-3]) __confinfo = {'arcname': __arcname, 'arcpath': __arcpath, 'arcconfpath': __arcconfpath, 'arclistpath': __listconfinfo['arclistpath'], 'arctype': 'archive', 'sha512': __listhashsum} self._generate_conf(__confinfo) backupchecker-1.9/backupchecker/generatelist/generatelistfortree.py0000664000000000000000000004114313073704736026041 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import fnmatch import grp import os import os.path import pwd import stat from backupchecker.generatelist.generatelist import GenerateList from backupchecker.checkhashes import get_hash # Generate a list of files from a tree '''Generate a list of files from a tree''' class GenerateListForTree(GenerateList): '''Generate a list of files from a tree''' def __init__(self, __genparams): '''The constructor for the GenerateListForTree class''' __arcpath = __genparams['arcpath'] __delimiter = __genparams['delimiter'] self._genfull = __genparams['genfull'] self.__listoutput = __genparams['listoutput'] self.__confoutput = __genparams['confoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__confname = __genparams['confname'] __listoffiles = ['[files]\n'] __oneline = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) if self.__getallhashes: if not self.__hashtype: __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) else: __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=__delimiter) else: __onelinewithouthash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} 
type{delimiter}{value} mtime{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) # we also need parameters for symbolic links __onelinewithtarget = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} target{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) for __dirpath, __dirnames, __filenames, in os.walk(__arcpath): # ignoring the uppest directory if os.path.relpath(__dirpath, __arcpath) != '.': # studying directories __dirinfo = os.lstat(__dirpath) __dirmode = oct(stat.S_IMODE(__dirinfo.st_mode)).split('o')[-1] # translate file type in backupchecker intern file type __type = self.__translate_type(__dirinfo.st_mode) # extract file data __listoffiles.append(__oneline.format( os.path.relpath(__dirpath, __arcpath), str(__dirinfo.st_size), str(__dirinfo.st_uid), str(__dirinfo.st_gid), self.__extract_username(__dirinfo.st_uid), self.__extract_groupname(__dirinfo.st_gid), __dirmode, __type, str(__dirinfo.st_mtime))) # studying files for __filename in __filenames: __filepath = os.path.join(__dirpath, __filename) __filepath = self._normalize_path(__filepath) self.__fileinfo = os.lstat(__filepath) __filemode = oct(stat.S_IMODE(self.__fileinfo.st_mode)).split('o')[-1] __type = self.__translate_type(self.__fileinfo.st_mode) if __type == 'f': if self.__getallhashes: if not self.__hashtype: # extract hash sum of the file inside the archive __hash = get_hash(open(__filepath, 'rb'), 'md5') else: # extract hash sum of the file inside the archive __hash = get_hash(open(__filepath, 'rb'), self.__hashtype) # extract file data and prepare data __listoffiles.append(__onelinewithhash.format( os.path.relpath(__filepath, __arcpath), str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), 
__filemode, __type, str(self.__fileinfo.st_mtime), __hash)) else: # check if there are exceptions while parsing if self.__parsingexceptions: for __file in self.__parsingexceptions: if fnmatch.fnmatch(os.path.relpath(__filepath, __arcpath), __file): __hash = get_hash(open(__filepath, 'rb'), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} ={value} uid{delimiter}{value} gid{delimiter}{value} owner{delimiter}{value} group{delimiter}{value} mode{delimiter}{value} type{delimiter}{value} mtime{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( os.path.relpath(__filepath, __arcpath), str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), __filemode, __type, str(self.__fileinfo.st_mtime), __hash)) else: # we use exceptions-file option but the file is not concerned by an exception __listoffiles.append(__onelinewithouthash.format( os.path.relpath(__filepath, __arcpath), str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), __filemode, __type, str(self.__fileinfo.st_mtime))) else: # we don't use the --exceptions-file option __listoffiles.append(__onelinewithouthash.format( os.path.relpath(__filepath, __arcpath), str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), __filemode, __type, str(self.__fileinfo.st_mtime))) elif __type == 's': # extract hash sum of the file inside the archive # extract file data and prepare data __listoffiles.append(__onelinewithtarget.format( os.path.relpath(__filepath, __arcpath), 
str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), __filemode, __type, str(self.__fileinfo.st_mtime), os.readlink(__filepath))) else: # if file is not regular file, ignoring its hash sum __listoffiles.append(__oneline.format( os.path.relpath(__filepath, __arcpath), str(self.__fileinfo.st_size), str(self.__fileinfo.st_uid), str(self.__fileinfo.st_gid), self.__extract_username(self.__fileinfo.st_uid), self.__extract_groupname(self.__fileinfo.st_gid), __filemode, __type, str(self.__fileinfo.st_mtime))) # include custom paths for output conf files __reparc = os.path.split(__arcpath)[-1] if self.__listoutput: # --gen-list and --output-list-dir and --configuration-name if self.__confname: __arclistpath = os.path.join(self.__listoutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-list-dir __arclistpath = os.path.join(self.__listoutput, '.'.join([__reparc, 'list'])) elif self.__fulloutput: if self.__confname: # --gen-list and --output-list-and-conf-dir and --configuration-name __arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-list-and-conf-dir __arclistpath = os.path.join(self.__fulloutput, '.'.join([__reparc, 'list'])) else: # --gen-list only if self.__confname: __arc = os.path.dirname(__arcpath) __arclistpath = os.path.join(__arc, '.'.join([self.__confname, 'list'])) else: __arclistpath = ''.join([__arcpath, '.list']) __listconfinfo = {'arclistpath': __arclistpath, 'listoffiles': __listoffiles} # call the method to write information in a file self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: if self.__confoutput: if self.__confname: # --gen-full and --output-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__confoutput, '.'.join([self.__confname, 
'conf'])) else: # --gen-full and --output-conf-dir __arcconfpath = os.path.join(self.__confoutput, '.'.join([__reparc, 'conf'])) elif self.__fulloutput: if self.__confname: # --gen-full and --output-list-and-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-list-and-conf-dir __arcconfpath = os.path.join(self.__fulloutput, '.'.join([__reparc, 'conf'])) else: if self.__confname: # --gen-full and --configuration-name __arc = os.path.dirname(__arcpath) __arc = os.path.join(__arc, self.__confname) __arcconfpath = '.'.join([__arc, 'conf']) else: # --gen-full only if self.__confname: __arc = os.path.dirname(__arcpath) __arcconfpath = os.path.join(__arc, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = '.'.join([__arcpath, 'conf']) # generate the hash sum of the list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) __arcname = os.path.basename(__arcpath) if self.__confname: __arcname = self.__confname else: __arcname = os.path.basename(__arcpath) # include custom paths for output conf files __confinfo = {'arcname': __arcname, 'arcpath': __arcpath, 'arcconfpath': __arcconfpath, 'arclistpath': __arclistpath, 'arctype': 'tree', 'sha512': __listhashsum} self._generate_conf(__confinfo) def __translate_type(self, __mode): '''Translate the type of the file to a generic name''' if stat.S_ISREG(__mode): if self.__fileinfo[stat.ST_NLINK] > 1: return 'l' else: return 'f' elif stat.S_ISDIR(__mode): return 'd' elif stat.S_ISCHR(__mode): return 'c' elif stat.S_ISLNK(__mode): return 's' elif stat.S_ISBLK(__mode): return 'b' elif stat.S_ISSOCK(__mode): return 'k' elif stat.S_ISFIFO(__mode): return 'o' pass def __extract_username(self, __uid): '''Get the username mapping the uid, return unknown otherwise''' try: __username = pwd.getpwuid(__uid).pw_name except KeyError as __msg: if 'uid not found' in str(__msg): return 'unknown' return __username def 
__extract_groupname(self, __gid): '''Get the group name mapping the gid, return unknown otherwise''' try: __groupname = grp.getgrgid(__gid).gr_name except KeyError as __msg: if 'gid not found' in str(__msg): return 'unknown' return __groupname backupchecker-1.9/backupchecker/generatelist/generatelistforbzip2.py0000664000000000000000000001673013073704736026134 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
import bz2 import fnmatch import os import os.path import stat from backupchecker.checkhashes import get_hash from backupchecker.generatelist.generatelist import GenerateList # Generate a list of files from a bzip2 archive '''Generate a list of files from a bzip2 archive''' class GenerateListForBzip2(GenerateList): '''Generate a list of files from a bzip2 archive''' def __init__(self, __genparams): '''The constructor for the GenerateListForBzip2 class''' __arcpath = __genparams['arcpath'] __delimiter = __genparams['delimiter'] self._genfull = __genparams['genfull'] self.__listoutput = __genparams['listoutput'] self.__confoutput = __genparams['confoutput'] self.__fulloutput = __genparams['fulloutput'] self.__getallhashes = __genparams['getallhashes'] self.__hashtype = __genparams['hashtype'] self.__parsingexceptions = __genparams['parsingexceptions'] self.__confname = __genparams['confname'] __listoffiles = ['[files]\n'] __filetype = 'f' __filehash = get_hash(bz2.BZ2File(__arcpath, 'r'), 'md5') if self.__getallhashes: if not self.__hashtype: __onelinewithhash = '{value}{delimiter} type{delimiter}{value} md5{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) else: __onelinewithhash = '{value}{delimiter} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__hashtype, delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( os.path.split(__arcpath)[-1][:-4], __filetype, __filehash)) else: if self.__parsingexceptions : for __file in self.__parsingexceptions: if fnmatch.fnmatch(os.path.split(__arcpath)[-1][:-4], __file): __filehash = get_hash(bz2.BZ2File(__arcpath, 'r'), self.__parsingexceptions[__file]) __onelinewithhash = '{value}{delimiter} type{delimiter}{value} {hashtype}{delimiter}{value}\n'.format(value='{}', hashtype=self.__parsingexceptions[__file], delimiter=__delimiter) __listoffiles.append(__onelinewithhash.format( os.path.split(__arcpath)[-1][:-4], __filetype, __filehash)) else: 
__onelinewithouthash = '{value}{delimiter} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( os.path.split(__arcpath)[-1][:-4], __filetype)) else: __onelinewithouthash = '{value}{delimiter} type{delimiter}{value}\n'.format(value='{}', delimiter=__delimiter) __listoffiles.append(__onelinewithouthash.format( os.path.split(__arcpath)[-1][:-4], __filetype)) # define the flexible file list path __arcwithext = os.path.split(''.join([__arcpath[:-3], 'list']))[1] if self.__listoutput: if self.__confname: # --gen-list and --output-list-dir and --configuration-name __arclistpath = os.path.join(self.__listoutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-list-dir __arclistpath = os.path.join(self.__listoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-list and --output-conf-and-list-dir and --configuration-name __arclistpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'list'])) else: # --gen-list and --output-conf-and-list-dir __arclistpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-list only if self.__confname: __arc = os.path.dirname(__arcpath) __arclistpath = os.path.join(__arc, '.'.join([self.__confname, 'list'])) else: __arclistpath = ''.join([__arcpath[:-3], 'list']) # call the method to write information in a file __listconfinfo = {'arclistpath': __arclistpath, 'listoffiles': __listoffiles} self.__lci = __listconfinfo self._generate_list(__listconfinfo) # call the method to write the configuration file if --gen-full was required if self._genfull: # generate the hash sum of the list of files __listhashsum = self._get_list_hash(__listconfinfo['arclistpath']) # define the flexible configuration file path __arcwithext = os.path.split(''.join([__arcpath[:-3], 'conf']))[1] if self.__confoutput: if self.__confname: # --gen-full and --output-conf-dir and --configuration-name __arcconfpath = os.path.join(self.__confoutput, 
'.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-conf-dir __arcconfpath = os.path.join(self.__confoutput, __arcwithext) elif self.__fulloutput: if self.__confname: # --gen-full and --output-conf-and-list-dir and --configuration-name __arcconfpath = os.path.join(self.__fulloutput, '.'.join([self.__confname, 'conf'])) else: # --gen-full and --output-conf-and-list-dir __arcconfpath = os.path.join(self.__fulloutput, __arcwithext) else: # --gen-full only if self.__confname: __arc = os.path.dirname(__arcpath) __arcconfpath = os.path.join(__arc, '.'.join([self.__confname, 'conf'])) else: __arcconfpath = ''.join([__arcpath[:-3], 'conf']) # user-define name of the archive if self.__confname: __arcname = self.__confname else: __arcname = os.path.basename(__arcpath[:-4]) __confinfo = {'arcname': __arcname, 'arcpath': __arcpath, 'arcconfpath': __arcconfpath, 'arclistpath': __listconfinfo['arclistpath'], 'arctype': 'archive', 'sha512': __listhashsum} self.__ci = __confinfo self._generate_conf(__confinfo) backupchecker-1.9/backupchecker/generatelist/generatelist.py0000664000000000000000000000563313073704736024456 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
# Generate a list of files in a backup '''Generate a list of files in a backup''' import logging import os.path import sys from backupchecker.checkhashes import get_hash class GenerateList: '''The GenerateList class''' def _generate_list(self, __listconfinfo): '''Write the list of file information inside the archive in a file''' try: with open(__listconfinfo['arclistpath'], 'w') as __file: __file.writelines(__listconfinfo['listoffiles']) except (OSError, IOError) as __msg: print(__msg) sys.exit(1) except UnicodeEncodeError as __msg: print(__msg) print('The encoding of the archive and the one of this system differs.\nThe result will not be reliable. Aborting.') sys.exit(1) def _generate_conf(self, __confinfo, __isastream=False): '''Write the configuration file for the archive''' if __isastream: __confcontent = '[main]\nname={name}\ntype={type}\nfiles_list={listoffiles}\nsha512={sha512}\n'.format(name=__confinfo['arcname'],type=__confinfo['arctype'],listoffiles=__confinfo['arclistpath'], sha512=__confinfo['sha512']) else: __confcontent = '[main]\nname={name}\ntype={type}\npath={path}\nfiles_list={listoffiles}\nsha512={sha512}\n'.format(name=__confinfo['arcname'],type=__confinfo['arctype'],path=__confinfo['arcpath'],listoffiles=__confinfo['arclistpath'], sha512=__confinfo['sha512']) try: with open(__confinfo['arcconfpath'], 'w') as __file: __file.write(__confcontent) except (OSError, IOError) as __msg: print(__msg) sys.exit(1) def _normalize_path(self, __path): '''Remove last slash of a directory path if present''' if __path.endswith('/'): return __path[:-1] else: return __path def _get_list_hash(self, __listpath): '''Get the hash sum of the list of files''' try: with open(__listpath, 'rb') as __file: __listhash = get_hash(__file, 'sha512') except (OSError, IOError) as __msg: print(__msg) sys.exit(1) return __listhash backupchecker-1.9/backupchecker/main.py0000664000000000000000000000346413073704736020226 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # 
Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # The application main '''The application main''' import sys from backupchecker.checkbackups.checkbackups import CheckBackups from backupchecker.cliparse import CliParse from backupchecker.configurations import Configurations from backupchecker.exceptionsparsing import ExceptionsParsing from backupchecker.listtype import ListType class Main(object): '''The main class''' def __init__(self): '''The constructor of the Main class.''' self.__main() def __main(self): '''The main for the Main class''' __options = CliParse().options # no list generation mode, check backups if not __options.genlist and not __options.genfull: __confs = Configurations(__options.confpath, __options.isastream) CheckBackups(__confs.configs, __options) else: # Analyze the type of the list to produce if __options.parsingexceptions: __exps = ExceptionsParsing(__options.parsingexceptions, __options.delimiter) ListType(__options, __exps.exceptions) else: ListType(__options) backupchecker-1.9/backupchecker/cliparse.py0000664000000000000000000002167313073704736021106 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # Retrieve the command line options '''Retrieve the command line options''' import logging from argparse import ArgumentParser import os import sys from hashlib import algorithms_guaranteed from backupchecker.applogger import AppLogger class CliParse: '''Retrieve the command line options''' def __init__(self): '''The constructor for the CliParse class.''' self._options = () backupcheckerdescription = 'Fully automated backup checker' backupcheckerepilog = 'For more information: http://www.backupcheckerproject.org' __parser = ArgumentParser(prog='backupchecker', description=backupcheckerdescription, epilog=backupcheckerepilog) self.__define_options(__parser) def __define_options(self, __parser): '''Define the options''' # define mutually exclusive arguments __group = __parser.add_mutually_exclusive_group(required=True) __group.add_argument('-c', '--configpath', dest='confpath', action='store', default=os.getcwd(), help='the path to the configurations', metavar='DIR') __parser.add_argument('-C', '--output-conf-dir', dest='confoutput', action='store', default='', help='the directory to store the configuration file', metavar='DIR') __parser.add_argument('-d', '--delimiter', dest='delimiter', action='store', default='|', help='delimiter of the fields for the list of files', metavar='DELIMITER') __parser.add_argument('-E', '--exceptions-file', dest='parsingexceptions', action='store', default='', help='the file with exceptions to normal Brebis behaviour while generating configuration files', metavar='FILE') __group.add_argument('-g', '--gen-list', dest='genlist', action='store_true', help='generate a list of files inside a 
backup') __group.add_argument('-G', '--gen-full', dest='genfull', action='store_true', help='generate the configuration file and the list of files for the backup') __parser.add_argument('-H', '--hashes', dest='getallhashes', action='store_true', help='generate the hash sum of each encountered file in the backup') __parser.add_argument('--hashtype', dest='hashtype', action='store', default='', help='the type of the hash sum to use while generating configurations for the archive', metavar='HASHTYPE') __parser.add_argument('-l', '--log', dest='logfile', action='store', default=os.path.join(os.getcwd(), 'a.out'), help='the log file', metavar='FILE') __parser.add_argument('-L', '--output-list-dir', dest='listoutput', action='store', default='', help='the directory to store the list of files inside an archive or tree', metavar='DIR') __parser.add_argument('-O', '--output-list-and-conf-dir', dest='fulloutput', action='store', default='', help='the directory to store the configuration file and the list of files inside an archive or tree', metavar='DIR') __parser.add_argument('-n', '--configuration-name', dest='confname', action='store', help='the name of the configuration files') __parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.9', help='print the version of this program and exit') __parser.add_argument('archives', nargs='*', help='archives to check') __args = __parser.parse_args() self.__verify_options(__args) def __verify_options(self, __options): '''Verify the options given on the command line''' __options.isastream = False # check if the archives exist for __i, __path in enumerate(__options.archives): # the input is a stream if __i == 0 and __path == '-': __options.isastream = True __options.archives[__i] = os.path.abspath(os.getcwd()) break if not os.path.exists(__path): print('{} : no file or directory at this path. 
Exiting.'.format(__path)) sys.exit(1) # using absolute path in order to be consistent __path = os.path.abspath(__path) # if the path exists, check if it is a regular file, a link or # a directory otherwise exits if not os.path.isfile(__path) and not os.path.isdir(__path): print('{}: not a file or a directory. Exiting.'.format(__path)) sys.exit(1) else: __options.archives[__i] = __path # verify option compatibilites if __options.isastream and __options.getallhashes: print('Options are not compatible, not possible to compute the hash of files within an archive from a stream') sys.exit(1) # Check the logfile __options.logfile = __options.logfile.strip() __logdir = os.path.split(__options.logfile)[0] if __logdir and not os.path.exists(__logdir): print('The directory where to write the log file {} does not exist'.format(__logdir)) sys.exit(1) # Check the exceptions parsing file __parsingexceptions = os.path.split(__options.parsingexceptions)[0] if __parsingexceptions and not os.path.exists(__parsingexceptions): print('The file with exceptions to comply while generating configurations file {} does not exist'.format(__parsingexceptions)) sys.exit(1) # Check the configuration output directory __confoutput = __options.confoutput if __confoutput and not os.path.exists(__confoutput): print('The directory where to write the configuration file {} does not exist'.format(__confoutput)) sys.exit(1) # Check the directory where the list of files is written __listoutput= __options.listoutput if __listoutput and not os.path.exists(__listoutput): print('The directory where to write the list of files inside the archive {} does not exist'.format(__listoutput)) sys.exit(1) # Check the directory where the list of files and the configuration are written __fulloutput= __options.fulloutput if __fulloutput and not os.path.exists(__fulloutput): print('The directory where to write the list of files inside the archive and the configuration file {} does not exist'.format(__fulloutput)) sys.exit(1) 
# using absolute path in order to be consistent __options.logfile = os.path.abspath(__options.logfile) # Configure the logger AppLogger(__options.logfile) # Verify if --gen-list option is not invoked before calling configuration path control if not __options.genlist: # Check the configuration directory or file if not os.path.exists(__options.confpath): print('The configuration directory or file does not exist: {}'.format(__options.confpath)) sys.exit(1) __options.confpath = os.path.abspath(__options.confpath) # Check that the hash type for the option --hashtype is available if __options.hashtype and (__options.hashtype not in algorithms_guaranteed): print('The hash type {} you specified is not available'.format(__options.hashtype)) sys.exit(1) # strip blank space leading some fields from the command line using python subprocess if __options.confname: __options.confname = __options.confname.strip() if __options.listoutput: __options.listoutput = __options.listoutput.strip() if __options.confoutput: __options.confoutput = __options.confoutput.strip() if __options.fulloutput: __options.fulloutput = __options.fulloutput.strip() self.__options = __options @property def options(self): '''Return the command line options''' return self.__options backupchecker-1.9/backupchecker/exceptionsparsing.py0000664000000000000000000000532613073704736023046 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# Extract from the parsing exceptions file the exceptions to comply with
'''Extract from the parsing exceptions file the exceptions to comply with'''

import configparser
from hashlib import algorithms_guaranteed
import os.path
import sys


class ExceptionsParsing:
    '''Parse the exceptions file and expose the exceptions to comply with.'''

    def __init__(self, __filepath, __delimiter):
        '''Store the field delimiter and parse the given exceptions file.

        Keyword arguments:
        __filepath -- path of the exceptions file
        __delimiter -- delimiter separating keys and values in that file
        '''
        self.__parsingexceptions = {}
        self.__delimiter = __delimiter
        self.__parse_file(__filepath)

    def __parse_file(self, __filepath):
        '''Open the exceptions file and hand it over to the extractor.

        Exit with an error message on parsing or I/O failure.
        '''
        try:
            with open(__filepath, 'r') as __stream:
                self.__extract(__stream)
        except (configparser.Error, IOError, OSError) as __err:
            print(__err)
            sys.exit(1)

    def __extract(self, __stream):
        '''Read the [files] section and record each exception entry.'''
        __config = configparser.ConfigParser(delimiters=(self.__delimiter,))
        # keep option names case-sensitive: they are file paths
        __config.optionxform = str
        __config.read_file(__stream)
        if not __config.has_section('files'):
            return
        for __entry in __config.items('files'):
            # drop a trailing slash so directories and files share one key form
            __key = __entry[0][:-1] if __entry[0].endswith('/') else __entry[0]
            self.__parsingexceptions[__key] = ''
            if len(__entry) == 2:
                for __field in __entry[1].split(' '):
                    # keep the field when it names a guaranteed hash type
                    if any(__field.startswith(__h) for __h in algorithms_guaranteed):
                        self.__parsingexceptions[__key] = __field

    @property
    def exceptions(self):
        '''Return the parsing exceptions'''
        return self.__parsingexceptions
# Application logger
'''Application logger'''

import logging
import sys


class AppLogger(object):
    '''Configure the application-wide logger.'''

    def __init__(self, __logfile):
        '''The constructor for the AppLogger class.

        Keyword arguments:
        __logfile -- the path of the log

        Exits with status 1 if the log file cannot be created.
        '''
        try:
            # truncate the log at every run (filemode='w') and only keep
            # warnings and above, which is what the checkers emit
            logging.basicConfig(filename=__logfile,
                                level=logging.WARNING,
                                filemode='w')
        except (IOError, OSError) as __msg:
            # the project was renamed from brebis to Backup Checker:
            # report the current name, not the historical one
            print('Backup Checker output file could not be created: {}'.format(__msg))
            sys.exit(1)
# Get the hash of a file
'''Get the hash of a file'''

import hashlib


def get_hash(__arcfile, __hashtype):
    '''Return the hex digest of a file-like object.

    Keyword arguments:
    __arcfile -- an open binary file-like object; it is closed before returning
    __hashtype -- the name of a hash algorithm known to hashlib (e.g. 'sha512')

    Raises ValueError if __hashtype is not a supported algorithm.
    '''
    # hashlib.new also resolves algorithms not exposed as module attributes
    __hasher = hashlib.new(__hashtype)
    # hash by chunks instead of a single read(): backup archives can be
    # arbitrarily large and must not be loaded whole into memory
    for __chunk in iter(lambda: __arcfile.read(65536), b''):
        __hasher.update(__chunk)
    __arcfile.close()
    return __hasher.hexdigest()
class ExpectedValues(object):
    '''Extract information about the archive (if it is one)
    and expected saved files.
    '''

    def __init__(self, __bckconf, __options):
        '''The constructor of the ExpectedValues class.

        Keyword arguments:
        __bckconf -- the configuration of this backup (provides 'files_list',
                     'delimiter' and the values CheckFileList needs)
        __options -- the parsed command-line options (fallback delimiter)
        '''
        self.__bckfiles = []
        self.__arcdata = {}
        __path = __bckconf['files_list']
        # Define delimiter value: the per-backup one wins over the global one
        if not __bckconf['delimiter']:
            __delimiter = __options.delimiter
        else:
            __delimiter = __bckconf['delimiter']
        # test if the expected value of the hash of the list of file is correct
        CheckFileList(__bckconf)
        # launch the main of the class
        self.__main(__path, __delimiter)

    def __main(self, __path, __delimiter):
        '''Main of the ExpectedValues class'''
        try:
            with open(__path, 'r') as __file:
                self.__retrieve_data(__file, __path, __delimiter)
        except (configparser.Error, IOError, OSError) as __err:
            print(__err)
            sys.exit(1)

    def __retrieve_data(self, __file, __path, __delimiter):
        '''Retrieve data from the expected files'''
        # FIX: delimiters must be a tuple of delimiter strings. The previous
        # (__delimiter) was a plain string, so a multi-character delimiter
        # would have been treated as several one-character delimiters
        # (ExceptionsParsing already used the tuple form).
        __config = ConfigParser(delimiters=(__delimiter,))
        # keep option names case-sensitive: they are file paths
        __config.optionxform = str
        __config.read_file(__file)
        #########################
        # Test the archive itself
        #########################
        if __config.has_section('archive'):
            __archive = __config.items('archive')
            # Testing the mtime of the archive
            if 'mtime' in __config['archive']:
                self.__arcdata['mtime'] = float(__config['archive']['mtime'])
            # Testing the size of the archive
            if 'size' in __config['archive']:
                ### Test if the equality is required
                if __config['archive']['size'].startswith('='):
                    self.__arcdata['equals'] = self.__convert_arg(__config['archive']['size'])
                ### Test if bigger than is required
                elif __config['archive']['size'].startswith('>'):
                    self.__arcdata['biggerthan'] = self.__convert_arg(__config['archive']['size'])
                ### Test if smaller than is required
                elif __config['archive']['size'].startswith('<'):
                    self.__arcdata['smallerthan'] = self.__convert_arg(__config['archive']['size'])
            # Test the mode of the archive
            if 'mode' in __config['archive']:
                if len(__config['archive']['mode']) < 3 or len(__config['archive']['mode']) > 4:
                    logging.warning('{}: Wrong format for the mode.'.format(__path))
                else:
                    self.__arcdata['mode'] = __config['archive']['mode']
            try:
                # Testing the uid of the archive
                if 'uid' in __config['archive']:
                    self.__arcdata['uid'] = int(__config['archive']['uid'])
                # Testing the gid of the archive
                if 'gid' in __config['archive']:
                    self.__arcdata['gid'] = int(__config['archive']['gid'])
                # Testing the owner of the archive
                if 'uname' in __config['archive']:
                    self.__arcdata['uname'] = __config['archive']['uname']
                # Testing the group owner of the archive
                if 'gname' in __config['archive']:
                    self.__arcdata['gname'] = __config['archive']['gname']
            except ValueError as __msg:
                logging.warning(__msg)
            # Testing the hash of the archive
            for __hash in algorithms_guaranteed:
                if __hash in __config['archive']:
                    self.__arcdata['hash'] = {'hashtype': __hash, 'hashvalue': __config['archive'][__hash]}
        ######################
        # Test expected files
        ######################
        if __config.has_section('files'):
            __files = __config.items('files')
            for __fileitems in __files:
                __data = {}
                __data['path'] = __fileitems[0]
                if __data['path'].endswith('/'):
                    __data['path'] = __data['path'][:-1]
                if len(__fileitems) == 2:
                    for __item in __fileitems[1].split(' '):
                        try:
                            # Testing the items for an expected file
                            if __item == 'unexpected':
                                __data['unexpected'] = True
                            # The uid of the expected file
                            elif __item.startswith('uid{}'.format(__delimiter)):
                                __data['uid'] = int(__item.split(__delimiter)[-1])
                            # The gid of the expected file
                            elif __item.startswith('gid{}'.format(__delimiter)):
                                __data['gid'] = int(__item.split(__delimiter)[-1])
                            # The owner name of the expected file
                            elif __item.startswith('owner{}'.format(__delimiter)):
                                __data['uname'] = __item.split(__delimiter)[-1]
                            # The gname of the expected file
                            elif __item.startswith('group{}'.format(__delimiter)):
                                __data['gname'] = __item.split(__delimiter)[-1]
                            # The mode of the expected file
                            elif __item.startswith('mode{}'.format(__delimiter)):
                                __mode = __item.split(__delimiter)[-1]
                                if len(__mode) < 3 or len(__mode) > 4:
                                    logging.warning('{}: Wrong format for the mode.'.format(__data['path']))
                                else:
                                    __data['mode'] = __mode
                            # Testing the type of the file
                            elif __item.startswith('type{}'.format(__delimiter)):
                                __type = __item.split(__delimiter)[-1]
                                ### f for file, c for character, d for directory
                                ### s for symbolink link, b for block, o for fifo,
                                ### k for socket, l for hard link
                                __types = ('f', 'c', 'd', 's', 'b', 'o', 'k', 'l')
                                if __type not in __types:
                                    logging.warning('{}: Unknown type {} for file parameter'.format(__data['path'], __type))
                                else:
                                    __data['type'] = __type
                            # Testing the mtime of the file
                            elif __item.startswith('mtime{}'.format(__delimiter)):
                                try:
                                    __data['mtime'] = float(__item.split(__delimiter)[-1])
                                except ValueError as __msg:
                                    logging.warning(__msg)
                                    __data['mtime'] = 0.0
                            # Testing the size of the file
                            ### Test if the equality is required
                            elif __item.startswith('='):
                                __data['equals'] = self.__convert_arg(__item)
                            ### Test if bigger than is required
                            elif __item.startswith('>'):
                                __data['biggerthan'] = self.__convert_arg(__item)
                            ### Test if smaller than is required
                            elif __item.startswith('<'):
                                __data['smallerthan'] = self.__convert_arg(__item)
                            # Testing if there is a target for this file
                            elif __item.startswith('target{}'.format(__delimiter)):
                                # FIX: use .get() - a bare __data['type'] raised
                                # KeyError when 'target' appeared without a 'type'
                                # field, instead of printing the intended error
                                if __data.get('type') and (__data['type'] == 'l' or __data['type'] == 's'):
                                    __data['target'] = __item.split(__delimiter)[-1]
                                else:
                                    __errmsg = 'The list of your file contains a target field although the file is not a symlink or a hard link'
                                    print(__errmsg)
                                    sys.exit(1)
                            # Test if a hash is provided for this file
                            for __hash in algorithms_guaranteed:
                                if __item.startswith('{}{}'.format(__hash, __delimiter)):
                                    __hashtype, __hashvalue = __item.split(__delimiter)
                                    __data['hash'] = {'hashtype': __hashtype, 'hashvalue': __hashvalue}
                        except ValueError as __msg:
                            logging.warning(__msg)
                self.__bckfiles.append(__data)

    def __convert_arg(self, __arg):
        '''Convert the given file length to bytes'''
        __res = 0
        # lowercase only the trailing unit letter, e.g. '=10K' -> '=10k'
        __arg = ''.join([__arg[:-1], __arg[-1].lower()])
        try:
            for __value, __power in [('k', 1), ('m', 2), ('g', 3), ('p', 4),
                                     ('e', 5), ('z', 6), ('y', 7)]:
                if __arg.endswith(__value):
                    __res = int(__arg[1:-1]) * 1024**__power
            # no unit suffix matched: the whole value is a byte count
            if __res == 0:
                __res = int(__arg[1:])
        except ValueError as __msg:
            logging.warning(__msg)
            __res = 0
        finally:
            return __res

    @property
    def data(self):
        '''Return the paths of the expected files in the archive'''
        return self.__bckfiles, self.__arcdata
    def __init__(self, __options, __parsingexceptions={}):
        '''The constructor for the ListType class.

        Keyword arguments:
        __options -- the parsed command-line options
        __parsingexceptions -- mapping of file paths to the hash field to use
                               for them while generating configurations
        '''
        # NOTE(review): mutable default argument; safe here because the dict
        # is only stored and read, never mutated - confirm before changing
        self.__parsingexceptions = __parsingexceptions
        self.__main(__options)

    def __main(self, __options):
        '''Main for ListType class.

        Dispatch each backup path to the list generator matching its type
        (file tree, tar-family archive, gzip, bzip2, lzma, zip or apk).
        '''
        __arcpaths = __options.archives
        __delimiter = __options.delimiter
        __genfull = __options.genfull
        __genlist = __options.genlist
        __fulloutput = __options.fulloutput
        __confoutput = __options.confoutput
        __listoutput = __options.listoutput
        __getallhashes = __options.getallhashes
        __hashtype = __options.hashtype
        __isastream = __options.isastream
        __confname = __options.confname
        for __arcpath in __arcpaths:
            # create a tuple with the different parameters
            # for the generation of the archives's files
            __genparams = {'arcpath': __arcpath,
                           'delimiter': __delimiter,
                           'genfull': __genfull,
                           'genlist': __genlist,
                           'confoutput': __confoutput,
                           'listoutput': __listoutput,
                           'fulloutput': __fulloutput,
                           'getallhashes': __getallhashes,
                           'hashtype': __hashtype,
                           'parsingexceptions': self.__parsingexceptions,
                           'isastream': __isastream,
                           'confname': __confname}
            # generate a list of files for a tree
            if not __isastream and os.path.isdir(__arcpath):
                self.__bck = GenerateListForTree(__genparams)
            # generate a list of files for a tar.gz/bz2 archive
            # (a stream is always handled by the tar generator)
            elif __arcpath.lower().endswith('.tar') or __isastream or\
                    __arcpath.lower().endswith('.tar.gz') or\
                    __arcpath.lower().endswith('.tar.bz2') or\
                    __arcpath.lower().endswith('.tar.xz') or\
                    __arcpath.lower().endswith('.tgz') or\
                    __arcpath.lower().endswith('.tbz') or\
                    __arcpath.lower().endswith('.tbz2'):
                self.__bck = GenerateListForTar(__genparams)
            # generate a list of files for a gzip archive
            elif __arcpath.lower().endswith('.gz'):
                self.__bck = GenerateListForGzip(__genparams)
            # generate a list of files for a bzip2 archive
            elif __arcpath.lower().endswith('.bz2'):
                self.__bck = GenerateListForBzip2(__genparams)
            # generate a list of files for a lzma archive
            elif __arcpath.lower().endswith('.xz'):
                self.__bck = GenerateListForLzma(__genparams)
            # generate a list of files for a zip archive
            elif __arcpath.lower().endswith('.zip'):
                self.__bck = GenerateListForZip(__genparams)
            # generate a list of files for a apk archive (zip container)
            elif __arcpath.lower().endswith('.apk'):
                self.__bck = GenerateListForZip(__genparams)
        # TODO: A MESSAGE RESUMING OPERATION FOR GENERATING THE LIST OF FILES
        # IS MISSING HERE
class ArchiveInfoMsg(object):
    '''Generate the information message about an archive'''

    def __init__(self, __bck, __cfgvalues, __isastream, __confname):
        '''The constructor for the ArchiveInfoMsg class.

        __bck -- the retrieved value for the archive
        __cfgvalues -- the expected values for the archive
        __isastream -- is the archive coming from a stream or not
        __confname -- the configuration name to report for a stream, if any
        '''
        self.__main(__bck, __cfgvalues, __isastream, __confname)

    def __main(self, __bck, __cfgvalues, __isastream, __confname):
        '''The main for the ArchiveInfoMsg class'''
        if __cfgvalues['type'] == 'archive' or __cfgvalues['type'] == 'tree':
            # a stream has no real path: report the configuration name instead
            if __isastream:
                if __confname:
                    __cfgvalues['path'] = __confname
                else:
                    __cfgvalues['path'] = __cfgvalues['name']
            self.__missing_files(__bck.missing_files, __cfgvalues['path'])
            self.__unexpected_files(__bck.unexpected_files, __cfgvalues['path'])
            self.__classify_differences(__bck, __cfgvalues['path'])
            self.__uid_gid_mismatches(__bck, __cfgvalues['path'])
            self.__uname_gname_mismatches(__bck, __cfgvalues['path'])
            self.__mode_mismatches(__bck, __cfgvalues['path'])
            self.__type_mismatches(__bck, __cfgvalues['path'])
            self.__mtime_mismatches(__bck, __cfgvalues['path'])
            self.__hash_mismatches(__bck, __cfgvalues['path'])
            self.__target_mismatches(__bck, __cfgvalues['path'])

    def __missing_files(self, __missing, __archivepath):
        '''Warn about the missing files in an archive'''
        if __missing:
            __msg = 'file'
            if len(__missing) > 1:
                __msg = 'files'
            logging.warning('{} {} missing in {}: '.format(
                len(__missing), __msg, __archivepath))
            for __path in __missing:
                logging.warning('{}'.format(__path))

    def __unexpected_files(self, __unexpected, __archivepath):
        '''Warn about the unexpected files in the archive'''
        if __unexpected:
            __msg = 'file'
            if len(__unexpected) > 1:
                __msg = 'files'
            logging.warning('{} unexpected {} checking {}: '.format(
                len(__unexpected), __msg, __archivepath))
            for __path in __unexpected:
                logging.warning('{}'.format(__path))

    def __classify_differences(self, __bck, __archivepath):
        '''Report differences between expected files and files in the archive
        '''
        if __bck.missing_equality:
            __topic = '{} {} with unexpected size while checking {}: '
            self.__log_differences(
                __bck.missing_equality, __archivepath, __topic)
        if __bck.missing_smaller_than:
            __topic = '{} {} bigger than expected while checking {}: '
            self.__log_differences(
                __bck.missing_smaller_than, __archivepath,
                __topic, 'smaller than')
        if __bck.missing_bigger_than:
            __topic = '{} {} smaller than expected while checking {}: '
            self.__log_differences(
                __bck.missing_bigger_than, __archivepath,
                __topic, 'bigger than')

    def __log_differences(self, __files, __archivepath, __topic, __qty=''):
        '''Log the differences between the expected files and the files
        in the archive
        '''
        __fileword = 'file'
        if len(__files) > 1:
            __fileword = 'files'
        logging.warning(__topic.format(len(__files), __fileword, __archivepath))
        if __qty:
            for __file in __files:
                logging.warning('{} size is {}. Should have been {} {}.'.format(
                    __file['path'], __file['size'], __qty, __file['expected']))
        else:
            for __file in __files:
                logging.warning('{} size is {}. Should have been {}.'.format(
                    __file['path'], __file['size'], __file['expected']))

    def __uid_gid_mismatches(self, __bck, __archivepath):
        '''Log the uids and gids mismatches'''
        # Uid
        if __bck.mismatched_uids:
            __errnb = len(__bck.mismatched_uids)
            __fileword = 'file'
            __uidword = 'uid'
            if __errnb > 1:
                __fileword = 'files'
                __uidword = 'uids'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(__errnb, __fileword, __uidword, __archivepath))
            for __file in __bck.mismatched_uids:
                logging.warning('{} uid is {!s}. Should have been {!s}.'.format(__file['path'], __file['uid'], __file['expecteduid']))
        # Gid
        if __bck.mismatched_gids:
            __errnb = len(__bck.mismatched_gids)
            __fileword = 'file'
            __gidword = 'gid'
            if __errnb > 1:
                __fileword = 'files'
                __gidword = 'gids'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(__errnb, __fileword, __gidword, __archivepath))
            for __file in __bck.mismatched_gids:
                logging.warning('{} gid is {!s}. Should have been {!s}.'.format(__file['path'], __file['gid'], __file['expectedgid']))

    def __uname_gname_mismatches(self, __bck, __archivepath):
        '''Log the unames and gnames mismatches'''
        # uname
        if __bck.mismatched_unames:
            __errnb = len(__bck.mismatched_unames)
            __fileword = 'file'
            __unameword = 'owner'
            if __errnb > 1:
                __fileword = 'files'
                __unameword = 'owners'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(__errnb, __fileword, __unameword, __archivepath))
            for __file in __bck.mismatched_unames:
                logging.warning('{} owner is {!s}. Should have been {!s}.'.format(__file['path'], __file['uname'], __file['expecteduname']))
        # gname
        if __bck.mismatched_gnames:
            __errnb = len(__bck.mismatched_gnames)
            __fileword = 'file'
            __gnameword = 'group owner'
            if __errnb > 1:
                __fileword = 'files'
                __gnameword = 'group owners'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(__errnb, __fileword, __gnameword, __archivepath))
            for __file in __bck.mismatched_gnames:
                logging.warning('{} group owner is {!s}. Should have been {!s}.'.format(__file['path'], __file['gname'], __file['expectedgname']))

    def __mode_mismatches(self, __bck, __archivepath):
        '''Log the file mode mismatches'''
        if __bck.mismatched_modes:
            __errnb = len(__bck.mismatched_modes)
            __fileword = 'file'
            __modeword = 'mode'
            if __errnb > 1:
                __fileword = 'files'
                __modeword = 'modes'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(
                __errnb, __fileword, __modeword, __archivepath,
            ))
            for __file in __bck.mismatched_modes:
                logging.warning('{} mode is {}. Should have been {}.'.format(__file['path'], __file['mode'], __file['expectedmode']))

    def __target_mismatches(self, __bck, __archivepath):
        '''Log the targe mismatches'''
        if __bck.mismatched_targets:
            __errnb = len(__bck.mismatched_targets)
            __fileword = 'link'
            __modeword = 'target'
            if __errnb > 1:
                __fileword = 'links'
                __modeword = 'targets'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(
                __errnb, __fileword, __modeword, __archivepath,
            ))
            for __file in __bck.mismatched_targets:
                logging.warning('{} target is {}. Should have been {}.'.format(__file['path'], __file['target'], __file['expectedtarget']))

    def __type_mismatches(self, __bck, __archivepath):
        '''Log the file type mismatches'''
        __types = {'f': 'regular file', 'c': 'character', 'd': 'directory',
                   's': 'symbolic link', 'l': 'hard link', 'b': 'block',
                   'o': 'fifo', 'k': 'socket'}
        if __bck.mismatched_types:
            __errnb = len(__bck.mismatched_types)
            __fileword = 'file'
            __typeword = 'type'
            if __errnb > 1:
                __fileword = 'files'
                __typeword = 'types'
            logging.warning('{} contains {} {} with unexpected {}:'.format(__archivepath, __errnb, __fileword, __typeword))
            for __file in __bck.mismatched_types:
                logging.warning('{} is a {}. Should have been a {}.'.format(__file['path'], __types[__file['type']], __types[__file['expectedtype']]))

    def __mtime_mismatches(self, __bck, __archivepath):
        '''Log the file mtime mismatches'''
        if __bck.mismatched_mtimes:
            __errnb = len(__bck.mismatched_mtimes)
            __fileword = 'file'
            __mtimeword = 'mtime'
            if __errnb > 1:
                __fileword = 'files'
                # FIX: the plural word was 'types' (copy-paste from
                # __type_mismatches), producing a wrong log message
                __mtimeword = 'mtimes'
            logging.warning('{} contains {} {} with unexpected {}:'.format(__archivepath, __errnb, __fileword, __mtimeword))
            for __file in __bck.mismatched_mtimes:
                logging.warning('{} mtime is {}. Should have been {}.'.format(__file['path'], __file['mtime'], __file['expectedmtime']))

    def __hash_mismatches(self, __bck, __archivepath):
        '''Log the file hash mismatches'''
        if __bck.mismatched_hashes:
            __errnb = len(__bck.mismatched_hashes)
            __fileword = 'file'
            __hashword = 'hash'
            if __errnb > 1:
                __fileword = 'files'
                __hashword = 'hashes'
            logging.warning('{} {} with unexpected {} while checking {}:'.format(__errnb, __fileword, __hashword, __archivepath))
            for __file in __bck.mismatched_hashes:
                logging.warning('{} hash is {}. Should have been {}.'.format(__file['path'], __file['hash'], __file['expectedhash']))
# Check a bzip2 archive
'''Check a bzip2 archive'''

import sys
import logging
import os.path
import bz2

from backupchecker.checkbackups.checkarchive import CheckArchive
from backupchecker.expectedvalues import ExpectedValues
from backupchecker.identifylimitations import IdentifyLimitations


class CheckBzip2(CheckArchive):
    '''Check a bzip2 archive'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckBzip2.

        _cfgvalues -- the configuration values for this backup
                      (at least the 'path' key is read here)
        _options -- the parsed command-line options
        '''
        _data = []
        # expected file entries and expected archive metadata from the conf
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        # kept for _extract_stored_file, called later by the superclass
        self.__arcpath = _cfgvalues['path']
        #########################
        # Test the archive itself
        #########################
        self._archive_checks(__arcdata, _cfgvalues['path'])
        ###############################
        # Test the file in the archive
        ###############################
        if _data:
            # Identify limitations given the features asked by the user
            # retrieve every keys of every files in _data
            configkeys = set()
            for i in _data:
                configkeys = configkeys | set(i.keys())
            IdentifyLimitations(_cfgvalues['path'], 'bz2', configkeys)
            ##############################################
            # Looking for data corruption
            # Have to read the whole archive to check CRC
            ##############################################
            try:
                with bz2.BZ2File(_cfgvalues['path'], 'r') as __bz2:
                    __bz2.read()
            except IOError as __msg:
                # decompression failed: likely corruption, warn and stop here
                __warn = '. You should investigate for a data corruption.'
                logging.warning('{}: {}{}'.format(_cfgvalues['path'], str(__msg), __warn))
            else:
                # a bzip2 archive holds a single file named after the archive
                __name = os.path.split(_cfgvalues['path'])[-1].split('.')[0]
                # Bzip2 does not allow to know the compressed file size, default to 0
                __arcinfo = {'path': __name, 'type': 'f', 'size': 0}
                _data = self._check_path(__arcinfo, _data)
                self._missing_files = [_file['path'] for _file in _data]

    def _extract_stored_file(self, __nouse):
        '''Extract a file from the archive and return a file object.

        __nouse -- ignored: a bzip2 archive contains exactly one member
        '''
        __fileobj = bz2.BZ2File(self.__arcpath, 'r')
        return __fileobj
# Check a lzma archive
'''Check a lzma archive'''

import sys
import logging
import os.path
import lzma

from backupchecker.checkbackups.checkarchive import CheckArchive
from backupchecker.expectedvalues import ExpectedValues
from backupchecker.identifylimitations import IdentifyLimitations


class CheckLzma(CheckArchive):
    '''Check a lzma archive'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckLzma.

        _cfgvalues -- the configuration values for this backup
                      (at least the 'path' key is read here)
        _options -- the parsed command-line options
        '''
        _data = []
        # expected file entries and expected archive metadata from the conf
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        # kept for _extract_stored_file, called later by the superclass
        self.__arcpath = _cfgvalues['path']
        #########################
        # Test the archive itself
        #########################
        self._archive_checks(__arcdata, _cfgvalues['path'])
        ###############################
        # Test the file in the archive
        ###############################
        if _data:
            # Identify limitations given the features asked by the user
            # retrieve every keys of every files in _data
            configkeys = set()
            for i in _data:
                configkeys = configkeys | set(i.keys())
            IdentifyLimitations(_cfgvalues['path'], 'lzma', configkeys)
            ##############################################
            # Looking for data corruption
            # Have to read the whole archive to check CRC
            ##############################################
            try:
                with lzma.LZMAFile(_cfgvalues['path'], 'r') as __lzma:
                    __lzma.read()
            except (lzma.LZMAError, IOError) as __msg:
                # decompression failed: likely corruption, warn and stop here
                __warn = '. You should investigate for a data corruption.'
                logging.warning('{}: {}{}'.format(_cfgvalues['path'], str(__msg), __warn))
            else:
                # a lzma archive holds a single file named after the archive
                __name = os.path.split(_cfgvalues['path'])[-1].split('.')[0]
                # lzma does not allow to know the compressed file size, default to 0
                __arcinfo = {'path': __name, 'type': 'f', 'size': 0}
                _data = self._check_path(__arcinfo, _data)
                self._missing_files = [_file['path'] for _file in _data]

    def _extract_stored_file(self, __nouse):
        '''Extract a file from the archive and return a file object.

        __nouse -- ignored: a lzma archive contains exactly one member
        '''
        __fileobj = lzma.LZMAFile(self.__arcpath, 'r')
        return __fileobj
# Check a tar archive
'''Check a tar archive'''

import logging
import sys
import tarfile

from backupchecker.expectedvalues import ExpectedValues
from backupchecker.checkbackups.checkarchive import CheckArchive


class CheckTar(CheckArchive):
    '''Check a tar archive'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckTar.

        _cfgvalues -- the configuration values for this backup
                      (at least the 'path' key is read here)
        _options -- the parsed command-line options (isastream is read)
        '''
        _data = []
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        if _options.isastream:
            __isastream = True
        else:
            __isastream = False
        #########################
        # Test the archive itself
        #########################
        # a stream has no file on disk, so the archive-level checks are skipped
        if not __isastream:
            self._archive_checks(__arcdata, _cfgvalues['path'])
        ###############################
        # Test the files in the archive
        ###############################
        if _data:
            try:
                if __isastream:
                    # 'r|*' reads a non-seekable stream with any compression
                    self._tar = tarfile.open(mode='r|*', fileobj=sys.stdin.buffer)
                else:
                    self._tar = tarfile.open(_cfgvalues['path'], 'r')
                for _tarinfo in self._tar:
                    _tarinfo.name = self._normalize_path(_tarinfo.name)
                    __type = self.__translate_type(_tarinfo.type)
                    __arcinfo = {'path': _tarinfo.name, 'size': _tarinfo.size,
                                 'uid': _tarinfo.uid, 'gid': _tarinfo.gid,
                                 'uname': _tarinfo.uname, 'gname': _tarinfo.gname,
                                 'mode': _tarinfo.mode, 'type': __type,
                                 'target': _tarinfo.linkname,
                                 'mtime': _tarinfo.mtime}
                    _data = self._check_path(__arcinfo, _data)
                # whatever is left in _data was not seen in the archive
                self._missing_files = [_file['path'] for _file in _data]
            except (tarfile.TarError, EOFError) as _msg:
                __warn = '. You should investigate for a data corruption.'
                logging.warning('{}: {}{}'.format(_cfgvalues['path'], str(_msg), __warn))

    def __translate_type(self, __arctype):
        '''Translate the type of the file inside the tar by a generic name
        '''
        __types = {tarfile.REGTYPE: 'f',
                   tarfile.AREGTYPE: 'a',
                   tarfile.CHRTYPE: 'c',
                   tarfile.DIRTYPE: 'd',
                   tarfile.LNKTYPE: 'l',
                   tarfile.SYMTYPE: 's',
                   tarfile.CONTTYPE: 'n',
                   tarfile.BLKTYPE: 'b',
                   tarfile.GNUTYPE_SPARSE: 'g',
                   tarfile.FIFOTYPE: 'o'}
        return __types[__arctype]

    def _extract_stored_file(self, __arcfilepath):
        '''Extract a file from the archive and return a file object'''
        __file = self._tar.extractfile(__arcfilepath)
        return __file
# Check a file tree
'''Check a file tree'''

import grp
import os
import pwd
import stat

from backupchecker.expectedvalues import ExpectedValues
from backupchecker.checkbackups.checkarchive import CheckArchive


class CheckTree(CheckArchive):
    '''Check a file tree'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckTree.

        Walk the file tree and compare every directory and file found
        with the expected values.

        _cfgvalues -- the expected values for this backup
        _options -- the command-line options
        '''
        _data = []
        # Save the tree root to determine the relative paths in the file tree
        self.__treepath = _cfgvalues['path']
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        for __dirpath, __dirnames, __filenames in os.walk(_cfgvalues['path']):
            # First check the directory itself
            __dirinfo = os.lstat(__dirpath)
            __dirmode = stat.S_IMODE(__dirinfo.st_mode)
            # Translate file type into backupchecker's internal file type
            __type = self.__translate_type(__dirinfo.st_mode)
            __arcinfo = {'path': os.path.relpath(__dirpath, self.__treepath),
                         'size': __dirinfo.st_size, 'uid': __dirinfo.st_uid,
                         'gid': __dirinfo.st_gid, 'mode': __dirmode,
                         'uname': pwd.getpwuid(__dirinfo.st_uid).pw_name,
                         'gname': grp.getgrgid(__dirinfo.st_gid).gr_name,
                         'type': __type}
            _data = self._check_path(__arcinfo, _data)
            # Then check every file in this directory
            for __filename in __filenames:
                __filepath = os.path.join(__dirpath, __filename)
                __filepath = self._normalize_path(__filepath)
                self.__fileinfo = os.lstat(__filepath)
                __filemode = stat.S_IMODE(self.__fileinfo.st_mode)
                __type = self.__translate_type(self.__fileinfo.st_mode)
                if __type == 's':
                    # symlinks additionally carry their target
                    __arcinfo = {'path': os.path.relpath(__filepath, self.__treepath),
                                 'size': self.__fileinfo.st_size,
                                 'uid': self.__fileinfo.st_uid,
                                 'gid': self.__fileinfo.st_gid,
                                 'mode': __filemode,
                                 'uname': pwd.getpwuid(self.__fileinfo.st_uid).pw_name,
                                 'gname': grp.getgrgid(self.__fileinfo.st_gid).gr_name,
                                 'type': __type,
                                 'target': os.readlink(__filepath)}
                else:
                    __arcinfo = {'path': os.path.relpath(__filepath, self.__treepath),
                                 'size': self.__fileinfo.st_size,
                                 'uid': self.__fileinfo.st_uid,
                                 'gid': self.__fileinfo.st_gid,
                                 'mode': __filemode,
                                 'uname': pwd.getpwuid(self.__fileinfo.st_uid).pw_name,
                                 'gname': grp.getgrgid(self.__fileinfo.st_gid).gr_name,
                                 'type': __type}
                _data = self._check_path(__arcinfo, _data)
        self._missing_files = [_file['path'] for _file in _data]

    def __translate_type(self, __mode):
        '''Translate the type of the file to a generic one-letter name.

        Returns None for file types not listed below.
        '''
        if stat.S_ISREG(__mode):
            # a regular file with several links is reported as a hardlink
            if self.__fileinfo[stat.ST_NLINK] > 1:
                return 'l'
            else:
                return 'f'
        elif stat.S_ISDIR(__mode):
            return 'd'
        elif stat.S_ISCHR(__mode):
            return 'c'
        elif stat.S_ISLNK(__mode):
            return 's'
        # BUGFIX: stat.S_BLK does not exist and raised AttributeError for
        # block devices; the correct predicate is stat.S_ISBLK
        elif stat.S_ISBLK(__mode):
            return 'b'
        elif stat.S_ISSOCK(__mode):
            return 'k'
        elif stat.S_ISFIFO(__mode):
            return 'o'

    def _extract_stored_file(self, __arcfilepath):
        '''Extract a file from the tree and return a file object.'''
        if os.path.isabs(__arcfilepath):
            __file = open(__arcfilepath, 'rb')
        else:
            __fullpath = os.path.normpath(os.path.join(self.__treepath, __arcfilepath))
            __file = open(__fullpath, 'rb')
        return __file
# Check a gzip archive
'''Check a gzip archive'''

import sys
import logging
import os.path
import gzip

from backupchecker.checkbackups.checkarchive import CheckArchive
from backupchecker.expectedvalues import ExpectedValues
from backupchecker.identifylimitations import IdentifyLimitations


class CheckGzip(CheckArchive):
    '''Check a gzip archive'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckGzip.

        _cfgvalues -- the expected values for this backup
        _options -- the command-line options
        '''
        _data = []
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        self.__arcpath = _cfgvalues['path']
        #########################
        # Test the archive itself
        #########################
        self._archive_checks(__arcdata, _cfgvalues['path'])
        ###############################
        # Test the file in the archive
        ###############################
        if _data:
            # Identify limitations given the features asked by the user:
            # retrieve every key of every file in _data
            configkeys = set()
            for i in _data:
                configkeys = configkeys | set(i.keys())
            IdentifyLimitations(_cfgvalues['path'], 'gz', configkeys)
            ##############################################
            # Looking for data corruption
            # Have to read the whole archive to check CRC
            ##############################################
            try:
                with gzip.open(_cfgvalues['path'], 'rb') as __gzip:
                    __gzip.read()
            except IOError as __msg:
                __warn = '. You should investigate for a data corruption.'
                logging.warning('{}: {}{}'.format(_cfgvalues['path'], str(__msg), __warn))
            else:
                ########################################
                # No corruption, extracting information
                ########################################
                with open(_cfgvalues['path'], 'rb') as __gzip:
                    __filesize = self.__extract_size(__gzip)
                    # BUGFIX: .rstrip('.gz') removed any run of trailing
                    # '.', 'g' or 'z' characters (e.g. 'log.gz' -> 'lo');
                    # strip the literal suffix instead
                    __basename = os.path.split(_cfgvalues['path'])[-1]
                    if __basename.endswith('.gz'):
                        __basename = __basename[:-3]
                    __name = self.__extract_initial_filename(__gzip, __basename)
                    __arcinfo = {'path': __name, 'size': __filesize, 'type': 'f'}
                    _data = self._check_path(__arcinfo, _data)
                self._missing_files = [_file['path'] for _file in _data]

    def __extract_size(self, __binary):
        '''Extract the size of the uncompressed file inside the archive -
        4 last bytes of the archive (ISIZE field, RFC 1952)
        '''
        __binary.seek(-4, 2)
        return int.from_bytes(__binary.read(), 'little')

    def __extract_initial_filename(self, __binary, __arcname):
        '''Extract the initial filename of the uncompressed file.

        Parses the gzip member header (RFC 1952): reads the FLG byte, skips
        the optional FEXTRA field, then reads the zero-terminated FNAME
        field if present. Falls back to __arcname otherwise.
        '''
        # We move the cursor on the 4th byte (FLG)
        __binary.seek(3, 0)
        # Read and store the flag byte
        __flag = __binary.read(1)
        __intflag = int.from_bytes(__flag, 'little')
        # If the extra field flag (FEXTRA) is on, extract the size of its data field
        __extralen = 0
        if __intflag & 4 != 0:
            # BUGFIX: XLEN is stored at offset 10-11, right after the fixed
            # 10-byte header - the original read at offset 9 (the OS byte)
            # and used the misspelled int.from_byte, which raised
            # AttributeError whenever FEXTRA was set
            __binary.seek(10, 0)
            __extralenbyte = __binary.read(2)
            __extralen = int.from_bytes(__extralenbyte, 'little') + 2
        # If the flag "name" (FNAME) is on, skip to it and read the content
        if __intflag & 8 != 0:
            __binary.seek(10 + __extralen)
            # until a zero byte is found, read the initial filename in bytes
            __binaryname = b''
            while True:
                __newbyte = __binary.read(1)
                if __newbyte != b'\x00':
                    __binaryname += __newbyte
                else:
                    break
            return __binaryname.decode('latin1')
        else:
            return __arcname

    def _extract_stored_file(self, __arcfilepath):
        '''Extract a file from the archive and return a file object.'''
        __fileobj = gzip.open(self.__arcpath, 'rb')
        return __fileobj
# Check the given backups
'''Check the given backups'''

import logging
import sys
from tarfile import is_tarfile
from zipfile import is_zipfile

from backupchecker.archiveinfomsg import ArchiveInfoMsg
from backupchecker.checkbackups.checktar import CheckTar
from backupchecker.checkbackups.checkgzip import CheckGzip
from backupchecker.checkbackups.checkbzip2 import CheckBzip2
from backupchecker.checkbackups.checklzma import CheckLzma
from backupchecker.checkbackups.checkzip import CheckZip
from backupchecker.checkbackups.checktree import CheckTree


class CheckBackups(object):
    '''The backup checker class'''

    def __init__(self, __confs, __options):
        '''The constructor for the CheckBackups class.

        __confs -- the different configurations of the backups
        __options -- global options from the command line
        '''
        self.__main(__confs, __options)

    def __main(self, __confs, __options):
        '''Main for CheckBackups: dispatch each backup configuration to
        the checker matching its type/extension.
        '''
        # all extensions handled by the tar checker
        __tarsuffixes = ('.tar', '.tar.gz', '.tar.bz2', '.tar.xz',
                         '.tgz', '.tbz', '.tbz2')
        __cfgsets = __confs.values()
        for __cfgvalues in __cfgsets:
            __lowerpath = __cfgvalues['path'].lower() if __cfgvalues['path'] else ''
            # check a file tree
            if __cfgvalues['type'] == 'tree':
                __bck = CheckTree(__cfgvalues, __options)
            # check a tar file, by name
            elif (not __options.isastream and __cfgvalues['type'] == 'archive'
                    and __lowerpath.endswith(__tarsuffixes)):
                __bck = CheckTar(__cfgvalues, __options)
            # a stream is always handled by the tar checker
            elif __options.isastream:
                __bck = CheckTar(__cfgvalues, __options)
            # check a gzip file, by name
            elif __cfgvalues['type'] == 'archive' and __lowerpath.endswith('.gz'):
                __bck = CheckGzip(__cfgvalues, __options)
            # check a bzip2 file, by name
            elif __cfgvalues['type'] == 'archive' and __lowerpath.endswith('.bz2'):
                __bck = CheckBzip2(__cfgvalues, __options)
            # check a xz file, by name
            elif __cfgvalues['type'] == 'archive' and __lowerpath.endswith('.xz'):
                __bck = CheckLzma(__cfgvalues, __options)
            # check a zip (or apk, which is a zip) file, by name
            elif __cfgvalues['type'] == 'archive' and __lowerpath.endswith(('.zip', '.apk')):
                __bck = CheckZip(__cfgvalues, __options)
            else:
                __errmsg = 'The type of the archive is not supported.'
                # BUGFIX: the error message was assigned but never shown,
                # making the program exit silently on unsupported types
                print(__errmsg)
                sys.exit(1)
            ArchiveInfoMsg(__bck, __cfgvalues, __options.isastream, __options.confname)
# Check a zip archive
'''Check a zip archive'''

import datetime
import logging
import stat
import sys
import zipfile

from backupchecker.checkbackups.checkarchive import CheckArchive
from backupchecker.expectedvalues import ExpectedValues
from backupchecker.identifylimitations import IdentifyLimitations


class CheckZip(CheckArchive):
    '''Check a zip archive'''

    def _main(self, _cfgvalues, _options):
        '''Main for CheckZip.

        _cfgvalues -- the expected values for this backup
        _options -- the command-line options
        '''
        _crcerror = ''
        _data = []
        _data, __arcdata = ExpectedValues(_cfgvalues, _options).data
        #########################
        # Test the archive itself
        #########################
        self._archive_checks(__arcdata, _cfgvalues['path'])
        try:
            self._zip = zipfile.ZipFile(_cfgvalues['path'], 'r', allowZip64=True)
            ###############################
            # Test the files in the archive
            ###############################
            if _data:
                # Identify limitations given the features asked by the user:
                # retrieve every key of every file in _data
                configkeys = set()
                for i in _data:
                    configkeys = configkeys | set(i.keys())
                IdentifyLimitations(_cfgvalues['path'], 'zip', configkeys)
                _crcerror = self._zip.testzip()
                if _crcerror:
                    # corrupted archive
                    logging.warning('{} has at least one file corrupted:{}'.format(_cfgvalues['path'], _crcerror))
                else:
                    _zipinfo = self._zip.infolist()
                    # iterate through the files in the archive
                    for _fileinfo in _zipinfo:
                        _fileinfo.filename = self._normalize_path(_fileinfo.filename)
                        # Prepare a timestamp from the date_time tuple
                        __dt = _fileinfo.date_time
                        # BUGFIX: __mtime was left unbound (or kept its value
                        # from the previous file) when the timestamp was
                        # invalid; default it explicitly
                        __mtime = 0.0
                        try:
                            __mtime = float(datetime.datetime(__dt[0], __dt[1], __dt[2], __dt[3], __dt[4], __dt[5]).timestamp())
                        except ValueError:
                            __warn = 'Issue with timestamp while controlling {} in {}'.format(_fileinfo.filename, _cfgvalues['path'])
                            logging.warning(__warn)
                        __uid, __gid = self.__extract_uid_gid(_fileinfo)
                        # the high 16 bits of external_attr hold the Unix mode
                        __type = self.__translate_type(_fileinfo.external_attr >> 16)
                        __arcinfo = {'path': _fileinfo.filename,
                                     'size': _fileinfo.file_size,
                                     'mode': stat.S_IMODE((_fileinfo.external_attr >> 16)),
                                     'uid': __uid, 'gid': __gid,
                                     'type': __type, 'mtime': __mtime}
                        _data = self._check_path(__arcinfo, _data)
                    self._missing_files = [_file['path'] for _file in _data]
        except zipfile.BadZipfile as _msg:
            # corrupted archive
            __warn = '. You should investigate for a data corruption.'
            logging.warning('{}: {}{}'.format(_cfgvalues['path'], str(_msg), __warn))

    def _extract_stored_file(self, __arcfilepath):
        '''Extract a file from the archive and return a file object.'''
        __file = self._zip.open(__arcfilepath, 'r')
        return __file

    def __extract_uid_gid(self, __binary):
        '''Extract uid and gid from a zipinfo.extra object.

        NOTE(review): reads the extra field at fixed offsets, which is
        platform dependent - verify against archives produced on other
        platforms.
        '''
        __uid, __gid = int.from_bytes(__binary.extra[15:17], 'little'), \
                       int.from_bytes(__binary.extra[20:22], 'little')
        return (__uid, __gid)

    def __translate_type(self, __mode):
        '''Translate the type of the file to a generic one-letter name.

        Returns None for anything that is neither a regular file nor a
        directory.
        '''
        if stat.S_ISREG(__mode):
            return 'f'
        elif stat.S_ISDIR(__mode):
            return 'd'
class CheckArchive(object):
    '''Check an archive.

    Base class for the concrete checkers (tar, zip, gzip, tree, ...).
    Subclasses implement _main() and _extract_stored_file(). The results
    of the checks are exposed through the read-only properties below.
    '''

    def __init__(self, _cfgvalues, _options):
        '''The constructor of the CheckArchive class.

        _cfgvalues -- the expected values for the archive
        _options -- the command-line options
        '''
        # result lists filled by the checks and read through properties
        self._missing_files = []
        self._missing_equality = []
        self._missing_bigger_than = []
        self._missing_smaller_than = []
        self._unexpected_files = []
        self._mismatched_uids = []
        self._mismatched_gids = []
        self._mismatched_unames = []
        self._mismatched_gnames = []
        self._mismatched_modes = []
        self._mismatched_types = []
        self._mismatched_mtimes = []
        self._mismatched_targets = []
        self._mismatched_hashes = []
        # cached os.stat() result for the archive; False until first use
        self.__fileinfo = False
        self._main(_cfgvalues, _options)

    def _check_path(self, __arcinfo, _data):
        '''Check if the expected path exists in the archive and run every
        configured per-file check; returns _data with the matched entry
        removed, so leftovers are the missing files.
        '''
        for _ind, _file in enumerate(_data):
            if __arcinfo['path'] == _file['path']:
                # Compare the sizes of the file in the archive and the expected file
                self._compare_sizes(__arcinfo['size'], __arcinfo['path'], _file)
                # Check if an unexpected file is in the archive
                self._check_unexpected_files(__arcinfo['path'], _file)
                # Compare the uid of the file in the archive and the expected one
                if 'uid' in __arcinfo and 'uid' in _file:
                    self.__check_uid(__arcinfo['uid'], _file)
                # Compare the gid of the file in the archive and the expected one
                if 'gid' in __arcinfo and 'gid' in _file:
                    self.__check_gid(__arcinfo['gid'], _file)
                # Compare the uname of the file in the archive and the expected one
                if 'uname' in __arcinfo and 'uname' in _file:
                    self.__check_uname(__arcinfo['uname'], _file)
                # Compare the gname of the file in the archive and the expected one
                if 'gname' in __arcinfo and 'gname' in _file:
                    self.__check_gname(__arcinfo['gname'], _file)
                # Compare the filemode and the mode of the expected file
                if 'mode' in __arcinfo and 'mode' in _file:
                    self._check_mode(__arcinfo['mode'], _file)
                # Compare the file type and the type of the expected file
                if 'type' in __arcinfo and 'type' in _file:
                    self._check_type(__arcinfo['type'], _file)
                # Compare the link target and the expected target
                if 'target' in __arcinfo and 'target' in _file:
                    self._check_target(__arcinfo['target'], _file)
                # Compare the file mtime and the mtime of the expected file
                if 'mtime' in __arcinfo and 'mtime' in _file:
                    self._check_mtime(__arcinfo['mtime'], _file)
                # Compare the hash of the file and the one of the expected file
                if 'hash' in _file:
                    self._check_hash(__arcinfo['path'], _file)
                # We reduce the number of files to work with
                del _data[_ind]
        return _data

    def __extract_archive_info(self, __arcpath):
        '''Extract and cache the archive file system information.'''
        if not self.__fileinfo:
            try:
                self.__fileinfo = os.stat(__arcpath)
            except (OSError, IOError) as __msg:
                # BUGFIX: the module imports "from logging import warn",
                # so the previous logging.warning(...) raised NameError
                warn(__msg)
        return self.__fileinfo

    def __find_archive_size(self, __arcpath):
        '''Find the size of the archive.'''
        __fileinfo = self.__extract_archive_info(__arcpath)
        return __fileinfo.st_size

    def __find_archive_mode(self, __arcpath):
        '''Find the mode of the archive.'''
        __fileinfo = self.__extract_archive_info(__arcpath)
        __mode = stat.S_IMODE(__fileinfo.st_mode)
        return __mode

    def __find_archive_uid_gid(self, __arcpath):
        '''Find the uid and gid of the archive.'''
        __fileinfo = self.__extract_archive_info(__arcpath)
        return __fileinfo.st_uid, __fileinfo.st_gid

    def __find_archive_uname_gname(self, __arcpath):
        '''Find the user and group names of the archive owner.

        BUGFIX: this method was called by _archive_checks for the uname
        and gname checks but was never defined, raising AttributeError.
        '''
        # local import: the module does not import pwd/grp at top level
        import pwd
        import grp
        __fileinfo = self.__extract_archive_info(__arcpath)
        return (pwd.getpwuid(__fileinfo.st_uid).pw_name,
                grp.getgrgid(__fileinfo.st_gid).gr_name)

    def __find_archive_mtime(self, __arcpath):
        '''Find the mtime of the archive.'''
        __arcstat = os.stat(__arcpath)
        return __arcstat.st_mtime

    def _compare_sizes(self, _arcsize, _arcname, _file):
        '''Compare the sizes of the files in the archive and the expected files.'''
        if 'equals' in _file and _arcsize != _file['equals']:
            self.missing_equality.append({'path': _arcname,
                                          'size': _arcsize,
                                          'expected': _file['equals']})
        elif 'biggerthan' in _file and _arcsize < _file['biggerthan']:
            self.missing_bigger_than.append({'path': _arcname,
                                             'size': _arcsize,
                                             'expected': _file['biggerthan']})
        elif 'smallerthan' in _file and _arcsize > _file['smallerthan']:
            self.missing_smaller_than.append({'path': _arcname,
                                              'size': _arcsize,
                                              'expected': _file['smallerthan']})

    def _normalize_path(self, __path):
        '''Remove the last slash of a directory path if present.'''
        if __path.endswith('/'):
            return __path[:-1]
        else:
            return __path

    def _check_unexpected_files(self, __arcname, __file):
        '''Check if an unexpected file exists in the archive.'''
        if 'unexpected' in __file:
            self.unexpected_files.append(__arcname)

    def __check_uid(self, __arcuid, __file):
        '''Check if the file uid in the archive matches the expected one.'''
        if __file['uid'] != __arcuid:
            self.mismatched_uids.append({'path': __file['path'],
                                         'expecteduid': __file['uid'],
                                         'uid': __arcuid})

    def __check_gid(self, __arcgid, __file):
        '''Check if the file gid in the archive matches the expected one.'''
        if __file['gid'] != __arcgid:
            self.mismatched_gids.append({'path': __file['path'],
                                         'expectedgid': __file['gid'],
                                         'gid': __arcgid})

    def __check_uname(self, __arcuname, __file):
        '''Check if the file uname in the archive matches the expected one.'''
        if __file['uname'] != __arcuname:
            self.mismatched_unames.append({'path': __file['path'],
                                           'expecteduname': __file['uname'],
                                           'uname': __arcuname})

    def __check_gname(self, __arcgname, __file):
        '''Check if the file gname in the archive matches the expected one.'''
        if __file['gname'] != __arcgname:
            self.mismatched_gnames.append({'path': __file['path'],
                                           'expectedgname': __file['gname'],
                                           'gname': __arcgname})

    def _check_mode(self, __arcmode, __file):
        '''Check if the file mode in the archive matches the expected one.'''
        __arcmode = oct(__arcmode).split('o')[-1]
        # if the file has no right, need to manipulate the output - solving #15
        if __arcmode == '0':
            __arcmode = '000'
        if __file['mode'] != __arcmode:
            self.mismatched_modes.append({'path': __file['path'],
                                          'expectedmode': __file['mode'],
                                          'mode': __arcmode})

    def _check_type(self, __arctype, __file):
        '''Check if the file type in the archive matches the expected one.'''
        if __file['type'] != __arctype:
            self.mismatched_types.append({'path': __file['path'],
                                          'expectedtype': __file['type'],
                                          'type': __arctype})

    def _check_mtime(self, __arcmtime, __file):
        '''Check if the file mtime in the archive matches the expected one.'''
        if __file['mtime'] != __arcmtime:
            self.mismatched_mtimes.append({'path': __file['path'],
                                           'expectedmtime': __file['mtime'],
                                           'mtime': __arcmtime})

    def _check_hash(self, __arcpath, __file):
        '''Check if the file hash in the archive matches the expected one.'''
        __arcfile = self._extract_stored_file(__arcpath)
        __arcfilehash = backupchecker.checkhashes.get_hash(__arcfile, __file['hash']['hashtype'])
        self._report_hash(__file['path'], __file['hash']['hashvalue'], __arcfilehash)

    def _report_hash(self, __arcpath, __expectedhash, __archash):
        '''Check if the hashes are different and report the fact.'''
        if __expectedhash != __archash:
            self._mismatched_hashes.append({'path': __arcpath,
                                            'expectedhash': __expectedhash,
                                            'hash': __archash})

    def _check_target(self, __arctarget, __file):
        '''Check if the target field in the archive matches the expected one.'''
        if __file['target'] != __arctarget:
            self._mismatched_targets.append({'path': __file['path'],
                                             'expectedtarget': __file['target'],
                                             'target': __arctarget})

    def _archive_checks(self, __arcdata, __arcpath):
        '''Launch the checks for the archive itself.'''
        if __arcdata:
            # Store the path into archive data
            __arcdata['path'] = __arcpath
            # archive size
            if 'equals' in __arcdata or 'biggerthan' in __arcdata or 'smallerthan' in __arcdata:
                __arcsize = self.__find_archive_size(__arcdata['path'])
                self._compare_sizes(__arcsize, __arcdata['path'], __arcdata)
            # archive hash
            if 'hash' in __arcdata:
                with open(__arcdata['path'], 'rb') as __archive:
                    __archash = backupchecker.checkhashes.get_hash(__archive, __arcdata['hash']['hashtype'])
                    self._report_hash(__arcdata['path'], __arcdata['hash']['hashvalue'], __archash)
            # archive mode
            if 'mode' in __arcdata:
                __arcmode = self.__find_archive_mode(__arcdata['path'])
                self._check_mode(__arcmode, __arcdata)
            # archive uid and gid
            if 'uid' in __arcdata:
                __arcuid, _ = self.__find_archive_uid_gid(__arcdata['path'])
                self.__check_uid(__arcuid, __arcdata)
            if 'gid' in __arcdata:
                _, __arcgid = self.__find_archive_uid_gid(__arcdata['path'])
                self.__check_gid(__arcgid, __arcdata)
            # archive uname
            if 'uname' in __arcdata:
                __arcuname, _ = self.__find_archive_uname_gname(__arcdata['path'])
                self.__check_uname(__arcuname, __arcdata)
            # archive gname
            if 'gname' in __arcdata:
                _, __arcgname = self.__find_archive_uname_gname(__arcdata['path'])
                self.__check_gname(__arcgname, __arcdata)
            # mtime of the archive
            if 'mtime' in __arcdata:
                __arcmtime = self.__find_archive_mtime(__arcdata['path'])
                self._check_mtime(__arcmtime, __arcdata)

    @property
    def missing_equality(self):
        '''A list containing the paths of the files missing the equality
        parameters in the archive.
        '''
        return self._missing_equality

    @property
    def missing_files(self):
        '''A list containing the paths of the missing files in the archive.'''
        return self._missing_files

    @property
    def missing_bigger_than(self):
        '''A list containing the path and the size of the files missing the
        bigger than parameter in the archive.
        '''
        return self._missing_bigger_than

    @property
    def missing_smaller_than(self):
        '''A list containing the path and the size of the files missing the
        smaller than parameter in the archive.
        '''
        return self._missing_smaller_than

    @property
    def unexpected_files(self):
        '''A list containing the unexpected files in the archive.'''
        return self._unexpected_files

    @property
    def mismatched_uids(self):
        '''A list containing {path,uid,expecteduid} of the files in the
        archive with an unexpected uid.
        '''
        return self._mismatched_uids

    @property
    def mismatched_gids(self):
        '''A list containing {path,gid,expectedgid} of the files in the
        archive with an unexpected gid.
        '''
        return self._mismatched_gids

    @property
    def mismatched_unames(self):
        '''A list containing {path,uname,expecteduname} of the files in the
        archive with an unexpected uname.
        '''
        return self._mismatched_unames

    @property
    def mismatched_gnames(self):
        '''A list containing {path,gname,expectedgname} of the files in the
        archive with an unexpected gname.
        '''
        return self._mismatched_gnames

    @property
    def mismatched_modes(self):
        '''A list containing {path,mode,expectedmode} of the files in the
        archive with an unexpected mode.
        '''
        return self._mismatched_modes

    @property
    def mismatched_types(self):
        '''A list containing {path,type,expectedtype} of the files in the
        archive with an unexpected type.
        '''
        return self._mismatched_types

    @property
    def mismatched_mtimes(self):
        '''A list containing {path,mtime,expectedmtime} of the files in the
        archive with an unexpected mtime.
        '''
        return self._mismatched_mtimes

    @property
    def mismatched_hashes(self):
        '''A list containing {path,hash,expectedhash} of the files in the
        archive with an unexpected hash.
        '''
        return self._mismatched_hashes

    @property
    def mismatched_targets(self):
        '''A list containing {target,expectedtarget} of the targets of the
        links in the archive with an unexpected target.
        '''
        return self._mismatched_targets
# Identify and replace placeholder in a path
'''Identify and replace placeholder in a path'''

from datetime import datetime
import os
import os.path
import re


class PlaceHolder(object):
    '''Identify and replace placeholder in a path'''

    def __init__(self, __path):
        '''The constructor for the PlaceHolder class.

        Keyword arguments:
        __path -- the path of the backup
        '''
        self.__path = __path
        self.__main()

    def __main(self):
        '''Main of the PlaceHolder class: replace the date/time
        placeholders and the %i biggest-integer placeholder.
        '''
        (__year, __shortyear, __month, __weeknumber, __monthday, __weekday,
         __hour, __minute, __second) = datetime.now().strftime(
            '%Y %y %m %W %d %w %H %M %S').split()
        # year
        if '%Y' in self.__path:
            self.__path = self.__path.replace('%Y', __year)
        # year in two-digit format
        if '%y' in self.__path:
            self.__path = self.__path.replace('%y', __shortyear)
        # month (1..12)
        if '%m' in self.__path:
            self.__path = self.__path.replace('%m', __month)
        # week number in year (1..52)
        if '%W' in self.__path:
            self.__path = self.__path.replace('%W', __weeknumber)
        # monthday (1..31)
        if '%d' in self.__path:
            self.__path = self.__path.replace('%d', __monthday)
        # weekday first monday (1..7)
        if '%w' in self.__path:
            self.__path = self.__path.replace('%w', __weekday)
        # hour (00..24)
        if '%H' in self.__path:
            self.__path = self.__path.replace('%H', __hour)
        # minute (00..59)
        if '%M' in self.__path:
            self.__path = self.__path.replace('%M', __minute)
        # second (00..59)
        if '%S' in self.__path:
            self.__path = self.__path.replace('%S', __second)
        # biggest integer for the same path in the same directory
        if '%i' in self.__path:
            self.__path = self.__biggestinteger()

    def __biggestinteger(self):
        '''Return the path with the biggest integer in the same directory
        for the placeholder.
        '''
        __result = {}
        __newpath = []
        __missingpath = []
        __found = False
        # split the path at the first chunk containing %i; everything after
        # it is re-appended once the placeholder is resolved
        for __chunk in self.__path.split('/'):
            if not __found:
                __newpath.append(__chunk)
            else:
                __missingpath.append(__chunk)
            if '%i' in __chunk:
                __found = True
        self.__path = '/'.join(__newpath)
        __head, __tail = os.path.split(self.__path)
        __tail = __tail.replace('%i', r"([\d]+)")
        for __file in os.listdir(__head):
            __res = re.search(__tail, __file)
            if __res:
                __result[__res.group(1)] = os.path.join(__head, __res.group(0))
        # BUGFIX: max() on the string keys compared lexicographically
        # ('9' > '10'); compare the captured integers numerically
        __maxvalue = max(__result, key=int)
        # join the modified path and the original left-apart part
        if '/'.join(__missingpath):
            return os.path.join(__result[__maxvalue], '/'.join(__missingpath))
        else:
            return __result[__maxvalue]

    @property
    def realpath(self):
        '''Return the real path after placeholder replacement.'''
        return self.__path
# Check the hash of the list of files by comparing it to the expected value
'''Check the hash of the list of files by comparing it to the expected value'''

import sys

from backupchecker.checkhashes import get_hash


class CheckFileList(object):
    '''Check the hash of the list of files by comparing it to the expected value'''

    def __init__(self, __bckconf):
        '''The constructor of the CheckFileList class.

        __bckconf -- the backup configuration (expects the keys
        'files_list' and, optionally, 'sha512')
        '''
        self.__main(__bckconf)

    def __main(self, __bckconf):
        '''The main for the CheckFileList class.

        Exits with status 1 if the sha512 of the files list does not
        match the configured value.
        '''
        # idiom fix: compare to None with "is not", not "!="
        if 'sha512' in __bckconf and __bckconf['sha512'] is not None:
            __hashtype = 'sha512'
            with open(__bckconf['files_list'], 'rb') as __conf:
                __realhash = get_hash(__conf, __hashtype)
                if __realhash != __bckconf['sha512']:
                    print('The list of files {} should have a {} hash sum of {}. Current value: {}'.format(__bckconf['files_list'], __hashtype, __bckconf['sha512'], __realhash))
                    sys.exit(1)
# Identify limitations for this archive type given the checks asked by the user
'''Identify limitations for this archive type given the checks asked by the user'''

import logging


class IdentifyLimitations:
    '''Identify limitations for this archive type given the checks asked by the user'''

    def __init__(self, __arcpath, __arctype, __data):
        '''The constructor of the IdentifyLimitations class.

        __arcpath -- the path of the archive
        __arctype -- the archive type ('gz', 'bz2', 'zip' or 'lzma')
        __data -- the set of configuration keys asked by the user
        '''
        self.__data = __data
        self.__arcpath = __arcpath
        self.__main(__arctype)

    def __main(self, __arctype):
        '''Main for IdentifyLimitations: dispatch to the __study_* method
        matching the archive type (name-mangled attribute lookup).
        '''
        getattr(self, ''.join(['_IdentifyLimitations__study_', __arctype]))()

    def __study_gz(self):
        '''Study the required checks for the gzip archive type.'''
        __unsupported_gz = {'uid', 'gid', 'uname', 'gname', 'mode',
                            'target', 'mtime'}
        for __param in self.__data:
            if __param in __unsupported_gz:
                self.__warn(__param)

    def __study_bz2(self):
        '''Study the required checks for the bzip2 archive type.'''
        # BUGFIX: the original set had 'target' 'mtime' (missing comma),
        # which Python concatenates to the single string 'targetmtime' -
        # so neither parameter was ever reported as unsupported
        __unsupported_bz2 = {'uid', 'gid', 'uname', 'gname', 'mode',
                             'equals', 'biggerthan', 'smallerthan',
                             'target', 'mtime'}
        for __param in self.__data:
            if __param in __unsupported_bz2:
                self.__warn(__param)

    def __study_zip(self):
        '''Study the required checks for the zip archive type.'''
        __unsupported_zip = {'uname', 'gname', 'target'}
        for __param in self.__data:
            if __param in __unsupported_zip:
                self.__warn(__param)

    def __study_lzma(self):
        '''Study the required checks for the lzma archive type.'''
        # seems pretty hard to get xz/lzma archive size - maybe in another release
        __unsupported_lzma = {'uid', 'gid', 'uname', 'gname', 'mode',
                              'equals', 'biggerthan', 'smallerthan',
                              'target', 'mtime'}
        for __param in self.__data:
            if __param in __unsupported_lzma:
                self.__warn(__param)

    def __warn(self, __param):
        '''Warn the user that a parameter is not supported, via logging.'''
        logging.warning('{}: The required parameter {} is not supported by this type of archive. Ignoring it.'.format(self.__arcpath, __param))
__confpath -- the path to the directory with the configuration files ''' self.__configs = {} self.__parse_configurations(__confpath, __isastream) def __parse_configurations(self, __confpath, __isastream): '''Parse the different configurations''' try: # check if the path to the confs is a directory or a file if os.path.isdir(__confpath): __confs = [__file for __file in os.listdir(__confpath) if __file.endswith('.conf')] else: __confpath, __conft = os.path.split(__confpath) __confs = [__conft] # check if at least one configuration file is availabe if not __confs: __errmsg = 'Could not find any .conf file in {}' print(__errmsg.format(__confpath)) sys.exit(1) # parse the configuration files for __conf in __confs: __currentconf = {} __config = RawConfigParser() __fullconfpath = os.path.join('/'.join([__confpath, __conf])) try: with open(__fullconfpath, 'r') as __file: # strip GPG/PGP header and footer if it is a signed file __stripres = self.strip_gpg_header(__file, __fullconfpath) __config.read_string(__stripres) except UnicodeDecodeError as __err: __msg = 'Error while parsing the configuration file {}:'.format(__fullconfpath) print(__msg) print(__err) sys.exit(1) # Common information for the backups # The name of the backup __currentconf['name'] = __config.get('main', 'name') ### The type of the backups __currentconf['type'] = __config.get('main', 'type') # Common information for the archives ### The archive path __confsettings = [{'main': 'path'}, ### The list of the expected files in the archive {'main': 'files_list'}, ### The delimiter to use in the list of files {'main': 'delimiter'}, ### The hash sum to identify the list of files {'main': 'sha512'} ] for __element in __confsettings: __key, __value = __element.popitem() if __config.has_option(__key, __value): __currentconf[__value] = __config.get( __key, __value) else: __currentconf[__value] = __config.set( __key, __value, '') # Checking the information ### Check the paths in the configuration __confkeys= ('path', 
'files_list') for __confkey in __confkeys: if __confkey == 'path' and __isastream: break else: __path = __currentconf[__confkey] if not __path: print('A path is missing in {}.'.format(__config.get('main', 'name'))) sys.exit(1) if not os.path.isabs(__path): __path = os.path.normpath(os.path.join(os.path.abspath(__confpath), __path)) __currentconf[__confkey] = __path # placeholder should be here plh = PlaceHolder(__currentconf[__confkey]) __currentconf[__confkey] = plh.realpath # test if the path exists if not os.path.exists(__currentconf[__confkey]): print('{} does not exist.'.format(__path)) sys.exit(1) # If the backup type is archive, path must not be a directory if not __isastream and __currentconf['type'] == 'archive' and os.path.isdir(__currentconf['path']): __errmsg = '{} is a directory but appears as an archive in configuration {}.' print(__errmsg.format(__currentconf['path'], __config.get('main', 'name'))) sys.exit(1) # check if the name of the conf does not exist yet if __config.get('main', 'name') in self.__configs: __errmsg = 'The configuration name in {} already exists. Please rename it.' 
print(__errmsg.format(__fullconfpath)) sys.exit(1) else: self.__configs[__config.get('main', 'name')] = __currentconf except (ParsingError, NoSectionError, NoOptionError, OSError, IOError) as __err: print(__err) sys.exit(1) def strip_gpg_header(self, __file, __confpath): '''strip the GPG/PGP header and footer if it is a signed file''' __pgpheader = '-----BEGIN PGP SIGNED MESSAGE-----\n' __pgpfooter = '-----BEGIN PGP SIGNATURE-----\n' __pgpfootermissing = 'Found PGP header but could not find PGP footer for {}' __pgpheadermissing = 'Found PGP footer but could not find PGP header for {}' __content = __file.read() if __pgpheader in __content and __pgpfooter not in __content: print(__pgpfootermissing.format(__confpath)) sys.exit(1) if __pgpheader not in __content and __pgpfooter in __content: print(__pgpheadermissing.format(__confpath)) sys.exit(1) if __pgpheader in __content and __pgpfooter: __content = __content[__content.index('[main]'):] __content = __content[0:__content.index(__pgpfooter)] return __content @property def configs(self): '''Return the different configurations parameters''' return self.__configs backupchecker-1.9/scripts/0000755000000000000000000000000013073706716015614 5ustar rootroot00000000000000backupchecker-1.9/scripts/backupchecker0000775000000000000000000000144413073704736020341 0ustar rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program. If not, see . from backupchecker.main import Main if __name__ == '__main__': Main() backupchecker-1.9/PKG-INFO0000644000000000000000000000133713073706716015226 0ustar rootroot00000000000000Metadata-Version: 1.1 Name: backupchecker Version: 1.9 Summary: automated backup checker Home-page: https://github.com/backupchecker/backupchecker Author: Carl Chenet Author-email: chaica@backupchecker.com License: GNU GPL v3 Download-URL: https://github.com/backupchecker/backupchecker Description: Backup Checker is a fully automated backup checker. Platform: UNKNOWN Classifier: Intended Audience :: System Administrators Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Operating System :: POSIX :: Linux Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 backupchecker-1.9/setup.py0000664000000000000000000000366013073704736015646 0ustar rootroot00000000000000# -*- coding: utf-8 -*- # Copyright © 2015-2017 Carl Chenet # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. 
If not, see from distutils.core import setup import os.path import platform import sys # Warn the user about the supported Python versions if float(platform.python_version()[0:3]) < 3.4: print('You need at least Python 3.4 to use BackupChecker') sys.exit(1) CLASSIFIERS = [ 'Intended Audience :: System Administrators', 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'License :: OSI Approved :: GNU General Public License (GPL)', 'Operating System :: POSIX :: Linux', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5' ] setup(name = 'backupchecker', version = '1.9', license = 'GNU GPL v3', description = 'automated backup checker', long_description = 'Backup Checker is a fully automated backup checker.', classifiers = CLASSIFIERS, author = 'Carl Chenet', author_email = 'chaica@backupchecker.com', url = 'https://github.com/backupchecker/backupchecker', download_url = 'https://github.com/backupchecker/backupchecker', packages = ['backupchecker', 'backupchecker.checkbackups', 'backupchecker.generatelist'], data_files=[(os.path.join('share','man','man1'), ['man/backupchecker.1'])], scripts = ['scripts/backupchecker'] )