halberd-0.2.4/0000755000175000017500000000000011431513770011566 5ustar jmbrjmbrhalberd-0.2.4/Halberd/0000755000175000017500000000000011431513770013127 5ustar jmbrjmbrhalberd-0.2.4/Halberd/version.py0000644000175000017500000000077211431512602015165 0ustar jmbrjmbr## ## ./Halberd/version.py -- Version Information for halberd (syntax: Python) ## [automatically generated and maintained by GNU shtool] ## class version: v_hex = 0x002204 v_short = "0.2.4" v_long = "0.2.4 (14-Aug-2010)" v_tex = "This is halberd, Version 0.2.4 (14-Aug-2010)" v_gnu = "halberd 0.2.4 (14-Aug-2010)" v_web = "halberd/0.2.4" v_sccs = "@(#)halberd 0.2.4 (14-Aug-2010)" v_rcs = "$Id: halberd 0.2.4 (14-Aug-2010) $" halberd-0.2.4/Halberd/__init__.py0000644000175000017500000000202211431512414015226 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """HTTP load balancer detector module. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA __all__ = [ 'version', 'clientlib', 'clues', 'reportlib', 'conflib', 'util', 'shell', 'crew', 'ScanTask', 'logger', ] # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/reportlib.py0000644000175000017500000000551511431512414015503 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Output module. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. 
Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import sys import Halberd.logger import Halberd.clues.analysis as analysis def report(scantask): """Displays detailed report information to the user. """ if scantask.out: out = open(scantask.out, 'a') else: out = sys.stdout clues = scantask.analyzed hits = analysis.hits(clues) logger = Halberd.logger.getLogger() # xxx This could be passed by the caller in order to avoid recomputation in # case the clues needed a re-analysis. 
diff_fields = analysis.diff_fields(clues) out.write('=' * 70 + '\n') out.write('%s' % scantask.url) if scantask.addr: out.write(' (%s)' % scantask.addr) out.write(': %d real server(s)\n' % len(clues)) out.write('=' * 70 + '\n') for num, clue in enumerate(clues): assert hits > 0 info = clue.info out.write('\n') # out.write('-' * 70 + '\n') out.write('server %d: %s\n' % (num + 1, info['server'].lstrip())) out.write('-' * 70 + '\n\n') out.write('difference: %d seconds\n' % clue.diff) out.write('successful requests: %d hits (%.2f%%)\n' \ % (clue.getCount(), clue.getCount() * 100 / float(hits))) if info['contloc']: out.write('content-location: %s\n' % info['contloc'].lstrip()) if len(info['cookies']) > 0: out.write('cookie(s):\n') for cookie in info['cookies']: out.write(' %s\n' % cookie.lstrip()) out.write('header fingerprint: %s\n' % info['digest']) different = [(field, value) for field, value in clue.headers \ if field in diff_fields] if different: out.write('different headers:\n') idx = 1 for field, value in different: out.write(' %d. %s:%s\n' % (idx, field, value)) idx += 1 if scantask.debug: import pprint out.write('headers:\n') pprint.pprint(clue.headers, out) # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/shell.py0000644000175000017500000001430211431512414014602 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Provides scanning patterns to be used as building blocks for more complex scans. Strategies are different ways in which target scans may be done. We provide basic functionality so more complex stuff can be built upon this. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import Halberd.crew import Halberd.logger import Halberd.reportlib import Halberd.clues.file import Halberd.clues.analysis as analysis class ScanError(Exception): """Generic error during scanning. """ def __init__(self, msg): self.msg = msg def __str__(self): return str(self.msg) class BaseStrategy: """Defines the strategy used to scan. A strategy is a certain way to use the program. Theses can be layered to build a bigger strategy doing more complex things, etc. """ def __init__(self, scantask): self.task = scantask self.logger = Halberd.logger.getLogger() def execute(self): """Executes the strategy. """ pass # --------------------------- # Higher-level helper methods # --------------------------- def _scan(self): """Allocates a work crew of scanners and launches them on the target. """ assert self.task.url and self.task.addr self.task.clues = [] self.task.analyzed = [] crew = Halberd.crew.WorkCrew(self.task) self.task.clues = crew.scan() def _analyze(self): """Performs clue analysis. """ if len(self.task.clues) == 0: return self.task.analyzed = analysis.analyze(self.task.clues) self.task.analyzed = analysis.reanalyze(self.task.clues, self.task.analyzed, self.task.ratio_threshold) class UniScanStrategy(BaseStrategy): """Scan a single URL. """ def __init__(self, scantask): BaseStrategy.__init__(self, scantask) if not self.task.url: raise ScanError, 'Didn\'t provide an URL to scan' if self.task.addr: # The user passed a specific address as a parameter. 
self.addrs = [self.task.addr] else: host = Halberd.util.hostname(self.task.url) self.logger.info('looking up host %s... ', host) try: self.addrs = Halberd.util.addresses(host) except KeyboardInterrupt: raise ScanError, 'interrupted by the user' if not self.addrs: raise ScanError, 'unable to resolve %s' % host self.addrs.sort() self.logger.info('host lookup done.') if len(self.addrs) > 1: for addr in self.addrs: #self.logger.debug('%s resolves to %s', host, addr) self.logger.info('%s resolves to %s', host, addr) def execute(self): """Scans, analyzes and presents results coming a single target. """ if self.task.save: cluedir = Halberd.clues.file.ClueDir(self.task.save) for self.task.addr in self.addrs: self._scan() self._analyze() Halberd.reportlib.report(self.task) if self.task.save: cluedir.save(self.task.url, self.task.addr, self.task.clues) class MultiScanStrategy(BaseStrategy): """Scan multiple URLs. """ def __init__(self, scantask): BaseStrategy.__init__(self, scantask) if not self.task.urlfile: raise ScanError, 'An urlfile parameter must be provided' self.urlfp = open(self.task.urlfile, 'r') def _targets(self, urlfp): """Obtain target addresses from URLs. @param urlfp: File where the list of URLs is stored. @type urlfp: C{file} @return: Generator providing the desired addresses. """ for url in urlfp: if url == '\n': continue # Strip end of line character and whitespaces. url = url[:-1].strip() host = Halberd.util.hostname(url) if not host: self.logger.warn('unable to extract hostname from %s', host) continue self.logger.info('looking up host %s... ', host) try: addrs = Halberd.util.addresses(host) except KeyboardInterrupt: raise ScanError, 'interrupted by the user' self.logger.info('host lookup done.') for addr in addrs: yield (url, addr) def execute(self): """Launch a multiple URL scan. 
""" cluedir = Halberd.clues.file.ClueDir(self.task.save) for url, addr in self._targets(self.urlfp): self.task.url = url self.task.addr = addr self.logger.info('scanning %s (%s)', url, addr) self._scan() cluedir.save(url, addr, self.task.clues) self._analyze() Halberd.reportlib.report(self.task) class ClueReaderStrategy(BaseStrategy): """Clue reader strategy. Works by reading and analyzing files of previously stored clues. """ def __init__(self, scantask): BaseStrategy.__init__(self, scantask) def execute(self): """Reads and interprets clues. """ self.task.clues = Halberd.clues.file.load(self.task.cluefile) self._analyze() self.task.url = self.task.cluefile Halberd.reportlib.report(self.task) # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/logger.py0000644000175000017500000000335311431512414014756 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Logger singleton. This module allows halberd to easily log certain events. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import sys import logging _logger = None #_logfmt = '%(name)s %(thread)d %(asctime)s %(levelname)s %(message)s' _logfmt = '%(levelname)s %(message)s' def getLogger(): """Get a reference to an instance of a logger object. @return: reference to a logger. 
@rtype: C{object} """ global _logger if _logger is None: _logger = logging.getLogger('Halberd') handler = logging.StreamHandler(sys.stdout) handler.setFormatter(logging.Formatter(_logfmt)) _logger.addHandler(handler) _logger.setLevel(logging.INFO) return _logger def setDebug(): """Set the logging level to C{debug}. """ logger = getLogger() logger.setLevel(logging.DEBUG) def setError(): """Set the logging level to C{error}. """ logger = getLogger() logger.setLevel(logging.ERROR) # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/util.py0000644000175000017500000000440511431512414014453 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Miscellaneous functions. @var table: Translation table for normalizing strings. @type table: C{str} """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import time import socket import urlparse table = '________________________________________________0123456789_______ABCDEFGHIJKLMNOPQRSTUVWXYZ______abcdefghijklmnopqrstuvwxyz_____________________________________________________________________________________________________________________________________' def _gen_table(): """Generate translation table. 
""" tab = '' for c in map(chr, xrange(256)): tab += (c.isalnum() and c) or '_' return tab def utctime(): return time.mktime(time.gmtime()) def hostname(url): """Get the hostname part of an URL. @param url: A valid URL (must be preceded by scheme://). @type url: C{str} @return: Hostname corresponding to the URL or the empty string in case of failure. @rtype: C{str} """ netloc = urlparse.urlparse(url)[1] if netloc == '': return '' return netloc.split(':', 1)[0] def addresses(host): """Get the network addresses to which a given host resolves to. @param host: Hostname we want to resolve. @type host: C{str} @return: Network addresses. @rtype: C{tuple} """ assert host != '' try: name, aliases, addrs = socket.gethostbyname_ex(host) except socket.error: return () return addrs if __name__ == '__main__': print "table = '%s'" % _gen_table() # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/ScanTask.py0000644000175000017500000001164011431512414015204 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Scanning tasks. @var default_scantime: Time to spend probing the target expressed in seconds. @type default_scantime: C{int} @var default_parallelism: Number of parallel threads to launch for the scan. @type default_parallelism: C{int} @var default_conf_dir: Path to the directory where the configuration file is located. @type default_conf_dir: C{str} @var default_conf_file: Name of the default configuration file for halberd. @type default_conf_file: C{str} @var default_ratio_threshold: Minimum clues-to-realservers ratio to trigger a clue reanalysis. @type default_ratio_threshold: C{float} @var default_out: Default place where to write reports (None means stdout). @type default_out: C{str} """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. 
Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os import Halberd.conflib default_scantime = 15 default_parallelism = 4 default_conf_dir = os.path.join(os.path.expanduser('~'), '.halberd') default_conf_file = os.path.join(default_conf_dir, 'halberd' + os.extsep + 'cfg') default_ratio_threshold = 0.6 default_out = None class ConfError(Exception): """Error with configuration file(s) """ def __init__(self, msg): self.msg = msg def __str__(self): return str(self.msg) class ScanTask: """Describes the way a scan should be performed. @ivar verbose: Display status information during the scan. @type verbose: C{bool} @ivar debug: Display debug information. @type debug: C{bool} @ivar urlfile: Root folder to use for storing results of MultiScans. @type urlfile: C{str} @ivar url: URL to scan. @type url: C{str} @ivar addr: Address of the target web server. @type addr: C{str} @ivar proxy_serv_addr: Address + port where to listen when operating as a proxy. @type proxy_serv_addr: C{tuple} @ivar out: File where to write reports. If it's not set, stdout will be used. @type out: C{str} @ivar save: File or directory name where the results will be written. @type save: C{str} @ivar keyfile: Key file for SSL connections. @type keyfile: C{str} @ivar certfile: Certificate to be used for SSL connections. 
@type certfile: C{str} @ivar clues: Sequence of clues obtained from the target. @type clues: C{list} @ivar analyzed: Sequence of clues after the analysis phase. @type analyzed: C{list} """ def __init__(self): self.scantime = default_scantime self.parallelism = default_parallelism self.conf_file = default_conf_file self.verbose = False self.debug = False self.ratio_threshold = default_ratio_threshold self.urlfile = '' self.url = '' self.addr = '' self.proxy_serv_addr = () self.save = '' self.out = default_out self.keyfile = None self.certfile = None self.clues = [] self.analyzed = [] def readConf(self): """Read configuration file. This method tries to read the specified configuration file. If we try to read it at the default path and it's not there we create a bare-bones file and use that one. @raise ConfError: If there's some problem creating or reading the configuration file. """ # xxx - Move this into Halberd.conflib as a higher level function. reader = Halberd.conflib.ConfReader() try: reader.open(self.conf_file) except IOError: if self.conf_file == default_conf_file: try: os.mkdir(default_conf_dir) reader.writeDefault(default_conf_file) reader.open(default_conf_file) except (OSError, IOError): raise ConfError, 'unable to create a default conf. file' else: raise ConfError, 'unable to open configuration file %s\n' except conflib.InvalidConfFile: raise ConfError, 'invalid configuration file %s\n' % self.conf_file confvals = reader.parse() self.proxy_serv_addr = confvals[0] self.keyfile, self.certfile = confvals[1:] reader.close() # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/crew.py0000644000175000017500000003045011431512414014435 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """\ Work crew pattern of parallel scanners ====================================== Overview -------- A work crew is instantiated passing a ScanTask object as a parameter, thus defining the target and the way the scanning should be done. 
After the initialization of the work crew it can be used to scan the target and get the obtained clues back. >>> crew = WorkCrew(scantask) >>> clues = crew.scan() Requirements ------------ These are the features that the WorkCrew must provide: 1. There are 3 different types of consumers: - Controller thread (Performs timing + error-checking). - Local scanning thread. - Remote scanning thread. 2. We need a way to signal: - When a fatal error has happened. - When the user has pressed Control-C Types of scanning threads ------------------------- The WorkCrew object spawns different kinds of threads. Here's a brief summary of what they do: - Manager: Detects when the time for performing the scan has expired and notifies the rest of the threads. This code is executed in the main thread in order to be able to appropriately catch signals, etc. - Scanner: Performs a load-balancer scan from the current machine. The following is a diagram showing the way it works:: .--> Manager --. | | +--> Scanner --+ .----------. .----------. | | .-------. IN --> | ScanTask |->-| WorkCrew |--+--> Scanner --+->-| Clues |--> OUT `----------' `----------' | | `-------' +--> Scanner --+ | | `--> Scanner --' """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import sys import time import math import copy import signal import threading import Halberd.logger import Halberd.clues.Clue import Halberd.clientlib as clientlib __all__ = ['WorkCrew'] class ScanState: """Shared state among scanner threads. @ivar shouldstop: Signals when the threads should stop scanning. @type shouldstop: C{threading.Event} caught with an exception). """ def __init__(self): """Initializes shared state among scanning threads. """ self.__mutex = threading.Lock() self.shouldstop = threading.Event() self.__error = None self.__clues = [] self.__missed = 0 self.__replies = 0 def getStats(self): """Provides statistics about the scanning process. @return: Number of clues gathered so far, number of successful requests and number of unsuccessful ones (missed replies). @rtype: C{tuple} """ # xxx - I badly need read/write locks. self.__mutex.acquire() nclues = len(self.__clues) replies = self.__replies missed = self.__missed self.__mutex.release() return (nclues, replies, missed) def insertClue(self, clue): """Inserts a clue in the list if it is new. """ self.__mutex.acquire() count = clue.getCount() self.__replies += count try: idx = self.__clues.index(clue) self.__clues[idx].incCount(count) except ValueError: self.__clues.append(clue) self.__mutex.release() def getClues(self): """Clue accessor. @return: A copy of all obtained clues. @rtype: C{list} """ self.__mutex.acquire() clues = self.__clues[:] self.__mutex.release() return clues def incMissed(self): """Increase the counter of missed replies. """ self.__mutex.acquire() self.__missed += 1 self.__mutex.release() def setError(self, err): """Signal an error condition. """ self.__mutex.acquire() if self.__error is not None: # An error has already been signalled. 
self.__mutex.release() return self.__error = err self.shouldstop.set() self.__mutex.release() def getError(self): """Returns the reason of the error condition. """ self.__mutex.acquire() # Since we don't know what the nature of __error will be, we need to # provide a clean copy of it to the caller so that no possible # references or changes to __error can affect the object we return. err = copy.deepcopy(self.__error) self.__mutex.release() return err class WorkCrew: """Pool of scanners working in parallel. @ivar task: A reference to scantask. @type task: L{ScanTask} @ivar working: Indicates whether the crew is working or idle. @type working: C{bool} @ivar prev: Previous SIGINT handler. """ def __init__(self, scantask): self.workers = [] self.task = scantask self.state = ScanState() self.working = False self.prev = None def _setupSigHandler(self): """Performs what's needed to catch SIGINT. """ def interrupt(signum, frame): """SIGINT handler """ self.state.setError('received SIGINT') self.prev = signal.signal(signal.SIGINT, interrupt) def _restoreSigHandler(self): """Restore previous SIGINT handler. """ signal.signal(signal.SIGINT, self.prev) def _initLocal(self): """Initializes conventional (local) scanner threads. """ for i in xrange(self.task.parallelism): worker = Scanner(self.state, self.task) self.workers.append(worker) def scan(self): """Perform a parallel load-balancer scan. """ self.working = True self._setupSigHandler() self._initLocal() for worker in self.workers: worker.start() # The Manager executes in the main thread WHILE the others are working # so that signals are correctly caught. manager = Manager(self.state, self.task) manager.run() for worker in self.workers: worker.join() # Display status information for the last time. 
manager.showStats() sys.stdout.write('\n\n') self._restoreSigHandler() self.working = False err = self.state.getError() if err is not None: sys.stderr.write('*** finished (%s) ***\n\n' % err) return self._getClues() def _getClues(self): """Returns a sequence of clues obtained during the scan. """ assert not self.working return self.state.getClues() class BaseScanner(threading.Thread): """Base class for load balancer scanning threads. @ivar timeout: Time (in seconds since the UNIX Epoch) when the scan will be stopped. @type timeout: C{float} """ def __init__(self, state, scantask): """Initializes the scanning thread. @param state: Container to store the results of the scan (shared among scanning threads). @type state: C{instanceof(ScanState)} @param scantask: Object providing information needed to perform the scan. @type scantask: C{instanceof(ScanTask)} """ threading.Thread.__init__(self) self.state = state self.task = scantask self.timeout = 0 self.logger = Halberd.logger.getLogger() def remaining(self, end=None): """Seconds left until a given point in time. @param end: Ending time. @type end: C{float} @return: Remaining time until L{self.timeout} @rtype: C{int} """ if not end: end = self.timeout return int(end - time.time()) def hasExpired(self): """Expiration predicate. @return: True if the timeout has expired, False otherwise. @rtype: C{bool} """ return (self.remaining() <= 0) def setTimeout(self, secs): """Compute an expiration time. @param secs: Amount of seconds to spend scanning the target. @type secs: C{int} @return: The moment in time when the task expires. @rtype: C{float} """ self.timeout = time.time() + secs def run(self): """Perform the scan. """ self.setTimeout(self.task.scantime) while not self.state.shouldstop.isSet(): self.process() def process(self): """Perform a scanning task. This method should be overriden to do actual work. """ pass class Scanner(BaseScanner): """Scans the target host from the local machine. 
""" def process(self): """Gathers clues connecting directly to the target web server. """ client = clientlib.clientFactory(self.task) fatal_exceptions = ( clientlib.ConnectionRefused, clientlib.UnknownReply, clientlib.HTTPSError, ) try: ts, hdrs = client.getHeaders(self.task.addr, self.task.url) except fatal_exceptions, msg: self.state.setError(msg) except clientlib.TimedOut, msg: self.state.incMissed() else: self.state.insertClue(self.makeClue(ts, hdrs)) def makeClue(self, timestamp, headers): """Compose a clue object. @param timestamp: Time when the reply was received. @type timestamp: C{float} @param headers: MIME headers coming from an HTTP response. @type headers: C{str} @return: A valid clue @rtype: C{Clue} """ clue = Halberd.clues.Clue.Clue() clue.setTimestamp(timestamp) clue.parse(headers) return clue class Manager(BaseScanner): """Performs management tasks during the scan. """ # Indicates how often the state must be refreshed (in seconds). refresh_interval = 0.25 def process(self): """Controls the whole scanning process. This method checks when the timeout has expired and notifies the rest of the scanning threads that they should stop. It also displays (in case the user asked for it) detailed information regarding the process. """ self.showStats() if self.hasExpired(): self.state.shouldstop.set() try: time.sleep(self.refresh_interval) except IOError: # Catch interrupted system call exception (it happens when # CONTROL-C is pressed on win32 systems). self.state.shouldstop.set() def showStats(self): """Displays certain statistics while the scan is happening. """ if not self.task.verbose: return def statbar(elapsed, total): """Compose a status bar string showing progress. """ done = int(math.floor(float(total - elapsed)/total * 10)) notdone = int(math.ceil(float(elapsed)/total * 10)) return '[' + '#' * done + ' ' * notdone + ']' nclues, replies, missed = self.state.getStats() # We put a lower bound on the remaining time. 
if self.remaining() < 0: remaining = 0 else: remaining = self.remaining() statusline = '\r' + self.task.addr.ljust(15) + \ ' %s clues: %3d | replies: %3d | missed: %3d' \ % (statbar(remaining, self.task.scantime), nclues, replies, missed) sys.stdout.write(statusline) sys.stdout.flush() # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/clues/0000755000175000017500000000000011431513770014242 5ustar jmbrjmbrhalberd-0.2.4/Halberd/clues/__init__.py0000644000175000017500000000201111431512414016337 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Clue management package This package contains modules implements functionality related to creation, analysis and storage of clues. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA __all__ = [ 'Clue', 'analysis', 'file' ] # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/clues/file.py0000644000175000017500000001176311431512414015535 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Utilities for clue storage. Provides functionality needed to store clues on disk. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. 
Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os import csv import types import shutil import Halberd.util from Halberd.clues.Clue import Clue class InvalidFile(Exception): """The loaded file is not a valid clue file. """ def __init__(self, msg): self.msg = msg def __str__(self): return self.msg def save(filename, clues): """Save a clues to a file. @param filename: Name of the file where the clues will be written to. @type filename: C{str} @param clues: Sequence of clues to write. @type clues: C{list} """ # Create or truncate the destination file. cluefp = open(filename, 'w+') writer = csv.writer(cluefp) for clue in clues: # Store the most relevant clue information. writer.writerow((clue.getCount(), clue._local, clue.headers)) cluefp.close() def load(filename): """Load clues from file. @param filename: Name of the files where the clues are stored. @type filename: C{str} @return: Clues extracted from the file. @rtype: C{list} @raise InvalidFile: In case there's a problem while reinterpreting the clues. """ cluefp = open(filename, 'r') reader = csv.reader(cluefp) clues = [] for tup in reader: try: count, localtime, headers = tup except ValueError: raise InvalidFile, 'Cannot unpack fields' # Recreate the current clue. 
clue = Clue() try: clue._count = int(count) clue._local = float(localtime) except ValueError: raise InvalidFile, 'Could not convert fields' # This may be risky from a security standpoint. clue.headers = eval(headers, {}, {}) if not (isinstance(clue.headers, types.ListType) or isinstance(clue.headers, types.TupleType)): raise InvalidFile, 'Wrong clue header field' clue.parse(clue.headers) clues.append(clue) cluefp.close() return clues class ClueDir: """Stores clues hierarchically using the underlying filesystem. ClueDir tries to be as portable as possible but requires the host operating system to be able to create long filenames (and directories, of course). This is an example layout:: http___www_microsoft_com/ http___www_microsoft_com/207_46_134_221.clu http___www_microsoft_com/207_46_156_220.clu http___www_microsoft_com/207_46_156_252.clu . . . """ def __init__(self, root=None): """Initializes ClueDir object. @param root: Root folder where to start creating sub-folders. @type root: C{str} """ self.ext = 'clu' if not root: self.root = os.getcwd() else: self.root = root self._mkdir(self.root) def _sanitize(self, url): """Filter out potentially dangerous chars. """ return url.translate(Halberd.util.table) def _mkdir(self, dest): """Creates a directory to store clues. If the directory already exists it won't complain about that. """ try: st = os.stat(dest) except OSError: os.mkdir(dest) else: if not shutil.stat.S_ISDIR(st.st_mode): raise InvalidFile, \ '%s already exist and is not a directory' % dest return dest def save(self, url, addr, clues): """Hierarchically write clues. @param url: URL scanned (will be used as a directory name). @type url: C{url} @param addr: Address of the target. @type addr: C{str} @param clues: Clues to be stored. @type clues: C{list} @raise OSError: If the directories can't be created. @raise IOError: If the file can't be stored successfully. 
""" assert url and addr urldir = self._mkdir(os.path.join(self.root, self._sanitize(url))) filename = self._sanitize(addr) + os.extsep + self.ext cluefile = os.path.join(urldir, filename) Halberd.clues.file.save(cluefile, clues) # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/clues/Clue.py0000644000175000017500000001734311431512414015506 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Clue generation module. Clues are pieces of information obtained from the responses sent by a webserver. Their importance comes from the fact that they're the datastructure we use to detect real servers behind HTTP load balancer devices. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import time import types import rfc822 import hashlib import Halberd.util class Clue: """A clue is what we use to tell real servers behind a virtual IP. Clues are gathered during several connections to a web server and they allow us to try to identify patterns in its responses. Those patterns could allow us to find out which real servers are behind a VIP """ def __init__(self): # Number of times this clue has been found. self._count = 1 # Generic server info (sometimes useful for distinguising servers). 
self.info = { 'server': '', 'contloc': '', 'cookies': [], 'date': '', 'digest': '' } # Local time and remote time (in seconds since the Epoch) self._local, self._remote = 0, 0 self.diff = None # We store the headers we're interested in digesting in a string and # calculate its hash _after_ the header processing takes place. This # way we incur in less computational overhead. self.__tmphdrs = '' # Original MIME headers. They're useful during analysis and reporting. self.headers = None def parse(self, headers): """Extracts all relevant information from the MIME headers replied by the target. @param headers: A set of MIME headers (a string as replied by the webserver or a previously parsed sequence of name, value tuples). @type headers: C{str}, C{list} or C{tuple} @raise TypeError: If headers is neither a string nor a sequence. """ if isinstance(headers, basestring): # We parse the server's response into a sequence of name, value # tuples instead of a dictionary because with this approach we keep # the header's order as sent by the target, This is a relevant # piece of information we can't afford to miss. self.headers = [tuple(line.split(':', 1)) \ for line in headers.splitlines() if line != ''] elif isinstance(headers, types.ListType): self.headers = headers else: raise TypeError, 'Unable to parse headers of type %s' \ % type(headers).__name__ # We examine each MIME field and try to find an appropriate handler. If # there is none we simply digest the info it provides. self.__tmphdrs = '' for name, value in self.headers: try: handlerfn = getattr(self, '_get_' + Clue.normalize(name)) handlerfn(value) except AttributeError: self.__tmphdrs += '%s: %s ' % (name, value) self._updateDigest() self._calcDiff() def normalize(name): """Normalize string. This method takes a string coming out of mime-fields and transforms it into a valid Python identifier. That's done by removing invalid non-alphanumeric characters and also numeric ones placed at the beginning of the string. 
@param name: String to be normalized. @type name: C{str} @return: Normalized string. @rtype: C{str} """ normal = name.translate(Halberd.util.table).lower() while normal[0].isdigit(): normal = normal[1:] return ''.join(normal) normalize = staticmethod(normalize) def _updateDigest(self): """Updates header fingerprint. """ assert self.__tmphdrs != None fingerprint = hashlib.sha1(self.__tmphdrs) self.__tmphdrs = None self.info['digest'] = fingerprint.hexdigest() def _calcDiff(self): """Compute the time difference between the remote and local clocks. @return: Time difference. @rtype: C{int} """ self.diff = int(self._local - self._remote) def incCount(self, num=1): """Increase the times this clue has been found. @param num: A positive non-zero number of hits to increase. @type num: C{int} @raise ValueError: in case L{num} is less than or equal to zero. """ if num <= 0: raise ValueError self._count += num def getCount(self): """Retrieve the number of times the clue has been found @return: Number of hits. @rtype: C{int}. """ return self._count def setTimestamp(self, timestamp): """Sets the local clock attribute. @param timestamp: The local time (expressed in seconds since the Epoch) when the connection to the target was successfully completed. @type timestamp: C{int} """ self._local = timestamp def __eq__(self, other): if self.diff != other.diff: return False if self.info['digest'] != other.info['digest']: return False return True def __ne__(self, other): return not self == other def __repr__(self): if not (self.diff or self.info['digest']): return "" % id(self) return "" \ % (id(self), self.diff, self._count, self.info['digest'][:4] + '...') # ================================================================== # The following methods extract relevant data from the MIME headers. # ================================================================== def _get_server(self, field): """Server:""" self.info['server'] = field self.__tmphdrs += field # Make sure this gets hashed too. 
def _get_date(self, field): """Date:""" self.info['date'] = field self._remote = time.mktime(rfc822.parsedate(field)) def _get_content_location(self, field): """Content-location:""" self.info['contloc'] = field self.__tmphdrs += field def _get_set_cookie(self, field): """Set-cookie:""" self.info['cookies'].append(field) # ==================================================== # Ignored headers (they don't contribute to the hash). # ==================================================== def _get_expires(self, field): """Expires:""" pass def _get_age(self, field): """Age:""" pass def _get_content_length(self, field): """Content-length:""" pass def _get_last_modified(self, field): """Last-modified:""" pass def _get_etag(self, field): """ETag:""" pass def _get_cache_expires(self, field): """Cache-expires:""" pass def _get_content_type(self, field): """Content-type:""" pass # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/clues/analysis.py0000644000175000017500000003525011431512414016436 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Utilities for clue analysis. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import copy import Halberd.logger logger = Halberd.logger.getLogger() # TODO - Test fuzzy clustering and k-means against this naive hierarchical # clustering algorithm to see which one performs better (there's a k-means # implementation in Scipy). # Fuzzy clustering will probably be better as it can output a degree of # confidence which might be helpful to halberd's users. # XXX - In python 2.4 there's itertools.groupby() which replaces the idiomatic # dictionary uses for grouping things together. def diff_fields(clues): """Study differences between fields. @param clues: Clues to analyze. @type clues: C{list} @return: Fields which were found to be different among the analyzed clues. @rtype: C{list} """ def pairs(num): for i in xrange(num): for j in xrange(num): if i == j: continue yield (i, j) import difflib different = [] for i, j in pairs(len(clues)): one, other = clues[i].headers, clues[j].headers matcher = difflib.SequenceMatcher(None, one, other) for tag, alo, ahi, blo, bhi in matcher.get_opcodes(): if tag == 'equal': continue for name, value in one[alo:ahi] + other[blo:bhi]: different.append(name) different.sort() different.reverse() return different def ignore_changing_fields(clues): """Tries to detect and ignore MIME fields with ever changing content. Some servers might include fields varying with time, randomly, etc. Those fields are likely to alter the clue's digest and interfer with L{analyze}, producing many false positives and making the scan useless. This function detects those fields and recalculates each clue's digest so they can be safely analyzed again. @param clues: Sequence of clues. 
@type clues: C{list} or C{tuple} """ from Halberd.clues.Clue import Clue different = diff_fields(clues) # First alter Clue to be able to cope with the varying fields. ignored = [] for field in different: method = '_get_' + Clue.normalize(field) if not hasattr(Clue, method): logger.debug('ignoring %s', field) ignored.append(method) setattr(Clue, method, lambda s, f: None) for clue in clues: Clue.parse(clue, clue.headers) for method in ignored: # We want to leave the Clue class as before because a MIME field # causing trouble for the current scan might be the source of precious # information for another scan. delattr(Clue, method) return clues def get_digest(clue): """Returns the specified clue's digest. This function is usually passed as a parameter for L{classify} so it can separate clues according to their digest (among other fields). @return: The digest of a clue's parsed headers. @rtype: C{str} """ return clue.info['digest'] def clusters(clues, step=3): """Finds clusters of clues. A cluster is a group of at most C{step} clues which only differ in 1 seconds between each other. @param clues: A sequence of clues to analyze @type clues: C{list} or C{tuple} @param step: Maximum difference between the time differences of the cluster's clues. @type step: C{int} @return: A sequence with merged clusters. @rtype: C{tuple} """ def iscluster(clues, num): """Determines if a list of clues form a cluster of the specified size. """ assert len(clues) == num if abs(clues[0].diff - clues[-1].diff) <= num: return True return False def find_cluster(clues, num): if len(clues) >= num: if iscluster(clues[:num], num): return tuple(clues[:num]) return () clues = sort_clues(clues) invrange = lambda num: [(num - x) for x in range(num)] start = 0 while True: clues = clues[start:] if not clues: break for i in invrange(step): cluster = find_cluster(clues, i) if cluster: yield cluster start = i break def merge(clues): """Merges a sequence of clues into one. 
A new clue will store the total count of the clues. Note that each L{Clue} has a starting count of 1 >>> a, b, c = Clue(), Clue(), Clue() >>> sum([x.getCount() for x in [a, b, c]]) 3 >>> a.incCount(5), b.incCount(11), c.incCount(23) (None, None, None) >>> merged = merge((a, b, c)) >>> merged.getCount() 42 >>> merged == a True @param clues: A sequence containing all the clues to merge into one. @type clues: C{list} or C{tuple} @return: The result of merging all the passed clues into one. @rtype: L{Clue} """ merged = copy.copy(clues[0]) for clue in clues[1:]: merged.incCount(clue.getCount()) return merged def classify(seq, *classifiers): """Classify a sequence according to one or several criteria. We store each item into a nested dictionary using the classifiers as key generators (all of them must be callable objects). In the following example we classify a list of clues according to their digest and their time difference. >>> a, b, c = Clue(), Clue(), Clue() >>> a.diff, b.diff, c.diff = 1, 2, 2 >>> a.info['digest'] = 'x' >>> b.info['digest'] = c.info['digest'] = 'y' >>> get_diff = lambda x: x.diff >>> classified = classify([a, b, c], get_digest, get_diff) >>> digests = classified.keys() >>> digests.sort() # We sort these so doctest won't fail. >>> for digest in digests: ... print digest ... for diff in classified[digest].keys(): ... print ' ', diff ... for clue in classified[digest][diff]: ... if clue is a: print ' a' ... elif clue is b: print ' b' ... elif clue is c: print ' c' ... x 1 a y 2 b c @param seq: A sequence to classify. @type seq: C{list} or C{tuple} @param classifiers: A sequence of callables which return specific fields of the items contained in L{seq} @type classifiers: C{list} or C{tuple} @return: A nested dictionary in which the keys are the fields obtained by applying the classifiers to the items in the specified sequence. @rtype: C{dict} """ # XXX - Printing a dictionary in a doctest string is a very bad idea. 
classified = {} for item in seq: section = classified for classifier in classifiers[:-1]: assert callable(classifier) section = section.setdefault(classifier(item), {}) # At the end no more dict nesting is needed. We simply store the items. last = classifiers[-1] section.setdefault(last(item), []).append(item) return classified def sections(classified, sects=None): """Returns sections (and their items) from a nested dict. See also: L{classify} @param classified: Nested dictionary. @type classified: C{dict} @param sects: List of results. It should not be specified by the user. @type sects: C{list} @return: A list of lists in where each item is a subsection of a nested dictionary. @rtype: C{list} """ if sects is None: sects = [] if isinstance(classified, dict): for key in classified.keys(): sections(classified[key], sects) elif isinstance(classified, list): sects.append(classified) return sects def deltas(xs): """Computes the differences between the elements of a sequence of integers. >>> deltas([-1, 0, 1]) [1, 1] >>> deltas([1, 1, 2, 3, 5, 8, 13]) [0, 1, 1, 2, 3, 5] @param xs: A sequence of integers. @type xs: C{list} @return: A list of differences between consecutive elements of L{xs}. @rtype: C{list} """ if len(xs) < 2: return [] else: return [xs[1] - xs[0]] + deltas(xs[1:]) def slices(start, xs): """Returns slices of a given sequence separated by the specified indices. If we wanted to get the slices necessary to split range(20) in sub-sequences of 5 items each we'd do: >>> seq = range(20) >>> indices = [5, 10, 15] >>> for piece in slices(0, indices): ... print seq[piece] [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] [10, 11, 12, 13, 14] [15, 16, 17, 18, 19] @param start: Index of the first element of the sequence we want to partition. @type start: C{int}. @param xs: Sequence of indexes where 'cuts' must be made. @type xs: C{list} @return: A sequence of C{slice} objects suitable for splitting a list as specified. 
@rtype: C{list} of C{slice} """ if xs == []: # The last slice includes all the remaining items in the sequence. return [slice(start, None)] return [slice(start, xs[0])] + slices(xs[0], xs[1:]) def sort_clues(clues): """Sorts clues according to their time difference. """ # This can be accomplished in newer (>= 2.4) Python versions using: # clues.sort(key=lambda x: x.diff) tmps = [(x.diff, x) for x in clues] tmps.sort() return [x[1] for x in tmps] def filter_proxies(clues, maxdelta=3): """Detect and merge clues pointing to a proxy cache on the remote end. @param clues: Sequence of clues to analyze @type clues: C{list} @param maxdelta: Maximum difference allowed between a clue's time difference and the previous one. @type maxdelta: C{int} @return: Sequence where all irrelevant clues pointing out to proxy caches have been filtered out. @rtype: C{list} """ results = [] # Classify clues by remote time and digest. get_rtime = lambda c: c._remote classified = classify(clues, get_rtime, get_digest) subsections = sections(classified) for cur_clues in subsections: if len(cur_clues) == 1: results.append(cur_clues[0]) continue cur_clues = sort_clues(cur_clues) diffs = [c.diff for c in cur_clues] # We find the indices of those clues which differ from the rest in # more than maxdelta seconds. indices = [idx for idx, delta in enumerate(deltas(diffs)) if abs(delta) > maxdelta] for piece in slices(0, indices): if cur_clues[piece] == []: break results.append(merge(cur_clues[piece])) return results def uniq(clues): """Return a list of unique clues. This is needed when merging clues coming from different sources. Clues with the same time diff and digest are not discarded, they are merged into one clue with the aggregated number of hits. @param clues: A sequence containing the clues to analyze. @type clues: C{list} @return: Filtered sequence of clues where no clue has the same digest and time difference. 
@rtype: C{list} """ results = [] get_diff = lambda c: c.diff classified = classify(clues, get_digest, get_diff) for section in sections(classified): results.append(merge(section)) return results def hits(clues): """Compute the total number of hits in a sequence of clues. @param clues: Sequence of clues. @type clues: C{list} @return: Total hits. @rtype: C{int} """ return sum([clue.getCount() for clue in clues]) def analyze(clues): """Draw conclusions from the clues obtained during the scanning phase. @param clues: Unprocessed clues obtained during the scanning stage. @type clues: C{list} @return: Coherent list of clues identifying real web servers. @rtype: C{list} """ results = [] clues = uniq(clues) clues = filter_proxies(clues) cluesbydigest = classify(clues, get_digest) for key in cluesbydigest.keys(): for cluster in clusters(cluesbydigest[key]): results.append(merge(cluster)) return results # TODO - reanalyze should be called from this module and not from Halberd.shell. def reanalyze(clues, analyzed, threshold): """Identify and ignore changing header fields. After initial analysis one must check that there aren't as many realservers as obtained clues. If there were it could be a sign of something wrong happening: each clue is different from the others due to one or more MIME header fields which change unexpectedly. @param clues: Raw sequence of clues. @type clues: C{list} @param analyzed: Result from the first analysis phase. @type analyzed: C{list} @param threshold: Minimum clue-to-realserver ratio in order to trigger field inspection. 
@type threshold: C{float} """ def ratio(): return len(analyzed) / float(len(clues)) assert len(clues) > 0 r = ratio() if r >= threshold: logger.debug('clue-to-realserver ratio is high (%.3f)', r) logger.debug('reanalyzing clues...') ignore_changing_fields(clues) analyzed = analyze(clues) logger.debug('clue reanalysis done.') # Check again to see if we solved the problem but only warn the user if # there's a significant amount of evidence. if ratio() >= threshold and len(clues) > 10: logger.warn( '''The following results might be incorrect. It could be because the remote host keeps changing its server version string or because halberd didn't have enough samples.''') return analyzed def _test(): import doctest import Halberd.clues.Clue import Halberd.clues.analysis # Due to the above imports, this test must be executed from the top level # source directory: # python Halberd/clues/analysis.py -v globs = Halberd.clues.analysis.__dict__ globs.update(Halberd.clues.Clue.__dict__) return doctest.testmod(m=Halberd.clues.analysis, name='analysis', globs=globs) if __name__ == '__main__': _test() # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/clientlib.py0000644000175000017500000002455711431512414015455 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """HTTP/HTTPS client module. @var default_timeout: Default timeout for socket operations. @type default_timeout: C{float} @var default_bufsize: Default number of bytes to try to read from the network. @type default_bufsize: C{int} @var default_template: Request template, must be filled by L{HTTPClient} @type default_template: C{str} """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import time import socket import urlparse from itertools import takewhile import Halberd.ScanTask default_timeout = 2 default_bufsize = 1024 # WARNING - Changing the HTTP request method in the following template will # require updating tests/test_clientlib.py accordingly. default_template = """\ GET %(request)s HTTP/1.1\r\n\ Host: %(hostname)s%(port)s\r\n\ Pragma: no-cache\r\n\ Cache-control: no-cache\r\n\ User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20050414 Firefox/1.0.3\r\n\ Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg,\ application/x-shockwave-flash, */*\r\n\ Accept-Language: en-us,en;q=0.5\r\n\ Accept-Encoding: gzip,deflate\r\n\ Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n\ Keep-Alive: 300\r\n\ Connection: keep-alive\r\n\r\n\ """ class HTTPError(Exception): """Generic HTTP exception""" def __init__(self, msg): self.msg = msg def __str__(self): return str(self.msg) def __deepcopy__(self, memo): return self class HTTPSError(HTTPError): """Generic HTTPS exception""" class InvalidURL(HTTPError): """Invalid or unsupported URL""" class TimedOut(HTTPError): """Operation timed out""" class ConnectionRefused(HTTPError): """Unable to reach webserver""" class UnknownReply(HTTPError): """The remote host didn't return an HTTP reply""" class HTTPClient: """Special-purpose HTTP client. @ivar timeout: Timeout for socket operations (expressed in seconds). B{WARNING}: changing this value is strongly discouraged. @type timeout: C{float} @ivar bufsize: Buffer size for network I/O. 
@type bufsize: C{int} @ivar template: Template of the HTTP request to be sent to the target. @type template: C{str} @ivar _recv: Reference to a callable responsible from reading data from the network. @type _recv: C{callable} """ timeout = default_timeout bufsize = default_bufsize template = default_template def __init__(self): """Initializes the object. """ self.schemes = ['http'] self.default_port = 80 # _timeout_exceptions MUST be converted to a tuple before using it with # except. self._timeout_exceptions = [socket.timeout] self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self._sock.settimeout(self.timeout) self._recv = self._sock.recv def getHeaders(self, address, urlstr): """Talk to the target webserver and fetch MIME headers. @param address: The target's network address. @type address: C{tuple} @param urlstr: URL to use. @type urlstr: C{str} @return: The time when the client started reading the server's response and the MIME headers that were sent. @rtype: C{tuple} """ self._putRequest(address, urlstr) timestamp, headers = self._getReply() if not headers: return None # Remove HTTP response and leave only the MIME headers. headers = headers.splitlines()[1:] headers = list(takewhile(lambda x: x != '', headers)) headers.append('\r\n') headers = '\r\n'.join(headers) return timestamp, headers def _putRequest(self, address, urlstr): """Sends an HTTP request to the target webserver. This method connects to the target server, sends the HTTP request and records a timestamp. @param address: Target address. @type address: C{str} @param urlstr: A valid Unified Resource Locator. @type urlstr: C{str} @raise InvalidURL: In case the URL scheme is not HTTP or HTTPS @raise ConnectionRefused: If it can't reach the target webserver. @raise TimedOut: If we cannot send the data within the specified time. 
""" scheme, netloc, url, params, query, fragment = urlparse.urlparse(urlstr) if scheme not in self.schemes: raise InvalidURL, '%s is not a supported protocol' % scheme hostname, port = self._getHostAndPort(netloc) # NOTE: address and hostname may not be the same. The caller is # responsible for checking that. req = self._fillTemplate(hostname, port, url, params, query, fragment) self._connect((address, port)) self._sendAll(req) def _getHostAndPort(self, netloc): """Determine the hostname and port to connect to from an URL @param netloc: Relevant part of the parsed URL. @type netloc: C{str} @return: Hostname (C{str}) and port (C{int}) @rtype: C{tuple} """ try: hostname, portnum = netloc.split(':', 1) except ValueError: hostname, port = netloc, self.default_port else: if portnum.isdigit(): port = int(portnum) else: raise InvalidURL, '%s is not a valid port number' % portnum return hostname, port def _fillTemplate(self, hostname, port, url, params='', query='', fragment=''): """Fills the request template with relevant information. @param hostname: Target host to reach. @type hostname: C{str} @param port: Remote port. @type port: C{int} @param url: URL to use as source. @type url: C{str} @return: A request ready to be sent @rtype: C{str} """ urlstr = url or '/' if params: urlstr += ';' + params if query: urlstr += '?' + query if fragment: urlstr += '#' + fragment if port == self.default_port: p = '' else: p = ':' + str(port) values = {'request': urlstr, 'hostname': hostname, 'port': p} return self.template % values def _connect(self, addr): """Connect to the target address. @param addr: The target's address. @type addr: C{tuple} @raise ConnectionRefused: If it can't reach the target webserver. """ try: self._sock.connect(addr) except socket.error: raise ConnectionRefused, 'Connection refused' def _sendAll(self, data): """Sends a string to the socket. 
""" try: self._sock.sendall(data) except socket.timeout: raise TimedOut, 'timed out while writing to the network' def _getReply(self): """Read a reply from the server. @return: Time when the data started arriving plus the received data. @rtype: C{tuple} @raise UnknownReply: If the remote server doesn't return a valid HTTP reply. @raise TimedOut: In case reading from the network takes too much time. """ data = '' timestamp = None stoptime = time.time() + self.timeout while time.time() < stoptime: try: chunk = self._recv(self.bufsize) except tuple(self._timeout_exceptions), msg: raise TimedOut, msg if not chunk: # The remote end closed the connection. break if not timestamp: timestamp = time.time() data += chunk idx = data.find('\r\n\r\n') if idx != -1: data = data[:idx] break if not data.startswith('HTTP/'): raise UnknownReply, 'Invalid protocol' return timestamp, data def __del__(self): if self._sock: self._sock.close() class HTTPSClient(HTTPClient): """Special-purpose HTTPS client. """ def __init__(self): HTTPClient.__init__(self) self.schemes.append('https') self.default_port = 443 self._recv = None self._sslsock = None self._timeout_exceptions.append(socket.sslerror) # Path to an SSL key file and certificate. self.keyfile = None self.certfile = None def _connect(self, addr): """Connect to the target web server. @param addr: The target's address. @type addr: C{tuple} @raise HTTPSError: In case there's some mistake during the SSL negotiation. """ HTTPClient._connect(self, addr) try: self._sslsock = socket.ssl(self._sock, self.keyfile, self.certfile) except socket.sslerror, msg: raise HTTPSError, msg self._recv = self._sslsock.read def _sendAll(self, data): """Sends a string to the socket. """ # xxx - currently we don't make sure everything is sent. self._sslsock.write(data) def clientFactory(scantask): """HTTP/HTTPS client factory. @param scantask: Object describing where the target is and how to reach it. 
@type scantask: C{instanceof(ScanTask)} @return: The appropriate client class for the specified URL. @rtype: C{class} """ url = scantask.url keyfile = scantask.keyfile certfile = scantask.certfile if url.startswith('http://'): return HTTPClient() elif url.startswith('https://'): httpsclient = HTTPSClient() httpsclient.keyfile = keyfile httpsclient.certfile = certfile return httpsclient else: raise InvalidURL # vim: ts=4 sw=4 et halberd-0.2.4/Halberd/conflib.py0000644000175000017500000001032711431512414015112 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Configuration file management module. Halberd uses configuration files to store relevant information needed for certain protocols (SSL) or modes of operation (proxy, distributed client/server, etc.). This module takes care of reading and writing configuration files. @var default_proxy_port: Default TCP port to listen when acting as a proxy. @type default_proxy_port: C{int} """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os import ConfigParser default_proxy_port = 8080 default_conf = r""" # ============================================================================ # halberd configuration file. 
# ============================================================================ [proxy] address: port: 8080 [ssl] keyfile: certfile: """ class InvalidConfFile(Exception): """Invalid configuration file. """ class ConfReader: """Takes care of turning configuration files into meaningful information. """ def __init__(self): self.__dict = {} self.__conf = None self.confparser = ConfigParser.SafeConfigParser() def open(self, fname): """Opens the configuration file. @param fname: Pathname to the configuration file. @type fname: C{str} @raise InvalidConfFile: In case the passed file is not a valid one. """ self.__conf = open(os.path.expanduser(fname), 'r') try: self.confparser.readfp(self.__conf, fname) except ConfigParser.MissingSectionHeaderError, msg: raise InvalidConfFile, msg def close(self): """Release the configuration file's descriptor. """ if self.__conf: self.__conf.close() def _getAddr(self, sectname, default_port): """Read a network address from the given section. """ section = self.__dict[sectname] addr = section.get('address', '') try: port = int(section.get('port', default_port)) except ValueError: port = default_port return (addr, port) def parse(self): """Parses the configuration file. """ assert self.__conf, 'The configuration file is not open' proxy_serv_addr = () # The orthodox way of doing this is via ConfigParser.get*() but those # methods lack the convenience of dict.get. While another approach # could be to subclass ConfigParser I think it's overkill for the # current situation. 
for section in self.confparser.sections(): sec = self.__dict.setdefault(section, {}) for name, value in self.confparser.items(section): sec.setdefault(name, value) if self.__dict.has_key('proxy'): proxy_serv_addr = self._getAddr('proxy', default_proxy_port) keyfile = self.__dict['ssl'].get('keyfile', None) certfile = self.__dict['ssl'].get('certfile', None) if keyfile == '': keyfile = None if certfile == '': certfile = None return proxy_serv_addr, keyfile, certfile def writeDefault(self, conf_file): """Write a bare-bones configuration file @param conf_file: Target file where the default conf. will be written. @type conf_file: C{str} """ assert conf_file and isinstance(conf_file, basestring) conf_fp = open(conf_file, 'w') conf_fp.write(default_conf) conf_fp.close() def __del__(self): self.close() # vim: ts=4 sw=4 et halberd-0.2.4/THANKS0000644000175000017500000000054411431512414012476 0ustar jmbrjmbrHalberd THANKS file Halberd has originally been written by Juan M. Bello Rivas. Many people have further contributed to Halberd by reporting problems, suggesting various improvements, or submitting actual code. Here is a list of these people. Help me keep it complete and exempt of errors. Dethy Fabian Affolter Sindre Pedersen Bjørdal Andrés Riancho halberd-0.2.4/man/0000755000175000017500000000000011431513770012341 5ustar jmbrjmbrhalberd-0.2.4/man/man1/0000755000175000017500000000000011431513770013175 5ustar jmbrjmbrhalberd-0.2.4/man/man1/halberd.10000644000175000017500000000301511431512610014647 0ustar jmbrjmbr.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.35. .TH HALBERD "1" "August 2010" "halberd 0.2.4 (14-Aug-2010)" "User Commands" .SH NAME halberd \- manual page for halberd 0.2.4 (14-Aug-2010) .SH DESCRIPTION Usage: halberd [OPTION]... URL .PP Discover web servers behind HTTP load balancers. 
.SS "Options:" .TP \fB\-\-version\fR show program's version number and exit .TP \fB\-h\fR, \fB\-\-help\fR show this help message and exit .TP \fB\-v\fR, \fB\-\-verbose\fR explain what is being done .TP \fB\-q\fR, \fB\-\-quiet\fR run quietly .TP \fB\-d\fR, \fB\-\-debug\fR enable debugging information .TP \fB\-t\fR NUM, \fB\-\-time\fR=\fINUM\fR time (in seconds) to spend scanning the target .TP \fB\-p\fR NUM, \fB\-\-parallelism\fR=\fINUM\fR specify the number of parallel threads to use .TP \fB\-u\fR FILE, \fB\-\-urlfile\fR=\fIFILE\fR read URLs from FILE .TP \fB\-o\fR FILE, \fB\-\-out\fR=\fIFILE\fR write report to the specified file .TP \fB\-a\fR ADDR, \fB\-\-address\fR=\fIADDR\fR specify address to scan .TP \fB\-r\fR FILE, \fB\-\-read\fR=\fIFILE\fR load clues from the specified file .TP \fB\-w\fR DIR, \fB\-\-write\fR=\fIDIR\fR save clues to the specified directory .TP \fB\-\-config\fR=\fIFILE\fR use alternative configuration file .SH BUGS Report bugs to <jmbr@superadditive.com>. .SH COPYRIGHT Copyright \(co 2004, 2005, 2006, 2010 Juan M. Bello Rivas .PP This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. halberd-0.2.4/GNUmakefile0000644000175000017500000000741411431512575013650 0ustar jmbrjmbr# ============================================================================ # This makefile is intended for developers. End users should rely on setup.py. # ============================================================================ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


# --- Directory layout (all relative to the source tree root) ---------------
srcdir := .
scriptsdir := $(srcdir)/scripts
modulesdir := $(srcdir)/Halberd
docdir := $(srcdir)/doc
apidocdir := $(srcdir)/doc/api
mandir := $(srcdir)/man
testdir := $(srcdir)/tests

# --- Tools ------------------------------------------------------------------
PYTHON := /usr/bin/python
PYTHON_COUNT := /usr/local/bin/python_count
PYLINT := /usr/bin/pylint
EPYDOC := /usr/bin/epydoc
CTAGS := ctags
DARCS := /usr/bin/darcs
SHTOOLIZE := /usr/bin/shtoolize
SHTOOL := $(srcdir)/shtool
RM := /bin/rm -f
MKDIR := /bin/mkdir
SETUP := $(PYTHON) $(srcdir)/setup.py
HELP2MAN := ~/bin/hacked-help2man

versionfile := $(modulesdir)/version.py

# --- Source file sets -------------------------------------------------------
# version.py is generated by shtool, so it is kept out of MODULES.
SCRIPTS := $(scriptsdir)/halberd
MODULES := $(filter-out $(modulesdir)/version.py, \
		$(wildcard $(modulesdir)/*.py)) \
	$(wildcard $(modulesdir)/clues/*.py)
SOURCES := $(SCRIPTS) $(MODULES)
TEST_SOURCES := $(wildcard $(testdir)/*.py)
ALL_SOURCES := $(SOURCES) $(TEST_SOURCES)
ALL_DIRS := $(sort $(dir $(ALL_SOURCES)))

# $(call remove, PATTERN, DIRS): delete files matching PATTERN in each of DIRS
# (every entry of DIRS is expected to end with a slash).
remove = $(RM) $(addsuffix $(strip $(1)), $(2))

all:
	@echo "============================================================================"
	@echo "This makefile is intended for developers. End users should rely on setup.py."
	@echo "============================================================================"

clean:
	$(RM) tags
	$(RM) -r $(srcdir)/build
	$(call remove, *.pyc, $(ALL_DIRS))
	$(call remove, *.pyo, $(ALL_DIRS))
	$(MAKE) -C $(docdir) clean

clobber: clean
	$(RM) *.bak
	$(call remove, *~, $(ALL_DIRS) $(docdir)/)
	$(MAKE) -C $(docdir) clobber

build: $(SOURCES)
	$(SETUP) build

dist: distclean setversion doc ChangeLog
	$(SETUP) sdist

check: $(ALL_SOURCES)
	$(SETUP) test
	PYTHONPATH=$(modulesdir):$(modulesdir)/clues:$$PYTHONPATH \
		$(PYTHON) $(modulesdir)/clues/analysis.py

install: build doc
	$(SETUP) install --prefix $$HOME

distclean: clobber
	$(RM) $(srcdir)/MANIFEST
	$(RM) $(srcdir)/ChangeLog
	$(RM) $(docdir)/*.html
	$(RM) $(mandir)/man1/halberd.1
	$(RM) -r $(apidocdir)
	$(RM) -r $(srcdir)/dist

doc: $(apidocdir)/index.html $(mandir)/man1/halberd.1
	$(MAKE) -C $(docdir)

$(apidocdir)/index.html: $(MODULES)
	$(EPYDOC) -o $(apidocdir) $^

$(mandir)/man1/halberd.1:
	$(HELP2MAN) --include $(srcdir)/help2man.cfg --no-info $(scriptsdir)/halberd --output $@

tags: $(ALL_SOURCES)
	$(CTAGS) $^

setversion: shtool
	@version=`$(SHTOOL) version -l python $(versionfile)`; \
	$(SHTOOL) version -l python -n halberd -s $$version $(versionfile)

incversion: shtool
	$(SHTOOL) version -l python -n halberd -i l $(versionfile)

shtool:
	$(SHTOOLIZE) -o $@ version

lint:
	$(PYLINT) --required-attributes= --additional-builtins=map,filter --method-rgx='.*' --function-rgx='.*' $(ALL_SOURCES)

ChangeLog: $(ALL_SOURCES)
	$(DARCS) changes --human-readable > ChangeLog

count: $(ALL_SOURCES)
	@$(PYTHON_COUNT) $^

# `all', `build' and `doc' must be phony as well: ./doc always exists and
# ./build is created by `setup.py build', so without this make would compare
# against those directories and could consider the targets up to date.
# `tags' and `shtool' are real files and stay non-phony on purpose.
.PHONY: all build doc clean clobber distclean dist setversion incversion check count install lint ChangeLog

# vim: noexpandtab
def make_parser():
    """Sets up the command line option parser.

    @return: A parser configured with every option halberd understands.
    @rtype: C{optparse.OptionParser}
    """
    import optparse

    # NOTE(review): the line breaks inside this literal were reconstructed
    # from a whitespace-collapsed copy of the file -- confirm against the
    # original before relying on the exact --version layout.
    notice = version.version.v_gnu + '\n\n' + \
r"""Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas

This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""

    parser = optparse.OptionParser(usage='%prog [OPTION]... URL',
                                   version=notice)
    parser.set_description("Discover web servers behind HTTP load balancers.")

    # -v and -q share the same destination; verbose output is the default.
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      help='explain what is being done', default=True)
    parser.add_option('-q', '--quiet', action='store_false', dest='verbose',
                      help='run quietly', default=True)
    # xxx - use increment over verbosity level instead of this
    parser.add_option('-d', '--debug', action='store_true', dest='debug',
                      help='enable debugging information', default=False)

    parser.add_option('-t', '--time', action='store', type='int',
                      dest='scantime',
                      help='time (in seconds) to spend scanning the target',
                      metavar='NUM',
                      default=Halberd.ScanTask.default_scantime)
    parser.add_option('-p', '--parallelism', action='store', type='int',
                      dest='parallelism',
                      help='specify the number of parallel threads to use',
                      metavar='NUM',
                      default=Halberd.ScanTask.default_parallelism)
    parser.add_option('-u', '--urlfile', action='store', dest='urlfile',
                      help='read URLs from FILE', metavar='FILE')
    parser.add_option('-o', '--out', action='store', dest='out',
                      help='write report to the specified file',
                      metavar='FILE', default='')
    parser.add_option('-a', '--address', action='store', dest='addr',
                      help='specify address to scan',
                      metavar='ADDR', default='')
    parser.add_option('-r', '--read', action='store', dest='cluefile',
                      help='load clues from the specified file',
                      metavar='FILE', default='')
    parser.add_option('-w', '--write', action='store', dest='save',
                      help='save clues to the specified directory',
                      metavar='DIR', default='')
    parser.add_option('', '--config', action='store', dest='confname',
                      help='use alternative configuration file',
                      metavar='FILE',
                      default=Halberd.ScanTask.default_conf_file)

    return parser


def make_url(url):
    """Ensures the URL is a valid one.

    Characters aren't escaped, so strings like 'htt%xx://' won't be parsed.
    The scheme is recognized regardless of its case, so 'HTTP://host' is
    returned untouched instead of being prefixed a second time.

    @param url: An incomplete (or not) URL.
    @type url: C{str}

    @return: The URL itself when it already carries an http/https scheme,
    otherwise the URL prefixed with 'http://'.
    @rtype: C{str}
    """
    # URL schemes are case-insensitive; only the comparison is lower-cased,
    # the caller's spelling is preserved in the result.
    if url.lower().startswith(('http://', 'https://')):
        return url

    return 'http://' + url


def scannerFactory(opts, args):
    """Instantiates a scanner of the appropriate flavour.

    It selects which scanning strategy to follow depending on how the user
    invoked the program. A clue file takes precedence over a URL file, which
    in turn takes precedence over a URL given as positional argument.

    @param opts: Option values as returned by the parser's C{parse_args}.
    @param args: Positional command line arguments.
    @type args: C{list}

    @return: A strategy bound to the configured scan task, or C{None} when
    the user gave neither a clue file, a URL file nor a URL.
    """
    scantask = Halberd.ScanTask.ScanTask()

    # Copy the command line settings over to the scan task.
    scantask.scantime = opts.scantime
    scantask.parallelism = opts.parallelism
    scantask.verbose = opts.verbose
    scantask.debug = opts.debug
    scantask.conf_file = opts.confname
    scantask.cluefile = opts.cluefile
    scantask.save = opts.save
    scantask.out = opts.out

    # Set logging level.
    if not scantask.verbose:
        Halberd.logger.setError()
    if scantask.debug:
        Halberd.logger.setDebug()

    scantask.readConf()

    if opts.cluefile:
        # Read and analyze clues.
        scanner = Halberd.shell.ClueReaderStrategy
    elif opts.urlfile:
        # MultiScan
        scantask.urlfile = opts.urlfile
        scanner = Halberd.shell.MultiScanStrategy
    elif len(args) > 0:
        # UniScan
        scantask.url = make_url(args[0])
        scantask.addr = opts.addr
        scanner = Halberd.shell.UniScanStrategy
    else:
        return None

    return scanner(scantask)


def main(argv):
    """Command line interface.

    @param argv: Full argument vector; the first item (the program name) is
    skipped.
    @type argv: C{list}
    """
    parser = make_parser()

    (opts, args) = parser.parse_args(argv[1:])

    if opts.verbose:
        # Banner followed by an empty line. Written through sys.stdout so
        # that the statement is valid no matter how `print' is defined.
        sys.stdout.write(version.version.v_gnu + '\n\n')

    try:
        scanner = scannerFactory(opts, args)
        if scanner is None:
            # parser.error() prints the usage message and exits.
            parser.error('incorrect number of arguments')
        scanner.execute()
    except Halberd.shell.ScanError as msg:
        sys.stderr.write('\n*** %s ***\n' % msg)
    except KeyboardInterrupt:
        sys.stderr.write('\r*** interrupted by the user ***\n')
Bello Rivas Author-email: jmbr@superadditive.com License: UNKNOWN Description: Halberd discovers HTTP load balancers. It is useful for web application security auditing and for load balancer configuration testing. Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Environment :: Console Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Information Technology Classifier: Intended Audience :: System Administrators Classifier: License :: OSI Approved :: GNU General Public License (GPL) Classifier: Natural Language :: English Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Security halberd-0.2.4/tests/0000755000175000017500000000000011431513770012730 5ustar jmbrjmbrhalberd-0.2.4/tests/__init__.py0000644000175000017500000000154211431512414015035 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Testing framework. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # vim: ts=4 sw=4 et halberd-0.2.4/tests/test_clues_Clue.py0000644000175000017500000000401411431512414016415 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Unit test for Halberd.clues.Clue """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. 
Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import unittest from Halberd.clues.Clue import Clue class TestClue(unittest.TestCase): def setUp(self): self.clue = Clue() def tearDown(self): pass def testCount(self): self.failUnlessEqual(self.clue.getCount(), 1) self.clue.incCount() self.failUnlessEqual(self.clue.getCount(), 2) self.clue.incCount(21) self.failUnlessEqual(self.clue.getCount(), 23) self.failUnlessRaises(ValueError, self.clue.incCount, 0) self.failUnlessRaises(ValueError, self.clue.incCount, -7) def testNormalize(self): value = '123content-location*23' self.failUnless(Clue.normalize(value) == 'content_location_23') value = 'content/location' self.failUnless(Clue.normalize(value) == 'content_location') value = '*content/location123' self.failUnless(Clue.normalize(value) == '_content_location123') def testRecompute(self): # Check for invalid digest computations. self.clue.parse('Test: abc\r\nSomething: blah\r\n\r\n') self.assertRaises(AssertionError, self.clue._updateDigest, ) if __name__ == '__main__': unittest.main() # vim: ts=4 sw=4 et halberd-0.2.4/tests/test_clues_file.py0000644000175000017500000000447311431512414016455 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Unit tests for clue storage functionality. """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. 
Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os import unittest import Halberd.clues.file from Halberd.clues.Clue import Clue class TestStorage(unittest.TestCase): def setUp(self): self.clue = Clue() self.clue.setTimestamp(100) self.clue.headers = eval(r"""[ ('Date', ' Tue, 24 Feb 2004 17:09:05 GMT'), ('Server', ' Apache/2.0.48 (Unix) DAV/2 SVN/0.35.1'), ('Content-Location', ' index.html.en'), ('Vary', ' negotiate,accept-language,accept-charset'), ('TCN', ' choice'), ('Last-Modified', ' Sat, 22 Nov 2003 15:56:12 GMT'), ('ETag', ' "252ff0-5b0-3b5aff00;253006-961-3b5aff00"'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 1456'), ('Keep-Alive', ' timeout=15, max=100'), ('Connection', ' Keep-Alive'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Content-Language', ' en') ]""") self.clue.parse(self.clue.headers) self.filename = os.path.join('tests', 'data', 'test.clues') def tearDown(self): pass def testSimpleSaveAndLoad(self): try: Halberd.clues.file.save(self.filename, [self.clue]) clues = Halberd.clues.file.load(self.filename) finally: os.unlink(self.filename) self.failUnless(len(clues) == 1) self.failUnless(clues[0] == self.clue) if __name__ == '__main__': unittest.main() # vim: ts=4 sw=4 et halberd-0.2.4/tests/data/0000755000175000017500000000000011431513770013641 5ustar 
jmbrjmbrhalberd-0.2.4/tests/data/www.cdrom.com.clu0000644000175000017500000000233211144237007017047 0ustar jmbrjmbr243,1078568553.52,"[('Location', ' http://www.cdrom.com/spotlights/burnquick.php'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Server', ' Oracle9iAS/9.0.4 Apache/1.3.27 (Unix) PHP/4.1.1 Oracle9iAS-Web-Cache/9.0.4.0.0 (N)'), ('Date', ' Sat, 06 Mar 2004 10:24:27 GMT'), ('Age', ' 0')]" 3,1078568559.99,"[('Location', ' http://www.cdrom.com/spotlights/burnquick.php'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Server', ' Oracle9iAS/9.0.4 Apache/1.3.27 (Unix) PHP/4.1.1 Oracle9iAS-Web-Cache/9.0.4.0.0 (N)'), ('Date', ' Sat, 06 Mar 2004 10:24:34 GMT'), ('Age', ' 1')]" 18,1078568572.83,"[('Location', ' http://www.cdrom.com/spotlights/burnquick.php'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Server', ' Oracle9iAS/9.0.4 Apache/1.3.27 (Unix) PHP/4.1.1 mod_ssl/2.8.14 OpenSSL/0.9.7b Oracle9iAS-Web-Cache/9.0.4.0.0 (N)'), ('Date', ' Sat, 06 Mar 2004 10:23:46 GMT'), ('Age', ' 60')]" 21,1078568573.35,"[('Location', ' http://www.cdrom.com/spotlights/burnquick.php'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Server', ' Oracle9iAS/9.0.4 Apache/1.3.27 (Unix) PHP/4.1.1 mod_ssl/2.8.14 OpenSSL/0.9.7b Oracle9iAS-Web-Cache/9.0.4.0.0 (N)'), ('Date', ' Sat, 06 Mar 2004 10:23:46 GMT'), ('Age', ' 61')]" halberd-0.2.4/tests/data/www.pogo.com.clu0000644000175000017500000002407011144236656016723 0ustar jmbrjmbr1,1077680800.06,"[('Date', ' Wed, 25 Feb 2004 03:48:56 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008397254107947;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:56 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:56 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=cp7ewn7ys4;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=cp7ewn7ys4;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680800.4,"[('Date', 
' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008912650186628;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=v9qu8g7ys6;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=v9qu8g7ys6;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680800.44,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008367189336538;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=l6kb8q7ys2;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=l6kb8q7ys2;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680800.54,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009415161359852;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=fjdi4a7ys3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=fjdi4a7ys3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680800.81,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008517513192006;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=qdth9q7ys5;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=qdth9q7ys5;Path=/'), ('Content-Type', ' 
text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680801.2,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009290607306659;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=potvh67ys5;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=potvh67ys5;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680801.27,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009535420442499;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=p9v5bw7ys3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=p9v5bw7ys3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680801.29,"[('Date', ' Wed, 25 Feb 2004 03:48:57 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008517513192007;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:57 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=vw35ja7ys7;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=vw35ja7ys7;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680801.6,"[('Date', ' Wed, 25 Feb 2004 03:48:58 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009020024368334;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=3829se7yt2;Version=1;Discard;Path=""/""'), 
('Set-Cookie', ' JSESSIONID=3829se7yt2;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680801.83,"[('Date', ' Wed, 25 Feb 2004 03:48:58 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008839635740593;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=st4zv77yt2;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=st4zv77yt2;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680801.98,"[('Date', ' Wed, 25 Feb 2004 03:48:58 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009612729856841;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=ue1glp7yt4;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=ue1glp7yt4;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680802.04,"[('Date', ' Wed, 25 Feb 2004 03:48:58 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008839635740594;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=sj5pgh7yt3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=sj5pgh7yt3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680802.26,"[('Date', ' Wed, 25 Feb 2004 03:48:58 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009050089139470;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:58 GMT;Path=/home'), ('Set-Cookie2', 
' JSESSIONID=lsjmb07yt3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=lsjmb07yt3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680802.51,"[('Date', ' Wed, 25 Feb 2004 03:48:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008637772277633;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=0hvgek7yt3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=0hvgek7yt3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680802.67,"[('Date', ' Wed, 25 Feb 2004 03:48:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009638499659223;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=rgeyoj7yt6;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=rgeyoj7yt6;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680802.78,"[('Date', ' Wed, 25 Feb 2004 03:48:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009535420442500;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=c0luae7yt4;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=c0luae7yt4;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680803.0,"[('Date', ' Wed, 25 Feb 2004 03:48:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008367189336539;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 
23-Feb-2009 03:48:59 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=8en92m7yt4;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=8en92m7yt4;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680803.08,"[('Date', ' Wed, 25 Feb 2004 03:48:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6008397254107951;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:48:59 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=duy2rd7yt3;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=duy2rd7yt3;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680803.41,"[('Date', ' Wed, 25 Feb 2004 03:49:00 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009767348678147;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:49:00 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:49:00 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=gyjecv7yu1;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=gyjecv7yu1;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680803.75,"[('Date', ' Wed, 25 Feb 2004 03:49:00 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_jk'), ('Set-Cookie', ' com.pogo.unid=6009020024368335;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:49:00 GMT;Path=/'), ('Set-Cookie', ' com.pogo.hp.ls.cfg=0;Domain=.pogo.com;Expires=Mon, 23-Feb-2009 03:49:00 GMT;Path=/home'), ('Set-Cookie2', ' JSESSIONID=iprbwe7yu1;Version=1;Discard;Path=""/""'), ('Set-Cookie', ' JSESSIONID=iprbwe7yu1;Path=/'), ('Content-Type', ' text/html;charset=ISO-8859-1'), ('Age', ' 1')]" halberd-0.2.4/tests/data/www.register.com.clu0000644000175000017500000001324711144236656017607 0ustar jmbrjmbr1,1077680533.57,"[('Date', ' Wed, 25 Feb 2004 03:44:28 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 
2\t3826174716\t2004-2-24 22:44:29\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680533.85,"[('Date', ' Wed, 25 Feb 2004 03:44:30 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174723\t2004-2-24 22:44:30\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680533.87,"[('Date', ' Wed, 25 Feb 2004 03:44:30 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174721\t2004-2-24 22:44:30\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680534.31,"[('Date', ' Wed, 25 Feb 2004 03:44:29 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174728\t2004-2-24 22:44:31\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 4')]" 1,1077680534.56,"[('Date', ' Wed, 25 Feb 2004 03:44:29 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174729\t2004-2-24 22:44:30\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680535.13,"[('Date', ' Wed, 25 Feb 2004 03:44:31 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174734\t2004-2-24 22:44:31\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680535.14,"[('Date', ' Wed, 25 Feb 2004 03:44:31 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174736\t2004-2-24 22:44:31\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680535.4,"[('Date', ' Wed, 25 Feb 2004 03:44:31 GMT'), ('Server', ' 
Apache'), ('x-reglog-nav', ' 2\t3826174739\t2004-2-24 22:44:32\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680535.71,"[('Date', ' Wed, 25 Feb 2004 03:44:31 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174743\t2004-2-24 22:44:31\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680536.33,"[('Date', ' Wed, 25 Feb 2004 03:44:32 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174748\t2004-2-24 22:44:33\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680536.42,"[('Date', ' Wed, 25 Feb 2004 03:44:32 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174749\t2004-2-24 22:44:33\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680536.63,"[('Date', ' Wed, 25 Feb 2004 03:44:31 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174752\t2004-2-24 22:44:32\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680536.93,"[('Date', ' Wed, 25 Feb 2004 03:44:33 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174758\t2004-2-24 22:44:33\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680537.34,"[('Date', ' Wed, 25 Feb 2004 03:44:33 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174760\t2004-2-24 22:44:34\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680537.67,"[('Date', ' Wed, 25 Feb 2004 
03:44:32 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174763\t2004-2-24 22:44:33\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680537.83,"[('Date', ' Wed, 25 Feb 2004 03:44:34 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174765\t2004-2-24 22:44:34\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680538.11,"[('Date', ' Wed, 25 Feb 2004 03:44:34 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174768\t2004-2-24 22:44:34\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680538.41,"[('Date', ' Wed, 25 Feb 2004 03:44:34 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174769\t2004-2-24 22:44:35\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680538.91,"[('Date', ' Wed, 25 Feb 2004 03:44:35 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174772\t2004-2-24 22:44:35\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680539.39,"[('Date', ' Wed, 25 Feb 2004 03:44:35 GMT'), ('Server', ' Apache'), ('x-reglog-nav', ' 2\t3826174777\t2004-2-24 22:44:36\t/template/CO1/index.tpl\t1\t1'), ('P3P', ' policyref=""http://www.register.com/websitepolicy.xml""'), ('Content-Type', ' text/html'), ('Age', ' 2')]" halberd-0.2.4/tests/data/www.barclays.es.clu0000644000175000017500000000221211144236650017374 0ustar jmbrjmbr38,1077676048.9,"[('Date', ' Wed, 25 Feb 2004 09:28:52 GMT'), ('Server', ' IBM_HTTP_Server'), ('Last-Modified', ' Tue, 04 Nov 2003 11:33:08 GMT'), ('ETag', ' ""1842-b7d-75698500""'), ('Accept-Ranges', ' 
bytes'), ('Content-Length', ' 2941'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 12,1077676048.94,"[('Date', ' Wed, 25 Feb 2004 09:32:23 GMT'), ('Server', ' IBM_HTTP_Server'), ('Last-Modified', ' Tue, 04 Nov 2003 11:33:08 GMT'), ('ETag', ' ""3842-b7d-75698500""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 2941'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 31,1077676049.02,"[('Date', ' Wed, 25 Feb 2004 09:28:52 GMT'), ('Server', ' IBM_HTTP_Server'), ('Last-Modified', ' Tue, 04 Nov 2003 11:33:08 GMT'), ('ETag', ' ""1842-b7d-75698500""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 2941'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 4,1077676050.26,"[('Date', ' Wed, 25 Feb 2004 09:32:24 GMT'), ('Server', ' IBM_HTTP_Server'), ('Last-Modified', ' Tue, 04 Nov 2003 11:33:08 GMT'), ('ETag', ' ""3842-b7d-75698500""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 2941'), ('Content-Type', ' text/html'), ('Age', ' 0')]" halberd-0.2.4/tests/data/www.ask.com.clu0000644000175000017500000000151611144237007016524 0ustar jmbrjmbr66,1078567445.33,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:05:58 GMT'), ('Content-Length', ' 9749'), ('Content-Type', ' text/html'), ('Set-Cookie', ' CTST=yes; expires=Sat, 06-Mar-2004 10:30:58 GMT; path=/'), ('Cache-control', ' private'), ('Age', ' 1')]" 144,1078567445.69,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:05:59 GMT'), ('Content-Length', ' 9749'), ('Content-Type', ' text/html'), ('Set-Cookie', ' CTST=yes; expires=Sat, 06-Mar-2004 10:30:58 GMT; path=/'), ('Cache-control', ' private'), ('Age', ' 0')]" 7,1078567447.86,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:06:02 GMT'), ('Content-Length', ' 9742'), ('Content-Type', ' text/html'), ('Set-Cookie', ' CTST=yes; expires=Sat, 06-Mar-2004 10:31:02 GMT; path=/'), ('Cache-control', ' private'), ('Age', ' 1')]" halberd-0.2.4/tests/data/www.dmoz.org.clu0000644000175000017500000003253011144236656016741 
0ustar jmbrjmbr2,1077677743.66,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150316'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677743.68,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108071'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677744.16,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108073'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677744.41,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150317'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677745.06,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), 
('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108072'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 4,1077677745.08,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150317'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677746.27,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150320'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 1,1077677746.49,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108074'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677747.07,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 
2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108076'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 5,1077677747.12,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150321'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677748.14,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150322'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677748.32,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108076'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 1,1077677748.6,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), 
('Content-Type', ' text/html'), ('Age', ' 151357'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677749.01,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108076'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677749.18,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 151359'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677749.21,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150323'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 1,1077677750.3,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108079'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from 
dmoz.org:8080')]" 2,1077677750.4,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 151359'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677750.48,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150323'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 3,1077677751.08,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150325'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677751.37,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 151360'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677752.05,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), 
('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150324'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677752.28,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108081'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 1,1077677752.95,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 151361'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677753.24,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150327'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677753.38,"[('Date', ' Mon, 23 Feb 2004 08:55:28 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 08:55:28 GMT'), ('Last-Modified', ' Mon, 23 Feb 
2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 151362'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 1,1077677753.87,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108081'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677754.05,"[('Date', ' Mon, 23 Feb 2004 09:12:44 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 09:12:44 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 150326'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" 2,1077677754.27,"[('Date', ' Mon, 23 Feb 2004 20:56:49 GMT'), ('Server', ' Apache/2.0.43 (Unix)'), ('Cache-Control', ' max-age=2592000'), ('Expires', ' Wed, 24 Mar 2004 20:56:49 GMT'), ('Last-Modified', ' Mon, 23 Feb 2004 07:00:00 GMT'), ('ETag', ' ""2db7ed-1803-94ff1c00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 6147'), ('Content-Type', ' text/html'), ('Age', ' 108083'), ('X-Cache', ' HIT from dmoz.org'), ('X-Cache-Lookup', ' HIT from dmoz.org:8080')]" halberd-0.2.4/tests/data/www.synnergy.net.clu0000644000175000017500000000067111144236650017640 0ustar jmbrjmbr306,1077678347.89,"[('Server', ' publicfile'), ('Date', ' Wed, 25 Feb 2004 03:08:40 GMT'), ('Last-Modified', ' Thu, 10 Jul 2003 03:48:07 GMT'), ('Content-Type', ' text/html'), ('Content-Length', ' 575'), ('Age', ' 0')]" 
44,1077678348.06,"[('Server', ' publicfile'), ('Date', ' Wed, 25 Feb 2004 03:08:40 GMT'), ('Last-Modified', ' Thu, 10 Jul 2003 03:48:07 GMT'), ('Content-Type', ' text/html'), ('Content-Length', ' 575'), ('Age', ' 0')]" halberd-0.2.4/tests/data/www.sohu.com.clu0000644000175000017500000001754111144236650016734 0ustar jmbrjmbr3,1077678094.48,"[('Date', ' Wed, 25 Feb 2004 03:00:28 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:01:38 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:46:45 GMT'), ('ETag', ' ""2d1c4-1f38d-403c0c95""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 205'), ('X-Cache', ' HIT from squid.sohu.com')]" 2,1077678096.31,"[('Date', ' Wed, 25 Feb 2004 03:00:28 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:01:38 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:46:45 GMT'), ('ETag', ' ""2d1c4-1f38d-403c0c95""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 207'), ('X-Cache', ' HIT from squid.sohu.com')]" 4,1077678098.1,"[('Date', ' Wed, 25 Feb 2004 03:00:28 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:01:38 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:46:45 GMT'), ('ETag', ' ""2d1c4-1f38d-403c0c95""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 209'), ('X-Cache', ' HIT from squid.sohu.com')]" 3,1077678100.46,"[('Date', ' Wed, 25 Feb 2004 03:00:28 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:01:38 GMT'), ('Last-Modified', ' Wed, 25 
Feb 2004 02:46:45 GMT'), ('ETag', ' ""2d1c4-1f38d-403c0c95""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 211'), ('X-Cache', ' HIT from squid.sohu.com')]" 4,1077678102.02,"[('Date', ' Wed, 25 Feb 2004 03:00:28 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:01:38 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:46:45 GMT'), ('ETag', ' ""2d1c4-1f38d-403c0c95""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 211'), ('X-Cache', ' HIT from squid.sohu.com')]" 3,1077678104.08,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 4'), ('X-Cache', ' HIT from squid.sohu.com')]" 2,1077678105.7,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 4'), ('X-Cache', ' HIT from squid.sohu.com')]" 1,1077678106.19,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 
127885'), ('Content-Type', ' text/html'), ('Age', ' 6'), ('X-Cache', ' HIT from squid.sohu.com')]" 1,1077678107.36,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 7'), ('X-Cache', ' HIT from squid.sohu.com')]" 2,1077678108.09,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 8'), ('X-Cache', ' HIT from squid.sohu.com')]" 3,1077678109.08,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 9'), ('X-Cache', ' HIT from squid.sohu.com')]" 1,1077678110.42,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 10'), ('X-Cache', ' HIT from squid.sohu.com')]" 
2,1077678111.75,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 10'), ('X-Cache', ' HIT from squid.sohu.com')]" 1,1077678112.37,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 12'), ('X-Cache', ' HIT from squid.sohu.com')]" 3,1077678113.18,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 13'), ('X-Cache', ' HIT from squid.sohu.com')]" 2,1077678114.21,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 14'), ('X-Cache', ' HIT from squid.sohu.com')]" 3,1077678115.24,"[('Date', ' Wed, 25 Feb 2004 03:03:59 GMT'), ('Server', ' Apache/1.3.26 (Unix) 
mod_gzip/1.3.19.1a'), ('Vary', ' Accept-Encoding'), ('Cache-Control', ' max-age=70'), ('Expires', ' Wed, 25 Feb 2004 03:05:09 GMT'), ('Last-Modified', ' Wed, 25 Feb 2004 02:55:07 GMT'), ('ETag', ' ""153105-1f38d-403c0e8b""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 127885'), ('Content-Type', ' text/html'), ('Age', ' 15'), ('X-Cache', ' HIT from squid.sohu.com')]" halberd-0.2.4/tests/data/www.ebay.com.clu0000644000175000017500000000130211144236650016662 0ustar jmbrjmbr17,1077677679.19,"[('Server', ' Microsoft-IIS/4.0'), ('Content-Location', ' http://www.ebay.com/index.html'), ('Date', ' Wed, 25 Feb 2004 02:56:56 GMT'), ('Content-Type', ' text/html'), ('Accept-Ranges', ' bytes'), ('Last-Modified', ' Wed, 25 Feb 2004 02:51:32 GMT'), ('ETag', ' ""0d22c464afbc31:15f37""'), ('Content-Length', ' 40718'), ('Age', ' 1')]" 2,1077677681.0,"[('Server', ' Microsoft-IIS/4.0'), ('Content-Location', ' http://www.ebay.com/index.html'), ('Date', ' Wed, 25 Feb 2004 02:56:57 GMT'), ('Content-Type', ' text/html'), ('Accept-Ranges', ' bytes'), ('Last-Modified', ' Wed, 25 Feb 2004 02:51:32 GMT'), ('ETag', ' ""0d22c464afbc31:1627d""'), ('Content-Length', ' 40718'), ('Age', ' 0')]" halberd-0.2.4/tests/data/www.tripod.com.clu0000644000175000017500000000366111144236736017262 0ustar jmbrjmbr50,1078312241.92,"[('Date', ' Wed, 03 Mar 2004 11:12:14 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 29')]" 5,1078312242.14,"[('Date', ' Wed, 03 Mar 2004 11:12:14 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 29')]" 7,1078312242.21,"[('Date', ' Wed, 03 Mar 2004 10:46:43 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 1560')]" 34,1078312242.45,"[('Date', ' Wed, 03 Mar 2004 
11:12:01 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 42')]" 17,1078312242.76,"[('Date', ' Wed, 03 Mar 2004 11:12:02 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 43')]" 23,1078312242.79,"[('Date', ' Wed, 03 Mar 2004 11:11:31 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 74')]" 4,1078312245.02,"[('Date', ' Wed, 03 Mar 2004 11:11:33 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 73')]" 28,1078312250.22,"[('Date', ' Wed, 03 Mar 2004 11:11:23 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 88')]" 7,1078312252.78,"[('Date', ' Wed, 03 Mar 2004 11:11:26 GMT'), ('Server', ' Squeegit/1.2.5 (3_sir)'), ('Location', ' http://www.tripod.lycos.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 89')]" halberd-0.2.4/tests/data/www.macromedia.com.clu0000644000175000017500000000216511144237007020050 0ustar jmbrjmbr81,1078567354.32,"[('Date', ' Sat, 06 Mar 2004 10:03:56 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 33')]" 13,1078567354.46,"[('Date', ' Sat, 06 Mar 2004 10:00:48 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 220')]" 49,1078567354.55,"[('Date', ' Sat, 06 Mar 2004 10:05:26 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 36,1078567354.65,"[('Date', ' Sat, 06 Mar 2004 10:05:36 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' 
text/html'), ('Age', ' 0')]" 6,1078567355.99,"[('Date', ' Sat, 06 Mar 2004 10:03:58 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 33')]" 8,1078567356.11,"[('Date', ' Sat, 06 Mar 2004 10:05:37 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 1,1078567375.0,"[('Date', ' Sat, 06 Mar 2004 10:05:46 GMT'), ('Server', ' Apache/1.3.29 (Unix) mod_perl/1.29'), ('Content-Type', ' text/html'), ('Age', ' 1')]" halberd-0.2.4/tests/data/hotwired.lycos.com.clu0000644000175000017500000000241211144237007020074 0ustar jmbrjmbr248,1078568167.54,"[('Date', ' Sat, 06 Mar 2004 10:18:17 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 0')]" 118,1078568167.86,"[('Date', ' Sat, 06 Mar 2004 10:18:10 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 1')]" 124,1078568167.87,"[('Date', ' Sat, 06 Mar 2004 10:17:34 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 29')]" 22,1078568168.04,"[('Date', ' Sat, 06 Mar 2004 10:18:10 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 1')]" 127,1078568168.04,"[('Date', ' Sat, 06 Mar 2004 10:17:34 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 29')]" 49,1078568170.17,"[('Date', ' Sat, 06 Mar 2004 10:18:19 GMT'), ('Server', ' Apache/1.3.26 (Unix)'), ('Location', ' http://hotwired.wired.com/'), ('Content-Type', ' text/html; charset=iso-8859-1'), ('Age', ' 0')]" 
halberd-0.2.4/tests/data/www.comcast.net.clu0000644000175000017500000000341511144237007017407 0ustar jmbrjmbr62,1078567586.16,"[('Server', ' Netscape-Enterprise/6.0'), ('Date', ' Sat, 06 Mar 2004 10:08:24 GMT'), ('Content-length', ' 1448'), ('Content-type', ' text/html'), ('Set-cookie', ' CPAC=14a34940; path=/; domain=.comcast.net'), ('Etag', ' ""636a4c75-2-0-5a8""'), ('Last-modified', ' Mon, 01 Mar 2004 19:59:53 GMT'), ('Accept-ranges', ' bytes'), ('Age', ' 1')]" 32,1078567586.38,"[('Server', ' Netscape-Enterprise/6.0'), ('Date', ' Sat, 06 Mar 2004 10:08:21 GMT'), ('Content-length', ' 1448'), ('Content-type', ' text/html'), ('Set-cookie', ' CPAC=14a34940; path=/; domain=.comcast.net'), ('Etag', ' ""636a4c75-2-0-5a8""'), ('Last-modified', ' Mon, 01 Mar 2004 19:59:53 GMT'), ('Accept-ranges', ' bytes'), ('Age', ' 0')]" 44,1078567586.44,"[('Server', ' Netscape-Enterprise/6.0'), ('Date', ' Sat, 06 Mar 2004 10:08:22 GMT'), ('Content-length', ' 1448'), ('Content-type', ' text/html'), ('Set-cookie', ' CPAC=14a34940; path=/; domain=.comcast.net'), ('Etag', ' ""636a4c75-2-0-5a8""'), ('Last-modified', ' Mon, 01 Mar 2004 19:59:53 GMT'), ('Accept-ranges', ' bytes'), ('Age', ' 0')]" 49,1078567586.46,"[('Server', ' Netscape-Enterprise/6.0'), ('Date', ' Sat, 06 Mar 2004 10:08:25 GMT'), ('Content-length', ' 1448'), ('Content-type', ' text/html'), ('Set-cookie', ' CPAC=14a34940; path=/; domain=.comcast.net'), ('Etag', ' ""636a4c75-2-0-5a8""'), ('Last-modified', ' Mon, 01 Mar 2004 19:59:53 GMT'), ('Accept-ranges', ' bytes'), ('Age', ' 0')]" 22,1078567587.92,"[('Server', ' Netscape-Enterprise/6.0'), ('Date', ' Sat, 06 Mar 2004 10:08:24 GMT'), ('Content-length', ' 1448'), ('Content-type', ' text/html'), ('Set-cookie', ' CPAC=16a34940; path=/; domain=.comcast.net'), ('Etag', ' ""636a4c75-2-0-5a8""'), ('Last-modified', ' Mon, 01 Mar 2004 19:59:53 GMT'), ('Accept-ranges', ' bytes'), ('Age', ' 1')]" 
halberd-0.2.4/tests/data/email.excite.com.clu0000644000175000017500000003642511144236650017504 0ustar jmbrjmbr9,1077678249.28,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:22 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e12.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 4')]" 8,1077678249.58,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:22 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e11.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 7,1077678249.61,"[('Date', ' Wed, 25 Feb 2004 03:06:24 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:24 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e10.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 8,1077678249.62,"[('Date', ' Wed, 25 Feb 2004 03:06:24 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:23 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e8.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 8,1077678249.84,"[('Date', ' Wed, 25 Feb 2004 03:06:26 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:25 GMT; path=/; domain=email.excite.com'), 
('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e6.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 7,1077678250.22,"[('Date', ' Wed, 25 Feb 2004 03:06:27 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:26 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e3.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 4,1077678250.27,"[('Date', ' Wed, 25 Feb 2004 03:06:27 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:26 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e2.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 7,1077678250.4,"[('Date', ' Wed, 25 Feb 2004 03:06:27 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:26 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e1.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 8,1077678250.56,"[('Date', ' Wed, 25 Feb 2004 03:06:21 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:20 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e25.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 2,1077678250.88,"[('Date', ' Wed, 25 Feb 2004 03:06:21 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 
25-Feb-03 03:06:20 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e24.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 6,1077678250.91,"[('Date', ' Wed, 25 Feb 2004 03:06:24 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:23 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e22.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 7,1077678251.08,"[('Date', ' Wed, 25 Feb 2004 03:06:22 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:21 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e20.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 6,1077678251.29,"[('Date', ' Wed, 25 Feb 2004 03:06:21 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:20 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e19.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 8')]" 6,1077678251.58,"[('Date', ' Wed, 25 Feb 2004 03:06:21 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:20 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e18.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 7')]" 9,1077678251.7,"[('Date', ' Wed, 25 Feb 2004 03:06:21 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' 
PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:20 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e17.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 7')]" 3,1077678251.84,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:22 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e15.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 2,1077678252.1,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:23 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e13.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 7')]" 5,1077678252.81,"[('Date', ' Wed, 25 Feb 2004 03:06:29 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:28 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e4.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 1,1077678253.15,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:22 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e25.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 8')]" 2,1077678253.19,"[('Date', ' Wed, 25 Feb 2004 03:06:29 GMT'), ('Server', ' 
Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:28 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e1.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 6,1077678253.59,"[('Date', ' Wed, 25 Feb 2004 03:06:23 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:22 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e24.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 7')]" 2,1077678253.86,"[('Date', ' Wed, 25 Feb 2004 03:06:25 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:24 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e20.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 6,1077678253.91,"[('Date', ' Wed, 25 Feb 2004 03:06:25 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:24 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e21.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 6,1077678254.55,"[('Date', ' Wed, 25 Feb 2004 03:06:25 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:24 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e15.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 
6,1077678254.8,"[('Date', ' Wed, 25 Feb 2004 03:06:26 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:25 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e14.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 3,1077678257.0,"[('Date', ' Wed, 25 Feb 2004 03:06:28 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:27 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e21.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 5')]" 2,1077678259.09,"[('Date', ' Wed, 25 Feb 2004 03:06:35 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:34 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e4.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 3,1077678259.2,"[('Date', ' Wed, 25 Feb 2004 03:06:35 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:34 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e2.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 4,1077678262.49,"[('Date', ' Wed, 25 Feb 2004 03:06:35 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:34 GMT; path=/; domain=email.excite.com'), ('Location', ' 
http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e22.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 4')]" 5,1077678263.2,"[('Date', ' Wed, 25 Feb 2004 03:06:35 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:34 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e13.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 2,1077678263.89,"[('Date', ' Wed, 25 Feb 2004 03:06:39 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:38 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e10.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077678267.14,"[('Date', ' Wed, 25 Feb 2004 03:06:43 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:42 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e3.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 2,1077678269.16,"[('Date', ' Wed, 25 Feb 2004 03:06:43 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:42 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e8.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 4')]" 3,1077678270.75,"[('Date', ' Wed, 25 Feb 2004 03:06:41 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:40 
GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e19.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 1,1077678271.85,"[('Date', ' Wed, 25 Feb 2004 03:06:46 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:45 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e11.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077678272.07,"[('Date', ' Wed, 25 Feb 2004 03:06:48 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:47 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e6.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 0')]" 2,1077678273.85,"[('Date', ' Wed, 25 Feb 2004 03:06:44 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:43 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e18.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 6')]" 1,1077678274.04,"[('Date', ' Wed, 25 Feb 2004 03:06:43 GMT'), ('Server', ' Apache/1.3.22 (Unix) PHP/4.1.2'), ('X-Powered-By', ' PHP/4.1.2'), ('Set-Cookie', ' ArdSI=deleted; expires=Tue, 25-Feb-03 03:06:42 GMT; path=/; domain=email.excite.com'), ('Location', ' http://registration.excite.com/excitereg/login.jsp?ref=email&return_url=http://e17.email.excite.com'), ('Content-Type', ' text/html'), ('Age', ' 7')]" halberd-0.2.4/tests/data/agartha.clu0000644000175000017500000000212511144236650015755 0ustar jmbrjmbr830,1077675383.88,"[('Date', ' Wed, 25 Feb 2004 
02:16:23 GMT'), ('Server', ' Apache/2.0.48 (Unix) DAV/2 SVN/0.35.1'), ('Content-Location', ' index.html.en'), ('Vary', ' negotiate,accept-language,accept-charset'), ('TCN', ' choice'), ('Last-Modified', ' Sat, 22 Nov 2003 15:56:12 GMT'), ('ETag', ' ""252ff0-5b0-3b5aff00;253006-961-3b5aff00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 1456'), ('Keep-Alive', ' timeout=15, max=100'), ('Connection', ' Keep-Alive'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Content-Language', ' en')]" 3,1077675385.0,"[('Date', ' Wed, 25 Feb 2004 02:16:24 GMT'), ('Server', ' Apache/2.0.48 (Unix) DAV/2 SVN/0.35.1'), ('Content-Location', ' index.html.en'), ('Vary', ' negotiate,accept-language,accept-charset'), ('TCN', ' choice'), ('Last-Modified', ' Sat, 22 Nov 2003 15:56:12 GMT'), ('ETag', ' ""252ff0-5b0-3b5aff00;253006-961-3b5aff00""'), ('Accept-Ranges', ' bytes'), ('Content-Length', ' 1456'), ('Keep-Alive', ' timeout=15, max=100'), ('Connection', ' Keep-Alive'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Content-Language', ' en')]" halberd-0.2.4/tests/data/login.passport.net.clu0000644000175000017500000000362111144237007020114 0ustar jmbrjmbr88,1078568344.55,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:20:58 GMT'), ('PPServer', ' H: Lawpplogu3a006'), ('Content-Type', ' text/html'), ('Expires', ' Sat, 06 Mar 2004 10:19:58 GMT'), ('Cache-Control', ' no-cache'), ('P3P', ' CP=""DSP CUR OTPi IND OTRi ONL FIN""'), ('Set-Cookie', ' MSPRequ=lt=1078568458&co=1&id=10'), ('Location', ' http://login.passport.com/login.srf?lc=1033&sf=1&id=10&tw=20&fs=0&cb=&ts=0&sec=&mspp_shared=&seclog=0'), ('Age', ' 0')]" 21,1078568344.99,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:20:58 GMT'), ('PPServer', ' H: LAWPPLOGU3A003'), ('Content-Type', ' text/html'), ('Expires', ' Sat, 06 Mar 2004 10:19:59 GMT'), ('Cache-Control', ' no-cache'), ('P3P', ' CP=""DSP CUR OTPi IND OTRi ONL FIN""'), ('Set-Cookie', ' 
MSPRequ=lt=1078568459&co=1&id=10'), ('Location', ' http://login.passport.com/login.srf?lc=1033&sf=1&id=10&tw=20&fs=0&cb=&ts=0&sec=&mspp_shared=&seclog=0'), ('Age', ' 2')]" 43,1078568345.02,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:20:58 GMT'), ('PPServer', ' H: Lawpplogu3a006'), ('Content-Type', ' text/html'), ('Expires', ' Sat, 06 Mar 2004 10:19:58 GMT'), ('Cache-Control', ' no-cache'), ('P3P', ' CP=""DSP CUR OTPi IND OTRi ONL FIN""'), ('Set-Cookie', ' MSPRequ=lt=1078568458&co=1&id=10'), ('Location', ' http://login.passport.com/login.srf?lc=1033&sf=1&id=10&tw=20&fs=0&cb=&ts=0&sec=&mspp_shared=&seclog=0'), ('Age', ' 2')]" 111,1078568345.39,"[('Server', ' Microsoft-IIS/5.0'), ('Date', ' Sat, 06 Mar 2004 10:20:58 GMT'), ('PPServer', ' H: LAWPPLOGU3A003'), ('Content-Type', ' text/html'), ('Expires', ' Sat, 06 Mar 2004 10:19:59 GMT'), ('Cache-Control', ' no-cache'), ('P3P', ' CP=""DSP CUR OTPi IND OTRi ONL FIN""'), ('Set-Cookie', ' MSPRequ=lt=1078568459&co=1&id=10'), ('Location', ' http://login.passport.com/login.srf?lc=1033&sf=1&id=10&tw=20&fs=0&cb=&ts=0&sec=&mspp_shared=&seclog=0'), ('Age', ' 1')]" halberd-0.2.4/tests/data/www.pricegrabber.com.clu0000644000175000017500000003301611144236656020406 0ustar jmbrjmbr1,1077680167.8,"[('Date', ' Wed, 25 Feb 2004 03:38:24 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680304531705; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebe1e36dcb; expires=Thu, 24-Feb-2005 03:38:24 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebe1e36dcb'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.09,"[('Date', ' Wed, 25 Feb 2004 03:38:24 
GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680304831303; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebfc47de2c; expires=Thu, 24-Feb-2005 03:38:24 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebfc47de2c'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.11,"[('Date', ' Wed, 25 Feb 2004 03:38:24 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680304844672; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebb82004b8; expires=Thu, 24-Feb-2005 03:38:24 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebb82004b8'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.14,"[('Date', ' Wed, 25 Feb 2004 03:38:24 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680304881331; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebffba685a; expires=Thu, 24-Feb-2005 03:38:24 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebffba685a'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; 
charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680168.26,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305013386; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb8391ee91; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb8391ee91'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680168.5,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305273442; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebadb5e759; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebadb5e759'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.74,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305503770; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb9a7f9cc4; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb9a7f9cc4'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR 
DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.81,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305541262; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb52c993f7; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb52c993f7'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680168.83,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305601561; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebd2ff3e67; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebd2ff3e67'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680169.05,"[('Date', ' Wed, 25 Feb 2004 03:38:25 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680305792008; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb2416a8cf; expires=Thu, 24-Feb-2005 03:38:25 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb2416a8cf'), ('Vary', ' Accept-Encoding,User-Agent'), 
('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680169.31,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306062759; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb8879ace2; expires=Thu, 24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb8879ace2'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680169.51,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306241368; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebaeaffb6f; expires=Thu, 24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebaeaffb6f'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680169.53,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306271696; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb4bbeffc8; expires=Thu, 24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' 
http://www.pricegrabber.com/index.php/ut=503a00eb4bbeffc8'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680169.61,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306385069; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebe53a86db; expires=Thu, 24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebe53a86db'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680170.02,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306693876; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eba9b4f076; expires=Thu, 24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eba9b4f076'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680170.23,"[('Date', ' Wed, 25 Feb 2004 03:38:26 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680306979304; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb89c58f2c; expires=Thu, 
24-Feb-2005 03:38:26 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb89c58f2c'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 2')]" 1,1077680170.28,"[('Date', ' Wed, 25 Feb 2004 03:38:27 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680307002398; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00eb12ae1210; expires=Thu, 24-Feb-2005 03:38:27 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00eb12ae1210'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680170.34,"[('Date', ' Wed, 25 Feb 2004 03:38:27 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680307063745; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebe1156421; expires=Thu, 24-Feb-2005 03:38:27 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebe1156421'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 1')]" 1,1077680170.72,"[('Date', ' Wed, 25 Feb 2004 03:38:27 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680307417873; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' 
PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebc97ed007; expires=Thu, 24-Feb-2005 03:38:27 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebc97ed007'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" 1,1077680170.74,"[('Date', ' Wed, 25 Feb 2004 03:38:27 GMT'), ('Server', ' Apache/2.0.46 (Red Hat)'), ('Set-Cookie', ' Apache=80.58.0.235.1077680307417823; path=/'), ('Accept-Ranges', ' bytes'), ('X-Powered-By', ' PHP/4.3.2'), ('X-Accelerated-By', ' PHPA/1.3.3r2'), ('Set-Cookie', ' ut_cookie=503a00ebe7a88858; expires=Thu, 24-Feb-2005 03:38:27 GMT; path=/; domain=.pricegrabber.com'), ('Location', ' http://www.pricegrabber.com/index.php/ut=503a00ebe7a88858'), ('Vary', ' Accept-Encoding,User-Agent'), ('Content-Encoding', ' gzip'), ('P3P', ' CP=""CAO DSP COR LAW ADM TAIi OUR DELi BUS PRE""'), ('Content-Length', ' 20'), ('Content-Type', ' text/html; charset=ISO-8859-1'), ('Age', ' 0')]" halberd-0.2.4/tests/data/www.yesky.com.clu0000644000175000017500000001233211144236656017121 0ustar jmbrjmbr2,1077680699.38,"[('Date', ' Wed, 25 Feb 2004 03:47:15 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-239'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 2,1077680699.61,"[('Date', ' Wed, 25 Feb 2004 03:47:16 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-240'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680700.89,"[('Date', ' Wed, 25 Feb 2004 03:47:17 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-241'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), 
('Content-Type', ' text/html'), ('Age', ' 1')]" 2,1077680701.21,"[('Date', ' Wed, 25 Feb 2004 03:47:17 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-241'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680702.32,"[('Date', ' Wed, 25 Feb 2004 03:47:18 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-242'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680703.08,"[('Date', ' Wed, 25 Feb 2004 03:47:18 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-242'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680703.69,"[('Date', ' Wed, 25 Feb 2004 03:47:19 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-243'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680704.9,"[('Date', ' Wed, 25 Feb 2004 03:47:21 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-245'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 2,1077680705.28,"[('Date', ' Wed, 25 Feb 2004 03:47:21 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-245'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 2,1077680706.35,"[('Date', ' Wed, 25 Feb 2004 03:47:22 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-246'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680706.82,"[('Date', ' Wed, 25 Feb 2004 03:47:23 GMT'), ('Server', ' Apache/1.3.29 
(Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-247'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680707.31,"[('Date', ' Wed, 25 Feb 2004 03:47:23 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-247'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 1,1077680707.73,"[('Date', ' Wed, 25 Feb 2004 03:47:24 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-248'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 2,1077680708.52,"[('Date', ' Wed, 25 Feb 2004 03:47:24 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-248'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680708.96,"[('Date', ' Wed, 25 Feb 2004 03:47:25 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-249'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" 1,1077680709.16,"[('Date', ' Wed, 25 Feb 2004 03:47:25 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-249'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 3')]" 2,1077680710.36,"[('Date', ' Wed, 25 Feb 2004 03:47:26 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-250'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680711.77,"[('Date', ' Wed, 25 Feb 2004 03:47:27 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-251'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), 
('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680712.41,"[('Date', ' Wed, 25 Feb 2004 03:47:28 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-252'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 2')]" 1,1077680712.7,"[('Date', ' Wed, 25 Feb 2004 03:47:29 GMT'), ('Server', ' Apache/1.3.29 (Unix)'), ('Cache-Control', ' public,max-age=300,must-revalidate, max-age=-253'), ('Expires', ' Wed, 25 Feb 2004 03:43:16 GMT'), ('Content-Type', ' text/html'), ('Age', ' 1')]" halberd-0.2.4/tests/test_clues_analysis.py0000644000175000017500000000621511431512414017355 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Unit test for Halberd.clues.analysis """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import os import unittest import Halberd.ScanTask import Halberd.clues.file import Halberd.clues.analysis as analysis class TestAnalysis(unittest.TestCase): def setUp(self): self.threshold = Halberd.ScanTask.default_ratio_threshold def _hits(self, clues): return sum(map(lambda c: c.getCount(), clues)) def _getClues(self, filename): fname = os.path.join('tests', 'data', filename + '.clu') return Halberd.clues.file.load(fname) def analyze(self, filename, expected_raw, expected_analyzed): clues = self._getClues(filename) self.failUnless(len(clues) >= expected_raw) analyzed = analysis.analyze(clues) analyzed = analysis.reanalyze(clues, analyzed, self.threshold) self.failUnlessEqual(len(analyzed), expected_analyzed) total_before = self._hits(clues) total_after = self._hits(analyzed) self.failUnlessEqual(total_before, total_after) def testSimple(self): self.analyze('agartha', 2, 1) def testSynnergy(self): self.analyze('www.synnergy.net', 2, 1) def testTripod(self): self.analyze('www.tripod.com', 9, 5) def testEbay(self): self.analyze('www.ebay.com', 2, 1) def testBarclays(self): self.analyze('www.barclays.es', 3, 2) def testSohu(self): self.analyze('www.sohu.com', 15, 2) def testDmoz(self): self.analyze('www.dmoz.org', 15, 3) def testExcite(self): self.analyze('email.excite.com', 30, 20) def testRegister(self): self.analyze('www.register.com', 20, 1) def testPricegrabber(self): self.analyze('www.pricegrabber.com', 20, 1) def testYesky(self): self.analyze('www.yesky.com', 20, 1) def testPogo(self): self.analyze('www.pogo.com', 20, 1) def testMacromedia(self): self.analyze('www.macromedia.com', 7, 4) def testAsk(self): self.analyze('www.ask.com', 3, 1) def testComcast(self): self.analyze('www.comcast.net', 5, 2) def testHotwired(self): self.analyze('hotwired.lycos.com', 
6, 3) def testPassport(self): self.analyze('login.passport.net', 4, 2) def testCdrom(self): self.analyze('www.cdrom.com', 4, 2) if __name__ == '__main__': unittest.main() # vim: ts=4 sw=4 et halberd-0.2.4/tests/test_clientlib.py0000644000175000017500000001236611431512414016310 0ustar jmbrjmbr# -*- coding: iso-8859-1 -*- """Unit tests for Halberd.clientlib """ # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import unittest import urlparse import Halberd.clientlib as clientlib # TODO - Implement an HTTPServer so the test suite doesn't need to connect to # external hosts. # This HTTPServer must be bound only to localhost (for security reasons). # TODO - Think about something similar for HTTPS. 
class TestHTTPClient(unittest.TestCase): def setUp(self): self.client = clientlib.HTTPClient() def testGetHostAndPort(self): self.failUnlessEqual(self.client._getHostAndPort('localhost:8080'), ('localhost', 8080)) self.failUnlessEqual(self.client._getHostAndPort('localhost'), ('localhost', self.client.default_port)) self.assertRaises(clientlib.InvalidURL, self.client._getHostAndPort, 'localhost:abc') def testFillTemplate(self): def get_request(url): scheme, netloc, url, params, query, fragment = \ urlparse.urlparse(url) hostname, port = self.client._getHostAndPort(netloc) return self.client._fillTemplate(hostname, port, url, params, query, fragment) req = get_request('http://www.example.com:23/test?blop=777') self.failUnless(req.splitlines()[:2] == \ ['GET /test?blop=777 HTTP/1.1', 'Host: www.example.com:23']) req = get_request('http://www.example.com/test;blop?q=something') self.failUnless(req.splitlines()[:2] == \ ['GET /test;blop?q=something HTTP/1.1', 'Host: www.example.com']) req = get_request('http://localhost:8080') self.failUnless(req.splitlines()[0] == 'GET / HTTP/1.1') def testAntiCache(self): req = self.client._fillTemplate('localhost', 80, '/index.html') self.failUnless(req.splitlines()[2:4] == \ ['Pragma: no-cache', 'Cache-control: no-cache']) def testSendRequestSanityCheck(self): self.failUnlessRaises(clientlib.InvalidURL, self.client._putRequest, '127.0.0.1', 'gopher://blop') def testSendRequestToLocal(self): try: self.client._putRequest('127.0.0.1', 'http://localhost:8000') except clientlib.ConnectionRefused: return def testSendRequestToRemote(self): self.client._putRequest('66.35.250.203', 'http://www.sourceforge.net') timestamp, headers = self.client._getReply() self.failUnless(headers and headers.startswith('HTTP/')) def testGetHeaders(self): addr, url = '66.35.250.203', 'http://www.sourceforge.net' reply = self.client.getHeaders(addr, url) self.failUnless(reply != (None, None)) def testIncorrectReading(self): """Check for bug in _getReply (issue 
60) Incorrect reading procedure in Halberd.clientlib.HTTPClient._getReply """ self.client.bufsize = 1 self.client.timeout = 10 addr, url = '127.0.0.1', 'http://localhost' self.client._putRequest(addr, url) try: timestamp, headers = self.client._getReply() except clientlib.TimedOut, msg: self.fail('Timed out while trying to read terminator') self.failUnless(headers) class TestHTTPSClient(unittest.TestCase): def setUp(self): self.client = clientlib.HTTPSClient() def testGetHostAndPort(self): self.failUnlessEqual(self.client._getHostAndPort('secure'), ('secure', self.client.default_port)) self.failUnlessEqual(self.client._getHostAndPort('secure:777'), ('secure', 777)) def testConnect(self): clientlib.HTTPSClient()._connect(('www.sourceforge.net', 443)) def testInvalidConnect(self): self.failUnlessRaises(clientlib.HTTPSError, clientlib.HTTPSClient()._connect, ('localhost', 80)) # XXX For better testing a keyfile and a certificate should be used. def testSendRequestToRemote(self): self.client._putRequest('66.35.250.203', 'https://www.sourceforge.net') timestamp, headers = self.client._getReply() self.failUnless(headers != None and headers.startswith('HTTP/')) if __name__ == '__main__': unittest.main() # vim: ts=4 sw=4 et halberd-0.2.4/setup.py0000755000175000017500000000745111431512414013304 0ustar jmbrjmbr#!/usr/bin/env python # -*- coding: iso-8859-1 -*- # Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
class test(Command):
    """Automated testing.

    Distutils command which builds the distribution and then runs every
    ``test_*.py`` module found in the test directory through unittest.

    Based upon:
    http://mail.python.org/pipermail/distutils-sig/2002-January/002714.html
    """

    description = "test the distribution prior to install"

    user_options = [
        ('test-dir=', None,
         "directory that contains the test definitions"),
    ]

    def initialize_options(self):
        """Set the default value of the test-dir option."""
        self.test_dir = 'tests'

    def finalize_options(self):
        """Record the output directories of the finalized 'build' command."""
        build = self.get_finalized_command('build')
        self.build_purelib = build.build_purelib
        self.build_platlib = build.build_platlib

    def run(self):
        """Build the package and run all the test modules against the build.

        sys.path is extended with the build directories and the test
        directory while the tests run and is restored afterwards, even when
        importing or running a test module raises an exception.
        """
        import sys
        import unittest

        self.run_command('build')
        self.run_command('build_ext')

        # Remember the old sys.path to restore it afterwards.
        old_path = sys.path[:]

        # Extend sys.path so the freshly built code and the tests are found.
        sys.path.insert(0, self.build_purelib)
        sys.path.insert(0, self.build_platlib)
        sys.path.insert(0, os.path.join(os.getcwd(), self.test_dir))

        try:
            # Sorted so that the modules run in a reproducible order
            # (os.listdir makes no ordering promise).
            modules = sorted(
                name[:-3] for name in os.listdir(self.test_dir)
                if name.startswith('test_') and name.endswith('.py'))

            loader = unittest.TestLoader()
            runner = unittest.TextTestRunner(verbosity=2)

            for module in modules:
                print("Running tests found in '%s'..." % module)
                suite = loader.loadTestsFromModule(
                    __import__(module, globals(), locals(), []))
                runner.run(suite)
        finally:
            # Restore sys.path no matter how the test run ended.
            sys.path = old_path
The complete list can be grabbed from: # http://www.python.org/pypi?:action=list_classifiers classifiers = """\ Development Status :: 4 - Beta Environment :: Console Intended Audience :: Developers Intended Audience :: Information Technology Intended Audience :: System Administrators License :: OSI Approved :: GNU General Public License (GPL) Natural Language :: English Operating System :: OS Independent Programming Language :: Python Topic :: Internet :: WWW/HTTP Topic :: Security """ setup( name = 'halberd', version = version.v_short, description = 'HTTP load balancer detector', long_description = long_description, author = 'Juan M. Bello Rivas', author_email = 'jmbr@superadditive.com', url = 'http://halberd.superadditive.com/', packages = ['Halberd', 'Halberd.clues'], package_dir = {'Halberd': 'Halberd'}, scripts = [D('scripts', 'halberd')], data_files = [(D('man', 'man1'), \ [D('man', 'man1', 'halberd.1')])], classifiers = classifiers.splitlines(), cmdclass = {'test': test}, ) # vim: ts=4 sw=4 et halberd-0.2.4/INSTALL0000644000175000017500000000132111431512414012606 0ustar jmbrjmbrInstallation ============ Prerequisites ------------- You need Python version 2.6 or above with the threading module enabled. If you want to scan using the HTTPS protocol, you will also need a Python interpreter configured with support for SSL sockets. Platforms --------- Halberd should work in every machine satisfying the prerequisites mentioned above. The program has been successfully built and tested on GNU/Linux, Windows 2000 and Mac OS X. Steps ----- Installing Halberd is a very simple task. It suffices to write (perhaps as root): python setup.py install It is recommended that you read the output of: python setup.py install --help in case you want to fine tune the installation process. 
halberd-0.2.4/doc/0000755000175000017500000000000011431513770012333 5ustar jmbrjmbrhalberd-0.2.4/doc/manual.tex0000644000175000017500000002764111431512414014336 0ustar jmbrjmbr\documentclass[a4paper]{book} \usepackage[english]{babel} \begin{document} \title{Halberd user's guide} \author{Juan M. Bello Rivas} \maketitle \tableofcontents % 1. Explain the problem being solved. % 2. Present the concepts, not just the features. % 3. Give'em more than they deserve. % 4. Make it enjoyable to read. \chapter{Introduction} Halberd discovers HTTP load balancers. It is useful for web application security auditing and for load balancer configuration testing. \section{Motivation} To cope with heavy traffic loads, web site administrators often install load balancer devices. These machines hide (possibly) many real web servers behind a virtual IP. They receive HTTP requests and redirect them to the real web servers in order to share the traffic between them. There are a few ways to map the servers behind the VIP and to reach them individually. Identifying and being able to reach all real servers individually (effectively bypassing the load balancer) is very important for an attacker trying to break into a site. It is often the case that there are configuration differences ranging from the slight: \begin{itemize} \item server software versions, \item server modules \end{itemize} to the extreme: \begin{itemize} \item different platforms \item server software. \end{itemize} For an attacker, this information is crucial because he might find vulnerable configurations that otherwise (without mapping the real servers) could have gone unnoticed. But someone trying to break into a web site doesn't have server software as its only target. He will try to subvert dynamic server pages in several ways. By identifying all the real servers and scanning them individually for vulnerabilities, he might find bugs affecting only one or a few of the web servers. 
Even if all machines are running the same server software, halberd can enumerate them allowing more thorough vulnerability scans on the application level. \chapter{Concepts} Halberd operates in stages: \begin{enumerate} \item Initially, it sends multiple requests to the target web server and records its responses. This is called the \emph{sampling phase}. The time to spend in this phase and the amount of HTTP requests to be sent can be specified using the \texttt{--time} and \texttt{--parallelism} command line options. See \ref{sec:time} and \ref{sec:parallelism}. \item After the sampling phase finishes, either normally or because the user interrupted it pressing Control-C, the program processes the replies looking for signs of load balancing. This is called the \emph{analysis phase}. \item Finally, halberd writes a report of its findings to the screen (or to a file if the \texttt{--out} option is used). \end{enumerate} The user may skip the scanning phase and proceed to the analysis of samplings saved in a previous session. See \ref{sec:read} and \ref{sec:write}. The following is a list of detection techniques currently implemented: \section{Date comparison} HTTP responses reveal the internal clock of the web server that produces them. By tracking how many different clocks there appear to be, halberd can have some insight into the number of real servers. \section{MIME header field names, values and their order} Differences in fields appearing in server responses can allow halberd to narrow down its search. %\section{Special cookie usage} % %Some HTTP load balancers generate a cookie which will be used to direct future %requests to a certain server. Administrators can also set up cookies that will %be taken into account for deciding where to send requests. By selecting which %cookies to send, the program attempts to force the load balancer to route %requests to a server of our choice.
\section{Generating high amounts of traffic} Under certain configurations, load balancers might start to distribute traffic only after a certain threshold has been reached. By default, halberd attempts to generate a sizable traffic volume to trigger this condition and reach as many real servers as possible. See \ref{sec:parallelism}. %Halberd can be used in a distributed fashion to generate heavy traffic loads. %Running the program in RPC server mode in computers located at several %networks lets the user launch distributed scans against a web site, forcing %the idle web servers to reveal themselves. \section{Using different URLs} An HTTP load balancer can be configured to take URLs into account when deciding where to direct requests. Gathering URLs with a spider and feeding them to halberd will make it more likely that all servers will reply at some point. See \ref{sec:urlfile}. \section{Detecting server-side caches} Halberd might come across web sites having server-side caches (e.g.: Squid). This kind of configuration is appropriately identified by halberd and in case there is more than one cache, the program can enumerate them. \section{Obtaining public IP addresses} Sometimes cookies or special MIME fields in server responses can reveal public IP addresses or host names. In these cases the load balancer can be bypassed connecting directly to the real servers. \chapter{Installation} \section{Prerequisites} You need Python version 2.3 or above with the threading and MD5 (or SHA1) modules enabled. If you want to scan through HTTPS you will also need a Python interpreter configured with support for SSL sockets. \section{Supported platforms} Halberd should work in every machine satisfying the prerequisites mentioned above. The program has been successfully built and tested on GNU/Linux, Windows 2000 and Mac OS X. \section{Installation steps} Installing halberd is a very simple task. 
It suffices to write (perhaps as root): \begin{verbatim} python setup.py install \end{verbatim} Halberd creates the file \verb|$HOME/.halberd/halberd.cfg| when you execute it for the first time. This file lets the user configure proxy settings and specify an SSL certificate. It is recommended that you read the output of \verb|python setup.py install --help| in case you want to fine tune the installation process. \chapter{Operation} This chapter explains how to use halberd in the real world. \section{Sample session} Peter is on the phone, talking to a client who asks him to perform a penetration test of a large web application that's just entered production. He impatiently taps his fingers against the wooden table waiting for the email from the client to arrive. Our hero is finally given the green light and he begins his dutiful work. First of all, a load balancer scanning is due. He fires up a virtual terminal and writes: \begin{verbatim} $ halberd http://www.target-company.com \end{verbatim} Peter reads the output of his previous incantation and nods... % XXX: use sed or whatever to update the version here. \begin{verbatim} halberd 0.2.0 INFO looking up host www.target-company.com... INFO host lookup done. INFO www.target-company.com resolves to x.x.x.1 INFO www.target-company.com resolves to x.x.x.2 x.x.x.1 [###### ] clues: 3 | replies: 17 | missed: 0 \end{verbatim} After 17 replies he thinks halberd has enough samples to analyze, so he hits Control-C and stops the scan for this host. Peter notes there is DNS load balancing in place so he'll have to check both IP addresses. 
\begin{verbatim} *** finished (received SIGINT) *** ============================================================= http://www.target-company.com (x.x.x.1): 1 real server(s) ============================================================= server 1: foo/1.2.3 mod_bar/4.2 (Unix) ------------------------------------------------------------- difference: 3600 seconds successful requests: 17 hits (100.00%) header fingerprint: c0ba8262100168851872c8feea3196f21ba2d732 different headers: 1. Server: foo/1.2.3 mod_bar/4.2 (Unix) \end{verbatim} "Nothing to see here, let's move along" muttered Peter while halberd progressed to the second IP address. \begin{verbatim} ============================================================ http://www.target-company.com (x.x.x.2): 2 real server(s) ============================================================ server 1: foo/1.2.2 mod_bar/4.2 (Unix) ------------------------------------------------------------ difference: 3600 seconds successful requests: 11 hits (33.33%) header fingerprint: 732deadbeef100168851872c8feea196f21ba2d2 different headers: 1. Date: Wed, 16 Aug 2006 22:47:04 GMT 2. Server: foo/1.2.2 mod_bar/4.2 (Unix) server 2: foo/1.2.3 mod_bar/4.2 (Unix) ------------------------------------------------------------ difference: 3662 seconds successful requests: 23 hits (66.66%) header fingerprint: ad2d33a88f259b434c094a7b1172f5697a35cff4 different headers: 1. Date: Wed, 16 Aug 2006 22:46:02 GMT 2. Server: foo/1.2.3 mod_bar/4.2 (Unix) \end{verbatim} "Aha!" shouts our fellow, almost falling off his chair. "Not only did they forget to set up NTP on those servers, letting me distinguish them easily, they also have different \emph{server versions}!" His eyes go blank while he runs a search in his brain cell based, caffeine fueled vulnerability database for the terms foo/1.2.2 mod\_bar/4.2. He remembers there was a recent exploit for a buffer overflow in that version of foo. 
Peter has to take into account that the vulnerable web server gets one third of the traffic (33.33\%). Thus, he executes the exploit enough times to make sure it hits the exposed target and he finally breaks into the machine. "I'm so lucky to have this wonderful load balancer detector in my toolkit! This vulnerability could have easily been skipped." Our hero begins to walk in circles around the room, planning the next stages of his assault and anticipating with apprehension the time when he'll have to write the report for his client. \section{Command line options} \subsection{\texttt{--version}} Shows the program's version number. \subsection{\texttt{-h, --help}} Shows a help message describing every option. \subsection{\texttt{-q, --quiet}} Runs quietly, limiting the amount of information being displayed while the program runs. \subsection{\texttt{-d, --debug}} Enables debugging information. This can be useful if you want halberd to dump all the HTTP headers or if you're debugging the tool itself. \subsection{\texttt{-t NUM, --time=NUM}} \label{sec:time} Stops halberd after NUM seconds have passed since the beginning of the sampling phase. \subsection{\texttt{-p NUM, --parallelism=NUM}} \label{sec:parallelism} Specifies the number of parallel threads to use for network operations. This can increase the amount of requests per second during the sampling phase. \subsection{\texttt{-u FILE, --urlfile=FILE}} \label{sec:urlfile} Read URLs from FILE. FILE is a text archive containing an URL in each line. halberd will scan these URLs one by one. \subsection{\texttt{-o FILE, --out=FILE}} Writes the human-readable results from the analysis phase to FILE. \subsection{\texttt{-a ADDR, --address=ADDR}} Specifies the target by its IP address. \subsection{\texttt{-r FILE, --read=FILE}} \label{sec:read} Loads and analyzes \emph{clues} from FILE. \emph{Clues} are what halberd uses to figure out (during the analysis phase) whether there is a load balancer or not. 
These \emph{clues} can be written (un-analyzed) to a file with the \texttt{--write} option for comparison with future scans or other purposes. \subsection{\texttt{-w DIR, --write=DIR}} \label{sec:write} Saves \emph{clues} to the specified directory. If it doesn't exist, it will be created. For being portable, \emph{clues} are stored in text files contained in a special directory layout. The following is an example of the generated file hierarchy: \begin{verbatim} http___www_target_company_com/ http___www_target_company_com/x_x_x_1.clu http___www_target_company_com/x_x_x_2.clu \end{verbatim} \subsection{\texttt{--config=FILE}} Tells halberd to use the configuration stored in FILE instead of the default \verb|halberd.cfg|. \chapter{Support} Suggestions, bug reports and patches can be emailed to the author at \texttt{jmbr@superadditive.com} \end{document} % vim: ts=2 sw=2 et ft=tex halberd-0.2.4/doc/GNUmakefile0000644000175000017500000000220711431512414014400 0ustar jmbrjmbr# Copyright (C) 2004, 2005, 2006, 2010 Juan M. Bello Rivas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA LATEX := /usr/bin/latex DVIPDF := /usr/bin/dvipdf LATEX2HTML := /usr/bin/latex2html manual.pdf: manual.dvi $(DVIPDF) $^ $@ manual.dvi: manual.tex $(LATEX) $^ $(LATEX) $^ manual: manual.tex rm -rf manual $(LATEX2HTML) -local_icons -dir $@ -mkdir $^ .PHONY: clean clobber clean: rm -rf manual rm -f *.aux *.log *.dvi *.toc *.pdf clobber: clean rm -f *~ *.bak halberd-0.2.4/doc/api/0000755000175000017500000000000011431513770013104 5ustar jmbrjmbrhalberd-0.2.4/doc/api/toc-Halberd.logger-module.html0000644000175000017500000000317311431512607020661 0ustar jmbrjmbr logger

Module logger


Functions

getLogger
setDebug
setError

Variables

__package__

[hide private] halberd-0.2.4/doc/api/Halberd.clientlib.UnknownReply-class.html0000644000175000017500000001616511431512607023063 0ustar jmbrjmbr Halberd.clientlib.UnknownReply
Package Halberd :: Module clientlib :: Class UnknownReply
[hide private]
[frames] | no frames]

Class UnknownReply

source code

              object --+            
                       |            
exceptions.BaseException --+        
                           |        
        exceptions.Exception --+    
                               |    
                       HTTPError --+
                                   |
                                  UnknownReply

The remote host didn't return an HTTP reply

Instance Methods [hide private]

Inherited from HTTPError: __deepcopy__, __init__, __str__

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/doc/api/Halberd.clues.Clue-module.html0000644000175000017500000001365311431512607020625 0ustar jmbrjmbr Halberd.clues.Clue
Package Halberd :: Package clues :: Module Clue
[hide private]
[frames] | no frames]

Module Clue

source code

Clue generation module.

Clues are pieces of information obtained from the responses sent by a webserver. Their importance comes from the fact that they're the datastructure we use to detect real servers behind HTTP load balancer devices.

Classes [hide private]
  Clue
A clue is what we use to tell real servers behind a virtual IP.
Variables [hide private]
  __package__ = 'Halberd.clues'
halberd-0.2.4/doc/api/toc-Halberd.clues.file-module.html0000644000175000017500000000277711431512607021444 0ustar jmbrjmbr file

Module file


Classes

ClueDir
InvalidFile

Functions

load
save

Variables

__package__

[hide private] halberd-0.2.4/doc/api/redirect.html0000644000175000017500000000403211431512610015562 0ustar jmbrjmbrEpydoc Redirect Page

Epydoc Auto-redirect page

When javascript is enabled, this page will redirect URLs of the form redirect.html#dotted.name to the documentation for the object with the given fully-qualified dotted name.

 

halberd-0.2.4/doc/api/Halberd.clues.analysis-module.html0000644000175000017500000011527711431512607021565 0ustar jmbrjmbr Halberd.clues.analysis
Package Halberd :: Package clues :: Module analysis
[hide private]
[frames] | no frames]

Module analysis

source code

Utilities for clue analysis.

Functions [hide private]
list
diff_fields(clues)
Study differences between fields.
source code
 
ignore_changing_fields(clues)
Tries to detect and ignore MIME fields with ever changing content.
source code
str
get_digest(clue)
Returns the specified clue's digest.
source code
tuple
clusters(clues, step=3)
Finds clusters of clues.
source code
Clue
merge(clues)
Merges a sequence of clues into one.
source code
dict
classify(seq, *classifiers)
Classify a sequence according to one or several criteria.
source code
list
sections(classified, sects=None)
Returns sections (and their items) from a nested dict.
source code
list
deltas(xs)
Computes the differences between the elements of a sequence of integers.
source code
list of slice
slices(start, xs)
Returns slices of a given sequence separated by the specified indices.
source code
 
sort_clues(clues)
Sorts clues according to their time difference.
source code
list
filter_proxies(clues, maxdelta=3)
Detect and merge clues pointing to a proxy cache on the remote end.
source code
list
uniq(clues)
Return a list of unique clues.
source code
int
hits(clues)
Compute the total number of hits in a sequence of clues.
source code
list
analyze(clues)
Draw conclusions from the clues obtained during the scanning phase.
source code
 
reanalyze(clues, analyzed, threshold)
Identify and ignore changing header fields.
source code
 
_test() source code
Variables [hide private]
  logger = Halberd.logger.getLogger()
  __package__ = 'Halberd.clues'
Function Details [hide private]

diff_fields(clues)

source code 

Study differences between fields.

Parameters:
  • clues (list) - Clues to analyze.
Returns: list
Fields which were found to be different among the analyzed clues.

ignore_changing_fields(clues)

source code 

Tries to detect and ignore MIME fields with ever changing content.

Some servers might include fields varying with time, randomly, etc. Those fields are likely to alter the clue's digest and interfere with analyze, producing many false positives and making the scan useless. This function detects those fields and recalculates each clue's digest so they can be safely analyzed again.

Parameters:
  • clues (list or tuple) - Sequence of clues.

get_digest(clue)

source code 

Returns the specified clue's digest.

This function is usually passed as a parameter for classify so it can separate clues according to their digest (among other fields).

Returns: str
The digest of a clue's parsed headers.

clusters(clues, step=3)

source code 

Finds clusters of clues.

A cluster is a group of at most step clues which differ by only 1 second from each other.

Parameters:
  • clues (list or tuple) - A sequence of clues to analyze
  • step (int) - Maximum difference between the time differences of the cluster's clues.
Returns: tuple
A sequence with merged clusters.

merge(clues)

source code 

Merges a sequence of clues into one.

A new clue will store the total count of the clues.

Note that each Clue has a starting count of 1

>>> a, b, c = Clue(), Clue(), Clue()
>>> sum([x.getCount() for x in [a, b, c]])
3
>>> a.incCount(5), b.incCount(11), c.incCount(23)
(None, None, None)
>>> merged = merge((a, b, c))
>>> merged.getCount()
42
>>> merged == a
True
Parameters:
  • clues (list or tuple) - A sequence containing all the clues to merge into one.
Returns: Clue
The result of merging all the passed clues into one.

classify(seq, *classifiers)

source code 

Classify a sequence according to one or several criteria.

We store each item into a nested dictionary using the classifiers as key generators (all of them must be callable objects).

In the following example we classify a list of clues according to their digest and their time difference.

>>> a, b, c = Clue(), Clue(), Clue()
>>> a.diff, b.diff, c.diff = 1, 2, 2
>>> a.info['digest'] = 'x'
>>> b.info['digest'] = c.info['digest'] = 'y'
>>> get_diff = lambda x: x.diff
>>> classified = classify([a, b, c], get_digest, get_diff)
>>> digests = classified.keys()
>>> digests.sort()  # We sort these so doctest won't fail.
>>> for digest in digests:
...     print digest
...     for diff in classified[digest].keys():
...         print ' ', diff
...         for clue in classified[digest][diff]:
...             if clue is a: print '    a'
...             elif clue is b: print '    b'
...             elif clue is c: print '    c'
...
x
  1
    a
y
  2
    b
    c
Parameters:
  • seq (list or tuple) - A sequence to classify.
  • classifiers (list or tuple) - A sequence of callables which return specific fields of the items contained in seq
Returns: dict
A nested dictionary in which the keys are the fields obtained by applying the classifiers to the items in the specified sequence.

sections(classified, sects=None)

source code 

Returns sections (and their items) from a nested dict.

See also: classify

Parameters:
  • classified (dict) - Nested dictionary.
  • sects (list) - List of results. It should not be specified by the user.
Returns: list
A list of lists in which each item is a subsection of a nested dictionary.

deltas(xs)

source code 

Computes the differences between the elements of a sequence of integers.

>>> deltas([-1, 0, 1])
[1, 1]
>>> deltas([1, 1, 2, 3, 5, 8, 13])
[0, 1, 1, 2, 3, 5]
Parameters:
  • xs (list) - A sequence of integers.
Returns: list
A list of differences between consecutive elements of xs.

slices(start, xs)

source code 

Returns slices of a given sequence separated by the specified indices.

If we wanted to get the slices necessary to split range(20) in sub-sequences of 5 items each we'd do:

>>> seq = range(20) 
>>> indices = [5, 10, 15]
>>> for piece in slices(0, indices):
...     print seq[piece]
[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9]
[10, 11, 12, 13, 14]
[15, 16, 17, 18, 19]
Parameters:
  • start (int) - Index of the first element of the sequence we want to partition.
  • xs (list) - Sequence of indexes where 'cuts' must be made.
Returns: list of slice
A sequence of slice objects suitable for splitting a list as specified.

filter_proxies(clues, maxdelta=3)

source code 

Detect and merge clues pointing to a proxy cache on the remote end.

Parameters:
  • clues (list) - Sequence of clues to analyze
  • maxdelta (int) - Maximum difference allowed between a clue's time difference and the previous one.
Returns: list
Sequence from which all irrelevant clues pointing to proxy caches have been filtered out.

uniq(clues)

source code 

Return a list of unique clues.

This is needed when merging clues coming from different sources. Clues with the same time diff and digest are not discarded, they are merged into one clue with the aggregated number of hits.

Parameters:
  • clues (list) - A sequence containing the clues to analyze.
Returns: list
Filtered sequence of clues where no clue has the same digest and time difference.

hits(clues)

source code 

Compute the total number of hits in a sequence of clues.

Parameters:
  • clues (list) - Sequence of clues.
Returns: int
Total hits.

analyze(clues)

source code 

Draw conclusions from the clues obtained during the scanning phase.

Parameters:
  • clues (list) - Unprocessed clues obtained during the scanning stage.
Returns: list
Coherent list of clues identifying real web servers.

reanalyze(clues, analyzed, threshold)

source code 

Identify and ignore changing header fields.

After the initial analysis one must check that there aren't as many real servers as obtained clues. If there were, it could be a sign that something is wrong: each clue is different from the others due to one or more MIME header fields which change unexpectedly.

Parameters:
  • clues (list) - Raw sequence of clues.
  • analyzed (list) - Result from the first analysis phase.
  • threshold (float) - Minimum clue-to-realserver ratio in order to trigger field inspection.

halberd-0.2.4/doc/api/toc-Halberd.reportlib-module.html0000644000175000017500000000231711431512607021403 0ustar jmbrjmbr reportlib

Module reportlib


Functions

report

Variables

__package__

[hide private] halberd-0.2.4/doc/api/Halberd.shell.UniScanStrategy-class.html0000644000175000017500000002135611431512607022633 0ustar jmbrjmbr Halberd.shell.UniScanStrategy
Package Halberd :: Module shell :: Class UniScanStrategy
[hide private]
[frames] | no frames]

Class UniScanStrategy

source code

BaseStrategy --+
               |
              UniScanStrategy

Scan a single URL.

Instance Methods [hide private]
 
__init__(self, scantask) source code
 
execute(self)
Scans, analyzes and presents results coming from a single target.
source code

Inherited from BaseStrategy (private): _analyze, _scan

Method Details [hide private]

__init__(self, scantask)
(Constructor)

source code 
Overrides: BaseStrategy.__init__

execute(self)

source code 

Scans, analyzes and presents results coming from a single target.

Overrides: BaseStrategy.execute

halberd-0.2.4/doc/api/module-tree.html0000644000175000017500000001323111431512607016212 0ustar jmbrjmbr Module Hierarchy
 
[hide private]
[frames] | no frames]
[ Module Hierarchy | Class Hierarchy ]

Module Hierarchy

halberd-0.2.4/doc/api/Halberd.crew.Scanner-class.html0000644000175000017500000003126511431512607020772 0ustar jmbrjmbr Halberd.crew.Scanner
Package Halberd :: Module crew :: Class Scanner
[hide private]
[frames] | no frames]

Class Scanner

source code

        object --+            
                 |            
threading._Verbose --+        
                     |        
      threading.Thread --+    
                         |    
               BaseScanner --+
                             |
                            Scanner

Scans the target host from the local machine.

Instance Methods [hide private]
 
process(self)
Gathers clues connecting directly to the target web server.
source code
Clue
makeClue(self, timestamp, headers)
Compose a clue object.
source code

Inherited from BaseScanner: __init__, hasExpired, remaining, run, setTimeout

Inherited from threading.Thread: __repr__, getName, isAlive, isDaemon, is_alive, join, setDaemon, setName, start

Inherited from threading.Thread (private): _set_daemon, _set_ident

Inherited from threading._Verbose (private): _note

Inherited from object: __delattr__, __format__, __getattribute__, __hash__, __new__, __reduce__, __reduce_ex__, __setattr__, __sizeof__, __str__, __subclasshook__

Instance Variables [hide private]

Inherited from BaseScanner: timeout

Properties [hide private]

Inherited from threading.Thread: daemon, ident, name

Inherited from object: __class__

Method Details [hide private]

process(self)

source code 

Gathers clues connecting directly to the target web server.

Overrides: BaseScanner.process

makeClue(self, timestamp, headers)

source code 

Compose a clue object.

Parameters:
  • timestamp (float) - Time when the reply was received.
  • headers (str) - MIME headers coming from an HTTP response.
Returns: Clue
A valid clue

halberd-0.2.4/doc/api/Halberd.reportlib-module.html0000644000175000017500000001374611431512607020630 0ustar jmbrjmbr Halberd.reportlib
Package Halberd :: Module reportlib
[hide private]
[frames] | no frames]

Module reportlib

source code

Output module.

Functions [hide private]
 
report(scantask)
Displays detailed report information to the user.
source code
Variables [hide private]
  __package__ = 'Halberd'
halberd-0.2.4/doc/api/crarr.png0000644000175000017500000000052411431512607014722 0ustar jmbrjmbr‰PNG  IHDR e¢E,tEXtCreation TimeTue 22 Aug 2006 00:43:10 -0500` XtIMEÖ)Ó}Ö pHYsÂÂnÐu>gAMA± üaEPLTEÿÿÿÍð×ÏÀ€f4sW áÛЊrD`@bCÜÕÈéäÜ–X{`,¯Ÿ€lN‡o@õóðª™xdEðí螊dÐÆ´”~TÖwÅvtRNS@æØfMIDATxÚc`@¼ì¼0&+š—Šˆ°»(’ˆ€ ;; /ðEXùØ‘?Ð n ƒª†— b;'ª+˜˜YÐ#œ(r<£"IEND®B`‚halberd-0.2.4/doc/api/Halberd.clues.file.InvalidFile-class.html0000644000175000017500000002445411431512607022662 0ustar jmbrjmbr Halberd.clues.file.InvalidFile
Halberd :: clues :: file :: InvalidFile :: Class InvalidFile
[hide private]
[frames] | no frames]

Class InvalidFile

source code

              object --+        
                       |        
exceptions.BaseException --+    
                           |    
        exceptions.Exception --+
                               |
                              InvalidFile

The loaded file is not a valid clue file.

Instance Methods [hide private]
 
__init__(self, msg)
x.__init__(...) initializes x; see x.__class__.__doc__ for signature
source code
 
__str__(self)
str(x)
source code

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

Method Details [hide private]

__init__(self, msg)
(Constructor)

source code 

x.__init__(...) initializes x; see x.__class__.__doc__ for signature

Overrides: object.__init__
(inherited documentation)

__str__(self)
(Informal representation operator)

source code 

str(x)

Overrides: object.__str__
(inherited documentation)

halberd-0.2.4/doc/api/Halberd.shell.ClueReaderStrategy-class.html0000644000175000017500000002143511431512607023304 0ustar jmbrjmbr Halberd.shell.ClueReaderStrategy
Package Halberd :: Module shell :: Class ClueReaderStrategy
[hide private]
[frames] | no frames]

Class ClueReaderStrategy

source code

BaseStrategy --+
               |
              ClueReaderStrategy

Clue reader strategy.

Works by reading and analyzing files of previously stored clues.

Instance Methods [hide private]
 
__init__(self, scantask) source code
 
execute(self)
Reads and interprets clues.
source code

Inherited from BaseStrategy (private): _analyze, _scan

Method Details [hide private]

__init__(self, scantask)
(Constructor)

source code 
Overrides: BaseStrategy.__init__

execute(self)

source code 

Reads and interprets clues.

Overrides: BaseStrategy.execute

halberd-0.2.4/doc/api/toc-Halberd.clues.analysis-module.html0000644000175000017500000000553211431512607022340 0ustar jmbrjmbr analysis

Module analysis


Functions

analyze
classify
clusters
deltas
diff_fields
filter_proxies
get_digest
hits
ignore_changing_fields
merge
reanalyze
sections
slices
sort_clues
uniq

Variables

__package__
logger

[hide private] halberd-0.2.4/doc/api/Halberd.logger-pysrc.html0000644000175000017500000004054411431512610017746 0ustar jmbrjmbr Halberd.logger
Package Halberd :: Module logger
[hide private]
[frames] | no frames]

Source Code for Module Halberd.logger

 1  # -*- coding: iso-8859-1 -*- 
 2   
 3  """Logger singleton. 
 4   
 5  This module allows halberd to easily log certain events. 
 6  """ 
 7   
 8  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 9  # 
10  # This program is free software; you can redistribute it and/or modify 
11  # it under the terms of the GNU General Public License as published by 
12  # the Free Software Foundation; either version 2 of the License, or 
13  # (at your option) any later version. 
14  # 
15  # This program is distributed in the hope that it will be useful, 
16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
18  # GNU General Public License for more details. 
19  # 
20  # You should have received a copy of the GNU General Public License 
21  # along with this program; if not, write to the Free Software 
22  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
23   
24   
25  import sys 
26  import logging 
27   
28   
29  _logger = None 
30   
31  #_logfmt = '%(name)s %(thread)d %(asctime)s %(levelname)s %(message)s' 
32  _logfmt = '%(levelname)s %(message)s' 
33   
34   
def getLogger():
    """Return the shared halberd logger, building it on first use.

    The first call creates the C{'Halberd'} logger, attaches a handler
    writing to standard output using C{_logfmt} and sets the level to
    C{INFO}. Later calls hand back that same object.

    @return: reference to a logger.
    @rtype: C{object}
    """
    global _logger

    if _logger is not None:
        return _logger

    _logger = logging.getLogger('Halberd')

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter(_logfmt))

    _logger.addHandler(handler)
    _logger.setLevel(logging.INFO)

    return _logger
51
def setDebug():
    """Switch the halberd logger to the C{debug} level.
    """
    getLogger().setLevel(logging.DEBUG)
57
def setError():
    """Switch the halberd logger to the C{error} level.
    """
    getLogger().setLevel(logging.ERROR)
63 64 65 # vim: ts=4 sw=4 et 66

halberd-0.2.4/doc/api/Halberd.shell-pysrc.html0000644000175000017500000021277211431512607017610 0ustar jmbrjmbr Halberd.shell
Package Halberd :: Module shell
[hide private]
[frames] | no frames]

Source Code for Module Halberd.shell

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Provides scanning patterns to be used as building blocks for more complex 
  4  scans. 
  5   
  6  Strategies are different ways in which target scans may be done. We provide 
  7  basic functionality so more complex stuff can be built upon this. 
  8  """ 
  9   
 10  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 11  # 
 12  # This program is free software; you can redistribute it and/or modify 
 13  # it under the terms of the GNU General Public License as published by 
 14  # the Free Software Foundation; either version 2 of the License, or 
 15  # (at your option) any later version. 
 16  # 
 17  # This program is distributed in the hope that it will be useful, 
 18  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 19  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 20  # GNU General Public License for more details. 
 21  # 
 22  # You should have received a copy of the GNU General Public License 
 23  # along with this program; if not, write to the Free Software 
 24  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 25   
 26   
 27  import Halberd.crew 
 28  import Halberd.logger 
 29  import Halberd.reportlib 
 30  import Halberd.clues.file 
 31  import Halberd.clues.analysis as analysis 
 32   
 33   
class ScanError(Exception):
    """Generic error during scanning.

    @ivar msg: Description of what went wrong.
    """
    def __init__(self, msg):
        # Hand the message to Exception as well, so that C{args}, C{repr()}
        # and pickling carry it instead of an empty tuple.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
class BaseStrategy:
    """Defines the strategy used to scan.

    A strategy is a certain way to use the program. These can be layered to
    build a bigger strategy doing more complex things, etc.
    """
    def __init__(self, scantask):
        self.task = scantask
        self.logger = Halberd.logger.getLogger()

    def execute(self):
        """Executes the strategy (does nothing in the base class).
        """
        return None

    # ---------------------------
    # Higher-level helper methods
    # ---------------------------

    def _scan(self):
        """Allocates a work crew of scanners and launches them on the target.

        Both the raw clues and the analyzed ones stored in the task are
        reset before the crew is launched.
        """
        task = self.task
        assert task.url and task.addr

        task.clues = []
        task.analyzed = []
        task.clues = Halberd.crew.WorkCrew(task).scan()

    def _analyze(self):
        """Performs clue analysis.

        Nothing is done when the task holds no clues. Otherwise the clues go
        through a first analysis pass and the result is then handed to
        C{reanalyze} together with the task's ratio threshold.
        """
        task = self.task
        if not len(task.clues):
            return

        task.analyzed = analysis.analyze(task.clues)
        task.analyzed = analysis.reanalyze(task.clues,
                                           task.analyzed,
                                           task.ratio_threshold)
class UniScanStrategy(BaseStrategy):
    """Scan a single URL.
    """
    def __init__(self, scantask):
        """Work out which addresses have to be scanned.

        @param scantask: Task describing the scan; its C{url} is mandatory.

        @raise ScanError: When no URL was given, the lookup was interrupted
        by the user or the host could not be resolved.
        """
        BaseStrategy.__init__(self, scantask)

        if not self.task.url:
            raise ScanError('Didn\'t provide an URL to scan')

        if self.task.addr:
            # The user passed a specific address as a parameter.
            self.addrs = [self.task.addr]
            return

        host = Halberd.util.hostname(self.task.url)
        self.logger.info('looking up host %s... ', host)

        try:
            self.addrs = Halberd.util.addresses(host)
        except KeyboardInterrupt:
            raise ScanError('interrupted by the user')

        if not self.addrs:
            raise ScanError('unable to resolve %s' % host)

        self.addrs.sort()
        self.logger.info('host lookup done.')

        if len(self.addrs) > 1:
            for addr in self.addrs:
                #self.logger.debug('%s resolves to %s', host, addr)
                self.logger.info('%s resolves to %s', host, addr)

    def execute(self):
        """Scans, analyzes and presents results coming from a single target.

        Every address found for the target is scanned, analyzed and reported
        in turn; the clues are also stored when the task asks for it.
        """
        if self.task.save:
            cluedir = Halberd.clues.file.ClueDir(self.task.save)

        for addr in self.addrs:
            self.task.addr = addr
            self._scan()

            self._analyze()
            Halberd.reportlib.report(self.task)

            if self.task.save:
                cluedir.save(self.task.url,
                             self.task.addr,
                             self.task.clues)
class MultiScanStrategy(BaseStrategy):
    """Scan multiple URLs.
    """
    def __init__(self, scantask):
        """Open the file listing the URLs to scan.

        @param scantask: Task describing the scan; its C{urlfile} is
        mandatory.

        @raise ScanError: When no URL file was given.
        """
        BaseStrategy.__init__(self, scantask)

        if not self.task.urlfile:
            raise ScanError('An urlfile parameter must be provided')

        self.urlfp = open(self.task.urlfile, 'r')

    def _targets(self, urlfp):
        """Obtain target addresses from URLs.

        @param urlfp: File where the list of URLs is stored.
        @type urlfp: C{file}

        @return: Generator providing the desired addresses.

        @raise ScanError: When the user interrupts a host lookup.
        """
        for line in urlfp:
            # Strip surrounding whitespace including the end of line
            # character. Slicing off the last character unconditionally would
            # eat part of the URL when the final line has no newline.
            url = line.strip()
            if not url:
                continue

            host = Halberd.util.hostname(url)
            if not host:
                # Report the URL: the host is empty at this point.
                self.logger.warn('unable to extract hostname from %s', url)
                continue

            self.logger.info('looking up host %s... ', host)
            try:
                addrs = Halberd.util.addresses(host)
            except KeyboardInterrupt:
                raise ScanError('interrupted by the user')
            self.logger.info('host lookup done.')

            for addr in addrs:
                yield (url, addr)

    def execute(self):
        """Launch a multiple URL scan.

        Each (url, address) pair is scanned, its clues saved, analyzed and
        reported. The URL file is closed once the scan is over, even if it
        was aborted by an exception.
        """
        cluedir = Halberd.clues.file.ClueDir(self.task.save)

        try:
            for url, addr in self._targets(self.urlfp):
                self.task.url = url
                self.task.addr = addr
                self.logger.info('scanning %s (%s)', url, addr)
                self._scan()

                cluedir.save(url, addr, self.task.clues)

                self._analyze()

                Halberd.reportlib.report(self.task)
        finally:
            self.urlfp.close()
class ClueReaderStrategy(BaseStrategy):
    """Clue reader strategy.

    Works by reading and analyzing files of previously stored clues.
    """
    def __init__(self, scantask):
        BaseStrategy.__init__(self, scantask)

    def execute(self):
        """Reads and interprets clues.

        The clues are loaded from the task's clue file and analyzed; the
        clue file name is then stored as the task's URL before reporting.
        """
        task = self.task

        task.clues = Halberd.clues.file.load(task.cluefile)
        self._analyze()
        task.url = task.cluefile
        Halberd.reportlib.report(task)
205 206 207 # vim: ts=4 sw=4 et 208

halberd-0.2.4/doc/api/Halberd.ScanTask-module.html0000644000175000017500000002053111431512607020323 0ustar jmbrjmbr Halberd.ScanTask
Package Halberd :: Module ScanTask
[hide private]
[frames] | no frames]

Module ScanTask

source code

Scanning tasks.

Classes [hide private]
  ConfError
Error with configuration file(s)
  ScanTask
Describes the way a scan should be performed.
Variables [hide private]
int default_scantime = 15
Time to spend probing the target expressed in seconds.
int default_parallelism = 4
Number of parallel threads to launch for the scan.
str default_conf_dir = '/home/jmbr/.halberd'
Path to the directory where the configuration file is located.
str default_conf_file = '/home/jmbr/.halberd/halberd.cfg'
Name of the default configuration file for halberd.
float default_ratio_threshold = 0.6
Minimum clues-to-realservers ratio to trigger a clue reanalysis.
str default_out = None
Default place where to write reports (None means stdout).
  __package__ = 'Halberd'
halberd-0.2.4/doc/api/Halberd.shell.ScanError-class.html0000644000175000017500000002443011431512607021442 0ustar jmbrjmbr Halberd.shell.ScanError
Package Halberd :: Module shell :: Class ScanError
[hide private]
[frames] | no frames]

Class ScanError

source code

              object --+        
                       |        
exceptions.BaseException --+    
                           |    
        exceptions.Exception --+
                               |
                              ScanError

Generic error during scanning.

Instance Methods [hide private]
 
__init__(self, msg)
x.__init__(...) initializes x; see x.__class__.__doc__ for signature
source code
 
__str__(self)
str(x)
source code

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

Method Details [hide private]

__init__(self, msg)
(Constructor)

source code 

x.__init__(...) initializes x; see x.__class__.__doc__ for signature

Overrides: object.__init__
(inherited documentation)

__str__(self)
(Informal representation operator)

source code 

str(x)

Overrides: object.__str__
(inherited documentation)

halberd-0.2.4/doc/api/frames.html0000644000175000017500000000111511431512607015243 0ustar jmbrjmbr API Documentation halberd-0.2.4/doc/api/Halberd.clientlib-module.html0000644000175000017500000003451411431512607020567 0ustar jmbrjmbr Halberd.clientlib
Package Halberd :: Module clientlib
[hide private]
[frames] | no frames]

Module clientlib

source code

HTTP/HTTPS client module.

Classes [hide private]
  HTTPError
Generic HTTP exception
  HTTPSError
Generic HTTPS exception
  InvalidURL
Invalid or unsupported URL
  TimedOut
Operation timed out
  ConnectionRefused
Unable to reach webserver
  UnknownReply
The remote host didn't return an HTTP reply
  HTTPClient
Special-purpose HTTP client.
  HTTPSClient
Special-purpose HTTPS client.
Functions [hide private]
class
clientFactory(scantask)
HTTP/HTTPS client factory.
source code
Variables [hide private]
float default_timeout = 2
Default timeout for socket operations.
int default_bufsize = 1024
Default number of bytes to try to read from the network.
str default_template = 'GET %(request)s HTTP/1.1\r\nHost: %(hostna...
Request template, must be filled by HTTPClient
  __package__ = 'Halberd'
Function Details [hide private]

clientFactory(scantask)

source code 

HTTP/HTTPS client factory.

Parameters:
  • scantask (instanceof(ScanTask)) - Object describing where the target is and how to reach it.
Returns: class
The appropriate client class for the specified URL.

Variables Details [hide private]

default_template

Request template, must be filled by HTTPClient
Type:
str
Value:
'''GET %(request)s HTTP/1.1\r
Host: %(hostname)s%(port)s\r
Pragma: no-cache\r
Cache-control: no-cache\r
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20\
050414 Firefox/1.0.3\r
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, applicati\
on/x-shockwave-flash, */*\r
...

halberd-0.2.4/doc/api/Halberd.clues.analysis-pysrc.html0000644000175000017500000034123011431512610021420 0ustar jmbrjmbr Halberd.clues.analysis
Package Halberd :: Package clues :: Module analysis
[hide private]
[frames] | no frames]

Source Code for Module Halberd.clues.analysis

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Utilities for clue analysis. 
  4  """ 
  5   
  6  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22   
 23  import copy 
 24   
 25  import Halberd.logger 
 26   
 27   
 28  logger = Halberd.logger.getLogger() 
 29   
 30   
 31  # TODO - Test fuzzy clustering and k-means against this naive hierarchical 
 32  # clustering algorithm to see which one performs better (there's a k-means 
 33  # implementation in Scipy). 
 34  # Fuzzy clustering will probably be better as it can output a degree of 
 35  # confidence which might be helpful to halberd's users. 
 36   
 37  # XXX - In python 2.4 there's itertools.groupby() which replaces the idiomatic 
 38  # dictionary uses for grouping things together. 
 39   
def diff_fields(clues):
    """Study differences between fields.

    Every ordered pair of distinct clues is compared header by header and
    the names of the headers taking part in a difference are collected. A
    name shows up once per occurrence, so the result may hold duplicates.

    @param clues: Clues to analyze.
    @type clues: C{list}

    @return: Fields which were found to be different among the analyzed clues,
    in descending order.
    @rtype: C{list}
    """
    import difflib

    total = len(clues)
    different = []

    for i in range(total):
        for j in range(total):
            if i == j:
                continue

            one, other = clues[i].headers, clues[j].headers
            opcodes = difflib.SequenceMatcher(None, one, other).get_opcodes()

            for tag, alo, ahi, blo, bhi in opcodes:
                if tag != 'equal':
                    changed = one[alo:ahi] + other[blo:bhi]
                    different.extend([name for name, value in changed])

    different.sort()
    different.reverse()

    return different
def ignore_changing_fields(clues):
    """Tries to detect and ignore MIME fields with ever changing content.

    Some servers might include fields varying with time, randomly, etc. Those
    fields are likely to alter the clue's digest and interfere with L{analyze},
    producing many false positives and making the scan useless. This function
    detects those fields and recalculates each clue's digest so they can be
    safely analyzed again.

    @param clues: Sequence of clues.
    @type clues: C{list} or C{tuple}

    @return: The very same sequence, with its clues parsed again.
    """
    from Halberd.clues.Clue import Clue

    different = diff_fields(clues)

    # First alter Clue to be able to cope with the varying fields.
    ignored = []
    try:
        for field in different:
            method = '_get_' + Clue.normalize(field)
            if not hasattr(Clue, method):
                logger.debug('ignoring %s', field)
                ignored.append(method)
                setattr(Clue, method, lambda s, f: None)

        for clue in clues:
            Clue.parse(clue, clue.headers)
    finally:
        # We want to leave the Clue class as before because a MIME field
        # causing trouble for the current scan might be the source of precious
        # information for another scan. This must happen even when parsing
        # fails, otherwise the temporary handlers would outlive this call.
        for method in ignored:
            delattr(Clue, method)

    return clues
110 111
def get_digest(clue):
    """Returns the specified clue's digest.

    Handy as a key function for L{classify}, which can then separate clues
    according to their digest (among other fields).

    @param clue: Clue whose C{info} mapping holds a C{'digest'} entry.

    @return: The digest of a clue's parsed headers.
    @rtype: C{str}
    """
    info = clue.info
    return info['digest']
122
def clusters(clues, step=3):
    """Finds clusters of clues.

    The clues are sorted by time difference and consumed from the front.
    At each round the biggest group of C{size} leading clues (C{size} going
    from C{step} down to 1) whose first and last time differences are at
    most C{size} apart is produced as a cluster.

    @param clues: A sequence of clues to analyze
    @type clues: C{list} or C{tuple}

    @param step: Maximum number of clues in a cluster.
    @type step: C{int}

    @return: A generator yielding each cluster found.
    @rtype: generator of C{tuple}
    """
    def iscluster(candidate, num):
        """Determines if a list of clues form a cluster of the specified size.
        """
        assert len(candidate) == num

        return abs(candidate[0].diff - candidate[-1].diff) <= num

    def find_cluster(pending, num):
        if len(pending) >= num and iscluster(pending[:num], num):
            return tuple(pending[:num])
        return ()

    clues = sort_clues(clues)

    start = 0
    while True:
        clues = clues[start:]
        if not clues:
            break

        # Try the biggest cluster size first.
        for size in range(step, 0, -1):
            cluster = find_cluster(clues, size)
            if cluster:
                yield cluster
                start = size
                break
def merge(clues):
    """Merges a sequence of clues into one.

    The result is a shallow copy of the first clue whose count has been
    increased by the counts of all the remaining ones.

    Note that each L{Clue} has a starting count of 1

    >>> a, b, c = Clue(), Clue(), Clue()
    >>> sum([x.getCount() for x in [a, b, c]])
    3
    >>> a.incCount(5), b.incCount(11), c.incCount(23)
    (None, None, None)
    >>> merged = merge((a, b, c))
    >>> merged.getCount()
    42
    >>> merged == a
    True

    @param clues: A sequence containing all the clues to merge into one.
    @type clues: C{list} or C{tuple}

    @return: The result of merging all the passed clues into one.
    @rtype: L{Clue}
    """
    first, others = clues[0], clues[1:]

    merged = copy.copy(first)
    for other in others:
        extra = other.getCount()
        merged.incCount(extra)

    return merged
199
def classify(seq, *classifiers):
    """Classify a sequence according to one or several criteria.

    Each item ends up in a nested dictionary whose keys are produced by the
    classifiers (all of them must be callable objects): one nesting level
    per classifier, with lists of items at the innermost level.

    In the following example we classify a list of clues according to their
    digest and their time difference.

    >>> a, b, c = Clue(), Clue(), Clue()
    >>> a.diff, b.diff, c.diff = 1, 2, 2
    >>> a.info['digest'] = 'x'
    >>> b.info['digest'] = c.info['digest'] = 'y'
    >>> get_diff = lambda x: x.diff
    >>> classified = classify([a, b, c], get_digest, get_diff)
    >>> digests = classified.keys()
    >>> digests.sort()  # We sort these so doctest won't fail.
    >>> for digest in digests:
    ...     print digest
    ...     for diff in classified[digest].keys():
    ...         print ' ', diff
    ...         for clue in classified[digest][diff]:
    ...             if clue is a: print ' a'
    ...             elif clue is b: print ' b'
    ...             elif clue is c: print ' c'
    ...
    x
      1
     a
    y
      2
     b
     c

    @param seq: A sequence to classify.
    @type seq: C{list} or C{tuple}

    @param classifiers: A sequence of callables which return specific fields of
    the items contained in L{seq}
    @type classifiers: C{list} or C{tuple}

    @return: A nested dictionary in which the keys are the fields obtained by
    applying the classifiers to the items in the specified sequence.
    @rtype: C{dict}
    """
    # XXX - Printing a dictionary in a doctest string is a very bad idea.
    classified = {}

    for item in seq:
        level = classified
        for classifier in classifiers[:-1]:
            assert callable(classifier)
            key = classifier(item)
            if key not in level:
                level[key] = {}
            level = level[key]

        # The innermost level holds plain lists of items, not dictionaries.
        leaf = classifiers[-1](item)
        if leaf not in level:
            level[leaf] = []
        level[leaf].append(item)

    return classified
259
def sections(classified, sects=None):
    """Returns sections (and their items) from a nested dict.

    The dictionary is walked recursively and every list found is appended,
    as is, to the results. Anything which is neither a dictionary nor a
    list is ignored.

    See also: L{classify}

    @param classified: Nested dictionary.
    @type classified: C{dict}

    @param sects: List of results. It should not be specified by the user.
    @type sects: C{list}

    @return: A list of lists in where each item is a subsection of a nested dictionary.
    @rtype: C{list}
    """
    if sects is None:
        sects = []

    if isinstance(classified, list):
        sects.append(classified)
    elif isinstance(classified, dict):
        for subsection in classified.values():
            sections(subsection, sects)

    return sects
284
def deltas(xs):
    """Computes the differences between the elements of a sequence of integers.

    >>> deltas([-1, 0, 1])
    [1, 1]
    >>> deltas([1, 1, 2, 3, 5, 8, 13])
    [0, 1, 1, 2, 3, 5]

    @param xs: A sequence of integers.
    @type xs: C{list}

    @return: A list of differences between consecutive elements of L{xs}
    (empty when there are fewer than two elements).
    @rtype: C{list}
    """
    # Pairing the sequence with itself shifted by one avoids recursing once
    # per element, which was quadratic and failed on long sequences.
    return [after - before for before, after in zip(xs, xs[1:])]
303
def slices(start, xs):
    """Returns slices of a given sequence separated by the specified indices.

    If we wanted to get the slices necessary to split range(20) in
    sub-sequences of 5 items each we'd do:

    >>> seq = range(20)
    >>> indices = [5, 10, 15]
    >>> for piece in slices(0, indices):
    ...     print seq[piece]
    [0, 1, 2, 3, 4]
    [5, 6, 7, 8, 9]
    [10, 11, 12, 13, 14]
    [15, 16, 17, 18, 19]

    @param start: Index of the first element of the sequence we want to
    partition.
    @type start: C{int}.

    @param xs: Sequence of indexes where 'cuts' must be made.
    @type xs: C{list}

    @return: A sequence of C{slice} objects suitable for splitting a list as
    specified.
    @rtype: C{list} of C{slice}
    """
    # Built iteratively so that any kind of (possibly empty) sequence of
    # indexes is accepted and long ones do not exhaust the recursion limit.
    bounds = [start] + list(xs)

    pieces = [slice(lo, hi) for lo, hi in zip(bounds, bounds[1:])]
    # The last slice includes all the remaining items in the sequence.
    pieces.append(slice(bounds[-1], None))

    return pieces
334
def sort_clues(clues):
    """Sorts clues according to their time difference.

    The sort is stable: clues sharing the same time difference keep their
    relative order and are never compared against each other.

    @param clues: Clues to sort; the sequence itself is left untouched.
    @type clues: C{list} or C{tuple}

    @return: New list with the clues in ascending order of C{diff}.
    @rtype: C{list}
    """
    return sorted(clues, key=lambda clue: clue.diff)
343 344
def filter_proxies(clues, maxdelta=3):
    """Detect and merge clues pointing to a proxy cache on the remote end.

    Clues sharing remote time and digest are sorted by time difference and
    split wherever two consecutive clues are more than C{maxdelta} apart.
    Each resulting group is merged into a single clue.

    @param clues: Sequence of clues to analyze
    @type clues: C{list}

    @param maxdelta: Maximum difference allowed between a clue's time
    difference and the previous one.
    @type maxdelta: C{int}

    @return: Sequence where all irrelevant clues pointing out to proxy caches
    have been filtered out.
    @rtype: C{list}
    """
    results = []

    # Classify clues by remote time and digest.
    get_rtime = lambda c: c._remote
    classified = classify(clues, get_rtime, get_digest)

    for cur_clues in sections(classified):
        if len(cur_clues) == 1:
            results.append(cur_clues[0])
            continue

        cur_clues = sort_clues(cur_clues)

        diffs = [c.diff for c in cur_clues]

        # The delta at position idx is the gap between clue idx and clue
        # idx + 1, hence the group must be cut right before clue idx + 1.
        # Cutting at idx puts clue idx in the wrong group and, for idx == 0,
        # yields an empty first group.
        indices = [idx + 1 for idx, delta in enumerate(deltas(diffs))
                   if abs(delta) > maxdelta]

        # Every cut lies strictly inside the list, so no group is empty.
        for piece in slices(0, indices):
            results.append(merge(cur_clues[piece]))

    return results
386
def uniq(clues):
    """Return a list of unique clues.

    This is needed when merging clues coming from different sources. Clues with
    the same time diff and digest are not discarded, they are merged into one
    clue with the aggregated number of hits.

    @param clues: A sequence containing the clues to analyze.
    @type clues: C{list}

    @return: Filtered sequence of clues where no clue has the same digest and
    time difference.
    @rtype: C{list}
    """
    get_diff = lambda c: c.diff
    grouped = classify(clues, get_digest, get_diff)

    return [merge(group) for group in sections(grouped)]
410
def hits(clues):
    """Compute the total number of hits in a sequence of clues.

    @param clues: Sequence of clues.
    @type clues: C{list}

    @return: Total hits (0 for an empty sequence).
    @rtype: C{int}
    """
    total = 0
    for clue in clues:
        total = total + clue.getCount()
    return total
421
def analyze(clues):
    """Draw conclusions from the clues obtained during the scanning phase.

    Duplicated clues are merged, proxy caches are filtered out and the
    remaining clues are grouped by digest. Every cluster found in each
    group is then merged into one clue.

    @param clues: Unprocessed clues obtained during the scanning stage.
    @type clues: C{list}

    @return: Coherent list of clues identifying real web servers.
    @rtype: C{list}
    """
    candidates = filter_proxies(uniq(clues))

    cluesbydigest = classify(candidates, get_digest)

    results = []
    for same_digest in cluesbydigest.values():
        for cluster in clusters(same_digest):
            results.append(merge(cluster))

    return results
444 445 # TODO - reanalyze should be called from this module and not from Halberd.shell.
def reanalyze(clues, analyzed, threshold):
    """Identify and ignore changing header fields.

    When the first analysis yields (almost) as many real servers as there
    are clues, something is probably wrong: each clue may differ from the
    others only because of MIME header fields which change unexpectedly.
    In that case the changing fields are ignored and the clues are analyzed
    again.

    @param clues: Raw sequence of clues (must not be empty).
    @type clues: C{list}

    @param analyzed: Result from the first analysis phase.
    @type analyzed: C{list}

    @param threshold: Value of C{len(analyzed) / len(clues)} at or above
    which field inspection is triggered.
    @type threshold: C{float}

    @return: C{analyzed} untouched when the ratio is below the threshold,
    otherwise the result of analyzing the clues again.
    @rtype: C{list}
    """
    assert len(clues) > 0

    first_ratio = len(analyzed) / float(len(clues))
    if first_ratio >= threshold:
        logger.debug('clue-to-realserver ratio is high (%.3f)', first_ratio)
        logger.debug('reanalyzing clues...')

        ignore_changing_fields(clues)
        analyzed = analyze(clues)

        logger.debug('clue reanalysis done.')

    # Measure again with the (possibly new) analysis; the user is only warned
    # when there is a significant amount of evidence.
    still_high = len(analyzed) / float(len(clues)) >= threshold
    if still_high and len(clues) > 10:
        logger.warn(
'''The following results might be incorrect. It could be because the remote
host keeps changing its server version string or because halberd didn't have
enough samples.''')

    return analyzed
def _test():
    # Runs the doctests of Halberd.clues.analysis and hands back whatever
    # doctest.testmod returns.
    import doctest

    import Halberd.clues.Clue
    import Halberd.clues.analysis

    # Due to the above imports, this test must be executed from the top level
    # source directory:
    #   python Halberd/clues/analysis.py -v

    analysis_module = Halberd.clues.analysis

    # The doctests run inside the analysis module's own namespace, extended
    # with every name exported by the Clue module.
    namespace = vars(analysis_module)
    namespace.update(vars(Halberd.clues.Clue))

    return doctest.testmod(m=analysis_module, name='analysis', globs=namespace)


if __name__ == '__main__':
    _test()


# vim: ts=4 sw=4 et

halberd-0.2.4/doc/api/Halberd.reportlib-pysrc.html0000644000175000017500000006575611431512607020513 0ustar jmbrjmbr Halberd.reportlib
Package Halberd :: Module reportlib
[hide private]
[frames] | no frames]

Source Code for Module Halberd.reportlib

 1  # -*- coding: iso-8859-1 -*- 
 2   
 3  """Output module. 
 4  """ 
 5   
 6  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 7  # 
 8  # This program is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  # 
13  # This program is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with this program; if not, write to the Free Software 
20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
21   
22   
23  import sys 
24   
25  import Halberd.logger 
26  import Halberd.clues.analysis as analysis 
27   
28   
def report(scantask):
    """Displays detailed report information to the user.

    The report is appended to the file named by C{scantask.out} or, when
    that attribute is false, written to standard output.  It consists of a
    banner with the scanned URL (and address, if any) followed by one
    section per clue in C{scantask.analyzed}.

    @param scantask: Scan description and results.  The attributes read
    here are C{out}, C{analyzed}, C{url}, C{addr} and C{debug}.
    """
    if scantask.out:
        out = open(scantask.out, 'a')
    else:
        out = sys.stdout

    try:
        clues = scantask.analyzed
        hits = analysis.hits(clues)
        # NOTE(review): the returned logger is not used in this function; the
        # call is kept in case getLogger() sets up shared state -- confirm
        # and remove if it does not.
        Halberd.logger.getLogger()

        # xxx This could be passed by the caller in order to avoid
        # recomputation in case the clues needed a re-analysis.
        diff_fields = analysis.diff_fields(clues)

        out.write('=' * 70 + '\n')
        out.write('%s' % scantask.url)
        if scantask.addr:
            out.write(' (%s)' % scantask.addr)
        out.write(': %d real server(s)\n' % len(clues))
        out.write('=' * 70 + '\n')

        for num, clue in enumerate(clues):
            # Guards the percentage computation below against dividing by 0.
            assert hits > 0
            info = clue.info

            out.write('\n')
            out.write('server %d: %s\n' % (num + 1, info['server'].lstrip()))
            out.write('-' * 70 + '\n\n')

            out.write('difference: %d seconds\n' % clue.diff)

            out.write('successful requests: %d hits (%.2f%%)\n'
                      % (clue.getCount(), clue.getCount() * 100 / float(hits)))

            if info['contloc']:
                out.write('content-location: %s\n' % info['contloc'].lstrip())

            if len(info['cookies']) > 0:
                out.write('cookie(s):\n')
                for cookie in info['cookies']:
                    out.write(' %s\n' % cookie.lstrip())

            out.write('header fingerprint: %s\n' % info['digest'])

            # Only the headers whose field name appears in diff_fields are
            # listed.
            different = [(field, value) for field, value in clue.headers
                         if field in diff_fields]
            if different:
                out.write('different headers:\n')
                for idx, (field, value) in enumerate(different):
                    out.write(' %d. %s:%s\n' % (idx + 1, field, value))

            if scantask.debug:
                import pprint
                out.write('headers:\n')
                pprint.pprint(clue.headers, out)
    finally:
        # Close only what was opened here; sys.stdout belongs to the caller.
        if out is not sys.stdout:
            out.close()
89 90 91 # vim: ts=4 sw=4 et 92

halberd-0.2.4/doc/api/Halberd-module.html0000644000175000017500000001650011431512607016616 0ustar jmbrjmbr Halberd
Package Halberd
[hide private]
[frames] | no frames]

Package Halberd

source code

HTTP load balancer detector module.

Submodules [hide private]

Variables [hide private]
  __package__ = None
halberd-0.2.4/doc/api/Halberd.shell.MultiScanStrategy-class.html0000644000175000017500000002472711431512607023177 0ustar jmbrjmbr Halberd.shell.MultiScanStrategy
Package Halberd :: Module shell :: Class MultiScanStrategy
[hide private]
[frames] | no frames]

Class MultiScanStrategy

source code

BaseStrategy --+
               |
              MultiScanStrategy

Scan multiple URLs.

Instance Methods [hide private]
 
__init__(self, scantask) source code
 
_targets(self, urlfp)
Obtain target addresses from URLs.
source code
 
execute(self)
Launch a multiple URL scan.
source code

Inherited from BaseStrategy (private): _analyze, _scan

Method Details [hide private]

__init__(self, scantask)
(Constructor)

source code 
Overrides: BaseStrategy.__init__

_targets(self, urlfp)

source code 

Obtain target addresses from URLs.

Parameters:
  • urlfp (file) - File where the list of URLs is stored.
Returns:
Generator providing the desired addresses.

execute(self)

source code 

Launch a multiple URL scan.

Overrides: BaseStrategy.execute

halberd-0.2.4/doc/api/api-objects.txt0000644000175000017500000004475611431512610016055 0ustar jmbrjmbrHalberd Halberd-module.html Halberd.__package__ Halberd-module.html#__package__ Halberd.ScanTask Halberd.ScanTask-module.html Halberd.ScanTask.default_conf_dir Halberd.ScanTask-module.html#default_conf_dir Halberd.ScanTask.default_out Halberd.ScanTask-module.html#default_out Halberd.ScanTask.default_ratio_threshold Halberd.ScanTask-module.html#default_ratio_threshold Halberd.ScanTask.default_scantime Halberd.ScanTask-module.html#default_scantime Halberd.ScanTask.__package__ Halberd.ScanTask-module.html#__package__ Halberd.ScanTask.default_parallelism Halberd.ScanTask-module.html#default_parallelism Halberd.ScanTask.default_conf_file Halberd.ScanTask-module.html#default_conf_file Halberd.clientlib Halberd.clientlib-module.html Halberd.clientlib.default_bufsize Halberd.clientlib-module.html#default_bufsize Halberd.clientlib.default_timeout Halberd.clientlib-module.html#default_timeout Halberd.clientlib.__package__ Halberd.clientlib-module.html#__package__ Halberd.clientlib.clientFactory Halberd.clientlib-module.html#clientFactory Halberd.clientlib.default_template Halberd.clientlib-module.html#default_template Halberd.clues Halberd.clues-module.html Halberd.clues.__package__ Halberd.clues-module.html#__package__ Halberd.clues.Clue Halberd.clues.Clue-module.html Halberd.clues.Clue.__package__ Halberd.clues.Clue-module.html#__package__ Halberd.clues.analysis Halberd.clues.analysis-module.html Halberd.clues.analysis.hits Halberd.clues.analysis-module.html#hits Halberd.clues.analysis.get_digest Halberd.clues.analysis-module.html#get_digest Halberd.clues.analysis.sections Halberd.clues.analysis-module.html#sections Halberd.clues.analysis._test Halberd.clues.analysis-module.html#_test Halberd.clues.analysis.slices Halberd.clues.analysis-module.html#slices Halberd.clues.analysis.sort_clues Halberd.clues.analysis-module.html#sort_clues 
Halberd.clues.analysis.ignore_changing_fields Halberd.clues.analysis-module.html#ignore_changing_fields Halberd.clues.analysis.__package__ Halberd.clues.analysis-module.html#__package__ Halberd.clues.analysis.filter_proxies Halberd.clues.analysis-module.html#filter_proxies Halberd.clues.analysis.deltas Halberd.clues.analysis-module.html#deltas Halberd.clues.analysis.reanalyze Halberd.clues.analysis-module.html#reanalyze Halberd.clues.analysis.merge Halberd.clues.analysis-module.html#merge Halberd.clues.analysis.uniq Halberd.clues.analysis-module.html#uniq Halberd.clues.analysis.clusters Halberd.clues.analysis-module.html#clusters Halberd.clues.analysis.logger Halberd.clues.analysis-module.html#logger Halberd.clues.analysis.analyze Halberd.clues.analysis-module.html#analyze Halberd.clues.analysis.diff_fields Halberd.clues.analysis-module.html#diff_fields Halberd.clues.analysis.classify Halberd.clues.analysis-module.html#classify Halberd.clues.file Halberd.clues.file-module.html Halberd.clues.file.load Halberd.clues.file-module.html#load Halberd.clues.file.save Halberd.clues.file-module.html#save Halberd.clues.file.__package__ Halberd.clues.file-module.html#__package__ Halberd.conflib Halberd.conflib-module.html Halberd.conflib.default_proxy_port Halberd.conflib-module.html#default_proxy_port Halberd.conflib.__package__ Halberd.conflib-module.html#__package__ Halberd.conflib.default_conf Halberd.conflib-module.html#default_conf Halberd.crew Halberd.crew-module.html Halberd.crew.__package__ Halberd.crew-module.html#__package__ Halberd.logger Halberd.logger-module.html Halberd.logger._logfmt Halberd.logger-module.html#_logfmt Halberd.logger._logger Halberd.logger-module.html#_logger Halberd.logger.__package__ Halberd.logger-module.html#__package__ Halberd.logger.getLogger Halberd.logger-module.html#getLogger Halberd.logger.setDebug Halberd.logger-module.html#setDebug Halberd.logger.setError Halberd.logger-module.html#setError Halberd.reportlib 
Halberd.reportlib-module.html Halberd.reportlib.report Halberd.reportlib-module.html#report Halberd.reportlib.__package__ Halberd.reportlib-module.html#__package__ Halberd.shell Halberd.shell-module.html Halberd.shell.__package__ Halberd.shell-module.html#__package__ Halberd.util Halberd.util-module.html Halberd.util.addresses Halberd.util-module.html#addresses Halberd.util.hostname Halberd.util-module.html#hostname Halberd.util.__package__ Halberd.util-module.html#__package__ Halberd.util.utctime Halberd.util-module.html#utctime Halberd.util._gen_table Halberd.util-module.html#_gen_table Halberd.util.table Halberd.util-module.html#table Halberd.ScanTask.ConfError Halberd.ScanTask.ConfError-class.html Halberd.ScanTask.ConfError.__str__ Halberd.ScanTask.ConfError-class.html#__str__ Halberd.ScanTask.ConfError.__init__ Halberd.ScanTask.ConfError-class.html#__init__ Halberd.ScanTask.ScanTask Halberd.ScanTask.ScanTask-class.html Halberd.ScanTask.ScanTask.save Halberd.ScanTask.ScanTask-class.html#save Halberd.ScanTask.ScanTask.certfile Halberd.ScanTask.ScanTask-class.html#certfile Halberd.ScanTask.ScanTask.verbose Halberd.ScanTask.ScanTask-class.html#verbose Halberd.ScanTask.ScanTask.url Halberd.ScanTask.ScanTask-class.html#url Halberd.ScanTask.ScanTask.keyfile Halberd.ScanTask.ScanTask-class.html#keyfile Halberd.ScanTask.ScanTask.urlfile Halberd.ScanTask.ScanTask-class.html#urlfile Halberd.ScanTask.ScanTask.readConf Halberd.ScanTask.ScanTask-class.html#readConf Halberd.ScanTask.ScanTask.analyzed Halberd.ScanTask.ScanTask-class.html#analyzed Halberd.ScanTask.ScanTask.clues Halberd.ScanTask.ScanTask-class.html#clues Halberd.ScanTask.ScanTask.debug Halberd.ScanTask.ScanTask-class.html#debug Halberd.ScanTask.ScanTask.out Halberd.ScanTask.ScanTask-class.html#out Halberd.ScanTask.ScanTask.proxy_serv_addr Halberd.ScanTask.ScanTask-class.html#proxy_serv_addr Halberd.ScanTask.ScanTask.__init__ Halberd.ScanTask.ScanTask-class.html#__init__ Halberd.ScanTask.ScanTask.addr 
Halberd.ScanTask.ScanTask-class.html#addr Halberd.clientlib.ConnectionRefused Halberd.clientlib.ConnectionRefused-class.html Halberd.clientlib.HTTPError.__deepcopy__ Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clientlib.HTTPClient Halberd.clientlib.HTTPClient-class.html Halberd.clientlib.HTTPClient.getHeaders Halberd.clientlib.HTTPClient-class.html#getHeaders Halberd.clientlib.HTTPClient._recv Halberd.clientlib.HTTPClient-class.html#_recv Halberd.clientlib.HTTPClient._connect Halberd.clientlib.HTTPClient-class.html#_connect Halberd.clientlib.HTTPClient.timeout Halberd.clientlib.HTTPClient-class.html#timeout Halberd.clientlib.HTTPClient.__del__ Halberd.clientlib.HTTPClient-class.html#__del__ Halberd.clientlib.HTTPClient._getHostAndPort Halberd.clientlib.HTTPClient-class.html#_getHostAndPort Halberd.clientlib.HTTPClient._putRequest Halberd.clientlib.HTTPClient-class.html#_putRequest Halberd.clientlib.HTTPClient._fillTemplate Halberd.clientlib.HTTPClient-class.html#_fillTemplate Halberd.clientlib.HTTPClient.__init__ Halberd.clientlib.HTTPClient-class.html#__init__ Halberd.clientlib.HTTPClient.template Halberd.clientlib.HTTPClient-class.html#template Halberd.clientlib.HTTPClient._getReply Halberd.clientlib.HTTPClient-class.html#_getReply Halberd.clientlib.HTTPClient.bufsize Halberd.clientlib.HTTPClient-class.html#bufsize Halberd.clientlib.HTTPClient._sendAll Halberd.clientlib.HTTPClient-class.html#_sendAll Halberd.clientlib.HTTPError Halberd.clientlib.HTTPError-class.html Halberd.clientlib.HTTPError.__deepcopy__ Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clientlib.HTTPSClient 
Halberd.clientlib.HTTPSClient-class.html Halberd.clientlib.HTTPClient._getHostAndPort Halberd.clientlib.HTTPClient-class.html#_getHostAndPort Halberd.clientlib.HTTPClient._recv Halberd.clientlib.HTTPClient-class.html#_recv Halberd.clientlib.HTTPSClient._connect Halberd.clientlib.HTTPSClient-class.html#_connect Halberd.clientlib.HTTPClient.template Halberd.clientlib.HTTPClient-class.html#template Halberd.clientlib.HTTPClient.__del__ Halberd.clientlib.HTTPClient-class.html#__del__ Halberd.clientlib.HTTPClient.getHeaders Halberd.clientlib.HTTPClient-class.html#getHeaders Halberd.clientlib.HTTPClient._putRequest Halberd.clientlib.HTTPClient-class.html#_putRequest Halberd.clientlib.HTTPClient._fillTemplate Halberd.clientlib.HTTPClient-class.html#_fillTemplate Halberd.clientlib.HTTPSClient.__init__ Halberd.clientlib.HTTPSClient-class.html#__init__ Halberd.clientlib.HTTPClient.timeout Halberd.clientlib.HTTPClient-class.html#timeout Halberd.clientlib.HTTPClient._getReply Halberd.clientlib.HTTPClient-class.html#_getReply Halberd.clientlib.HTTPClient.bufsize Halberd.clientlib.HTTPClient-class.html#bufsize Halberd.clientlib.HTTPSClient._sendAll Halberd.clientlib.HTTPSClient-class.html#_sendAll Halberd.clientlib.HTTPSError Halberd.clientlib.HTTPSError-class.html Halberd.clientlib.HTTPError.__deepcopy__ Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clientlib.InvalidURL Halberd.clientlib.InvalidURL-class.html Halberd.clientlib.HTTPError.__deepcopy__ Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clientlib.TimedOut Halberd.clientlib.TimedOut-class.html Halberd.clientlib.HTTPError.__deepcopy__ 
Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clientlib.UnknownReply Halberd.clientlib.UnknownReply-class.html Halberd.clientlib.HTTPError.__deepcopy__ Halberd.clientlib.HTTPError-class.html#__deepcopy__ Halberd.clientlib.HTTPError.__str__ Halberd.clientlib.HTTPError-class.html#__str__ Halberd.clientlib.HTTPError.__init__ Halberd.clientlib.HTTPError-class.html#__init__ Halberd.clues.Clue.Clue Halberd.clues.Clue.Clue-class.html Halberd.clues.Clue.Clue.__ne__ Halberd.clues.Clue.Clue-class.html#__ne__ Halberd.clues.Clue.Clue.parse Halberd.clues.Clue.Clue-class.html#parse Halberd.clues.Clue.Clue.__init__ Halberd.clues.Clue.Clue-class.html#__init__ Halberd.clues.Clue.Clue.normalize Halberd.clues.Clue.Clue-class.html#normalize Halberd.clues.Clue.Clue._get_set_cookie Halberd.clues.Clue.Clue-class.html#_get_set_cookie Halberd.clues.Clue.Clue._get_expires Halberd.clues.Clue.Clue-class.html#_get_expires Halberd.clues.Clue.Clue.setTimestamp Halberd.clues.Clue.Clue-class.html#setTimestamp Halberd.clues.Clue.Clue._get_content_location Halberd.clues.Clue.Clue-class.html#_get_content_location Halberd.clues.Clue.Clue._get_etag Halberd.clues.Clue.Clue-class.html#_get_etag Halberd.clues.Clue.Clue._get_cache_expires Halberd.clues.Clue.Clue-class.html#_get_cache_expires Halberd.clues.Clue.Clue._get_content_length Halberd.clues.Clue.Clue-class.html#_get_content_length Halberd.clues.Clue.Clue._get_date Halberd.clues.Clue.Clue-class.html#_get_date Halberd.clues.Clue.Clue._calcDiff Halberd.clues.Clue.Clue-class.html#_calcDiff Halberd.clues.Clue.Clue._get_age Halberd.clues.Clue.Clue-class.html#_get_age Halberd.clues.Clue.Clue._get_server Halberd.clues.Clue.Clue-class.html#_get_server Halberd.clues.Clue.Clue.incCount Halberd.clues.Clue.Clue-class.html#incCount Halberd.clues.Clue.Clue.getCount 
Halberd.clues.Clue.Clue-class.html#getCount Halberd.clues.Clue.Clue.__eq__ Halberd.clues.Clue.Clue-class.html#__eq__ Halberd.clues.Clue.Clue._get_content_type Halberd.clues.Clue.Clue-class.html#_get_content_type Halberd.clues.Clue.Clue._get_last_modified Halberd.clues.Clue.Clue-class.html#_get_last_modified Halberd.clues.Clue.Clue._updateDigest Halberd.clues.Clue.Clue-class.html#_updateDigest Halberd.clues.Clue.Clue.__repr__ Halberd.clues.Clue.Clue-class.html#__repr__ Halberd.clues.file.ClueDir Halberd.clues.file.ClueDir-class.html Halberd.clues.file.ClueDir.save Halberd.clues.file.ClueDir-class.html#save Halberd.clues.file.ClueDir.__init__ Halberd.clues.file.ClueDir-class.html#__init__ Halberd.clues.file.ClueDir._mkdir Halberd.clues.file.ClueDir-class.html#_mkdir Halberd.clues.file.ClueDir._sanitize Halberd.clues.file.ClueDir-class.html#_sanitize Halberd.clues.file.InvalidFile Halberd.clues.file.InvalidFile-class.html Halberd.clues.file.InvalidFile.__str__ Halberd.clues.file.InvalidFile-class.html#__str__ Halberd.clues.file.InvalidFile.__init__ Halberd.clues.file.InvalidFile-class.html#__init__ Halberd.conflib.ConfReader Halberd.conflib.ConfReader-class.html Halberd.conflib.ConfReader._getAddr Halberd.conflib.ConfReader-class.html#_getAddr Halberd.conflib.ConfReader.__del__ Halberd.conflib.ConfReader-class.html#__del__ Halberd.conflib.ConfReader.parse Halberd.conflib.ConfReader-class.html#parse Halberd.conflib.ConfReader.writeDefault Halberd.conflib.ConfReader-class.html#writeDefault Halberd.conflib.ConfReader.close Halberd.conflib.ConfReader-class.html#close Halberd.conflib.ConfReader.open Halberd.conflib.ConfReader-class.html#open Halberd.conflib.ConfReader.__init__ Halberd.conflib.ConfReader-class.html#__init__ Halberd.conflib.InvalidConfFile Halberd.conflib.InvalidConfFile-class.html Halberd.crew.BaseScanner Halberd.crew.BaseScanner-class.html Halberd.crew.BaseScanner.process Halberd.crew.BaseScanner-class.html#process Halberd.crew.BaseScanner.__init__ 
Halberd.crew.BaseScanner-class.html#__init__ Halberd.crew.BaseScanner.setTimeout Halberd.crew.BaseScanner-class.html#setTimeout Halberd.crew.BaseScanner.hasExpired Halberd.crew.BaseScanner-class.html#hasExpired Halberd.crew.BaseScanner.remaining Halberd.crew.BaseScanner-class.html#remaining Halberd.crew.BaseScanner.run Halberd.crew.BaseScanner-class.html#run Halberd.crew.BaseScanner.timeout Halberd.crew.BaseScanner-class.html#timeout Halberd.crew.Manager Halberd.crew.Manager-class.html Halberd.crew.Manager.process Halberd.crew.Manager-class.html#process Halberd.crew.BaseScanner.__init__ Halberd.crew.BaseScanner-class.html#__init__ Halberd.crew.BaseScanner.setTimeout Halberd.crew.BaseScanner-class.html#setTimeout Halberd.crew.Manager.showStats Halberd.crew.Manager-class.html#showStats Halberd.crew.BaseScanner.hasExpired Halberd.crew.BaseScanner-class.html#hasExpired Halberd.crew.BaseScanner.remaining Halberd.crew.BaseScanner-class.html#remaining Halberd.crew.BaseScanner.run Halberd.crew.BaseScanner-class.html#run Halberd.crew.Manager.refresh_interval Halberd.crew.Manager-class.html#refresh_interval Halberd.crew.BaseScanner.timeout Halberd.crew.BaseScanner-class.html#timeout Halberd.crew.ScanState Halberd.crew.ScanState-class.html Halberd.crew.ScanState.getError Halberd.crew.ScanState-class.html#getError Halberd.crew.ScanState.incMissed Halberd.crew.ScanState-class.html#incMissed Halberd.crew.ScanState.insertClue Halberd.crew.ScanState-class.html#insertClue Halberd.crew.ScanState.getClues Halberd.crew.ScanState-class.html#getClues Halberd.crew.ScanState.setError Halberd.crew.ScanState-class.html#setError Halberd.crew.ScanState.shouldstop Halberd.crew.ScanState-class.html#shouldstop Halberd.crew.ScanState.__init__ Halberd.crew.ScanState-class.html#__init__ Halberd.crew.ScanState.getStats Halberd.crew.ScanState-class.html#getStats Halberd.crew.Scanner Halberd.crew.Scanner-class.html Halberd.crew.Scanner.process Halberd.crew.Scanner-class.html#process 
Halberd.crew.Scanner.makeClue Halberd.crew.Scanner-class.html#makeClue Halberd.crew.BaseScanner.__init__ Halberd.crew.BaseScanner-class.html#__init__ Halberd.crew.BaseScanner.setTimeout Halberd.crew.BaseScanner-class.html#setTimeout Halberd.crew.BaseScanner.hasExpired Halberd.crew.BaseScanner-class.html#hasExpired Halberd.crew.BaseScanner.remaining Halberd.crew.BaseScanner-class.html#remaining Halberd.crew.BaseScanner.run Halberd.crew.BaseScanner-class.html#run Halberd.crew.BaseScanner.timeout Halberd.crew.BaseScanner-class.html#timeout Halberd.crew.WorkCrew Halberd.crew.WorkCrew-class.html Halberd.crew.WorkCrew.task Halberd.crew.WorkCrew-class.html#task Halberd.crew.WorkCrew._initLocal Halberd.crew.WorkCrew-class.html#_initLocal Halberd.crew.WorkCrew.working Halberd.crew.WorkCrew-class.html#working Halberd.crew.WorkCrew.scan Halberd.crew.WorkCrew-class.html#scan Halberd.crew.WorkCrew._setupSigHandler Halberd.crew.WorkCrew-class.html#_setupSigHandler Halberd.crew.WorkCrew._getClues Halberd.crew.WorkCrew-class.html#_getClues Halberd.crew.WorkCrew._restoreSigHandler Halberd.crew.WorkCrew-class.html#_restoreSigHandler Halberd.crew.WorkCrew.prev Halberd.crew.WorkCrew-class.html#prev Halberd.crew.WorkCrew.__init__ Halberd.crew.WorkCrew-class.html#__init__ Halberd.shell.BaseStrategy Halberd.shell.BaseStrategy-class.html Halberd.shell.BaseStrategy._analyze Halberd.shell.BaseStrategy-class.html#_analyze Halberd.shell.BaseStrategy.execute Halberd.shell.BaseStrategy-class.html#execute Halberd.shell.BaseStrategy._scan Halberd.shell.BaseStrategy-class.html#_scan Halberd.shell.BaseStrategy.__init__ Halberd.shell.BaseStrategy-class.html#__init__ Halberd.shell.ClueReaderStrategy Halberd.shell.ClueReaderStrategy-class.html Halberd.shell.BaseStrategy._analyze Halberd.shell.BaseStrategy-class.html#_analyze Halberd.shell.ClueReaderStrategy.execute Halberd.shell.ClueReaderStrategy-class.html#execute Halberd.shell.BaseStrategy._scan Halberd.shell.BaseStrategy-class.html#_scan 
Halberd.shell.ClueReaderStrategy.__init__ Halberd.shell.ClueReaderStrategy-class.html#__init__ Halberd.shell.MultiScanStrategy Halberd.shell.MultiScanStrategy-class.html Halberd.shell.BaseStrategy._analyze Halberd.shell.BaseStrategy-class.html#_analyze Halberd.shell.MultiScanStrategy.execute Halberd.shell.MultiScanStrategy-class.html#execute Halberd.shell.MultiScanStrategy._targets Halberd.shell.MultiScanStrategy-class.html#_targets Halberd.shell.BaseStrategy._scan Halberd.shell.BaseStrategy-class.html#_scan Halberd.shell.MultiScanStrategy.__init__ Halberd.shell.MultiScanStrategy-class.html#__init__ Halberd.shell.ScanError Halberd.shell.ScanError-class.html Halberd.shell.ScanError.__str__ Halberd.shell.ScanError-class.html#__str__ Halberd.shell.ScanError.__init__ Halberd.shell.ScanError-class.html#__init__ Halberd.shell.UniScanStrategy Halberd.shell.UniScanStrategy-class.html Halberd.shell.BaseStrategy._analyze Halberd.shell.BaseStrategy-class.html#_analyze Halberd.shell.UniScanStrategy.execute Halberd.shell.UniScanStrategy-class.html#execute Halberd.shell.BaseStrategy._scan Halberd.shell.BaseStrategy-class.html#_scan Halberd.shell.UniScanStrategy.__init__ Halberd.shell.UniScanStrategy-class.html#__init__ halberd-0.2.4/doc/api/Halberd.clientlib.TimedOut-class.html0000644000175000017500000001610511431512607022134 0ustar jmbrjmbr Halberd.clientlib.TimedOut
Package Halberd :: Module clientlib :: Class TimedOut
[hide private]
[frames] | no frames]

Class TimedOut

source code

              object --+            
                       |            
exceptions.BaseException --+        
                           |        
        exceptions.Exception --+    
                               |    
                       HTTPError --+
                                   |
                                  TimedOut

Operation timed out

Instance Methods [hide private]

Inherited from HTTPError: __deepcopy__, __init__, __str__

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/doc/api/Halberd.clientlib.HTTPSClient-class.html0000644000175000017500000003201011431512607022434 0ustar jmbrjmbr Halberd.clientlib.HTTPSClient
Package Halberd :: Module clientlib :: Class HTTPSClient
[hide private]
[frames] | no frames]

Class HTTPSClient

source code

HTTPClient --+
             |
            HTTPSClient

Special-purpose HTTPS client.

Instance Methods [hide private]
 
__init__(self)
Initializes the object.
source code
 
_connect(self, addr)
Connect to the target web server.
source code
 
_sendAll(self, data)
Sends a string to the socket.
source code

Inherited from HTTPClient: __del__, getHeaders

Instance Variables [hide private]

Inherited from HTTPClient: bufsize, template, timeout

Inherited from HTTPClient (private): _recv

Method Details [hide private]

__init__(self)
(Constructor)

source code 

Initializes the object.

Overrides: HTTPClient.__init__
(inherited documentation)

_connect(self, addr)

source code 

Connect to the target web server.

Parameters:
  • addr (tuple) - The target's address.
Raises:
  • HTTPSError - In case there's some mistake during the SSL negotiation.
Overrides: HTTPClient._connect

_sendAll(self, data)

source code 

Sends a string to the socket.

Overrides: HTTPClient._sendAll

halberd-0.2.4/doc/api/Halberd.crew.WorkCrew-class.html0000644000175000017500000002414011431512607021136 0ustar jmbrjmbr Halberd.crew.WorkCrew
Package Halberd :: Module crew :: Class WorkCrew
[hide private]
[frames] | no frames]

Class WorkCrew

source code

Pool of scanners working in parallel.

Instance Methods [hide private]
 
__init__(self, scantask) source code
 
_setupSigHandler(self)
Performs what's needed to catch SIGINT.
source code
 
_restoreSigHandler(self)
Restore previous SIGINT handler.
source code
 
_initLocal(self)
Initializes conventional (local) scanner threads.
source code
 
scan(self)
Perform a parallel load-balancer scan.
source code
 
_getClues(self)
Returns a sequence of clues obtained during the scan.
source code
Instance Variables [hide private]
  prev
Previous SIGINT handler.
ScanTask task
A reference to scantask.
bool working
Indicates whether the crew is working or idle.
halberd-0.2.4/doc/api/toc-Halberd.conflib-module.html0000644000175000017500000000301711431512607021013 0ustar jmbrjmbr conflib

Module conflib


Classes

ConfReader
InvalidConfFile

Variables

__package__
default_conf
default_proxy_port

[hide private] halberd-0.2.4/doc/api/toc-Halberd.util-module.html0000644000175000017500000000311211431512607020350 0ustar jmbrjmbr util

Module util


Functions

addresses
hostname
utctime

Variables

__package__
table

[hide private] halberd-0.2.4/doc/api/Halberd.clues.file-pysrc.html0000644000175000017500000013014711431512607020525 0ustar jmbrjmbr Halberd.clues.file
Package Halberd :: Package clues :: Module file
[hide private]
[frames] | no frames]

Source Code for Module Halberd.clues.file

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Utilities for clue storage. 
  4   
  5  Provides functionality needed to store clues on disk. 
  6  """ 
  7   
  8  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
  9  # 
 10  # This program is free software; you can redistribute it and/or modify 
 11  # it under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation; either version 2 of the License, or 
 13  # (at your option) any later version. 
 14  # 
 15  # This program is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  # 
 20  # You should have received a copy of the GNU General Public License 
 21  # along with this program; if not, write to the Free Software 
 22  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 23   
 24   
 25  import os 
 26  import csv 
 27  import types 
 28  import shutil 
 29   
 30  import Halberd.util 
 31  from Halberd.clues.Clue import Clue 
 32   
 33   
class InvalidFile(Exception):
    """Raised when a file cannot be interpreted as a valid clue file.

    The explanation is stored in the C{msg} attribute, which is also what
    C{str()} returns for the exception.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
42 43
def save(filename, clues):
    """Save clues to a file.

    Each clue becomes one CSV row holding its hit count, its C{_local}
    attribute and its headers.

    @param filename: Name of the file where the clues will be written to.
    @type filename: C{str}

    @param clues: Sequence of clues to write.
    @type clues: C{list}
    """
    # Create or truncate the destination file.
    cluefp = open(filename, 'w+')
    try:
        writer = csv.writer(cluefp)

        for clue in clues:
            # Store the most relevant clue information.
            writer.writerow((clue.getCount(), clue._local, clue.headers))
    finally:
        # Release the file even when a row cannot be written.
        cluefp.close()
62 63
def load(filename):
    """Load clues from file.

    Every CSV row is expected to hold three fields: hit count, local time
    and the textual representation of the header sequence.

    @param filename: Name of the file where the clues are stored.
    @type filename: C{str}

    @return: Clues extracted from the file.
    @rtype: C{list}

    @raise InvalidFile: In case there's a problem while reinterpreting the
    clues.
    """
    cluefp = open(filename, 'r')
    try:
        reader = csv.reader(cluefp)

        clues = []
        for tup in reader:
            try:
                count, localtime, headers = tup
            except ValueError:
                raise InvalidFile('Cannot unpack fields')

            # Recreate the current clue.
            clue = Clue()
            try:
                clue._count = int(count)
                clue._local = float(localtime)
            except ValueError:
                raise InvalidFile('Could not convert fields')

            # SECURITY: eval() runs whatever expression the clue file holds.
            # Passing empty dictionaries does not sandbox it because the
            # builtins are still reachable, so only trusted clue files must
            # be loaded.  TODO: consider ast.literal_eval() if clue files
            # only ever contain literals.
            clue.headers = eval(headers, {}, {})
            if not isinstance(clue.headers, (list, tuple)):
                raise InvalidFile('Wrong clue header field')
            clue.parse(clue.headers)

            clues.append(clue)
    finally:
        # Release the file on every exit path, including InvalidFile.
        cluefp.close()

    return clues
105 106
class ClueDir:
    """Stores clues hierarchically using the underlying filesystem.

    ClueDir tries to be as portable as possible but requires the host operating
    system to be able to create long filenames (and directories, of course).

    This is an example layout::

        http___www_microsoft_com/
        http___www_microsoft_com/207_46_134_221.clu
        http___www_microsoft_com/207_46_156_220.clu
        http___www_microsoft_com/207_46_156_252.clu
                .
                .
                .
    """

    def __init__(self, root=None):
        """Initializes ClueDir object.

        @param root: Root folder where to start creating sub-folders. When
        omitted (or empty) the current working directory is used.
        @type root: C{str}
        """
        self.ext = 'clu'
        if not root:
            self.root = os.getcwd()
        else:
            self.root = root
            self._mkdir(self.root)

    def _sanitize(self, url):
        """Filter out potentially dangerous chars.
        """
        return url.translate(Halberd.util.table)

    def _mkdir(self, dest):
        """Creates a directory to store clues.

        If the directory already exists it won't complain about that.

        @param dest: Path of the directory to create.
        @type dest: C{str}

        @return: The path that was passed in.
        @rtype: C{str}

        @raise InvalidFile: If C{dest} exists but is not a directory.
        @raise OSError: If the directory can't be created.
        """
        # Use the stat module directly instead of reaching it through
        # shutil, which only exposes it by accident.
        import stat

        try:
            st = os.stat(dest)
        except OSError:
            os.mkdir(dest)
        else:
            if not stat.S_ISDIR(st.st_mode):
                raise InvalidFile(
                    '%s already exist and is not a directory' % dest)

        return dest

    def save(self, url, addr, clues):
        """Hierarchically write clues.

        @param url: URL scanned (will be used as a directory name).
        @type url: C{str}

        @param addr: Address of the target.
        @type addr: C{str}

        @param clues: Clues to be stored.
        @type clues: C{list}

        @raise OSError: If the directories can't be created.
        @raise IOError: If the file can't be stored successfully.
        """
        assert url and addr

        urldir = self._mkdir(os.path.join(self.root, self._sanitize(url)))
        filename = self._sanitize(addr) + os.extsep + self.ext
        cluefile = os.path.join(urldir, filename)

        Halberd.clues.file.save(cluefile, clues)
179 180 181 # vim: ts=4 sw=4 et 182

halberd-0.2.4/doc/api/Halberd.crew-module.html0000644000175000017500000002257611431512607017567 0ustar jmbrjmbr Halberd.crew
Package Halberd :: Module crew
[hide private]
[frames] | no frames]

Module crew

source code

Work crew pattern of parallel scanners

Overview

A work crew is instantiated passing a ScanTask object as a parameter, thus defining the target and the way the scanning should be done. After the initialization of the work crew it can be used to scan the target and get the obtained clues back.

>>> crew = WorkCrew(scantask)
>>> clues = crew.scan()

Requirements

These are the features that the WorkCrew must provide:

  1. There are 3 different types of consumers:
    • Controller thread (Performs timing + error-checking).
    • Local scanning thread.
    • Remote scanning thread.
  2. We need a way to signal:
    • When a fatal error has happened.
    • When the user has pressed Control-C

Types of scanning threads

The WorkCrew object spawns different kinds of threads. Here's a brief summary of what they do:

  • Manager: Detects when the time for performing the scan has expired and notifies the rest of the threads. This code is executed in the main thread in order to be able to appropriately catch signals, etc.
  • Scanner: Performs a load-balancer scan from the current machine.

The following is a diagram showing the way it works:

                                    .--> Manager --.
                                    |              |
                                    +--> Scanner --+
       .----------.   .----------.  |              |   .-------.
IN --> | ScanTask |->-| WorkCrew |--+--> Scanner --+->-| Clues |--> OUT
       `----------'   `----------'  |              |   `-------'
                                    +--> Scanner --+
                                    |              |
                                    `--> Scanner --'
Classes [hide private]
  ScanState
Shared state among scanner threads.
  WorkCrew
Pool of scanners working in parallel.
  BaseScanner
Base class for load balancer scanning threads.
  Scanner
Scans the target host from the local machine.
  Manager
Performs management tasks during the scan.
Variables [hide private]
  __package__ = 'Halberd'
halberd-0.2.4/doc/api/Halberd.conflib-module.html0000644000175000017500000002173511431512607020237 0ustar jmbrjmbr Halberd.conflib
Package Halberd :: Module conflib
[hide private]
[frames] | no frames]

Module conflib

source code

Configuration file management module.

Halberd uses configuration files to store relevant information needed for certain protocols (SSL) or modes of operation (proxy, distributed client/server, etc.).

This module takes care of reading and writing configuration files.

Classes [hide private]
  InvalidConfFile
Invalid configuration file.
  ConfReader
Takes care of turning configuration files into meaningful information.
Variables [hide private]
int default_proxy_port = 8080
Default TCP port to listen on when acting as a proxy.
  default_conf = '\n# ==========================================...
  __package__ = 'Halberd'
Variables Details [hide private]

default_conf

Value:
'''
# ====================================================================\
========
# halberd configuration file.
# ====================================================================\
========

[proxy]
...

halberd-0.2.4/doc/api/index.html0000644000175000017500000000111511431512610015067 0ustar jmbrjmbr API Documentation halberd-0.2.4/doc/api/Halberd.clues-pysrc.html0000644000175000017500000001741711431512610017605 0ustar jmbrjmbr Halberd.clues
Package Halberd :: Package clues
[hide private]
[frames] | no frames]

Source Code for Package Halberd.clues

 1  # -*- coding: iso-8859-1 -*- 
 2   
 3  """Clue management package 
 4   
 5  This package contains modules that implement functionality related to creation, 
 6  analysis and storage of clues. 
 7  """ 
 8   
 9  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
10  # 
11  # This program is free software; you can redistribute it and/or modify 
12  # it under the terms of the GNU General Public License as published by 
13  # the Free Software Foundation; either version 2 of the License, or 
14  # (at your option) any later version. 
15  # 
16  # This program is distributed in the hope that it will be useful, 
17  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
18  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
19  # GNU General Public License for more details. 
20  # 
21  # You should have received a copy of the GNU General Public License 
22  # along with this program; if not, write to the Free Software 
23  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
24   
25  __all__ = [ 
26      'Clue', 
27      'analysis', 
28      'file' 
29  ] 
30   
31   
32  # vim: ts=4 sw=4 et 
33   

halberd-0.2.4/doc/api/toc-Halberd.clues-module.html0000644000175000017500000000214711431512607020515 0ustar jmbrjmbr clues

Module clues


Variables


[hide private] halberd-0.2.4/doc/api/Halberd.util-pysrc.html0000644000175000017500000004430411431512607017450 0ustar jmbrjmbr Halberd.util
Package Halberd :: Module util
[hide private]
[frames] | no frames]

Source Code for Module Halberd.util

 1  # -*- coding: iso-8859-1 -*- 
 2   
 3  """Miscellaneous functions. 
 4   
 5  @var table: Translation table for normalizing strings. 
 6  @type table: C{str} 
 7  """ 
 8   
 9  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
10  # 
11  # This program is free software; you can redistribute it and/or modify 
12  # it under the terms of the GNU General Public License as published by 
13  # the Free Software Foundation; either version 2 of the License, or 
14  # (at your option) any later version. 
15  # 
16  # This program is distributed in the hope that it will be useful, 
17  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
18  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
19  # GNU General Public License for more details. 
20  # 
21  # You should have received a copy of the GNU General Public License 
22  # along with this program; if not, write to the Free Software 
23  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
24   
25   
26  import time 
27  import socket 
28  import urlparse 
29   
30   
31  table = '________________________________________________0123456789_______ABCDEFGHIJKLMNOPQRSTUVWXYZ______abcdefghijklmnopqrstuvwxyz_____________________________________________________________________________________________________________________________________' 
32   
33   
34 -def _gen_table():
35 """Generate translation table. 36 """ 37 tab = '' 38 for c in map(chr, xrange(256)): 39 tab += (c.isalnum() and c) or '_' 40 41 return tab
42 43
def utctime():
    """Convert the current UTC broken-down time into a timestamp.

    @return: Result of feeding C{time.gmtime()} to C{time.mktime()}.
    @rtype: C{float}
    """
    # NOTE(review): time.mktime() interprets its argument as local time, so
    # this only equals the epoch time when the local zone is UTC. Callers
    # presumably rely on the current value -- confirm before changing.
    return time.mktime(time.gmtime())
46 47
def hostname(url):
    """Get the hostname part of an URL.

    @param url: A valid URL (must be preceded by scheme://).
    @type url: C{str}

    @return: Hostname corresponding to the URL or the empty string in case of
    failure.
    @rtype: C{str}
    """
    # Resolve the parser locally so the function works regardless of where
    # the standard library keeps it.
    try:
        from urlparse import urlparse as _urlparse
    except ImportError:
        from urllib.parse import urlparse as _urlparse

    netloc = _urlparse(url)[1]
    if netloc == '':
        return ''

    # Drop an optional "user:password@" prefix; otherwise the split below
    # would return the user name instead of the host.
    hostport = netloc.rsplit('@', 1)[-1]

    return hostport.split(':', 1)[0]
63
def addresses(host):
    """Get the network addresses to which a given host resolves to.

    @param host: Hostname we want to resolve.
    @type host: C{str}

    @return: Network addresses as returned by C{socket.gethostbyname_ex}, or
    an empty tuple when the lookup fails.
    @rtype: C{list} on success, empty C{tuple} on failure.
    """
    assert host != ''

    try:
        # gethostbyname_ex() yields (name, aliases, addresses); only the
        # address list is of interest here.
        return socket.gethostbyname_ex(host)[2]
    except socket.error:
        return ()
if __name__ == '__main__':
    # Emit the literal used for the module-level ``table`` constant.
    print("table = '%s'" % _gen_table())


# vim: ts=4 sw=4 et

halberd-0.2.4/doc/api/Halberd.clues.Clue.Clue-class.html0000644000175000017500000006721411431512607021336 0ustar jmbrjmbr Halberd.clues.Clue.Clue
Halberd :: clues :: Clue :: Clue :: Class Clue
[hide private]
[frames] | no frames]

Class Clue

source code

A clue is what we use to tell apart real servers behind a virtual IP.

Clues are gathered during several connections to a web server and they allow us to try to identify patterns in its responses. Those patterns could allow us to find out which real servers are behind a VIP

Instance Methods [hide private]
 
__init__(self) source code
 
parse(self, headers)
Extracts all relevant information from the MIME headers replied by the target.
source code
 
_updateDigest(self)
Updates header fingerprint.
source code
int
_calcDiff(self)
Compute the time difference between the remote and local clocks.
source code
 
incCount(self, num=1)
Increase the times this clue has been found.
source code
int.
getCount(self)
Retrieve the number of times the clue has been found
source code
 
setTimestamp(self, timestamp)
Sets the local clock attribute.
source code
 
__eq__(self, other) source code
 
__ne__(self, other) source code
 
__repr__(self) source code
 
_get_server(self, field)
Server:
source code
 
_get_date(self, field)
Date:
source code
 
_get_content_location(self, field)
Content-location:
source code
 
_get_set_cookie(self, field)
Set-cookie:
source code
 
_get_expires(self, field)
Expires:
source code
 
_get_age(self, field)
Age:
source code
 
_get_content_length(self, field)
Content-length:
source code
 
_get_last_modified(self, field)
Last-modified:
source code
 
_get_etag(self, field)
ETag:
source code
 
_get_cache_expires(self, field)
Cache-expires:
source code
 
_get_content_type(self, field)
Content-type:
source code
Static Methods [hide private]
str
normalize(name)
Normalize string.
source code
Method Details [hide private]

parse(self, headers)

source code 

Extracts all relevant information from the MIME headers replied by the target.

Parameters:
  • headers (str, list or tuple) - A set of MIME headers (a string as replied by the webserver or a previously parsed sequence of name, value tuples).
Raises:
  • TypeError - If headers is neither a string nor a sequence.

normalize(name)
Static Method

source code 

Normalize string.

This method takes a string coming out of mime-fields and transforms it into a valid Python identifier. That's done by removing invalid non-alphanumeric characters and also numeric ones placed at the beginning of the string.

Parameters:
  • name (str) - String to be normalized.
Returns: str
Normalized string.

_calcDiff(self)

source code 

Compute the time difference between the remote and local clocks.

Returns: int
Time difference.

incCount(self, num=1)

source code 

Increase the times this clue has been found.

Parameters:
  • num (int) - A positive non-zero number of hits to increase.
Raises:
  • ValueError - in case num is less than or equal to zero.

getCount(self)

source code 

Retrieve the number of times the clue has been found

Returns: int.
Number of hits.

setTimestamp(self, timestamp)

source code 

Sets the local clock attribute.

Parameters:
  • timestamp (int) - The local time (expressed in seconds since the Epoch) when the connection to the target was successfully completed.

halberd-0.2.4/doc/api/Halberd-pysrc.html0000644000175000017500000002043111431512607016467 0ustar jmbrjmbr Halberd
Package Halberd
[hide private]
[frames] | no frames]

Source Code for Package Halberd

 1  # -*- coding: iso-8859-1 -*- 
 2   
 3  """HTTP load balancer detector module. 
 4  """ 
 5   
 6  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 7  # 
 8  # This program is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  # 
13  # This program is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with this program; if not, write to the Free Software 
20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
21   
22  __all__ = [ 
23      'version', 
24      'clientlib', 
25      'clues', 
26      'reportlib', 
27      'conflib', 
28      'util', 
29      'shell', 
30      'crew', 
31      'ScanTask', 
32      'logger', 
33  ] 
34   
35   
36  # vim: ts=4 sw=4 et 
37   

halberd-0.2.4/doc/api/Halberd.clientlib.HTTPError-class.html0000644000175000017500000002672711431512607022206 0ustar jmbrjmbr Halberd.clientlib.HTTPError
Package Halberd :: Module clientlib :: Class HTTPError
[hide private]
[frames] | no frames]

Class HTTPError

source code

              object --+        
                       |        
exceptions.BaseException --+    
                           |    
        exceptions.Exception --+
                               |
                              HTTPError
Known Subclasses:

Generic HTTP exception

Instance Methods [hide private]
 
__init__(self, msg)
x.__init__(...) initializes x; see x.__class__.__doc__ for signature
source code
 
__str__(self)
str(x)
source code
 
__deepcopy__(self, memo) source code

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

Method Details [hide private]

__init__(self, msg)
(Constructor)

source code 

x.__init__(...) initializes x; see x.__class__.__doc__ for signature

Overrides: object.__init__
(inherited documentation)

__str__(self)
(Informal representation operator)

source code 

str(x)

Overrides: object.__str__
(inherited documentation)

halberd-0.2.4/doc/api/Halberd.conflib.InvalidConfFile-class.html0000644000175000017500000001514311431512607023046 0ustar jmbrjmbr Halberd.conflib.InvalidConfFile
Package Halberd :: Module conflib :: Class InvalidConfFile
[hide private]
[frames] | no frames]

Class InvalidConfFile

source code

              object --+        
                       |        
exceptions.BaseException --+    
                           |    
        exceptions.Exception --+
                               |
                              InvalidConfFile

Invalid configuration file.

Instance Methods [hide private]

Inherited from exceptions.Exception: __init__, __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __str__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/doc/api/Halberd.clues.file.ClueDir-class.html0000644000175000017500000002737011431512607022023 0ustar jmbrjmbr Halberd.clues.file.ClueDir
Halberd :: clues :: file :: ClueDir :: Class ClueDir
[hide private]
[frames] | no frames]

Class ClueDir

source code

Stores clues hierarchically using the underlying filesystem.

ClueDir tries to be as portable as possible but requires the host operating system to be able to create long filenames (and directories, of course).

This is an example layout:

   http___www_microsoft_com/
   http___www_microsoft_com/207_46_134_221.clu
   http___www_microsoft_com/207_46_156_220.clu
   http___www_microsoft_com/207_46_156_252.clu
           .
           .
           .
Instance Methods [hide private]
 
__init__(self, root=None)
Initializes ClueDir object.
source code
 
_sanitize(self, url)
Filter out potentially dangerous chars.
source code
 
_mkdir(self, dest)
Creates a directory to store clues.
source code
 
save(self, url, addr, clues)
Hierarchically write clues.
source code
Method Details [hide private]

__init__(self, root=None)
(Constructor)

source code 

Initializes ClueDir object.

Parameters:
  • root (str) - Root folder where to start creating sub-folders.

_mkdir(self, dest)

source code 

Creates a directory to store clues.

If the directory already exists it won't complain about that.

save(self, url, addr, clues)

source code 

Hierarchically write clues.

Parameters:
  • url (url) - URL scanned (will be used as a directory name).
  • addr (str) - Address of the target.
  • clues (list) - Clues to be stored.
Raises:
  • OSError - If the directories can't be created.
  • IOError - If the file can't be stored successfully.

halberd-0.2.4/doc/api/Halberd.conflib-pysrc.html0000644000175000017500000010535711431512610020107 0ustar jmbrjmbr Halberd.conflib
Package Halberd :: Module conflib
[hide private]
[frames] | no frames]

Source Code for Module Halberd.conflib

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Configuration file management module. 
  4   
  5  Halberd uses configuration files to store relevant information needed for 
  6  certain protocols (SSL) or modes of operation (proxy, distributed 
  7  client/server, etc.). 
  8   
  9  This module takes care of reading and writing configuration files. 
 10   
 11  @var default_proxy_port: Default TCP port to listen when acting as a proxy. 
 12  @type default_proxy_port: C{int} 
 13  """ 
 14   
 15  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 16  # 
 17  # This program is free software; you can redistribute it and/or modify 
 18  # it under the terms of the GNU General Public License as published by 
 19  # the Free Software Foundation; either version 2 of the License, or 
 20  # (at your option) any later version. 
 21  # 
 22  # This program is distributed in the hope that it will be useful, 
 23  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 24  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 25  # GNU General Public License for more details. 
 26  # 
 27  # You should have received a copy of the GNU General Public License 
 28  # along with this program; if not, write to the Free Software 
 29  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 30   
 31   
 32  import os 
 33  import ConfigParser 
 34   
 35   
 36  default_proxy_port = 8080 
 37   
 38  default_conf = r""" 
 39  # ============================================================================ 
 40  # halberd configuration file. 
 41  # ============================================================================ 
 42   
 43  [proxy] 
 44   
 45  address: 
 46  port: 8080 
 47   
 48  [ssl] 
 49   
 50  keyfile: 
 51  certfile: 
 52  """ 
 53   
 54   
class InvalidConfFile(Exception):
    """Invalid configuration file.
    """
58 59
class ConfReader:
    """Takes care of turning configuration files into meaningful information.
    """

    def __init__(self):
        # Parsed sections: maps a section name to a dict of its options.
        self.__dict = {}
        # File object of the currently opened configuration file, if any.
        self.__conf = None

        self.confparser = ConfigParser.SafeConfigParser()

    def open(self, fname):
        """Opens the configuration file.

        @param fname: Pathname to the configuration file.
        @type fname: C{str}

        @raise InvalidConfFile: In case the passed file is not a valid one.
        """
        import sys

        self.__conf = open(os.path.expanduser(fname), 'r')
        try:
            self.confparser.readfp(self.__conf, fname)
        except ConfigParser.MissingSectionHeaderError:
            # sys.exc_info() is used so the same code is valid under both
            # the old and the new "except" syntax.
            raise InvalidConfFile(sys.exc_info()[1])

    def close(self):
        """Release the configuration file's descriptor.
        """
        if self.__conf:
            self.__conf.close()

    def _getAddr(self, sectname, default_port):
        """Read a network address from the given section.

        @param sectname: Name of an already parsed section.
        @type sectname: C{str}

        @param default_port: Port used when the section has no (valid) port.
        @type default_port: C{int}

        @return: Address and port.
        @rtype: C{tuple}
        """
        section = self.__dict[sectname]
        addr = section.get('address', '')
        try:
            port = int(section.get('port', default_port))
        except ValueError:
            port = default_port

        return (addr, port)

    def parse(self):
        """Parses the configuration file.

        @return: Proxy server address (an empty tuple when there is no proxy
        section), SSL key file and SSL certificate file (C{None} when unset).
        @rtype: C{tuple}
        """
        assert self.__conf, 'The configuration file is not open'

        proxy_serv_addr = ()

        # The orthodox way of doing this is via ConfigParser.get*() but those
        # methods lack the convenience of dict.get. While another approach
        # could be to subclass ConfigParser I think it's overkill for the
        # current situation.
        for section in self.confparser.sections():
            sec = self.__dict.setdefault(section, {})
            for name, value in self.confparser.items(section):
                sec.setdefault(name, value)

        if 'proxy' in self.__dict:
            proxy_serv_addr = self._getAddr('proxy', default_proxy_port)

        # A configuration file without an [ssl] section is treated as one
        # with no key/certificate instead of failing with KeyError.
        ssl_section = self.__dict.get('ssl', {})
        keyfile = ssl_section.get('keyfile', None)
        certfile = ssl_section.get('certfile', None)

        if keyfile == '':
            keyfile = None
        if certfile == '':
            certfile = None

        return proxy_serv_addr, keyfile, certfile

    def writeDefault(self, conf_file):
        """Write a bare-bones configuration file

        @param conf_file: Target file where the default conf. will be written.
        @type conf_file: C{str}
        """
        assert conf_file and isinstance(conf_file, basestring)

        conf_fp = open(conf_file, 'w')
        try:
            conf_fp.write(default_conf)
        finally:
            # Close the descriptor even if the write fails.
            conf_fp.close()

    def __del__(self):
        self.close()
147 148 149 # vim: ts=4 sw=4 et 150

halberd-0.2.4/doc/api/Halberd.crew.Manager-class.html0000644000175000017500000003120111431512607020741 0ustar jmbrjmbr Halberd.crew.Manager
Package Halberd :: Module crew :: Class Manager
[hide private]
[frames] | no frames]

Class Manager

source code

        object --+            
                 |            
threading._Verbose --+        
                     |        
      threading.Thread --+    
                         |    
               BaseScanner --+
                             |
                            Manager

Performs management tasks during the scan.

Instance Methods [hide private]
 
process(self)
Controls the whole scanning process.
source code
 
showStats(self)
Displays certain statistics while the scan is happening.
source code

Inherited from BaseScanner: __init__, hasExpired, remaining, run, setTimeout

Inherited from threading.Thread: __repr__, getName, isAlive, isDaemon, is_alive, join, setDaemon, setName, start

Inherited from threading.Thread (private): _set_daemon, _set_ident

Inherited from threading._Verbose (private): _note

Inherited from object: __delattr__, __format__, __getattribute__, __hash__, __new__, __reduce__, __reduce_ex__, __setattr__, __sizeof__, __str__, __subclasshook__

Class Variables [hide private]
  refresh_interval = 0.25
Instance Variables [hide private]

Inherited from BaseScanner: timeout

Properties [hide private]

Inherited from threading.Thread: daemon, ident, name

Inherited from object: __class__

Method Details [hide private]

process(self)

source code 

Controls the whole scanning process.

This method checks when the timeout has expired and notifies the rest of the scanning threads that they should stop. It also displays (in case the user asked for it) detailed information regarding the process.

Overrides: BaseScanner.process

halberd-0.2.4/doc/api/help.html0000644000175000017500000002473111431512607014727 0ustar jmbrjmbr Help
 
[hide private]
[frames] | no frames]

API Documentation

This document contains the API (Application Programming Interface) documentation for this project. Documentation for the Python objects defined by the project is divided into separate pages for each package, module, and class. The API documentation also includes two pages containing information about the project as a whole: a trees page, and an index page.

Object Documentation

Each Package Documentation page contains:

  • A description of the package.
  • A list of the modules and sub-packages contained by the package.
  • A summary of the classes defined by the package.
  • A summary of the functions defined by the package.
  • A summary of the variables defined by the package.
  • A detailed description of each function defined by the package.
  • A detailed description of each variable defined by the package.

Each Module Documentation page contains:

  • A description of the module.
  • A summary of the classes defined by the module.
  • A summary of the functions defined by the module.
  • A summary of the variables defined by the module.
  • A detailed description of each function defined by the module.
  • A detailed description of each variable defined by the module.

Each Class Documentation page contains:

  • A class inheritance diagram.
  • A list of known subclasses.
  • A description of the class.
  • A summary of the methods defined by the class.
  • A summary of the instance variables defined by the class.
  • A summary of the class (static) variables defined by the class.
  • A detailed description of each method defined by the class.
  • A detailed description of each instance variable defined by the class.
  • A detailed description of each class (static) variable defined by the class.

Project Documentation

The Trees page contains the module and class hierarchies:

  • The module hierarchy lists every package and module, with modules grouped into packages. At the top level, and within each package, modules and sub-packages are listed alphabetically.
  • The class hierarchy lists every class, grouped by base class. If a class has more than one base class, then it will be listed under each base class. At the top level, and under each base class, classes are listed alphabetically.

The Index page contains indices of terms and identifiers:

  • The term index lists every term indexed by any object's documentation. For each term, the index provides links to each place where the term is indexed.
  • The identifier index lists the (short) name of every package, module, class, method, function, variable, and parameter. For each identifier, the index provides a short description, and a link to its documentation.

The Table of Contents

The table of contents occupies the two frames on the left side of the window. The upper-left frame displays the project contents, and the lower-left frame displays the module contents:

Project
Contents
...
API
Documentation
Frame


Module
Contents
 
...
 

The project contents frame contains a list of all packages and modules that are defined by the project. Clicking on an entry will display its contents in the module contents frame. Clicking on a special entry, labeled "Everything," will display the contents of the entire project.

The module contents frame contains a list of every submodule, class, type, exception, function, and variable defined by a module or package. Clicking on an entry will display its documentation in the API documentation frame. Clicking on the name of the module, at the top of the frame, will display the documentation for the module itself.

The "frames" and "no frames" buttons below the top navigation bar can be used to control whether the table of contents is displayed or not.

The Navigation Bar

A navigation bar is located at the top and bottom of every page. It indicates what type of page you are currently viewing, and allows you to go to related pages. The following table describes the labels on the navigation bar. Note that some labels (such as [Parent]) are not displayed on all pages.

Label Highlighted when... Links to...
[Parent] (never highlighted) the parent of the current package
[Package] viewing a package the package containing the current object
[Module] viewing a module the module containing the current object
[Class] viewing a class the class containing the current object
[Trees] viewing the trees page the trees page
[Index] viewing the index page the index page
[Help] viewing the help page the help page

The "show private" and "hide private" buttons below the top navigation bar can be used to control whether documentation for private objects is displayed. Private objects are usually defined as objects whose (short) names begin with a single underscore, but do not end with an underscore. For example, "_x", "__pprint", and "epydoc.epytext._tokenize" are private objects; but "re.sub", "__init__", and "type_" are not. However, if a module defines the "__all__" variable, then its contents are used to decide which objects are private.

A timestamp below the bottom navigation bar indicates when each page was last updated.

halberd-0.2.4/doc/api/class-tree.html0000644000175000017500000002213711431512607016037 0ustar jmbrjmbr Class Hierarchy
 
[hide private]
[frames] | no frames]
[ Module Hierarchy | Class Hierarchy ]

Class Hierarchy

halberd-0.2.4/doc/api/toc-Halberd-module.html0000644000175000017500000000214511431512607017401 0ustar jmbrjmbr Halberd

Module Halberd


Variables


[hide private] halberd-0.2.4/doc/api/Halberd.crew.BaseScanner-class.html0000644000175000017500000004515011431512607021563 0ustar jmbrjmbr Halberd.crew.BaseScanner
Package Halberd :: Module crew :: Class BaseScanner
[hide private]
[frames] | no frames]

Class BaseScanner

source code

        object --+        
                 |        
threading._Verbose --+    
                     |    
      threading.Thread --+
                         |
                        BaseScanner
Known Subclasses:

Base class for load balancer scanning threads.

Instance Methods [hide private]
 
__init__(self, state, scantask)
Initializes the scanning thread.
source code
int
remaining(self, end=None)
Seconds left until a given point in time.
source code
bool
hasExpired(self)
Expiration predicate.
source code
float
setTimeout(self, secs)
Compute an expiration time.
source code
 
run(self)
Perform the scan.
source code
 
process(self)
Perform a scanning task.
source code

Inherited from threading.Thread: __repr__, getName, isAlive, isDaemon, is_alive, join, setDaemon, setName, start

Inherited from threading.Thread (private): _set_daemon, _set_ident

Inherited from threading._Verbose (private): _note

Inherited from object: __delattr__, __format__, __getattribute__, __hash__, __new__, __reduce__, __reduce_ex__, __setattr__, __sizeof__, __str__, __subclasshook__

Instance Variables [hide private]
float timeout
Time (in seconds since the UNIX Epoch) when the scan will be stopped.
Properties [hide private]

Inherited from threading.Thread: daemon, ident, name

Inherited from object: __class__

Method Details [hide private]

__init__(self, state, scantask)
(Constructor)

source code 

Initializes the scanning thread.

Parameters:
  • state (instanceof(ScanState)) - Container to store the results of the scan (shared among scanning threads).
  • scantask (instanceof(ScanTask)) - Object providing information needed to perform the scan.
Overrides: object.__init__

remaining(self, end=None)

source code 

Seconds left until a given point in time.

Parameters:
  • end (float) - Ending time.
Returns: int
Remaining time until self.timeout

hasExpired(self)

source code 

Expiration predicate.

Returns: bool
True if the timeout has expired, False otherwise.

setTimeout(self, secs)

source code 

Compute an expiration time.

Parameters:
  • secs (int) - Amount of seconds to spend scanning the target.
Returns: float
The moment in time when the task expires.

run(self)

source code 

Perform the scan.

Overrides: threading.Thread.run

process(self)

source code 

Perform a scanning task.

This method should be overridden to do actual work.


halberd-0.2.4/doc/api/Halberd.clues-module.html0000644000175000017500000001361511431512607017734 0ustar jmbrjmbr Halberd.clues
Package Halberd :: Package clues
[hide private]
[frames] | no frames]

Package clues

source code

Clue management package

This package contains modules that implement functionality related to the creation, analysis and storage of clues.

Submodules [hide private]

Variables [hide private]
  __package__ = None
halberd-0.2.4/doc/api/Halberd.crew-pysrc.html0000644000175000017500000032264211431512610017431 0ustar jmbrjmbr Halberd.crew
Package Halberd :: Module crew
[hide private]
[frames] | no frames]

Source Code for Module Halberd.crew

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """\ 
  4  Work crew pattern of parallel scanners 
  5  ====================================== 
  6   
  7  Overview 
  8  -------- 
  9   
 10  A work crew is instantiated passing a ScanTask object as a parameter, thus 
 11  defining the target and the way the scanning should be done. After the 
 12  initialization of the work crew it can be used to scan the target and get the 
 13  obtained clues back. 
 14   
 15      >>> crew = WorkCrew(scantask) 
 16      >>> clues = crew.scan() 
 17   
 18  Requirements 
 19  ------------ 
 20   
 21  These are the features that the WorkCrew must provide: 
 22   
 23      1. There are 3 different types of consumers: 
 24          - Controller thread (Performs timing + error-checking). 
 25          - Local scanning thread. 
 26          - Remote scanning thread. 
 27   
 28      2. We need a way to signal: 
 29          - When a fatal error has happened. 
 30          - When the user has pressed Control-C 
 31   
 32  Types of scanning threads 
 33  ------------------------- 
 34   
 35  The WorkCrew object spawns different kinds of threads. Here's a brief summary 
 36  of what they do: 
 37   
 38      - Manager: Detects when the time for performing the scan has expired 
 39      and notifies the rest of the threads. This code is executed in the main 
 40      thread in order to be able to appropriately catch signals, etc. 
 41   
 42      - Scanner: Performs a load-balancer scan from the current machine. 
 43   
 44  The following is a diagram showing the way it works:: 
 45   
 46                                       .--> Manager --. 
 47                                       |              | 
 48                                       +--> Scanner --+ 
 49          .----------.   .----------.  |              |   .-------. 
 50   IN --> | ScanTask |->-| WorkCrew |--+--> Scanner --+->-| Clues |--> OUT 
 51          `----------'   `----------'  |              |   `-------' 
 52                                       +--> Scanner --+ 
 53                                       |              | 
 54                                       `--> Scanner --' 
 55  """ 
 56   
 57  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 58  # 
 59  # This program is free software; you can redistribute it and/or modify 
 60  # it under the terms of the GNU General Public License as published by 
 61  # the Free Software Foundation; either version 2 of the License, or 
 62  # (at your option) any later version. 
 63  # 
 64  # This program is distributed in the hope that it will be useful, 
 65  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 66  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 67  # GNU General Public License for more details. 
 68  # 
 69  # You should have received a copy of the GNU General Public License 
 70  # along with this program; if not, write to the Free Software 
 71  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 72   
 73   
 74  import sys 
 75  import time 
 76  import math 
 77  import copy 
 78  import signal 
 79  import threading 
 80   
 81  import Halberd.logger 
 82  import Halberd.clues.Clue 
 83  import Halberd.clientlib as clientlib 
 84   
 85   
 86  __all__ = ['WorkCrew'] 
 87   
 88   
class ScanState:
    """Shared state among scanner threads.

    Every accessor takes an internal mutex through a C{with} block, so the
    lock is released even when the guarded code raises and a failing thread
    can never leave the others blocked.

    @ivar shouldstop: Signals when the threads should stop scanning.
    @type shouldstop: C{threading.Event}
    """
    def __init__(self):
        """Initializes shared state among scanning threads.
        """
        self.__mutex = threading.Lock()
        self.shouldstop = threading.Event()
        self.__error = None
        self.__clues = []

        self.__missed = 0
        self.__replies = 0

    def getStats(self):
        """Provides statistics about the scanning process.

        @return: Number of clues gathered so far, number of successful requests
        and number of unsuccessful ones (missed replies).
        @rtype: C{tuple}
        """
        # xxx - I badly need read/write locks.
        with self.__mutex:
            return (len(self.__clues), self.__replies, self.__missed)

    def insertClue(self, clue):
        """Inserts a clue in the list if it is new.

        If an equal clue is already stored, its hit counter is increased by
        the count of the incoming clue instead.

        @param clue: Clue to be recorded; it must provide C{getCount()}.
        """
        with self.__mutex:
            count = clue.getCount()
            self.__replies += count
            # Only the lookup is guarded: a ValueError coming out of incCount()
            # must not be mistaken for "clue not found".
            try:
                idx = self.__clues.index(clue)
            except ValueError:
                self.__clues.append(clue)
            else:
                self.__clues[idx].incCount(count)

    def getClues(self):
        """Clue accessor.

        @return: A copy of all obtained clues.
        @rtype: C{list}
        """
        with self.__mutex:
            return self.__clues[:]

    def incMissed(self):
        """Increase the counter of missed replies.
        """
        with self.__mutex:
            self.__missed += 1

    def setError(self, err):
        """Signal an error condition.

        Only the first error is kept; later calls are ignored. Recording an
        error also sets L{shouldstop}.

        @param err: Reason of the error condition.
        """
        with self.__mutex:
            if self.__error is None:
                self.__error = err
                self.shouldstop.set()

    def getError(self):
        """Returns the reason of the error condition.

        @return: A deep copy of the recorded error, or None if no error has
        been signalled.
        """
        with self.__mutex:
            # Since we don't know what the nature of __error will be, we need
            # to provide a clean copy of it to the caller so that no possible
            # references or changes to __error can affect the object we return.
            return copy.deepcopy(self.__error)
181 182
class WorkCrew:
    """Pool of scanners working in parallel.

    @ivar workers: Scanner threads created for the scan.
    @type workers: C{list}

    @ivar task: A reference to scantask.
    @type task: L{ScanTask}

    @ivar state: State shared with every scanner thread.
    @type state: L{ScanState}

    @ivar working: Indicates whether the crew is working or idle.
    @type working: C{bool}

    @ivar prev: Previous SIGINT handler.
    """
    def __init__(self, scantask):
        """Prepares an idle crew for the given scan task.

        @param scantask: Object describing the scan to perform.
        @type scantask: L{ScanTask}
        """
        self.workers = []
        self.task = scantask

        self.state = ScanState()

        self.working = False

        self.prev = None

    def _setupSigHandler(self):
        """Performs what's needed to catch SIGINT.
        """
        def interrupt(signum, frame):
            """SIGINT handler
            """
            self.state.setError('received SIGINT')

        self.prev = signal.signal(signal.SIGINT, interrupt)

    def _restoreSigHandler(self):
        """Restore previous SIGINT handler.
        """
        signal.signal(signal.SIGINT, self.prev)

    def _initLocal(self):
        """Initializes conventional (local) scanner threads.
        """
        self.workers.extend(Scanner(self.state, self.task)
                            for _ in range(self.task.parallelism))

    def scan(self):
        """Perform a parallel load-balancer scan.

        @return: The clues gathered by the scanner threads.
        @rtype: C{list}
        """
        self.working = True
        self._setupSigHandler()

        try:
            self._initLocal()

            for worker in self.workers:
                worker.start()

            # The Manager executes in the main thread WHILE the others are
            # working so that signals are correctly caught.
            manager = Manager(self.state, self.task)
            manager.run()

            for worker in self.workers:
                worker.join()

            # Display status information for the last time.
            manager.showStats()
            sys.stdout.write('\n\n')
        finally:
            # On the normal path the stop event is already set (the manager
            # loop only ends once it is); on an error path this tells any
            # worker that was started to stop instead of scanning forever.
            self.state.shouldstop.set()
            self._restoreSigHandler()
            self.working = False

        err = self.state.getError()
        if err is not None:
            sys.stderr.write('*** finished (%s) ***\n\n' % err)

        return self._getClues()

    def _getClues(self):
        """Returns a sequence of clues obtained during the scan.
        """
        assert not self.working

        return self.state.getClues()
264 265
class BaseScanner(threading.Thread):
    """Base class for load balancer scanning threads.

    @ivar timeout: Time (in seconds since the UNIX Epoch) when the scan will be
    stopped.
    @type timeout: C{float}
    """
    def __init__(self, state, scantask):
        """Initializes the scanning thread.

        @param state: Container to store the results of the scan (shared among
        scanning threads).
        @type state: C{instanceof(ScanState)}

        @param scantask: Object providing information needed to perform the
        scan.
        @type scantask: C{instanceof(ScanTask)}
        """
        threading.Thread.__init__(self)
        self.state = state
        self.task = scantask
        self.timeout = 0
        self.logger = Halberd.logger.getLogger()

    def remaining(self, end=None):
        """Seconds left until a given point in time.

        @param end: Ending time. When omitted, L{self.timeout} is used.
        @type end: C{float}

        @return: Remaining time until C{end}, truncated to whole seconds
        (negative once that moment has passed).
        @rtype: C{int}
        """
        # Compare against None so that an explicit end of 0 is honoured.
        if end is None:
            end = self.timeout
        return int(end - time.time())

    def hasExpired(self):
        """Expiration predicate.

        @return: True if the timeout has expired, False otherwise.
        @rtype: C{bool}
        """
        return (self.remaining() <= 0)

    def setTimeout(self, secs):
        """Compute an expiration time.

        The result is also stored in L{self.timeout}.

        @param secs: Amount of seconds to spend scanning the target.
        @type secs: C{int}

        @return: The moment in time when the task expires.
        @rtype: C{float}
        """
        self.timeout = time.time() + secs
        return self.timeout

    def run(self):
        """Perform the scan.

        Calls L{process} repeatedly until the shared stop event is set.
        """
        self.setTimeout(self.task.scantime)

        while not self.state.shouldstop.is_set():
            self.process()

    def process(self):
        """Perform a scanning task.

        This method should be overridden to do actual work.
        """
        pass
336
class Scanner(BaseScanner):
    """Scanner thread that probes the target host from the local machine.
    """
    def process(self):
        """Fetches one set of headers from the target and records the outcome.

        A fatal client error is stored in the shared state, a timeout counts as
        a missed reply and a successful reply becomes a clue.
        """
        http = clientlib.clientFactory(self.task)

        # Errors after which carrying on with the scan is pointless.
        fatal = (
            clientlib.ConnectionRefused,
            clientlib.UnknownReply,
            clientlib.HTTPSError,
        )

        try:
            reply = http.getHeaders(self.task.addr, self.task.url)
        except fatal as reason:
            self.state.setError(reason)
        except clientlib.TimedOut:
            self.state.incMissed()
        else:
            when, hdrs = reply
            self.state.insertClue(self.makeClue(when, hdrs))

    def makeClue(self, timestamp, headers):
        """Builds a clue out of a reply.

        @param timestamp: Time when the reply was received.
        @type timestamp: C{float}

        @param headers: MIME headers coming from an HTTP response.
        @type headers: C{str}

        @return: A valid clue
        @rtype: C{Clue}
        """
        fresh = Halberd.clues.Clue.Clue()
        fresh.setTimestamp(timestamp)
        fresh.parse(headers)

        return fresh
377 378
class Manager(BaseScanner):
    """Performs management tasks during the scan.
    """
    # Indicates how often the state must be refreshed (in seconds).
    refresh_interval = 0.25

    def process(self):
        """Controls the whole scanning process.

        This method checks when the timeout has expired and notifies the rest
        of the scanning threads that they should stop. It also displays (in
        case the user asked for it) detailed information regarding the process.
        """
        self.showStats()

        if self.hasExpired():
            self.state.shouldstop.set()
        try:
            time.sleep(self.refresh_interval)
        except IOError:
            # Catch interrupted system call exception (it happens when
            # CONTROL-C is pressed on win32 systems).
            self.state.shouldstop.set()

    def showStats(self):
        """Displays certain statistics while the scan is happening.

        Nothing is written unless the task is in verbose mode.
        """
        if not self.task.verbose:
            return

        def statbar(remaining, total):
            """Compose a status bar string showing progress.

            @param remaining: Seconds left before the scan ends.
            @param total: Total number of seconds allotted to the scan.
            """
            if total <= 0:
                # A scan with no time budget is complete by definition; this
                # also avoids dividing by zero below.
                return '[' + '#' * 10 + ']'
            done = int(math.floor(float(total - remaining)/total * 10))
            notdone = int(math.ceil(float(remaining)/total * 10))
            return '[' + '#' * done + ' ' * notdone + ']'

        nclues, replies, missed = self.state.getStats()

        # We put a lower bound on the remaining time.
        remaining = max(self.remaining(), 0)

        # NOTE(review): the listing this was taken from has collapsed
        # whitespace -- confirm the spacing inside the format string below
        # against the real crew.py.
        statusline = '\r' + self.task.addr.ljust(15) + \
                    ' %s clues: %3d | replies: %3d | missed: %3d' \
                    % (statbar(remaining, self.task.scantime),
                       nclues, replies, missed)
        sys.stdout.write(statusline)
        sys.stdout.flush()
430 431 432 # vim: ts=4 sw=4 et 433

halberd-0.2.4/doc/api/toc-everything.html0000644000175000017500000002317011431512607016742 0ustar jmbrjmbr Everything

Everything


All Classes

Halberd.ScanTask.ConfError
Halberd.ScanTask.ScanTask
Halberd.clientlib.ConnectionRefused
Halberd.clientlib.HTTPClient
Halberd.clientlib.HTTPError
Halberd.clientlib.HTTPSClient
Halberd.clientlib.HTTPSError
Halberd.clientlib.InvalidURL
Halberd.clientlib.TimedOut
Halberd.clientlib.UnknownReply
Halberd.clues.Clue.Clue
Halberd.clues.file.ClueDir
Halberd.clues.file.InvalidFile
Halberd.conflib.ConfReader
Halberd.conflib.InvalidConfFile
Halberd.crew.WorkCrew
Halberd.shell.BaseStrategy
Halberd.shell.ClueReaderStrategy
Halberd.shell.MultiScanStrategy
Halberd.shell.ScanError
Halberd.shell.UniScanStrategy

All Functions

Halberd.clientlib.clientFactory
Halberd.clues.analysis.analyze
Halberd.clues.analysis.classify
Halberd.clues.analysis.clusters
Halberd.clues.analysis.deltas
Halberd.clues.analysis.diff_fields
Halberd.clues.analysis.filter_proxies
Halberd.clues.analysis.get_digest
Halberd.clues.analysis.hits
Halberd.clues.analysis.ignore_changing_fields
Halberd.clues.analysis.merge
Halberd.clues.analysis.reanalyze
Halberd.clues.analysis.sections
Halberd.clues.analysis.slices
Halberd.clues.analysis.sort_clues
Halberd.clues.analysis.uniq
Halberd.reportlib.report

All Variables

Halberd.ScanTask.__package__
Halberd.ScanTask.default_conf_dir
Halberd.ScanTask.default_conf_file
Halberd.ScanTask.default_out
Halberd.ScanTask.default_parallelism
Halberd.ScanTask.default_ratio_threshold
Halberd.ScanTask.default_scantime
Halberd.clientlib.__package__
Halberd.clientlib.default_bufsize
Halberd.clientlib.default_template
Halberd.clientlib.default_timeout
Halberd.clues.Clue.__package__
Halberd.clues.analysis.__package__
Halberd.clues.analysis.logger
Halberd.clues.file.__package__
Halberd.conflib.__package__
Halberd.conflib.default_conf
Halberd.conflib.default_proxy_port
Halberd.logger.__package__
Halberd.reportlib.__package__
Halberd.shell.__package__
Halberd.util.__package__
Halberd.util.table

[hide private] halberd-0.2.4/doc/api/toc-Halberd.clues.Clue-module.html0000644000175000017500000000230011431512607021373 0ustar jmbrjmbr Clue

Module Clue


Classes

Clue

Variables

__package__

[hide private] halberd-0.2.4/doc/api/Halberd.conflib.ConfReader-class.html0000644000175000017500000002777111431512607022074 0ustar jmbrjmbr Halberd.conflib.ConfReader
Package Halberd :: Module conflib :: Class ConfReader
[hide private]
[frames] | no frames]

Class ConfReader

source code

Takes care of turning configuration files into meaningful information.

Instance Methods [hide private]
 
__init__(self) source code
 
open(self, fname)
Opens the configuration file.
source code
 
close(self)
Release the configuration file's descriptor.
source code
 
_getAddr(self, sectname, default_port)
Read a network address from the given section.
source code
 
parse(self)
Parses the configuration file.
source code
 
writeDefault(self, conf_file)
Write a bare-bones configuration file
source code
 
__del__(self) source code
Method Details [hide private]

open(self, fname)

source code 

Opens the configuration file.

Parameters:
  • fname (str) - Pathname to the configuration file.
Raises:

writeDefault(self, conf_file)

source code 

Write a bare-bones configuration file

Parameters:
  • conf_file (str) - Target file where the default conf. will be written.

halberd-0.2.4/doc/api/Halberd.ScanTask-pysrc.html0000644000175000017500000011705011431512607020201 0ustar jmbrjmbr Halberd.ScanTask
Package Halberd :: Module ScanTask
[hide private]
[frames] | no frames]

Source Code for Module Halberd.ScanTask

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Scanning tasks. 
  4   
  5  @var default_scantime: Time to spend probing the target expressed in seconds. 
  6  @type default_scantime: C{int} 
  7   
  8  @var default_parallelism: Number of parallel threads to launch for the scan. 
  9  @type default_parallelism: C{int} 
 10   
 11  @var default_conf_dir: Path to the directory where the configuration file is 
 12  located. 
 13  @type default_conf_dir: C{str} 
 14   
 15  @var default_conf_file: Name of the default configuration file for halberd. 
 16  @type default_conf_file: C{str} 
 17   
 18  @var default_ratio_threshold: Minimum clues-to-realservers ratio to trigger a 
 19  clue reanalysis. 
 20  @type default_ratio_threshold: C{float} 
 21   
 22  @var default_out: Default place where to write reports (None means stdout). 
 23  @type default_out: C{str} 
 24  """ 
 25   
 26  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 27  # 
 28  # This program is free software; you can redistribute it and/or modify 
 29  # it under the terms of the GNU General Public License as published by 
 30  # the Free Software Foundation; either version 2 of the License, or 
 31  # (at your option) any later version. 
 32  # 
 33  # This program is distributed in the hope that it will be useful, 
 34  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 35  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 36  # GNU General Public License for more details. 
 37  # 
 38  # You should have received a copy of the GNU General Public License 
 39  # along with this program; if not, write to the Free Software 
 40  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 41   
 42   
 43  import os 
 44   
 45  import Halberd.conflib 
 46   
 47   
 48  default_scantime = 15 
 49   
 50  default_parallelism = 4 
 51   
 52  default_conf_dir = os.path.join(os.path.expanduser('~'), '.halberd') 
 53  default_conf_file = os.path.join(default_conf_dir, 
 54                                   'halberd' + os.extsep + 'cfg') 
 55   
 56  default_ratio_threshold = 0.6 
 57   
 58  default_out = None 
 59   
 60   
class ConfError(Exception):
    """Error with configuration file(s)

    @ivar msg: Description of the problem.
    """
    def __init__(self, msg):
        # Initialise the base class too, so that args (and therefore repr()
        # and pickling) carry the message.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)
69 70
class ScanTask:
    """Describes the way a scan should be performed.

    @ivar verbose: Display status information during the scan.
    @type verbose: C{bool}

    @ivar debug: Display debug information.
    @type debug: C{bool}

    @ivar urlfile: Root folder to use for storing results of MultiScans.
    @type urlfile: C{str}

    @ivar url: URL to scan.
    @type url: C{str}

    @ivar addr: Address of the target web server.
    @type addr: C{str}

    @ivar proxy_serv_addr: Address + port where to listen when operating as a
    proxy.
    @type proxy_serv_addr: C{tuple}

    @ivar out: File where to write reports. If it's not set, stdout will be
    used.
    @type out: C{str}

    @ivar save: File or directory name where the results will be written.
    @type save: C{str}

    @ivar keyfile: Key file for SSL connections.
    @type keyfile: C{str}

    @ivar certfile: Certificate to be used for SSL connections.
    @type certfile: C{str}

    @ivar clues: Sequence of clues obtained from the target.
    @type clues: C{list}

    @ivar analyzed: Sequence of clues after the analysis phase.
    @type analyzed: C{list}
    """
    def __init__(self):
        """Sets every scan parameter to its module-level default.
        """
        self.scantime = default_scantime
        self.parallelism = default_parallelism
        self.conf_file = default_conf_file
        self.verbose = False
        self.debug = False

        self.ratio_threshold = default_ratio_threshold

        self.urlfile = ''
        self.url = ''
        self.addr = ''

        self.proxy_serv_addr = ()

        self.save = ''

        self.out = default_out

        self.keyfile = None
        self.certfile = None

        self.clues = []
        self.analyzed = []

    def readConf(self):
        """Read configuration file.

        This method tries to read the specified configuration file. If we try
        to read it at the default path and it's not there we create a
        bare-bones file and use that one.

        @raise ConfError: If there's some problem creating or reading the
        configuration file.
        """
        # xxx - Move this into Halberd.conflib as a higher level function.

        reader = Halberd.conflib.ConfReader()

        try:
            reader.open(self.conf_file)
        except IOError:
            if self.conf_file != default_conf_file:
                raise ConfError('unable to open configuration file %s\n'
                                % self.conf_file)
            try:
                # The directory may already exist while the file is missing.
                if not os.path.isdir(default_conf_dir):
                    os.mkdir(default_conf_dir)
                reader.writeDefault(default_conf_file)
                reader.open(default_conf_file)
            except (OSError, IOError):
                raise ConfError('unable to create a default conf. file')
        except Halberd.conflib.InvalidConfFile:
            raise ConfError('invalid configuration file %s\n' % self.conf_file)

        # Close the reader even if parsing fails.
        try:
            confvals = reader.parse()
        finally:
            reader.close()

        self.proxy_serv_addr = confvals[0]
        self.keyfile, self.certfile = confvals[1:]
172 173 174 # vim: ts=4 sw=4 et 175

halberd-0.2.4/doc/api/Halberd.ScanTask.ConfError-class.html0000644000175000017500000002447311431512607022052 0ustar jmbrjmbr Halberd.ScanTask.ConfError
Package Halberd :: Module ScanTask :: Class ConfError
[hide private]
[frames] | no frames]

Class ConfError

source code

              object --+        
                       |        
exceptions.BaseException --+    
                           |    
        exceptions.Exception --+
                               |
                              ConfError

Error with configuration file(s)

Instance Methods [hide private]
 
__init__(self, msg)
x.__init__(...) initializes x; see x.__class__.__doc__ for signature
source code
 
__str__(self)
str(x)
source code

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

Method Details [hide private]

__init__(self, msg)
(Constructor)

source code 

x.__init__(...) initializes x; see x.__class__.__doc__ for signature

Overrides: object.__init__
(inherited documentation)

__str__(self)
(Informal representation operator)

source code 

str(x)

Overrides: object.__str__
(inherited documentation)

halberd-0.2.4/doc/api/identifier-index.html0000644000175000017500000013766011431512607017234 0ustar jmbrjmbr Identifier Index
 
[hide private]
[frames] | no frames]

Identifier Index

[ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _ ]

A

B

C

D

E

F

G

H

I

L

M

N

O

P

R

S

T

U

W

_



halberd-0.2.4/doc/api/Halberd.clues.Clue-pysrc.html0000644000175000017500000016506211431512610020474 0ustar jmbrjmbr Halberd.clues.Clue
Package Halberd :: Package clues :: Module Clue
[hide private]
[frames] | no frames]

Source Code for Module Halberd.clues.Clue

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Clue generation module. 
  4   
  5  Clues are pieces of information obtained from the responses sent by a 
  6  webserver. 
  7  Their importance comes from the fact that they're the datastructure we use to 
  8  detect real servers behind HTTP load balancer devices. 
  9  """ 
 10   
 11  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 12  # 
 13  # This program is free software; you can redistribute it and/or modify 
 14  # it under the terms of the GNU General Public License as published by 
 15  # the Free Software Foundation; either version 2 of the License, or 
 16  # (at your option) any later version. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 21  # GNU General Public License for more details. 
 22  # 
 23  # You should have received a copy of the GNU General Public License 
 24  # along with this program; if not, write to the Free Software 
 25  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 26   
 27   
 28  import time 
 29  import types 
 30  import rfc822 
 31  import hashlib 
 32   
 33  import Halberd.util 
 34   
 35   
class Clue:
    """A clue is what we use to tell real servers behind a virtual IP.

    Clues are gathered during several connections to a web server and they
    allow us to try to identify patterns in its responses. Those patterns could
    allow us to find out which real servers are behind a VIP
    """
    def __init__(self):
        """Creates an empty clue that has been seen once.
        """
        # Number of times this clue has been found.
        self._count = 1

        # Generic server info (sometimes useful for distinguising servers).
        self.info = {
            'server': '',
            'contloc': '',
            'cookies': [],
            'date': '',
            'digest': ''
        }

        # Local time and remote time (in seconds since the Epoch)
        self._local, self._remote = 0, 0

        self.diff = None

        # We store the headers we're interested in digesting in a string and
        # calculate its hash _after_ the header processing takes place. This
        # way we incur in less computational overhead.
        self.__tmphdrs = ''

        # Original MIME headers. They're useful during analysis and reporting.
        self.headers = None

    def parse(self, headers):
        """Extracts all relevant information from the MIME headers replied by
        the target.

        @param headers: A set of MIME headers (a string as replied by the
        webserver or a previously parsed sequence of name, value tuples).
        @type headers: C{str}, C{list} or C{tuple}

        @raise TypeError: If headers is neither a string nor a sequence.
        """
        if isinstance(headers, basestring):
            # We parse the server's response into a sequence of name, value
            # tuples instead of a dictionary because with this approach we keep
            # the header's order as sent by the target, This is a relevant
            # piece of information we can't afford to miss.
            parsed = []
            for line in headers.splitlines():
                if line == '':
                    continue
                # partition() always yields a (name, value) pair, so a line
                # without ':' gets an empty value instead of a 1-tuple that
                # would break the unpacking below.
                name, _sep, value = line.partition(':')
                parsed.append((name, value))
            self.headers = parsed
        elif isinstance(headers, (list, tuple)):
            self.headers = headers
        else:
            raise TypeError('Unable to parse headers of type %s'
                            % type(headers).__name__)

        # We examine each MIME field and try to find an appropriate handler. If
        # there is none we simply digest the info it provides.
        self.__tmphdrs = ''
        for name, value in self.headers:
            # Look the handler up without a try block so that an
            # AttributeError raised inside a handler is not mistaken for a
            # missing handler.
            handlerfn = getattr(self, '_get_' + Clue.normalize(name), None)
            if handlerfn is None:
                self.__tmphdrs += '%s: %s ' % (name, value)
            else:
                handlerfn(value)

        self._updateDigest()
        self._calcDiff()

    @staticmethod
    def normalize(name):
        """Normalize string.

        This method takes a string coming out of mime-fields and transforms it
        into a valid Python identifier. That's done by removing invalid
        non-alphanumeric characters and also numeric ones placed at the
        beginning of the string.

        @param name: String to be normalized.
        @type name: C{str}

        @return: Normalized string (empty if nothing is left).
        @rtype: C{str}
        """
        normal = name.translate(Halberd.util.table).lower()
        # lstrip() copes with empty and all-digit names, which used to raise
        # IndexError.
        return normal.lstrip('0123456789')

    def _updateDigest(self):
        """Updates header fingerprint.
        """
        assert self.__tmphdrs is not None
        fingerprint = hashlib.sha1(self.__tmphdrs)
        self.__tmphdrs = None
        self.info['digest'] = fingerprint.hexdigest()

    def _calcDiff(self):
        """Compute the time difference between the remote and local clocks.

        The result is stored in C{self.diff}; nothing is returned.
        """
        self.diff = int(self._local - self._remote)

    def incCount(self, num=1):
        """Increase the times this clue has been found.

        @param num: A positive non-zero number of hits to increase.
        @type num: C{int}

        @raise ValueError: in case L{num} is less than or equal to zero.
        """
        if num <= 0:
            raise ValueError
        self._count += num

    def getCount(self):
        """Retrieve the number of times the clue has been found

        @return: Number of hits.
        @rtype: C{int}.
        """
        return self._count

    def setTimestamp(self, timestamp):
        """Sets the local clock attribute.

        @param timestamp: The local time (expressed in seconds since the Epoch)
        when the connection to the target was successfully completed.
        @type timestamp: C{int}
        """
        self._local = timestamp

    def __eq__(self, other):
        """Two clues are equal when their time diff and digest coincide.
        """
        if self.diff != other.diff:
            return False

        if self.info['digest'] != other.info['digest']:
            return False

        return True

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        if not (self.diff or self.info['digest']):
            return "<Clue at %x>" % id(self)
        return "<Clue at %x diff=%d found=%d digest='%s'>" \
                % (id(self), self.diff, self._count,
                   self.info['digest'][:4] + '...')

    # ==================================================================
    # The following methods extract relevant data from the MIME headers.
    # ==================================================================

    def _get_server(self, field):
        """Server:"""
        self.info['server'] = field
        self.__tmphdrs += field     # Make sure this gets hashed too.

    def _get_date(self, field):
        """Date:"""
        self.info['date'] = field
        parsed = rfc822.parsedate(field)
        # parsedate() returns None for a date it cannot parse; keep the
        # previous remote time instead of handing None to mktime().
        if parsed is not None:
            self._remote = time.mktime(parsed)

    def _get_content_location(self, field):
        """Content-location:"""
        self.info['contloc'] = field
        self.__tmphdrs += field

    # NOTE(review): the listing this class was taken from skips original
    # source lines 213-215 at this point (presumably another header handler,
    # given the otherwise unused 'cookies' entry in self.info) -- confirm
    # against the real Clue.py before relying on this copy.

    # ====================================================
    # Ignored headers (they don't contribute to the hash).
    # ====================================================

    def _get_expires(self, field):
        """Expires:"""
        pass

    def _get_age(self, field):
        """Age:"""
        pass

    def _get_content_length(self, field):
        """Content-length:"""
        pass

    def _get_last_modified(self, field):
        """Last-modified:"""
        pass

    def _get_etag(self, field):
        """ETag:"""
        pass

    def _get_cache_expires(self, field):
        """Cache-expires:"""
        pass

    def _get_content_type(self, field):
        """Content-type:"""
        pass
248 249 250 # vim: ts=4 sw=4 et 251

halberd-0.2.4/doc/api/toc-Halberd.clientlib-module.html0000644000175000017500000000455611431512607021355 0ustar jmbrjmbr clientlib

Module clientlib


Classes

ConnectionRefused
HTTPClient
HTTPError
HTTPSClient
HTTPSError
InvalidURL
TimedOut
UnknownReply

Functions

clientFactory

Variables

__package__
default_bufsize
default_template
default_timeout

[hide private] halberd-0.2.4/doc/api/Halberd.clientlib.HTTPClient-class.html0000644000175000017500000006410711431512607022325 0ustar jmbrjmbr Halberd.clientlib.HTTPClient
Package Halberd :: Module clientlib :: Class HTTPClient
[hide private]
[frames | no frames]

Class HTTPClient

source code

Known Subclasses:
  • HTTPSClient

Special-purpose HTTP client.

Instance Methods [hide private]
 
__init__(self)
Initializes the object.
source code
tuple
getHeaders(self, address, urlstr)
Talk to the target webserver and fetch MIME headers.
source code
 
_putRequest(self, address, urlstr)
Sends an HTTP request to the target webserver.
source code
tuple
_getHostAndPort(self, netloc)
Determine the hostname and port to connect to from an URL
source code
str
_fillTemplate(self, hostname, port, url, params='', query='', fragment='')
Fills the request template with relevant information.
source code
 
_connect(self, addr)
Connect to the target address.
source code
 
_sendAll(self, data)
Sends a string to the socket.
source code
tuple
_getReply(self)
Read a reply from the server.
source code
 
__del__(self) source code
Instance Variables [hide private]
float timeout = 2
Timeout for socket operations (expressed in seconds).
int bufsize = 1024
Buffer size for network I/O.
str template = 'GET %(request)s HTTP/1.1\r\nHost: %(hostname)s%(po...
Template of the HTTP request to be sent to the target.
callable _recv
Reference to a callable responsible for reading data from the network.
Method Details [hide private]

getHeaders(self, address, urlstr)

source code 

Talk to the target webserver and fetch MIME headers.

Parameters:
  • address (tuple) - The target's network address.
  • urlstr (str) - URL to use.
Returns: tuple
The time when the client started reading the server's response and the MIME headers that were sent.

_putRequest(self, address, urlstr)

source code 

Sends an HTTP request to the target webserver.

This method connects to the target server, sends the HTTP request and records a timestamp.

Parameters:
  • address (str) - Target address.
  • urlstr (str) - A valid Uniform Resource Locator.
Raises:
  • InvalidURL - In case the URL scheme is not HTTP or HTTPS
  • ConnectionRefused - If it can't reach the target webserver.
  • TimedOut - If we cannot send the data within the specified time.

_getHostAndPort(self, netloc)

source code 

Determine the hostname and port to connect to from an URL

Parameters:
  • netloc (str) - Relevant part of the parsed URL.
Returns: tuple
Hostname (str) and port (int)

_fillTemplate(self, hostname, port, url, params='', query='', fragment='')

source code 

Fills the request template with relevant information.

Parameters:
  • hostname (str) - Target host to reach.
  • port (int) - Remote port.
  • url (str) - URL to use as source.
Returns: str
A request ready to be sent

_connect(self, addr)

source code 

Connect to the target address.

Parameters:
  • addr (tuple) - The target's address.
Raises:
  • ConnectionRefused - If it can't reach the target webserver.

_getReply(self)

source code 

Read a reply from the server.

Returns: tuple
Time when the data started arriving plus the received data.
Raises:
  • UnknownReply - If the remote server doesn't return a valid HTTP reply.
  • TimedOut - In case reading from the network takes too much time.

Instance Variable Details [hide private]

timeout

Timeout for socket operations (expressed in seconds). WARNING: changing this value is strongly discouraged.
Type:
float
Value:
2

template

Template of the HTTP request to be sent to the target.
Type:
str
Value:
'''GET %(request)s HTTP/1.1\r
Host: %(hostname)s%(port)s\r
Pragma: no-cache\r
Cache-control: no-cache\r
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20\
050414 Firefox/1.0.3\r
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, applicati\
on/x-shockwave-flash, */*\r
...

halberd-0.2.4/doc/api/epydoc.css0000644000175000017500000003722711431512607015112 0ustar jmbrjmbr /* Epydoc CSS Stylesheet * * This stylesheet can be used to customize the appearance of epydoc's * HTML output. * */ /* Default Colors & Styles * - Set the default foreground & background color with 'body'; and * link colors with 'a:link' and 'a:visited'. * - Use bold for decision list terms. * - The heading styles defined here are used for headings *within* * docstring descriptions. All headings used by epydoc itself use * either class='epydoc' or class='toc' (CSS styles for both * defined below). */ body { background: #ffffff; color: #000000; } p { margin-top: 0.5em; margin-bottom: 0.5em; } a:link { color: #0000ff; } a:visited { color: #204080; } dt { font-weight: bold; } h1 { font-size: +140%; font-style: italic; font-weight: bold; } h2 { font-size: +125%; font-style: italic; font-weight: bold; } h3 { font-size: +110%; font-style: italic; font-weight: normal; } code { font-size: 100%; } /* N.B.: class, not pseudoclass */ a.link { font-family: monospace; } /* Page Header & Footer * - The standard page header consists of a navigation bar (with * pointers to standard pages such as 'home' and 'trees'); a * breadcrumbs list, which can be used to navigate to containing * classes or modules; options links, to show/hide private * variables and to show/hide frames; and a page title (using *

). The page title may be followed by a link to the * corresponding source code (using 'span.codelink'). * - The footer consists of a navigation bar, a timestamp, and a * pointer to epydoc's homepage. */ h1.epydoc { margin: 0; font-size: +140%; font-weight: bold; } h2.epydoc { font-size: +130%; font-weight: bold; } h3.epydoc { font-size: +115%; font-weight: bold; margin-top: 0.2em; } td h3.epydoc { font-size: +115%; font-weight: bold; margin-bottom: 0; } table.navbar { background: #a0c0ff; color: #000000; border: 2px groove #c0d0d0; } table.navbar table { color: #000000; } th.navbar-select { background: #70b0ff; color: #000000; } table.navbar a { text-decoration: none; } table.navbar a:link { color: #0000ff; } table.navbar a:visited { color: #204080; } span.breadcrumbs { font-size: 85%; font-weight: bold; } span.options { font-size: 70%; } span.codelink { font-size: 85%; } td.footer { font-size: 85%; } /* Table Headers * - Each summary table and details section begins with a 'header' * row. This row contains a section title (marked by * 'span.table-header') as well as a show/hide private link * (marked by 'span.options', defined above). * - Summary tables that contain user-defined groups mark those * groups using 'group header' rows. */ td.table-header { background: #70b0ff; color: #000000; border: 1px solid #608090; } td.table-header table { color: #000000; } td.table-header table a:link { color: #0000ff; } td.table-header table a:visited { color: #204080; } span.table-header { font-size: 120%; font-weight: bold; } th.group-header { background: #c0e0f8; color: #000000; text-align: left; font-style: italic; font-size: 115%; border: 1px solid #608090; } /* Summary Tables (functions, variables, etc) * - Each object is described by a single row of the table with * two cells. The left cell gives the object's type, and is * marked with 'code.summary-type'. The right cell gives the * object's name and a summary description. 
* - CSS styles for the table's header and group headers are * defined above, under 'Table Headers' */ table.summary { border-collapse: collapse; background: #e8f0f8; color: #000000; border: 1px solid #608090; margin-bottom: 0.5em; } td.summary { border: 1px solid #608090; } code.summary-type { font-size: 85%; } table.summary a:link { color: #0000ff; } table.summary a:visited { color: #204080; } /* Details Tables (functions, variables, etc) * - Each object is described in its own div. * - A single-row summary table w/ table-header is used as * a header for each details section (CSS style for table-header * is defined above, under 'Table Headers'). */ table.details { border-collapse: collapse; background: #e8f0f8; color: #000000; border: 1px solid #608090; margin: .2em 0 0 0; } table.details table { color: #000000; } table.details a:link { color: #0000ff; } table.details a:visited { color: #204080; } /* Fields */ dl.fields { margin-left: 2em; margin-top: 1em; margin-bottom: 1em; } dl.fields dd ul { margin-left: 0em; padding-left: 0em; } dl.fields dd ul li ul { margin-left: 2em; padding-left: 0em; } div.fields { margin-left: 2em; } div.fields p { margin-bottom: 0.5em; } /* Index tables (identifier index, term index, etc) * - link-index is used for indices containing lists of links * (namely, the identifier index & term index). * - index-where is used in link indices for the text indicating * the container/source for each link. * - metadata-index is used for indices containing metadata * extracted from fields (namely, the bug index & todo index). 
*/ table.link-index { border-collapse: collapse; background: #e8f0f8; color: #000000; border: 1px solid #608090; } td.link-index { border-width: 0px; } table.link-index a:link { color: #0000ff; } table.link-index a:visited { color: #204080; } span.index-where { font-size: 70%; } table.metadata-index { border-collapse: collapse; background: #e8f0f8; color: #000000; border: 1px solid #608090; margin: .2em 0 0 0; } td.metadata-index { border-width: 1px; border-style: solid; } table.metadata-index a:link { color: #0000ff; } table.metadata-index a:visited { color: #204080; } /* Function signatures * - sig* is used for the signature in the details section. * - .summary-sig* is used for the signature in the summary * table, and when listing property accessor functions. * */ .sig-name { color: #006080; } .sig-arg { color: #008060; } .sig-default { color: #602000; } .summary-sig { font-family: monospace; } .summary-sig-name { color: #006080; font-weight: bold; } table.summary a.summary-sig-name:link { color: #006080; font-weight: bold; } table.summary a.summary-sig-name:visited { color: #006080; font-weight: bold; } .summary-sig-arg { color: #006040; } .summary-sig-default { color: #501800; } /* Subclass list */ ul.subclass-list { display: inline; } ul.subclass-list li { display: inline; } /* To render variables, classes etc. like functions */ table.summary .summary-name { color: #006080; font-weight: bold; font-family: monospace; } table.summary a.summary-name:link { color: #006080; font-weight: bold; font-family: monospace; } table.summary a.summary-name:visited { color: #006080; font-weight: bold; font-family: monospace; } /* Variable values * - In the 'variable details' sections, each varaible's value is * listed in a 'pre.variable' box. The width of this box is * restricted to 80 chars; if the value's repr is longer than * this it will be wrapped, using a backslash marked with * class 'variable-linewrap'. 
If the value's repr is longer * than 3 lines, the rest will be ellided; and an ellipsis * marker ('...' marked with 'variable-ellipsis') will be used. * - If the value is a string, its quote marks will be marked * with 'variable-quote'. * - If the variable is a regexp, it is syntax-highlighted using * the re* CSS classes. */ pre.variable { padding: .5em; margin: 0; background: #dce4ec; color: #000000; border: 1px solid #708890; } .variable-linewrap { color: #604000; font-weight: bold; } .variable-ellipsis { color: #604000; font-weight: bold; } .variable-quote { color: #604000; font-weight: bold; } .variable-group { color: #008000; font-weight: bold; } .variable-op { color: #604000; font-weight: bold; } .variable-string { color: #006030; } .variable-unknown { color: #a00000; font-weight: bold; } .re { color: #000000; } .re-char { color: #006030; } .re-op { color: #600000; } .re-group { color: #003060; } .re-ref { color: #404040; } /* Base tree * - Used by class pages to display the base class hierarchy. */ pre.base-tree { font-size: 80%; margin: 0; } /* Frames-based table of contents headers * - Consists of two frames: one for selecting modules; and * the other listing the contents of the selected module. * - h1.toc is used for each frame's heading * - h2.toc is used for subheadings within each frame. */ h1.toc { text-align: center; font-size: 105%; margin: 0; font-weight: bold; padding: 0; } h2.toc { font-size: 100%; font-weight: bold; margin: 0.5em 0 0 -0.3em; } /* Syntax Highlighting for Source Code * - doctest examples are displayed in a 'pre.py-doctest' block. * If the example is in a details table entry, then it will use * the colors specified by the 'table pre.py-doctest' line. * - Source code listings are displayed in a 'pre.py-src' block. * Each line is marked with 'span.py-line' (used to draw a line * down the left margin, separating the code from the line * numbers). Line numbers are displayed with 'span.py-lineno'. 
* The expand/collapse block toggle button is displayed with * 'a.py-toggle' (Note: the CSS style for 'a.py-toggle' should not * modify the font size of the text.) * - If a source code page is opened with an anchor, then the * corresponding code block will be highlighted. The code * block's header is highlighted with 'py-highlight-hdr'; and * the code block's body is highlighted with 'py-highlight'. * - The remaining py-* classes are used to perform syntax * highlighting (py-string for string literals, py-name for names, * etc.) */ pre.py-doctest { padding: .5em; margin: 1em; background: #e8f0f8; color: #000000; border: 1px solid #708890; } table pre.py-doctest { background: #dce4ec; color: #000000; } pre.py-src { border: 2px solid #000000; background: #f0f0f0; color: #000000; } .py-line { border-left: 2px solid #000000; margin-left: .2em; padding-left: .4em; } .py-lineno { font-style: italic; font-size: 90%; padding-left: .5em; } a.py-toggle { text-decoration: none; } div.py-highlight-hdr { border-top: 2px solid #000000; border-bottom: 2px solid #000000; background: #d8e8e8; } div.py-highlight { border-bottom: 2px solid #000000; background: #d0e0e0; } .py-prompt { color: #005050; font-weight: bold;} .py-more { color: #005050; font-weight: bold;} .py-string { color: #006030; } .py-comment { color: #003060; } .py-keyword { color: #600000; } .py-output { color: #404040; } .py-name { color: #000050; } .py-name:link { color: #000050 !important; } .py-name:visited { color: #000050 !important; } .py-number { color: #005000; } .py-defname { color: #000060; font-weight: bold; } .py-def-name { color: #000060; font-weight: bold; } .py-base-class { color: #000060; } .py-param { color: #000060; } .py-docstring { color: #006030; } .py-decorator { color: #804020; } /* Use this if you don't want links to names underlined: */ /*a.py-name { text-decoration: none; }*/ /* Graphs & Diagrams * - These CSS styles are used for graphs & diagrams generated using * Graphviz dot. 
'img.graph-without-title' is used for bare * diagrams (to remove the border created by making the image * clickable). */ img.graph-without-title { border: none; } img.graph-with-title { border: 1px solid #000000; } span.graph-title { font-weight: bold; } span.graph-caption { } /* General-purpose classes * - 'p.indent-wrapped-lines' defines a paragraph whose first line * is not indented, but whose subsequent lines are. * - The 'nomargin-top' class is used to remove the top margin (e.g. * from lists). The 'nomargin' class is used to remove both the * top and bottom margin (but not the left or right margin -- * for lists, that would cause the bullets to disappear.) */ p.indent-wrapped-lines { padding: 0 0 0 7em; text-indent: -7em; margin: 0; } .nomargin-top { margin-top: 0; } .nomargin { margin-top: 0; margin-bottom: 0; } /* HTML Log */ div.log-block { padding: 0; margin: .5em 0 .5em 0; background: #e8f0f8; color: #000000; border: 1px solid #000000; } div.log-error { padding: .1em .3em .1em .3em; margin: 4px; background: #ffb0b0; color: #000000; border: 1px solid #000000; } div.log-warning { padding: .1em .3em .1em .3em; margin: 4px; background: #ffffb0; color: #000000; border: 1px solid #000000; } div.log-info { padding: .1em .3em .1em .3em; margin: 4px; background: #b0ffb0; color: #000000; border: 1px solid #000000; } h2.log-hdr { background: #70b0ff; color: #000000; margin: 0; padding: 0em 0.5em 0em 0.5em; border-bottom: 1px solid #000000; font-size: 110%; } p.log { font-weight: bold; margin: .5em 0 .5em 0; } tr.opt-changed { color: #000000; font-weight: bold; } tr.opt-default { color: #606060; } pre.log { margin: 0; padding: 0; padding-left: 1em; } halberd-0.2.4/doc/api/epydoc.js0000644000175000017500000002452511431512607014733 0ustar jmbrjmbrfunction toggle_private() { // Search for any private/public links on this page. Store // their old text in "cmd," so we will know what action to // take; and change their text to the opposite action. 
var cmd = "?"; var elts = document.getElementsByTagName("a"); for(var i=0; i...
"; elt.innerHTML = s; } } function toggle(id) { elt = document.getElementById(id+"-toggle"); if (elt.innerHTML == "-") collapse(id); else expand(id); return false; } function highlight(id) { var elt = document.getElementById(id+"-def"); if (elt) elt.className = "py-highlight-hdr"; var elt = document.getElementById(id+"-expanded"); if (elt) elt.className = "py-highlight"; var elt = document.getElementById(id+"-collapsed"); if (elt) elt.className = "py-highlight"; } function num_lines(s) { var n = 1; var pos = s.indexOf("\n"); while ( pos > 0) { n += 1; pos = s.indexOf("\n", pos+1); } return n; } // Collapse all blocks that mave more than `min_lines` lines. function collapse_all(min_lines) { var elts = document.getElementsByTagName("div"); for (var i=0; i 0) if (elt.id.substring(split, elt.id.length) == "-expanded") if (num_lines(elt.innerHTML) > min_lines) collapse(elt.id.substring(0, split)); } } function expandto(href) { var start = href.indexOf("#")+1; if (start != 0 && start != href.length) { if (href.substring(start, href.length) != "-") { collapse_all(4); pos = href.indexOf(".", start); while (pos != -1) { var id = href.substring(start, pos); expand(id); pos = href.indexOf(".", pos+1); } var id = href.substring(start, href.length); expand(id); highlight(id); } } } function kill_doclink(id) { var parent = document.getElementById(id); parent.removeChild(parent.childNodes.item(0)); } function auto_kill_doclink(ev) { if (!ev) var ev = window.event; if (!this.contains(ev.toElement)) { var parent = document.getElementById(this.parentID); parent.removeChild(parent.childNodes.item(0)); } } function doclink(id, name, targets_id) { var elt = document.getElementById(id); // If we already opened the box, then destroy it. // (This case should never occur, but leave it in just in case.) if (elt.childNodes.length > 1) { elt.removeChild(elt.childNodes.item(0)); } else { // The outer box: relative + inline positioning. 
var box1 = document.createElement("div"); box1.style.position = "relative"; box1.style.display = "inline"; box1.style.top = 0; box1.style.left = 0; // A shadow for fun var shadow = document.createElement("div"); shadow.style.position = "absolute"; shadow.style.left = "-1.3em"; shadow.style.top = "-1.3em"; shadow.style.background = "#404040"; // The inner box: absolute positioning. var box2 = document.createElement("div"); box2.style.position = "relative"; box2.style.border = "1px solid #a0a0a0"; box2.style.left = "-.2em"; box2.style.top = "-.2em"; box2.style.background = "white"; box2.style.padding = ".3em .4em .3em .4em"; box2.style.fontStyle = "normal"; box2.onmouseout=auto_kill_doclink; box2.parentID = id; // Get the targets var targets_elt = document.getElementById(targets_id); var targets = targets_elt.getAttribute("targets"); var links = ""; target_list = targets.split(","); for (var i=0; i" + target[0] + ""; } // Put it all together. elt.insertBefore(box1, elt.childNodes.item(0)); //box1.appendChild(box2); box1.appendChild(shadow); shadow.appendChild(box2); box2.innerHTML = "Which "+name+" do you want to see documentation for?" + ""; } return false; } function get_anchor() { var href = location.href; var start = href.indexOf("#")+1; if ((start != 0) && (start != href.length)) return href.substring(start, href.length); } function redirect_url(dottedName) { // Scan through each element of the "pages" list, and check // if "name" matches with any of them. for (var i=0; i-m" or "-c"; // extract the portion & compare it to dottedName. var pagename = pages[i].substring(0, pages[i].length-2); if (pagename == dottedName.substring(0,pagename.length)) { // We've found a page that matches `dottedName`; // construct its URL, using leftover `dottedName` // content to form an anchor. 
var pagetype = pages[i].charAt(pages[i].length-1); var url = pagename + ((pagetype=="m")?"-module.html": "-class.html"); if (dottedName.length > pagename.length) url += "#" + dottedName.substring(pagename.length+1, dottedName.length); return url; } } } halberd-0.2.4/doc/api/Halberd.clientlib.ConnectionRefused-class.html0000644000175000017500000001620111431512607024014 0ustar jmbrjmbr Halberd.clientlib.ConnectionRefused
Package Halberd :: Module clientlib :: Class ConnectionRefused
[hide private]
[frames | no frames]

Class ConnectionRefused

source code

              object --+            
                       |            
exceptions.BaseException --+        
                           |        
        exceptions.Exception --+    
                               |    
                       HTTPError --+
                                   |
                                  ConnectionRefused

Unable to reach webserver

Instance Methods [hide private]

Inherited from HTTPError: __deepcopy__, __init__, __str__

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/doc/api/Halberd.clientlib-pysrc.html0000644000175000017500000025344211431512607020445 0ustar jmbrjmbr Halberd.clientlib
Package Halberd :: Module clientlib
[hide private]
[frames | no frames]

Source Code for Module Halberd.clientlib

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """HTTP/HTTPS client module. 
  4   
  5  @var default_timeout: Default timeout for socket operations. 
  6  @type default_timeout: C{float} 
  7   
  8  @var default_bufsize: Default number of bytes to try to read from the network. 
  9  @type default_bufsize: C{int} 
 10   
 11  @var default_template: Request template, must be filled by L{HTTPClient} 
 12  @type default_template: C{str} 
 13  """ 
 14   
 15  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 16  # 
 17  # This program is free software; you can redistribute it and/or modify 
 18  # it under the terms of the GNU General Public License as published by 
 19  # the Free Software Foundation; either version 2 of the License, or 
 20  # (at your option) any later version. 
 21  # 
 22  # This program is distributed in the hope that it will be useful, 
 23  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 24  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 25  # GNU General Public License for more details. 
 26  # 
 27  # You should have received a copy of the GNU General Public License 
 28  # along with this program; if not, write to the Free Software 
 29  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 30   
 31   
 32  import time 
 33  import socket 
 34  import urlparse 
 35   
 36  from itertools import takewhile 
 37   
 38  import Halberd.ScanTask 
 39   
 40   
 41  default_timeout = 2 
 42   
 43  default_bufsize = 1024 
 44   
 45  # WARNING - Changing the HTTP request method in the following template will 
 46  # require updating tests/test_clientlib.py accordingly. 
 47  default_template = """\ 
 48  GET %(request)s HTTP/1.1\r\n\ 
 49  Host: %(hostname)s%(port)s\r\n\ 
 50  Pragma: no-cache\r\n\ 
 51  Cache-control: no-cache\r\n\ 
 52  User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20050414 Firefox/1.0.3\r\n\ 
 53  Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg,\ 
 54   application/x-shockwave-flash, */*\r\n\ 
 55  Accept-Language: en-us,en;q=0.5\r\n\ 
 56  Accept-Encoding: gzip,deflate\r\n\ 
 57  Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n\ 
 58  Keep-Alive: 300\r\n\ 
 59  Connection: keep-alive\r\n\r\n\ 
 60  """ 
 61   
 62   
class HTTPError(Exception):
    """Generic HTTP exception.

    @ivar msg: Description of the problem. Any object is accepted; it is
        converted with C{str} when the exception is printed.
    """

    def __init__(self, msg):
        """Store the message.

        @param msg: Description of the problem.
        """
        # Hand the message to the base class as well, so that C{args} (and
        # with it repr() and pickling) carries the message instead of being
        # left empty.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)

    def __deepcopy__(self, memo):
        # Deep-copying hands back this very instance rather than a copy.
        return self
74
class HTTPSError(HTTPError):
    """Generic HTTPS exception.

    Base error for problems that are specific to the SSL layer.
    """
77
class InvalidURL(HTTPError):
    """Invalid or unsupported URL.

    Signals a URL that cannot be used as a scan target.
    """
80
class TimedOut(HTTPError):
    """Operation timed out.

    Signals that a network operation did not finish within the allowed time.
    """
83
class ConnectionRefused(HTTPError):
    """Unable to reach webserver.

    Signals that the connection to the target could not be established.
    """
86
class UnknownReply(HTTPError):
    """The remote host didn't return an HTTP reply.

    Signals that the data received does not look like an HTTP response.
    """
89 90
class HTTPClient:
    """Special-purpose HTTP client.

    @ivar timeout: Timeout for socket operations (expressed in seconds).
    B{WARNING}: changing this value is strongly discouraged.
    @type timeout: C{float}

    @ivar bufsize: Buffer size for network I/O.
    @type bufsize: C{int}

    @ivar template: Template of the HTTP request to be sent to the target.
    @type template: C{str}

    @ivar _recv: Reference to a callable responsible for reading data from
    the network.
    @type _recv: C{callable}
    """
    timeout = default_timeout
    bufsize = default_bufsize
    template = default_template

    def __init__(self):
        """Initializes the object.
        """
        self.schemes = ['http']
        self.default_port = 80
        # _timeout_exceptions MUST be converted to a tuple before using it
        # with except.
        self._timeout_exceptions = [socket.timeout]

        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._sock.settimeout(self.timeout)

        self._recv = self._sock.recv

    def getHeaders(self, address, urlstr):
        """Talk to the target webserver and fetch MIME headers.

        @param address: The target's host address; it is paired with the
        port taken from the URL in order to connect.
        @type address: C{str}

        @param urlstr: URL to use.
        @type urlstr: C{str}

        @return: The time when the client started reading the server's
        response and the MIME headers that were sent, or C{None} when the
        reply is empty.
        @rtype: C{tuple}
        """
        self._putRequest(address, urlstr)

        timestamp, headers = self._getReply()
        if not headers:
            return None

        # Remove HTTP response and leave only the MIME headers.
        headers = headers.splitlines()[1:]
        headers = list(takewhile(lambda x: x != '', headers))
        headers.append('\r\n')
        headers = '\r\n'.join(headers)

        return timestamp, headers

    def _putRequest(self, address, urlstr):
        """Sends an HTTP request to the target webserver.

        This method connects to the target server and sends the HTTP
        request.

        @param address: Target address.
        @type address: C{str}

        @param urlstr: A valid Uniform Resource Locator.
        @type urlstr: C{str}

        @raise InvalidURL: In case the URL scheme is not among the supported
        ones or the port is not a number.
        @raise ConnectionRefused: If it can't reach the target webserver.
        @raise TimedOut: If we cannot send the data within the specified
        time.
        """
        scheme, netloc, url, params, query, fragment = \
            urlparse.urlparse(urlstr)

        if scheme not in self.schemes:
            raise InvalidURL('%s is not a supported protocol' % scheme)

        hostname, port = self._getHostAndPort(netloc)
        # NOTE: address and hostname may not be the same. The caller is
        # responsible for checking that.

        req = self._fillTemplate(hostname, port, url, params, query, fragment)

        self._connect((address, port))

        self._sendAll(req)

    def _getHostAndPort(self, netloc):
        """Determine the hostname and port to connect to from a URL

        @param netloc: Relevant part of the parsed URL.
        @type netloc: C{str}

        @return: Hostname (C{str}) and port (C{int})
        @rtype: C{tuple}

        @raise InvalidURL: If the port is not made of digits only.
        """
        try:
            hostname, portnum = netloc.split(':', 1)
        except ValueError:
            # No colon at all: fall back to the scheme's default port.
            hostname, port = netloc, self.default_port
        else:
            if portnum.isdigit():
                port = int(portnum)
            else:
                raise InvalidURL('%s is not a valid port number' % portnum)

        return hostname, port

    def _fillTemplate(self, hostname, port, url, params='', query='',
                      fragment=''):
        """Fills the request template with relevant information.

        @param hostname: Target host to reach.
        @type hostname: C{str}

        @param port: Remote port.
        @type port: C{int}

        @param url: URL to use as source.
        @type url: C{str}

        @param params: Optional parameters, appended after a C{;}.
        @param query: Optional query string, appended after a C{?}.
        @param fragment: Optional fragment, appended after a C{#}.

        @return: A request ready to be sent
        @rtype: C{str}
        """
        urlstr = url or '/'
        if params:
            urlstr += ';' + params
        if query:
            urlstr += '?' + query
        if fragment:
            urlstr += '#' + fragment

        # The port is only spelled out when it differs from the default one.
        if port == self.default_port:
            p = ''
        else:
            p = ':' + str(port)

        values = {'request': urlstr, 'hostname': hostname, 'port': p}

        return self.template % values

    def _connect(self, addr):
        """Connect to the target address.

        @param addr: The target's address.
        @type addr: C{tuple}

        @raise ConnectionRefused: If it can't reach the target webserver.
        """
        try:
            self._sock.connect(addr)
        except socket.error:
            raise ConnectionRefused('Connection refused')

    def _sendAll(self, data):
        """Sends a string to the socket.

        @raise TimedOut: If the data cannot be written in time.
        """
        try:
            self._sock.sendall(data)
        except socket.timeout:
            raise TimedOut('timed out while writing to the network')

    def _getReply(self):
        """Read a reply from the server.

        Reading stops at the end of the headers (an empty line), when the
        remote end closes the connection or once C{timeout} seconds have
        elapsed.

        @return: Time when the data started arriving plus the received data.
        @rtype: C{tuple}

        @raise UnknownReply: If the remote server doesn't return a valid HTTP
        reply.
        @raise TimedOut: In case reading from the network takes too much time.
        """
        data = ''
        timestamp = None
        stoptime = time.time() + self.timeout
        while time.time() < stoptime:
            try:
                chunk = self._recv(self.bufsize)
            except tuple(self._timeout_exceptions) as msg:
                raise TimedOut(msg)

            if not chunk:
                # The remote end closed the connection.
                break

            if timestamp is None:
                # Remember when the first piece of the reply arrived.
                timestamp = time.time()

            data += chunk
            idx = data.find('\r\n\r\n')
            if idx != -1:
                # Everything past the blank line is not needed.
                data = data[:idx]
                break

        if not data.startswith('HTTP/'):
            raise UnknownReply('Invalid protocol')

        return timestamp, data

    def __del__(self):
        # __init__ may have failed before the socket was created, in which
        # case there is no _sock attribute to look at.
        sock = getattr(self, '_sock', None)
        if sock:
            sock.close()
298 299
class HTTPSClient(HTTPClient):
    """Special-purpose HTTPS client.

    @ivar keyfile: Path to an SSL key file (C{None} when unused).
    @ivar certfile: Path to an SSL certificate file (C{None} when unused).
    """

    def __init__(self):
        HTTPClient.__init__(self)

        self.schemes.append('https')

        self.default_port = 443

        # Both are set by _connect() once the SSL negotiation succeeds.
        self._recv = None
        self._sslsock = None
        self._timeout_exceptions.append(socket.sslerror)

        # Path to an SSL key file and certificate.
        self.keyfile = None
        self.certfile = None

    def _connect(self, addr):
        """Connect to the target web server.

        @param addr: The target's address.
        @type addr: C{tuple}

        @raise HTTPSError: In case there's some mistake during the SSL
        negotiation.
        """
        HTTPClient._connect(self, addr)
        try:
            self._sslsock = socket.ssl(self._sock, self.keyfile,
                                       self.certfile)
        except socket.sslerror as msg:
            raise HTTPSError(msg)

        self._recv = self._sslsock.read

    def _sendAll(self, data):
        """Sends a string to the socket.

        The SSL object reports how many bytes each write accepted, so the
        write is repeated until the whole string has been handed over.

        @raise HTTPSError: If the SSL layer accepts no data at all.
        """
        remaining = data
        while remaining:
            written = self._sslsock.write(remaining)
            if written <= 0:
                # Guard against spinning forever on a dead connection.
                raise HTTPSError('unable to write to the SSL connection')
            remaining = remaining[written:]
341 342
def clientFactory(scantask):
    """HTTP/HTTPS client factory.

    @param scantask: Object describing where the target is and how to reach it.
    @type scantask: C{instanceof(ScanTask)}

    @return: A new client instance suited to the scheme of the task's URL.
    HTTPS clients come configured with the task's key and certificate files.
    @rtype: C{HTTPClient}

    @raise InvalidURL: If the URL starts with neither C{http://} nor
    C{https://}.
    """
    url = scantask.url

    if url.startswith('http://'):
        return HTTPClient()

    if url.startswith('https://'):
        httpsclient = HTTPSClient()
        httpsclient.keyfile = scantask.keyfile
        httpsclient.certfile = scantask.certfile
        return httpsclient

    # Supply a message like every other raise in this module does. HTTPError
    # defines its own __init__, so raising the bare class presumably fails
    # while instantiating it -- TODO confirm against HTTPError.__init__.
    raise InvalidURL('unsupported URL: %s' % (url,))
365 366 367 # vim: ts=4 sw=4 et 368

halberd-0.2.4/doc/api/Halberd.clues.file-module.html0000644000175000017500000002572411431512607020656 0ustar jmbrjmbr Halberd.clues.file
Package Halberd :: Package clues :: Module file
[hide private]
[frames] | no frames]

Module file

source code

Utilities for clue storage.

Provides functionality needed to store clues on disk.

Classes [hide private]
  InvalidFile
The loaded file is not a valid clue file.
  ClueDir
Stores clues hierarchically using the underlying filesystem.
Functions [hide private]
 
save(filename, clues)
Save clues to a file.
source code
list
load(filename)
Load clues from file.
source code
Variables [hide private]
  __package__ = 'Halberd.clues'
Function Details [hide private]

save(filename, clues)

source code 

Save clues to a file.

Parameters:
  • filename (str) - Name of the file where the clues will be written to.
  • clues (list) - Sequence of clues to write.

load(filename)

source code 

Load clues from file.

Parameters:
  • filename (str) - Name of the file where the clues are stored.
Returns: list
Clues extracted from the file.
Raises:
  • InvalidFile - In case there's a problem while reinterpreting the clues.

halberd-0.2.4/doc/api/Halberd.logger-module.html0000644000175000017500000002260011431512607020072 0ustar jmbrjmbr Halberd.logger
Package Halberd :: Module logger
[hide private]
[frames] | no frames]

Module logger

source code

Logger singleton.

This module allows halberd to easily log certain events.

Functions [hide private]
object
getLogger()
Get a reference to an instance of a logger object.
source code
 
setDebug()
Set the logging level to debug.
source code
 
setError()
Set the logging level to error.
source code
Variables [hide private]
  _logger = None
  _logfmt = '%(levelname)s %(message)s'
  __package__ = 'Halberd'
Function Details [hide private]

getLogger()

source code 

Get a reference to an instance of a logger object.

Returns: object
reference to a logger.

halberd-0.2.4/doc/api/toc-Halberd.crew-module.html0000644000175000017500000000331511431512607020340 0ustar jmbrjmbr crew

Module crew


Classes

WorkCrew

Variables


[hide private] halberd-0.2.4/doc/api/Halberd.shell.BaseStrategy-class.html0000644000175000017500000001711011431512607022136 0ustar jmbrjmbr Halberd.shell.BaseStrategy
Package Halberd :: Module shell :: Class BaseStrategy
[hide private]
[frames] | no frames]

Class BaseStrategy

source code

Known Subclasses:

Defines the strategy used to scan.

A strategy is a certain way to use the program. These can be layered to build a bigger strategy doing more complex things, etc.

Instance Methods [hide private]
 
__init__(self, scantask) source code
 
execute(self)
Executes the strategy.
source code
 
_scan(self)
Allocates a work crew of scanners and launches them on the target.
source code
 
_analyze(self)
Performs clue analysis.
source code
halberd-0.2.4/doc/api/Halberd.clientlib.InvalidURL-class.html0000644000175000017500000001613011431512607022351 0ustar jmbrjmbr Halberd.clientlib.InvalidURL
Package Halberd :: Module clientlib :: Class InvalidURL
[hide private]
[frames] | no frames]

Class InvalidURL

source code

              object --+            
                       |            
exceptions.BaseException --+        
                           |        
        exceptions.Exception --+    
                               |    
                       HTTPError --+
                                   |
                                  InvalidURL

Invalid or unsupported URL

Instance Methods [hide private]

Inherited from HTTPError: __deepcopy__, __init__, __str__

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/doc/api/toc-Halberd.shell-module.html0000644000175000017500000000315211431512607020506 0ustar jmbrjmbr shell

Module shell


Classes

BaseStrategy
ClueReaderStrategy
MultiScanStrategy
ScanError
UniScanStrategy

Variables

__package__

[hide private] halberd-0.2.4/doc/api/Halberd.crew.ScanState-class.html0000644000175000017500000003076111431512607021266 0ustar jmbrjmbr Halberd.crew.ScanState
Package Halberd :: Module crew :: Class ScanState
[hide private]
[frames] | no frames]

Class ScanState

source code

Shared state among scanner threads.

Instance Methods [hide private]
 
__init__(self)
Initializes shared state among scanning threads.
source code
tuple
getStats(self)
Provides statistics about the scanning process.
source code
 
insertClue(self, clue)
Inserts a clue in the list if it is new.
source code
list
getClues(self)
Clue accessor.
source code
 
incMissed(self)
Increase the counter of missed replies.
source code
 
setError(self, err)
Signal an error condition.
source code
 
getError(self)
Returns the reason of the error condition.
source code
Instance Variables [hide private]
threading.Event

caught with an exception).

shouldstop
Signals when the threads should stop scanning.
Method Details [hide private]

getStats(self)

source code 

Provides statistics about the scanning process.

Returns: tuple
Number of clues gathered so far, number of successful requests and number of unsuccessful ones (missed replies).

getClues(self)

source code 

Clue accessor.

Returns: list
A copy of all obtained clues.

halberd-0.2.4/doc/api/toc-Halberd.ScanTask-module.html0000644000175000017500000000372211431512607021111 0ustar jmbrjmbr ScanTask

Module ScanTask


Classes

ConfError
ScanTask

Variables

__package__
default_conf_dir
default_conf_file
default_out
default_parallelism
default_ratio_threshold
default_scantime

[hide private] halberd-0.2.4/doc/api/Halberd.util-module.html0000644000175000017500000003201611431512607017572 0ustar jmbrjmbr Halberd.util
Package Halberd :: Module util
[hide private]
[frames] | no frames]

Module util

source code

Miscellaneous functions.

Functions [hide private]
 
_gen_table()
Generate translation table.
source code
 
utctime() source code
str
hostname(url)
Get the hostname part of an URL.
source code
tuple
addresses(host)
Get the network addresses to which a given host resolves.
source code
Variables [hide private]
str table = '________________________________________________01234...
Translation table for normalizing strings.
  __package__ = 'Halberd'
Function Details [hide private]

hostname(url)

source code 

Get the hostname part of an URL.

Parameters:
  • url (str) - A valid URL (must be preceded by scheme://).
Returns: str
Hostname corresponding to the URL or the empty string in case of failure.

addresses(host)

source code 

Get the network addresses to which a given host resolves.

Parameters:
  • host (str) - Hostname we want to resolve.
Returns: tuple
Network addresses.

Variables Details [hide private]

table

Translation table for normalizing strings.
Type:
str
Value:
'________________________________________________0123456789_______ABCD\
EFGHIJKLMNOPQRSTUVWXYZ______abcdefghijklmnopqrstuvwxyz________________\
______________________________________________________________________\
_______________________________________________'

halberd-0.2.4/doc/api/toc.html0000644000175000017500000000670711431512607014567 0ustar jmbrjmbr Table of Contents

Table of Contents


Everything

Modules

Halberd
Halberd.ScanTask
Halberd.clientlib
Halberd.clues
Halberd.clues.Clue
Halberd.clues.analysis
Halberd.clues.file
Halberd.conflib
Halberd.crew
Halberd.logger
Halberd.reportlib
Halberd.shell
Halberd.util

[hide private] halberd-0.2.4/doc/api/Halberd.ScanTask.ScanTask-class.html0000644000175000017500000003136311431512607021656 0ustar jmbrjmbr Halberd.ScanTask.ScanTask
Package Halberd :: Module ScanTask :: Class ScanTask
[hide private]
[frames] | no frames]

Class ScanTask

source code

Describes the way a scan should be performed.

Instance Methods [hide private]
 
__init__(self) source code
 
readConf(self)
Read configuration file.
source code
Instance Variables [hide private]
str addr
Address of the target web server.
list analyzed
Sequence of clues after the analysis phase.
str certfile
Certificate to be used for SSL connections.
list clues
Sequence of clues obtained from the target.
bool debug
Display debug information.
str keyfile
Key file for SSL connections.
str out
File where to write reports.
tuple proxy_serv_addr
Address + port where to listen when operating as a proxy.
str save
File or directory name where the results will be written.
str url
URL to scan.
str urlfile
Root folder to use for storing results of MultiScans.
bool verbose
Display status information during the scan.
Method Details [hide private]

readConf(self)

source code 

Read configuration file.

This method tries to read the specified configuration file. If we try to read it at the default path and it's not there we create a bare-bones file and use that one.

Raises:
  • ConfError - If there's some problem creating or reading the configuration file.

Instance Variable Details [hide private]

out

File where to write reports. If it's not set, stdout will be used.
Type:
str

halberd-0.2.4/doc/api/Halberd.shell-module.html0000644000175000017500000001572511431512607017734 0ustar jmbrjmbr Halberd.shell
Package Halberd :: Module shell
[hide private]
[frames] | no frames]

Module shell

source code

Provides scanning patterns to be used as building blocks for more complex scans.

Strategies are different ways in which target scans may be done. We provide basic functionality so more complex stuff can be built upon this.

Classes [hide private]
  ScanError
Generic error during scanning.
  BaseStrategy
Defines the strategy used to scan.
  UniScanStrategy
Scan a single URL.
  MultiScanStrategy
Scan multiple URLs.
  ClueReaderStrategy
Clue reader strategy.
Variables [hide private]
  __package__ = 'Halberd'
halberd-0.2.4/doc/api/Halberd.clientlib.HTTPSError-class.html0000644000175000017500000001612511431512607022320 0ustar jmbrjmbr Halberd.clientlib.HTTPSError
Package Halberd :: Module clientlib :: Class HTTPSError
[hide private]
[frames] | no frames]

Class HTTPSError

source code

              object --+            
                       |            
exceptions.BaseException --+        
                           |        
        exceptions.Exception --+    
                               |    
                       HTTPError --+
                                   |
                                  HTTPSError

Generic HTTPS exception

Instance Methods [hide private]

Inherited from HTTPError: __deepcopy__, __init__, __str__

Inherited from exceptions.Exception: __new__

Inherited from exceptions.BaseException: __delattr__, __getattribute__, __getitem__, __getslice__, __reduce__, __repr__, __setattr__, __setstate__, __unicode__

Inherited from object: __format__, __hash__, __reduce_ex__, __sizeof__, __subclasshook__

Properties [hide private]

Inherited from exceptions.BaseException: args, message

Inherited from object: __class__

halberd-0.2.4/README0000644000175000017500000000157311431512414012446 0ustar jmbrjmbrhalberd -- HTTP load balancer detector ====================================== Overview -------- Halberd discovers HTTP load balancers. It is useful for web application security auditing and for load balancer configuration testing. Installation ------------ Follow the installation instructions written in INSTALL. Getting started --------------- You could begin with: $ halberd --help and: $ halberd www.example.com A configuration file is stored in a directory named .halberd hanging from the user's home directory (that is: $HOME in Unix/Linux systems). This file, named halberd.cfg, contains configurable run-time settings you can tweak. Documentation ------------- The user's guide is located in doc/manual.pdf Full API documentation can be found in doc/api Feedback and bug reports ------------------------ Send bug reports and comments to jmbr@superadditive.com halberd-0.2.4/AUTHORS0000644000175000017500000000016411144236326012637 0ustar jmbrjmbrAuthors of halberd. See also the files THANKS and ChangeLog. Juan M. Bello Rivas designed and implemented halberd. halberd-0.2.4/ChangeLog0000644000175000017500000027621611431513713013353 0ustar jmbrjmbrcommit f7ed35e760e071bde2b1b27b0b6ccb2cc5699530 Author: Juan M. Bello Rivas Date: Sat Aug 14 15:04:53 2010 +0200 Bumped version number. commit 46245cead44e7023c62129e0ac0c43738d5c206c Author: Juan M. Bello Rivas Date: Sat Aug 14 15:04:39 2010 +0200 Revised list of prerequisites. commit cc942d87fec080a51c3a57a626222295745c8b3a Author: Juan M. Bello Rivas Date: Sat Aug 14 15:02:47 2010 +0200 Use the hashlib module (this change is based on the patch provided by Fabian Affolter). commit 004130c493aad6ccc079a0e5f47d1832219379c1 Author: Juan M. Bello Rivas Date: Sat Aug 14 14:34:18 2010 +0200 Dropped the NEWS file. delete mode 100644 NEWS commit 3f2af63e89f8c6a3c9c0615a81b043af853b2280 Author: Juan M. 
Bello Rivas Date: Sat Aug 14 12:49:56 2010 +0200 Updated copyright notice. commit 2fab44d2f17039c8a707184299f6b5a1523dc726 Author: Juan M. Bello Rivas Date: Tue Sep 18 05:46:23 2007 +0200 Updated version information in the repository. darcs-hash:20070918034623-6d791-9b51c3fc70675792ae222e92233cf8becb112e77.gz commit c745a8583dd385edcae4dd1247abcdeee8be5f0b Author: Juan M. Bello Rivas Date: Wed Jul 18 22:32:54 2007 +0200 Fixed a bug triggered by Python 2.5 darcs-hash:20070718203254-6d791-c065247a9de950fc4c34c83a3e3b27bf1e3d96f5.gz commit 3a516c3deb39f9af999f12c5f93156c35deaa8f6 Author: Juan M. Bello Rivas Date: Wed Jul 18 22:32:33 2007 +0200 Added the THANKS file to the distribution darcs-hash:20070718203233-6d791-cf8dd82cb4c3136ae7aec1ba34a131e587126250.gz commit 1035545789115748c2f69b8cb6fdd18126f7088f Author: Juan M. Bello Rivas Date: Tue May 29 20:56:58 2007 +0200 Added THANKS file darcs-hash:20070529185658-6d791-938f85309d1e8fa69de0d2068b09ce2b2a478027.gz create mode 100644 THANKS commit 89e52ddd97ca436f77dcebc4767c8c706da92fd1 Author: Juan M. Bello Rivas Date: Tue May 29 20:36:17 2007 +0200 Bumped version number darcs-hash:20070529183617-6d791-a10c8c101c1f51fdea7407091abfe5111fbab740.gz commit 6c741f87cec06fee7c7c2735b90fda7217c6d3aa Author: Juan M. Bello Rivas Date: Tue May 29 20:30:28 2007 +0200 Fixed an incorrectly named exception. Noticed by Andres Riancho (http://w3af.sourceforge.net/) darcs-hash:20070529183028-6d791-b3a5e6e635b9e236b40663256c98d3f20cebfa44.gz commit 1fc6bfa2cc02c9bf2be2c5ee9bf4073cb49414a4 Author: Juan M. Bello Rivas Date: Tue May 29 20:28:01 2007 +0200 Forgot to distribute a copy of the GPL darcs-hash:20070529182801-6d791-9b89de72f7ef01dfb8322c72548a0f9db9b0ecad.gz commit 72e25b60259aecb9fbd6d47fbef42d4e5fb7261b Author: Juan M. Bello Rivas Date: Mon Sep 18 12:42:03 2006 +0200 Further modifications to warning messages. 
darcs-hash:20060918104203-6d791-42ae3c5dcf9c520d965e0093f116124776f38856.gz commit 0ccc7400ba1715741e12d81203303b627eb17267 Author: Juan M. Bello Rivas Date: Mon Sep 18 12:03:45 2006 +0200 Enhanced warning during the clue analysis stage. darcs-hash:20060918100345-6d791-dc7f97279a13e025e65cc4946386dee8713b323c.gz commit 7230e6cd55eb6509be465f7542f50b59f33994f9 Author: Juan M. Bello Rivas Date: Sun Sep 17 15:18:21 2006 +0200 Further improvements towards compatibility with older Python versions. darcs-hash:20060917131821-6d791-e995258f9880a23b198d5cb83593e0d45b0f4e86.gz commit 4dd993da6d3160c66c41889e5a968c2cc00522b0 Author: Juan M. Bello Rivas Date: Sat Sep 16 18:13:38 2006 +0200 Updated email address. darcs-hash:20060916161338-6d791-283ce3f0ce018a14918cb8733dbb87bdf117f75e.gz commit 3677fe716a7f9e6dceff0785217d674a1bc5095a Author: Juan M. Bello Rivas Date: Sat Sep 16 17:59:36 2006 +0200 Updated documentation to reflect compatibility with Python 2.3.* darcs-hash:20060916155936-6d791-529043c5d8240ee72a2a6874e4c1f9c50ea96ea8.gz commit f629fe9b3c1a19f1a1e9de2c5f68516f10f8c8a9 Author: Juan M. Bello Rivas Date: Sat Sep 16 14:00:50 2006 +0200 Replaced list.sort(key darcs-hash:20060916120050-6d791-7bac16b5722044e3ef687fa956cd15367550a8cf.gz commit 3f88c388678b673c12c5653cb5e899cdef1296c9 Author: Juan M. Bello Rivas Date: Thu Sep 7 17:45:14 2006 +0200 Removed reference to a stale file name in README. darcs-hash:20060907154514-6d791-8722662e7cd9822b26ac3873e639b8f18424987f.gz commit 93f634435b65e42d4934dc039aa86061f489e5cc Author: Juan M. Bello Rivas Date: Tue Aug 29 00:07:58 2006 +0200 Fixed a bug which prevented human readable reports from being saved. darcs-hash:20060828220758-d00bf-5ddbc51e15677ffec77dfd01c8dd214969d8cfc0.gz commit 9bb76eaaf1137827fcd6c50f7f7434729437cb7a Author: Juan M. Bello Rivas Date: Mon Aug 28 23:34:42 2006 +0200 Fixed a bug in Halberd.clues.analysis.reanalyze that would provide incorrect information to the user in certain cases. 
darcs-hash:20060828213442-d00bf-dbff6aa6a390ece78a7fb9fc1701267913742e77.gz commit 2e72f657bc3e3ec7bf91d9bd0fc53724c220a303 Author: Juan M. Bello Rivas Date: Thu Aug 24 19:07:23 2006 +0200 Added the NEWS file to the distribution. darcs-hash:20060824170723-d00bf-55491ff74c3fe5ebfc08b606e0bf3934f0f457a0.gz create mode 100644 NEWS commit 1b2283f915d62469daed9495d4000d4b4b15af4b Author: Juan M. Bello Rivas Date: Thu Aug 24 18:38:24 2006 +0200 Bumped version number darcs-hash:20060824163824-d00bf-b8bc49d63fba95c3eecfc719302cfa0a4cb916b8.gz commit 3be265e1725bce4461cc18c774e41cab4f9e570d Author: Juan M. Bello Rivas Date: Thu Aug 24 18:37:39 2006 +0200 Synced the classifiers with those on cheeseshop.python.org/pypi darcs-hash:20060824163739-d00bf-9fe71d5fd41e2721f33641dbb292bd9da3709092.gz commit 4a6865c4187102a788ccbf057900a0a9b65964a6 Author: Juan M. Bello Rivas Date: Sat Aug 19 12:45:03 2006 +0200 Removed outdated TODO document from the distribution. darcs-hash:20060819104503-d00bf-d0ec7c75d728d980c03090e03fa8c878ff149cda.gz delete mode 100644 TODO commit da204b9d2f5135c4c20d615723e048afd81ea082 Author: Juan M. Bello Rivas Date: Mon Aug 29 00:17:40 2005 +0200 Redistributed TODO file. darcs-hash:20050828221740-abeea-3a45776de1e6ab70d61a81a86f7c1d677e09ed35.gz commit 5c1b309656210073e8348c83c5a0e0595c0c8f2a Author: Juan M. Bello Rivas Date: Sun Aug 28 18:54:38 2005 +0200 Moved some pending task from the TODO list to the issue tracker. darcs-hash:20050828165438-abeea-56604d68c8a352d1ceb858df1762b990e3439cba.gz commit f2d78ae46ae511d65ba95e11770e6f8157853d56 Author: Juan M. Bello Rivas Date: Sun Aug 28 14:54:52 2005 +0200 Removed a debugging print from ScanTask. darcs-hash:20050828125452-abeea-4ba4ea9ff048608da9d44ddf468dcebb4dca821f.gz commit 6f8a743f736ed5f2e4a89e9d7c20b806fa4622b8 Author: Juan M. Bello Rivas Date: Sun Aug 28 14:44:52 2005 +0200 Removed the distributed (RPC) scanning feature from the stable branch. 
darcs-hash:20050828124452-abeea-28394b335d9fdc766f7981155e15aee6d9210b73.gz delete mode 100644 Halberd/RPCServer.py commit d2afa501576bb4e664488ebde1bd5938748db4d4 Author: Juan M. Bello Rivas Date: Mon Aug 21 12:49:22 2006 +0200 Corrected a spelling error in the user's guide. darcs-hash:20060821104922-d00bf-e824b61f7ac0494bfb4deb803405eea187de7802.gz commit 6f6f9942e04ad486cc6aee26c66223b7398e00f2 Author: Juan M. Bello Rivas Date: Mon Aug 21 00:40:33 2006 +0200 Made some corrections to the documentation. darcs-hash:20060820224033-d00bf-35ce091b284d836c886d9cb47bcb55b99d115747.gz commit 3f47e281d8314967703f7f5f1e756dc9b28e9e55 Author: Juan M. Bello Rivas Date: Thu Aug 17 04:34:45 2006 +0200 Dropped the overview.html makefile target and the dependency on docutils. darcs-hash:20060817023445-d00bf-99cc891e26a8fde5930b4860fe3acbf6623ac88d.gz commit 5b57685ae4e2fd138e4059ae4932d3cc2dccbd1e Author: Juan M. Bello Rivas Date: Thu Aug 17 03:10:58 2006 +0200 Bumped the version number. darcs-hash:20060817011058-d00bf-e33b6622acd6d84ef67a34ada1910c55b6ebb38e.gz commit d790e93dfad338553edcf13896a5804344d5ce78 Author: Juan M. Bello Rivas Date: Sun Aug 20 11:37:23 2006 +0200 Reduced default_scantime to 15 seconds. darcs-hash:20060820093723-d00bf-8c4a762ddc02812b4612119f26f73e16424ce0fd.gz commit bba74b156a2f9c15d35bfa7c4bc928bffd8afbb3 Author: Juan M. Bello Rivas Date: Sat Aug 19 02:09:31 2006 +0200 Implemented a sanity check at the end of ignore_changing_fields. darcs-hash:20060819000931-d00bf-3cef826ca852203014b4c34cf7230f315fcb7496.gz commit a0cc077436d8c2dbfc839d7f921f966cb8bc25b0 Author: Juan M. Bello Rivas Date: Thu Aug 17 18:01:12 2006 +0200 More documentation rewrites. darcs-hash:20060817160112-d00bf-93dc402e590791769057deb52a119b368774b718.gz commit 25988745cf80986b20d18e06eac1a5c8b281907f Author: Juan M. 
Bello Rivas Date: Thu Aug 17 03:38:42 2006 +0200 Dropped doc/overview.txt darcs-hash:20060817013842-d00bf-6ff72f9e59280212826fc5f6d463d09ceb93692d.gz delete mode 100644 doc/overview.txt commit 49a8cf0e3542c691d8e48bd6a5e30d26f7785981 Author: Juan M. Bello Rivas Date: Thu Aug 17 03:10:34 2006 +0200 Updated the project URL in setup.py darcs-hash:20060817011034-d00bf-5061a026157fcc326682c7baea4dd6c942503f71.gz commit 4c1cea2246143c1bce9582cdf7f8c2d8a4d589ac Author: Juan M. Bello Rivas Date: Thu Aug 17 03:10:04 2006 +0200 Updated copyright notice in the scripts/halberd darcs-hash:20060817011004-d00bf-db91e094765c6dee7044b1381f902b32324bb3c6.gz commit 70058f109b0c3b2f52c14ab286bafb38a0bd253a Author: Juan M. Bello Rivas Date: Thu Aug 17 03:09:18 2006 +0200 Produce PDF manual by default and added LaTeX2HTML target. darcs-hash:20060817010918-d00bf-bac213daf40a6b2ffeac5fa32d2053a3fc8add29.gz commit c2b23b8ee5cd040411ba02f563f1aef52f20d24b Author: Juan M. Bello Rivas Date: Thu Aug 17 03:07:19 2006 +0200 Major documentation effort. darcs-hash:20060817010719-d00bf-d38d3fe3950d0bb276ef79b49fd5cac131867715.gz commit 9c915fef09d18fb91f8f68f2c84bb11704c70cae Author: Juan M. Bello Rivas Date: Thu Aug 17 03:06:32 2006 +0200 Updated darcs' path in main makefile. darcs-hash:20060817010632-d00bf-37459402939f2023bd6e02aeea06b8ef7f343251.gz commit 0e0eae7bc51820a9ec368c3545ac76f40c01f91a Author: Juan M. Bello Rivas Date: Wed Aug 16 01:31:31 2006 +0200 Added some more explanations to the HACKING document. darcs-hash:20060815233131-d00bf-f23ce65d89734d885e2c582dfd227cbf93450b3b.gz commit bef21fc9eda3819f7834a8c98affa599770d025f Author: Juan M. Bello Rivas Date: Wed Aug 16 01:18:56 2006 +0200 Added Halberd/version.py to the repository. darcs-hash:20060815231856-d00bf-11c92362f7d7d4495a0f4eada4886cbac9ee2421.gz create mode 100644 Halberd/version.py commit 62bb9c59f5c13a7beb9f67abb1ccc188c9b564bf Author: Juan M. Bello Rivas Date: Tue Aug 15 15:01:45 2006 +0200 Created the HACKING document. 
darcs-hash:20060815130145-d00bf-226676be26212c3175343b9faf3870baf6f62028.gz create mode 100644 HACKING commit 11d752706e93fbac3bc8d398e704239fef35d331 Author: Juan M. Bello Rivas Date: Tue Aug 15 02:00:34 2006 +0200 Updated copyright notice and contact information. darcs-hash:20060815000034-d00bf-fab743de4967e972286e1b30d67c2f370317d842.gz commit 224ac2270c3ad9739b35bbad587fae3952e28736 Author: Juan M. Bello Rivas Date: Tue Aug 15 01:50:39 2006 +0200 Minor cleanups in setup.py darcs-hash:20060814235039-d00bf-75a1cb1edbf103a2443cd09d79a6e96c30591e26.gz commit c271ea76404986baea885fc33ac50ff2e9481f72 Author: Juan M. Bello Rivas Date: Tue Aug 15 01:50:21 2006 +0200 Improved documentation darcs-hash:20060814235021-d00bf-ae2b9507529753a41b5a2d467b6dab50eb784f8f.gz commit c63bcf50f64f17deaa7aec792c8237ba1e926f3f Author: Juan M. Bello Rivas Date: Tue Aug 15 01:48:42 2006 +0200 Included INSTALL in MANIFEST.in darcs-hash:20060814234842-d00bf-3910ee4d53b389ffe09b06ff9ceff9993d148bcd.gz commit abb835e161a2769745dd0c7d60cbd7087ae9643a Author: Juan M. Bello Rivas Date: Wed Aug 31 23:52:28 2005 +0200 Documentation enhancements. darcs-hash:20050831215228-abeea-c2e513b12dd13927a991d709d9b75452959f4b77.gz commit cf8f6a10211ce30e6373998ca350bc83f02ee793 Author: Juan M. Bello Rivas Date: Wed Aug 31 23:51:15 2005 +0200 Trimmed stale code. darcs-hash:20050831215115-abeea-9de98b669ca8d086795362128898f53f1a85a17c.gz commit 91d69ddfcda5386d7c11ee34790c39437d4f8862 Author: Juan M. Bello Rivas Date: Wed Aug 31 23:50:15 2005 +0200 Inserted pylint in the development toolchain. darcs-hash:20050831215015-abeea-c97241e17ebad35a803ba82ab968d979c846a8eb.gz commit 71fd1e56a88f99fc9613f929c86def95aad4d447 Author: Juan M. Bello Rivas Date: Wed Aug 31 23:02:39 2005 +0200 Wrote makefile targets for cleaning the doc directory. darcs-hash:20050831210239-abeea-2941624f5f73e45d48b83c19c88b46b1a2f248db.gz commit be578e1e590abb1d36fceee05e62d67d059443fb Author: Juan M. 
Bello Rivas Date: Wed Aug 31 13:03:18 2005 +0200 Wrote the layout for the user's guide. darcs-hash:20050831110318-abeea-a4c7773c37670d3e34a77c23162a18ca2e1af5e1.gz create mode 100644 doc/GNUmakefile create mode 100644 doc/manual.tex commit 6731677a91106a241cffe02335e2326bdcd88406 Author: Juan M. Bello Rivas Date: Wed Aug 31 13:00:58 2005 +0200 Removed dangling reST markup in README. darcs-hash:20050831110058-abeea-8f9fcd127915893ec5dc4cee94ab85e03b0a1b3c.gz commit d14dc4967d2b574b9a8d57228452301d9f6f08d7 Author: Juan M. Bello Rivas Date: Wed Aug 31 12:58:20 2005 +0200 Fixed a problem with the install target in GNUmakefile (it wouldn't install the man page). This fix is only relevant for internal development. darcs-hash:20050831105820-abeea-a8ad2b3352858c88a2ca3971d58608f53d0585d4.gz commit 7ea6268421457ae6aa817b2ea9a1ffe6d3a889cd Author: Juan M. Bello Rivas Date: Mon Aug 29 05:52:34 2005 +0200 Arranged the program so that man pages can be generated automatically by GNU help2man. darcs-hash:20050829035234-abeea-0104d6de345b4cc79fa79303355f4f56b78510d0.gz create mode 100644 help2man.cfg commit 194a61e62a912fb18cc0803a8aabe6be4ba1901d Author: Juan M. Bello Rivas Date: Mon Aug 29 05:50:16 2005 +0200 Dropped references to obsolete code in the makefile. darcs-hash:20050829035016-abeea-b46c43b069431227b05357a0c68a2b14350d88e0.gz commit f52e6c28bd968d11752258076e9aeb92c5b6f6e2 Author: Juan M. Bello Rivas Date: Mon Aug 29 00:16:38 2005 +0200 Edited README document. darcs-hash:20050828221638-abeea-21649390f18ddb766e34c534d3279be5ecaf5f12.gz commit ecb3ba99f50314455ede3704b7dd257804457d1c Author: Juan M. Bello Rivas Date: Mon Aug 29 00:15:42 2005 +0200 Wrote INSTALL document. darcs-hash:20050828221542-abeea-227347ecf13ecc9f2503bdb4e34621e47cbdedbc.gz create mode 100644 INSTALL commit 54c60b7f1e80176f200b0aa6844dc800ccfb8642 Author: Juan M. Bello Rivas Date: Sun Aug 28 15:21:17 2005 +0200 Removed __revision__ strings from sources. 
darcs-hash:20050828132117-abeea-8d9066e1f048ab63bf12db435e4236b4c04818ec.gz commit 85fc41deb23a99dd1d9e48eff7694c28df0376f9 Author: Juan M. Bello Rivas Date: Sun Aug 28 14:44:00 2005 +0200 Modified the install target in the makefile to ease local testing. darcs-hash:20050828124400-abeea-49d25dc8dd72482446a95ee1a8d34ea439cdd26d.gz commit 497909ff9db570808703832e894cb24d61fd993c Author: Juan M. Bello Rivas Date: Sun Aug 28 14:17:39 2005 +0200 Added two new items. darcs-hash:20050828121739-abeea-d3f0e1a8f73f6ef0e7a8a0845b9ed5dcba5953ab.gz commit 7d801a5840c38645aa81f16a5f04dc51aadcdf8a Author: Juan M. Bello Rivas Date: Sun Aug 28 14:09:34 2005 +0200 Removed left-over code from transition to darcs. darcs-hash:20050828120934-abeea-8193673a21f2747492a46ec2fd83f88fe4745449.gz delete mode 100644 Halberd/shell/__init__.py delete mode 100644 Halberd/shell/strategy.py commit da40fe9a92edbf6aa3c8bc48eaa0854473d88b4e Author: Juan M. Bello Rivas Date: Sun Aug 28 14:01:18 2005 +0200 Replaced the external hosts used for testing with more reliable ones. darcs-hash:20050828120118-abeea-dc0ca221d6a3e9967f76bf81ba7c9407df713687.gz commit c5906ece793467be7760d55a975f8e94df9c14ab Author: Juan M. Bello Rivas Date: Sun Aug 28 14:00:55 2005 +0200 Changed one of the pending tasks. darcs-hash:20050828120055-abeea-6f028f675afeaf884952728d10256e63948467c1.gz commit f774391f6cd4ab30c791b8ee418332e9c07724a8 Author: Juan M. Bello Rivas Date: Sun Aug 28 13:57:00 2005 +0200 Updated email address. darcs-hash:20050828115700-abeea-9eb9d81c8317a38601268f893511f17f3e25e1eb.gz commit 205e05bea9c0724d3f644d8c719d23912053d749 Author: Juan M. Bello Rivas Date: Sun Aug 28 13:50:02 2005 +0200 Updated the program's description. darcs-hash:20050828115002-abeea-81c8075a624f89aa81cf13f0a4b2b036a5e5b5c5.gz commit de86f83a14e389e58add92373a5025b080071ba6 Author: Juan M. 
Bello Rivas Date: Sun Aug 28 13:29:24 2005 +0200 Fixed the ChangeLog target to use darcs instead of the old cvs2cl.pl script darcs-hash:20050828112924-abeea-29d3381ae0d5ba98ce7990cdba42bc6840759122.gz commit 80f5157d4af0486f1b21fbfaf3ea0a7142eafbe2 Author: Juan M. Bello Rivas Date: Sun Aug 28 13:21:06 2005 +0200 Removed dependency on docutils' default stylesheet. darcs-hash:20050828112106-abeea-c5e28f03c3955123ffa97e9d43f69a4383ef1b70.gz commit 891a229ec59212ff06947b1fc75bce93a31b0acf Author: rwx Date: Sun Aug 28 12:48:07 2005 +0200 Moved the main program 'halberd' to the new 'scripts' directory to avoid Moved the main program 'halberd' to the new 'scripts' directory to avoid conflicts with case-insensitive file-systems. darcs-hash:20050828104807-a3a09-53d23267d880a80df34dfb33b45117fb591f9ade.gz commit 82ba4b158908465db344c5514e3f1cd39a75bfea Author: rwx Date: Sun Aug 28 11:18:44 2005 +0200 Got rid of some pending items. Added some more. darcs-hash:20050828091844-a3a09-4a95ec7c745b808c1b77b78c62d7abe38fe7d576.gz commit 4a5180ca4dddda4794afb4f84c513f4c08167c60 Author: rwx Date: Sun Aug 28 11:18:26 2005 +0200 Clarified the --help message. darcs-hash:20050828091826-a3a09-2e55888e74bc4ab35ef3c7ffa8a6b809645f6fd8.gz commit a3143fa3e5c66a3cb969c8ca2e8a4b7671882d4e Author: rwx Date: Sun Aug 28 11:17:40 2005 +0200 Enforced docstring-revision-copyright convention darcs-hash:20050828091740-a3a09-6203c820ed357df905b88693ca9200512b7f26fd.gz commit 12df28105b08b54cc0d0b621dfbb76e3dfee3cdb Author: rwx Date: Sun Aug 28 11:17:05 2005 +0200 Fixed an error in a docstring. darcs-hash:20050828091705-a3a09-1802b5f98b916fc9d1452690ce95446284844192.gz commit 4437d66ddfde5f393c12901cf6d4242ae35557b4 Author: rwx Date: Sun Aug 28 11:15:59 2005 +0200 Removed internal TODO from the distribution file. 
darcs-hash:20050828091559-a3a09-f150beab50f307b982d7fe102e5b1ab4ece3331d.gz commit 5229aa5a168b841ed0ebc7f62c3f094b1b57f6d8 Author: rwx Date: Sat Aug 27 15:44:53 2005 +0200 Pretty print headers when halberd is invoked with the --debug parameter. darcs-hash:20050827134453-a3a09-6f57c43786f278ec390d71fb3691427c4248a167.gz commit 626d017ea51ab71b9598a69fed58469ce8f2ab87 Author: rwx Date: Sat Aug 27 15:14:46 2005 +0200 Fixed a bug related to MIME header manipulation. darcs-hash:20050827131446-a3a09-17db0ddf10b095e01c86a99a0a2f3c8fefe9b444.gz commit 4e3c8214942f680d414887f839270ed7e23281b5 Author: rwx Date: Sat Aug 27 14:21:42 2005 +0200 Updated to take into account the modifications in Halberd.clientlib. darcs-hash:20050827122142-a3a09-2b862133304f6a3642a3b1fefaf50bbe5f93ce15.gz commit a6152114b69e4fd172cf198e2dfaf50365f2d917 Author: rwx Date: Sat Aug 27 14:20:49 2005 +0200 Wrote a notice about the dependency of test_clientlib.py on Wrote a notice about the dependency of test_clientlib.py on Halberd.clientlib.default_template. darcs-hash:20050827122049-a3a09-0d07634a117f40b6a3acb0415ee9a4304dad2ba1.gz commit df9274ddcae21dec68954dcc65eefd58a29d6bad Author: rwx Date: Sat Aug 27 14:13:44 2005 +0200 Updated default_template with Firefox headers. Updated default_template with Firefox headers. Added the port number to the Host header field in HTTPClient._fillTemplate() darcs-hash:20050827121344-a3a09-db7b472d310fcd684db83522fc50ea192149f2d3.gz commit 21e07162eb8f29573e6555e03f9c3863e7843e7c Author: rwx Date: Sat Aug 27 13:58:53 2005 +0200 Documented the fact that halberd requires Python 2.4 or above. 
darcs-hash:20050827115853-a3a09-bc9b09e0b0189ea56ce519ab6f693edb74759a54.gz commit 1580b45b4713f01a71848c42432f1c775f3b4fb9 Author: rwx Date: Sat Aug 27 13:41:49 2005 +0200 Replaced the functions unzip, decorate_and_sort and undecorate with a (Python Replaced the functions unzip, decorate_and_sort and undecorate with a (Python 2.4 specific) call to list.sort(key darcs-hash:20050827114149-a3a09-22cf8e36ff2897af844556c6c763e55783aea6bc.gz commit 78fec5d4b7e4242fc486a19d0afe9d07841a4993 Author: rwx Date: Sat Aug 27 03:36:05 2005 +0200 Replaced the implementation of deltas() with a shorter and more reliable one. darcs-hash:20050827013605-a3a09-654b831da23dfb316ab07b4ca5b892b6c83e7dc7.gz commit 45bac865d93ad36aa4d6ae3ed83f6dcef03b49c7 Author: rwx Date: Sat Aug 27 03:06:16 2005 +0200 Corrected a mistake in the license name. darcs-hash:20050827010616-a3a09-7dda19cc1182db58aa38fc686342bf880995ad54.gz commit e056c2b9d551b4dcf981a11687caa974bc760139 Author: rwx Date: Sat Aug 27 03:05:19 2005 +0200 Minor changes to an output message. darcs-hash:20050827010519-a3a09-3f3baa48c32483fecc924e1cefe9f5bbfb90013b.gz commit 3e294df4aaaef2f10816a960203971b028e6c6d4 Author: rwx Date: Fri Aug 26 14:15:57 2005 +0200 One more task added to the list. darcs-hash:20050826121557-a3a09-63a3e301b4e5766a6a47a5a775197d23102d6c19.gz commit efcfb4171c1acb52b09c53e3981ab08aabf0940b Author: rwx Date: Fri Aug 26 14:14:56 2005 +0200 Added another item to the TODO list. darcs-hash:20050826121456-a3a09-c7788802eea1e0350fb2f6501c795a1b2f09d08f.gz commit 5c66b0a12efd84820a4e939fe899661e5eb4d749 Author: rwx Date: Fri Aug 26 14:14:01 2005 +0200 Renamed main program from 'halberd.py' to 'halberd' darcs-hash:20050826121401-a3a09-0edf6eb8f555f0577a9b4c13b9c2708b20e622be.gz commit eaa229781e78a4f3b5ada42445b4bf02cf01da51 Author: rwx Date: Fri Aug 26 14:06:11 2005 +0200 Renamed the hlbd module to Halberd in the whole source tree. 
darcs-hash:20050826120611-a3a09-0f7cfaca9d31a6616daa7e3c5859ee9639400d12.gz commit b5c3006851dad92cf7e6b46d1be5accd918108d7 Author: rwx Date: Fri Aug 26 14:05:39 2005 +0200 Updated the TODO list. darcs-hash:20050826120539-a3a09-336fc77af573e8f961b0b753ba0829a15e7517a1.gz commit 207da97b33edfd6c68bf87888323d438e6a06c3b Author: rwx Date: Fri Aug 26 13:44:21 2005 +0200 Enforced the docstring-revision-copyright convention in every file. darcs-hash:20050826114421-a3a09-e850d00fee4a23d13ec7a9afe822576f4fef2772.gz delete mode 100644 THANKS commit 2fe96ae784a03b423935f4fb5c11c2cac997bee3 Author: rwx Date: Sat Aug 21 08:42:39 2004 +0200 Made some assorted corrections and remarks. darcs-hash:20040821064239-a3a09-ab33730bab2ae36f2085e726c088892183f95565.gz commit 7e784cb3c106cdb03556fb0fb7a6b406c9e9b290 Author: rwx Date: Sun Apr 11 13:36:08 2004 +0200 Improved reStructuredText output. darcs-hash:20040411113608-a3a09-40caf18e602c65ed9f3158f8af030ae83bdfc5e9.gz commit deadc98796d27abeae3dd14e609567306a0d0fa9 Author: rwx Date: Sun Apr 11 13:35:47 2004 +0200 Updated documentation. darcs-hash:20040411113547-a3a09-14d9701217e81296422befeafd7eca2715b82c5b.gz commit 2275b0f145509eca55af3482675e2a70306b0808 Author: rwx Date: Sun Apr 11 13:35:23 2004 +0200 Did some error checking and code clean-up. darcs-hash:20040411113523-a3a09-0f801e5ee851a65d6667e2450bc3f860ae43109d.gz commit 6783de3dd907dea2beb525890d92382e13c4db1c Author: rwx Date: Sun Apr 11 13:34:53 2004 +0200 Documented the functions. darcs-hash:20040411113453-a3a09-c25cd4b7b8f80216fd1305773a450c1370360769.gz commit db0c52f2643f2c1745c11945ac649321e917cc45 Author: rwx Date: Sun Apr 11 13:34:29 2004 +0200 Catches KeyboardInterrupt cleanly. darcs-hash:20040411113429-a3a09-37cf4fb3285dec8aa305e83b5d63c5bbc6c37be1.gz commit f241206a926b3c309cf0eef1a53ce5965495f0be Author: rwx Date: Sun Apr 11 13:34:10 2004 +0200 Updated some instructions. 
darcs-hash:20040411113410-a3a09-18ac1d03b20b433935f9ce81e15880fe3023e769.gz commit d0399c8f742090276f8344fb4458cb354a02eb6b Author: rwx Date: Wed Apr 7 14:16:02 2004 +0200 Added a new member ScanTask.debug to tell if debugging info. should be Added a new member ScanTask.debug to tell if debugging info. should be displayed/reported or not. darcs-hash:20040407121602-a3a09-69e380fb6342c87e081251fa57da53b8b67f048d.gz commit 34fc77528ab4cdb0b9175475dd512ad684aca74e Author: rwx Date: Wed Apr 7 14:15:25 2004 +0200 Improved --debug command line option. darcs-hash:20040407121525-a3a09-26f6b0820ab79c59456f77e2171841d319645daf.gz commit a05797ff718c3827815749023b2f36500e482626 Author: rwx Date: Wed Apr 7 13:11:20 2004 +0200 Several enhancements to debugging information. darcs-hash:20040407111120-a3a09-706d556a82adf9543070d1a9443b81e6fdc5c613.gz commit 6971b60d558daa3ca23ee44ae4116f6a843c04a3 Author: rwx Date: Wed Apr 7 13:09:38 2004 +0200 Done with logging. darcs-hash:20040407110938-a3a09-16d64debbf25393975674186305089e5bfd14987.gz commit acaf0ae93ad8abcbfeb1f14f6f6062ecebc166f1 Author: rwx Date: Wed Apr 7 12:25:47 2004 +0200 Added logging facilities in hlbd.logger and modified the program to use it. darcs-hash:20040407102547-a3a09-b31920956caa8044e10cd9efd3a5c7db83f05034.gz create mode 100644 Halberd/logger.py commit bcb2949920646ca686242b198e5bf7d1a71d534f Author: rwx Date: Wed Apr 7 02:35:10 2004 +0200 Improved module documentation. darcs-hash:20040407003510-a3a09-ec9e77bc24d49ebf53968ed232649fcb19330324.gz commit 9e729f0a036358f329cd1e109729efa82ba565ba Author: rwx Date: Wed Apr 7 02:26:44 2004 +0200 Removed a wrong import statement. darcs-hash:20040407002644-a3a09-09d132deffd26895c3279645fb6810c79682df75.gz commit dc4cf652660b94e8553c983bd71402a4b61f10a5 Author: rwx Date: Wed Apr 7 02:24:22 2004 +0200 Discarded tlslite as SSL/TLS library of choice. 
darcs-hash:20040407002422-a3a09-ea9e9e75eaad553bc984b40b617053351f86e97f.gz commit 4c1203d4dfbd1707d4c6138d3f8278a84774de74 Author: rwx Date: Wed Apr 7 02:22:19 2004 +0200 Removed hlbd.ScanTask.ScanTask.setURL and ScanTask.setAddr, its work is Removed hlbd.ScanTask.ScanTask.setURL and ScanTask.setAddr, its work is currently done by hlbd.shell.BaseStrategy._scan directly. darcs-hash:20040407002219-a3a09-aeb39c54e0937869312fa377411cdc9a37a005e3.gz commit 6caf1748de13bc1f0829a74a3c019c6c68dee392 Author: rwx Date: Tue Apr 6 14:07:29 2004 +0200 Removed an unneeded import. darcs-hash:20040406120729-a3a09-91dbdcebf965ac2a9b45e9e85a7dac7e032483cf.gz commit 99f443731c3c5c3a3b7c3dd156725ab31ccdae51 Author: rwx Date: Tue Apr 6 14:04:15 2004 +0200 Added a TODO file. darcs-hash:20040406120415-a3a09-64105d9fd792c7abafb4b545be50e558f1607e29.gz create mode 100644 TODO commit fa01b7a29e841a115e1c45b725fa35884c2fee09 Author: rwx Date: Tue Apr 6 14:03:01 2004 +0200 Several clean-ups + inclusion of the TODO file. darcs-hash:20040406120301-a3a09-1644ab1d5467f84a6d37167c0bd6e37ddbd307e5.gz commit aa557a23b9d1dccd09b684e31bbce01f4038f613 Author: rwx Date: Tue Apr 6 14:02:30 2004 +0200 Renamed isDist to isDistributed for better clarity. darcs-hash:20040406120230-a3a09-ee70fe376f6f96a90653311c38ef8cb0bc41d9f6.gz commit 70fdedc1a5fd62a3d64359e265797fbfad1461f1 Author: rwx Date: Tue Apr 6 14:01:09 2004 +0200 Improved documentation. Improved documentation. Allowed users of HTTPClient (and its derivatives) to specify the template for the HTTP request to be sent. darcs-hash:20040406120109-a3a09-6f0acbf3cbc8481089e159dafc14d29e0a3cd0e7.gz commit 220c3c6dc63fc3039f2cd7bb73269bc3a859fb19 Author: rwx Date: Tue Apr 6 14:00:08 2004 +0200 Refactoring in RPCScanner thread code. Refactoring in RPCScanner thread code. Fixed a subtle synchronization bug which could have caused big headaches.
darcs-hash:20040406120008-a3a09-166979c26c29fb1aa056b2a5e65a0a9e994143de.gz commit a4585b8090b52ed6a160f514604da05be259a024 Author: rwx Date: Tue Apr 6 13:58:41 2004 +0200 Improved documentation and made higher level helper methods private. darcs-hash:20040406115841-a3a09-450d2f44c1bcd0f2b21837545a3b8c3839968d9e.gz commit 85f452d47fa55747394bae9af889e670d7d8ab69 Author: rwx Date: Tue Apr 6 13:57:39 2004 +0200 Fixed documentation mistakes. darcs-hash:20040406115739-a3a09-896f1f001a4df16077d4b3dd4532dc5cae28b542.gz commit bb5181aac2cbd58d231c0ef369dbfcb25b66636d Author: rwx Date: Sun Apr 4 03:19:09 2004 +0200 Removed temporarily. darcs-hash:20040404011909-a3a09-17e44c23558b880b3e75be086f4b5b2f68b74ca2.gz delete mode 100644 Halberd/proxylib.py commit 08b59d4fd01fc156ea3839b177bf3fa431f0c481 Author: rwx Date: Sun Apr 4 03:18:51 2004 +0200 Fixed a bug regarding hit accounting (issue 80). Fixed a bug regarding hit accounting (issue 80). Improved the clarity of the output. darcs-hash:20040404011851-a3a09-cae274fe23cbc8a58c7b92c08612d9951a9085ba.gz commit 240b0c0121544a24e430ae86b72fb67136180542 Author: rwx Date: Sun Apr 4 03:16:40 2004 +0200 Refactored MultiScanStrategy. Refactored MultiScanStrategy. Improved module documentation. darcs-hash:20040404011640-a3a09-5f068a271f1ba361fef5341769935880428aef16.gz commit e49d0e2502b014f76a45a04a08cb3942e0c27363 Author: rwx Date: Sun Apr 4 03:15:39 2004 +0200 Cosmetic changes to the output (in verbose mode). darcs-hash:20040404011539-a3a09-42f6231dcd08d4d2f67af13618d9423c98f44425.gz commit 0b96d6e11006acb83ee7fd38430ea2bd7afa2791 Author: rwx Date: Sun Apr 4 03:15:22 2004 +0200 Disabled garbage collection debugging hooks. 
darcs-hash:20040404011522-a3a09-4b3ad0a459e01a7b1c0950df1f109fb47225770e.gz commit 9f72b379cf8e36cf80fe61d048714f4c4f1f7d5e Author: rwx Date: Sun Apr 4 03:15:01 2004 +0200 Removed halberd.cfg darcs-hash:20040404011501-a3a09-ae243e3ece1f611ad773a335561a8fc4f001f53d.gz commit 8756bec70201d34e8b674892a88f99507b985d14 Author: rwx Date: Sun Apr 4 03:14:28 2004 +0200 There's no longer a need to copy halberd.cfg anywhere at install time. darcs-hash:20040404011428-a3a09-05b3c14ca6cd8e79af00d0e9e2e79f10ba99915b.gz commit 054bed67620e0d6c0cced9bc4f2061dbce899418 Author: rwx Date: Sun Apr 4 03:13:29 2004 +0200 No longer needed (it is generated by hlbd.conflib). darcs-hash:20040404011329-a3a09-e46604926a6a20a226f93dba2a4ee34a13b52552.gz delete mode 100644 halberd.cfg commit 8293fc58ddc34a0801eebc23d8e8cea7ab847174 Author: rwx Date: Sat Apr 3 17:29:19 2004 +0200 Fixed a small glitch darcs-hash:20040403152919-a3a09-dd0f3fa853ee6302d9a84df5e5658f402cfd019d.gz commit 327e5555e1784ab2b7b3a8885a8878403298fb2e Author: rwx Date: Sat Apr 3 17:24:33 2004 +0200 No longer needed. darcs-hash:20040403152433-a3a09-372fbd66381e2c2510e30d796bf94ff51bff0023.gz delete mode 100644 tests/test_halberd.py commit de9366ef6166c3b90a10e258ce1676e8f741272e Author: rwx Date: Sat Apr 3 17:24:16 2004 +0200 Removed hlbd.shell *package* from the distribution. darcs-hash:20040403152416-a3a09-64250eddcef794ef5f588078776a0891d8f31245.gz commit 3f7880a57b4e2c430441e82fd699d7543f27ccbe Author: rwx Date: Sat Apr 3 17:23:58 2004 +0200 Implemented and polished the class hierarchy and code. darcs-hash:20040403152358-a3a09-0b64c91a5f0822974024ea2fd2801d6bfb1e6347.gz create mode 100644 Halberd/shell.py commit 43f2441e43d238170bfbcae8a439d9f3830d6142 Author: rwx Date: Sat Apr 3 17:12:28 2004 +0200 Minor package enhancements. darcs-hash:20040403151228-a3a09-2928e6fbeda5e7f9bfb4518c6ca0157ddd88d05f.gz commit 30aad86af7ebb5394310801e0d9172ec2133fb44 Author: rwx Date: Sat Apr 3 17:11:02 2004 +0200 Improved documentation. 
darcs-hash:20040403151102-a3a09-92799927623603d2b2125ccd2cf97f194269eb56.gz commit 4ae9796ce590ecade6fe2e2dc002ddf80887e068 Author: rwx Date: Sat Apr 3 17:10:45 2004 +0200 Most of the program's inner workings have changed. Now halberd works with a few Most of the program's inner workings have changed. Now halberd works with a few important scanning concepts: tasks, strategies and a pool of scanner threads. All the code has changed to accommodate this. Distributed scan code is MUCH more reliable and robust now and the whole program is easier to modify and script. darcs-hash:20040403151045-a3a09-49686480eaaac2a7a8824e5d6f96b49c5d381673.gz create mode 100644 Halberd/RPCServer.py create mode 100644 Halberd/ScanTask.py create mode 100644 Halberd/crew.py commit 7211ab79906e540f2c4ee0410794972eb868c715 Author: rwx Date: Sat Apr 3 17:07:29 2004 +0200 Changed to use the new framework. darcs-hash:20040403150729-a3a09-ed7b52f59a3a8d30005a804c10a5ebcefcc88734.gz commit 515da329e2d9fdd4ced705a6e9e995ae8ffa9aa5 Author: rwx Date: Sat Apr 3 17:06:14 2004 +0200 Made some corrections. darcs-hash:20040403150614-a3a09-1f1ba5318e68dd35225db1770ebf34c2103a50fa.gz commit 2adde5b10986901dfe4de919a1453b3c22185fa3 Author: rwx Date: Sat Apr 3 17:05:54 2004 +0200 Modified to generate documentation of modules and not scripts. darcs-hash:20040403150554-a3a09-ff772c802d62e24b7bc013db5f51337c5ae9f3dd.gz commit ffc59bcfea681de243a281f5524c3f89726a3eb1 Author: rwx Date: Sat Apr 3 17:05:18 2004 +0200 No longer needed.
darcs-hash:20040403150518-a3a09-83a87f79a575450aec0a88bcfa79eae55596924b.gz delete mode 100755 Halberd/shell/core.py commit d2e968edf7f9a9aab3c77c32dac75fbcb0a629e1 Author: rwx Date: Sat Apr 3 13:50:01 2004 +0200 Replaced by hlbd.RPCServer darcs-hash:20040403115001-a3a09-241e273a39235339a9f07314d0be2e9c51d43d4a.gz delete mode 100644 Halberd/rpclib.py commit 2474c65af979dbda3da9e33aa9a048229194fabc Author: rwx Date: Thu Apr 1 14:28:06 2004 +0200 Superseded by hlbd.crew darcs-hash:20040401122806-a3a09-02c31add5461ad0ae1f7e6dcef05c2affbaea48f.gz delete mode 100644 Halberd/scanlib.py commit 59c29865faaaa9e568391404588ef07c36747f18 Author: rwx Date: Tue Mar 30 16:46:44 2004 +0200 No longer needed. darcs-hash:20040330144644-a3a09-d910668a0f38048b0ed79a80caa0579e35ba99cf.gz delete mode 100755 Halberd/shell.py commit b92b79d469c93ee241d3d71c34b45d02191e5fea Author: rwx Date: Tue Mar 30 11:25:43 2004 +0200 No longer needed. darcs-hash:20040330092543-a3a09-01770d15f75e2df2d6864ea3007112a39ac4f228.gz delete mode 100644 Halberd/shell/factory.py commit d3cd44866452db498c2709e5cd0bb8225c805395 Author: rwx Date: Mon Mar 29 12:36:56 2004 +0200 Fixed the RPC server entry point. darcs-hash:20040329103656-a3a09-5b13d9fbb447ef08947cfb543deca4a3ef56efbd.gz commit c497c4ea437cebe5039b77376dda14d184a196fa Author: rwx Date: Mon Mar 29 12:36:22 2004 +0200 Analyze clues prior to reporting. darcs-hash:20040329103622-a3a09-cf76c89ebe0f1b3cef0feb55332cd0010435fbbb.gz commit 085302c8de47322d0c63b2d6cc0bffecc7c6397b Author: rwx Date: Mon Mar 29 12:35:57 2004 +0200 Fixed an incorrect assertion. darcs-hash:20040329103557-a3a09-9a53b3b6302013b2acbb793c9833d3032fa15364.gz commit 7a25f85ea715642bc9031375adb832e00d04dfac Author: rwx Date: Mon Mar 29 11:56:04 2004 +0200 Removed bulkscan.py. Removed bulkscan.py. 
Now mass scans can be launched with the --urlfile option of halberd.py darcs-hash:20040329095604-a3a09-2a17711d257b8f20fac3a7018033525f4385e9d8.gz commit 2c0fbd5d44064f23df797f6d68e61afcb8382038 Author: rwx Date: Mon Mar 29 11:54:33 2004 +0200 Modified to use hlbd.shell darcs-hash:20040329095433-a3a09-140072e2f4c7bb2b02174538e239289e9632504e.gz commit 6b6c8b3ae931a4956c81cb80048a639b30bd0061 Author: rwx Date: Mon Mar 29 11:54:21 2004 +0200 Fixed a problem with default values. darcs-hash:20040329095421-a3a09-360c1419628165fa1652b2f28b35a95abeb0a524.gz commit 6a8f3d2e7cac90bbb34c034546cd7fd10d7b6455 Author: rwx Date: Mon Mar 29 11:49:06 2004 +0200 Wrote the hlbd.shell package. It deals with creation and use of different types Wrote the hlbd.shell package. It deals with creation and use of different types of scanners so they can be easily instantiated in several ways. It also eases the construction of meta-scanners. darcs-hash:20040329094906-a3a09-8b770b1374a9f652fb4928253e368772a064a26d.gz create mode 100644 Halberd/shell/__init__.py create mode 100755 Halberd/shell/core.py create mode 100644 Halberd/shell/factory.py create mode 100644 Halberd/shell/strategy.py commit 9c6c03955fdefa4723ffe56e69c74e814bd7e316 Author: rwx Date: Mon Mar 29 11:47:53 2004 +0200 Added the hlbd.shell package to the distribution. darcs-hash:20040329094753-a3a09-717a1b075b03d692b78acd83f5e1503ea5808cb8.gz commit 14539267e93f8bf86d54664b4ca09bc85037a4e9 Author: rwx Date: Mon Mar 29 11:47:06 2004 +0200 Slight output changes. darcs-hash:20040329094706-a3a09-1a6ef47c7c705bd4c636b0fd57b66efa307d1896.gz commit 3e5cb983dbde3092ad2e66c7326d3acb5c43af05 Author: rwx Date: Mon Mar 29 11:46:15 2004 +0200 Implemented a ClueDir class which deals with hierarchical storage of clues Implemented a ClueDir class which deals with hierarchical storage of clues using the underlying filesystem. 
darcs-hash:20040329094615-a3a09-50e54b6d16c87eda52620a4a88c7020b8a6dc100.gz commit 79504f930b9a6efff756da8996b19d3e0ad83d17 Author: rwx Date: Mon Mar 29 11:45:27 2004 +0200 No longer needed. Its functionality has been integrated with the main script No longer needed. Its functionality has been integrated with the main script (halberd.py) darcs-hash:20040329094527-a3a09-dc8dc7defa4624ea61805b04af1b6dae05c86bfc.gz delete mode 100755 bulkscan.py commit 56c767b2b4f1788e39d290cc2b194dde9e5a27fb Author: rwx Date: Fri Mar 26 01:49:05 2004 +0100 Improved the portability of the clue storage scheme. darcs-hash:20040326004905-a3a09-a74c45a50893f2d0d4055e5aa9965f514364e7b0.gz commit 0f6b194cb6fc327221e90722911eac5ab944c7d5 Author: rwx Date: Fri Mar 26 01:48:25 2004 +0100 Moved the halberd class into the shell module so it can be easily reused by Moved the halberd class into the shell module so it can be easily reused by meta-scanners. darcs-hash:20040326004825-a3a09-9f6cd35b38158555249e7ce72f861be18f59262e.gz create mode 100755 Halberd/shell.py commit bc385e0a326201382017826b191f32adb19afbdb Author: rwx Date: Fri Mar 26 01:46:49 2004 +0100 Minor corrections. darcs-hash:20040326004649-a3a09-b99eebb7a6b0436300d0c79d0a763c9c74c359be.gz commit 16e116717695b601e8a8d8914bf219766256741b Author: rwx Date: Sun Mar 7 11:29:32 2004 +0100 Takes generated HTML from reST into account. darcs-hash:20040307102932-a3a09-ea328bf8873bf80a866555a2c47156a1267bf089.gz commit 7d20b7c03266806ac6506046b653f8956f533db2 Author: rwx Date: Sun Mar 7 11:29:14 2004 +0100 Now generates HTML from reStructuredText documents. darcs-hash:20040307102914-a3a09-a51534b3a660dddb24159ef1358225e8a9f04348.gz commit 4671c96b25e2b8730cb36ea662d340661fa009fa Author: rwx Date: Sun Mar 7 11:27:51 2004 +0100 Converted to reStructuredText. 
darcs-hash:20040307102751-a3a09-073e0e4b77035bc533b8ca362dc887e4375e9b20.gz commit ff72b1ec4c3ee2903c547f1eb313a7f2e67574c1 Author: rwx Date: Sat Mar 6 11:28:21 2004 +0100 Corrected misspelling. darcs-hash:20040306102821-a3a09-594d0b8a9500096ad3632f54cef53412bb76bc2d.gz commit 111d8769a7b181336778ecdb2f121f173ae83f39 Author: rwx Date: Sat Mar 6 11:27:23 2004 +0100 Improved documentation. darcs-hash:20040306102723-a3a09-4f86f82a8849fa7856a47bcbe562c23046cbe934.gz commit 72b672f879894c8b2cf837d58c313f02d5b9b245 Author: rwx Date: Sat Mar 6 11:27:09 2004 +0100 Small changes to package description. darcs-hash:20040306102709-a3a09-1d5b0d01e6372f0b9db6361372402ab6a0236daf.gz commit 3359d055d710f9fdc64de16edde4e3c73497ba91 Author: rwx Date: Sat Mar 6 11:26:26 2004 +0100 Removed unneeded dependency. darcs-hash:20040306102626-a3a09-84d3a1a6531afb137ab106af32993f721a23fb1b.gz commit 53f83e9ed10f68428c0e5d60019bd37eb0823f41 Author: rwx Date: Sat Mar 6 11:26:13 2004 +0100 Corrected a mistake in the way doctests were executed. darcs-hash:20040306102613-a3a09-24d4e1a251141d9f8c1c85c835aa06fa495d6a5b.gz commit aaca181346fc3f4d0731b5c6f5e9d486c48e187a Author: rwx Date: Sat Mar 6 11:25:16 2004 +0100 Added new tests. darcs-hash:20040306102516-a3a09-3e5ff21cd65ae21bd38540daad2eb091114ba947.gz create mode 100644 tests/data/hotwired.lycos.com.clu create mode 100644 tests/data/login.passport.net.clu create mode 100644 tests/data/www.ask.com.clu create mode 100644 tests/data/www.cdrom.com.clu create mode 100644 tests/data/www.comcast.net.clu create mode 100644 tests/data/www.macromedia.com.clu commit 3aac747d6a4d5ca88ea08998b976fce5cea0d7da Author: rwx Date: Sat Mar 6 11:25:02 2004 +0100 Added some new site tests. darcs-hash:20040306102502-a3a09-2b9b7c4f57a4f25f3315a122f5b30fbe96557abf.gz commit 6cf7fe8a0be01d7db53b5913f4000bb175d35970 Author: rwx Date: Fri Mar 5 01:31:50 2004 +0100 Slight user interface changes. 
darcs-hash:20040305003150-a3a09-c3d53550307f4feda75532c98efbc8ae7090a24b.gz commit 0d28eabc7a499ba45f9c45775afd253cdf857a60 Author: rwx Date: Fri Mar 5 01:30:55 2004 +0100 Corrected a slight mistake in the help output. darcs-hash:20040305003055-a3a09-e0c5be938605ce3eb4ca0ebf5a6016b8b8d77d4a.gz commit 62a052969ef1a2202a9ce754676b81be99eab3cd Author: rwx Date: Fri Mar 5 01:29:08 2004 +0100 Split TestHTTPSClient.testConnect in two for clarity. darcs-hash:20040305002908-a3a09-9020a2c4d626744840314ab2af026436269a0100.gz commit 0d68313884098e8fa7088a815caf4a1458814340 Author: rwx Date: Thu Mar 4 17:09:25 2004 +0100 Corrections. darcs-hash:20040304160925-a3a09-5827fa839a56d1abe02617035b6feb33e7a765e3.gz commit a7a8daaa2082ad4325239ae3339bd1e7d90d3768 Author: rwx Date: Thu Mar 4 12:56:38 2004 +0100 Updated to take doc/overview.txt into account. darcs-hash:20040304115638-a3a09-1b42f74b697daa0f82690a97156a0f6ac3ea6f7a.gz commit c721f3338f9447444c133f1c70fe722663389071 Author: rwx Date: Thu Mar 4 12:55:57 2004 +0100 Modified to use hlbd.clues.file. Now clues resulting from a massive scan are Modified to use hlbd.clues.file. Now clues resulting from a massive scan are stored in a platform independent way. darcs-hash:20040304115557-a3a09-2e131ed361c1e394a03c94ca68f8f02e7a372b0e.gz commit 8779697db8e054e133569fa33463cb3d9a62d554 Author: rwx Date: Thu Mar 4 12:53:26 2004 +0100 Initial serious documentation. darcs-hash:20040304115326-a3a09-508dcc6c06cb6ed4800cb311d50730dda371eb72.gz create mode 100644 doc/overview.txt commit c34554e92318a219b2104ab48ab0e1b93714361d Author: rwx Date: Wed Mar 3 16:44:39 2004 +0100 Fixed a small glitch. darcs-hash:20040303154439-a3a09-87bc3fdcdc1c7ee96d8f0aca19ce7eef05b72b0a.gz commit d2e38b1e8b7b036c4e98d4b512bfe5fa60427c55 Author: rwx Date: Wed Mar 3 16:22:02 2004 +0100 hlbd.clues.Clue.normalize has changed so testNormalize had to take the changes hlbd.clues.Clue.normalize has changed so testNormalize had to take the changes into account. 
darcs-hash:20040303152202-a3a09-0900a7b9644c2a74f0f86256250cd244e124bc6a.gz commit 393e56ab01d9a379674da0bd9412f2bd0fd99387 Author: rwx Date: Wed Mar 3 16:21:23 2004 +0100 hlbd.clues.Clue.normalize uses a translation table from hlbd.util to convert hlbd.clues.Clue.normalize uses a translation table from hlbd.util to convert strings into method names. hlbd.clues.analysis had to be fixed to allow the doctest suite to run properly. darcs-hash:20040303152123-a3a09-a3ed8764f9f9102f4eebf8bb3650609ac7600e59.gz commit b5d4a9eb6f36a2e6eee577d1bd4b67454c39320e Author: rwx Date: Wed Mar 3 16:20:03 2004 +0100 Added a new module hlbd.util. It will act as a placeholder for functions that Added a new module hlbd.util. It will act as a placeholder for functions that don't fit well anywhere else. darcs-hash:20040303152003-a3a09-76174194f71393e867490e874490164ee0b28e7d.gz create mode 100644 Halberd/util.py commit e78de1e0b7f67370989c28d666b7131e5ca4e3eb Author: rwx Date: Wed Mar 3 14:31:47 2004 +0100 Now halberd.readConf catches OSError in case os.mkdir fails when creating a Now halberd.readConf catches OSError in case os.mkdir fails when creating a default configuration file. darcs-hash:20040303133147-a3a09-45335f91f18db2fa6b742fb729e369c758324fb2.gz commit dbf80f56eb33d9fb17f717922697f8cfbda802dd Author: rwx Date: Wed Mar 3 14:21:19 2004 +0100 Early implementation of a proxy. darcs-hash:20040303132119-a3a09-202f2a33e7b381c0acd1fa34d71e1b2f5cdd1037.gz create mode 100644 Halberd/proxylib.py commit efb7f1053158d53912b43c25988a3464fdfef9bb Author: rwx Date: Wed Mar 3 14:05:22 2004 +0100 Further readability improvements in HTTPClient._getReply. Further readability improvements in HTTPClient._getReply. Modified HTTPSClient._connect's interface for correctness' sake. darcs-hash:20040303130522-a3a09-624d7b3346a498a36c48cfab424ddb805d97ae1b.gz commit 36d6df25bba4094c8022e83af921e918be8fb5d6 Author: rwx Date: Wed Mar 3 13:53:32 2004 +0100 Improved error reporting. Improved error reporting. 
Now scan_thr catches the appropriate exception when an SSL connection can't be established. darcs-hash:20040303125332-a3a09-5224a93d6dd1a5f831e6f831b07b338317bd3ad7.gz commit 9beabc0dce5fa388ca09d6c5f477aedd83a92f60 Author: rwx Date: Wed Mar 3 13:52:08 2004 +0100 Introduced an HTTPClient._timeout_exceptions attribute for dealing with SSL Introduced an HTTPClient._timeout_exceptions attribute for dealing with SSL exceptions when reading from the network. darcs-hash:20040303125208-a3a09-9f4493a9b8e7a47b509433b507725e88bc147e7b.gz commit 2405f47f1155ff259150595d141e711c17dd4dd6 Author: rwx Date: Wed Mar 3 13:11:55 2004 +0100 Replaced by default configuration file. darcs-hash:20040303121155-a3a09-08842f462250d01a4d8cd1b5b5c0a6d7cad81300.gz commit 1eb9b3e8850d931d5cca3a3267b294f56598d7c9 Author: rwx Date: Wed Mar 3 13:11:27 2004 +0100 Slight changes to target distclean. darcs-hash:20040303121127-a3a09-2a69d699ad5310908f0eb34e787cc11b3a48b9c2.gz commit 27dd993ac6f732bd0c0dd6f793061d8fc274d3fc Author: rwx Date: Wed Mar 3 13:11:08 2004 +0100 Increased verbosity while running the test suite. darcs-hash:20040303121108-a3a09-503c145e25b1360547f1f2c97153a9ed62c48881.gz commit e59a8284acd23752792223cd1e1bf74ff2c8b86f Author: rwx Date: Wed Mar 3 13:10:35 2004 +0100 Improved logging information. darcs-hash:20040303121035-a3a09-3c69495e7cf838f7bf887f7621242bc399479b5b.gz commit 69455fc22185028fb436a6120417099f65f374df Author: rwx Date: Wed Mar 3 13:10:20 2004 +0100 Fixed a typo. darcs-hash:20040303121020-a3a09-a6ec733f079aa6362ca30d9eb616ec071dce5758.gz commit ab4248bec552b1cb0430a1b645213804fa719be9 Author: rwx Date: Wed Mar 3 12:38:10 2004 +0100 Added a test for http://www.tripod.com darcs-hash:20040303113810-a3a09-e055828fe3bb26cbb8fc6f3a364e50d0f99948e9.gz create mode 100644 tests/data/www.tripod.com.clu commit 3b25e8ed3028a331e2c5956588674453a21bc423 Author: rwx Date: Wed Mar 3 12:36:42 2004 +0100 There were two bugs fixed in hlbd.clientlib.HTTPClient._getReply.
One of the There were two bugs fixed in hlbd.clientlib.HTTPClient._getReply. One of them related to a condition leading to an endless loop while reading from the network and the other had to do with buffer sizes and being able to find the end of MIME headers. Wrote a test to prevent the fixed bugs from being reintroduced in the future. darcs-hash:20040303113642-a3a09-c7c1e0449fad39b5fe3a5847e3d1622eb01cce58.gz commit 1789a05c736037ca393bc34da230478a63fcdc7d Author: rwx Date: Wed Mar 3 12:33:52 2004 +0100 Fixed two bugs in HTTPClient._getReply. Fixed two bugs in HTTPClient._getReply. Removed the wildcard exception handler in HTTPClient._getReply. darcs-hash:20040303113352-a3a09-bff628d570ca7b527404d7434abca6d9ab426154.gz commit 94fda24917924f9dc679d35821304b1a80f0703b Author: rwx Date: Wed Mar 3 10:38:17 2004 +0100 Made several minor corrections. darcs-hash:20040303093817-a3a09-1665cf94bb5bda7aeaa29ad297822b0d8111247b.gz commit ebf76d8f6a14ab989702eee40d74d6217c7464a2 Author: rwx Date: Wed Mar 3 10:29:53 2004 +0100 Corrected documentation. darcs-hash:20040303092953-a3a09-28b6843973180b29a66ba407e94eae4fd7d7da05.gz commit 30b2bef89f995a592b7f99a8ce86ddd23387f96c Author: rwx Date: Wed Mar 3 10:29:28 2004 +0100 Updated documentation. darcs-hash:20040303092928-a3a09-6532713fe04c3e312fdd81f5a860eab5e134f3a6.gz commit 9657a898af1c093a04150a425c78e23c9bb2acd0 Author: rwx Date: Wed Mar 3 02:14:48 2004 +0100 Clue.parse now accepts strings in unicode too. darcs-hash:20040303011448-a3a09-a505d40225f270719a64c27fd80c6080cdf03850.gz commit e94c8dae6ba8a9b604a4cfd00879e891908bc0d2 Author: rwx Date: Wed Mar 3 01:04:40 2004 +0100 Creates a default configuration file automatically when there is none at the Creates a default configuration file automatically when there is none at the default path.
darcs-hash:20040303000440-a3a09-85776e90bce1622601378085b0408fab8d54088d.gz commit 5833a5d50b0c28c33959d35eabab69cc17bec4be Author: rwx Date: Wed Mar 3 01:03:55 2004 +0100 Implemented ConfReader.writeDefault to write a bare-bones configuration to the Implemented ConfReader.writeDefault to write a bare-bones configuration to the specified file. darcs-hash:20040303000355-a3a09-2b01c6d5ab53c65a33f0f9d19606263c12dd520a.gz commit cc2a65534d3f0ef89cf4b77dd2d24d475a6ab1b6 Author: rwx Date: Tue Mar 2 12:57:57 2004 +0100 Now it checks if the server responds with HTTP right at the beginning and if it Now it checks if the server responds with HTTP right at the beginning and if it doesn't, aborts without reading any more data from the network. darcs-hash:20040302115757-a3a09-3a7fb2c1c3ed9b7297ecfb071680f0fc366696eb.gz commit d66fcd5fead0034f97e8778020bcbdc14676ecb4 Author: rwx Date: Tue Mar 2 12:56:42 2004 +0100 Removed wrong default path for halberd.cfg darcs-hash:20040302115642-a3a09-2c0b077bd8a732889054b6c7c73e64d9ef64a03d.gz commit e1ad28b0ba6f855aaf2db08b96a83d503292e67c Author: rwx Date: Tue Mar 2 03:12:21 2004 +0100 Improved documentation. darcs-hash:20040302021221-a3a09-caa65c404239066e073986abf72d26e2c113029b.gz commit 5539dfba6c044a2d5826111789a24ccd981436ca Author: rwx Date: Tue Mar 2 03:12:04 2004 +0100 Wrote the remove function and did some slight changes to several targets. darcs-hash:20040302021204-a3a09-8e10c3337541d8f09e05c7a24a8703ea01da7b5d.gz commit 86e83018438b102bcd0519987d57ff52e4f28308 Author: rwx Date: Tue Mar 2 03:10:30 2004 +0100 Instead of instantiating hlbd.clientlib.HTTPClient we now call an abstract Instead of instantiating hlbd.clientlib.HTTPClient we now call an abstract client factory which returns the appropriate client class depending on the target URL.
darcs-hash:20040302021030-a3a09-5e5c7a27336a0923a530bb98cf51d71e545c4775.gz commit 0505aab944044592286163a305022be32dcf4bad Author: rwx Date: Tue Mar 2 03:08:38 2004 +0100 Now `import hlbd.clues' will also take hlbd.clues.file into account. darcs-hash:20040302020838-a3a09-78aee3abb7baf12997745cc2fbc0b46073bea01c.gz commit 674852b9b997dd19a937f7f7d917ff71ba3c0ea7 Author: rwx Date: Tue Mar 2 03:07:16 2004 +0100 Wrote test for hlbd.clientlib's SSL support. darcs-hash:20040302020716-a3a09-695c79a22e9632112b8eea60d1b4593b7a09f0c1.gz commit db2b13f265fde29946daee28fc778ab12fe06049 Author: rwx Date: Tue Mar 2 03:07:01 2004 +0100 Implemented SSL support. darcs-hash:20040302020701-a3a09-20f0bfe773d44cfe572d71a55310aa3a4a02b076.gz commit f12f03f5c2499b80f5a0e8a9f08d13f661615468 Author: rwx Date: Tue Mar 2 01:55:54 2004 +0100 Wrote a better README file darcs-hash:20040302005554-a3a09-4f3a55e728aad4d7371a5410e1a5ec9a8682d903.gz commit 6dbc55e908a3d7149e7dc96b1184b3fb1916f9ed Author: rwx Date: Tue Mar 2 01:55:07 2004 +0100 halberd.cfg is installed in the user's home directory. darcs-hash:20040302005507-a3a09-4c3b5307d7c7395d86e512b1a6d7b7af122f0ab4.gz commit 7faff814381bfdbb1890b92e56dd2115740997b7 Author: rwx Date: Tue Mar 2 01:54:02 2004 +0100 Changed the return values for hostname and addresses when errors are found so Changed the return values for hostname and addresses when errors are found so that their callers can deal with them better. darcs-hash:20040302005402-a3a09-34994172f70e2241c95bd9c21c3bf0df694ef931.gz commit 48a3eb774968a49dcfbcd388d691de6c851ce5be Author: rwx Date: Tue Mar 2 01:52:26 2004 +0100 Fixed a bug in HTTPClient._getHostAndPort. Fixed a bug in HTTPClient._getHostAndPort. Initial work towards a useful HTTPSClient class. 
darcs-hash:20040302005226-a3a09-e611d7e15aaba9cda82af7255629c8230202026b.gz commit 8e81b5ddf1c1d193aea4283fcc0b23ff017e592b Author: rwx Date: Thu Feb 26 05:15:03 2004 +0100 Fixed several glitches in the makefile and added it to the distribution. darcs-hash:20040226041503-a3a09-c2ca3d0f3af2af73429fdbb94edab439e71a3728.gz commit 16082feb651f82f3209bb345041e0900c2977c77 Author: rwx Date: Thu Feb 26 05:12:40 2004 +0100 Made some changes to conduct the tests faster. darcs-hash:20040226041240-a3a09-cd1648dff4431259581992e68d2bb1641f277dc3.gz commit 3e0fc410d4cf9a44c238e5dca9de2c27bc8b8f38 Author: rwx Date: Wed Feb 25 12:15:02 2004 +0100 Added an initial unit test for the main class (Halberd) darcs-hash:20040225111502-a3a09-66e506d141557942b857c0d5b7e74f2052fe5739.gz create mode 100644 tests/test_halberd.py commit 256008bfc8914605daebf8998fba3237bc171fbb Author: rwx Date: Wed Feb 25 12:10:00 2004 +0100 Moved the second phase analysis code into hlbd.clues.analysis.reanalyze. Moved the second phase analysis code into hlbd.clues.analysis.reanalyze. Modified clue load/save calls so that the new hlbd.clues.file functions are used. darcs-hash:20040225111000-a3a09-03cac1e57527d8dca2a7c89a3e8d4b97081f5f88.gz commit 46490bb021d304ffc3d466b5e695984597acd8de Author: rwx Date: Wed Feb 25 12:08:22 2004 +0100 Wrote documentation for the second phase analysis function. darcs-hash:20040225110822-a3a09-0a80834ded4f78121373d4de697d0f4c6fea80f5.gz commit 08f83d60046e0a117c6cbe5a1fa428a4933b4398 Author: rwx Date: Wed Feb 25 12:07:51 2004 +0100 Put an upper bound to the number of clues so the test suite runs faster. 
darcs-hash:20040225110751-a3a09-5ec07ba81080b8a873db066636e45c181a07d49f.gz commit a1afcfad0c4225c02db2e20a604cc6ad608c7ec4 Author: rwx Date: Wed Feb 25 05:01:29 2004 +0100 Wrote test suite for hlbd.clues.file darcs-hash:20040225040129-a3a09-0b538f1d26f78d6b7a06e47eeb9e8ec7716a67e8.gz create mode 100644 tests/test_clues_file.py commit c27a7630f9e38c5d7a3ba23749ce01c9b50b2bf8 Author: rwx Date: Wed Feb 25 05:01:15 2004 +0100 Wrote platform-independent clue storage functions. darcs-hash:20040225040115-a3a09-02032f6bc294f787f1b0bc74928c00668e3366ff.gz create mode 100644 Halberd/clues/file.py commit 36cd8e86a86fa462661c3632c548ce1e5d8f4fba Author: rwx Date: Wed Feb 25 04:53:04 2004 +0100 Auxiliary files for hlbd.clues.analysis' test suite. darcs-hash:20040225035304-a3a09-a4bc0057ab3b90170702625eb00fdf2f484db59b.gz create mode 100644 tests/data/agartha.clu create mode 100644 tests/data/email.excite.com.clu create mode 100644 tests/data/www.barclays.es.clu create mode 100644 tests/data/www.dmoz.org.clu create mode 100644 tests/data/www.ebay.com.clu create mode 100644 tests/data/www.pogo.com.clu create mode 100644 tests/data/www.pricegrabber.com.clu create mode 100644 tests/data/www.register.com.clu create mode 100644 tests/data/www.sohu.com.clu create mode 100644 tests/data/www.synnergy.net.clu create mode 100644 tests/data/www.yesky.com.clu commit 53cce8364ea6ff3b0e397bcac146c8bd7dd1e48e Author: rwx Date: Wed Feb 25 04:52:46 2004 +0100 Wrote initial test suite for hlbd.clues.analysis darcs-hash:20040225035246-a3a09-6d31e61e55e389dc368813fa31d3cbb374c93745.gz create mode 100644 tests/test_clues_analysis.py commit 2548bf78b33460e0d04720b76101d4ad56d5edbc Author: rwx Date: Wed Feb 25 04:52:20 2004 +0100 Moved second stage analysis functionality into this module. 
darcs-hash:20040225035220-a3a09-4ddc2150efd63a48618b050e3385f187d62f92e9.gz commit 0cd081542ce8944f1368e52b59e2cf9959262fee Author: rwx Date: Wed Feb 25 04:51:18 2004 +0100 Fixed a bug regarding Clue.__tmphdrs darcs-hash:20040225035118-a3a09-efa9b90df639e7df5b818f8affdb5af6ada06d53.gz commit 58a2f3ba627db545eb9745029132360ac798a84f Author: rwx Date: Wed Feb 25 02:37:59 2004 +0100 No longer relevant. darcs-hash:20040225013759-a3a09-11231c287d17fa7d8149cb8a450d9b531e7c939a.gz delete mode 100644 tests/test_cluelib.py commit 72ca857d727e77f0bf7564074675285315672703 Author: rwx Date: Wed Feb 25 02:36:48 2004 +0100 Fixed a bug in ignore_varying_fields. darcs-hash:20040225013648-a3a09-1fb3a9eff37a3244ed150a538cd2c8e1156e459e.gz commit f006a1e5caa46c84d237be0ff0b83d217a8991dd Author: rwx Date: Wed Feb 25 02:36:16 2004 +0100 Renamed test_cluelib.py as test_clues_Clue.py. Renamed test_cluelib.py as test_clues_Clue.py. Added a test for invalid digest recomputations. darcs-hash:20040225013616-a3a09-7f8a30ac4b19e1e711723af6eeafdbf791354f27.gz create mode 100644 tests/test_clues_Clue.py commit 8fa57227b3c7d9aa29fc98ac799fbc346fdef602 Author: rwx Date: Wed Feb 25 02:35:33 2004 +0100 Modified Clue.parse to accept headers as either a string or a sequence of name, Modified Clue.parse to accept headers as either a string or a sequence of name, value tuples. Added an assertion to ensure Clue._updateDigest is not called without a previous call to Clue.parse. darcs-hash:20040225013533-a3a09-c2021dd4ea02a589b65c2f017910da4a49026b11.gz commit 24f0848a59478a629527a61b8724e97ce7876160 Author: rwx Date: Wed Feb 25 02:30:23 2004 +0100 halberd.hostname now returns None in case the passed URL doesn't have a netloc halberd.hostname now returns None in case the passed URL doesn't have a netloc part. bulkscan has been updated to check for that error condition. 
darcs-hash:20040225013023-a3a09-d3429041bccc3c85c0b2a04c89a836abc5aa130c.gz commit 9132a8aa8639dfbb91f4a0feb39b61f9eafb7cba Author: rwx Date: Fri Feb 20 13:55:21 2004 +0100 Adapted to the new results coming out of hlbd.clues.analysis.diff_fields darcs-hash:20040220125521-a3a09-b71d1843fbc134eb3b628b6b9206cd42ffe001c6.gz commit d5ae1beee550e6e456b31c0db24df2fdd5fdbf65 Author: rwx Date: Fri Feb 20 13:54:36 2004 +0100 Removed the percentage information from the results of diff_fields darcs-hash:20040220125436-a3a09-52e5e009bc5f92287c8ab8163f68893320c2289b.gz commit 0d9e61da7e360af979149eab45354290307014e1 Author: rwx Date: Fri Feb 20 13:50:46 2004 +0100 Allow the user to specify the destination directory. Allow the user to specify the destination directory. Fixed a bug which involved modifying directly the address attribute without taking into account the list of clues (which had to be emptied). darcs-hash:20040220125046-a3a09-16072092c6a61f6f1f1d0159ef263310ec696a38.gz commit 740b4ea0f9fc827b8a9cc11022cbc81fd6436e27 Author: rwx Date: Fri Feb 20 10:35:10 2004 +0100 Calls hlbd.clues.analysis.hits to obtain the total number of replies. darcs-hash:20040220093510-a3a09-32e13e21929152b25ff5a1469bbdfa6ad695f470.gz commit 9dcafade796dd34430edfc0ac8c42027abd730ef Author: rwx Date: Fri Feb 20 10:34:50 2004 +0100 Address the case of a web site producing different MIME fields in its headers Address the case of a web site producing different MIME fields in its headers for each request. There's a routine that calls hlbd.clues.analyze.ignore_changing_fields and reanalyzes the clues after doing so. darcs-hash:20040220093450-a3a09-bac20343a1e451a4c434497c92408c0b55594e25.gz commit 851e2da211ebb98b4e46c1b95f4211602cb1dca3 Author: rwx Date: Fri Feb 20 10:30:39 2004 +0100 Implemented ignore_changing_fields and hits. 
darcs-hash:20040220093039-a3a09-455fd485e1bc38da478d20a927a87a8409097e09.gz commit a9743c36a8c32cffdee093db45d3f9b7f2036c53 Author: rwx Date: Thu Feb 19 16:01:55 2004 +0100 Modified in order to distribute bulkscan.py darcs-hash:20040219150155-a3a09-8e1367bfe23abc1e182fe9652078856126285991.gz commit 4671e4cf40bebc40f90d10f952338130920b9325 Author: rwx Date: Thu Feb 19 16:00:29 2004 +0100 Refactored Halberd class. Refactored Halberd class. Now scanning all the addresses of a host with DNS RR is the default behaviour. darcs-hash:20040219150029-a3a09-302f74014ad77b1da70bb6c09c1bc4c19ae15fd2.gz commit 124bfd16e1aab4483be63acd4f10f2cff9b474ad Author: rwx Date: Thu Feb 19 15:59:37 2004 +0100 Displays all the cookies instead of just one. darcs-hash:20040219145937-a3a09-56903349cf728ca7da2340f106e5554d5bdddf86.gz commit 4a9b235a741ae65c5fba059edb74fef727a8228d Author: rwx Date: Thu Feb 19 15:59:11 2004 +0100 Improved exception info. darcs-hash:20040219145911-a3a09-0a6ca741b90676ce39dfcc36e9ced6139ab06fc3.gz commit a7010ca5bb41749105aeb4fdcaae62f47cfedbcf Author: rwx Date: Thu Feb 19 15:58:39 2004 +0100 Improved status information. darcs-hash:20040219145839-a3a09-da8536e1365a9fe8325f8d2cb04ffd8346430def.gz commit 2e7f20a9fee53e65d8ab724832274d6e4b5fd3c2 Author: rwx Date: Thu Feb 19 15:58:14 2004 +0100 Caught hlbd.clientlib.UnknownReply exception. Caught hlbd.clientlib.UnknownReply exception. Modified the status information so it fits better with multiple address scanning. darcs-hash:20040219145814-a3a09-cc26ea7e560c5db4ad37eb0833a2650e5b71de15.gz commit 2554dce2390b0e23598a487103ffd3ba6c0d8f38 Author: rwx Date: Thu Feb 19 15:57:25 2004 +0100 Cookies are now stored in a list so all of them are kept. darcs-hash:20040219145725-a3a09-a6203cdfab153d515ff15e0809e2b6eaa777f3e8.gz commit d7a4e5783803086b7266a4d97c955fdc7c908f43 Author: rwx Date: Thu Feb 19 15:56:23 2004 +0100 Fixed some documentation mistakes. 
darcs-hash:20040219145623-a3a09-0a2b41e12c3f3578a75b88408257fc49e1ac8a70.gz commit 831bbc68c5eb8f339c482e79a833dba8b4a8a6a5 Author: rwx Date: Thu Feb 19 15:55:33 2004 +0100 Wrote a tool for massive scanning. darcs-hash:20040219145533-a3a09-989dcd862cf6f14cafe9fd8da7bbdf8ab8905999.gz create mode 100755 bulkscan.py commit 614fceaebdd55ff1fe56a81665d9afc2e5389d49 Author: rwx Date: Sun Feb 15 20:54:52 2004 +0100 Added classifiers for the Trove software map (used by PyPI). darcs-hash:20040215195452-a3a09-4448fb3a84cd3fa4037a1111dc167aad98b63d0f.gz commit cd851d69a8608e7929b2630df6ac8a3942b575e8 Author: rwx Date: Sun Feb 15 19:57:14 2004 +0100 Added a target for LOC count. darcs-hash:20040215185714-a3a09-eb5180633327800deaddea25923540aa3d433891.gz commit 7c22cf75a47b730157050c406e93969e4856d429 Author: rwx Date: Sun Feb 15 19:56:54 2004 +0100 Now merge creates a new clue instead of using the first item of the passed Now merge creates a new clue instead of using the first item of the passed sequence. This allows filter_proxy to work without destroying the original list of clues. darcs-hash:20040215185654-a3a09-d627c472469e5eac934f192894b6629dd789a067.gz commit 99f0de7e18a5fb58d113e2eb7967cf3e51e40f75 Author: rwx Date: Sun Feb 15 19:17:15 2004 +0100 Now includes itself in the distribution. 
darcs-hash:20040215181715-a3a09-ea09989df0e9dce97290228b781fdcc208c90ff9.gz commit a55e8fc39ffd7a8e748147e4cb0c820b8f03f28e Author: rwx Date: Sun Feb 15 19:02:35 2004 +0100 Moved tests to another directory following the changes in setup.py darcs-hash:20040215180235-a3a09-de460e9b10011936194d823b47a69560a2852eff.gz commit 23cef688a49cf2af96fe8aabc90e75cd9400f40a Author: rwx Date: Sun Feb 15 19:02:00 2004 +0100 Moved testing framework into setup.py darcs-hash:20040215180200-a3a09-c8c674fd418327160e092cfe31cc34f5846a752f.gz commit 58277119650829683b9c9d71ed29cf0a7cf0b952 Author: rwx Date: Sun Feb 15 19:01:25 2004 +0100 Adapted to the current layout of hlbd.clues darcs-hash:20040215180125-a3a09-86df5f06ac3615ace541742d0f696517d39e74a6.gz commit 9ffc37e1b069c0db2afb4648e628c236585e04c6 Author: rwx Date: Sun Feb 15 18:03:20 2004 +0100 Clarified some parts of the code. Clarified some parts of the code. Implemented utctime function. darcs-hash:20040215170320-a3a09-35db4579fbb909f63e711d4f2d42a84ae4e60efe.gz commit 4b2bbe9b07267ed9ebf6468cac207683ea4aa808 Author: rwx Date: Sun Feb 15 18:01:46 2004 +0100 Distributed scanning now done in parallel with local scanning. darcs-hash:20040215170146-a3a09-4012e7910f0dcc674cde88832c0cdada308b5ec1.gz commit 3d5a0a58bcdf31eeb13dc6edfc5c138fd51db7cd Author: rwx Date: Sun Feb 15 18:00:39 2004 +0100 Improved documentation. darcs-hash:20040215170039-a3a09-39bfe5e8881bdc70587333b150c6298badcd2f38.gz commit a395809b2bf0d040c6a61492af73c5d1f132340f Author: rwx Date: Sun Feb 15 15:03:54 2004 +0100 Wrote useful documentation. Wrote useful documentation. Turned Clue.normalize into a static method. darcs-hash:20040215140354-a3a09-d9838e93cddc0e93dd5d9e41fddb6eafa7b612e5.gz commit e11dfd39087adc07e10d4ad1cb04ccae261a983a Author: rwx Date: Sun Feb 15 05:33:11 2004 +0100 Refactored and documented several important functions. Refactored and documented several important functions. 
The examples in the documentation can be automatically checked out by doctest. darcs-hash:20040215043311-a3a09-5ad5998781afe518a2280aaa9d56f1ff16216caa.gz commit 1f7f502484a9245e96a60e137851bb6fef2df17e Author: rwx Date: Fri Feb 13 02:29:54 2004 +0100 Updated to the new module layout. Updated to the new module layout. Now dist target doesn't rely on incversion. darcs-hash:20040213012954-a3a09-d698f12883538924b8c549d5bb3fb20c0763d374.gz commit 0d6522b1b0969a2a76a69ffc5ec6f39896abfaf5 Author: rwx Date: Fri Feb 13 02:29:10 2004 +0100 Fixed a typo. darcs-hash:20040213012910-a3a09-659a0c44ede7a1e0a2b466c41a4a8f332c815f08.gz commit 39057edf5b687c0b792cb366bae789d692fa881a Author: rwx Date: Fri Feb 13 02:29:00 2004 +0100 Implemented the initial version of a working distributed scanning feature. darcs-hash:20040213012900-a3a09-64e9cde377cb93d172e652505f9d08c472505be6.gz commit 8cdb866c15632c1e219e27f4b4a7a75a02d6988a Author: rwx Date: Fri Feb 13 02:27:36 2004 +0100 Removed unneeded code. darcs-hash:20040213012736-a3a09-87e44dc8fe2b18cad1192062e6215bb80afd3f7a.gz commit 5525bc7d75574a5ba4936898d6187cf16740d634 Author: rwx Date: Fri Feb 13 02:24:51 2004 +0100 Rewrote the RPC mechanism. darcs-hash:20040213012451-a3a09-c4145ac0a27906f6ac86a33871287fed353a2689.gz commit 44608f64b3bb5a93f7fa770ec84eaccac5b3bfcb Author: rwx Date: Fri Feb 13 02:23:24 2004 +0100 Updated to the new module layout. darcs-hash:20040213012324-a3a09-23dfbdd40ec082ed163013e6d135483c730a3d83.gz commit 2efe0869da6c228f54ccf8bbff528e82e06e2aed Author: rwx Date: Fri Feb 13 02:17:43 2004 +0100 Splitted cluelib into sub-modules for easier manipulation. darcs-hash:20040213011743-a3a09-66456a027fa8618e1b5dbd23cd621f1b81d3648f.gz create mode 100644 Halberd/clues/Clue.py create mode 100644 Halberd/clues/__init__.py create mode 100644 Halberd/clues/analysis.py commit a28f3207aa85d6dd355cd698c9955c806d793620 Author: rwx Date: Fri Feb 13 02:17:10 2004 +0100 Updated the test cases depending on cluelib. 
darcs-hash:20040213011710-a3a09-dff690a26fd8acaa036588e963cbe77b1851b7c5.gz commit 114d6b3ca7a1a0f70d510663f9958175d9049192 Author: rwx Date: Fri Feb 13 02:16:55 2004 +0100 Splitted cluelib into two modules: hlbd.clues.{Clue,analysis} Splitted cluelib into two modules: hlbd.clues.{Clue,analysis} Updated the code depending on cluelib. hlbd.scanlib wraps signal handling around exception handlers to avoid problems when invoked as an rpc client. This is a kludge and a temporary solution. darcs-hash:20040213011655-a3a09-ab89515964be35cce20ece83b6e599ff7e817624.gz delete mode 100644 Halberd/cluelib.py commit 57833f0761befcf6bbdde447fd72154742c6c32c Author: rwx Date: Thu Feb 12 12:20:38 2004 +0100 Added configuration file template. darcs-hash:20040212112038-a3a09-c383bb2b594b4c5ce8604212c3746822c74985d0.gz commit 191082d3dc60f6a2502341f9690e8592857d2b51 Author: rwx Date: Thu Feb 12 12:20:11 2004 +0100 Wrote a module to deal properly with configuration file parsing. darcs-hash:20040212112011-a3a09-98d6dea8e973aab900e9266f18a582082f548a3b.gz create mode 100644 Halberd/conflib.py commit c91c8bff81b518ebcaf9f5a7753d02bc8d9ff6f6 Author: rwx Date: Thu Feb 12 12:19:51 2004 +0100 Improved configuration file handling. Improved configuration file handling. Added a conf. file template with the distribution. darcs-hash:20040212111951-a3a09-9197c2c878151aae71b8f078e9ddd618db66fbe0.gz create mode 100644 halberd.cfg commit d568090a6634788cd8896fe850299fd1322595d0 Author: rwx Date: Thu Feb 12 12:19:02 2004 +0100 Added a handler for Cache-expires in Clue. Added a handler for Cache-expires in Clue. Refactored some of find_proxies' auxiliary functions. darcs-hash:20040212111902-a3a09-5dd9101dba1aa5962666b4f9646bf001f9264653.gz commit 8bfc95a80ab46c9c977ef668902564f6cc4ebae9 Author: rwx Date: Thu Feb 12 12:15:39 2004 +0100 Several minor enhancements.
darcs-hash:20040212111539-a3a09-998ccf9c303ab62adc982575a043fe4d68571eef.gz commit c9c0b550c715a1ff2e3ebf6b1e221c90a63629eb Author: rwx Date: Wed Feb 11 12:17:02 2004 +0100 Performed some refactoring in the analysis functions. darcs-hash:20040211111702-a3a09-ee220c0c798e25c469933c215d7846bfcfb9a021.gz commit beddae2e19b412d228a3afc1f275c12833e81876 Author: rwx Date: Wed Feb 11 11:19:55 2004 +0100 Added the bare-bones of configuration file handling. Added the bare-bones of configuration file handling. Adapted to the new semantics of hlbd.scanlib.scan darcs-hash:20040211101955-a3a09-17353d2407c773a0289cc3b13927d49006053bd4.gz commit 92f4b37bbb838ff9418b5d4f25c9affc6994b987 Author: rwx Date: Wed Feb 11 11:19:20 2004 +0100 Now scan doesn't return a tuple with the total number of hits since those can Now scan doesn't return a tuple with the total number of hits since those can be easily calculated from the list of clues. darcs-hash:20040211101920-a3a09-7074b2abc74363ee4d556ea2617833703f127670.gz commit 2dd804df98b138a63a5c42120d9e199540404483 Author: rwx Date: Wed Feb 11 11:18:36 2004 +0100 Added filter_proxies functionality for detecting proxy-caches on the target. darcs-hash:20040211101836-a3a09-2e0def3869da0ba1f7a2ba1e7141498cd346260e.gz commit 5f2e21d7bb8f6c7902351801ba4130a69ce427da Author: rwx Date: Mon Feb 9 13:07:25 2004 +0100 Moved the functionality from inspectlib into cluelib.diff_fields darcs-hash:20040209120725-a3a09-42b2ea8d0eade80618cf7cb58656d01c00da9885.gz delete mode 100644 Halberd/inspectlib.py commit 4ee536687de442258344a320ed889c6ba661d149 Author: rwx Date: Mon Feb 9 13:05:37 2004 +0100 Several enhancements to the output (now it's much more clear and informative). darcs-hash:20040209120537-a3a09-b58e068b0b363be4ea9d2b11734a0c098d4fcd16.gz commit 0da670db9985256fa2047150e9d7dde3c00dac26 Author: rwx Date: Mon Feb 9 13:05:08 2004 +0100 Fixed locking. 
darcs-hash:20040209120508-a3a09-e30f8e1b2c13f71864eb6f45afb3b188c51f4134.gz commit ba48b658ef44bdacec8f9cbaaf78043962377054 Author: rwx Date: Sun Feb 8 02:55:05 2004 +0100 Fixed a small glitch in Clue.parse darcs-hash:20040208015505-a3a09-956bc2fa6db653577a32a45e8eda11183b044278.gz commit 2be59f4627d01f64d4308942e69f2235145ab0de Author: rwx Date: Sat Feb 7 22:15:53 2004 +0100 Moved status updates to the main thread so that they don't slow down the Moved status updates to the main thread so that they don't slow down the scanning. The scan can now be interrupted anytime by the user. darcs-hash:20040207211553-a3a09-1d4f4aaba8ac47c4ede400c4930cb15fd9472220.gz commit 940112a84cac3fa5f1c1ff6b80a378f1f8248fea Author: rwx Date: Sat Feb 7 20:08:20 2004 +0100 Fixed one of the targets darcs-hash:20040207190820-a3a09-c36b32ad41f69caeddc78a92e6c3d236abd5de21.gz commit 80592eabc9905b1a5cf71755d8152a119727597c Author: rwx Date: Sat Feb 7 18:15:37 2004 +0100 The scanner runs completely in parallel now. darcs-hash:20040207171537-a3a09-d15d4ecaa98920c5c7b2efeca0f03f4e8e408e35.gz commit 20649182445b4f8b1b04c601f82193ab473f6fab Author: rwx Date: Sat Feb 7 18:01:09 2004 +0100 Refactored find_clusters. darcs-hash:20040207170109-a3a09-3f7be4bfd8938581e2b6b3061e35528dc411de05.gz commit e33340f473f23864467f75d85b8074fa287c9055 Author: rwx Date: Sat Feb 7 17:59:18 2004 +0100 Adapted to the new return type of getReply. darcs-hash:20040207165918-a3a09-c207eb7305ebbd7c919c81cd68edcf621b45e21b.gz commit b1aba8d81b848e4704af7b1cdd7375bd5ee4a78f Author: rwx Date: Sat Feb 7 14:33:30 2004 +0100 Began to turn the code into a multi-threaded scanner. Began to turn the code into a multi-threaded scanner. Disabled temporarily the clue inspector. 
darcs-hash:20040207133330-a3a09-0873dff9417ff520cd81c10d93cc29a8c57670bf.gz commit 5b090249bf8743d0086cfa47892008a691d6b1cd Author: rwx Date: Sat Feb 7 14:29:56 2004 +0100 Added two header handlers in Clue: one for ETag and the other for Added two header handlers in Clue: one for ETag and the other for Last-modified. Fixed a serious bug in find_clusters. darcs-hash:20040207132956-a3a09-47ba794d2533ceb61c4a141deac20d8a4a67a1de.gz commit 4894ea0ef6dd24a1422b4913a8a04a549c508656 Author: rwx Date: Sat Feb 7 14:28:02 2004 +0100 The skeleton functions for halberd's distributed capabilities have been The skeleton functions for halberd's distributed capabilities have been written. darcs-hash:20040207132802-a3a09-48aaabd3ef5d3442867536e7be94144554c82a3a.gz create mode 100644 Halberd/rpclib.py commit 3daac80511618543deb4fced7858be80f8a528c6 Author: rwx Date: Sat Feb 7 14:27:12 2004 +0100 Several enhancements were made to the output of the program. darcs-hash:20040207132712-a3a09-15e23fc4def61e9e4f4bc5e1822dd0aa0917d351.gz commit c4b649a00c7399fb6cf3fa4b67868353e9d1b850 Author: rwx Date: Sat Feb 7 14:26:04 2004 +0100 Added some new options (mainly --parallelism) and rearranged the code heavily. Added some new options (mainly --parallelism) and rearranged the code heavily. Now there's a Halberd class which encapsulates everything related to the scanning/analysis/reporting process. darcs-hash:20040207132604-a3a09-266f6e955b7f70331b388ed99bfa8b1a36395be2.gz commit 69fb37956d45c46998e24a1dd79c3f0f537baf40 Author: rwx Date: Fri Feb 6 17:02:26 2004 +0100 Modified Clue to make it easier to send it by XML-RPC. Modified Clue to make it easier to send it by XML-RPC. Improved documentation and clarity of the analysis functions. darcs-hash:20040206160226-a3a09-f519c1f23a34cef4769e9af99a102ffa29f866d4.gz commit 275e5a284ac20ac6bba6a50914906fa5e6f16d2e Author: rwx Date: Fri Feb 6 16:59:22 2004 +0100 Improved timestamp accuracy.
darcs-hash:20040206155922-a3a09-06b22e0ee7c12e734279197845f1beedaf5ce76f.gz commit f568a3fb1a218744c4a4ba7816fdbffa9fa992de Author: rwx Date: Wed Feb 4 05:31:03 2004 +0100 Beautified HTTPClient._fillTemplate darcs-hash:20040204043103-a3a09-b761efd34af1b0aa19fb573491ce653e6a5aaea1.gz commit 985400de41a046387192cfd4f2e46516c592bae5 Author: rwx Date: Wed Feb 4 05:11:54 2004 +0100 Removed tests for CmpOperators since they no longer exist in hlbd.cluelib darcs-hash:20040204041154-a3a09-2c3e559aadf167f423bcf2b9acf28a8586d9d692.gz commit 254661cfafa9fbc6dc940982db0541e9011ea443 Author: rwx Date: Wed Feb 4 05:11:39 2004 +0100 Refactored heavily. darcs-hash:20040204041139-a3a09-2c6366d3faf0859cd57f86013e5af49d92107afa.gz commit 9d25763db61d3928a77ad486bf7fe02717a17622 Author: rwx Date: Tue Feb 3 20:47:56 2004 +0100 Began refactoring of analyze. Began refactoring of analyze. CmpOperators were removed from the code. They were no longer needed since clue sorting is now done using the decorate-sort-undecorate pattern and Clue.__contains__ is not used anymore in analyze. The relevant functionality was moved into Clue.__eq__. darcs-hash:20040203194756-a3a09-4ef9ad486a08289e11e490723eae1a5b00fd95ab.gz commit 654c88a1da96fd3734a8c4170ef2b263bd8cae2f Author: rwx Date: Mon Feb 2 08:32:08 2004 +0100 Fixed a small glitch regarding cluelib.normalize darcs-hash:20040202073208-a3a09-fbbb69c971f4279e57a9c3b65553d0d1e357aa32.gz commit 38e5f33196a37e0dced2a93bdde0b3c1353f37f8 Author: rwx Date: Mon Feb 2 08:21:08 2004 +0100 Wrote some code to detect when the number of clues increments linearly with Wrote some code to detect when the number of clues increments linearly with regard to the received responses. Now there's a piece of code which automagickally finds out the MIME headers responsible for such increase and ignores them on-the-fly. 
darcs-hash:20040202072108-a3a09-73e4293a626c6cf8e93d712d758ed9d886e90a81.gz create mode 100644 Halberd/inspectlib.py commit a26e778966c208c56ad4434c58edd945b5c67242 Author: rwx Date: Mon Feb 2 08:15:50 2004 +0100 Turned normalize into a conventional function (for use with inspectlib). Turned normalize into a conventional function (for use with inspectlib). Stopped using dictionaries to store MIME headers because field ordering (very important) was missing from the hash. Now the Server field gets hashed too and a new default handler method for Content-Length has been added. darcs-hash:20040202071550-a3a09-a68460f10aa8666f2140a3c035ea0c8d1f0d4732.gz commit 7e97a3c97c9645973e8083bfab454524a17a2c70 Author: rwx Date: Mon Feb 2 08:04:46 2004 +0100 Removed --sockets option. Removed --sockets option. Fixed a bug in make_url. Now the user doesn't need to specify --verbose to see the DNS information. It is shown by default when the target host resolves to several addresses. darcs-hash:20040202070446-a3a09-3c422e1cf290debb5a7884f89b095571320fbb64.gz commit 4d43381645893c5bca2c5f995db57193ab1bda3d Author: rwx Date: Sun Feb 1 04:50:28 2004 +0100 Turned analyze.groups into a generator function. Turned analyze.groups into a generator function. Fixed the Clue._get_* methods so that they work with Clue._normalize. darcs-hash:20040201035028-a3a09-c42a8329f7dd8e540bc970b763862a4df2b4407e.gz commit 8c53c14dbbefa2ed2ee4f7fb113ebb001b397713 Author: rwx Date: Sun Feb 1 04:48:56 2004 +0100 Fixed a small glitch with make_url darcs-hash:20040201034856-a3a09-c46166d61a200350ca07ebcc6c0ccd3a2b001e3f.gz commit 4112f34867fd4ba2a32d57fe3b70c132c66dd76a Author: rwx Date: Sun Feb 1 04:48:39 2004 +0100 Now catches hlbd.clientlib.ConnectionRefused and shows the amount of replies Now catches hlbd.clientlib.ConnectionRefused and shows the amount of replies missed due to timeouts. 
darcs-hash:20040201034839-a3a09-9a97959e700192892784312497bb4f77670e7302.gz commit 8e3458b2ba3119a218a77d6357499ca4c4ffe0dd Author: rwx Date: Sat Jan 31 15:03:59 2004 +0100 Some tests for hlbd.clientlib have been written. darcs-hash:20040131140359-a3a09-37bee5922a76e1f811edc7d8a3e17575260ce5b3.gz create mode 100644 tests/test_clientlib.py commit 4161e0660cd5a4824aec92283431ddb95086ad9e Author: rwx Date: Sat Jan 31 15:03:46 2004 +0100 hlbd.clientlib has been dramatically simplified and hlbd.scanlib has been hlbd.clientlib has been dramatically simplified and hlbd.scanlib has been changed accordingly. Now hlbd.scanlib performs more reliable (but iterative) scans. darcs-hash:20040131140346-a3a09-0b85dda4c6de390e8ae70c1e988443cd28b4404b.gz commit 3ca95485a2319e3e02e6b48975882a7459413564 Author: rwx Date: Sat Jan 31 14:59:41 2004 +0100 Some tests for cluelib have been written. darcs-hash:20040131135941-a3a09-9042d2625b690220bbf598e0681f6aa69d928c66.gz commit c5338857f908b7385e4227c19e7fb6e784174591 Author: rwx Date: Sat Jan 31 14:59:21 2004 +0100 hlbd.cluelib now has a better analysis functionality and some Clue attributes hlbd.cluelib now has a better analysis functionality and some Clue attributes have been replaced by entries in an info dictionary attribute. hlbd.reportlib has been updated to work with the changes in the Clue class. darcs-hash:20040131135921-a3a09-d9eb27f406025f6f2b8a09fe32ec8b38b4bcdbd1.gz commit 2eb220cc8921d48fe7570cd664a06c9216a701dc Author: rwx Date: Sat Jan 31 14:57:48 2004 +0100 All the analysis functionality has been rewritten and the main module adapted All the analysis functionality has been rewritten and the main module adapted to the changes. darcs-hash:20040131135748-a3a09-04c8c87c23bef681333da258855de5c9031f5177.gz commit a5b35cc384a2a976797f3aa742163f991ef56569 Author: rwx Date: Thu Jan 29 14:11:33 2004 +0100 Added a test for Clue._normalize. Added a test for Clue._normalize. Added a test exercising Clue.incCount's error checking.
darcs-hash:20040129131133-a3a09-a62ce2b51ffee80e218f101a9330b94821be1c03.gz commit 3a15534c17502b3d47dbf1dad9bc8449ae75c5de Author: rwx Date: Thu Jan 29 14:10:59 2004 +0100 Implemented Clue._normalize to avoid getting invalid MIME field names. Implemented Clue._normalize to avoid getting invalid MIME field names. Improved Clue.incCount error checking. darcs-hash:20040129131059-a3a09-3f70b30f8d58285051f378f50cd723b8c7701df9.gz commit 02be690ab600f790dad3c4ed8a773b5c9913c79d Author: rwx Date: Thu Jan 29 03:17:52 2004 +0100 The file is automatically generated by shtool so there's no need to have the The file is automatically generated by shtool so there's no need to have the file in the repository. darcs-hash:20040129021752-a3a09-7525e5ce3fa7d6c24d23d8b40935c1c08740afff.gz delete mode 100644 Halberd/version.py commit 176a978fceac7d0a784ba59dc6efc14ebba58157 Author: rwx Date: Thu Jan 29 03:15:42 2004 +0100 Improved documentation darcs-hash:20040129021542-a3a09-db26350083e5113730cc6e45e3584a28b1c7e81b.gz commit 92b276b715e145632705cb4c0da687ad10439826 Author: rwx Date: Thu Jan 29 03:14:56 2004 +0100 Implemented clue list loading and saving functionality. Implemented clue list loading and saving functionality. Improved documentation. darcs-hash:20040129021456-a3a09-a658d4175a0f51b7cd5043af7092eeba41919213.gz commit 77ea279f6a7d53031ff84e34c41b54297ad24fed Author: rwx Date: Thu Jan 29 03:13:58 2004 +0100 Moved clue saving code to hlbd.cluelib darcs-hash:20040129021358-a3a09-eeea24d62ee7a4df601b2054065840918f8a2d5a.gz commit 0c19da3af564953028ba4827a29b8f50e2cc51a1 Author: rwx Date: Thu Jan 29 03:11:41 2004 +0100 Rewrote test suite support. 
darcs-hash:20040129021141-a3a09-70ac8ce8bdf06cb7433f55e4d09d311b4fb14645.gz create mode 100644 tests/__init__.py delete mode 100644 tests/test_clue.py create mode 100644 tests/test_cluelib.py delete mode 100644 tests/test_http.py commit bb2c23049ba28825a45457c9c13b360673eaedc1 Author: rwx Date: Thu Jan 29 03:10:47 2004 +0100 Added test suite support. darcs-hash:20040129021047-a3a09-51c651f4fa7bfc90ecfd43acbba3783448335d64.gz commit 86474b111d0a2f7f85a49f2687523e4514412678 Author: rwx Date: Tue Jan 27 23:10:02 2004 +0100 Modified the Clue object so that it can be pickled. darcs-hash:20040127221002-a3a09-ee73be0e98c089f73022c368e46f2c07a60abdcd.gz commit d5377bed8fc4adbf49640fe7e14cef02f7977dd0 Author: rwx Date: Tue Jan 27 23:09:31 2004 +0100 Added --record option to be able to write clues to a file. Added --record option to be able to write clues to a file. This makes it possible to load them again in future executions of the program. darcs-hash:20040127220931-a3a09-380771225c61da89eb1f14c6c316baad94d85c12.gz commit bf8417cfaf3010c4440fa9a28a4b4bc816d5c2d0 Author: rwx Date: Tue Jan 27 17:47:42 2004 +0100 Added --address option. darcs-hash:20040127164742-a3a09-c1b00e06b73a022ef59627c69ea0bdac85fe0272.gz commit 5a3fdc8280d127be80b1ab370dfe5da78596b61e Author: rwx Date: Tue Jan 27 14:15:03 2004 +0100 Refactored all the clue comparison code into a CmpOperator class which allows Refactored all the clue comparison code into a CmpOperator class which allows the creation of customized comparison functions. darcs-hash:20040127131503-a3a09-5c5acfc3ef28af36c6bdfe8a7e803a48a0814d5c.gz commit 035678d1e695d953296d1269c546536c618df615 Author: rwx Date: Tue Jan 27 03:28:32 2004 +0100 Takes the ChangeLog file into account. darcs-hash:20040127022832-a3a09-f85eabd4f655f288cde6a55fdce915ab9472f7b8.gz commit c2de560e400273b7e51e5358176d8133b99df6ac Author: rwx Date: Tue Jan 27 03:24:30 2004 +0100 Now distributes the ChangeLog too. 
darcs-hash:20040127022430-a3a09-720117fe14a3ac6960e7ef901a249d69a9ace343.gz commit f15af6fad4511ccafe7e94edde0a0d26264995a1 Author: rwx Date: Tue Jan 27 00:07:31 2004 +0100 Initial revision darcs-hash:20040126230731-a3a09-53057b0ad2d5b93317730b13b1ed7621891e9add.gz create mode 100644 AUTHORS create mode 100644 GNUmakefile create mode 100644 Halberd/__init__.py create mode 100644 Halberd/clientlib.py create mode 100644 Halberd/cluelib.py create mode 100644 Halberd/reportlib.py create mode 100644 Halberd/scanlib.py create mode 100644 Halberd/version.py create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README create mode 100644 THANKS create mode 100755 scripts/halberd create mode 100755 setup.py create mode 100644 tests/test_clue.py create mode 100644 tests/test_http.py halberd-0.2.4/LICENSE0000644000175000017500000004313111144236326012575 0ustar jmbrjmbr GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License.