pax_global_header00006660000000000000000000000064123436422120014511gustar00rootroot0000000000000052 comment=2ba7afab870880565e7efa0d9b55ba820d38bb1d
httpry-httpry-0.1.8/000077500000000000000000000000001234364221200144015ustar00rootroot00000000000000httpry-httpry-0.1.8/Makefile000066400000000000000000000036271234364221200160510ustar00rootroot00000000000000
#
#  ----------------------------------------------------
#  httpry - HTTP logging and information retrieval tool
#  ----------------------------------------------------
#
#  Copyright (c) 2005-2014 Jason Bittel
#
#  Targets:
#    all (default)  optimized build of $(PROG)
#    debug          build with -g and -DDEBUG
#    profile        optimized build with -pg for gprof
#    install        copy the binary to /usr/sbin/ and the man page to man1
#    uninstall      remove the files that install copies
#    clean          remove the built binary
#

CC = gcc
CCFLAGS = -Wall -O3 -funroll-loops -I/usr/include/pcap -I/usr/local/include/pcap
DEBUGFLAGS = -Wall -g -DDEBUG -I/usr/include/pcap -I/usr/local/include/pcap
LIBS = -lpcap -lm -pthread
PROG = httpry
FILES = httpry.c format.c methods.c utility.c rate.c

.PHONY: all debug profile install uninstall clean

all: $(PROG)

$(PROG): $(FILES)
	$(CC) $(CCFLAGS) -o $(PROG) $(FILES) $(LIBS)

debug: $(FILES)
	@echo "--------------------------------------------------"
	@echo "Compiling $(PROG) in debug mode"
	@echo ""
	@echo "This will cause the program to run slightly"
	@echo "slower, but enables additional data verification"
	@echo "and sanity checks; recommended for testing, not"
	@echo "production usage"
	@echo "--------------------------------------------------"
	@echo ""
	$(CC) $(DEBUGFLAGS) -o $(PROG) $(FILES) $(LIBS)

profile: $(FILES)
	@echo "--------------------------------------------------"
	@echo "Compiling $(PROG) in profile mode"
	@echo ""
	@echo "This enables profiling so gprof can be used for"
	@echo "code analysis"
	@echo "--------------------------------------------------"
	@echo ""
	$(CC) $(CCFLAGS) -pg -o $(PROG) $(FILES) $(LIBS)

# The man page goes to /usr/man/man1/ when that copy succeeds, otherwise to
# /usr/local/man/man1/.
install: $(PROG)
	@echo "--------------------------------------------------"
	@echo "Installing $(PROG) into /usr/sbin/"
	@echo ""
	@echo "You can move the Perl scripts and other tools to"
	@echo "a location of your choosing manually"
	@echo "--------------------------------------------------"
	@echo ""
	cp -f $(PROG) /usr/sbin/
	cp -f $(PROG).1 /usr/man/man1/ || cp -f $(PROG).1 /usr/local/man/man1/

# rm -f exits 0 when its operand does not exist, so chaining the two man page
# locations with || would never reach the second one. Remove both explicitly
# so a page installed through the /usr/local fallback is cleaned up as well.
uninstall:
	rm -f /usr/sbin/$(PROG)
	rm -f /usr/man/man1/$(PROG).1 /usr/local/man/man1/$(PROG).1

clean:
	rm -f $(PROG)
httpry-httpry-0.1.8/README000077700000000000000000000000001234364221200167012doc/READMEustar00rootroot00000000000000httpry-httpry-0.1.8/config.h000066400000000000000000000034331234364221200160220ustar00rootroot00000000000000
/*

  ----------------------------------------------------
  httpry - HTTP logging and information retrieval tool
  ----------------------------------------------------

  Copyright (c) 2005-2014 Jason Bittel

*/

#ifndef _HAVE_CONFIG_H
#define _HAVE_CONFIG_H

#define PROG_NAME "httpry"
#define PROG_VER "0.1.8"

/* Default packet capture filter; must be a standard libpcap style filter
   *** Can be overridden */
#define DEFAULT_CAPFILTER "tcp port 80 or 8080"

/* Default output format string; see doc/format-string for more information
   *** Can be overridden with -f */
#define DEFAULT_FORMAT "timestamp,source-ip,dest-ip,direction,method,host,request-uri,http-version,status-code,reason-phrase"

/* Default format string for rate statistics mode; should never change! */
#define RATE_FORMAT "host"

/* Default request methods to process; see doc/method-string for more information
   *** Can be overridden with -m */
#define DEFAULT_METHODS "get,post,put,head,options,delete,trace,connect,patch"

/* Default threshold for displaying rps in rate statistics mode
   *** Can be overridden with -l */
#define DEFAULT_RATE_THRESHOLD 2

/* Default display interval for rate statistics
   *** Can be overridden with -t */
#define DEFAULT_RATE_INTERVAL 5

/* Default location to store the PID file when running in daemon mode
   *** Can be overridden with -P */
#define PID_FILENAME "/var/run/httpry.pid"

/* Where to send unnecessary output */
#define NULL_FILE "/dev/null"

/* String to print when an output field has no associated data */
#define EMPTY_FIELD "-"

/* Delimiter that separates output fields */
#define FIELD_DELIM "\t"

/* HTTP specific constant; should never change! */
#define HTTP_STRING "HTTP/"

/* Size constants. NOTE(review): presumably the buffer lengths for a formatted
   timestamp and for a port number rendered as a string; their users are not
   visible in this file, so confirm before changing either value */
#define MAX_TIME_LEN 20
#define PORTSTRLEN 6

#endif /* ! _HAVE_CONFIG_H */
httpry-httpry-0.1.8/doc/000077500000000000000000000000001234364221200151465ustar00rootroot00000000000000httpry-httpry-0.1.8/doc/AUTHORS000066400000000000000000000020061234364221200162140ustar00rootroot00000000000000
Original author and primary maintainer:

  Jason Bittel

Significant suggestions or contributions:

  Shawn Ashlee
  * Provided a spec file to generate RPMs

  Philipp Buck
  * Suggested source/destination port as output fields
  * Provided a patch for reopening output files with SIGHUP
  * Suggested line buffering for output files

  Mark Carter
  * Provided a patch adding IPv6 support

  Mats Erik Andersson
  * Provided a patch adding IPv6 support

  Dennis Opacki
  * Provided a major patch adding a rate statistics mode

  Emanuele Acri
  * Provided a patch adding a force output flush option

  Bart Roos
  * Provided a patch adding an option to specify the PID filename

  Dustin Webber
  * Provided a patch fixing compile errors on OSX
httpry-httpry-0.1.8/doc/COPYING000066400000000000000000000431031234364221200162020ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. httpry-httpry-0.1.8/doc/ChangeLog000066400000000000000000000131221234364221200167170ustar00rootroot00000000000000This is the high level list of changes for each httpry release. Note that this changelog does not include changes to the log file parsing scripts, which are often updated significantly between releases as well. It only lists notable changes and fixes to the core httpry program.
version 0.1.8 * added support for 802.1Q VLAN tagged packets * added PATCH HTTP method to default method list * changed packet parsing to continue without a full header present * added PPP link type support * added custom ethernet header offset option (-S) * changed read timeout to be non-zero version 0.1.7 * added an option (-P) to specify the PID filename * fixed compiling under OSX * changed IPv6 parsing to follow extension headers if present * changed rate statistics code to use a hash table data structure * added a rps threshold option (-l) when in rate statistics mode * changed rps display mode to -s, -t now just sets the display interval version 0.1.6 * added IPv6 support * added a force output flush option (-F) to disable output buffering * removed line buffering on output files * added a HTTP request per second display mode (-t) version 0.1.5 * added a binary pcap dump file option (-b) * changed -s to -f to be more mnemonic * added SIGHUP handling for graceful reloads * default to line buffering on output files * added source-port and dest-port as available output fields version 0.1.4 * improved efficiency of header line parsing * added support for parsing any request method (-m) * default to capturing all HTTP packets with a standard RFC2616 method * modified date/time stamp to be more consistent with ISO 8601 * messages now print to the syslog only when running as a daemon version 0.1.3 * improved parsing of non-standard spacing and line terminators * fixed compiling under FreeBSD * removed broken conditionals that caused pcap_breakloop to not be used * fixed the program returning success even if the capture loop failed * fixed a potential crash if the program was running in debug mode * added a quiet option (-q) to suppress non-critical output version 0.1.2 * added support for multiple datalink packet header types * added support for virtual devices and interfaces without an IP address * output format structure is now a hybrid hash table/linked list * 
removed -f option; capture filters are now passed as the final argument version 0.1.1 * fixed some minor format string parsing bugs * added debug code to validate correct behavior (use 'make debug' to enable) * added packet capture/processing statistics * fixed error when changing output file owner after dropping privileges * output format structure is now a hybrid tree/list for improved efficiency * fixed the header parsing loop continuing into the response data * added some additional sanity checks and more informative error strings version 0.1.0 * massive code rewrite/refactoring * rewrote output format handling code * removed config file option * removed writing binary dump file * removed option to specify run directory in daemon mode * changed option letter -f to -r and -l to -f * hugely improved the efficiency of packet parsing * tweaked default capture filter * eliminated some minor memory leaks on shutdown * reduced data coupling within the program * removed custom argument parser to make it more functional and consistent * format string can now have internal whitespace and is now case insensitive version 0.0.9 * added parsing of both client requests and server responses * default capture filter now less restrictive * command line arguments now *always* take precedence over config options * wrote custom command line argument parser * removed chance to test against invalid data on exit * all major packet fields are now available for output * config file options now all lowercase * output files are now chown'd to process owner * -n switch now counts parsed packets, not total packets seen * additional sanity checking for output file paths * added necessary compile directives for OpenBSD * '-' can be specified for the filename to force STDOUT version 0.0.8 * fixed crash on exit when reading from an input file * handle whitespace properly in config file lines * allow user to specify fields to output * removed extended options switch (-x) * configurable output 
data format (-s) * parsed HTTP header fields stored in dynamic list * error/log/warn macros encapsulated in header file * moved TCP data fields to separate header file version 0.0.7 * option to read settings from a config file (-c) * changed packet count switch from -c to -n * fixed bug in packet count check * cleaned up initialization logic in main() * added config.h for setting compile time program defaults * added option to display extended packet information (-x) * added option to dump binary capture output file (-b) * basic packet capture/parsing stats at program termination * program now compiles cleanly as strict ANSI C version 0.0.6 * ability to change process owner (-u) * added additional error checking when entering daemon mode * completely reworked output data handling * removed stdout suppression (-s) option * daemon mode (-d) now requires output file (-o) * error/log/warn message printing pushed into function macros * standardized error and status message strings * ability to specify running directory (-r) * fixed an incorrect array initialization * split version and help information into two commands (-v and -h) httpry-httpry-0.1.8/doc/README000066400000000000000000000147621234364221200160400ustar00rootroot00000000000000 _ _ _ | | | | | | | |__ | |_| |_ _ __ _ __ _ _ | '_ \| __| __| '_ \| '__| | | | | | | | |_| |_| |_) | | | |_| | |_| |_|\__|\__| .__/|_| \__, | | | __/ | |_| |___/ HTTP logging and information retrieval tool version 0.1.8 Copyright (c) 2005-2014 Jason Bittel For further information about the program, see: http://dumpsterventures.com/jason/httpry For modification and redistribution information, see COPYING file --{ ABOUT }-- httpry is a tool designed for displaying and logging HTTP traffic. It is not intended to perform analysis itself, but instead to capture, parse and/or log the traffic for later analysis. It can be run in real-time displaying the live traffic on the wire, or as a daemon process that logs to an output file. 
It is written to be as lightweight and flexible as possible, so that it can be easily adaptable to different applications. It does not display the raw HTTP data transferred, but instead focuses on parsing and displaying the request/response line along with associated header fields. "How is this tool useful?" you may ask. Here's just a few ideas: > See what users on your network are browsing online > Check for proper server configuration (or improper, as the case may be) > Research patterns in HTTP usage > Watch for dangerous downloaded files > Verify the enforcement of HTTP policy on your network > Extract HTTP statistics out of saved capture files > It's just plain fun to watch in realtime In addition to the core program, there are several Perl scripts included for processing httpry log files. They should be useful for a number of generic situations, and can serve as a useful starting point for your own log parsing toolset. More information about these scripts can be found in the doc/perl-tools file. --{ INSTALLATION }-- httpry should compile on almost any *nix based OS with a relatively recent version of libpcap (specifically tested against 1.1.1 and newer). To compile and install, run these commands in the base httpry directory: $ make # make install which compiles the program and copies the binary and man page to their appropriate locations. You can run the binary from the compilation directory if you don't want to install it. To uninstall the program, run: # make uninstall from the installation directory, or manually delete the executable and man page. --{ USAGE }-- Running httpry with no options will cause it to listen on the first network device and output to the console with some sane defaults. The -h switch will print out an abbreviated description of the available options to change the defaults. This section describes these options in greater detail. 
httpry [ -dFhpqs ] [ -b file ] [ -f format ] [ -i device ] [ -l threshold ] [ -m methods ] [ -n count ] [ -o file ] [ -P file ] [ -r file ] [ -S bytes ] [ -t seconds ] [ -u user ] [ 'expression' ] -b file Write all processed HTTP packets to a binary pcap dump file. Useful for further analysis of logged data. -d Run the program as a daemon process. All program status output will be sent to syslog. A pid file is created for the process in /var/run/httpry.pid by default. Requires an output file specified with -o. -f format Provide a comma-delimited string specifying the parsed HTTP data to output. See the doc/format-string file for further information regarding available options and syntax. -F Disable all output buffering. This may be helpful when piping httpry output into another program. -h Display a brief summary of these options. -i device Specify an ethernet interface for the program to listen on. If not specified, the program will poll the system for a list of interfaces and select the first one found. -l threshold Specify a requests per second rate threshold value when running in rate statistics mode (-s). Only hosts with a rps value greater than or equal to this number will be displayed. Defaults to 2. -m methods Provide a comma-delimited string that specifies the request methods to parse. The program defaults to parsing all of the standard RFC2616 method strings if this option is not set. See the doc/method-string file for more information. -n count Parse this number of HTTP packets and then exit. Defaults to 0, which means loop forever. -o file Specify an output file for writing parsed packet data. -p Do not put the NIC in promiscuous mode on startup. Note that the NIC could already be in that mode for another reason. -P file Specify a path and filename for creating the PID file in daemon mode. -q Suppress non-critical output (startup banner, statistics, etc.). -r file Provide an input capture file to read from instead of performing a live capture.
This option does not require root privileges. -s Run httpry in an HTTP request per second display mode. This periodically displays the rate per active host and total rate at a specified interval. -S bytes Specify a number of bytes to skip in the ethernet header. This allows for custom header offsets to be accounted for. -t seconds Specify the host statistics display interval in seconds when running in rate statistics mode (-s). Defaults to 5 seconds. -u user Specify an alternate user to take ownership of the process and any output files. You will need root privileges to do this; it will switch to the new user after initialization. 'expression' Specify a bpf-style capture filter, overriding the default. Here are a few basic examples, starting with the default filter: 'tcp port 80 or 8080' 'tcp dst port 80' 'tcp dst port 80 and src host 192.168.1.1' These filters will capture all web traffic in both directions on two common ports, capture only requests made to port 80, and capture requests to port 80 by a particular host, respectively. See 'man tcpdump' for further information on the syntax and available primitives. --{ KNOWN ISSUES }-- It is worth noting that httpry is rather naive when it comes to parsing HTTP packets. It does not perform any reordering or reassembly of packets and simply searches the start of each packet for HTTP data and ignores the packet if it does not find valid data. HTTP packets that are fragmented within the request/response line will be parsed to the end of the packet and any header data present in subsequent packets will not be parsed. httpry-httpry-0.1.8/doc/format-string000066400000000000000000000052041234364221200176660ustar00rootroot00000000000000The -f switch allows the user to provide a format string that tells httpry which fields to print from each HTTP packet. The string is a comma-delimited list of elements, examples of which are provided below. The format strings can have spaces between elements and are _not_ case sensitive.
Duplicate field names are ignored. An example, you say? The default format string looks like this: timestamp,source-ip,dest-ip,direction,method,host,request-uri, http-version,status-code,reason-phrase The output will look something like this for a request and a response (a '-' character indicates no data for that field): 06/05/2006 15:32:31 192.168.0.15 66.102.7.104 > GET www.google.com / HTTP/1.1 - - 06/05/2006 15:32:31 66.102.7.104 192.168.0.15 < - - - HTTP/1.1 200 OK In these two example lines the fields are space delimited for readability, but the standard output from httpry is tab delimited. There are eleven special (i.e. outside the body of the HTTP request) fields that can be specified in the format string: Timestamp Request-URI Source-IP Method Source-Port HTTP-Version Dest-IP Status-Code Dest-Port Reason-Phrase Direction Most of these are fields from the header line of each request or response. The direction field will print a chevron with '>' indicating a client request and '<' indicating a server response. The program can parse any header field found in the packet, even custom headers not included in the HTTP standard. For reference, here is a list of the standard RFC2616 headers: Accept If-None-Match Accept-Charset If-Range Accept-Encoding If-Unmodified-Since Accept-Language Max-Forwards Authorization Proxy-Authorization Expect Range From Referer Host TE If-Match User-Agent If-Modified-Since Of course there are other request header fields, but this provides a starting point. Here are some example format strings: host,user-agent referer,request-uri,http-version source-ip,x-forwarded-for,user-agent timestamp,source-ip,dest-ip,direction,host,request-uri status-code,reason-phrase,my-custom-header-field There is no limit on the length of the format string. This provides a reasonably flexible method for specifying the output string, while still supporting custom fields. Input order is maintained so you can position the fields in the output string. 
If you consistently use a custom format string and don't want to specify it every run, just modify the default format string in config.h and recompile httpry. httpry-httpry-0.1.8/doc/method-string000066400000000000000000000023571234364221200176640ustar00rootroot00000000000000The -m switch allows the user to specify the request methods they are interested in. Any request that has a method in this list is processed, and all others are ignored. The string itself is a comma-delimited list of terms. Spaces are allowed between elements and the terms are not case sensitive. Any duplicate field names are ignored. This is the default methods string used: get,post,put,head,options,delete,trace,connect,patch For example, if you want to only process GET and HEAD requests (the default behavior in httpry <= version 0.1.3) you would use: httpry -m get,head You are not limited to the above list of methods. Any string can be provided as a potential method, and it will be matched if it appears in a HTTP request. This allows you to easily search for WebDAV methods, or anything else you're interested in. Note that requests and responses are not associated within httpry, as the program simply displays what it sees on the wire. If you limit the request types but are capturing traffic both directions, you will still see the server responses for requests that were ignored. If you consistently use a custom method string and don't want to specify it every run, modify the default method string in config.h and recompile httpry. httpry-httpry-0.1.8/doc/perl-tools000066400000000000000000000077701234364221200172040ustar00rootroot00000000000000A number of Perl scripts are included with the project to provide a basic log parsing framework. There is a central Perl script (parse_log.pl) that handles all of the core log parsing duties, and a set of Perl plugins (plugins/*.pm) with associated config files (plugins/*.cfg) that perform assorted processing duties. 
It is straightforward to create or modify the plugins to customize processing. *** IMPORTANT! *** The log parser will attempt to use all plugins that it finds in the default plugin directory. To override this behavior, use the -p switch to specify plugins by path and name and/or use -d to specify an alternate directory to use. If the plugin requires a configuration file, it must be located in the same directory as the plugin and named after the plugin with a '.cfg' extension (for example, 'myplugin.cfg' for 'myplugin.pm'). Additionally, any input log file needs to be a valid httpry log with the field specifier at the start of the file. These non-core Perl modules must be installed to run all scripts: DBI Included Perl scripts: --------------------- ./scripts/parse_log.pl Core script for plugins. Processes log files and delivers data to each initialized plugin. Plugins are utilized by having a .pm extension and being placed in the plugin directory (default is ./plugins). If a plugins directory is specified (-d), that is used instead. If no directory is specified, the script first searches the current directory for a plugin directory followed by the base directory of the script. The first directory found is used. To load only specific plugins, use the -p switch to provide a comma-delimited list of plugins (-d is not applied here, so specify them by their full path). Most included plugins require a config file, which must be named after the plugin with a '.cfg' extension and placed in the same directory as the plugin. Included plugins for parse_log.pl: --------------------------------- ./scripts/plugins/common_log.pm Outputs the log data in the standard common logfile format. This allows webserver log analysis tools to be applied to the captured data. ./scripts/plugins/content_analysis.pm Breaks the input file into flows, which are time delimited blocks of log lines by client IP. It then searches for specified keywords and scores the flows accordingly. Outputs a summary file listing all scored flows, as well as flow data for each scored flow.
Requires an input file containing search terms. ./scripts/plugins/db_dump.pm Dumps log file data into a previously initialized database. Useful for more extensive data mining or adding a more user-friendly web interface. Use the included db_dump.mysql to create the required database tables. ./scripts/plugins/find_proxies.pm Performs some basic tests looking for proxy usage. The tests are currently rather rudimentary and generate quite a lot of noise. ./scripts/plugins/hostnames.pm Outputs a list of unique hostnames with counts found in the log data. ./scripts/plugins/log_summary.pm Provides summary information regarding the input files including visited hosts, top talkers and file extensions, along with other assorted data. Useful for a high-level overview of the input files. ./scripts/plugins/sample_plugin.pm An example plugin with no functionality designed to demonstrate how plugins are structured and to provide a start for new plugins. ./scripts/plugins/search_terms.pm Outputs a raw dump of all entered search terms for a number of the major search services. It also calculates the average length of extracted search queries. ./scripts/plugins/tokenize.pm Outputs a set of tokens for each source IP address, broken into separate output files (a token here is defined as a sequence of alphanumeric characters delimited by non-alphanumeric characters). ./scripts/plugins/xml_format.pm Outputs the input log data in XML format. httpry-httpry-0.1.8/error.h000066400000000000000000000031321234364221200157020ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_ERROR_H #define _HAVE_ERROR_H #include #include #include "config.h" extern int quiet_mode; extern int use_syslog; /* Macros for logging/displaying status messages */ #define PRINT(x...) 
{ if (!quiet_mode) { fprintf(stderr, x); fprintf(stderr, "\n"); } } #define WARN(x...) { fprintf(stderr, "Warning: " x); fprintf(stderr, "\n"); } #define LOG(x...) { if (use_syslog) { openlog(PROG_NAME, LOG_PID, LOG_DAEMON); syslog(LOG_ERR, x); closelog(); } } #define DIE(x...) { fprintf(stderr, "Error: " x); fprintf(stderr, "\n"); raise(SIGINT); } #define LOG_PRINT(x...) { LOG(x); PRINT(x); } #define LOG_WARN(x...) { LOG(x); WARN(x); } #define LOG_DIE(x...) { LOG(x); DIE(x); } /* Assert macro for testing and debugging; use 'make debug' to compile the program with debugging features enabled */ #ifdef DEBUG #define ASSERT(x) \ if (!(x)) { \ fflush(NULL); \ fprintf(stderr, "\nAssertion failed: %s, line %d\n", \ __FILE__, __LINE__); \ fflush(stderr); \ exit(EXIT_FAILURE); \ } #endif #endif /* ! _HAVE_ERROR_H */ httpry-httpry-0.1.8/format.c000066400000000000000000000160011234364221200160330ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ /* The output format data structure is stored as a hash table with all of the nodes additionally chained together as a linked list. This allows insert_value() to utilize the more efficient hash structure to find nodes, while functions that need to traverse all nodes in insertion order can use the linked list. A separate head pointer is maintained for the start of the linked list. The hash table creates some wasted space as the table tends to be rather sparse, but the efficiency amortizes on longer runs and it scales well to longer format strings. 
*/

/* NOTE(review): the next five #include directives carry no header name in
   this copy of the file (presumably stripped during extraction); they must
   be restored from the upstream source before this will compile */
#include
#include
#include
#include
#include
#include "error.h"
#include "format.h"
#include "utility.h"

/* Number of buckets in the fields[] hash table */
#define HASHSIZE 64

typedef struct format_node FORMAT_NODE;
struct format_node {
        /* name:  heap copy of the field name, allocated in insert_field()
           value: pointer stored by insert_value(), or NULL when no value
                  is currently set for this field */
        char *name, *value;
        /* next: next node in the same hash bucket
           list: next node in insertion order (chain starts at 'head') */
        FORMAT_NODE *next, *list;
};

/* Helpers that are not exported through format.h */
FORMAT_NODE *insert_field(char *str, size_t len);
FORMAT_NODE *get_field(char *str);

static FORMAT_NODE *fields[HASHSIZE]; /* Hash buckets, indexed by hash_str(name, HASHSIZE) */
static FORMAT_NODE *head = NULL;      /* First node of the insertion-order list */

/* Parse and insert output fields from format string

   str is a comma-delimited list of field names. It is duplicated before
   tokenizing, so the caller's buffer is left untouched. Each token is run
   through str_strip_whitespace() and str_tolower(); tokens that end up
   empty are skipped and the rest are handed to insert_field().

   LOG_DIE is invoked when str is empty, when the working copy cannot be
   allocated, or when no field at all was inserted. */
void parse_format_string(char *str) {
        char *name, *tmp, *i;
        int num_nodes = 0;
        size_t len;

#ifdef DEBUG
        ASSERT(str);
#endif

        len = strlen(str);
        if (len == 0)
                LOG_DIE("Empty format string provided");

        /* Make a temporary copy of the string so we don't modify the original */
        if ((tmp = str_duplicate(str)) == NULL)
                LOG_DIE("Cannot allocate memory for format string buffer");

        /* strtok() takes the buffer on the first call and NULL afterwards,
           hence 'i' is reset to NULL by the loop increment */
        for (i = tmp; (name = strtok(i, ",")); i = NULL) {
                /* Normalize input field text */
                name = str_strip_whitespace(name);
                name = str_tolower(name);
                len = strlen(name);

                if (len == 0) continue;
                /* insert_field() returns NULL for a duplicate name, so only
                   newly created nodes are counted */
                if (insert_field(name, len)) num_nodes++;
        }

        free(tmp);

        if (num_nodes == 0)
                LOG_DIE("No valid fields found in format string");

#ifdef DEBUG
        /* Walk every bucket to report hash table occupancy statistics */
        int j, num_buckets = 0, num_chain, max_chain = 0;
        FORMAT_NODE *node;

        for (j = 0; j < HASHSIZE; j++) {
                if (fields[j]) num_buckets++;

                num_chain = 0;
                for (node = fields[j]; node != NULL; node = node->next) num_chain++;
                if (num_chain > max_chain) max_chain = num_chain;
        }

        PRINT("----------------------------");
        PRINT("Hash buckets:       %d", HASHSIZE);
        PRINT("Nodes inserted:     %d", num_nodes);
        PRINT("Buckets in use:     %d", num_buckets);
        PRINT("Hash collisions:    %d", num_nodes - num_buckets);
        PRINT("Longest hash chain: %d", max_chain);
        PRINT("----------------------------");
#endif

        return;
}

/* Insert a new node into the hash table

   name is the field name and len its length without the terminator
   (len + 1 bytes are allocated for the copy). A new node is pushed onto
   the front of its hash bucket and appended to the insertion-order list.

   Returns the new node, or NULL (after a warning) when a node with this
   name already exists. */
FORMAT_NODE *insert_field(char *name, size_t len) {
        FORMAT_NODE *node;
        /* Last node appended to the insertion-order list; kept across calls */
        static FORMAT_NODE *prev = NULL;
        unsigned int hashval;

#ifdef DEBUG
        ASSERT(name);
        ASSERT(len > 0);
#endif

        if ((node = get_field(name)) == NULL) {
                if ((node = (FORMAT_NODE *) malloc(sizeof(FORMAT_NODE))) == NULL)
                        LOG_DIE("Cannot allocate memory for new node");

                hashval = hash_str(name, HASHSIZE);

#ifdef DEBUG
                /* hashval is unsigned, so only the upper-bound half of this
                   check can ever fail */
                ASSERT((hashval >= 0) && (hashval < HASHSIZE));
#endif

                /* Push onto the front of the bucket chain */
                node->next = fields[hashval];
                fields[hashval] = node;
        } else {
                WARN("Format name '%s' already provided", name);
                return NULL;
        }

        if ((node->name = (char *) malloc(len + 1)) == NULL)
                LOG_DIE("Cannot allocate memory for node name");

        str_copy(node->name, name, len + 1);
        node->value = NULL;
        node->list = NULL;

        /* Update the linked list pointers */
        if (prev) prev->list = node;
        prev = node;
        if (!head) head = node;

        return node;
}

/* If the node exists, update its value field

   Only the pointer is stored; the string is not copied here. Calls with
   an empty name or an empty value are ignored, as are names that are not
   part of the output format. */
void insert_value(char *name, char *value) {
        FORMAT_NODE *node;

#ifdef DEBUG
        ASSERT(name);
        ASSERT(value);
#endif

        if ((strlen(name) == 0) || (strlen(value) == 0)) return;

        if ((node = get_field(name)))
                node->value = value;

        return;
}

/* Given the name, return a value from the hash

   Returns EMPTY_FIELD for an empty name or a name that is not in the
   output format. For a known field the stored pointer is returned as-is,
   which is NULL when no value has been set since the field was created
   or last cleared. */
char *get_value(char *name) {
        FORMAT_NODE *node;

#ifdef DEBUG
        ASSERT(name);
#endif

        if (strlen(name) == 0) return EMPTY_FIELD;

        if ((node = get_field(name))) {
                return node->value;
        } else {
                return EMPTY_FIELD;
        }
}

/* Reset the value of every field to NULL, walking the insertion-order list */
void clear_values() {
        FORMAT_NODE *node = head;

#ifdef DEBUG
        ASSERT(node);
#endif

        while (node) {
                node->value = NULL;
                node = node->list;
        }

        return;
}

/* Print a list of all field names contained in the output format

   Output is "# Fields: " followed by the names in insertion order,
   separated by commas and terminated by a newline. */
void print_format_list() {
        FORMAT_NODE *node = head;

#ifdef DEBUG
        ASSERT(node);
#endif

        printf("# Fields: ");
        while (node) {
                printf("%s", node->name);
                if (node->list != NULL) printf(",");

                node = node->list;
        }
        printf("\n");

        return;
}

/* Destructively print each node value; once printed, each existing
   value is assigned to NULL to clear it for the next packet */
void print_format_values() {
        FORMAT_NODE *node = head;

#ifdef DEBUG
        ASSERT(node);
#endif

        while (node) {
                if (node->value) {
                        printf("%s", node->value);
                        node->value = NULL;
                } else {
                        printf("%s", EMPTY_FIELD);
                }

                if (node->list != NULL)
printf("%s", FIELD_DELIM); node = node->list; } printf("\n"); return; } /* Free all allocated memory for format structure; only called at program termination */ void free_format() { FORMAT_NODE *prev, *curr; if (!head) return; curr = head; while (curr) { prev = curr; curr = curr->list; free(prev->name); free(prev); } return; } /* Lookup a particular node in hash; return pointer to node if found, NULL otherwise */ FORMAT_NODE *get_field(char *str) { FORMAT_NODE *node; #ifdef DEBUG ASSERT(str); ASSERT(strlen(str) > 0); ASSERT((hash_str(str, HASHSIZE) >= 0) && (hash_str(str, HASHSIZE) < HASHSIZE)); #endif for (node = fields[hash_str(str, HASHSIZE)]; node != NULL; node = node->next) if (str_compare(str, node->name) == 0) return node; return NULL; } httpry-httpry-0.1.8/format.h000066400000000000000000000010121234364221200160340ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_FORMAT_H #define _HAVE_FORMAT_H void parse_format_string(char *str); void insert_value(char *name, char *value); char *get_value(char *name); void clear_values(); void print_format_list(); void print_format_values(); void free_format(); #endif /* ! _HAVE_FORMAT_H */ httpry-httpry-0.1.8/httpry.1000066400000000000000000000075001234364221200160170ustar00rootroot00000000000000.TH HTTPRY 1 .SH NAME httpry \- HTTP logging and information retrieval tool .SH SYNOPSIS .B httpry [ -dFpq ] [ -b file ] [ -f format ] [ -i device ] [ -m methods ] [ -n count ] [ -o file ] [ -P file ] [ -r file ] [ -S bytes ] [ -u user ] [ 'expression' ] .br .B httpry -s [ -l threshold ] [ -t seconds ] .br .B httpry -h .br .SH DESCRIPTION .PP .I httpry is a tool designed for displaying and logging HTTP traffic. It is not designed to perform analysis itself, but instead to capture, parse and log the traffic for later analysis. 
It can be run in real-time displaying the live traffic on the wire, or as a daemon process that logs to an output file. .SH OPTIONS .IP "-b \fIfile\fP" Write all processed HTTP packets to a binary pcap dump file. Useful for further analysis of logged data. .IP "-d" Run the program as a daemon process. All program status output will be sent to syslog. A pid file is created for the process in /var/run/httpry.pid by default. Requires an output file specified with -o. .IP "-f \fIformat\fP" Provide a comma-delimited string specifying the parsed HTTP data to output. See the doc/format-string file for further information regarding available options and syntax. .IP "-F" Disable all output buffering. This may be helpful when piping httpry output into another program. .IP "-h" Display a brief description of these options. .IP "-i \fIdevice\fP" Specify an ethernet interface for the program to listen on. If not specified, the program will poll the system for a list of interfaces and select the first one found. .IP "-l \fIthreshold\fP" Specify a requests per second rate threshold value when running in rate statistics mode (-s). Only hosts with a rps value greater than or equal to this number will be displayed. Defaults to 1. .IP "-m \fImethods\fP" Provide a comma-delimited string that specifies the request methods to parse. The program defaults to parsing all of the standard RFC2616 method strings if this option is not set. See the doc/method-string file for more information. .IP "-n \fIcount\fP" Parse this number of HTTP packets and then exit. Defaults to 0, which means loop forever. .IP "-o \fIfile\fP" Specify an output file for writing parsed packet data. .IP "-p" Do not put the NIC in promiscuous mode on startup. Note that the NIC could already be in that mode for another reason. .IP "-P \fIfile\fP" Specify a path and filename for creating the PID file in daemon mode. .IP "-q" Suppress non-critical output (startup banner, statistics, etc.). 
.IP "-r \fIfile\fP" Provide an input capture file to read from instead of performing a live capture. This option does not require root privileges. .IP "-s" Run httpry in an HTTP request per second display mode. This periodically displays the rate per active host and total rate at a specified interval. .IP "-S \fIbytes\fP" Specify a number of bytes to skip in the ethernet header. This allows for custom header offsets to be accounted for. .IP "-t \fIseconds\fP" Specify the host statistics display interval in seconds when running in rate statistics mode (-s). Defaults to 5 seconds. .IP "-u \fIuser\fP" Specify an alternate user to take ownership of the process and any output files. You will need root privileges to do this; it will switch to the new user after initialization. .IP "'expression'" Specify a bpf-style capture filter, overriding the default. Here are a few basic examples starting with the default filter: 'tcp port 80 or 8080' 'tcp dst port 80' 'tcp dst port 80 and src host 192.168.1.1' These filters will capture all web traffic in both directions on two common ports, capture only requests made to port 80, and capture requests to port 80 by a particular host, respectively. See 'man tcpdump' for further information on the syntax and available primitives. .SH AUTHOR .I httpry was written by Jason Bittel .
See included COPYING file for specific licensing information httpry-httpry-0.1.8/httpry.c000066400000000000000000000647141234364221200161130ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "config.h" #include "error.h" #include "format.h" #include "methods.h" #include "tcp.h" #include "rate.h" /* Function declarations */ int getopt(int, char * const *, const char *); pcap_t *prepare_capture(char *interface, int promisc, char *filename, char *capfilter); void set_link_offset(int header_type); void open_outfiles(); void runas_daemon(); void change_user(char *name); void parse_http_packet(u_char *args, const struct pcap_pkthdr *header, const u_char *pkt); int process_ip6_nh(const u_char *pkt, int size_ip, unsigned int caplen, unsigned int offset); char *parse_header_line(char *header_line); int parse_client_request(char *header_line); int parse_server_response(char *header_line); void handle_signal(int sig); void cleanup(); void print_stats(); void display_banner(); void display_usage(); /* Program flags/options, set by arguments or config file */ static unsigned int parse_count = 0; static int daemon_mode = 0; static int eth_skip_bits = 0; static char *use_infile = NULL; static char *interface = NULL; static char *capfilter = NULL; static char *use_outfile = NULL; static int set_promisc = 1; static char *pid_filename = NULL; static char *new_user = NULL; static char *format_str = NULL; static char *methods_str = NULL; static char *use_dumpfile = NULL; static int rate_stats = 0; static int rate_interval = DEFAULT_RATE_INTERVAL; static int rate_threshold = DEFAULT_RATE_THRESHOLD; static int force_flush = 0; int quiet_mode = 0; /* Defined as extern in error.h 
*/ int use_syslog = 0; /* Defined as extern in error.h */ static pcap_t *pcap_hnd = NULL; /* Opened pcap device handle */ static char *buf = NULL; static unsigned int num_parsed = 0; /* Count of fully parsed HTTP packets */ static time_t start_time = 0; /* Start tick for statistics calculations */ static int link_offset = 0; static pcap_dumper_t *dumpfile = NULL; static char default_capfilter[] = DEFAULT_CAPFILTER; static char default_format[] = DEFAULT_FORMAT; static char rate_format[] = RATE_FORMAT; static char default_methods[] = DEFAULT_METHODS; /* Find and prepare ethernet device for capturing */ pcap_t *prepare_capture(char *interface, int promisc, char *filename, char *capfilter) { char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pcap_hnd; char *dev = NULL; bpf_u_int32 net, mask; struct bpf_program filter; if (!filename) { /* Starting live capture, so find and open network device */ if (!interface) { dev = pcap_lookupdev(errbuf); if (dev == NULL) LOG_DIE("Cannot find a valid capture device: %s", errbuf); } else { dev = interface; } if (pcap_lookupnet(dev, &net, &mask, errbuf) == -1) net = 0; pcap_hnd = pcap_open_live(dev, BUFSIZ, promisc, 1000, errbuf); if (pcap_hnd == NULL) LOG_DIE("Cannot open live capture on '%s': %s", dev, errbuf); } else { /* Reading from a saved capture, so open file */ pcap_hnd = pcap_open_offline(filename, errbuf); if (pcap_hnd == NULL) LOG_DIE("Cannot open saved capture file: %s", errbuf); } set_link_offset(pcap_datalink(pcap_hnd)); /* Compile capture filter and apply to handle */ if (pcap_compile(pcap_hnd, &filter, capfilter, 0, net) == -1) LOG_DIE("Cannot compile capture filter '%s': %s", capfilter, pcap_geterr(pcap_hnd)); if (pcap_setfilter(pcap_hnd, &filter) == -1) LOG_DIE("Cannot apply capture filter: %s", pcap_geterr(pcap_hnd)); pcap_freecode(&filter); if (!filename) LOG_PRINT("Starting capture on %s interface", dev); return pcap_hnd; } /* Set the proper packet header offset length based on the datalink type */ void set_link_offset(int 
header_type) { #ifdef DEBUG ASSERT(header_type >= 0); #endif switch (header_type) { case DLT_EN10MB: link_offset = 14; break; #ifdef DLT_IEEE802_11 case DLT_IEEE802_11: link_offset = 32; break; #endif #ifdef DLT_LINUX_SLL case DLT_LINUX_SLL: link_offset = 16; break; #endif #ifdef DLT_LOOP case DLT_LOOP: link_offset = 4; break; #endif case DLT_NULL: link_offset = 4; break; case DLT_RAW: link_offset = 0; break; case DLT_PPP: link_offset = 4; break; #ifdef DLT_PPP_SERIAL case DLT_PPP_SERIAL: #endif case DLT_PPP_ETHER: link_offset = 8; break; default: LOG_DIE("Unsupported datalink type: %s", pcap_datalink_val_to_name(header_type)); break; } return; } /* Open any requested output files */ void open_outfiles() { /* Redirect stdout to the specified output file if requested */ if (use_outfile) { if (daemon_mode && (use_outfile[0] != '/')) LOG_WARN("Output file path is not absolute and may be inaccessible after daemonizing"); if (freopen(use_outfile, "a", stdout) == NULL) LOG_DIE("Cannot reopen output stream to '%s'", use_outfile); PRINT("Writing output to file: %s", use_outfile); printf("# %s version %s\n", PROG_NAME, PROG_VER); print_format_list(); } /* Open pcap binary capture file if requested */ if (use_dumpfile) { if (daemon_mode && (use_dumpfile[0] != '/')) LOG_WARN("Binary capture file path is not absolute and may be inaccessible after daemonizing"); if ((dumpfile = pcap_dump_open(pcap_hnd, use_dumpfile)) == NULL) LOG_DIE("Cannot open binary dump file '%s'", use_dumpfile); PRINT("Writing binary dump file: %s", use_dumpfile); } return; } /* Run program as a daemon process */ void runas_daemon() { int child_pid; FILE *pid_file; if (getppid() == 1) return; /* We're already a daemon */ fflush(NULL); child_pid = fork(); if (child_pid < 0) LOG_DIE("Cannot fork child process"); if (child_pid > 0) exit(0); /* Parent bows out */ /* Configure default output streams */ dup2(1,2); close(0); if (freopen(NULL_FILE, "a", stderr) == NULL) LOG_DIE("Cannot reopen stderr to '%s'", 
NULL_FILE); /* Assign new process group for child */ if (setsid() == -1) LOG_WARN("Cannot assign new session for child process"); umask(022); /* Reset file creation mask */ if (chdir("/") == -1) LOG_DIE("Cannot change run directory to '/'"); /* Create PID file */ if (pid_filename[0] != '/') LOG_WARN("PID file path is not absolute and may be inaccessible after daemonizing"); if ((pid_file = fopen(pid_filename, "w"))) { fprintf(pid_file, "%d", getpid()); fclose(pid_file); } else { LOG_WARN("Cannot open PID file '%s'", pid_filename); } signal(SIGCHLD, SIG_IGN); signal(SIGTSTP, SIG_IGN); signal(SIGTTOU, SIG_IGN); signal(SIGTTIN, SIG_IGN); signal(SIGTERM, &handle_signal); fflush(NULL); return; } /* Change process owner to specified username */ void change_user(char *name) { struct passwd *user = NULL; #ifdef DEBUG ASSERT(name); #endif if ((getuid() != 0) && (geteuid() != 0)) LOG_DIE("You must be root to switch users"); if (!(user = getpwnam(name))) LOG_DIE("User '%s' not found in system", name); /* Change ownership of output files before we drop privs */ if (use_outfile) { if (chown(use_outfile, user->pw_uid, user->pw_gid) < 0) LOG_WARN("Cannot change ownership of output file '%s'", use_outfile); } if (use_dumpfile) { if (chown(use_dumpfile, user->pw_uid, user->pw_gid) < 0) LOG_WARN("Cannot change ownership of dump file '%s'", use_dumpfile); } if (initgroups(name, user->pw_gid)) LOG_DIE("Cannot initialize the group access list"); if (setgid(user->pw_gid)) LOG_DIE("Cannot set GID"); if (setuid(user->pw_uid)) LOG_DIE("Cannot set UID"); /* Test to see if we actually made it to the new user */ if ((getegid() != user->pw_gid) || (geteuid() != user->pw_uid)) LOG_DIE("Cannot change process owner to '%s'", name); return; } /* Process each packet that passes the capture filter */ void parse_http_packet(u_char *args, const struct pcap_pkthdr *header, const u_char *pkt) { struct tm *pkt_time; char *header_line, *req_value; char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; 
char sport[PORTSTRLEN], dport[PORTSTRLEN]; char ts[MAX_TIME_LEN]; int is_request = 0, is_response = 0; unsigned int eth_type = 0, offset; const struct eth_header *eth; const struct ip_header *ip; const struct ip6_header *ip6; const struct tcp_header *tcp; const char *data; int size_ip, size_tcp, size_data, family; /* Check the ethernet type and insert a VLAN offset if necessary */ eth = (struct eth_header *) pkt; eth_type = ntohs(eth->ether_type); if (eth_type == ETHER_TYPE_VLAN) { offset = link_offset + 4; } else { offset = link_offset; } offset += eth_skip_bits; /* Position pointers within packet stream and do sanity checks */ ip = (struct ip_header *) (pkt + offset); ip6 = (struct ip6_header *) (pkt + offset); switch (IP_V(ip)) { case 4: family = AF_INET; break; case 6: family = AF_INET6; break; default: return; } if (family == AF_INET) { size_ip = IP_HL(ip) * 4; if (size_ip < 20) return; if (ip->ip_p != IPPROTO_TCP) return; } else { /* AF_INET6 */ size_ip = sizeof(struct ip6_header); if (ip6->ip6_nh != IPPROTO_TCP) size_ip = process_ip6_nh(pkt, size_ip, header->caplen, offset); if (size_ip < 40) return; } tcp = (struct tcp_header *) (pkt + offset + size_ip); size_tcp = TH_OFF(tcp) * 4; if (size_tcp < 20) return; data = (char *) (pkt + offset + size_ip + size_tcp); size_data = (header->caplen - (offset + size_ip + size_tcp)); if (size_data <= 0) return; /* Check if we appear to have a valid request or response */ if (is_request_method(data)) { is_request = 1; } else if (strncmp(data, HTTP_STRING, strlen(HTTP_STRING)) == 0) { is_response = 1; } else { return; } /* Copy packet data to editable buffer that was created in main() */ if (size_data > BUFSIZ) size_data = BUFSIZ; memcpy(buf, data, size_data); buf[size_data] = '\0'; /* Parse header line, bail if malformed */ if ((header_line = parse_header_line(buf)) == NULL) return; if (is_request) { if (parse_client_request(header_line)) return; } else if (is_response) { if (parse_server_response(header_line)) return; } 
/* Iterate through request/entity header fields */ while ((header_line = parse_header_line(NULL)) != NULL) { if ((req_value = strchr(header_line, ':')) == NULL) continue; *req_value++ = '\0'; while (isspace(*req_value)) req_value++; insert_value(header_line, req_value); } /* Grab source/destination IP addresses */ if (family == AF_INET) { inet_ntop(family, &ip->ip_src, saddr, sizeof(saddr)); inet_ntop(family, &ip->ip_dst, daddr, sizeof(daddr)); } else { /* AF_INET6 */ inet_ntop(family, &ip6->ip_src, saddr, sizeof(saddr)); inet_ntop(family, &ip6->ip_dst, daddr, sizeof(daddr)); } insert_value("source-ip", saddr); insert_value("dest-ip", daddr); /* Grab source/destination ports */ snprintf(sport, PORTSTRLEN, "%d", ntohs(tcp->th_sport)); snprintf(dport, PORTSTRLEN, "%d", ntohs(tcp->th_dport)); insert_value("source-port", sport); insert_value("dest-port", dport); /* Extract packet capture time */ pkt_time = localtime((time_t *) &header->ts.tv_sec); strftime(ts, MAX_TIME_LEN, "%Y-%m-%d %H:%M:%S", pkt_time); insert_value("timestamp", ts); if (rate_stats) { update_host_stats(get_value("host"), header->ts.tv_sec); clear_values(); } else { print_format_values(); } if (dumpfile) pcap_dump((unsigned char *) dumpfile, header, pkt); num_parsed++; if (parse_count && (num_parsed >= parse_count)) pcap_breakloop(pcap_hnd); return; } /* Iterate through IPv6 extension headers looking for a TCP header. Returns the total size of the IPv6 header, including all extension headers. Return 0 to abort processing of this packet. 
*/ int process_ip6_nh(const u_char *pkt, int size_ip, unsigned int caplen, unsigned int offset) { const struct ip6_ext_header *ip6_eh; unsigned int len = caplen - offset; ip6_eh = (struct ip6_ext_header *) (pkt + offset + size_ip); while (ip6_eh->ip6_eh_nh != IPPROTO_TCP) { switch (ip6_eh->ip6_eh_nh) { case 0: /* Hop-by-hop options */ case 43: /* Routing */ case 44: /* Fragment */ case 51: /* Authentication Header */ case 50: /* Encapsulating Security Payload */ case 60: /* Destination Options */ size_ip = size_ip + (ip6_eh->ip6_eh_len * 8) + 8; break; case 59: /* No next header */ default: return 0; } if (size_ip > len) return 0; ip6_eh = (struct ip6_ext_header *) (pkt + offset + size_ip); } /* Next header is TCP, so increment past the final extension header */ size_ip = size_ip + (ip6_eh->ip6_eh_len * 8) + 8; return size_ip; } /* Tokenize a HTTP header into lines; the first call should pass the string to tokenize, all subsequent calls for the same string should pass NULL */ char *parse_header_line(char *header_line) { static char *pos; char *tmp; if (header_line) pos = header_line; /* Search for a '\n' line terminator, ignoring a leading '\r' if it exists (per RFC2616 section 19.3) */ tmp = strchr(pos, '\n'); if (!tmp && header_line) { return header_line; } else if (!tmp) { return NULL; } *tmp = '\0'; if (*(tmp - 1) == '\r') *(--tmp) = '\0'; if (tmp == pos) return NULL; /* Reached the end of the header */ header_line = pos; /* Increment past the '\0' character(s) inserted above */ if (*tmp == '\0') { tmp++; if (*tmp == '\0') tmp++; } pos = tmp; return header_line; } /* Parse a HTTP client request; bail at first sign of an invalid request */ int parse_client_request(char *header_line) { char *method, *request_uri, *http_version; #ifdef DEBUG ASSERT(header_line); ASSERT(strlen(header_line) > 0); #endif method = header_line; if ((request_uri = strchr(method, ' ')) == NULL) return 1; *request_uri++ = '\0'; while (isspace(*request_uri)) request_uri++; if 
((http_version = strchr(request_uri, ' ')) != NULL) { *http_version++ = '\0'; while (isspace(*http_version)) http_version++; if (strncmp(http_version, HTTP_STRING, strlen(HTTP_STRING)) != 0) return 1; insert_value("http-version", http_version); } insert_value("method", method); insert_value("request-uri", request_uri); insert_value("direction", ">"); return 0; } /* Parse a HTTP server response; bail at first sign of an invalid response */ int parse_server_response(char *header_line) { char *http_version, *status_code, *reason_phrase; #ifdef DEBUG ASSERT(header_line); ASSERT(strlen(header_line) > 0); #endif http_version = header_line; if ((status_code = strchr(http_version, ' ')) == NULL) return 1; *status_code++ = '\0'; while (isspace(*status_code)) status_code++; if ((reason_phrase = strchr(status_code, ' ')) == NULL) return 1; *reason_phrase++ = '\0'; while (isspace(*reason_phrase)) reason_phrase++; insert_value("http-version", http_version); insert_value("status-code", status_code); insert_value("reason-phrase", reason_phrase); insert_value("direction", "<"); return 0; } /* Handle signals for clean reloading or shutdown */ void handle_signal(int sig) { #ifdef DEBUG ASSERT(sig > 0); #endif switch (sig) { case SIGHUP: LOG_PRINT("Caught SIGHUP, reloading..."); print_stats(); if (rate_stats) cleanup_rate_stats(); open_outfiles(); if (rate_stats) init_rate_stats(rate_interval, use_infile, rate_threshold); return; case SIGINT: LOG_PRINT("Caught SIGINT, shutting down..."); print_stats(); cleanup(); break; case SIGTERM: LOG_PRINT("Caught SIGTERM, shutting down..."); print_stats(); cleanup(); break; default: LOG_WARN("Ignoring unknown signal '%d'", sig); return; } exit(sig); } /* Perform end of run tasks and prepare to exit gracefully */ void cleanup() { /* This may have already been called, but might not have depending on how we got here */ if (pcap_hnd) pcap_breakloop(pcap_hnd); if (rate_stats) cleanup_rate_stats(); fflush(NULL); free_format(); free_methods(); if (buf) 
free(buf); /* Note that this won't get removed if we've switched to a user that doesn't have permission to delete the file */ if (daemon_mode) remove(pid_filename); if (pcap_hnd) pcap_close(pcap_hnd); return; } /* Print packet capture statistics */ void print_stats() { struct pcap_stat pkt_stats; float run_time; if (rate_stats) display_rate_stats(use_infile, rate_threshold); if (pcap_hnd && !use_infile) { if (pcap_stats(pcap_hnd, &pkt_stats) != 0) { WARN("Cannot obtain packet capture statistics: %s", pcap_geterr(pcap_hnd)); return; } LOG_PRINT("%u packets received, %u packets dropped, %u http packets parsed", \ pkt_stats.ps_recv, pkt_stats.ps_drop, num_parsed); run_time = (float) (time(0) - start_time); if (run_time > 0) { LOG_PRINT("%0.1f packets/min, %0.1f http packets/min", \ ((pkt_stats.ps_recv * 60) / run_time), ((num_parsed * 60) / run_time)); } } else if (pcap_hnd) { PRINT("%u http packets parsed", num_parsed); } return; } /* Display startup/informational banner */ void display_banner() { PRINT("%s version %s -- " "HTTP logging and information retrieval tool", PROG_NAME, PROG_VER); PRINT("Copyright (c) 2005-2014 Jason Bittel "); return; } /* Display program usage information */ void display_usage() { display_banner(); printf("Usage: %s [ -dFhpqs ] [-b file ] [ -f format ] [ -i device ] [ -l threshold ]\n" " [ -m methods ] [ -n count ] [ -o file ] [ -P file ] [ -r file ]\n" " [ -t seconds] [ -u user ] [ 'expression' ]\n\n", PROG_NAME); printf(" -b file write HTTP packets to a binary dump file\n" " -d run as daemon\n" " -f format specify output format string\n" " -F force output flush\n" " -h print this help information\n" " -i device listen on this interface\n" " -l threshold specify a rps threshold for rate statistics\n" " -m methods specify request methods to parse\n" " -n count set number of HTTP packets to parse\n" " -o file write output to a file\n" " -p disable promiscuous mode\n" " -P file use custom PID filename when running in daemon mode \n" " -q 
suppress non-critical output\n" " -r file read packets from input file\n" " -s run in HTTP requests per second mode\n" " -t seconds specify the display interval for rate statistics\n" " -u user set process owner\n" " expression specify a bpf-style capture filter\n\n"); printf("Additional information can be found at:\n" " http://dumpsterventures.com/jason/httpry\n\n"); exit(EXIT_SUCCESS); } int main(int argc, char **argv) { int opt; extern char *optarg; extern int optind; int loop_status; signal(SIGHUP, &handle_signal); signal(SIGINT, &handle_signal); /* Process command line arguments */ while ((opt = getopt(argc, argv, "b:df:Fhpqi:l:m:n:o:P:r:st:u:S:")) != -1) { switch (opt) { case 'b': use_dumpfile = optarg; break; case 'd': daemon_mode = 1; use_syslog = 1; break; case 'f': format_str = optarg; break; case 'F': force_flush = 1; break; case 'h': display_usage(); break; case 'i': interface = optarg; break; case 'l': rate_threshold = atoi(optarg); break; case 'm': methods_str = optarg; break; case 'n': parse_count = atoi(optarg); break; case 'o': use_outfile = optarg; break; case 'p': set_promisc = 0; break; case 'P': pid_filename = optarg; break; case 'q': quiet_mode = 1; break; case 'r': use_infile = optarg; break; case 's': rate_stats = 1; break; case 't': rate_interval = atoi(optarg); break; case 'u': new_user = optarg; break; case 'S': eth_skip_bits = atoi(optarg); break; default: display_usage(); } } display_banner(); if (daemon_mode && !use_outfile) LOG_DIE("Daemon mode requires an output file"); if (parse_count < 0) LOG_DIE("Invalid -n value, must be 0 or greater"); if (rate_interval < 1) LOG_DIE("Invalid -t value, must be 1 or greater"); if (rate_threshold < 1) LOG_DIE("Invalid -l value, must be 1 or greater"); if (argv[optind] && *(argv[optind])) { capfilter = argv[optind]; } else { capfilter = default_capfilter; } if (!format_str) format_str = default_format; if (rate_stats) format_str = rate_format; parse_format_string(format_str); if (!methods_str) 
methods_str = default_methods; parse_methods_string(methods_str); if (force_flush) { if (setvbuf(stdout, NULL, _IONBF, 0) != 0) LOG_WARN("Cannot disable buffering on stdout"); } if (!pid_filename) pid_filename = PID_FILENAME; pcap_hnd = prepare_capture(interface, set_promisc, use_infile, capfilter); open_outfiles(); if (daemon_mode) runas_daemon(); if (new_user) change_user(new_user); if ((buf = malloc(BUFSIZ + 1)) == NULL) LOG_DIE("Cannot allocate memory for packet data buffer"); if (rate_stats) init_rate_stats(rate_interval, use_infile, rate_threshold); start_time = time(0); loop_status = pcap_loop(pcap_hnd, -1, &parse_http_packet, NULL); if (loop_status == -1) { LOG_DIE("Problem reading packets from interface: %s", pcap_geterr(pcap_hnd)); } else if (loop_status == -2) { PRINT("Loop halted, shutting down..."); } print_stats(); cleanup(); return loop_status == -1 ? EXIT_FAILURE : EXIT_SUCCESS; } httpry-httpry-0.1.8/methods.c000066400000000000000000000101131234364221200162040ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ /* The methods data structure is an unbalanced binary tree. All packets are checked to see if they have a method contained here; any packets that do not will be ignored. This doesn't use a hash because the length of the potential method is not known. At this point in the main processing loop the packet data is still in a static buffer, so this gives us a simpler solution. Perhaps at some point the flow of the packet processing will be changed and we can switch to a more traditional lookup table approach. 
*/ #include #include #include #include #include "error.h" #include "methods.h" #include "utility.h" typedef struct method_node METHOD_NODE; struct method_node { char *method; METHOD_NODE *left, *right; }; static METHOD_NODE *methods = NULL; int insert_method(char *str, size_t len); void free_node(METHOD_NODE *node); /* Parse and insert methods from methods string */ void parse_methods_string(char *str) { char *method, *tmp, *i; int num_methods = 0; size_t len; #ifdef DEBUG ASSERT(str); #endif len = strlen(str); if (len == 0) LOG_DIE("Empty methods string provided"); /* Make a temporary copy of the string so we don't modify the original */ if ((tmp = str_duplicate(str)) == NULL) LOG_DIE("Cannot allocate memory for methods string buffer"); for (i = tmp; (method = strtok(i, ",")); i = NULL) { method = str_strip_whitespace(method); method = str_tolower(method); len = strlen(method); if (len == 0) continue; if (insert_method(method, len)) num_methods++; } free(tmp); if (num_methods == 0) LOG_DIE("No valid methods found in string"); return; } /* Insert a new method into the structure */ int insert_method(char *method, size_t len) { METHOD_NODE **node = &methods; int cmp; #ifdef DEBUG ASSERT(method); ASSERT(strlen(method) > 0); #endif while (*node) { cmp = str_compare(method, (*node)->method); if (cmp > 0) { node = &(*node)->right; } else if (cmp < 0) { node = &(*node)->left; } else { WARN("Method '%s' already provided", method); return 0; } } if ((*node = (METHOD_NODE *) malloc(sizeof(METHOD_NODE))) == NULL) { LOG_DIE("Cannot allocate memory for method node"); } if (((*node)->method = (char *) malloc(len + 1)) == NULL) { LOG_DIE("Cannot allocate memory for method string"); } str_copy((*node)->method, method, len + 1); (*node)->left = (*node)->right = NULL; return 1; } /* Search data structure for a matching method */ int is_request_method(const char *str) { METHOD_NODE *node = methods; int cmp; #ifdef DEBUG ASSERT(node); ASSERT(str); #endif if (strlen(str) == 0) return 
0; while (node) { cmp = str_compare(str, node->method); if (cmp > 0) { node = node->right; } else if (cmp < 0) { node = node->left; } else { return 1; } } return 0; } /* Wrapper function to free allocated memory at program termination */ void free_methods() { free_node(methods); return; } /* Recursively free all children of the parameter node */ void free_node(METHOD_NODE *node) { if (!node) return; free_node(node->left); free_node(node->right); free(node->method); free(node); return; } httpry-httpry-0.1.8/methods.h000066400000000000000000000006431234364221200162200ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_METHODS_H #define _HAVE_METHODS_H void parse_methods_string(char *str); int is_request_method(const char *str); void free_methods(); #endif /* ! _HAVE_METHODS_H */ httpry-httpry-0.1.8/rate.c000066400000000000000000000304021234364221200154770ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #include #include #include #include #include #include #include #include "config.h" #include "error.h" #include "rate.h" #include "utility.h" #define MAX_HOST_LEN 255 #define HASHSIZE 2048 #define NODE_BLOCKSIZE 100 #define NODE_ALLOC_BLOCKSIZE 10 struct host_stats { char host[MAX_HOST_LEN + 1]; unsigned int count; time_t first_packet; time_t last_packet; struct host_stats *next; }; struct thread_args { char *use_infile; unsigned int rate_interval; int rate_threshold; }; void create_rate_stats_thread(int rate_interval, char *use_infile, int rate_threshold); void exit_rate_stats_thread(); void *run_stats(void *args); struct host_stats *remove_node(struct host_stats *node, struct host_stats *prev); 
struct host_stats *get_host(char *str); struct host_stats *get_node(); static pthread_t thread; static int thread_created = 0; static pthread_mutex_t stats_lock; static struct host_stats **stats = NULL; static struct host_stats *free_stack = NULL; static struct host_stats **block_alloc = NULL; static struct host_stats totals; static struct thread_args thread_args; /* Initialize rate stats counters and structures, and start up the stats thread if necessary */ void init_rate_stats(int rate_interval, char *use_infile, int rate_threshold) { /* Initialize host totals */ totals.count = 0; totals.first_packet = 0; totals.last_packet = 0; /* Allocate host stats hash array */ if ((stats = (struct host_stats **) calloc(HASHSIZE, sizeof(struct host_stats *))) == NULL) LOG_DIE("Cannot allocate memory for host stats"); if (!use_infile) create_rate_stats_thread(rate_interval, use_infile, rate_threshold); return; } /* Spawn a thread for updating and printing rate statistics */ void create_rate_stats_thread(int rate_interval, char *use_infile, int rate_threshold) { sigset_t set; int s; if (thread_created) return; thread_args.use_infile = use_infile; thread_args.rate_interval = rate_interval; thread_args.rate_threshold = rate_threshold; sigemptyset(&set); sigaddset(&set, SIGINT); sigaddset(&set, SIGHUP); s = pthread_mutex_init(&stats_lock, NULL); if (s != 0) LOG_DIE("Statistics thread mutex initialization failed with error %d", s); s = pthread_sigmask(SIG_BLOCK, &set, NULL); if (s != 0) LOG_DIE("Statistics thread signal blocking failed with error %d", s); s = pthread_create(&thread, NULL, run_stats, (void *) &thread_args); if (s != 0) LOG_DIE("Statistics thread creation failed with error %d", s); s = pthread_sigmask(SIG_UNBLOCK, &set, NULL); if (s != 0) LOG_DIE("Statistics thread signal unblocking failed with error %d", s); thread_created = 1; return; } /* Attempt to cancel the stats thread, cleanup allocated memory and clear necessary counters and structures */ void 
cleanup_rate_stats() { struct host_stats **i; exit_rate_stats_thread(); if (block_alloc != NULL) { for (i = block_alloc; *i; i++) { free(*i); } free(block_alloc); block_alloc = NULL; } if (stats != NULL) { free(stats); stats = NULL; } free_stack = NULL; return; } /* Explicitly exit rate statistics thread */ void exit_rate_stats_thread() { int s; void *retval; if (!thread_created) return; s = pthread_cancel(thread); if (s != 0) LOG_WARN("Statistics thread cancellation failed with error %d", s); s = pthread_join(thread, &retval); if (s != 0) LOG_WARN("Statistics thread join failed with error %d", s); if (retval != PTHREAD_CANCELED) LOG_WARN("Statistics thread exit value was unexpected"); thread_created = 0; s = pthread_mutex_destroy(&stats_lock); if (s != 0) LOG_WARN("Statistcs thread mutex destroy failed with error %d", s); return; } /* This is our statistics thread */ void *run_stats (void *args) { struct thread_args *thread_args = (struct thread_args *) args; while (1) { sleep(thread_args->rate_interval); display_rate_stats(thread_args->use_infile, thread_args->rate_threshold); } return (void *) 0; } /* Display the running average within each valid stats node */ void display_rate_stats(char *use_infile, int rate_threshold) { time_t now; char st_time[MAX_TIME_LEN]; unsigned int delta, rps = 0; int i; struct host_stats *node, *prev; if (stats == NULL) return; if (thread_created) pthread_mutex_lock(&stats_lock); if (use_infile) { now = totals.last_packet; } else { now = time(NULL); } strftime(st_time, MAX_TIME_LEN, "%Y-%m-%d %H:%M:%S", localtime(&now)); #ifdef DEBUG int j, num_buckets = 0, num_chain, max_chain = 0, num_nodes = 0; for (j = 0; j < HASHSIZE; j++) { if (stats[j]) num_buckets++; num_chain = 0; for (node = stats[j]; node != NULL; node = node->next) num_chain++; if (num_chain > max_chain) max_chain = num_chain; num_nodes += num_chain; } PRINT("----------------------------"); PRINT("Hash buckets: %d", HASHSIZE); PRINT("Nodes inserted: %d", num_nodes); 
PRINT("Buckets in use: %d", num_buckets); PRINT("Hash collisions: %d", num_nodes - num_buckets); PRINT("Longest hash chain: %d", max_chain); PRINT("----------------------------"); #endif /* Display rate stats for each valid host */ for (i = 0; i < HASHSIZE; i++) { node = stats[i]; prev = NULL; while (node != NULL) { delta = now - node->first_packet; if (delta > 0) { rps = (unsigned int) ceil(node->count / (float) delta); } else { rps = 0; } if (rps >= rate_threshold) { printf("%s%s%s%s%u rps\n", st_time, FIELD_DELIM, node->host, FIELD_DELIM, rps); prev = node; node = node->next; } else { node = remove_node(node, prev); } } } /* Display rate totals */ delta = (unsigned int) (now - totals.first_packet); if (delta > 0) printf("%s%stotals%s%3.2f rps\n", st_time, FIELD_DELIM, FIELD_DELIM, (float) totals.count / delta); if (thread_created) pthread_mutex_unlock(&stats_lock); return; } /* Remove the given node from the hash and return it to the free stack; returns the correct node for continuing to traverse the hash */ struct host_stats *remove_node(struct host_stats *node, struct host_stats *prev) { struct host_stats *next; unsigned int hashval; /* Unlink the node from the hash */ if (prev == NULL) { hashval = hash_str(node->host, HASHSIZE); if (node->next) { stats[hashval] = node->next; } else { stats[hashval] = NULL; } next = stats[hashval]; } else { if (node->next) { prev->next = node->next; } else { prev->next = NULL; } next = prev->next; } /* Add the node to the head of the free stack */ node->next = free_stack; free_stack = node; return next; } /* Update the stats for a given host; if the host is not found in the hash, add it */ void update_host_stats(char *host, time_t t) { struct host_stats *node; unsigned int hashval; if ((host == NULL) || (stats == NULL)) return; if (thread_created) pthread_mutex_lock(&stats_lock); if ((node = get_host(host)) == NULL) { node = get_node(); hashval = hash_str(host, HASHSIZE); #ifdef DEBUG ASSERT((hashval >= 0) && (hashval < 
HASHSIZE)); #endif str_copy(node->host, host, MAX_HOST_LEN); node->count = 0; node->first_packet = t; /* Link node into hash */ node->next = stats[hashval]; stats[hashval] = node; } if (node->first_packet == 0) node->first_packet = t; node->last_packet = t; node->count++; if (totals.first_packet == 0) totals.first_packet = t; totals.last_packet = t; totals.count++; if (thread_created) pthread_mutex_unlock(&stats_lock); return; } /* Lookup a particular node in hash; return pointer to node if found, NULL otherwise */ struct host_stats *get_host(char *str) { struct host_stats *node; #ifdef DEBUG ASSERT(str); ASSERT(strlen(str) > 0); ASSERT((hash_str(str, HASHSIZE) >= 0) && (hash_str(str, HASHSIZE) < HASHSIZE)); #endif for (node = stats[hash_str(str, HASHSIZE)]; node != NULL; node = node->next) if (str_compare(str, node->host) == 0) return node; return NULL; } /* Get a new node from either the free stack or an allocated block; if the block is empty, allocate a new chunk of memory */ struct host_stats *get_node() { static struct host_stats *block, *tail, **mv; struct host_stats *head, **tmp; static int alloc_size; /* Initialize static variables as necessary */ if (block_alloc == NULL) { block = NULL; alloc_size = 0; } if (free_stack != NULL) { /* Get node from free stack */ head = free_stack; free_stack = free_stack->next; head->next = NULL; } else if (block != NULL) { /* Get node from allocated block */ head = block; if (block == tail) { block = NULL; } else { block++; } } else { /* Out of nodes, allocate a new block */ if ((block = (struct host_stats *) malloc(NODE_BLOCKSIZE * sizeof(struct host_stats))) == NULL) { LOG_DIE("Cannot allocate memory for node block"); } /* Store pointer to allocated block so we can free it later */ if (block_alloc == NULL) { if ((block_alloc = (struct host_stats **) malloc(NODE_ALLOC_BLOCKSIZE * sizeof(struct host_stats *))) == NULL) { LOG_DIE("Cannot allocate memory for blocks array"); } mv = block_alloc; } *mv = block; if (++alloc_size 
% NODE_ALLOC_BLOCKSIZE == 0) { tmp = realloc(block_alloc, ((alloc_size + NODE_ALLOC_BLOCKSIZE) * sizeof(struct host_stats *))); if (tmp == NULL) { LOG_DIE("Cannot re-allocate memory for blocks array"); } block_alloc = tmp; mv = block_alloc + alloc_size - 1; } mv++; *mv = NULL; tail = block + NODE_BLOCKSIZE - 1; head = block; block++; } return head; } httpry-httpry-0.1.8/rate.h000066400000000000000000000010211234364221200154770ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_RATE_H #define _HAVE_RATE_H void init_rate_stats(int display_interval, char *use_infile, int rate_threshold); void cleanup_rate_stats(); void display_rate_stats(char *use_infile, int rate_threshold); void update_host_stats(char *host, time_t t); #endif /* ! _HAVE_RATE_H */ httpry-httpry-0.1.8/rc.httpry000066400000000000000000000037471234364221200162740ustar00rootroot00000000000000#!/bin/sh # # ---------------------------------------------------- # httpry - HTTP logging and information retrieval tool # ---------------------------------------------------- # # Copyright (c) 2005-2014 Jason Bittel # # # Start/stop/restart httpry as a daemon process. This script # was written for use under Slackware Linux, but should be # easily modifiable for a different system. # # You will need to add an output file here for httpry to write # to while in daemon mode. If there are any additional arguments # that need to be passed to httpry, add them to httpry_args. # output_file="httpry.log" httpry_args="" httpry_start() { if [ ! -n "${output_file}" ] ; then echo "Error: No output file provided; edit ${0} accordingly" exit 1 fi echo "Starting httpry using output file '${output_file}'..." 
if [ -r "/var/run/httpry.pid" ] ; then if [ ps acx | grep -q httpry ] ; then echo "Error: httpry already running with PID `cat /var/run/httpry.pid`" exit 1 else rm -f /var/run/httpry.pid fi fi eval httpry -d -o ${output_file} ${httpry_args} } httpry_stop() { echo "Stopping httpry..." if [ -r "/var/run/httpry.pid" ] ; then kill `cat /var/run/httpry.pid` rm -f /var/run/httpry.pid else killall httpry fi } httpry_reload() { echo "Reloading httpry..." if [ -r "/var/run/httpry.pid" ] ; then kill -HUP `cat /var/run/httpry.pid` else killall -HUP httpry fi } case ${1} in start) httpry_start ;; stop) httpry_stop ;; restart) httpry_stop sleep 1 httpry_start ;; reload) httpry_reload ;; *) echo "Usage: ${0} [start|stop|restart|reload]" ;; esac httpry-httpry-0.1.8/scripts/000077500000000000000000000000001234364221200160705ustar00rootroot00000000000000httpry-httpry-0.1.8/scripts/parse_log.pl000066400000000000000000000270271234364221200204100ustar00rootroot00000000000000#!/usr/bin/perl -w # # ---------------------------------------------------- # httpry - HTTP logging and information retrieval tool # ---------------------------------------------------- # # Copyright (c) 2005-2014 Jason Bittel # use strict; use warnings; use Getopt::Std; use File::Basename; # ----------------------------------------------------------------------------- # GLOBAL CONSTANTS # ----------------------------------------------------------------------------- my $DEFAULT_PLUGIN_DIR = "plugins"; # ----------------------------------------------------------------------------- # GLOBAL VARIABLES # ----------------------------------------------------------------------------- my %enabled = (); my %disabled = (); # Command line arguments my %opts; my $verbose = 0; my $plugin_dir = ""; my $plugin_list = ""; my @input_files = (); # ----------------------------------------------------------------------------- # Main Program # ----------------------------------------------------------------------------- 
get_arguments(); read_plugin_line($plugin_list) if ($plugin_list); read_plugin_dir($plugin_dir) if ($plugin_dir); read_plugin_dir() if (!$plugin_list && !$plugin_dir); die "Error: No plugins loaded\n" if (keys %enabled == 0); print "Info: " . (keys %enabled) . " plugins loaded\n" if $verbose; process_logfiles(); end_plugins(); # ----------------------------------------------------------------------------- # Parse a comma-delmited string for plugins to initialize # ----------------------------------------------------------------------------- sub read_plugin_line { my $plugin_list = shift; my $i = 0; foreach (split /,/, $plugin_list) { $_ =~ s/^\s+//; $_ =~ s/\s+$//; next if ($_ =~ /^$/); load_plugin($_); $i++; } warn "Warning: No plugins found in plugin list\n" if ($i == 0); print "Info: $i plugins found in plugin list\n" if $verbose; return; } # ----------------------------------------------------------------------------- # Search a directory for plugins to initialize # ----------------------------------------------------------------------------- sub read_plugin_dir { my $custom_dir = shift; my $plugin_dir; my $i = 0; # If a custom plugin directory, assume the user knows best; otherwise, # search the current dir and script base dir for a default plugin folder if ($custom_dir) { $custom_dir =~ s/\/$//; $plugin_dir = $custom_dir; die "Error: '$plugin_dir' is not a valid directory\n" unless (-d $plugin_dir); } else { if (-d './' . $DEFAULT_PLUGIN_DIR) { $plugin_dir = './' . $DEFAULT_PLUGIN_DIR; } elsif (-d dirname($0) . '/' . basename($DEFAULT_PLUGIN_DIR)) { $plugin_dir = dirname($0) . '/' . 
basename($DEFAULT_PLUGIN_DIR); } else { die "Error: Cannot find the default '$DEFAULT_PLUGIN_DIR' plugin directory\n"; } } print "Info: Reading plugin directory '$plugin_dir'\n" if $verbose; # Load all plugins found in directory opendir PLUGINDIR, $plugin_dir or die "Error: Cannot find or access '$plugin_dir': $!\n"; foreach (grep /\.pm$/, readdir(PLUGINDIR)) { load_plugin($plugin_dir . '/' . $_); $i++; } closedir(PLUGINDIR); warn "Warning: No plugins found in $plugin_dir\n" if ($i == 0); print "Info: $i plugins found in '$plugin_dir' directory\n" if $verbose; return; } # ----------------------------------------------------------------------------- # Load and initialize plugin from a file # ----------------------------------------------------------------------------- sub load_plugin { my $path = shift; my $p = (fileparse($path, '\.pm'))[0]; my $dir = dirname($path); print "Info: Loading plugin file '$path'\n" if $verbose; if (! -e $path) { warn "Warning: $p: Cannot find or access '$path'\n"; return; } if (exists $enabled{$p}) { warn "Warning: $p: Name already registered\n"; return; } eval 'require $path'; if ($@) { warn "Warning: $p: $@" if $verbose; warn "Warning: $p: Failed to load...disabling\n"; delete $enabled{$p}; return; } unless ($enabled{$p}->can('main')) { warn "Warning: $p: Cannot find a required main() function...disabling\n"; delete $enabled{$p}; return; } if ($enabled{$p}->can('init')) { eval '$enabled{$p}->init($dir)'; if ($@) { warn "Warning: $p: $@" if $verbose; warn "Warning: $p: Failed to initialize...disabling\n"; delete $enabled{$p}; return; } } print "Info: Initialized plugin '$p'\n" if $verbose; return; } # ----------------------------------------------------------------------------- # Create list of each plugin's callback information # ----------------------------------------------------------------------------- sub register_plugin { my $package = (caller)[0]; my $p = (fileparse((caller)[1], '\.pm'))[0]; if ($package ne $p) { die "Warning: 
$p: Package name does not match filename\n"; } if ($package->can('new')) { $enabled{$p} = $package->new(); } else { die "Warning: $p: Cannot find a required new() function\n"; } return; } # ----------------------------------------------------------------------------- # Process all files, passing each line to all registered plugins # ----------------------------------------------------------------------------- sub process_logfiles { my $curr_file; my $curr_line; my @header; my %record; FILE: foreach $curr_file (@input_files) { unless (open INFILE, "$curr_file") { warn "Error: Cannot open $curr_file: $!\n"; next FILE; } print "Info: Processing file '$curr_file'\n" if $verbose; LINE: while ($curr_line = ) { chomp $curr_line; $curr_line =~ s/[^[:print:]\t]//g; # Strip unprintable characters next LINE if $curr_line =~ /^$/; # Handle comment lines if ($curr_line =~ /^#/) { # Check the comment for a field specifier line next LINE unless $curr_line =~ /^# Fields: (.*)$/; @header = map { s/\s//g; lc; } split /\,/, $1; check_fields(@header); if (keys %enabled == 0) { warn "Error: All plugins are disabled...skipping file\n"; next FILE; } %record = (); next LINE; } if (scalar @header == 0) { warn "Error: No field description line found...skipping file\n"; next FILE; } @record{@header} = split /\t/, $curr_line; foreach (keys %enabled) { $enabled{$_}->main(\%record); } } close INFILE or die "Error: Cannot close $curr_file: $!\n";; } return; } # ----------------------------------------------------------------------------- # Check required fields for each plugin against the current header fields # ----------------------------------------------------------------------------- sub check_fields { my @keys = @_; my %fields = map { $keys[$_] => 1 } 0..$#keys; my $p; # Check active plugins to see if they have the required fields PLUGIN: foreach $p (keys %enabled) { next unless $enabled{$p}->can('list'); foreach ($enabled{$p}->list()) { next if $_ eq ''; if (!exists $fields{$_}) { warn 
"Warning: $p: Required field '$_' not found...disabling\n"; $disabled{$p} = $enabled{$p}; delete $enabled{$p}; next PLUGIN; } } } # Check disabled plugins to see if any should be enabled PLUGIN: foreach $p (keys %disabled) { next unless $disabled{$p}->can('list'); foreach ($disabled{$p}->list()) { next if $_ eq ''; next PLUGIN if (!exists $fields{$_}); } print "Info: Plugin $p has been re-enabled\n" if $verbose; $enabled{$p} = $disabled{$p}; delete $disabled{$p}; } return; } # ----------------------------------------------------------------------------- # Call termination function in each loaded plugin # ----------------------------------------------------------------------------- sub end_plugins { my $p; # Enable all disabled plugins so they can be properly ended foreach $p (keys %disabled) { $enabled{$p} = $disabled{$p}; delete $disabled{$p}; } foreach $p (keys %enabled) { if ($enabled{$p}->can('end')) { print "Info: Ending plugin $p\n" if $verbose; eval '$enabled{$p}->end()'; if ($@) { warn "Warning: $p: $@" if $verbose; warn "Warning: $p: Failed to end\n"; } } } return; } # ----------------------------------------------------------------------------- # Retrieve and process command line arguments # ----------------------------------------------------------------------------- sub get_arguments { getopts('d:hp:v', \%opts) or print_usage(); # Print help/usage information to the screen if necessary print_usage() if ($opts{h}); unless ($ARGV[0]) { warn "Error: No input file(s) provided\n"; print_usage(); } # Copy command line arguments to internal variables @input_files = @ARGV; $plugin_list = $opts{p} if ($opts{p}); $plugin_dir = $opts{d} if ($opts{d}); $verbose = 1 if ($opts{v}); return; } # ----------------------------------------------------------------------------- # Print usage/help information to the screen and exit # ----------------------------------------------------------------------------- sub print_usage { die < # package common_log; use POSIX 
qw(strftime mktime); # ----------------------------------------------------------------------------- # GLOBAL VARIABLES # ----------------------------------------------------------------------------- my %requests = (); my $fh; my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); # ----------------------------------------------------------------------------- # Plugin core # ----------------------------------------------------------------------------- main::register_plugin(); sub new { return bless {}; } sub init { my $self = shift; my $cfg_dir = shift; _load_config($cfg_dir); open OUTFILE, ">$output_file" or die "Cannot open $output_file: $!\n"; $fh = *OUTFILE; return; } sub list { return qw(direction source-ip dest-ip); } sub main { my $self = shift; my $record = shift; my $line = ""; my $line_suffix; my ($sec, $min, $hour, $mday, $mon, $year); my $tz_offset; if ($record->{'direction'} eq '>') { return unless exists $record->{'timestamp'}; return unless exists $record->{'method'}; return unless exists $record->{'request-uri'}; return unless exists $record->{'http-version'}; # Build the output line: begin with client (remote host) address $line .= $record->{'source-ip'}; # Append ident and authuser fields # NOTE: we use the ident field to display the # hostname/ip of the destination site if (exists $record->{'host'}) { $line .= " $record->{'host'} - "; } else { $line .= " $record->{'dest-ip'} - "; } # Append date field $record->{'timestamp'} =~ /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/; ($sec, $min, $hour, $mday, $mon, $year) = ($6, $5, $4, $3, $2-1, $1-1900); # NOTE: We assume the current timezone here; that may not always be accurate, but # timezone data is not stored in the httpry log files $tz_offset = strftime("%z", localtime(mktime($sec, $min, $hour, $mday, $mon, $year))); $line .= sprintf("[%02d/%3s/%04d:%02d:%02d:%02d %5s]", $mday, $months[$mon], $year+1900, $hour, $min, $sec, $tz_offset); # Append request fields $line .= " 
\"$record->{'method'} $record->{'request-uri'} $record->{'http-version'}\""; if ($combined_format) { # Append referer if (exists $record->{'referer'}) { $line .= "\t \"$record->{'referer'}\""; } else { $line .= "\t \"-\""; } # Append user agent string if (exists $record->{'user-agent'}) { $line .= " \"$record->{'user-agent'}\""; } else { $line .= " \"-\""; } } if ($ignore_response) { print $fh "$line - -\n"; } else { push(@{ $requests{"$record->{'source-ip'}$record->{'dest-ip'}"} }, $line); } } elsif ($record->{'direction'} eq '<') { # NOTE: This is a bit naive, but functional. Basically we match a request with the # next response from that IP pair in the log file. This means that under busy # conditions the response could be matched to the wrong request but currently there # isn't a more accurate way to tie them together. if (exists $requests{"$record->{'dest-ip'}$record->{'source-ip'}"}) { $line = shift(@{ $requests{"$record->{'dest-ip'}$record->{'source-ip'}"} }); return unless $line; if (! @{ $requests{"$record->{'dest-ip'}$record->{'source-ip'}"} }) { delete $requests{"$record->{'dest-ip'}$record->{'source-ip'}"}; } } else { return; } ($line, $line_suffix) = split /\t/, $line, 2 if $combined_format; # Append status code if (exists $record->{'status-code'}) { $line .= " $record->{'status-code'}"; } else { $line .= " -"; } # Append byte count if (exists $record->{'content-length'}) { $line .= " $record->{'content-length'}"; } else { $line .= " -"; } $line .= $line_suffix if $combined_format; print $fh "$line\n"; } return; } sub end { # TODO: Print lines that don't have a matching response? 
close $fh or die "Cannot close $fh: $!\n"; return; } # ----------------------------------------------------------------------------- # Load config file and check for required options # ----------------------------------------------------------------------------- sub _load_config { my $cfg_dir = shift; # Load config file; by default in same directory as plugin if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") { require "$cfg_dir/" . __PACKAGE__ . ".cfg"; } else { die "No config file found\n"; } # Check for required options and combinations if (!$output_file) { die "No output file provided\n"; } $output_dir = "." if (!$output_dir); $output_dir =~ s/\/$//; # Remove trailing slash return; } 1; httpry-httpry-0.1.8/scripts/plugins/content_analysis.cfg000066400000000000000000000022731234364221200236130ustar00rootroot00000000000000# # Config file for content_analysis.pm # # $terms_file (required) # A text file containing a list of whitespace # delimited keywords to search for # # $output_dir (not required) # Specify a directory to write client detail # files. If not specified, defaults to current # directory # # $output_file (required) # A valid path to write an output file # containing the collected summary data. # # $file_prefix (not required) # A string that will be prepended to all # output flow files. # # $cluster_flows (required) # Toggle the clustering of scored flows, which # tries to auto-select high scoring flows # versus outputting all scored flows. # # $window_size (required) # Specify the number of lines used for context # around lines containing flagged terms. # # $score_threshold (required) # Flows with a score below this value will be # ignored. # # $terms_threshold (required) # Flows with a term count below this value will # be ignored. 
# $terms_file = ""; $output_dir = ""; $output_file = "content_analysis.txt"; $file_prefix = "flows_"; $cluster_flows = 1; $window_size = 60; $score_threshold = 10; $terms_threshold = 5; 1; httpry-httpry-0.1.8/scripts/plugins/content_analysis.pm000066400000000000000000000411441234364221200234700ustar00rootroot00000000000000# # ---------------------------------------------------- # httpry - HTTP logging and information retrieval tool # ---------------------------------------------------- # # Copyright (c) 2005-2014 Jason Bittel # package content_analysis; use warnings; # ----------------------------------------------------------------------------- # GLOBAL VARIABLES # ----------------------------------------------------------------------------- # Counter variables my $line_cnt = 0; my $flow_cnt = 0; my $flow_line_cnt = 0; my $flow_min_len = 999999; my $flow_max_len = 0; # Data structures my %flow = (); # Metadata about active flows my %flow_buffer = (); # Individual flow data lines my %scored_flow = (); my @terms = (); # ----------------------------------------------------------------------------- # Plugin core # ----------------------------------------------------------------------------- main::register_plugin(); sub new { return bless {}; } sub init { my $self = shift; my $cfg_dir = shift; my $file; _load_config($cfg_dir); _load_terms(); # Remove any existing text files so they don't accumulate opendir DIR, $output_dir or die "Cannot open directory $output_dir: $!\n"; foreach $file (grep /^$file_prefix[\d\.]+\.txt$/, readdir(DIR)) { unlink "$output_dir/$file"; } closedir(DIR); return; } sub list { return qw(direction timestamp source-ip host request-uri); } sub main { my $self = shift; my $record = shift; my $curr_line; my $decoded_uri; return unless $record->{"direction"} eq '>'; $decoded_uri = $record->{"request-uri"}; $decoded_uri =~ s/%(?:25)+/%/g; $decoded_uri =~ s/%(?:0A|0D)/\./ig; $decoded_uri =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg; $line_cnt++; 
$curr_line = "$record->{'timestamp'}\t$record->{'host'}\t$decoded_uri\t$record->{'source-ip'}\t$record->{'dest-ip'}\t>"; # Begin a new flow if one doesn't exist if (!exists $flow{$record->{"source-ip"}}) { $flow_cnt++; $flow{$record->{"source-ip"}}->{"length"} = 0; $flow{$record->{"source-ip"}}->{"score"} = 0; $flow{$record->{"source-ip"}}->{"num_terms"} = 0; $flow{$record->{"source-ip"}}->{"streak"} = 0; $flow{$record->{"source-ip"}}->{"dirty"} = 0; $flow{$record->{"source-ip"}}->{"count"} = 0; } # Insert the current line into the buffer $flow{$record->{"source-ip"}}->{"length"}++; push @{ $flow_buffer{$record->{"source-ip"}} }, $curr_line; # If a term is found, flag the buffer as dirty if (_content_check("$record->{'host'}$decoded_uri", $record->{"source-ip"}) > 0) { $flow{$record->{"source-ip"}}->{"dirty"} = 1; $flow{$record->{"source-ip"}}->{"count"} = $window_size; } else { # Term not found, so if buffer is dirty decrement the window count if ($flow{$record->{"source-ip"}}->{"dirty"} == 1) { $flow{$record->{"source-ip"}}->{"count"}--; } } # If buffer is clean and full, drop the oldest line if (($flow{$record->{"source-ip"}}->{"dirty"} == 0) && ($flow{$record->{"source-ip"}}->{"length"} > $window_size)) { $flow{$record->{"source-ip"}}->{"length"}--; shift @{ $flow_buffer{$record->{"source-ip"}} }; } # If buffer is dirty and the window count is 0, flush it if (($flow{$record->{"source-ip"}}->{"dirty"} == 1) && ($flow{$record->{"source-ip"}}->{"count"} == 0)) { _flush_buffer($record->{"source-ip"}); } return; } sub end { my $ip; foreach $ip (keys %flow) { _flush_buffer($ip); } _write_summary_file(); return; } # ----------------------------------------------------------------------------- # Load config file and check for required options # ----------------------------------------------------------------------------- sub _load_config { my $cfg_dir = shift; # Load config file; by default in same directory as plugin if (-e "$cfg_dir/" . __PACKAGE__ . 
".cfg") { require "$cfg_dir/" . __PACKAGE__ . ".cfg"; } else { die "No config file found\n"; } # Check for required options and combinations if (!$output_file) { die "No output file provided\n"; } if (!$terms_file) { die "No terms file provided\n"; } $output_dir = "." if (!$output_dir); $output_dir =~ s/\/$//; # Remove trailing slash return; } # ----------------------------------------------------------------------------- # Read in query terms from input file # ----------------------------------------------------------------------------- sub _load_terms { my $line; my $term; unless (open TERMS, "$terms_file") { die "Cannot open $terms_file: $!\n"; } while ($line = ) { chomp $line; $line =~ s/\#.*$//; # Remove comments $line =~ s/^\s+//; # Remove leading whitespace $line =~ s/\s+$//; # Remove trailing whitespace $line =~ s/\s+/ /; # Remove sequential whitespace next if $line =~ /^$/; foreach $term (split /\s/, $line) { push(@terms, lc $term) if $term; } } close TERMS or die "Cannot close $terms_file: $!\n"; return; } # ----------------------------------------------------------------------------- # Search for specified terms in each URI, scoring terms according to rules # based on their position and context # ----------------------------------------------------------------------------- sub _content_check { my $uri = lc shift; my $ip = shift; my $term; my $path_offset = index($uri, '/'); my $query_offset = index($uri, '?', $path_offset); my $term_offset; my $num_terms = 0; my $score = 0; my $pos; foreach $term (@terms) { $pos = 0; while (($term_offset = index($uri, $term, $pos)) > -1) { $num_terms++; $flow{$ip}->{"terms"}->{$term}++; # Term found, so apply scoring rules # Rule 1: Apply a base score of 1 $score += 1; # Rule 2: If found in query, add 2 # If found in path, add 1 # If found in hostname, add 0 if (($query_offset > 0) && ($term_offset > $query_offset)) { $score += 2; } elsif (($path_offset > 0) && ($term_offset > $path_offset)) { $score += 1; } else { 
$score += 0; } # Rule 3: If stand-alone word (bracketed by non-alpha chars), add 1 if ((substr($uri, $term_offset-1, 1) !~ /[a-z]/) && (substr($uri, $term_offset+length($term), 1) !~ /[a-z]/)) { $score += 1; } $pos = $term_offset + length($term); } } # Rule 4: If more than one term found, add 1 $score += 1 if ($num_terms > 1); # Rule 5: If a streak (more than 3 successive lines containing # terms) is found, add the length of the streak if ($num_terms == 0) { if ($flow{$ip}->{"streak"} > 3) { $score += $flow{$ip}->{"streak"}; } $flow{$ip}->{"streak"} = 0; } else { $flow{$ip}->{"streak"}++; } $flow{$ip}->{"score"} += $score; $flow{$ip}->{"num_terms"} += $num_terms; return $num_terms; } # ----------------------------------------------------------------------------- # Handle end of flow duties: update statistics, save flow scoring data as # necessary and delete the associated data structures # ----------------------------------------------------------------------------- sub _flush_buffer { my $ip = shift; # Update flow statistics $flow_min_len = $flow{$ip}->{"length"} if ($flow{$ip}->{"length"} < $flow_min_len); $flow_max_len = $flow{$ip}->{"length"} if ($flow{$ip}->{"length"} > $flow_max_len); $flow_line_cnt += $flow{$ip}->{"length"}; # We're only interested if the score meets the thresholds if (($flow{$ip}->{"score"} >= $score_threshold) && ($flow{$ip}->{"num_terms"} >= $terms_threshold)) { $scored_flow{$ip}->{"num_flows"}++; $scored_flow{$ip}->{"score"} += $flow{$ip}->{"score"}; $scored_flow{$ip}->{"num_terms"} += $flow{$ip}->{"num_terms"}; foreach (keys %{ $flow{$ip}->{"terms"} }) { $scored_flow{$ip}->{"terms"}->{$_} += $flow{$ip}->{"terms"}->{$_}; } _write_file($ip); } delete $flow{$ip}; delete $flow_buffer{$ip}; return; } # ----------------------------------------------------------------------------- # Append flow data to a detail file based on client IP # ----------------------------------------------------------------------------- sub _write_file { my $ip = 
shift; my $term; my $line; unless (open OUTFILE, ">>$output_dir/$file_prefix$ip.txt") { warn "Cannot open $output_dir/$file_prefix$ip.txt: $!\n"; return; } print OUTFILE '#' x 80 . "\n"; print OUTFILE "# Fields: timestamp,host,request-uri,source-ip,dest-ip,direction\n"; print OUTFILE "# Length: $flow{$ip}->{'length'} lines (window size: $window_size)\n"; print OUTFILE "# Score: $flow{$ip}->{'score'}\n"; print OUTFILE "# Terms: "; foreach $term (keys %{ $flow{$ip}->{"terms"} }) { print OUTFILE "$term (" . $flow{$ip}->{"terms"}->{$term} . ") "; } print OUTFILE "\n"; foreach $line (@{ $flow_buffer{$ip} }) { print OUTFILE $line, "\n"; } print OUTFILE "\n"; close OUTFILE or die "Cannot close $output_file: $!\n"; return; } # ----------------------------------------------------------------------------- # Format and write summary information to specified output file # ----------------------------------------------------------------------------- sub _write_summary_file { my $ip; my $term; my $scored_flow_cnt = 0; open OUTFILE, ">$output_file" or die "Cannot open $output_file: $!\n"; print OUTFILE "\n\nCONTENT ANALYSIS SUMMARY\n\n"; print OUTFILE "Generated: " . localtime() . "\n"; print OUTFILE "Total lines: $line_cnt\n"; print OUTFILE "Flow lines: $flow_line_cnt\n"; print OUTFILE "Flow count: $flow_cnt\n"; print OUTFILE "Flow length: " . ($flow_cnt > 0 ? "$flow_min_len/$flow_max_len" : "0/0") . " (min/max)\n\n"; if (scalar keys %scored_flow == 0) { print OUTFILE "*** No scored flows found\n"; close OUTFILE or die "Cannot close $output_file: $!\n"; return; } if ($cluster_flows) { _partition_scores(); # Delete flows and associated files from the lower partition foreach $ip (keys %scored_flow) { if ($scored_flow{$ip}->{"cluster"} == 0) { delete $scored_flow{$ip}; unlink "$output_dir/$file_prefix$ip.txt"; } } } foreach (keys %scored_flow) { $scored_flows_cnt += $scored_flow{$_}->{"num_flows"}; } print OUTFILE "Terms file: $terms_file\n"; print OUTFILE "Scored IPs: " . 
(keys %scored_flow) . "\n"; print OUTFILE "Scored flows: $scored_flows_cnt\n\n"; print OUTFILE "Score\tIP\t\tFlows\tTerms\tTerm List\n"; print OUTFILE "-----\t--\t\t-----\t-----\t---------\n"; foreach $ip (sort { $scored_flow{$b}->{"score"} <=> $scored_flow{$a}->{"score"} } keys %scored_flow) { print OUTFILE "$scored_flow{$ip}->{'score'}\t$ip\t$scored_flow{$ip}->{'num_flows'}\t$scored_flow{$ip}->{'num_terms'}\t"; print OUTFILE join(" ", sort keys %{ $scored_flow{$ip}->{"terms"} }) . "\n"; } close OUTFILE or die "Cannot close $output_file: $!\n"; return; } # ----------------------------------------------------------------------------- # Dynamically partition scored flows into sets using the k-means clustering # algorithm; this allows us to trim the low scoring flows off the bottom # without setting arbitrary thresholds or levels # # K-means code originally taken from: http://www.perlmonks.org/?node_id=541000 # Many subsequent modifications and changes have been made # ----------------------------------------------------------------------------- sub _partition_scores() { my $OUTLIER_THRESHOLD = 3; my $MAX_ITERS = 30; my $mean = 0; my $std_dev = 0; my %temp_flow = (); my $ip; my $diff; my $closest; my $dist; my $max_score = 0; my $new_center; my $num_iters = 0; my $sum; my $centroid; my @center; my @members; # Calculate mean and standard deviation foreach (keys %scored_flow) { $mean += $scored_flow{$_}->{"score"}; } $mean = $mean / (scalar keys %scored_flow); foreach (keys %scored_flow) { $std_dev += $scored_flow{$_}->{"score"} * $scored_flow{$_}->{"score"}; } $std_dev = sqrt($std_dev / (scalar keys %scored_flow)); # Build hash of scores to partition, pruning set outliers that are more than # $OUTLIER_THRESHOLD standard deviations from the mean foreach (keys %scored_flow) { if ($scored_flow{$_}->{"score"} > ($mean + ($std_dev * $OUTLIER_THRESHOLD))) { $scored_flow{$_}->{"cluster"} = 1; } elsif ($scored_flow{$_}->{"score"} < ($mean - ($std_dev * $OUTLIER_THRESHOLD))) { 
$scored_flow{$_}->{"cluster"} = 0; } else { $temp_flow{$_}->{"score"} = $scored_flow{$_}->{"score"}; $max_score = $temp_flow{$_}->{"score"} if ($temp_flow{$_}->{"score"} > $max_score); } } # Use two centers, starting one at each end of the scores range @center = (0.0, $max_score); do { $diff = 0; # Assign points to nearest center foreach $ip (keys %temp_flow) { $closest = 0; $dist = abs $temp_flow{$ip}->{"score"} - $center[$closest]; foreach (1..$#center) { if (abs $temp_flow{$ip}->{"score"} - $center[$_] < $dist) { $dist = abs $temp_flow{$ip}->{"score"} - $center[$_]; $closest = $_; } } $temp_flow{$ip}->{"cluster"} = $closest; } # Compute new centers based on mean foreach $centroid (0..$#center) { @members = sort map { $temp_flow{$_}->{"score"} } grep { $temp_flow{$_}->{"cluster"} == $centroid } keys %temp_flow; $sum = 0; foreach (@members) { $sum += $_; } $new_center = @members ? $sum / @members : $center[$centroid]; $diff += abs $center[$centroid] - $new_center; $center[$centroid] = $new_center; } $num_iters++; } while (($diff > 0.01) && ($num_iters <= $MAX_ITERS)); # Update cluster membership in scored flows foreach (keys %temp_flow) { $scored_flow{$_}->{"cluster"} = $temp_flow{$_}->{"cluster"}; } return; } 1; httpry-httpry-0.1.8/scripts/plugins/db_dump.cfg000066400000000000000000000014151234364221200216450ustar00rootroot00000000000000# # Config file for db_dump.pm # # $type (required) # Type of destination database; currenly only # 'mysql' is implemented # # $db (required) # Name of the database to connect to # # $host (required) # Hostname of machine where database is located; # use 'localhost' for the local machine # # $port (required) # Port database is listening on; default is 3306 # # $user (not required) # Username required by database, if necessary # # $pass (not required) # Password required by database, if necessary # # $rmbefore (not required) # Delete all data inserted this many days prior; # 0 disables removal completely # $type = "mysql"; $db = 
"httpry"; $host = "localhost"; $port = 3306; $user = ""; $pass = ""; $rmbefore = 0; 1; httpry-httpry-0.1.8/scripts/plugins/db_dump.mysql000066400000000000000000000025631234364221200222600ustar00rootroot00000000000000-- -- MySQL databse creation script for db_dump.pm -- -- !!! Warning: This script will delete all pre-existing data !!! -- -- This is a rather rudimentary database structure, but it ought -- to at least provide a decent starting point. -- -- -- Create httpry database; wipe database if it already exists -- DROP DATABASE IF EXISTS httpry; CREATE DATABASE httpry; USE httpry; -- -- Create client_data table; wipe table if it already exists -- DROP TABLE IF EXISTS client_data; CREATE TABLE client_data ( id int(10) unsigned NOT NULL auto_increment, timestamp datetime NOT NULL default '0000-00-00 00:00:00', pktstamp datetime NOT NULL default '0000-00-00 00:00:00', src_ip varchar(45) NOT NULL default '', dst_ip varchar(45) NOT NULL default '', hostname varchar(255) NOT NULL default '', uri varchar(255) NOT NULL default '', PRIMARY KEY (id) ) TYPE=InnoDB; -- -- Create server_data table; wipe table if it already exists -- DROP TABLE IF EXISTS server_data; CREATE TABLE server_data ( id int(10) unsigned NOT NULL auto_increment, timestamp datetime NOT NULL default '0000-00-00 00:00:00', pktstamp datetime NOT NULL default '0000-00-00 00:00:00', src_ip varchar(45) NOT NULL default '', dst_ip varchar(45) NOT NULL default '', status_code varchar(255) NOT NULL default '', reason_phrase varchar(255) NOT NULL default '', PRIMARY KEY (id) ) TYPE=InnoDB; httpry-httpry-0.1.8/scripts/plugins/db_dump.pm000066400000000000000000000116701234364221200215260ustar00rootroot00000000000000# # ---------------------------------------------------- # httpry - HTTP logging and information retrieval tool # ---------------------------------------------------- # # Copyright (c) 2005-2014 Jason Bittel # package db_dump; use warnings; use DBI; # 
----------------------------------------------------------------------------- # GLOBAL VARIABLES # ----------------------------------------------------------------------------- my $dbh; # ----------------------------------------------------------------------------- # Plugin core # ----------------------------------------------------------------------------- main::register_plugin(); sub new { return bless {}; } sub init { my $self = shift; my $cfg_dir = shift; my $sql; my $limit; _load_config($cfg_dir); $dbh = _connect_db($type, $db, $host, $port, $user, $pass); # Delete data inserted $rmbefore days prior if ($rmbefore > 0) { my ($year, $mon, $day, $hour, $min, $sec) = (localtime(time-(86400*$rmbefore)))[5,4,3,2,1,0]; $limit = ($year+1900) . "-" . ($mon+1) . "-$day $hour:$min:$sec"; $sql = qq{ DELETE FROM client_data WHERE timestamp < '$limit' }; _execute_query($dbh, $sql); $sql = qq{ DELETE FROM server_data WHERE timestamp < '$limit' }; _execute_query($dbh, $sql); } return; } sub list { return qw(direction timestamp source-ip dest-ip); } sub main { my $self = shift; my $record = shift; my $sth; my ($year, $mon, $day, $hour, $min, $sec) = (localtime)[5,4,3,2,1,0]; my $now = ($year+1900) . "-" . ($mon+1) . "-$day $hour:$min:$sec"; my @values = ($now, $record->{"timestamp"}, $record->{"source-ip"}, $record->{"dest-ip"}); if ($record->{"direction"} eq '>') { push @values, $record->{"host"}, $record->{"request-uri"}; $sth = $dbh->prepare(qq{ INSERT INTO client_data (timestamp, pktstamp, src_ip, dst_ip, hostname, uri) VALUES (?, ?, ?, ?, ?, ?) }); } elsif ($record->{"direction"} eq '<') { push @values, $record->{"status-code"}, $record->{"reason-phrase"}; $sth = $dbh->prepare(qq{ INSERT INTO server_data (timestamp, pktstamp, src_ip, dst_ip, status_code, reason_phrase) VALUES (?, ?, ?, ?, ?, ?) 
}); } $sth->execute(@values); return; } sub end { _disconnect_db(); return; } # ----------------------------------------------------------------------------- # Load config file and check for required options # ----------------------------------------------------------------------------- sub _load_config { my $cfg_dir = shift; # Load config file; by default in same directory as plugin if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") { require "$cfg_dir/" . __PACKAGE__ . ".cfg"; } else { die "No config file found\n"; } # Check for required options and combinations if (!$type) { die "No database type provided\n"; } if (!$db) { die "No database name provided\n"; } if (!$host) { die "No database hostname provided\n"; } $port = '3306' unless ($port); return; } # ----------------------------------------------------------------------------- # Build connection to specified database # ----------------------------------------------------------------------------- sub _connect_db { my $type = shift; my $db = shift; my $host = shift; my $port = shift; my $user = shift; my $pass = shift; my $dbh; my $dsn; $dsn = "DBI:$type:$db"; $dsn .= ":$host" if $host; $dsn .= ":$port" if $port; if ($dbh = DBI->connect($dsn, $user, $pass, { PrintError => 0, RaiseError => 0, AutoCommit => 1 })) { _execute_query($dbh, qq{ USE $db }); } else { die "Cannot connect to database: " . DBI->errstr . "\n"; } return $dbh; } # ----------------------------------------------------------------------------- # Generalized SQL query execution sub # ----------------------------------------------------------------------------- sub _execute_query { my $dbh = shift; my $sql = shift; my $sth; $sth = $dbh->prepare($sql) or die "Cannot prepare query: " . DBI->errstr . "\n"; $sth->execute() or die "Cannot execute query: " . DBI->errstr . 
"\n"; return $sth; }

# -----------------------------------------------------------------------------
# Terminate active database connection
# -----------------------------------------------------------------------------
sub _disconnect_db {
    $dbh->disconnect;
}

1;
httpry-httpry-0.1.8/scripts/plugins/find_proxies.cfg000066400000000000000000000010141234364221200227170ustar00rootroot00000000000000#
# Config file for find_proxies.pm
#
# $output_file (required)
# A valid path to write an output file
# containing the collected summary data.
#
# @proxy_keywords (required)
# An array containing keywords to search
# for proxy usage in the hostname and URI.
#
# $prune_limit (not required)
# Tagged hostnames with fewer than this
# number of hits will be discarded. Can
# be set to any positive integer.

$output_file = "find_proxies.txt";
@proxy_keywords = ( "proxy", "nph-" );
$prune_limit = 10;

1;
httpry-httpry-0.1.8/scripts/plugins/find_proxies.pm000066400000000000000000000150271234364221200226050ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#

package find_proxies;

use warnings;

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------
my $PRUNE_LIMIT = 20;   # Fallback for $prune_limit when the config omits it
my %proxy_lines = ();   # {source-ip}->{host} => number of suspicious requests

# -----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

main::register_plugin();

# Constructor: returns an empty hash blessed into this package
sub new {
    return bless {};
}

# Load the plugin configuration found in the directory passed as $cfg_dir
sub init {
    my $self = shift;
    my $cfg_dir = shift;

    _load_config($cfg_dir);

    return;
}

# Names of the record fields read by main()
sub list {
    return qw(direction source-ip host request-uri);
}

# Examine one record. Only records with direction '>' are considered. A hit is
# tallied for the (source-ip, host) pair when any of these checks succeeds:
#   1. the host or the decoded URI matches a configured keyword
#   2. the decoded URI carries an http:// URL after a .pl/.php/.asp name
#   3. a parameter value, treated as base 64, decodes to text with such a URL
sub main {
    my $self = shift;
    my $record = shift;
    my $word;
    my $len;
    my $encoded_uri;
    my $decoded_uri = "";
    my $request_uri;

    return unless $record->{"direction"} eq '>';

    # Collapse repeated encodings of '%' (%25, %2525, ...) before decoding
    # the remaining %XX escapes
    $request_uri = $record->{"request-uri"};
    $request_uri =~ s/%(?:25)+/%/g;
    $request_uri =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg;

    # Perform hostname and URI keyword search; keywords are applied as
    # case-insensitive patterns
    foreach $word (@proxy_keywords) {
        if (($record->{"host"} =~ /$word/i) || ($request_uri =~ /$word/i)) {
            _record_hit($record);
            return;
        }
    }

    # Perform URI embedded request search; this works, but appears
    # to generate too many false positives to be useful as is
    if ($request_uri =~ /(\.pl|\.php|\.asp).*http:\/\/[^\/:]+/) {
        _record_hit($record);
        return;
    }

    # Third time's the charm; do a base 64 decode of the URI and
    # search again for an embedded request
    if ($request_uri =~ /(\.pl|\.php|\.asp).*=(.+?)(?:\&|\Z)/) {
        $encoded_uri = $2;

        # Keep only base 64 alphabet characters; the remainder must be a
        # whole number of 4-character groups to be worth decoding
        $encoded_uri =~ tr|A-Za-z0-9+=/||cd;
        return if (length($encoded_uri) % 4);

        # Drop padding and translate to the uuencode alphabet so that
        # unpack("u") can do the decoding, at most 60 characters at a time
        # with the decoded length as the leading character of each chunk
        $encoded_uri =~ s/=+$//;
        $encoded_uri =~ tr|A-Za-z0-9+/| -_|;
        while ($encoded_uri =~ /(.{1,60})/gs) {
            $len = chr(32 + length($1)*3/4);
            $decoded_uri .= unpack("u", $len . $1);
        }

        if ($decoded_uri =~ /http:\/\/[^\/:]+/) {
            _record_hit($record);
            return;
        }
    }

    return;
}

# Prune low-count entries and write the report
sub end {
    _prune_hits();
    _write_output_file();

    return;
}

# -----------------------------------------------------------------------------
# Count one suspicious request for the record's source IP and host
# -----------------------------------------------------------------------------
sub _record_hit {
    my $record = shift;

    $proxy_lines{$record->{"source-ip"}}->{$record->{"host"}}++;

    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my $cfg_dir = shift;

    # Load config file; by default in same directory as plugin
    if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") {
        require "$cfg_dir/" . __PACKAGE__ . ".cfg";
    } else {
        die "No config file found\n";
    }

    # Check for required options and combinations
    if (!$output_file) {
        die "No output file provided\n";
    }

    # $prune_limit is optional, so test definedness before comparing to
    # avoid an uninitialized-value warning when it is left out
    $prune_limit = $PRUNE_LIMIT unless (defined $prune_limit && $prune_limit > 0);

    return;
}

# -----------------------------------------------------------------------------
# Remove hits from results tree that are below our level of interest
# -----------------------------------------------------------------------------
sub _prune_hits {
    my $ip;
    my $hostname;

    foreach $ip (keys %proxy_lines) {
        # Delete individual hostnames/counts that are below the limit
        foreach $hostname (keys %{$proxy_lines{$ip}}) {
            if ($proxy_lines{$ip}->{$hostname} < $prune_limit) {
                delete $proxy_lines{$ip}->{$hostname};
            }
        }

        # If all hostnames were deleted, remove the empty IP
        unless (keys %{$proxy_lines{$ip}}) {
            delete $proxy_lines{$ip};
        }
    }

    return;
}

# -----------------------------------------------------------------------------
# Write collected information to specified output file
# -----------------------------------------------------------------------------
sub _write_output_file {
    my $ip;
    my $hostname;
    my $domain;
    my %counts;
    my %output;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode and the handle cannot clash with other plugins
    open(my $out, '>', $output_file) or die "Cannot open $output_file: $!\n";

    print $out "\n\nPOTENTIAL PROXIES\n\n";
    print $out "Generated: " . localtime() . "\n\n\n";

    if ((keys %proxy_lines) == 0) {
        print $out "*** No potential proxies found\n";
        close $out or die "Cannot close $output_file: $!\n";

        return;
    }

    # Reformat data hash into a formatted output hash, clustering by domain
    # name; IPs are visited in sorted order so the report is reproducible
    foreach $ip (sort keys %proxy_lines) {
        foreach $hostname (keys %{$proxy_lines{$ip}}) {
            # Attempt to cluster data by domain: use the last two labels of
            # the hostname unless it contains a dotted-quad address
            if (($hostname =~ /\.([^\.]+?\.[^\.]+?)$/) && !($hostname =~ /\d+\.\d+\.\d+\.\d+/)) {
                $domain = $1;
            } else {
                $domain = $hostname;
            }

            push @{$output{$domain}->{$hostname}}, $ip;
            $counts{$hostname} += $proxy_lines{$ip}->{$hostname};
        }
    }

    # Print output hash data to file
    foreach $domain (sort keys %output) {
        foreach $hostname (sort keys %{$output{$domain}}) {
            print $out "($counts{$hostname}) $hostname\n\t[ ";
            foreach $ip (@{$output{$domain}->{$hostname}}) {
                print $out "$ip ";
            }
            print $out "]\n";
        }
        print $out "\n";
    }

    close $out or die "Cannot close $output_file: $!\n";

    return;
}

1;
httpry-httpry-0.1.8/scripts/plugins/hostnames.cfg000066400000000000000000000002651234364221200222360ustar00rootroot00000000000000#
# Config file for hostnames.pm
#
# $output_file (required)
# A valid path to write an output file
# containing the collected hostnames.
$output_file = "hostnames.txt";

1;
httpry-httpry-0.1.8/scripts/plugins/hostnames.pm000066400000000000000000000054451234364221200221200ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#

package hostnames;

use warnings;

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------
my %hostnames = ();   # cleaned hostname => number of requests seen

# -----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

main::register_plugin();

# Constructor: an empty hash blessed into this package
sub new {
    my $object = {};

    return bless $object;
}

# Read the plugin configuration from the given directory
sub init {
    my ($self, $cfg_dir) = @_;

    _load_config($cfg_dir);

    return;
}

# Record fields consumed by main()
sub list {
    return qw(direction host);
}

# Count the host of every record whose direction is '>', after stripping
# characters outside [-.:0-9A-Za-z] and discarding unwanted names
sub main {
    my ($self, $record) = @_;

    return unless $record->{"direction"} eq '>';

    my $hostname = $record->{"host"};
    $hostname =~ s/[^\-\.:0-9A-Za-z]//g;

    # Eliminate invalid hostnames and online services
    return if ($hostname eq "" or $hostname eq "-");

    foreach my $unwanted (qr/^ads?\d*?\./,
                          qr/^proxy/,
                          qr/^redir/,
                          qr/^liveupdate/,
                          qr/^anti-phishing/,
                          qr/^stats/,
                          qr/^photos/,
                          qr/^images/,
                          qr/^myspace/) {
        return if ($hostname =~ $unwanted);
    }

    # Only allow hostnames of the forms: a.b, a.b.c, a.b.c.d (with optional port)
    if ($hostname =~ /^([\-\w]+?\.){1,3}[\-\w]+?(:\d+?)??$/) {
        $hostnames{$hostname}++;
    }

    return;
}

# Dump every collected hostname with its count, one "count<TAB>host" per line
sub end {
    open OUTFILE, ">$output_file" or die "Cannot open $output_file: $!\n";

    for my $host (keys %hostnames) {
        print OUTFILE "$hostnames{$host}\t$host\n";
    }

    close OUTFILE or die "Cannot close $output_file: $!\n";

    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my ($cfg_dir) = @_;

    # Load config file; by default in same directory as plugin
    die "No config file found\n" unless (-e "$cfg_dir/" . __PACKAGE__ . ".cfg");
    require "$cfg_dir/" . __PACKAGE__ . ".cfg";

    # Check for required options and combinations
    die "No output file provided\n" if (!$output_file);

    return;
}

1;
httpry-httpry-0.1.8/scripts/plugins/log_summary.cfg000066400000000000000000000005301234364221200225660ustar00rootroot00000000000000#
# Config file for log_summary.pm
#
# $output_file (required)
# A valid path to write an output file
# containing the collected summary data.
#
# $summary_cap (required)
# Maximum number of values to output in
# summary listings. Can be set to any
# positive integer.
#

$output_file = "log_summary.txt";
$summary_cap = 15;

1;
httpry-httpry-0.1.8/scripts/plugins/log_summary.pm000066400000000000000000000162531234364221200224540ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#

package log_summary;

use warnings;

# -----------------------------------------------------------------------------
# GLOBAL CONSTANTS
# -----------------------------------------------------------------------------
my $SUMMARY_CAP = 10;

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------
my %top_hosts = ();
my %top_talkers = ();
my %filetypes = ();
my %response_codes = ();
my %requests_hour = ();
my $total_line_cnt = 0;
my $ext_cnt = 0;
my $requests = 0;
my $responses = 0;
my $start_time;
my $end_time;

#
-----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

main::register_plugin();

# Constructor: returns an empty hash blessed into this package
sub new {
    return bless {};
}

# Load the configuration and remember the first value reported by times()
# so that end() can compute the elapsed figure printed in the report
sub init {
    my $self = shift;
    my $cfg_dir = shift;

    _load_config($cfg_dir);

    $start_time = (times)[0];

    return;
}

# Only "direction" is mandatory; every other field is used when present
sub list {
    return qw(direction);
}

# Update the running totals with one record.
#   direction '>': counts the request and, when the fields exist, the host,
#                  the source IP, the URI file extension and the hour of day
#   direction '<': counts the response and, when present, its status code
sub main {
    my $self = shift;
    my $record = shift;

    $total_line_cnt++;

    if ($record->{"direction"} eq '>') {
        $requests++;

        $top_hosts{$record->{"host"}}++ if exists $record->{"host"};
        $top_talkers{$record->{"source-ip"}}++ if exists $record->{"source-ip"};

        if (exists $record->{"request-uri"}) {
            # Extension is 2-5 word characters at the end of the URI or
            # directly in front of the query string
            if (($record->{"request-uri"} =~ /\.(\w{2,5})$/) or ($record->{"request-uri"} =~ /\.(\w{2,5})\?/)) {
                $filetypes{lc($1)}++;
                $ext_cnt++;
            }
        }

        # Use $1 only when the timestamp really matched; after a failed
        # match it would still hold the extension captured above
        if (exists $record->{"timestamp"} && $record->{"timestamp"} =~ /(\d\d):\d\d:\d\d$/) {
            $requests_hour{int $1}++;
        }
    } elsif ($record->{"direction"} eq '<') {
        $responses++;

        $response_codes{$record->{"status-code"}}++ if exists $record->{"status-code"};
    }

    return;
}

# Take the closing times() reading and write the report
sub end {
    $end_time = (times)[0];

    _write_output_file();

    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my $cfg_dir = shift;

    # Load config file; by default in same directory as plugin
    if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") {
        require "$cfg_dir/" . __PACKAGE__ . ".cfg";
    } else {
        die "No config file found\n";
    }

    # Check for required options and combinations
    if (!$output_file) {
        die "No output file provided\n";
    }

    # Fall back to the built-in cap when the option is missing or not
    # positive; the defined() test keeps a missing option warning-free
    $summary_cap = $SUMMARY_CAP unless (defined $summary_cap && $summary_cap > 0);

    return;
}

# -----------------------------------------------------------------------------
# Write collected information to specified output file
# -----------------------------------------------------------------------------
sub _write_output_file {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode and the handle is private to this sub
    open(my $out, '>', $output_file) or die "Cannot open $output_file: $!\n";

    print $out "\n\nLOG SUMMARY\n\n";
    print $out "Generated: " . localtime() . "\n";
    print $out "Total lines: " . $total_line_cnt . "\n";
    print $out "Total run time: " . sprintf("%.1f", $end_time - $start_time) . " secs\n";

    if (keys %requests_hour) {
        print $out "\n\nREQUESTS BY HOUR\n";
        print $out _get_request_hours(0, 11);
        print $out _get_request_hours(12, 23);
    }

    _print_ranking($out, "VISITED HOSTS", \%top_hosts, $requests);
    _print_ranking($out, "TOP TALKERS", \%top_talkers, $requests);
    _print_ranking($out, "RESPONSE CODES", \%response_codes, $responses);
    _print_ranking($out, "FILE EXTENSIONS", \%filetypes, $ext_cnt);

    close $out or die "Cannot close $output_file: $!\n";

    return;
}

# -----------------------------------------------------------------------------
# Print one "cap/total HEADING" section: entries of the given count hash in
# descending count order, at most $summary_cap of them, each with its share
# of $total; prints nothing when the hash is empty
# -----------------------------------------------------------------------------
sub _print_ranking {
    my ($out, $heading, $counts, $total) = @_;
    my $num_keys = keys %$counts;
    my $printed = 0;

    return unless $num_keys;

    print $out "\n\n$summary_cap/$num_keys $heading\n\n";
    foreach my $key (sort { $counts->{$b} <=> $counts->{$a} } keys %$counts) {
        print $out "$counts->{$key}\t" . _percent_of($counts->{$key}, $total) . "%\t$key\n";
        last if (++$printed == $summary_cap);
    }

    return;
}

# -----------------------------------------------------------------------------
# Build a string with request percentages per hour
# -----------------------------------------------------------------------------
sub _get_request_hours {
    my $begin = shift;
    my $end = shift;
    my $str;

    # First row: share of all requests for each hour in the range
    $str = "\n";
    for ($begin..$end) {
        if (exists $requests_hour{$_}) {
            $str .= sprintf("%3d%% ", _percent_of($requests_hour{$_}, $requests));
        } else {
            $str .= " 0% ";
        }
    }

    # Second row: ruler
    $str .= "\n ";
    for ($begin..$end - 1) {
        $str .= "|----";
    }

    # Third row: hour labels
    $str .= "|\n";
    for ($begin..$end) {
        $str .= sprintf(" %02d ", $_);
    }
    $str .= "\n";

    return $str;
}

# -----------------------------------------------------------------------------
# Calculate ratio information: $subset as a percentage of $total, formatted
# with one decimal place; a zero or missing total yields "0.0" rather than a
# fatal division by zero
# -----------------------------------------------------------------------------
sub _percent_of {
    my $subset = shift;
    my $total = shift;

    return sprintf("%.1f", 0) unless $total;

    return sprintf "%.1f", ($subset / $total) * 100;
}

1;
httpry-httpry-0.1.8/scripts/plugins/sample_plugin.pm000066400000000000000000000072021234364221200227470ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#
# This is an example plugin for the perl parse script parse_log.pl. It shows
# the basic structure of a simple plugin and provides a good starting point for
# writing a custom plugin.
Some of the other included plugins will also provide
# a good idea of how the different pieces work. Each plugin is essentially a
# Perl module dynamically loaded at runtime. A plugin has two required
# functions, new() and main().

package sample_plugin;

# -----------------------------------------------------------------------------
# GLOBAL CONSTANTS
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

# Executed when the module is loaded: registers the plugin with the core
main::register_plugin();

# Called once at initialization to create the callback object
sub new {
    my $object = {};

    return bless $object;
}

# Called once at initialization; put all startup code here. As written it
# only loads the configuration file. The sub may be left out entirely when
# a plugin has nothing to set up.
sub init {
    my ($self, $cfg_dir) = @_;

    # Config loading lives in its own sub, which pays off once a plugin
    # has many checks to run on its config variables
    _load_config($cfg_dir);

    return;
}

# Returns the list of fields the plugin requires. The list is compared
# against the header fields, and the plugin is disabled if the input file
# does not contain all of them. The sub may be left out entirely when no
# particular field is needed.
sub list {
    return ();
}

# Called once for each data line in the input file(s). The data arrives
# as a reference to a hash holding the record. This function is required
# to be present.
sub main {
    my ($self, $record) = @_; # $record: reference to hash containing record data

    # Simple processing can be handled here; more complex processing
    # would probably be better handled in a different sub

    return;
}

# Called once at program termination; put all shutdown code here (closing
# files, deleting temp files, etc). The sub may be left out entirely when
# there is nothing to clean up.
sub end {
    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my ($cfg_dir) = @_;

    # Load config file; by default in same directory as plugin. A missing
    # file is not treated as an error in this sample.
    require "$cfg_dir/" . __PACKAGE__ . ".cfg" if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg");

    # Check for required options and combinations from the configuration
    # file variables. This can also be a good place to do file reads for
    # initializing run time data structures.

    return;
}

1;
httpry-httpry-0.1.8/scripts/plugins/search_terms.cfg000066400000000000000000000030511234364221200227100ustar00rootroot00000000000000#
# Config file for search_terms.pm
#
# $output_file (required)
# A valid path to write an output file
# containing the collected summary data.
#
# %domains (required)
# A hash containing domain/name pairs to
# search. The format is self-explanatory;
# to augment it, simply add the domain and
# the CGI name of the element to extract
# the search term.
#
# %ignore (not required)
# A hash containing domain/regexp pairs to
# prune from the results list. The regexps
# are an array of all patterns to match to
# that particular domain. To ignore all
# results from a domain, use a regexp of "."
$output_file = "search_terms.txt";

%domains = (
    ".altavista.com" => "q",
    "search.aol.com" => "q",
    ".ask.com" => "q",
    ".bing.com" => "q",
    ".google.com" => "q",
    ".lycos.com" => "query",
    ".yahoo.com" => "p",
    ".amazon.com" => "field-keywords",
    "search.ebay.com" => "satitle",
    "stumbleupon.com" => "q",
    "www.facebook.com" => "q",
    ".wikipedia.org" => "search",
    ".wolframalpha.com" => "i",
    ".youtube.com" => "search_query"
);

%ignore = (
    ".google.com" => [ "^tbn:", "^info:", "^http:", "^music/image" ],
    "clients1.google.com" => [ "." ],
    ".mail.yahoo.com" => [ "^mail_candygram" ],
    ".adserver.yahoo.com" => [ "." ]
);

1;
httpry-httpry-0.1.8/scripts/plugins/search_terms.pm000066400000000000000000000121351234364221200225700ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#

package search_terms;

use warnings;

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------
my %search_terms = ();   # {source-ip}->{host}->{term} => number of queries
my $num_terms = 0;
my $num_queries = 0;

# -----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

main::register_plugin();

# Constructor: returns an empty hash blessed into this package
sub new {
    return bless {};
}

# Load the plugin configuration found in the directory passed as $cfg_dir
sub init {
    my $self = shift;
    my $cfg_dir = shift;

    _load_config($cfg_dir);

    return;
}

# Names of the record fields read by main()
sub list {
    return qw(direction host request-uri source-ip);
}

# Extract the search term from one record with direction '>'. The host must
# contain one of the %domains keys and the URI must carry the CGI parameter
# configured for it. The decoded, whitespace-normalized term is stored per
# source IP and host unless an %ignore pattern for the host matches it.
sub main {
    my $self = shift;
    my $record = shift;
    my $search_term;
    my $domain;
    my $name;
    my $pattern;

    return unless $record->{"direction"} eq '>';

    # These results can end up being a little messy, but it seems
    # most useful to simply dump out all search terms and let the user
    # sift through what they deem interesting
    foreach $domain (keys %domains) {
        if (rindex($record->{"host"}, $domain) > -1) {
            $name = $domains{$domain};

            # \Q...\E: the configured CGI name is literal text, not a pattern
            return unless $record->{"request-uri"} =~ /[\?\&]\Q$name\E=([^\&]+)/;
            $search_term = $1;

            last;
        }
    }

    # Test definedness rather than truth so that the query "0" is kept
    return unless defined $search_term;

    # Decode hex characters in the search term; encoded CR/LF become dots
    # so that a term always stays on one output line
    $search_term =~ s/%(?:25)+/%/g;
    $search_term =~ s/%(?:0A|0D)/\./ig;
    $search_term =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg;

    # Clean up spaces in search term
    $search_term =~ s/\+/ /g;
    $search_term =~ s/^\s+//;
    $search_term =~ s/\s+$//;
    $search_term =~ s/\s+/ /g;

    # Apply rules to ignore unwanted hits
    foreach $domain (keys %ignore) {
        if (rindex($record->{"host"}, $domain) > -1) {
            foreach $pattern (@{ $ignore{$domain} }) {
                return if $search_term =~ /$pattern/;
            }
        }
    }

    $search_terms{$record->{"source-ip"}}->{$record->{"host"}}->{$search_term}++;

    # Count the number of terms in the query, treating quoted strings as a
    # single term; each substitution count below is a number of terms removed
    $num_terms += ($search_term =~ s/\".*?\"//g);
    $search_term =~ s/^\s+//; # Strip leading/trailing spaces potentially introduced above
    $search_term =~ s/\s+$//; # ...
    $num_terms += ($search_term =~ s/\s+//g);

    # Whatever is left is the last term; length() also counts a bare "0"
    $num_terms++ if (length $search_term);
    $num_queries++;

    return;
}

# Write the report
sub end {
    _write_output_file();

    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my $cfg_dir = shift;

    # Load config file; by default in same directory as plugin
    if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") {
        require "$cfg_dir/" . __PACKAGE__ . ".cfg";
    } else {
        die "Error: No config file found\n";
    }

    # Check for required options and combinations
    if (!$output_file) {
        die "No output file provided\n";
    }

    return;
}

# -----------------------------------------------------------------------------
# Write collected information to specified output file
# -----------------------------------------------------------------------------
sub _write_output_file {
    my $ip;
    my $hostname;
    my $term;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode and the handle is private to this sub
    open(my $out, '>', $output_file) or die "Cannot open $output_file: $!\n";

    print $out "\n\nSEARCH TERMS SUMMARY\n\n";
    print $out "Generated: " . localtime() . "\n";

    if ((keys %search_terms) == 0) {
        print $out "\n\n*** No search terms found\n";
        close $out or die "Cannot close $output_file: $!\n";

        return;
    }

    # %search_terms is non-empty here, so at least one query was counted
    # and the division below is safe
    print $out "Terms: $num_terms\n";
    print $out "Queries: $num_queries\n";
    print $out "Avg terms/query: " . sprintf("%.1f", ($num_terms / $num_queries)) . "\n\n\n";

    # IPs, hostnames and terms are all sorted so the report is reproducible
    foreach $ip (sort keys %search_terms) {
        print $out "$ip\n";
        foreach $hostname (sort keys %{ $search_terms{$ip} }) {
            print $out "\t$hostname\n";
            foreach $term (sort keys %{ $search_terms{$ip}->{$hostname} }) {
                print $out "\t\t$search_terms{$ip}->{$hostname}->{$term}\t$term\n";
            }
            print $out "\n";
        }
    }

    close $out or die "Cannot close $output_file: $!\n";

    return;
}

1;
httpry-httpry-0.1.8/scripts/plugins/tokenize.cfg000066400000000000000000000006251234364221200220650ustar00rootroot00000000000000#
# Config file for tokenize.pm
#
# $output_dir (not required)
# Specify a directory to write client detail
# files. If not specified, defaults to current
# directory.
#
# %stopwords (required)
# A hash containing terms to ignore.
#

$output_dir = "";

%stopwords = (
    "com" => "",
    "edu" => "",
    "www" => "",
    "org" => ""
);

1;
httpry-httpry-0.1.8/scripts/plugins/tokenize.pm000066400000000000000000000055211234364221200217420ustar00rootroot00000000000000#
# ----------------------------------------------------
# httpry - HTTP logging and information retrieval tool
# ----------------------------------------------------
#
# Copyright (c) 2005-2014 Jason Bittel
#

package tokenize;

# -----------------------------------------------------------------------------
# GLOBAL VARIABLES
# -----------------------------------------------------------------------------
my %terms = ();   # {source-ip}->{term} => number of occurrences

# -----------------------------------------------------------------------------
# Plugin core
# -----------------------------------------------------------------------------

main::register_plugin();

# Constructor: an empty hash blessed into this package
sub new {
    my $object = {};

    return bless $object;
}

# Read the plugin configuration from the given directory
sub init {
    my ($self, $cfg_dir) = @_;

    _load_config($cfg_dir);

    return;
}

# Record fields consumed by main()
sub list {
    return qw(source-ip host request-uri);
}

# Split host plus decoded URI on every non-alphanumeric character and count
# the resulting terms per source IP. Records whose source IP is not in
# dotted-quad form are skipped.
sub main {
    my ($self, $record) = @_;

    return unless $record->{'source-ip'} =~ /^(?:\d+)(?:\.\d+){3}$/;

    # Collapse repeated encodings of '%' and decode the %XX escapes
    my $decoded_uri = $record->{"request-uri"};
    $decoded_uri =~ s/%(?:25)+/%/g;
    $decoded_uri =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg;

    foreach my $term (split /[^A-Za-z0-9]/, "$record->{'host'}$decoded_uri") {
        # Skip empty/false pieces, anything of two characters or fewer,
        # plain numbers and configured stopwords
        next if (!$term
                 or length($term) <= 2
                 or $term =~ /^\d+$/
                 or exists $stopwords{$term});

        $terms{$record->{'source-ip'}}->{$term}++;
    }

    return;
}

# Write one terms_<ip>.txt file per source IP; each line holds a single term
# repeated as many times as it was seen
sub end {
    # TODO: This could use more control over the output style and format
    foreach my $ip (keys %terms) {
        open OUTFILE, ">$output_dir/terms_$ip.txt" or die "Cannot open $output_dir/terms_$ip.txt: $!\n";

        foreach my $term (keys %{ $terms{$ip} }) {
            print OUTFILE "$term " x $terms{$ip}->{$term};
            print OUTFILE "\n";
        }

        close OUTFILE or die "Cannot close $output_dir/terms_$ip.txt: $!\n";
    }

    return;
}

# -----------------------------------------------------------------------------
# Load config file and check for required options
# -----------------------------------------------------------------------------
sub _load_config {
    my ($cfg_dir) = @_;

    # Load config file; by default in same directory as plugin
    die "No config file found\n" unless (-e "$cfg_dir/" . __PACKAGE__ . ".cfg");
    require "$cfg_dir/" . __PACKAGE__ . ".cfg";

    # An unset or empty directory means the current directory
    unless ($output_dir) {
        $output_dir = ".";
    }
    $output_dir =~ s/\/$//; # Remove trailing slash

    return;
}

1;
httpry-httpry-0.1.8/scripts/plugins/xml_output.cfg000066400000000000000000000007071234364221200224560ustar00rootroot00000000000000#
# Config file for xml_output.pm
#
# $output_file (required)
# A valid path to write an output file
# containing the XML data.
#
# $flow_version (required)
# Specify the version of the XML flow
# structure; this should match the
# current httpry version
#
# $xml_version (required)
# (Arbitrary) version number to mark
# file structure changes
#

$output_file = "xml_output.xml";
$flow_version = "0.1.8";
$xml_version = "0.2";

1;
httpry-httpry-0.1.8/scripts/plugins/xml_output.css000066400000000000000000000021241234364221200225020ustar00rootroot00000000000000/* httpry.css | created: 08/31/2006
 *
 * Sample CSS formatting file for the default httpry
 * XML format output fields.
* */ /* Top level block formatting */ * { font-family: Arial, Times, sans-serif; font-size: 10pt; } flow { padding: .25em; display: block; margin: 1px; background: #ddd; } step { padding: .25em; display: block; margin: 1px; background: #eee; white-space: nowrap; } /* Individual field formatting */ timestamp { color: #004080; font-size: smaller; } source-ip { color: #00B300; font-size: smaller; padding-left: 1em; } dest-ip:before { content: " <--> "; } dest-ip { color: #00B300; font-size: smaller; } direction { color: #aaa; font-size: smaller; font-weight: bold; padding-left: 1em; } method { color: #000; font-size: smaller; padding-left: 1em; } host { color: #0000b7; padding-left: 1em; } request-uri { color: #b70000; } http-version { color: #aaa; font-size: smaller; padding-left: 1em; } status-code,reason-phrase { color: #aaa; font-size: smaller; padding-left: 1em; }httpry-httpry-0.1.8/scripts/plugins/xml_output.pm000066400000000000000000000054011234364221200223270ustar00rootroot00000000000000# # ---------------------------------------------------- # httpry - HTTP logging and information retrieval tool # ---------------------------------------------------- # # Copyright (c) 2005-2014 Jason Bittel # package xml_output; use warnings; # ----------------------------------------------------------------------------- # GLOBAL VARIABLES # ----------------------------------------------------------------------------- my $fh; # ----------------------------------------------------------------------------- # Plugin core # ----------------------------------------------------------------------------- main::register_plugin(); sub new { return bless {}; } sub init { my $self = shift; my $cfg_dir = shift; _load_config($cfg_dir); if (-e $output_file) { open OUTFILE, ">>$output_file" or die "Cannot open $output_file: $!\n"; print OUTFILE "\n"; } else { open OUTFILE, ">$output_file" or die "Cannot open $output_file: $!\n"; print OUTFILE "\n"; print OUTFILE "\n"; print OUTFILE "\n"; } 
$fh = *OUTFILE; return; } sub main { my $self = shift; my $record = shift; my $field; my $data; print $fh ""; foreach $field (keys %$record) { $data = $record->{$field}; $data =~ s/^\s+//; $data =~ s/\s+$//; $data =~ s/&/\&\;/g; $data =~ s//\>\;/g; $data =~ s/\'/\&apos\;/g; $data =~ s/\"/\"\;/g; print $fh "<$field>$data"; } print $fh "\n"; return; } sub end { print $fh "\n"; close $fh or die "Cannot close $fh: $!\n"; return; } # ----------------------------------------------------------------------------- # Load config file and check for required options # ----------------------------------------------------------------------------- sub _load_config { my $cfg_dir = shift; # Load config file; by default in same directory as plugin if (-e "$cfg_dir/" . __PACKAGE__ . ".cfg") { require "$cfg_dir/" . __PACKAGE__ . ".cfg"; } else { die "No config file found\n"; } # Check for required options and combinations if (!$output_file) { die "No output file provided\n"; } return; } 1; httpry-httpry-0.1.8/tcp.h000066400000000000000000000175641234364221200153550ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_TCP_H #define _HAVE_TCP_H #include #include #ifndef ETHER_TYPE_VLAN #define ETHER_TYPE_VLAN 0x8100 /* 802.1q VLAN type */ #endif /* These IP and TCP structs/macros are from sniffex.c and were released under the following license: */ /* * sniffex.c * * Sniffer example of TCP/IP packet capture using libpcap. 
* * Version 0.1.1 (2005-07-05) * Copyright (c) 2005 The Tcpdump Group * * This software is intended to be used as a practical example and * demonstration of the libpcap library; available at: * http://www.tcpdump.org/ * **************************************************************************** * * This software is a modification of Tim Carstens' "sniffer.c" * demonstration source code, released as follows: * * sniffer.c * Copyright (c) 2002 Tim Carstens * 2002-01-07 * Demonstration of using libpcap * timcarst -at- yahoo -dot- com * * "sniffer.c" is distributed under these terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name "Tim Carstens" may not be used to endorse or promote * products derived from this software without prior written permission * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * * This software, "sniffex.c", is a derivative work of "sniffer.c" and is * covered by the following terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Because this is a derivative work, you must comply with the "sniffer.c" * terms reproduced above. * 2. Redistributions of source code must retain the Tcpdump Group copyright * notice at the top of this source file, this list of conditions and the * following disclaimer. * 3. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The names "tcpdump" or "libpcap" may not be used to endorse or promote * products derived from this software without prior written permission. * * THERE IS ABSOLUTELY NO WARRANTY FOR THIS PROGRAM. * BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY * FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN * OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES * PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED * OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS * TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE * PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, * REPAIR OR CORRECTION. 
* * IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING * WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR * REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, * INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING * OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED * TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY * YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER * PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * */ /* Ethernet addresses are 6 bytes */ #define ETHER_ADDR_LEN 6 /* Ethernet header */ struct eth_header { u_char ether_dhost[ETHER_ADDR_LEN]; /* destination host address */ u_char ether_shost[ETHER_ADDR_LEN]; /* source host address */ u_short ether_type; /* IP? ARP? RARP? etc */ }; /* IP header */ struct ip_header { u_char ip_vhl; /* version << 4 | header length >> 2 */ u_char ip_tos; /* type of service */ u_short ip_len; /* total length */ u_short ip_id; /* identification */ u_short ip_off; /* fragment offset field */ #define IP_RF 0x8000 /* reserved fragment flag */ #define IP_DF 0x4000 /* dont fragment flag */ #define IP_MF 0x2000 /* more fragments flag */ #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ u_char ip_ttl; /* time to live */ u_char ip_p; /* protocol */ u_short ip_sum; /* checksum */ struct in_addr ip_src,ip_dst; /* source and dest address */ }; #define IP_HL(ip) (((ip)->ip_vhl) & 0x0f) #define IP_V(ip) (((ip)->ip_vhl) >> 4) /* IPv6 header */ struct ip6_header { uint32_t ip6_vtcfl; /* version << 4 | traffic class 8 | flow label >> 20 */ u_short ip6_plen; /* payload length */ u_char ip6_nh; /* next header */ u_char ip6_hl; /* hop limit */ struct in6_addr ip_src,ip_dst; /* source and dest address */ }; #define IP6_V(ip6) (((ip6)->ip6_vtcfl) >> 28) #define IP6_TC(ip6) ((((ip6)->ip6_vtcfl) >> 20) & 0x000000ff) #define 
IP6_FL(ip6) (((ip6)->ip6_vtcfl) & 0x000fffff) /* IPv6 extension headers */ struct ip6_ext_header { u_char ip6_eh_nh; /* next header */ u_char ip6_eh_len; /* length in 8-octet units, not including first 8-octets */ }; /* TCP header */ typedef u_int tcp_seq; struct tcp_header { u_short th_sport; /* source port */ u_short th_dport; /* destination port */ tcp_seq th_seq; /* sequence number */ tcp_seq th_ack; /* acknowledgement number */ u_char th_offx2; /* data offset, rsvd */ u_char th_flags; #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 #define TH_PUSH 0x08 #define TH_ACK 0x10 #define TH_URG 0x20 #define TH_ECE 0x40 #define TH_CWR 0x80 #define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG|TH_ECE|TH_CWR) u_short th_win; /* window */ u_short th_sum; /* checksum */ u_short th_urp; /* urgent pointer */ }; #define TH_OFF(th) (((th)->th_offx2 & 0xf0) >> 4) #endif /* ! _HAVE_TCP_H */ httpry-httpry-0.1.8/test/000077500000000000000000000000001234364221200153605ustar00rootroot00000000000000httpry-httpry-0.1.8/test/callgrind000077500000000000000000000003131234364221200172420ustar00rootroot00000000000000#!/bin/sh eval valgrind --tool=callgrind \ "$@ -q" 2>&1 1>/dev/null | sed 's/^==[0-9]*==/callgrind:/' callgrind_annotate callgrind.out.* > "`dirname ${0}`/callgrind.log" rm -f callgrind.out.* httpry-httpry-0.1.8/test/format-names000066400000000000000000000005231234364221200176740ustar00rootroot00000000000000Timestamp,Source-IP,Dest-IP,Source-Port,Dest-Port,Direction,Method,Host,Request-URI,HTTP-Version,Status-Code,Reason-Phrase,Accept,Accept-Charset,Accept-Encoding,Accept-Language,Authorization,Expect,From,Host,If-Match,If-Modified-Since,If-None-Match,If-Range,If-Unmodified-Since,Max-Forwards,Proxy-Authorization,Range,Referer,TE,User-Agent httpry-httpry-0.1.8/test/massif000077500000000000000000000002621234364221200165700ustar00rootroot00000000000000#!/bin/sh eval valgrind --tool=massif \ "$@ -q" 2>&1 1>/dev/null | sed 's/^==[0-9]*==/massif:/' ms_print massif.out.* > 
"`dirname ${0}`/massif.log" rm -f massif.out.* httpry-httpry-0.1.8/test/valgrind000077500000000000000000000003751234364221200171210ustar00rootroot00000000000000#!/bin/sh eval valgrind --tool=memcheck \ --num-callers=20 \ --leak-check=yes \ --leak-resolution=high \ --show-reachable=yes \ "$@ -q" 2>&1 1>/dev/null | sed 's/^==[0-9]*==/valgrind:/' httpry-httpry-0.1.8/utility.c000066400000000000000000000063601234364221200162550ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #include #include #include #include #include "error.h" /* Strip leading and trailing spaces from parameter string, modifying the string in place and returning a pointer to the (potentially) new starting point */ char *str_strip_whitespace(char *str) { size_t len = strlen(str); #ifdef DEBUG ASSERT(str); ASSERT(strlen(str) > 0); #endif while (isspace(*str)) str++; while (len && isspace(*(str + len - 1))) *(str + (len--) - 1) = '\0'; return str; } /* Convert the paramter string to lowercase */ char *str_tolower(char *str) { char *c; #ifdef DEBUG ASSERT(str); ASSERT(strlen(str) > 0); #endif for (c = str; *c != '\0'; c++) { *c = tolower(*c); } return str; } /* Compare two strings, ignoring the case of str1 and assuming str2 is lowercase. Break if we find a string terminator in str2 and consider it a match as str1 will not always have a string terminator. */ int str_compare(const char *str1, const char *str2) { #ifdef DEBUG ASSERT(str2); ASSERT(strlen(str2) > 0); ASSERT(str1 != str2); #endif while (tolower(*str1) == *str2) { str1++; str2++; if (*str2 == '\0') return 0; } return tolower(*str1) - *str2; } /* Copy at most len characters from src to dest, guaranteeing dest will be properly terminated. Returns the total number of characters copied, not including the string terminator. 
*/ int str_copy(char *dest, const char *src, size_t len) { const char *start = dest; if (len > 0) { while ((*src != '\0') && --len) { *dest++ = *src++; } *dest = '\0'; } return dest - start; } /* Wrapper function around str_copy() that first allocates memory for the destination string and then copies the parameter string into it. */ char *str_duplicate(const char *str) { char *new; size_t len = strlen(str); if ((new = malloc(len + 1)) == NULL) return NULL; #ifdef DEBUG ASSERT(str_copy(new, str, len + 1) <= (len + 1)); #else str_copy(new, str, len + 1); #endif return new; } /* Implementation of Jenkins's One-at-a-Time hash, as described on this page: http://www.burtleburtle.net/bob/hash/doobs.html */ unsigned int hash_str(char *str, unsigned int hashsize) { unsigned long int hash; #ifdef DEBUG ASSERT(str); ASSERT(strlen(str) > 0); #endif for (hash = 0; *str != '\0'; str++) { hash += tolower(*str); hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); /* Restrict hash value to a maximum of hashsize; hashsize must be a power of 2 */ return (unsigned int) (hash & (hashsize - 1)); } httpry-httpry-0.1.8/utility.h000066400000000000000000000011201234364221200162470ustar00rootroot00000000000000/* ---------------------------------------------------- httpry - HTTP logging and information retrieval tool ---------------------------------------------------- Copyright (c) 2005-2014 Jason Bittel */ #ifndef _HAVE_UTILITY_H #define _HAVE_UTILITY_H char *str_strip_whitespace(char *str); char *str_tolower(char *str); int str_compare(const char *str1, const char *str2); int str_copy(char *dest, const char *src, size_t len); char *str_duplicate(const char *str); unsigned int hash_str(char *key, unsigned int hashsize); #endif /* ! _HAVE_UTILITY_H */