tcpflow/0000755000175000017500000000000012263701151011140 5ustar dimadimatcpflow/python/0000755000175000017500000000000012263701151012461 5ustar dimadimatcpflow/python/plot_wifi_aps.py0000644000175000017500000000213612263701151015674 0ustar dimadima#!/usr/bin/env python3.3 # # Read a report.xml file and output a graphviz graph of the nodes # import xml.etree.ElementTree as ET if __name__=="__main__": import sys root = ET.parse(sys.argv[1]) macs = set() ssids = set() print("digraph ssids {") for ssidnode in root.findall('.//ssid'): macs.add(ssidnode.attrib['mac']) ssids.add(ssidnode.attrib['ssid']) print(' "{}" -> "{}";'.format(ssidnode.attrib['mac'],ssidnode.attrib['ssid'])) # Send through the attributes # Make all of the boxes for mac in macs: print(' "{}" [shape=box]'.format(mac)) # color all of the SSIDs c = 1 for ssid in ssids: r = (c)//3 g = (c+1)//3 b = (c+2)//3 color = "#{:02X}{:02X}{:02X}".format(255-r*16,255-g*16,255-b*16) c += 1 if c/3>4: c = 0 print(' "{}" [color="{}",style=filled]'.format(ssid,color)) for macnode in root.findall(".//ssid/[@ssid='{}']".format(ssid)): print(' "{}" [color="{}",style=filled]'.format(macnode.attrib['mac'],color)) print("}") tcpflow/src/0000755000175000017500000000000012263701323011730 5ustar dimadimatcpflow/src/stest.cpp0000644000175000017500000000547312263701151013606 0ustar dimadima#include #include #include #include static int callback(void *NotUsed, int argc, char **argv, char **azColName){ int i; for(i=0; i. * Originally by Will Glynn . * * This source code is under the GNU Public License (GPL) version 3. * See COPYING for details. 
* */ #include "mime_map.h" #include #include std::map mime_map; class PopulateMimeMap { public: PopulateMimeMap(); }; static PopulateMimeMap populate_mime_map; PopulateMimeMap::PopulateMimeMap() { /* Generated from an OSX-provided mime.types, massaged somewhat by hand */ mime_map["application/andrew-inset"] = "ez"; mime_map["application/applixware"] = "aw"; mime_map["application/atom+xml"] = "atom"; mime_map["application/atomcat+xml"] = "atomcat"; mime_map["application/atomsvc+xml"] = "atomsvc"; mime_map["application/ccxml+xml"] = "ccxml"; mime_map["application/cdmi-capability"] = "cdmia"; mime_map["application/cdmi-container"] = "cdmic"; mime_map["application/cdmi-domain"] = "cdmid"; mime_map["application/cdmi-object"] = "cdmio"; mime_map["application/cdmi-queue"] = "cdmiq"; mime_map["application/cu-seeme"] = "cu"; mime_map["application/davmount+xml"] = "davmount"; mime_map["application/dssc+der"] = "dssc"; mime_map["application/dssc+xml"] = "xdssc"; mime_map["application/ecmascript"] = "ecma"; mime_map["application/emma+xml"] = "emma"; mime_map["application/epub+zip"] = "epub"; mime_map["application/exi"] = "exi"; mime_map["application/font-tdpfr"] = "pfr"; mime_map["application/hyperstudio"] = "stk"; mime_map["application/ipfix"] = "ipfix"; mime_map["application/java-archive"] = "jar"; mime_map["application/java-serialized-object"] = "ser"; mime_map["application/java-vm"] = "class"; mime_map["application/javascript"] = "js"; mime_map["application/json"] = "json"; mime_map["application/lost+xml"] = "lostxml"; mime_map["application/mac-binhex40"] = "hqx"; mime_map["application/mac-compactpro"] = "cpt"; mime_map["application/mads+xml"] = "mads"; mime_map["application/marc"] = "mrc"; mime_map["application/marcxml+xml"] = "mrcx"; mime_map["application/mathematica"] = "mb"; mime_map["application/mathml+xml"] = "mathml"; mime_map["application/mbox"] = "mbox"; mime_map["application/mediaservercontrol+xml"] = "mscml"; mime_map["application/metalink4+xml"] = "meta4"; 
mime_map["application/mets+xml"] = "mets"; mime_map["application/mods+xml"] = "mods"; mime_map["application/mp21"] = "mp21"; mime_map["application/mp4"] = "mp4s"; mime_map["application/msword"] = "doc"; mime_map["application/mxf"] = "mxf"; mime_map["application/oda"] = "oda"; mime_map["application/oebps-package+xml"] = "opf"; mime_map["application/ogg"] = "ogx"; mime_map["application/onenote"] = "onetoc"; mime_map["application/patch-ops-error+xml"] = "xer"; mime_map["application/pdf"] = "pdf"; mime_map["application/pgp-encrypted"] = "pgp"; mime_map["application/pgp-signature"] = "asc"; mime_map["application/pics-rules"] = "prf"; mime_map["application/pkcs10"] = "p10"; mime_map["application/pkcs7-mime"] = "p7m"; mime_map["application/pkcs7-signature"] = "p7s"; mime_map["application/pkcs8"] = "p8"; mime_map["application/pkix-attr-cert"] = "ac"; mime_map["application/pkix-cert"] = "cer"; mime_map["application/pkix-crl"] = "crl"; mime_map["application/pkix-pkipath"] = "pkipath"; mime_map["application/pkixcmp"] = "pki"; mime_map["application/pls+xml"] = "pls"; mime_map["application/postscript"] = "ps"; mime_map["application/prs.cww"] = "cww"; mime_map["application/pskc+xml"] = "pskcxml"; mime_map["application/rdf+xml"] = "rdf"; mime_map["application/reginfo+xml"] = "rif"; mime_map["application/relax-ng-compact-syntax"] = "rnc"; mime_map["application/resource-lists+xml"] = "rl"; mime_map["application/resource-lists-diff+xml"] = "rld"; mime_map["application/rls-services+xml"] = "rs"; mime_map["application/rsd+xml"] = "rsd"; mime_map["application/rss+xml"] = "rss"; mime_map["application/rtf"] = "rtf"; mime_map["application/sbml+xml"] = "sbml"; mime_map["application/scvp-cv-request"] = "scq"; mime_map["application/scvp-cv-response"] = "scs"; mime_map["application/scvp-vp-request"] = "spq"; mime_map["application/scvp-vp-response"] = "spp"; mime_map["application/sdp"] = "sdp"; mime_map["application/set-payment-initiation"] = "setpay"; 
mime_map["application/set-registration-initiation"] = "setreg"; mime_map["application/shf+xml"] = "shf"; mime_map["application/smil+xml"] = "smil"; mime_map["application/sparql-query"] = "rq"; mime_map["application/sparql-results+xml"] = "srx"; mime_map["application/srgs"] = "gram"; mime_map["application/srgs+xml"] = "grxml"; mime_map["application/sru+xml"] = "sru"; mime_map["application/ssml+xml"] = "ssml"; mime_map["application/tei+xml"] = "teicorpus"; mime_map["application/thraud+xml"] = "tfi"; mime_map["application/timestamped-data"] = "tsd"; mime_map["application/vnd.3gpp.pic-bw-large"] = "plb"; mime_map["application/vnd.3gpp.pic-bw-small"] = "psb"; mime_map["application/vnd.3gpp.pic-bw-var"] = "pvb"; mime_map["application/vnd.3gpp2.tcap"] = "tcap"; mime_map["application/vnd.3m.post-it-notes"] = "pwn"; mime_map["application/vnd.accpac.simply.aso"] = "aso"; mime_map["application/vnd.accpac.simply.imp"] = "imp"; mime_map["application/vnd.acucobol"] = "acu"; mime_map["application/vnd.acucorp"] = "atc"; mime_map["application/vnd.adobe.air-application-installer-package+zip"] = "air"; mime_map["application/vnd.adobe.fxp"] = "fxp"; mime_map["application/vnd.adobe.xdp+xml"] = "xdp"; mime_map["application/vnd.adobe.xfdf"] = "xfdf"; mime_map["application/vnd.ahead.space"] = "ahead"; mime_map["application/vnd.airzip.filesecure.azf"] = "azf"; mime_map["application/vnd.airzip.filesecure.azs"] = "azs"; mime_map["application/vnd.amazon.ebook"] = "azw"; mime_map["application/vnd.americandynamics.acc"] = "acc"; mime_map["application/vnd.amiga.ami"] = "ami"; mime_map["application/vnd.android.package-archive"] = "apk"; mime_map["application/vnd.anser-web-certificate-issue-initiation"] = "cii"; mime_map["application/vnd.anser-web-funds-transfer-initiation"] = "fti"; mime_map["application/vnd.antix.game-component"] = "atx"; mime_map["application/vnd.apple.installer+xml"] = "mpkg"; mime_map["application/vnd.apple.mpegurl"] = "m3u8"; mime_map["application/vnd.aristanetworks.swi"] = 
"swi"; mime_map["application/vnd.audiograph"] = "aep"; mime_map["application/vnd.blueice.multipass"] = "mpm"; mime_map["application/vnd.bmi"] = "bmi"; mime_map["application/vnd.businessobjects"] = "rep"; mime_map["application/vnd.chemdraw+xml"] = "cdxml"; mime_map["application/vnd.chipnuts.karaoke-mmd"] = "mmd"; mime_map["application/vnd.cinderella"] = "cdy"; mime_map["application/vnd.claymore"] = "cla"; mime_map["application/vnd.cloanto.rp9"] = "rp9"; mime_map["application/vnd.clonk.c4group"] = "c4g"; mime_map["application/vnd.cluetrust.cartomobile-config"] = "c11amc"; mime_map["application/vnd.cluetrust.cartomobile-config-pkg"] = "c11amz"; mime_map["application/vnd.commonspace"] = "csp"; mime_map["application/vnd.contact.cmsg"] = "cdbcmsg"; mime_map["application/vnd.cosmocaller"] = "cmc"; mime_map["application/vnd.crick.clicker"] = "clkx"; mime_map["application/vnd.crick.clicker.keyboard"] = "clkk"; mime_map["application/vnd.crick.clicker.palette"] = "clkp"; mime_map["application/vnd.crick.clicker.template"] = "clkt"; mime_map["application/vnd.crick.clicker.wordbank"] = "clkw"; mime_map["application/vnd.criticaltools.wbs+xml"] = "wbs"; mime_map["application/vnd.ctc-posml"] = "pml"; mime_map["application/vnd.cups-ppd"] = "ppd"; mime_map["application/vnd.curl.car"] = "car"; mime_map["application/vnd.curl.pcurl"] = "pcurl"; mime_map["application/vnd.data-vision.rdz"] = "rdz"; mime_map["application/vnd.denovo.fcselayout-link"] = "fe_launch"; mime_map["application/vnd.dna"] = "dna"; mime_map["application/vnd.dolby.mlp"] = "mlp"; mime_map["application/vnd.dpgraph"] = "dpg"; mime_map["application/vnd.dreamfactory"] = "dfac"; mime_map["application/vnd.dvb.ait"] = "ait"; mime_map["application/vnd.dvb.service"] = "svc"; mime_map["application/vnd.dynageo"] = "geo"; mime_map["application/vnd.ecowin.chart"] = "mag"; mime_map["application/vnd.enliven"] = "nml"; mime_map["application/vnd.epson.esf"] = "esf"; mime_map["application/vnd.epson.msf"] = "msf"; 
mime_map["application/vnd.epson.quickanime"] = "qam"; mime_map["application/vnd.epson.salt"] = "slt"; mime_map["application/vnd.epson.ssf"] = "ssf"; mime_map["application/vnd.eszigno3+xml"] = "es3"; mime_map["application/vnd.ezpix-album"] = "ez2"; mime_map["application/vnd.ezpix-package"] = "ez3"; mime_map["application/vnd.fdf"] = "fdf"; mime_map["application/vnd.fdsn.mseed"] = "mseed"; mime_map["application/vnd.fdsn.seed"] = "seed"; mime_map["application/vnd.flographit"] = "gph"; mime_map["application/vnd.fluxtime.clip"] = "ftc"; mime_map["application/vnd.framemaker"] = "fm"; mime_map["application/vnd.frogans.fnc"] = "fnc"; mime_map["application/vnd.frogans.ltf"] = "ltf"; mime_map["application/vnd.fsc.weblaunch"] = "fsc"; mime_map["application/vnd.fujitsu.oasys"] = "oas"; mime_map["application/vnd.fujitsu.oasys2"] = "oa2"; mime_map["application/vnd.fujitsu.oasys3"] = "oa3"; mime_map["application/vnd.fujitsu.oasysgp"] = "fg5"; mime_map["application/vnd.fujitsu.oasysprs"] = "bh2"; mime_map["application/vnd.fujixerox.ddd"] = "ddd"; mime_map["application/vnd.fujixerox.docuworks"] = "xdw"; mime_map["application/vnd.fujixerox.docuworks.binder"] = "xbd"; mime_map["application/vnd.fuzzysheet"] = "fzs"; mime_map["application/vnd.genomatix.tuxedo"] = "txd"; mime_map["application/vnd.geogebra.file"] = "ggb"; mime_map["application/vnd.geogebra.tool"] = "ggt"; mime_map["application/vnd.geometry-explorer"] = "gex"; mime_map["application/vnd.geonext"] = "gxt"; mime_map["application/vnd.geoplan"] = "g2w"; mime_map["application/vnd.geospace"] = "g3w"; mime_map["application/vnd.gmx"] = "gmx"; mime_map["application/vnd.google-earth.kml+xml"] = "kml"; mime_map["application/vnd.google-earth.kmz"] = "kmz"; mime_map["application/vnd.grafeq"] = "gqf"; mime_map["application/vnd.groove-account"] = "gac"; mime_map["application/vnd.groove-help"] = "ghf"; mime_map["application/vnd.groove-identity-message"] = "gim"; mime_map["application/vnd.groove-injector"] = "grv"; 
mime_map["application/vnd.groove-tool-message"] = "gtm"; mime_map["application/vnd.groove-tool-template"] = "tpl"; mime_map["application/vnd.groove-vcard"] = "vcg"; mime_map["application/vnd.hal+xml"] = "hal"; mime_map["application/vnd.handheld-entertainment+xml"] = "zmm"; mime_map["application/vnd.hbci"] = "hbci"; mime_map["application/vnd.hhe.lesson-player"] = "les"; mime_map["application/vnd.hp-hpgl"] = "hpgl"; mime_map["application/vnd.hp-hpid"] = "hpid"; mime_map["application/vnd.hp-hps"] = "hps"; mime_map["application/vnd.hp-jlyt"] = "jlt"; mime_map["application/vnd.hp-pcl"] = "pcl"; mime_map["application/vnd.hp-pclxl"] = "pclxl"; mime_map["application/vnd.hydrostatix.sof-data"] = "sfd-hdstx"; mime_map["application/vnd.hzn-3d-crossword"] = "x3d"; mime_map["application/vnd.ibm.minipay"] = "mpy"; mime_map["application/vnd.ibm.modcap"] = "afp"; mime_map["application/vnd.ibm.rights-management"] = "irm"; mime_map["application/vnd.ibm.secure-container"] = "sc"; mime_map["application/vnd.iccprofile"] = "icc"; mime_map["application/vnd.igloader"] = "igl"; mime_map["application/vnd.immervision-ivp"] = "ivp"; mime_map["application/vnd.immervision-ivu"] = "ivu"; mime_map["application/vnd.insors.igm"] = "igm"; mime_map["application/vnd.intercon.formnet"] = "xpw"; mime_map["application/vnd.intergeo"] = "i2g"; mime_map["application/vnd.intu.qbo"] = "qbo"; mime_map["application/vnd.intu.qfx"] = "qfx"; mime_map["application/vnd.ipunplugged.rcprofile"] = "rcprofile"; mime_map["application/vnd.irepository.package+xml"] = "irp"; mime_map["application/vnd.is-xpr"] = "xpr"; mime_map["application/vnd.isac.fcs"] = "fcs"; mime_map["application/vnd.jam"] = "jam"; mime_map["application/vnd.jcp.javame.midlet-rms"] = "rms"; mime_map["application/vnd.jisp"] = "jisp"; mime_map["application/vnd.joost.joda-archive"] = "joda"; mime_map["application/vnd.kahootz"] = "ktz"; mime_map["application/vnd.kde.karbon"] = "karbon"; mime_map["application/vnd.kde.kchart"] = "chrt"; 
mime_map["application/vnd.kde.kformula"] = "kfo"; mime_map["application/vnd.kde.kivio"] = "flw"; mime_map["application/vnd.kde.kontour"] = "kon"; mime_map["application/vnd.kde.kpresenter"] = "kpr"; mime_map["application/vnd.kde.kspread"] = "ksp"; mime_map["application/vnd.kde.kword"] = "kwd"; mime_map["application/vnd.kenameaapp"] = "htke"; mime_map["application/vnd.kidspiration"] = "kia"; mime_map["application/vnd.kinar"] = "knp"; mime_map["application/vnd.koan"] = "skp"; mime_map["application/vnd.kodak-descriptor"] = "sse"; mime_map["application/vnd.las.las+xml"] = "lasxml"; mime_map["application/vnd.llamagraphics.life-balance.desktop"] = "lbd"; mime_map["application/vnd.llamagraphics.life-balance.exchange+xml"] = "lbe"; mime_map["application/vnd.lotus-1-2-3"] = "123"; mime_map["application/vnd.lotus-approach"] = "apr"; mime_map["application/vnd.lotus-freelance"] = "pre"; mime_map["application/vnd.lotus-notes"] = "nsf"; mime_map["application/vnd.lotus-organizer"] = "org"; mime_map["application/vnd.lotus-screencam"] = "scm"; mime_map["application/vnd.lotus-wordpro"] = "lwp"; mime_map["application/vnd.macports.portpkg"] = "portpkg"; mime_map["application/vnd.mcd"] = "mcd"; mime_map["application/vnd.medcalcdata"] = "mc1"; mime_map["application/vnd.mediastation.cdkey"] = "cdkey"; mime_map["application/vnd.mfer"] = "mwf"; mime_map["application/vnd.mfmp"] = "mfm"; mime_map["application/vnd.micrografx.flo"] = "flo"; mime_map["application/vnd.micrografx.igx"] = "igx"; mime_map["application/vnd.mif"] = "mif"; mime_map["application/vnd.mobius.daf"] = "daf"; mime_map["application/vnd.mobius.dis"] = "dis"; mime_map["application/vnd.mobius.mbk"] = "mbk"; mime_map["application/vnd.mobius.mqy"] = "mqy"; mime_map["application/vnd.mobius.msl"] = "msl"; mime_map["application/vnd.mobius.plc"] = "plc"; mime_map["application/vnd.mobius.txf"] = "txf"; mime_map["application/vnd.mophun.application"] = "mpn"; mime_map["application/vnd.mophun.certificate"] = "mpc"; 
mime_map["application/vnd.mozilla.xul+xml"] = "xul"; mime_map["application/vnd.ms-artgalry"] = "cil"; mime_map["application/vnd.ms-cab-compressed"] = "cab"; mime_map["application/vnd.ms-excel"] = "xls"; mime_map["application/vnd.ms-excel.addin.macroenabled.12"] = "xlam"; mime_map["application/vnd.ms-excel.sheet.binary.macroenabled.12"] = "xlsb"; mime_map["application/vnd.ms-excel.sheet.macroenabled.12"] = "xlsm"; mime_map["application/vnd.ms-excel.template.macroenabled.12"] = "xltm"; mime_map["application/vnd.ms-fontobject"] = "eot"; mime_map["application/vnd.ms-htmlhelp"] = "chm"; mime_map["application/vnd.ms-ims"] = "ims"; mime_map["application/vnd.ms-lrm"] = "lrm"; mime_map["application/vnd.ms-officetheme"] = "thmx"; mime_map["application/vnd.ms-pki.seccat"] = "cat"; mime_map["application/vnd.ms-pki.stl"] = "stl"; mime_map["application/vnd.ms-powerpoint"] = "ppt"; mime_map["application/vnd.ms-powerpoint.addin.macroenabled.12"] = "ppam"; mime_map["application/vnd.ms-powerpoint.presentation.macroenabled.12"] = "pptm"; mime_map["application/vnd.ms-powerpoint.slide.macroenabled.12"] = "sldm"; mime_map["application/vnd.ms-powerpoint.slideshow.macroenabled.12"] = "ppsm"; mime_map["application/vnd.ms-powerpoint.template.macroenabled.12"] = "potm"; mime_map["application/vnd.ms-project"] = "mpp"; mime_map["application/vnd.ms-word.document.macroenabled.12"] = "docm"; mime_map["application/vnd.ms-word.template.macroenabled.12"] = "dotm"; mime_map["application/vnd.ms-works"] = "wps"; mime_map["application/vnd.ms-wpl"] = "wpl"; mime_map["application/vnd.ms-xpsdocument"] = "xps"; mime_map["application/vnd.mseq"] = "mseq"; mime_map["application/vnd.musician"] = "mus"; mime_map["application/vnd.muvee.style"] = "msty"; mime_map["application/vnd.neurolanguage.nlu"] = "nlu"; mime_map["application/vnd.noblenet-directory"] = "nnd"; mime_map["application/vnd.noblenet-sealer"] = "nns"; mime_map["application/vnd.noblenet-web"] = "nnw"; mime_map["application/vnd.nokia.n-gage.data"] = 
"ngdat"; mime_map["application/vnd.nokia.n-gage.symbian.install"] = "n-gage"; mime_map["application/vnd.nokia.radio-preset"] = "rpst"; mime_map["application/vnd.nokia.radio-presets"] = "rpss"; mime_map["application/vnd.novadigm.edm"] = "edm"; mime_map["application/vnd.novadigm.edx"] = "edx"; mime_map["application/vnd.novadigm.ext"] = "ext"; mime_map["application/vnd.oasis.opendocument.chart"] = "odc"; mime_map["application/vnd.oasis.opendocument.chart-template"] = "otc"; mime_map["application/vnd.oasis.opendocument.database"] = "odb"; mime_map["application/vnd.oasis.opendocument.formula"] = "odf"; mime_map["application/vnd.oasis.opendocument.formula-template"] = "odft"; mime_map["application/vnd.oasis.opendocument.graphics"] = "odg"; mime_map["application/vnd.oasis.opendocument.graphics-template"] = "otg"; mime_map["application/vnd.oasis.opendocument.image"] = "odi"; mime_map["application/vnd.oasis.opendocument.image-template"] = "oti"; mime_map["application/vnd.oasis.opendocument.presentation"] = "odp"; mime_map["application/vnd.oasis.opendocument.presentation-template"] = "otp"; mime_map["application/vnd.oasis.opendocument.spreadsheet"] = "ods"; mime_map["application/vnd.oasis.opendocument.spreadsheet-template"] = "ots"; mime_map["application/vnd.oasis.opendocument.text"] = "odt"; mime_map["application/vnd.oasis.opendocument.text-master"] = "odm"; mime_map["application/vnd.oasis.opendocument.text-template"] = "ott"; mime_map["application/vnd.oasis.opendocument.text-web"] = "oth"; mime_map["application/vnd.olpc-sugar"] = "xo"; mime_map["application/vnd.oma.dd2+xml"] = "dd2"; mime_map["application/vnd.openofficeorg.extension"] = "oxt"; mime_map["application/vnd.openxmlformats-officedocument.presentationml.presentation"] = "pptx"; mime_map["application/vnd.openxmlformats-officedocument.presentationml.slide"] = "sldx"; mime_map["application/vnd.openxmlformats-officedocument.presentationml.slideshow"] = "ppsx"; 
mime_map["application/vnd.openxmlformats-officedocument.presentationml.template"] = "potx"; mime_map["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] = "xlsx"; mime_map["application/vnd.openxmlformats-officedocument.spreadsheetml.template"] = "xltx"; mime_map["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] = "docx"; mime_map["application/vnd.openxmlformats-officedocument.wordprocessingml.template"] = "dotx"; mime_map["application/vnd.osgeo.mapguide.package"] = "mgp"; mime_map["application/vnd.osgi.dp"] = "dp"; mime_map["application/vnd.palm"] = "pdb"; mime_map["application/vnd.pawaafile"] = "paw"; mime_map["application/vnd.pg.format"] = "str"; mime_map["application/vnd.pg.osasli"] = "ei6"; mime_map["application/vnd.picsel"] = "efif"; mime_map["application/vnd.pmi.widget"] = "wg"; mime_map["application/vnd.pocketlearn"] = "plf"; mime_map["application/vnd.powerbuilder6"] = "pbd"; mime_map["application/vnd.previewsystems.box"] = "box"; mime_map["application/vnd.proteus.magazine"] = "mgz"; mime_map["application/vnd.publishare-delta-tree"] = "qps"; mime_map["application/vnd.pvi.ptid1"] = "ptid"; mime_map["application/vnd.quark.quarkxpress"] = "qxd"; mime_map["application/vnd.realvnc.bed"] = "bed"; mime_map["application/vnd.recordare.musicxml"] = "mxl"; mime_map["application/vnd.recordare.musicxml+xml"] = "musicxml"; mime_map["application/vnd.rig.cryptonote"] = "cryptonote"; mime_map["application/vnd.rim.cod"] = "cod"; mime_map["application/vnd.rn-realmedia"] = "rm"; mime_map["application/vnd.route66.link66+xml"] = "link66"; mime_map["application/vnd.sailingtracker.track"] = "st"; mime_map["application/vnd.seemail"] = "see"; mime_map["application/vnd.sema"] = "sema"; mime_map["application/vnd.semd"] = "semd"; mime_map["application/vnd.semf"] = "semf"; mime_map["application/vnd.shana.informed.formdata"] = "ifm"; mime_map["application/vnd.shana.informed.formtemplate"] = "itp"; 
mime_map["application/vnd.shana.informed.interchange"] = "iif"; mime_map["application/vnd.shana.informed.package"] = "ipk"; mime_map["application/vnd.simtech-mindmapper"] = "twd"; mime_map["application/vnd.smaf"] = "mmf"; mime_map["application/vnd.smart.teacher"] = "teacher"; mime_map["application/vnd.solent.sdkm+xml"] = "sdkm"; mime_map["application/vnd.spotfire.dxp"] = "dxp"; mime_map["application/vnd.spotfire.sfs"] = "sfs"; mime_map["application/vnd.stardivision.calc"] = "sdc"; mime_map["application/vnd.stardivision.draw"] = "sda"; mime_map["application/vnd.stardivision.impress"] = "sdd"; mime_map["application/vnd.stardivision.math"] = "smf"; mime_map["application/vnd.stardivision.writer"] = "sdw"; mime_map["application/vnd.stardivision.writer-global"] = "sgl"; mime_map["application/vnd.stepmania.stepchart"] = "sm"; mime_map["application/vnd.sun.xml.calc"] = "sxc"; mime_map["application/vnd.sun.xml.calc.template"] = "stc"; mime_map["application/vnd.sun.xml.draw"] = "sxd"; mime_map["application/vnd.sun.xml.draw.template"] = "std"; mime_map["application/vnd.sun.xml.impress"] = "sxi"; mime_map["application/vnd.sun.xml.impress.template"] = "sti"; mime_map["application/vnd.sun.xml.math"] = "sxm"; mime_map["application/vnd.sun.xml.writer"] = "sxw"; mime_map["application/vnd.sun.xml.writer.global"] = "sxg"; mime_map["application/vnd.sun.xml.writer.template"] = "stw"; mime_map["application/vnd.sus-calendar"] = "sus"; mime_map["application/vnd.svd"] = "svd"; mime_map["application/vnd.symbian.install"] = "sis"; mime_map["application/vnd.syncml+xml"] = "xsm"; mime_map["application/vnd.syncml.dm+wbxml"] = "bdm"; mime_map["application/vnd.syncml.dm+xml"] = "xdm"; mime_map["application/vnd.tao.intent-module-archive"] = "tao"; mime_map["application/vnd.tmobile-livetv"] = "tmo"; mime_map["application/vnd.trid.tpt"] = "tpt"; mime_map["application/vnd.triscape.mxs"] = "mxs"; mime_map["application/vnd.trueapp"] = "tra"; mime_map["application/vnd.ufdl"] = "ufdl"; 
mime_map["application/vnd.uiq.theme"] = "utz"; mime_map["application/vnd.umajin"] = "umj"; mime_map["application/vnd.unity"] = "unityweb"; mime_map["application/vnd.uoml+xml"] = "uoml"; mime_map["application/vnd.vcx"] = "vcx"; mime_map["application/vnd.visio"] = "vsd"; mime_map["application/vnd.visionary"] = "vis"; mime_map["application/vnd.vsf"] = "vsf"; mime_map["application/vnd.wap.wbxml"] = "wbxml"; mime_map["application/vnd.wap.wmlc"] = "wmlc"; mime_map["application/vnd.wap.wmlscriptc"] = "wmlsc"; mime_map["application/vnd.webturbo"] = "wtb"; mime_map["application/vnd.wolfram.player"] = "nbp"; mime_map["application/vnd.wordperfect"] = "wpd"; mime_map["application/vnd.wqd"] = "wqd"; mime_map["application/vnd.wt.stf"] = "stf"; mime_map["application/vnd.xara"] = "xar"; mime_map["application/vnd.xfdl"] = "xfdl"; mime_map["application/vnd.yamaha.hv-dic"] = "hvd"; mime_map["application/vnd.yamaha.hv-script"] = "hvs"; mime_map["application/vnd.yamaha.hv-voice"] = "hvp"; mime_map["application/vnd.yamaha.openscoreformat"] = "osf"; mime_map["application/vnd.yamaha.openscoreformat.osfpvg+xml"] = "osfpvg"; mime_map["application/vnd.yamaha.smaf-audio"] = "saf"; mime_map["application/vnd.yamaha.smaf-phrase"] = "spf"; mime_map["application/vnd.yellowriver-custom-menu"] = "cmp"; mime_map["application/vnd.zul"] = "zir"; mime_map["application/vnd.zzazz.deck+xml"] = "zaz"; mime_map["application/voicexml+xml"] = "vxml"; mime_map["application/widget"] = "wgt"; mime_map["application/winhlp"] = "hlp"; mime_map["application/wsdl+xml"] = "wsdl"; mime_map["application/wspolicy+xml"] = "wspolicy"; mime_map["application/x-7z-compressed"] = "7z"; mime_map["application/x-abiword"] = "abw"; mime_map["application/x-ace-compressed"] = "ace"; mime_map["application/x-authorware-map"] = "aam"; mime_map["application/x-authorware-seg"] = "aas"; mime_map["application/x-bcpio"] = "bcpio"; mime_map["application/x-bittorrent"] = "torrent"; mime_map["application/x-bzip"] = "bz"; 
mime_map["application/x-bzip2"] = "bz2"; mime_map["application/x-cdlink"] = "vcd"; mime_map["application/x-chat"] = "chat"; mime_map["application/x-chess-pgn"] = "pgn"; mime_map["application/x-cpio"] = "cpio"; mime_map["application/x-csh"] = "csh"; mime_map["application/x-debian-package"] = "deb"; mime_map["application/x-director"] = "dir"; mime_map["application/x-doom"] = "wad"; mime_map["application/x-dtbncx+xml"] = "ncx"; mime_map["application/x-dtbook+xml"] = "dtb"; mime_map["application/x-dtbresource+xml"] = "res"; mime_map["application/x-dvi"] = "dvi"; mime_map["application/x-font-bdf"] = "bdf"; mime_map["application/x-font-ghostscript"] = "gsf"; mime_map["application/x-font-linux-psf"] = "psf"; mime_map["application/x-font-otf"] = "otf"; mime_map["application/x-font-pcf"] = "pcf"; mime_map["application/x-font-snf"] = "snf"; mime_map["application/x-font-ttf"] = "ttf"; mime_map["application/x-font-type1"] = "afm"; mime_map["application/x-font-woff"] = "woff"; mime_map["application/x-futuresplash"] = "spl"; mime_map["application/x-gnumeric"] = "gnumeric"; mime_map["application/x-gtar"] = "gtar"; mime_map["application/x-hdf"] = "hdf"; mime_map["application/x-java-jnlp-file"] = "jnlp"; mime_map["application/x-latex"] = "latex"; mime_map["application/x-mobipocket-ebook"] = "mobi"; mime_map["application/x-mpegurl"] = "m3u8"; mime_map["application/x-ms-application"] = "application"; mime_map["application/x-ms-wmd"] = "wmd"; mime_map["application/x-ms-wmz"] = "wmz"; mime_map["application/x-ms-xbap"] = "xbap"; mime_map["application/x-msaccess"] = "mdb"; mime_map["application/x-msbinder"] = "obd"; mime_map["application/x-mscardfile"] = "crd"; mime_map["application/x-msclip"] = "clp"; mime_map["application/x-msmediaview"] = "mvb"; mime_map["application/x-msmetafile"] = "wmf"; mime_map["application/x-msmoney"] = "mny"; mime_map["application/x-mspublisher"] = "pub"; mime_map["application/x-msschedule"] = "scd"; mime_map["application/x-msterminal"] = "trm"; 
mime_map["application/x-mswrite"] = "wri"; mime_map["application/x-netcdf"] = "nc"; mime_map["application/x-pkcs12"] = "p12"; mime_map["application/x-pkcs7-certificates"] = "p7b"; mime_map["application/x-pkcs7-certreqresp"] = "p7r"; mime_map["application/x-rar-compressed"] = "rar"; mime_map["application/x-sh"] = "sh"; mime_map["application/x-shar"] = "shar"; mime_map["application/x-shockwave-flash"] = "swf"; mime_map["application/x-silverlight-app"] = "xap"; mime_map["application/x-stuffit"] = "sit"; mime_map["application/x-stuffitx"] = "sitx"; mime_map["application/x-sv4cpio"] = "sv4cpio"; mime_map["application/x-sv4crc"] = "sv4crc"; mime_map["application/x-tar"] = "tar"; mime_map["application/x-tcl"] = "tcl"; mime_map["application/x-tex"] = "tex"; mime_map["application/x-tex-tfm"] = "tfm"; mime_map["application/x-texinfo"] = "texi"; mime_map["application/x-ustar"] = "ustar"; mime_map["application/x-wais-source"] = "src"; mime_map["application/x-x509-ca-cert"] = "crt"; mime_map["application/x-xfig"] = "fig"; mime_map["application/x-xpinstall"] = "xpi"; mime_map["application/xcap-diff+xml"] = "xdf"; mime_map["application/xenc+xml"] = "xenc"; mime_map["application/xhtml+xml"] = "xhtml"; mime_map["application/xml"] = "xml"; mime_map["application/xml-dtd"] = "dtd"; mime_map["application/xop+xml"] = "xop"; mime_map["application/xslt+xml"] = "xslt"; mime_map["application/xspf+xml"] = "xspf"; mime_map["application/xv+xml"] = "xvml"; mime_map["application/yang"] = "yang"; mime_map["application/yin+xml"] = "yin"; mime_map["application/zip"] = "zip"; mime_map["audio/adpcm"] = "adp"; mime_map["audio/basic"] = "au"; mime_map["audio/midi"] = "mid"; mime_map["audio/mp4"] = "mp4a"; mime_map["audio/mp4a-latm"] = "m4a"; mime_map["audio/mpeg"] = "mpga"; mime_map["audio/ogg"] = "ogg"; mime_map["audio/vnd.dece.audio"] = "uvva"; mime_map["audio/vnd.digital-winds"] = "eol"; mime_map["audio/vnd.dra"] = "dra"; mime_map["audio/vnd.dts"] = "dts"; mime_map["audio/vnd.dts.hd"] = "dtshd"; 
mime_map["audio/vnd.lucent.voice"] = "lvp"; mime_map["audio/vnd.ms-playready.media.pya"] = "pya"; mime_map["audio/vnd.nuera.ecelp4800"] = "ecelp4800"; mime_map["audio/vnd.nuera.ecelp7470"] = "ecelp7470"; mime_map["audio/vnd.nuera.ecelp9600"] = "ecelp9600"; mime_map["audio/vnd.rip"] = "rip"; mime_map["audio/webm"] = "weba"; mime_map["audio/x-aac"] = "aac"; mime_map["audio/x-aiff"] = "aiff"; mime_map["audio/x-mpegurl"] = "m3u"; mime_map["audio/x-ms-wax"] = "wax"; mime_map["audio/x-ms-wma"] = "wma"; mime_map["audio/x-pn-realaudio"] = "ram"; mime_map["audio/x-pn-realaudio-plugin"] = "rmp"; mime_map["audio/x-wav"] = "wav"; mime_map["chemical/x-cdx"] = "cdx"; mime_map["chemical/x-cif"] = "cif"; mime_map["chemical/x-cmdf"] = "cmdf"; mime_map["chemical/x-cml"] = "cml"; mime_map["chemical/x-csml"] = "csml"; mime_map["chemical/x-xyz"] = "xyz"; mime_map["image/bmp"] = "bmp"; mime_map["image/cgm"] = "cgm"; mime_map["image/g3fax"] = "g3"; mime_map["image/gif"] = "gif"; mime_map["image/ief"] = "ief"; mime_map["image/jp2"] = "jp2"; mime_map["image/jpeg"] = "jpg"; mime_map["image/ktx"] = "ktx"; mime_map["image/pict"] = "pict"; mime_map["image/png"] = "png"; mime_map["image/prs.btif"] = "btif"; mime_map["image/svg+xml"] = "svg"; mime_map["image/tiff"] = "tiff"; mime_map["image/vnd.adobe.photoshop"] = "psd"; mime_map["image/vnd.dece.graphic"] = "uvi"; mime_map["image/vnd.djvu"] = "djvu"; mime_map["image/vnd.dvb.subtitle"] = "sub"; mime_map["image/vnd.dwg"] = "dwg"; mime_map["image/vnd.dxf"] = "dxf"; mime_map["image/vnd.fastbidsheet"] = "fbs"; mime_map["image/vnd.fpx"] = "fpx"; mime_map["image/vnd.fst"] = "fst"; mime_map["image/vnd.fujixerox.edmics-mmr"] = "mmr"; mime_map["image/vnd.fujixerox.edmics-rlc"] = "rlc"; mime_map["image/vnd.ms-modi"] = "mdi"; mime_map["image/vnd.net-fpx"] = "npx"; mime_map["image/vnd.wap.wbmp"] = "wbmp"; mime_map["image/vnd.xiff"] = "xif"; mime_map["image/webp"] = "webp"; mime_map["image/x-cmu-raster"] = "ras"; mime_map["image/x-cmx"] = "cmx"; 
mime_map["image/x-freehand"] = "fh"; mime_map["image/x-icon"] = "ico"; mime_map["image/x-macpaint"] = "pntg"; mime_map["image/x-pcx"] = "pcx"; mime_map["image/x-pict"] = "pict"; mime_map["image/x-portable-anymap"] = "pnm"; mime_map["image/x-portable-bitmap"] = "pbm"; mime_map["image/x-portable-graymap"] = "pgm"; mime_map["image/x-portable-pixmap"] = "ppm"; mime_map["image/x-quicktime"] = "qtif"; mime_map["image/x-rgb"] = "rgb"; mime_map["image/x-xbitmap"] = "xbm"; mime_map["image/x-xpixmap"] = "xpm"; mime_map["image/x-xwindowdump"] = "xwd"; mime_map["message/rfc822"] = "eml"; mime_map["model/iges"] = "iges"; mime_map["model/mesh"] = "mesh"; mime_map["model/vnd.collada+xml"] = "dae"; mime_map["model/vnd.dwf"] = "dwf"; mime_map["model/vnd.gdl"] = "gdl"; mime_map["model/vnd.gtw"] = "gtw"; mime_map["model/vnd.mts"] = "mts"; mime_map["model/vnd.vtu"] = "vtu"; mime_map["model/vrml"] = "vrml"; mime_map["text/cache-manifest"] = "manifest"; mime_map["text/calendar"] = "ics"; mime_map["text/css"] = "css"; mime_map["text/csv"] = "csv"; mime_map["text/html"] = "html"; mime_map["text/n3"] = "n3"; mime_map["text/plain"] = "txt"; mime_map["text/prs.lines.tag"] = "dsc"; mime_map["text/richtext"] = "rtx"; mime_map["text/sgml"] = "sgml"; mime_map["text/tab-separated-values"] = "tsv"; mime_map["text/troff"] = "roff"; mime_map["text/turtle"] = "ttl"; mime_map["text/uri-list"] = "urls"; mime_map["text/vnd.curl"] = "curl"; mime_map["text/vnd.curl.dcurl"] = "dcurl"; mime_map["text/vnd.curl.mcurl"] = "mcurl"; mime_map["text/vnd.curl.scurl"] = "scurl"; mime_map["text/vnd.fly"] = "fly"; mime_map["text/vnd.fmi.flexstor"] = "flx"; mime_map["text/vnd.graphviz"] = "gv"; mime_map["text/vnd.in3d.3dml"] = "3dml"; mime_map["text/vnd.in3d.spot"] = "spot"; mime_map["text/vnd.sun.j2me.app-descriptor"] = "jad"; mime_map["text/vnd.wap.wml"] = "wml"; mime_map["text/vnd.wap.wmlscript"] = "wmls"; mime_map["text/x-asm"] = "asm"; mime_map["text/x-c"] = "c"; mime_map["text/x-fortran"] = "f"; 
mime_map["text/x-java-source"] = "java"; mime_map["text/x-pascal"] = "pas"; mime_map["text/x-setext"] = "etx"; mime_map["text/x-uuencode"] = "uu"; mime_map["text/x-vcalendar"] = "vcs"; mime_map["text/x-vcard"] = "vcf"; mime_map["video/3gpp"] = "3gp"; mime_map["video/3gpp2"] = "3g2"; mime_map["video/h261"] = "h261"; mime_map["video/h263"] = "h263"; mime_map["video/h264"] = "h264"; mime_map["video/jpeg"] = "jpgv"; mime_map["video/jpm"] = "jpm"; mime_map["video/mj2"] = "mj2"; mime_map["video/mp2t"] = "ts"; mime_map["video/mp4"] = "m4v"; mime_map["video/mpeg"] = "mpg"; mime_map["video/ogg"] = "ogv"; mime_map["video/quicktime"] = "mov"; mime_map["video/vnd.dece.hd"] = "uvvh"; mime_map["video/vnd.dece.mobile"] = "uvvm"; mime_map["video/vnd.dece.pd"] = "uvvp"; mime_map["video/vnd.dece.sd"] = "uvvs"; mime_map["video/vnd.dece.video"] = "uvvv"; mime_map["video/vnd.fvt"] = "fvt"; mime_map["video/vnd.mpegurl"] = "m4u"; mime_map["video/vnd.ms-playready.media.pyv"] = "pyv"; mime_map["video/vnd.uvvu.mp4"] = "uvvu"; mime_map["video/vnd.vivo"] = "viv"; mime_map["video/webm"] = "webm"; mime_map["video/x-dv"] = "dv"; mime_map["video/x-f4v"] = "f4v"; mime_map["video/x-fli"] = "fli"; mime_map["video/x-flv"] = "flv"; mime_map["video/x-m4v"] = "m4v"; mime_map["video/x-ms-asf"] = "asf"; mime_map["video/x-ms-wm"] = "wm"; mime_map["video/x-ms-wmv"] = "wmv"; mime_map["video/x-ms-wmx"] = "wmx"; mime_map["video/x-ms-wvx"] = "wvx"; mime_map["video/x-msvideo"] = "avi"; mime_map["video/x-sgi-movie"] = "movie"; mime_map["x-conference/x-cooltalk"] = "ice"; } std::string get_extension_for_mime_type(const std::string& mime_type) { /* Strip anything after a semicolon (e.g. 
text/html; charset=utf-8) */ std::string base_type(mime_type); size_t semicolon_pos = base_type.find_first_of(";"); if (semicolon_pos != std::string::npos) { base_type = base_type.substr(0, semicolon_pos); } /* Downcase it for comparsion purposes */ std::transform(base_type.begin(), base_type.end(), base_type.begin(), ::tolower); /* Look it up in the map */ return mime_map[base_type]; } tcpflow/src/netviz/0000755000175000017500000000000012263701151013246 5ustar dimadimatcpflow/src/netviz/address_histogram_view.h0000644000175000017500000000251312263701151020154 0ustar dimadima/* * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick */ #ifndef ADDRESS_HISTOGRAM_VIEW_H #define ADDRESS_HISTOGRAM_VIEW_H #include "config.h" #ifdef HAVE_LIBCAIRO #include "plot_view.h" #include "address_histogram.h" class address_histogram_view : public plot_view { public: address_histogram_view(const address_histogram &histogram_); class bucket_view { public: bucket_view(const iptree::addr_elem &bucket_, const rgb_t &color_) : bucket(bucket_), color(color_) {} const iptree::addr_elem &bucket; const rgb_t &color; static const double label_font_size; void render(cairo_t *cr, const bounds_t &bounds); void render_label(cairo_t *cr, const bounds_t &bounds); }; const address_histogram &histogram; rgb_t bar_color; rgb_t cdf_color; static const double bar_space_factor; static const size_t compressed_ip6_str_max_len; static const double cdf_line_width; static const double data_width_factor; void render(cairo_t *cr, const bounds_t &bounds); void render_data(cairo_t *cr, const bounds_t &bounds); const address_histogram &get_data() const; static std::string compressed_ip6_str(iptree::addr_elem address); }; #endif #endif tcpflow/src/netviz/port_histogram_view.cpp0000644000175000017500000001472712263701151020060 0ustar dimadima/** * port_histogram_view.cpp: * Show packets received vs port * * This source file is public domain, as it is not based on 
/**
 * Draw the histogram bars, the cumulative (CDF) step line and the bar labels
 * into the content area handed over by plot_view::render().
 *
 * Nothing is drawn when the histogram is empty or its first bucket has a
 * zero count.  Bars are scaled relative to histogram.at(0).count
 * (presumably the largest bucket -- confirm against port_histogram's sort
 * order).  Labels are drawn in a second pass so they end up on top of the
 * CDF line.
 *
 * NOTE(review): the bare `vector::const_iterator` / `map::const_iterator`
 * spellings below look like template arguments were lost in extraction.
 *
 * @param cr      cairo context to draw into
 * @param bounds  area available for data
 */
void port_histogram_view::render_data(cairo_t *cr, const plot_view::bounds_t &bounds)
{
    if(histogram.size() < 1 || histogram.at(0).count == 0) {
        return;
    }

    // bars occupy only data_width_factor of the available width
    double data_width = bounds.width * data_width_factor;
    double data_offset = 0;
    bounds_t data_bounds(bounds.x + data_offset, bounds.y, data_width, bounds.height);

    // bars shorter than this get an extra "chip" so their color stays visible
    double visibility_chip_height = data_bounds.height * bar_chip_size_factor;
    // horizontal slot per bucket; the bar is centered within its slot
    double offset_unit = data_bounds.width / histogram.size();
    double bar_width = offset_unit / bar_space_factor;
    double space_width = (offset_unit - bar_width) / 2.0;
    uint64_t greatest = histogram.at(0).count;
    unsigned int index = 0;

    // the CDF starts at the bottom-left corner and steps up and to the right
    double cdf_last_x = bounds.x, cdf_last_y = bounds.y + data_bounds.height;
    for(vector::const_iterator it = histogram.begin(); it != histogram.end(); it++) {
        double bar_height = (((double) it->count) / ((double) greatest)) * data_bounds.height;

        // bar
        double bar_x = data_bounds.x + (index * offset_unit + space_width);
        double bar_y = data_bounds.y + (data_bounds.height - bar_height);
        bounds_t bar_bounds(bar_x, bar_y, bar_width, bar_height);

        // ports without an entry in color_map fall back to default_color
        rgb_t bar_color = default_color;
        map::const_iterator color = color_map.find(it->port);
        if(color != color_map.end()) {
            bar_color = color->second;
        }

        bucket_view view(*it, bar_color);
        // only specially-colored bars that are too short to see get a chip
        if(bar_height < visibility_chip_height && bar_color != default_color) {
            view.chip_height = visibility_chip_height;
            view.chip_offset = visibility_chip_height * 0.6;
        }
        view.render(cr, bar_bounds);

        // CDF
        double cdf_x = cdf_last_x + offset_unit;
        // account for left and right padding of bars
        if(index == 0) {
            cdf_x += data_offset;
        }
        // the final step is stretched to the right edge of the full bounds
        if(index == histogram.size() - 1) {
            cdf_x = bounds.x + bounds.width;
        }
        // each step rises by this bucket's share of everything ingested
        double cdf_y = cdf_last_y - ((double) it->count / (double) histogram.ingest_count()) * data_bounds.height;

        cairo_move_to(cr, cdf_last_x, cdf_last_y);
        // don't draw over the left-hand y axis
        if(index == 0) {
            cairo_move_to(cr, cdf_last_x, cdf_y);
        }
        else {
            cairo_line_to(cr, cdf_last_x, cdf_y);
        }
        cairo_line_to(cr, cdf_x, cdf_y);

        cairo_set_source_rgb(cr, cdf_color.r, cdf_color.g, cdf_color.b);
        cairo_set_line_width(cr, cdf_line_width);
        cairo_stroke(cr);

        cdf_last_x = cdf_x;
        cdf_last_y = cdf_y;

        index++;
    }

    index = 0;
    // labels must be done after the fact to avoid awkward interaction with the CDF
    for(vector::const_iterator it = histogram.begin(); it != histogram.end(); it++) {
        // same bar geometry as in the first pass
        double bar_height = (((double) it->count) / ((double) greatest)) * data_bounds.height;
        double bar_x = data_bounds.x + (index * offset_unit + space_width);
        double bar_y = data_bounds.y + (data_bounds.height - bar_height);
        bounds_t bar_bounds(bar_x, bar_y, bar_width, bar_height);

        // bar label
        bucket_view view(*it, default_color);
        view.render_label(cr, bar_bounds);
        index++;
    }
}
port_histogram_view::bucket_view::label_font_size = 6.0; const double port_histogram_view::bucket_view::chip_width_factor = 0.4; void port_histogram_view::bucket_view::render(cairo_t *cr, const bounds_t &bounds) { cairo_set_source_rgb(cr, color.r, color.g, color.b); cairo_rectangle(cr, bounds.x, bounds.y, bounds.width, bounds.height); cairo_fill(cr); if(chip_height > 0.0) { double chip_x = bounds.x + (bounds.width * ((1.0 - chip_width_factor) / 2.0)); double chip_y = bounds.y + bounds.height + chip_offset; double chip_width = bounds.width * chip_width_factor; cairo_rectangle(cr, chip_x, chip_y, chip_width, chip_height); cairo_fill(cr); } } void port_histogram_view::bucket_view::render_label(cairo_t *cr, const bounds_t &bounds) { cairo_matrix_t unrotated_matrix; cairo_get_matrix(cr, &unrotated_matrix); cairo_rotate(cr, -M_PI / 4.0); cairo_set_font_size(cr, label_font_size); string label = ssprintf("%d", bucket.port); cairo_text_extents_t label_extents; cairo_text_extents(cr, label.c_str(), &label_extents); double label_x = bounds.x + bounds.width / 2.0; double label_y = bounds.y - 2.0; cairo_device_to_user(cr, &label_x, &label_y); cairo_set_source_rgb(cr, 1.0, 1.0, 1.0); cairo_rectangle(cr, label_x, label_y, label_extents.width, -label_extents.height); cairo_fill(cr); cairo_rectangle(cr, label_x, label_y, label_extents.width, -label_extents.height); cairo_set_line_width(cr, 2.0); cairo_stroke(cr); cairo_move_to(cr, label_x, label_y); cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_show_text(cr, label.c_str()); cairo_set_matrix(cr, &unrotated_matrix); } #endif tcpflow/src/netviz/time_histogram.cpp0000644000175000017500000001667512263701151017004 0ustar dimadima/** * time_histogram.cpp: * organize packet count histograms of various granularities while transparently * exposing the best-fit * * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick * */ #include #include "time_histogram.h" time_histogram::time_histogram() : histograms(), best_fit_index(0), earliest_ts(), latest_ts(), insert_count(0) { // zero value structs courtesy stackoverflow // http://stackoverflow.com/questions/6462093/reinitialize-timeval-struct earliest_ts = (struct timeval) { 0 }; latest_ts = (struct timeval) { 0 }; for(std::vector::const_iterator it = spans.begin(); it != spans.end(); it++) { histograms.push_back(histogram_map(*it)); } } const float time_histogram::underflow_pad_factor = 0.1; // spans dictates the granularities of each histogram. One histogram // will be created per entry in this vector. Each value must have a greater // value of seconds than the previous const std::vector time_histogram::spans = time_histogram::build_spans(); const time_histogram::bucket time_histogram::empty_bucket; // an empty bucket const unsigned int time_histogram::F_NON_TCP = 0x01; void time_histogram::insert(const struct timeval &ts, const in_port_t port, const uint64_t count, const unsigned int flags) { insert_count += count; if(earliest_ts.tv_sec == 0 || (ts.tv_sec < earliest_ts.tv_sec || (ts.tv_sec == earliest_ts.tv_sec && ts.tv_usec < earliest_ts.tv_usec))) { earliest_ts = ts; } if(ts.tv_sec > latest_ts.tv_sec || (ts.tv_sec == latest_ts.tv_sec && ts.tv_usec > latest_ts.tv_usec)) { latest_ts = ts; } for(std::vector::iterator it = histograms.begin() + best_fit_index; it != histograms.end(); it++) { bool overflowed = it->insert(ts, port, count, flags); // if there was an overflow and the best fit isn't already the least // granular histogram, downgrade granularity by one step if(overflowed && best_fit_index < histograms.size() - 1) { best_fit_index++; } } } // combine each bucket with (factor - 1) subsequent neighbors and increase bucket width by factor // lots of possible optimizations ignored for simplicity's sake void time_histogram::condense(double factor) { const histogram_map &original = 
histograms.at(best_fit_index); histogram_map condensed(span_params(original.span.usec, (uint64_t) ((double) original.span.bucket_count / factor))); for(histogram_map::buckets_t::const_iterator it = original.buckets.begin(); it != original.buckets.end(); it++) { bucket &bkt = *(it->second); uint64_t recons_usec = it->first * original.bucket_width + original.base_time; struct timeval reconstructed_ts; reconstructed_ts.tv_usec = (time_t) (recons_usec % (1000LL * 1000LL)); reconstructed_ts.tv_sec = (time_t) (recons_usec / (1000LL * 1000LL)); for(bucket::counts_t::const_iterator jt = bkt.counts.begin(); jt != bkt.counts.end(); jt++) { condensed.insert(reconstructed_ts, jt->first, jt->second); } condensed.insert(reconstructed_ts, 0, bkt.portless_count, F_NON_TCP); } histograms.at(best_fit_index) = condensed; } uint64_t time_histogram::usec_per_bucket() const { return histograms.at(best_fit_index).bucket_width; } uint64_t time_histogram::packet_count() const { return histograms.at(best_fit_index).insert_count; } time_t time_histogram::start_date() const { return earliest_ts.tv_sec; } time_t time_histogram::end_date() const { return latest_ts.tv_sec; } uint64_t time_histogram::tallest_bar() const { return histograms.at(best_fit_index).greatest_bucket_sum(); } const time_histogram::bucket &time_histogram::at(uint32_t index) const { const histogram_map::buckets_t hgram = histograms.at(best_fit_index).buckets; histogram_map::buckets_t::const_iterator bkt = hgram.find(index); if(bkt == hgram.end()) { return empty_bucket; } return *(bkt->second); } size_t time_histogram::size() const { return histograms.at(best_fit_index).buckets.size(); } // calculate the number of buckets if this were a non-sparse data structure like a vector size_t time_histogram::non_sparse_size() const { histogram_map::buckets_t buckets = histograms.at(best_fit_index).buckets; histogram_map::buckets_t::const_iterator least = buckets.begin(); if(least == buckets.end()) { return 0; } 
histogram_map::buckets_t::const_reverse_iterator most = buckets.rbegin(); return most->first - least->first + 1; } time_histogram::histogram_map::buckets_t::const_iterator time_histogram::begin() const { return histograms.at(best_fit_index).buckets.begin(); } time_histogram::histogram_map::buckets_t::const_iterator time_histogram::end() const { return histograms.at(best_fit_index).buckets.end(); } time_histogram::histogram_map::buckets_t::const_reverse_iterator time_histogram::rbegin() const { return histograms.at(best_fit_index).buckets.rbegin(); } time_histogram::histogram_map::buckets_t::const_reverse_iterator time_histogram::rend() const { return histograms.at(best_fit_index).buckets.rend(); } /* This should be rewritten, because currently it is building a bunch of spans and then returning a vector which has to be copied. * It's very inefficient. */ time_histogram::span_params_vector_t time_histogram::build_spans() { span_params_vector_t output; output.push_back(span_params( 1000LL * 1000LL * 60LL, // minute 600)); // 600 0.1 second buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL, // hour 3600)); // 3,600 1 second buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL, // day 1440)); // 1,440 1 minute buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL * 7LL, // week 1008)); // 1,008 10 minute buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL * 30LL, // month 720)); // 720 1 hour buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL * 30LL * 12LL, // year 360)); // 360 1 day buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL * 3598LL, // approximate decade 514)); // 514 1 week buckets output.push_back(span_params( 1000LL * 1000LL * 60LL * 60LL * 24LL * 30LL * 12LL * 50LL, // semicentury 200)); // 200 3 month intervals return output; } /* * Insert into the time_histogram. 
* * This is optimized to be as fast as possible, so we compute the sums as necessary when generating the histogram. */ bool time_histogram::histogram_map::insert(const struct timeval &ts, const in_port_t port, const uint64_t count, const unsigned int flags) { uint32_t target_index = scale_timeval(ts); if(target_index >= span.bucket_count) { return true; // overflow; will cause this histogram to be shut down } buckets_t::iterator it = buckets.find(target_index); if(it==buckets.end()){ buckets[target_index] = new bucket(); } buckets[target_index]->increment(port, count, flags); insert_count += count; return false; } tcpflow/src/netviz/port_histogram.h0000644000175000017500000000256112263701151016464 0ustar dimadima/** * port_histogram.h: * Show packets received vs port * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * */ #ifndef PORT_HISTOGRAM_H #define PORT_HISTOGRAM_H class port_histogram { public: port_histogram() : port_counts(), data_bytes_ingested(0), buckets(), buckets_dirty(true) {} class port_count { public: port_count(uint16_t port_, uint64_t count_) : port(port_), count(count_) {} uint16_t port; uint64_t count; }; //typedef uint16_t port_t; class descending_counts { public: bool operator()(const port_count &a, const port_count &b); }; void increment(uint16_t port, uint64_t delta); const port_count &at(size_t index); size_t size(); uint64_t ingest_count() const; typedef std::vector port_count_vector; port_count_vector::const_iterator begin(); port_count_vector::const_iterator end(); port_count_vector::const_reverse_iterator rbegin(); port_count_vector::const_reverse_iterator rend(); static const size_t bucket_count; private: typedef std::map port_counts_t; port_counts_t port_counts; uint64_t data_bytes_ingested; std::vector buckets; bool buckets_dirty; void refresh_buckets(); }; #endif tcpflow/src/netviz/address_histogram.h0000644000175000017500000000165412263701151017127 0ustar dimadima/** * 
// Fixed-size histogram of the busiest addresses taken from an iptree.
// The constructor (address_histogram.cpp) sorts the tree's entries with
// iptree_node_comparator and keeps at most bucket_count of them.
class address_histogram {
public:
    // build the histogram from a snapshot of `tree`
    address_histogram(const iptree &tree);

    // ordering used to rank entries; see the definition for the exact rule
    class iptree_node_comparator {
    public:
        bool operator()(const iptree::addr_elem &a, const iptree::addr_elem &b);
    };

    // maximum number of buckets kept
    static const size_t bucket_count;

    // bucket at `index`
    const iptree::addr_elem &at(size_t index) const;
    // number of buckets held
    size_t size() const;
    // total recorded at construction time (tree.sum())
    uint64_t ingest_count() const;

    // NOTE(review): the element type appears to have been lost in extraction;
    // presumably std::vector of iptree::addr_elem -- confirm against upstream
    typedef std::vector ipt_addrs;

    // read-only iteration over the buckets, forward and reverse
    ipt_addrs::const_iterator begin() const;
    ipt_addrs::const_iterator end() const;
    ipt_addrs::const_reverse_iterator rbegin() const;
    ipt_addrs::const_reverse_iterator rend() const;

private:
    ipt_addrs buckets;            // the retained entries
    uint64_t datagrams_ingested;  // value returned by ingest_count()
};
* * Author: Michael Shick * */ #include "config.h" #include "tcpflow.h" // for ssprintf #include "plot_view.h" #ifdef HAVE_LIBCAIRO #include const double plot_view::rgb_t::epsilon = 1.0 / 256.0; const double plot_view::text_line_base_width = 0.05; const double plot_view::span_arrow_angle = M_PI / 4.0; const double plot_view::span_stop_angle = M_PI / 2.0; const std::vector plot_view::size_suffixes = plot_view::build_size_suffixes(); void plot_view::render(cairo_t *cr, const plot_view::bounds_t &bounds) { cairo_matrix_t original_matrix; cairo_get_matrix(cr, &original_matrix); // purple background for padding checking //cairo_set_source_rgb(cr, 0.50, 0.00, 0.50); //cairo_rectangle(cr, bounds.x, bounds.y, bounds.width, bounds.height); //cairo_fill(cr); double pad_left = width * pad_left_factor; double pad_top = height * pad_top_factor; double pad_bottom = height * pad_bottom_factor; double pad_right = width * pad_right_factor; // compute bounds for subclasses to render content into bounds_t content_bounds; content_bounds.x = bounds.x + pad_left; content_bounds.y = bounds.y + pad_top; content_bounds.width = bounds.width - pad_right - pad_left; content_bounds.height = bounds.height - pad_bottom - pad_top; cairo_text_extents_t title_extents; cairo_text_extents_t subtitle_extents; double font_size_title = title_font_size; cairo_translate(cr, bounds.x, bounds.y); double title_base_y = 0.0; if(title_on_bottom) { title_base_y = bounds.height - pad_bottom; } cairo_select_font_face(cr, "Sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); cairo_set_font_size(cr, font_size_title); cairo_set_source_rgb(cr, 0, 0, 0); cairo_text_extents(cr, title.c_str(), &title_extents); // Is the title too wide? 
double title_max_width = bounds.width * title_max_width_ratio; if(title_extents.width > title_max_width) { // scale the font size accordingly font_size_title *= title_max_width / title_extents.width; cairo_set_font_size(cr, font_size_title); cairo_text_extents(cr, title.c_str(), &title_extents); } // derive subtitle size and measure double font_size_subtitle = font_size_title * subtitle_font_size_factor; cairo_set_font_size(cr, font_size_subtitle); cairo_text_extents(cr, subtitle.c_str(), &subtitle_extents); double intertitle_padding = subtitle_extents.height * subtitle_y_pad_factor; cairo_set_font_size(cr, font_size_title); double title_padded_height = title_extents.height * title_y_pad_factor; // render title text cairo_move_to(cr, (bounds.width - title_extents.width) / 2.0, title_base_y + title_extents.height + (title_padded_height - title_extents.height) / 2); cairo_show_text(cr, title.c_str()); // render subtitle text cairo_set_font_size(cr, font_size_subtitle); cairo_move_to(cr, (bounds.width - subtitle_extents.width) / 2.0, title_base_y + ((title_padded_height - title_extents.height) / 2) + title_extents.height + intertitle_padding + subtitle_extents.height); cairo_show_text(cr, subtitle.c_str()); // render axis labels cairo_matrix_t unrotated_matrix; cairo_get_matrix(cr, &unrotated_matrix); cairo_text_extents_t axis_label_extents; cairo_set_font_size(cr, y_axis_font_size); cairo_text_extents(cr, y_label.c_str(), &axis_label_extents); double y_label_x = 0.0 + axis_label_extents.height; double y_label_centering_pad = ((content_bounds.height - axis_label_extents.width) / 2.0); double y_label_y = pad_top + y_label_centering_pad + axis_label_extents.width; cairo_move_to(cr, y_label_x, y_label_y); cairo_rotate(cr, -M_PI / 2.0); cairo_show_text(cr, y_label.c_str()); cairo_set_matrix(cr, &unrotated_matrix); // add y axis decoration // TODO not implemented for brevity cairo_set_font_size(cr, x_axis_font_size); cairo_text_extents(cr, x_label.c_str(), 
&axis_label_extents); double x_label_centering_pad = (content_bounds.width - axis_label_extents.width) / 2.0; double x_label_x = pad_left + x_label_centering_pad; double x_label_y = bounds.height; cairo_move_to(cr, x_label_x, x_label_y); cairo_show_text(cr, x_label.c_str()); // add x axis decoration if(x_axis_decoration == AXIS_SPAN_ARROW || x_axis_decoration == AXIS_SPAN_STOP) { double angle = span_arrow_angle; double line_width = x_axis_font_size * text_line_base_width; double tip_length = line_width * 10.0; if(x_axis_decoration == AXIS_SPAN_STOP) { angle = span_stop_angle; tip_length = line_width * 5.0; } double gap = line_width * 10.0; double x = x_label_x - gap; double y = x_label_y - axis_label_extents.height / 3.0; double pr_x, pr_y; // previous x and y positions // left of label cairo_move_to(cr, x, y); pr_x = x; pr_y = y; x = pr_x - (x_label_centering_pad - gap); y = pr_y; cairo_line_to(cr, x, y); pr_x = x; pr_y = y; x = pr_x + tip_length * sin(angle + M_PI / 2.0); y = pr_y + tip_length * cos(angle + M_PI / 2.0); cairo_line_to(cr, x, y); cairo_move_to(cr, pr_x, pr_y); x = pr_x + tip_length * sin(-angle + M_PI / 2.0); y = pr_y + tip_length * cos(-angle + M_PI / 2.0); cairo_line_to(cr, x, y); // right of label x = x_label_x + axis_label_extents.width + gap; y = x_label_y - axis_label_extents.height / 3.0; cairo_move_to(cr, x, y); pr_x = x; pr_y = y; x = pr_x + (x_label_centering_pad - gap); y = pr_y; cairo_line_to(cr, x, y); pr_x = x; pr_y = y; x = pr_x + tip_length * sin(angle - M_PI / 2.0); y = pr_y - tip_length * cos(angle - M_PI / 2.0); cairo_line_to(cr, x, y); cairo_move_to(cr, pr_x, pr_y); x = pr_x + tip_length * sin(-angle - M_PI / 2.0); y = pr_y - tip_length * cos(-angle - M_PI / 2.0); cairo_line_to(cr, x, y); cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_set_line_width(cr, line_width); cairo_stroke(cr); } // render ticks double tick_length = bounds.width * tick_length_factor; double tick_width = bounds.height * tick_width_factor; // y ticks (packet 
counts) cairo_set_font_size(cr, y_tick_font_size); // translate down so the top of the window aligns with the top of // the graph itself cairo_translate(cr, 0, pad_top); double y_height = bounds.height - pad_bottom - pad_top; double y_tick_spacing = 0.0; if(y_tick_labels.size() > 1) { y_tick_spacing = y_height / (double) (y_tick_labels.size() - 1); } for(size_t ii = 0; ii < y_tick_labels.size(); ii++) { cairo_text_extents_t label_extents; double yy = y_height - (((double) ii) * y_tick_spacing); std::string label = y_tick_labels.at(ii); cairo_text_extents(cr, label.c_str(), &label_extents); cairo_move_to(cr, (pad_left - tick_length - label_extents.width), yy + (label_extents.height / 2)); cairo_show_text(cr, label.c_str()); // tick mark cairo_rectangle(cr, pad_left - tick_length, yy - (tick_width / 2), tick_length, tick_width); cairo_fill(cr); } // right ticks (packet counts) cairo_set_font_size(cr, right_tick_font_size); if(right_tick_labels.size() > 1) { y_tick_spacing = y_height / (double) (right_tick_labels.size() - 1); } for(size_t ii = 0; ii < right_tick_labels.size(); ii++) { cairo_text_extents_t label_extents; double yy = y_height - (((double) ii) * y_tick_spacing); std::string label = right_tick_labels.at(ii); cairo_text_extents(cr, label.c_str(), &label_extents); cairo_move_to(cr, (bounds.width - pad_right + tick_length), yy + (label_extents.height / 2)); cairo_show_text(cr, label.c_str()); // tick mark cairo_rectangle(cr, bounds.width - pad_right, yy - (tick_width / 2), tick_length, tick_width); cairo_fill(cr); } cairo_set_matrix(cr, &original_matrix); cairo_translate(cr, bounds.x, bounds.y); // x ticks (time) // TODO prevent overlap cairo_set_font_size(cr, x_tick_font_size); cairo_translate(cr, pad_left, bounds.height - pad_bottom); double x_width = bounds.width - (pad_right + pad_left); double x_tick_spacing = x_width / (x_tick_labels.size() - 1); for(size_t ii = 0; ii < x_tick_labels.size(); ii++) { cairo_text_extents_t label_extents; double xx = ii * 
x_tick_spacing; const char *label = x_tick_labels.at(ii).c_str(); cairo_text_extents(cr, label, &label_extents); double pad = ((label_extents.height * x_tick_label_pad_factor) - label_extents.height) / 2; // prevent labels from running off the edge of the image double label_x = xx - (label_extents.width / 2.0); label_x = std::max(label_x, - pad_left); label_x = std::min(bounds.width - label_extents.width, label_x); cairo_move_to(cr, label_x, label_extents.height + pad); cairo_show_text(cr, label); } cairo_set_matrix(cr, &original_matrix); cairo_translate(cr, bounds.x, bounds.y); // render legend cairo_text_extents_t legend_label_extents; double chip_length = 0.0; // derive color chip size from largest label height for(size_t ii = 0; ii < legend.size(); ii++) { const legend_entry_t &entry = legend.at(ii); cairo_text_extents(cr, entry.label.c_str(), &legend_label_extents); chip_length = std::max(chip_length, legend_label_extents.height); } chip_length *= legend_chip_factor; cairo_translate(cr, bounds.width - (pad_right * 0.9), pad_top); cairo_set_font_size(cr, legend_font_size); for(size_t ii = 0; ii < legend.size(); ii++) { const legend_entry_t &entry = legend.at(ii); // chip cairo_set_source_rgb(cr, entry.color.r, entry.color.g, entry.color.b); cairo_rectangle(cr, 0, 0, chip_length, chip_length); cairo_fill(cr); // label cairo_set_source_rgb(cr, 0, 0, 0); cairo_text_extents(cr, entry.label.c_str(), &legend_label_extents); cairo_move_to(cr, chip_length * 1.2, (chip_length / 2.0) + (legend_label_extents.height / 2.0)); cairo_show_text(cr, entry.label.c_str()); // translate down for the next legend entry cairo_translate(cr, 0, chip_length); } cairo_set_source_rgb(cr, 0, 0, 0); cairo_set_matrix(cr, &original_matrix); // render axes and update content bounds double axis_width = bounds.height * axis_thickness_factor; cairo_rectangle(cr, content_bounds.x, content_bounds.y, axis_width, content_bounds.height); cairo_rectangle(cr, content_bounds.x, content_bounds.y + 
(content_bounds.height - axis_width), content_bounds.width, axis_width); // if there are right hand ticks, draw a right-hand axis if(right_tick_labels.size() > 0) { cairo_rectangle(cr, content_bounds.x + content_bounds.width - axis_width, content_bounds.y, axis_width, content_bounds.height); } cairo_fill(cr); content_bounds.x += axis_width; content_bounds.width -= axis_width; if(right_tick_labels.size() > 0) { content_bounds.width -= axis_width; } content_bounds.height -= axis_width; // render data! render_data(cr, content_bounds); } std::string plot_view::pretty_byte_total(uint64_t byte_count, uint8_t precision) { //// packet count/size uint64_t size_log_1000 = (uint64_t) (log(byte_count) / log(1000)); if(size_log_1000 >= size_suffixes.size()) { size_log_1000 = 0; } // only put decimal places if using a unit less granular than the byte (2.00 bytes looks silly) if(size_log_1000 == 0) { precision = 0; } return ssprintf("%.*f %sB", precision, (double) byte_count / pow(1000.0, (double) size_log_1000), size_suffixes.at(size_log_1000).c_str()); } std::string plot_view::pretty_byte_total(uint64_t byte_count) { return pretty_byte_total(byte_count, 2); } std::vector plot_view::build_size_suffixes() { std::vector v; v.push_back(""); v.push_back("K"); v.push_back("M"); v.push_back("G"); v.push_back("T"); v.push_back("P"); v.push_back("E"); return v; } #endif tcpflow/src/netviz/address_histogram.cpp0000644000175000017500000000447512263701151017466 0ustar dimadima/** * address_histogram.cpp: * * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "tcpip.h" #include #include #include #include "address_histogram.h" using namespace std; address_histogram::address_histogram(const iptree &tree) : buckets(), datagrams_ingested(0) { // convert iptree to suitable vector for count histogram iptree::histogram_t addresses; tree.get_histogram(addresses); if(addresses.size() <= bucket_count) { sort(addresses.begin(), addresses.end(), iptree_node_comparator()); } else { partial_sort(addresses.begin(), addresses.begin() + bucket_count, addresses.end(), iptree_node_comparator()); } buckets.clear(); vector::const_iterator it = addresses.begin(); for(size_t ii = 0; ii < bucket_count && it != addresses.end(); ii++, it++) { buckets.push_back(*it); } datagrams_ingested = tree.sum(); } const size_t address_histogram::bucket_count = 10; const iptree::addr_elem &address_histogram::at(size_t index) const { return buckets.at(index); } size_t address_histogram::size() const { return buckets.size(); } uint64_t address_histogram::ingest_count() const { return datagrams_ingested; } address_histogram::ipt_addrs::const_iterator address_histogram::begin() const { return buckets.begin(); } address_histogram::ipt_addrs::const_iterator address_histogram::end() const { return buckets.end(); } address_histogram::ipt_addrs::const_reverse_iterator address_histogram::rbegin() const { return buckets.rbegin(); } address_histogram::ipt_addrs::const_reverse_iterator address_histogram::rend() const { return buckets.rend(); } bool address_histogram::iptree_node_comparator::operator()(const iptree::addr_elem &a, const iptree::addr_elem &b) { if(a.count > b.count) { return true; } else if(a.count < b.count) { return false; } for(size_t ii = 0; ii < sizeof(a.addr); ii++) { if(a.addr[ii] > b.addr[ii]) { return true; } else if(a.addr[ii] < b.addr[ii]) { return false; } } return false; } #endif 
tcpflow/src/netviz/address_histogram_view.cpp0000644000175000017500000001356212263701151020515 0ustar dimadima/**
 * address_histogram_view.cpp:
 * Show packets received vs addr
 *
 * This source file is public domain, as it is not based on the original tcpflow.
 *
 * Author: Michael Shick
 *
 */

#include "config.h"

#ifdef HAVE_LIBCAIRO

#include "tcpflow.h"

// NOTE(review): the targets of the next two #include directives are missing in
// this copy of the file (angle-bracket header names appear to have been
// stripped) -- restore from upstream.
#include
#include

#include "address_histogram_view.h"

using namespace std;

/**
 * Create a view over the given histogram. Bar and CDF colors start out black
 * and the layout/label fields below are given fixed defaults.
 * NOTE(review): the assigned fields (subtitle, pad_*_factor, ...) are
 * presumably inherited from plot_view -- confirm against the header.
 */
address_histogram_view::address_histogram_view(const address_histogram &histogram_) :
    histogram(histogram_), bar_color(0.0, 0.0, 0.0), cdf_color(0.0, 0.0, 0.0)
{
    subtitle = "";
    title_on_bottom = true;
    pad_left_factor = 0.1;
    pad_right_factor = 0.1;
    pad_top_factor = 0.5;
    x_label = "";
    y_label = "";
    y_tick_font_size = 6.0;
    right_tick_font_size = 6.0;
}

// layout constants used by render_data() and bucket_view::render_label()
const double address_histogram_view::bar_space_factor = 1.2;
const size_t address_histogram_view::compressed_ip6_str_max_len = 16;
const double address_histogram_view::cdf_line_width = 0.5;
const double address_histogram_view::data_width_factor = 0.85;

/**
 * Append the y-axis and right-axis tick labels, then delegate to
 * plot_view::render().
 * Labels are appended on every call, so calling render() more than once on
 * the same view adds them again.
 */
void address_histogram_view::render(cairo_t *cr, const bounds_t &bounds)
{
    y_tick_labels.push_back(plot_view::pretty_byte_total(0));
    if(histogram.size() > 0) {
        // bucket 0 is used as the upper tick label
        y_tick_labels.push_back(plot_view::pretty_byte_total(histogram.at(0).count, 0));
    }

    right_tick_labels.push_back("0%");
    right_tick_labels.push_back("100%");

    plot_view::render(cr, bounds);
}

/**
 * Draw the bars, the cumulative (CDF) line and the bar labels inside bounds.
 * Nothing is drawn when the histogram is empty or bucket 0 has a zero count.
 */
void address_histogram_view::render_data(cairo_t *cr, const bounds_t &bounds)
{
    if(histogram.size() < 1 || histogram.at(0).count == 0) {
        return;
    }

    // bars occupy only data_width_factor of the available width
    double data_width = bounds.width * data_width_factor;
    double data_offset = 0;
    bounds_t data_bounds(bounds.x + data_offset, bounds.y,
            data_width, bounds.height);

    // horizontal slot per bucket; the bar is centered in its slot
    double offset_unit = data_bounds.width / histogram.size();
    double bar_width = offset_unit / bar_space_factor;
    double space_width = (offset_unit - bar_width) / 2.0;
    // bar heights are scaled relative to bucket 0
    uint64_t greatest = histogram.at(0).count;
    unsigned int index = 0;

    // the CDF starts at the bottom-left corner
    double cdf_last_x = bounds.x, cdf_last_y = bounds.y + data_bounds.height;
    for(address_histogram::ipt_addrs::const_iterator it = histogram.begin();
            it != histogram.end(); it++) {
        double bar_height = (((double) it->count) / ((double) greatest)) * data_bounds.height;

        // bar
        double bar_x = data_bounds.x + (index * offset_unit + space_width);
        double bar_y = data_bounds.y + (data_bounds.height - bar_height);
        bounds_t bar_bounds(bar_x, bar_y, bar_width, bar_height);

        bucket_view view(*it, bar_color);
        view.render(cr, bar_bounds);

        // CDF
        double cdf_x = cdf_last_x + offset_unit;
        // account for left and right padding of bars
        if(index == 0) {
            cdf_x += data_offset;
        }
        if(index == histogram.size() - 1) {
            // the last step extends to the right edge of the full bounds
            cdf_x = bounds.x + bounds.width;
        }
        // each bucket raises the line by its share of the ingest total
        double cdf_y = cdf_last_y - ((double) it->count / (double) histogram.ingest_count()) *
                data_bounds.height;

        cairo_move_to(cr, cdf_last_x, cdf_last_y);
        // don't draw over the left-hand y axis
        if(index == 0) {
            cairo_move_to(cr, cdf_last_x, cdf_y);
        }
        else {
            cairo_line_to(cr, cdf_last_x, cdf_y);
        }
        cairo_line_to(cr, cdf_x, cdf_y);

        // the source color is set here, after view.render() changed it for the bar
        cairo_set_source_rgb(cr, cdf_color.r, cdf_color.g, cdf_color.b);
        cairo_set_line_width(cr, cdf_line_width);
        cairo_stroke(cr);

        cdf_last_x = cdf_x;
        cdf_last_y = cdf_y;

        index++;
    }

    index = 0;
    // labels must be done after the fact to avoid awkward interaction with the CDF
    for(address_histogram::ipt_addrs::const_iterator it = histogram.begin();
            it != histogram.end(); it++) {
        // same bar geometry as in the first pass
        double bar_height = (((double) it->count) / ((double) greatest)) * data_bounds.height;
        double bar_x = data_bounds.x + (index * offset_unit + space_width);
        double bar_y = data_bounds.y + (data_bounds.height - bar_height);
        bounds_t bar_bounds(bar_x, bar_y, bar_width, bar_height);

        bucket_view view(*it, bar_color);
        view.render_label(cr, bar_bounds);

        index++;
    }
}

/** Return the histogram this view renders. */
const address_histogram &address_histogram_view::get_data() const
{
    return histogram;
}

/**
 * Short textual form built from address bytes 0-3 and 14-15 only:
 * "<bytes 0,1>:<bytes 2,3>...<bytes 14,15>", each pair printed as one hex
 * number. Used by render_label() for long non-IPv4 labels.
 */
string address_histogram_view::compressed_ip6_str(iptree::addr_elem address)
{
    return ssprintf("%x:%x...%x", (address.addr[0] << 8) + address.addr[1],
            (address.addr[2] << 8) + address.addr[3],
            (address.addr[14] << 8) + address.addr[15]);
}

// bucket view

const double address_histogram_view::bucket_view::label_font_size = 6.0;

/** Fill the rectangle described by bounds with this bucket's color. */
void address_histogram_view::bucket_view::render(cairo_t *cr, const bounds_t &bounds)
{
    cairo_set_source_rgb(cr, color.r, color.g, color.b);
    cairo_rectangle(cr, bounds.x, bounds.y, bounds.width, bounds.height);
    cairo_fill(cr);
}

/**
 * Draw the bucket's address as a rotated label on a white box above the bar.
 * The transformation matrix is saved on entry and restored before returning.
 */
void address_histogram_view::bucket_view::render_label(cairo_t *cr, const bounds_t &bounds)
{
    cairo_matrix_t unrotated_matrix;
    cairo_get_matrix(cr, &unrotated_matrix);
    cairo_rotate(cr, -M_PI / 4.0);

    string label = bucket.str();
    // non-IPv4 labels longer than compressed_ip6_str_max_len are abbreviated
    if(!bucket.is4() && label.length() > compressed_ip6_str_max_len) {
        label = compressed_ip6_str(bucket);
    }

    cairo_set_font_size(cr, label_font_size);

    cairo_text_extents_t label_extents;
    cairo_text_extents(cr, label.c_str(), &label_extents);

    // anchor: horizontal middle of the bar, slightly above its top edge
    double label_x = bounds.x + bounds.width / 2.0;
    double label_y = bounds.y - 2.0;
    // convert the anchor into the rotated user space set up above
    cairo_device_to_user(cr, &label_x, &label_y);

    // white background box behind the text (filled, then stroked to pad it)
    cairo_set_source_rgb(cr, 1.0, 1.0, 1.0);
    cairo_rectangle(cr, label_x, label_y, label_extents.width, -label_extents.height);
    cairo_fill(cr);
    cairo_rectangle(cr, label_x, label_y, label_extents.width, -label_extents.height);
    cairo_set_line_width(cr, 2.0);
    cairo_stroke(cr);

    cairo_move_to(cr, label_x, label_y);

    cairo_set_source_rgb(cr, 0.0, 0.0, 0.0);
    cairo_show_text(cr, label.c_str());

    cairo_set_matrix(cr, &unrotated_matrix);
}

#endif
tcpflow/src/netviz/port_histogram_view.h0000644000175000017500000000311112263701151017506 0ustar dimadima/**
 * port_histogram_view.h:
 *
 * This source file is public domain, as it is not based on the original tcpflow.
*
 * Author: Michael Shick
 *
 */

#ifndef PORT_HISTOGRAM_VIEW_H
#define PORT_HISTOGRAM_VIEW_H

#include "config.h"

#ifdef HAVE_LIBCAIRO

#include "plot_view.h"
#include "port_histogram.h"

// plot_view specialization that renders a port_histogram.
// NOTE(review): the template arguments of the std::map types below are missing
// in this copy of the file -- restore from upstream.
class port_histogram_view : public plot_view {
public:
    port_histogram_view(port_histogram &histogram_,
            const std::map &color_map_, const rgb_t &default_color,
            const rgb_t &cdf_color_);

    // view of a single histogram bucket (one port and its count)
    class bucket_view {
    public:
        bucket_view(const port_histogram::port_count &bucket_,
                const rgb_t &color_) :
            bucket(bucket_), color(color_), chip_height(0.0), chip_offset(0.0) {}

        // both members are references: the referenced objects must outlive
        // this bucket_view
        const port_histogram::port_count &bucket;
        const rgb_t &color;
        // initialized to 0.0 by the constructor
        double chip_height;
        double chip_offset;

        static const double label_font_size;
        static const double chip_width_factor;

        void render(cairo_t *cr, const bounds_t &bounds);
        void render_label(cairo_t *cr, const bounds_t &bounds);
    };

    // held by reference: the histogram, color map and colors passed to the
    // constructor must outlive the view
    port_histogram &histogram;
    const std::map &color_map;
    const rgb_t &default_color;
    const rgb_t &cdf_color;

    // layout constants; values are defined outside this header
    static const double bar_space_factor;
    static const double bar_chip_size_factor;
    static const double cdf_line_width;
    static const double data_width_factor;

    void render(cairo_t *cr, const bounds_t &bounds);
    void render_data(cairo_t *cr, const bounds_t &bounds);
    // returns the histogram reference held by this view's `histogram` member
    // NOTE(review): presumably -- the definition is not visible here
    port_histogram &get_data();
};

#endif

#endif
tcpflow/src/netviz/net_map.h0000644000175000017500000000065312263701151015046 0ustar dimadima/**
 * net_map.h:
 * Show map of network traffic by host
 *
 * This source file is public domain, as it is not based on the original tcpflow.
 *
 * Author: Michael Shick
 *
 */

#ifndef NET_MAP_H
#define NET_MAP_H

#include "plot_view.h"

// Declares a packet sink (ingest_packet) and a cairo render entry point.
// The class has no data members in this header.
class net_map {
public:
    net_map() {}

    void ingest_packet(const be13::packet_info &pi);
    void render(cairo_t *cr, const plot_view::bounds_t &bounds);
};

#endif
tcpflow/src/netviz/port_histogram.cpp0000644000175000017500000000443312263701151017017 0ustar dimadima/**
 * port_histogram.cpp:
 * Show packets received vs port
 *
 * This source file is public domain, as it is not based on the original tcpflow.
* * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "port_histogram.h" #include #include using namespace std; const size_t port_histogram::bucket_count = 10; bool port_histogram::descending_counts::operator()(const port_count &a, const port_count &b) { if(a.count > b.count) { return true; } if(a.count < b.count) { return false; } return a.port < b.port; } void port_histogram::increment(uint16_t port, uint64_t delta) { port_counts[port] += delta; data_bytes_ingested += delta; buckets_dirty = true; } const port_histogram::port_count &port_histogram::at(size_t index) { refresh_buckets(); return buckets.at(index); } size_t port_histogram::size() { refresh_buckets(); return buckets.size(); } uint64_t port_histogram::ingest_count() const { return data_bytes_ingested; } port_histogram::port_count_vector::const_iterator port_histogram::begin() { refresh_buckets(); return buckets.begin(); } port_histogram::port_count_vector::const_iterator port_histogram::end() { refresh_buckets(); return buckets.end(); } port_histogram::port_count_vector::const_reverse_iterator port_histogram::rbegin() { refresh_buckets(); return buckets.rbegin(); } port_histogram::port_count_vector::const_reverse_iterator port_histogram::rend() { refresh_buckets(); return buckets.rend(); } void port_histogram::refresh_buckets() { if(!buckets_dirty) { return; } buckets.clear(); for(port_counts_t::const_iterator it = port_counts.begin(); it != port_counts.end(); it++) { buckets.push_back(port_count(it->first, it->second)); } if(buckets.size() <= bucket_count) { sort(buckets.begin(), buckets.end(), descending_counts()); } else { partial_sort(buckets.begin(), buckets.begin() + bucket_count, buckets.end(), descending_counts()); } if(buckets.size() > bucket_count) { buckets.erase(buckets.begin() + bucket_count, buckets.end()); } buckets_dirty = false; } #endif tcpflow/src/netviz/packetfall.cpp0000644000175000017500000000202412263701151016056 0ustar dimadima/** * 
packetfall.cpp: * Show packets received vs port * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "packetfall.h" void packetfall::ingest_packet(const be13::packet_info &pi) { } void packetfall::render(cairo_t *cr, const plot_view::bounds_t &bounds) { cairo_set_source_rgb(cr, 0.67, 0.67, 0.67); cairo_rectangle(cr, bounds.x, bounds.y, bounds.width, bounds.height); cairo_fill(cr); double font_size = 16.0; std::string label = "pretty packetfall"; cairo_text_extents_t extents; cairo_set_font_size(cr, font_size); cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_text_extents(cr, label.c_str(), &extents); double text_x = bounds.x + (bounds.width - extents.width) / 2.0; double text_y = bounds.y + (bounds.height + extents.height) / 2.0; cairo_move_to(cr, text_x, text_y); cairo_show_text(cr, label.c_str()); } #endif tcpflow/src/netviz/one_page_report.cpp0000644000175000017500000005460112263701151017130 0ustar dimadima/** * one_page_report.cpp: * Generate a one-page visualization from TCP packets * * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick * */ #include "config.h" #include "be13_api/utils.h" #include "plot_view.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "tcpip.h" #include #include #include #include "one_page_report.h" using namespace std; const unsigned int one_page_report::max_bars = 100; const unsigned int one_page_report::port_colors_count = 4; // string constants const string one_page_report::title_version = PACKAGE_NAME " " PACKAGE_VERSION; const string one_page_report::generic_legend_format = "Port %d"; const vector one_page_report::display_transports = one_page_report::build_display_transports(); // ratio constants const double one_page_report::page_margin_factor = 0.05; const double one_page_report::line_space_factor = 0.25; const double one_page_report::histogram_pad_factor_y = 1.1; const double one_page_report::address_histogram_width_divisor = 2.2; // size constants const double one_page_report::packet_histogram_height = 100.0; const double one_page_report::address_histogram_height = 125.0; const double one_page_report::port_histogram_height = 100.0; const double one_page_report::legend_height = 16.0; // color constants const plot_view::rgb_t one_page_report::default_color(0.67, 0.67, 0.67); const plot_view::rgb_t one_page_report::color_orange(1.00, 0.47, 0.00); const plot_view::rgb_t one_page_report::color_red(1.00, 0.00, 0.00); const plot_view::rgb_t one_page_report::color_magenta(0.75, 0.00, 0.60); const plot_view::rgb_t one_page_report::color_purple(0.58, 0.00, 0.75); const plot_view::rgb_t one_page_report::color_deep_purple(0.40, 0.00, 0.75); const plot_view::rgb_t one_page_report::color_blue(0.02, 0.00, 1.00); const plot_view::rgb_t one_page_report::color_teal(0.00, 0.75, 0.65); const plot_view::rgb_t one_page_report::color_green(0.02, 0.75, 0.00); const plot_view::rgb_t one_page_report::color_yellow(0.99, 1.00, 0.00); const plot_view::rgb_t one_page_report::color_light_orange(1.00, 0.73, 0.00); const plot_view::rgb_t 
one_page_report::cdf_color(0.00, 0.00, 0.00); one_page_report::one_page_report(int max_histogram_size) : source_identifier(), filename("report.pdf"), bounds(0.0, 0.0, 611.0, 792.0), header_font_size(8.0), top_list_font_size(8.0), histogram_show_top_n_text(3), packet_count(0), byte_count(0), earliest(), latest(), transport_counts(), ports_in_time_histogram(), color_labels(), packet_histogram(), src_port_histogram(), dst_port_histogram(), pfall(), netmap(), src_tree(max_histogram_size), dst_tree(max_histogram_size), port_aliases(), port_colormap() { earliest = (struct timeval) { 0 }; latest = (struct timeval) { 0 }; port_colormap[PORT_HTTP] = color_blue; port_colormap[PORT_HTTP_ALT_0] = color_blue; port_colormap[PORT_HTTP_ALT_1] = color_blue; port_colormap[PORT_HTTP_ALT_2] = color_blue; port_colormap[PORT_HTTP_ALT_3] = color_blue; port_colormap[PORT_HTTP_ALT_4] = color_blue; port_colormap[PORT_HTTP_ALT_5] = color_blue; port_colormap[PORT_HTTPS] = color_green; port_colormap[PORT_SSH] = color_purple; port_colormap[PORT_FTP_CONTROL] = color_red; port_colormap[PORT_FTP_DATA] = color_red; // build null alias map to avoid requiring special handling for unmapped ports for(int ii = 0; ii <= 65535; ii++) { port_aliases[ii] = ii; } } void one_page_report::ingest_packet(const be13::packet_info &pi) { if(earliest.tv_sec == 0 || (pi.ts.tv_sec < earliest.tv_sec || (pi.ts.tv_sec == earliest.tv_sec && pi.ts.tv_usec < earliest.tv_usec))) { earliest = pi.ts; } if(pi.ts.tv_sec > latest.tv_sec || (pi.ts.tv_sec == latest.tv_sec && pi.ts.tv_usec > latest.tv_usec)) { latest = pi.ts; } size_t packet_length = pi.pcap_hdr->len; packet_count++; byte_count += packet_length; transport_counts[pi.ether_type()] += packet_length; // should we handle VLANs? 
// break out TCP/IP info and feed child views // feed IP-only views uint8_t ip_ver = 0; if(pi.is_ip4()) { ip_ver = 4; src_tree.add((uint8_t *) pi.ip_data + pi.ip4_src_off, IP4_ADDR_LEN, packet_length); dst_tree.add((uint8_t *) pi.ip_data + pi.ip4_dst_off, IP4_ADDR_LEN, packet_length); } else if(pi.is_ip6()) { ip_ver = 6; src_tree.add((uint8_t *) pi.ip_data + pi.ip6_src_off, IP6_ADDR_LEN, packet_length); dst_tree.add((uint8_t *) pi.ip_data + pi.ip6_dst_off, IP6_ADDR_LEN, packet_length); } else { packet_histogram.insert(pi.ts, 0, packet_length, time_histogram::F_NON_TCP); return; } // feed TCP views uint16_t tcp_src = 0, tcp_dst = 0; bool has_tcp = false; switch(ip_ver) { case 4: if(!pi.is_ip4_tcp()) { break; } tcp_src = pi.get_ip4_tcp_sport(); tcp_dst = pi.get_ip4_tcp_dport(); has_tcp = true; break; case 6: if(!pi.is_ip6_tcp()) { break; } tcp_src = pi.get_ip6_tcp_sport(); tcp_dst = pi.get_ip6_tcp_dport(); has_tcp = true; break; default: return; } if(!has_tcp) { packet_histogram.insert(pi.ts, 0, packet_length, time_histogram::F_NON_TCP); return; } // if either the TCP source or destination is a pre-colored port, submit that // port to the time histogram port_colormap_t::const_iterator tcp_src_color = port_colormap.find(tcp_src); port_colormap_t::const_iterator tcp_dst_color = port_colormap.find(tcp_dst); in_port_t packet_histogram_port = tcp_src; // if dst is colored and src isn't; use dst instead if(tcp_dst_color != port_colormap.end() && tcp_src_color == port_colormap.end()) { packet_histogram_port = tcp_dst; } // if both are colored, alternate src and dst else if(tcp_src_color != port_colormap.end() && tcp_dst_color != port_colormap.end() && packet_count % 2 == 0) { packet_histogram_port = tcp_dst; } // record that this port appears in the histogram for legend building purposes ports_in_time_histogram[packet_histogram_port] = true; packet_histogram.insert(pi.ts, packet_histogram_port, packet_length); src_port_histogram.increment(tcp_src, packet_length); 
dst_port_histogram.increment(tcp_dst, packet_length); } void one_page_report::render(const string &outdir) { string fname = outdir + "/" + filename; cairo_surface_t *surface = cairo_pdf_surface_create(fname.c_str(), bounds.width, bounds.height); cairo_t *cr = cairo_create(surface); // // Configure views // double pad_size = bounds.width * page_margin_factor; plot_view::bounds_t pad_bounds(bounds.x + pad_size, bounds.y + pad_size, bounds.width - pad_size * 2, bounds.height - pad_size * 2); // iff a colored common port appears in the time histogram, add its color to the legend if(ports_in_time_histogram[PORT_HTTP] || ports_in_time_histogram[PORT_HTTP_ALT_0] || ports_in_time_histogram[PORT_HTTP_ALT_1] || ports_in_time_histogram[PORT_HTTP_ALT_2] || ports_in_time_histogram[PORT_HTTP_ALT_3] || ports_in_time_histogram[PORT_HTTP_ALT_4] || ports_in_time_histogram[PORT_HTTP_ALT_5]) { color_labels.push_back(legend_view::entry_t(color_blue, "HTTP", PORT_HTTP)); } if(ports_in_time_histogram[PORT_HTTPS]) { color_labels.push_back(legend_view::entry_t(color_green, "HTTPS", PORT_HTTPS)); } if(ports_in_time_histogram[PORT_SSH]) { color_labels.push_back(legend_view::entry_t(color_purple, "SSH", PORT_SSH)); } if(ports_in_time_histogram[PORT_FTP_DATA] || ports_in_time_histogram[PORT_FTP_CONTROL]) { color_labels.push_back(legend_view::entry_t(color_red, "FTP", PORT_FTP_DATA)); } // assign the top 4 source ports colors if they don't already have them vector::const_iterator it = src_port_histogram.begin(); for(size_t count = 0; count < port_colors_count && it != src_port_histogram.end(); it++) { port_colormap_t::const_iterator color = port_colormap.find(it->port); if(color == port_colormap.end()) { string label = ssprintf(generic_legend_format.c_str(), it->port); switch(count) { case 0: if(ports_in_time_histogram[it->port]) { color_labels.push_back(legend_view::entry_t(color_orange, label, it->port)); } port_colormap[it->port] = color_orange; break; case 1: 
if(ports_in_time_histogram[it->port]) { color_labels.push_back(legend_view::entry_t(color_magenta, label, it->port)); } port_colormap[it->port] = color_magenta; break; case 2: if(ports_in_time_histogram[it->port]) { color_labels.push_back(legend_view::entry_t(color_deep_purple, label, it->port)); } port_colormap[it->port] = color_deep_purple; break; case 3: if(ports_in_time_histogram[it->port]) { color_labels.push_back(legend_view::entry_t(color_teal, label, it->port)); } port_colormap[it->port] = color_teal; break; default: break; } count++; } } sort(color_labels.begin(), color_labels.end()); // time histogram double condension_factor = (double) packet_histogram.non_sparse_size() / (double) max_bars; if(condension_factor > 1.1) { // condense only by whole numbers to avoid messing up bar labels packet_histogram.condense(((int) condension_factor) + 1); } time_histogram_view th_view(packet_histogram, port_colormap, default_color, cdf_color); // color legend legend_view lg_view(color_labels); // address histograms // histograms are built from iptree here address_histogram src_addr_histogram(src_tree); address_histogram dst_addr_histogram(dst_tree); address_histogram_view src_ah_view(src_addr_histogram); if(src_addr_histogram.size() > 0) { src_ah_view.title = "Top Source Addresses"; } else { src_ah_view.title = "No Source Addresses"; } src_ah_view.bar_color = default_color; src_ah_view.cdf_color = cdf_color; address_histogram_view dst_ah_view(dst_addr_histogram); if(dst_addr_histogram.size() > 0) { dst_ah_view.title = "Top Destination Addresses"; } else { dst_ah_view.title = "No Destination Addresses"; } dst_ah_view.bar_color = default_color; dst_ah_view.cdf_color = cdf_color; // port histograms port_histogram_view sp_view(src_port_histogram, port_colormap, default_color, cdf_color); port_histogram_view dp_view(dst_port_histogram, port_colormap, default_color, cdf_color); if(src_port_histogram.size()) { sp_view.title = "Top Source Ports"; } else { sp_view.title = "No 
Source Ports"; } if(dst_port_histogram.size()) { dp_view.title = "Top Destination Ports"; } else { dp_view.title = "No Destination Ports"; } // // run configured views through render pass // render_pass pass(*this, cr, pad_bounds); pass.render_header(); pass.render(th_view); pass.render(lg_view); if(getenv("DEBUG")) { pass.render_map(); pass.render_packetfall(); } pass.render(src_ah_view, dst_ah_view); pass.render(sp_view, dp_view); // cleanup cairo_destroy (cr); cairo_surface_destroy(surface); } void one_page_report::render_pass::render_header() { string formatted; // title double title_line_space = report.header_font_size * line_space_factor; //// version render_text_line(title_version, report.header_font_size, title_line_space); //// input formatted = ssprintf("Input: %s", report.source_identifier.c_str()); render_text_line(formatted.c_str(), report.header_font_size, title_line_space); //// date generated time_t gen_unix = time(0); struct tm gen_time = *localtime(&gen_unix); formatted = ssprintf("Generated: %04d-%02d-%02d %02d:%02d:%02d", 1900 + gen_time.tm_year, 1 + gen_time.tm_mon, gen_time.tm_mday, gen_time.tm_hour, gen_time.tm_min, gen_time.tm_sec); render_text_line(formatted.c_str(), report.header_font_size, title_line_space); //// trailing pad end_of_content += title_line_space * 4; // quick stats //// date range time_t tstart = report.earliest.tv_sec; struct tm start; localtime_r(&tstart,&start); time_t tstop = report.latest.tv_sec; struct tm stop; localtime_r(&tstop,&stop); formatted = ssprintf("Date range: %04d-%02d-%02d %02d:%02d:%02d -- %04d-%02d-%02d %02d:%02d:%02d", 1900 + start.tm_year, 1 + start.tm_mon, start.tm_mday, start.tm_hour, start.tm_min, start.tm_sec, 1900 + stop.tm_year, 1 + stop.tm_mon, stop.tm_mday, stop.tm_hour, stop.tm_min, stop.tm_sec); render_text_line(formatted.c_str(), report.header_font_size, title_line_space); //// packet count/size formatted = ssprintf("Packets analyzed: %s (%s)", 
comma_number_string(report.packet_count).c_str(), plot_view::pretty_byte_total(report.byte_count).c_str()); render_text_line(formatted.c_str(), report.header_font_size, title_line_space); //// protocol breakdown uint64_t transport_total = 0; for(map::const_iterator ii = report.transport_counts.begin(); ii != report.transport_counts.end(); ii++) { transport_total += ii->second; } stringstream ss; unsigned int percentage = 0; uint64_t classified_total = 0; ss << "Transports: "; if(transport_total > 0) { for(vector::const_iterator it = display_transports.begin(); it != display_transports.end(); it++) { uint64_t count = report.transport_counts[it->ethertype]; classified_total += count; percentage = (unsigned int) (((double) count / (double) transport_total) * 100.0); if(percentage > 0) { ss << it->name << " " << percentage << "% "; } } percentage = (unsigned int) (((double) (transport_total - classified_total) / transport_total) * 100.0); if(percentage > 0) { ss << "Other " << percentage << "% "; } } formatted = ss.str(); render_text_line(formatted.c_str(), report.header_font_size, title_line_space); // trailing pad for entire header end_of_content += title_line_space * 4; } void one_page_report::render_pass::render_text(string text, double font_size, double x_offset, cairo_text_extents_t &rendered_extents) { cairo_set_font_size(surface, font_size); cairo_set_source_rgb(surface, 0.0, 0.0, 0.0); cairo_text_extents(surface, text.c_str(), &rendered_extents); cairo_move_to(surface, surface_bounds.x + x_offset, surface_bounds.y + end_of_content + rendered_extents.height); cairo_show_text(surface, text.c_str()); } void one_page_report::render_pass::render_text_line(string text, double font_size, double line_space) { cairo_text_extents_t extents; render_text(text, font_size, 0.0, extents); end_of_content += extents.height + line_space; } void one_page_report::render_pass::render(time_histogram_view &view) { plot_view::bounds_t bnds(surface_bounds.x, surface_bounds.y + 
end_of_content, surface_bounds.width, packet_histogram_height); view.render(surface, bnds); end_of_content += bnds.height * histogram_pad_factor_y; } void one_page_report::render_pass::render_packetfall() { plot_view::bounds_t bnds(surface_bounds.x, surface_bounds.y + end_of_content, surface_bounds.width, packet_histogram_height); report.pfall.render(surface, bnds); end_of_content += bnds.height * histogram_pad_factor_y; } void one_page_report::render_pass::render_map() { plot_view::bounds_t bnds(surface_bounds.x, surface_bounds.y + end_of_content, surface_bounds.width, packet_histogram_height); report.netmap.render(surface, bnds); end_of_content += bnds.height * histogram_pad_factor_y; } void one_page_report::render_pass::render(address_histogram_view &left, address_histogram_view &right) { double width = surface_bounds.width / address_histogram_width_divisor; const address_histogram &left_data = left.get_data(); const address_histogram &right_data = right.get_data(); uint64_t total_datagrams = left_data.ingest_count(); plot_view::bounds_t left_bounds(surface_bounds.x, surface_bounds.y + end_of_content, width, address_histogram_height); left.render(surface, left_bounds); plot_view::bounds_t right_bounds(surface_bounds.x + (surface_bounds.width - width), surface_bounds.y + end_of_content, width, address_histogram_height); right.render(surface, right_bounds); end_of_content += max(left_bounds.height, right_bounds.height); // text stats string stat_line_format = "%d) %s - %s (%d%%)"; for(size_t ii = 0; ii < report.histogram_show_top_n_text; ii++) { cairo_text_extents_t left_extents, right_extents; if(left_data.size() > ii && left_data.at(ii).count > 0) { const iptree::addr_elem &addr = left_data.at(ii); uint8_t percentage = 0; percentage = (uint8_t) (((double) addr.count / (double) total_datagrams) * 100.0); string str = ssprintf(stat_line_format.c_str(), ii + 1, addr.str().c_str(), plot_view::pretty_byte_total(addr.count).c_str(), percentage); 
render_text(str.c_str(), report.top_list_font_size, left_bounds.x, left_extents); } if(right_data.size() > ii && right_data.at(ii).count > 0) { const iptree::addr_elem &addr = right_data.at(ii); uint8_t percentage = 0; percentage = (uint8_t) (((double) addr.count / (double) total_datagrams) * 100.0); string str = ssprintf(stat_line_format.c_str(), ii + 1, addr.str().c_str(), plot_view::pretty_byte_total(addr.count).c_str(), percentage); render_text(str.c_str(), report.top_list_font_size, right_bounds.x, right_extents); } if((left_data.size() > ii && left_data.at(ii).count > 0) || (right_data.size() > ii && right_data.at(ii).count > 0)) { end_of_content += max(left_extents.height, right_extents.height) * 1.5; } } end_of_content += max(left_bounds.height, right_bounds.height) * (histogram_pad_factor_y - 1.0); } void one_page_report::render_pass::render(port_histogram_view &left, port_histogram_view &right) { port_histogram &left_data = left.get_data(); port_histogram &right_data = right.get_data(); uint64_t total_bytes = left_data.ingest_count(); double width = surface_bounds.width / address_histogram_width_divisor; plot_view::bounds_t left_bounds(surface_bounds.x, surface_bounds.y + end_of_content, width, port_histogram_height); left.render(surface, left_bounds); plot_view::bounds_t right_bounds(surface_bounds.x + (surface_bounds.width - width), surface_bounds.y + end_of_content, width, port_histogram_height); right.render(surface, right_bounds); end_of_content += max(left_bounds.height, right_bounds.height); // text stats string stat_line_format = "%d) %d - %s (%d%%)"; for(size_t ii = 0; ii < report.histogram_show_top_n_text; ii++) { cairo_text_extents_t left_extents, right_extents; if(left_data.size() > ii && left_data.at(ii).count > 0) { port_histogram::port_count port = left_data.at(ii); uint8_t percentage = 0; percentage = (uint8_t) (((double) port.count / (double) total_bytes) * 100.0); string str = ssprintf(stat_line_format.c_str(), ii + 1, port.port, 
plot_view::pretty_byte_total(port.count).c_str(), percentage); render_text(str.c_str(), report.top_list_font_size, left_bounds.x, left_extents); } if(right_data.size() > ii && right_data.at(ii).count > 0) { port_histogram::port_count port = right_data.at(ii); uint8_t percentage = 0; percentage = (uint8_t) (((double) port.count / (double) total_bytes) * 100.0); string str = ssprintf(stat_line_format.c_str(), ii + 1, port.port, plot_view::pretty_byte_total(port.count).c_str(), percentage); render_text(str.c_str(), report.top_list_font_size, right_bounds.x, right_extents); } if((left_data.size() > ii && left_data.at(ii).count > 0) || (right_data.size() > ii && right_data.at(ii).count > 0)) { end_of_content += max(left_extents.height, right_extents.height) * 1.5; } } end_of_content += max(left_bounds.height, right_bounds.height) * (histogram_pad_factor_y - 1.0); } void one_page_report::render_pass::render(const legend_view &view) { plot_view::bounds_t view_bounds(surface_bounds.x, surface_bounds.y + end_of_content, surface_bounds.width, legend_height); view.render(surface, view_bounds); end_of_content += legend_height; } vector one_page_report::build_display_transports() { vector v; v.push_back(transport_type(ETHERTYPE_IP, "IPv4")); v.push_back(transport_type(ETHERTYPE_IPV6, "IPv6")); v.push_back(transport_type(ETHERTYPE_ARP, "ARP")); v.push_back(transport_type(ETHERTYPE_VLAN, "VLAN")); return v; } void one_page_report::dump(int dbg) { if(dbg){ std::cout << "src_tree:\n" << src_tree << "\n" << "dst_tree:\n" << dst_tree << "\n"; } } #endif tcpflow/src/netviz/one_page_report.h0000644000175000017500000001060612263701151016572 0ustar dimadima/** * one_page_report.h: * Show map of network traffic by host * * This source file is public domain, as it is not based on the original tcpflow. 
*
 * Author: Michael Shick
 *
 */

#ifndef ONE_PAGE_REPORT_H
#define ONE_PAGE_REPORT_H

#include "plot_view.h"
#include "time_histogram.h"
#include "time_histogram_view.h"
#include "address_histogram.h"
#include "address_histogram_view.h"
#include "port_histogram.h"
#include "port_histogram_view.h"
#include "packetfall.h"
#include "net_map.h"
#include "iptree.h"
#include "legend_view.h"

// Collects per-packet statistics (ingest_packet) and renders them as a
// single-page PDF report (render).
class one_page_report {
public:

    // an ethertype value together with the name shown for it in the report
    class transport_type {
    public:
        transport_type(uint16_t ethertype_, std::string name_) :
            ethertype(ethertype_), name(name_) {}
        uint16_t ethertype;
        std::string name;
    };

    // NOTE(review): the template arguments of the three typedefs below are
    // missing in this copy of the file -- restore from upstream.
    typedef std::map port_aliases_t;
    typedef std::map port_colormap_t;
    typedef std::vector transport_type_vector;

    // free-form description of the input, printed in the report header
    std::string source_identifier;
    // output file name; render() appends it to the output directory
    std::string filename;
    // page rectangle; its width and height are used to create the PDF surface
    plot_view::bounds_t bounds;
    double header_font_size;
    double top_list_font_size;
    // number of text rows printed under each address/port histogram
    unsigned int histogram_show_top_n_text;

    // a single render event: content moves down a bounded cairo surface as
    // indicated by end_of_content between render method invocations
    class render_pass {
    public:
        render_pass(one_page_report &report_, cairo_t *surface_,
                const plot_view::bounds_t &bounds_) :
            report(report_), surface(surface_), surface_bounds(bounds_),
            end_of_content(0.0) {}

        // draws one line of text and advances end_of_content
        void render_text_line(std::string text, double font_size,
                double line_space);
        // draws text at x_offset without advancing end_of_content
        void render_text(std::string text, double font_size, double x_offset,
                cairo_text_extents_t &rendered_extents);

        void render_header();
        void render(time_histogram_view &view);
        // the two-argument overloads draw the views side by side
        void render(address_histogram_view &left, address_histogram_view &right);
        void render(port_histogram_view &left, port_histogram_view &right);
        void render(const legend_view &view);
        void render_map();
        void render_packetfall();

        one_page_report &report;
        // despite the name this is the cairo drawing context, not a cairo_surface_t
        cairo_t *surface;
        plot_view::bounds_t surface_bounds;
        // vertical offset (from surface_bounds.y) where the next item starts
        double end_of_content;
    };
    // render_pass reads the private counters below
    friend class render_pass;

    one_page_report(int max_histogram_size);

    void ingest_packet(const be13::packet_info &pi);
    void render(const std::string &outdir);
    // NOTE(review): no definition of port_color() is visible in
    // one_page_report.cpp -- confirm it is defined or still needed.
    plot_view::rgb_t port_color(uint16_t port) const;
    void dump(int debug);

    static transport_type_vector build_display_transports();

    static const unsigned int max_bars;
    static const unsigned int port_colors_count;
    // string constants
    static const std::string title_version;
    static const std::string generic_legend_format;
    static const transport_type_vector display_transports;
    // ratio constants
    static const double page_margin_factor;
    static const double line_space_factor;
    static const double histogram_pad_factor_y;
    static const double address_histogram_width_divisor;
    // size constants
    static const double packet_histogram_height;
    static const double address_histogram_height;
    static const double port_histogram_height;
    static const double legend_height;
    // color constants
    static const plot_view::rgb_t default_color;
    static const plot_view::rgb_t color_orange;
    static const plot_view::rgb_t color_red;
    static const plot_view::rgb_t color_magenta;
    static const plot_view::rgb_t color_purple;
    static const plot_view::rgb_t color_deep_purple;
    static const plot_view::rgb_t color_blue;
    static const plot_view::rgb_t color_teal;
    static const plot_view::rgb_t color_green;
    static const plot_view::rgb_t color_yellow;
    static const plot_view::rgb_t color_light_orange;
    static const plot_view::rgb_t cdf_color;

private:

    // running totals maintained by ingest_packet()
    uint64_t packet_count;
    uint64_t byte_count;
    // timestamps of the earliest and latest packets seen
    struct timeval earliest;
    struct timeval latest;
    // NOTE(review): template arguments of the two maps below are missing in
    // this copy of the file. transport_counts is indexed by ether type and
    // ports_in_time_histogram by port number in one_page_report.cpp.
    std::map transport_counts;
    std::map ports_in_time_histogram;
    legend_view::entries_t color_labels;
    time_histogram packet_histogram;
    port_histogram src_port_histogram;
    port_histogram dst_port_histogram;
    packetfall pfall;
    net_map netmap;
public:
    // address trees for packet sources and destinations
    iptree src_tree;
    iptree dst_tree;
    port_aliases_t port_aliases;
    port_colormap_t port_colormap;
};

#endif
tcpflow/src/netviz/time_histogram.h0000644000175000017500000001150612263701151016435 0ustar dimadima/**
 * Interface for the timehistogram class
 * Currently this is a histogram that's specialized to create a stacked bar graph
 * with up to 2^16 different values on each bar.
* * Times are stored as 64-bit microseconds since January 1, 1970 * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * * History: * 2013-01-01 - Initial version by Mike Shick */ #ifndef TIME_HISTOGRAM_H #define TIME_HISTOGRAM_H #include "tcpflow.h" #include class time_histogram { public: time_histogram(); //typedef uint64_t count_t; // counts in a slot //typedef uint16_t port_t; // port number //typedef int32_t timescale_off_t; // ordinal offset within the histogram // parameter for...? class span_params { public: span_params(uint64_t usec_, uint64_t bucket_count_) : usec(usec_), bucket_count(bucket_count_) {} uint64_t usec; uint64_t bucket_count; }; typedef std::vector span_params_vector_t; // a bucket counts packets received in a given timeframe, organized by TCP port class bucket { public: typedef std::map counts_t; bucket() : counts(), portless_count(){}; uint64_t sum() const { /* this could be done with std::accumulate */ uint64_t count = 0; for(counts_t::const_iterator it=counts.begin();it!=counts.end();it++){ count += it->second; } count += portless_count; return count; }; counts_t counts; uint64_t portless_count; void increment(in_port_t port, uint64_t delta, unsigned int flags = 0x00) { if(flags & F_NON_TCP) { portless_count += delta; } else { counts[port] += delta; } } }; class histogram_map { public: typedef std::map buckets_t; buckets_t buckets; histogram_map(span_params span_) : buckets(), span(span_), bucket_width(span.usec / span.bucket_count), base_time(0), insert_count(0){} span_params span; uint64_t bucket_width; // in microseconds uint64_t base_time; // microseconds since Jan 1, 1970; set on first call to scale_timeval uint64_t insert_count; // of entire histogram uint64_t greatest_bucket_sum() const { uint64_t greatest = 0; for(buckets_t::const_iterator it = buckets.begin();it!=buckets.end();it++){ if(it->second->sum() > greatest) greatest = it->second->sum(); } return greatest; } /** 
convert timeval to a scaled time. */ uint32_t scale_timeval(const struct timeval &ts) { uint64_t raw_time = ts.tv_sec * (1000LL * 1000LL) + ts.tv_usec; if(base_time == 0) { base_time = raw_time - (bucket_width * ((uint64_t)(span.bucket_count * underflow_pad_factor))); // snap base time to nearest bucket_width to simplify bar labelling later uint64_t unit = span.usec / span.bucket_count; base_time = (base_time / unit) * unit; } if (raw_time < base_time) return -1; // underflow return (raw_time - base_time) / bucket_width; } // returns true if the insertion resulted in over/underflow bool insert(const struct timeval &ts, const in_port_t port, const uint64_t count = 1, const unsigned int flags = 0x00); }; void insert(const struct timeval &ts, const in_port_t port, const uint64_t count = 1, const unsigned int flags = 0x00); void condense(double factor); uint64_t usec_per_bucket() const; uint64_t packet_count() const; time_t start_date() const; time_t end_date() const; uint64_t tallest_bar() const; const bucket &at(uint32_t index) const; size_t size() const; size_t non_sparse_size() const; /* iterators for the buckets */ histogram_map::buckets_t::const_iterator begin() const; histogram_map::buckets_t::const_iterator end() const; histogram_map::buckets_t::const_reverse_iterator rbegin() const; histogram_map::buckets_t::const_reverse_iterator rend() const; static span_params_vector_t build_spans(); private: std::vector histograms; uint32_t best_fit_index; struct timeval earliest_ts, latest_ts; uint64_t insert_count; /** configuration: */ static const uint32_t bucket_count; static const float underflow_pad_factor; static const std::vector spans; // in microseconds static const bucket empty_bucket; public: static const unsigned int F_NON_TCP; }; #endif tcpflow/src/netviz/packetfall.h0000644000175000017500000000066412263701151015533 0ustar dimadima/** * packetfall.h: * Show packets received vs port * * This source file is public domain, as it is not based on the original 
tcpflow. * * Author: Michael Shick * */ #ifndef PACKETFALL_H #define PACKETFALL_H #include "plot_view.h" class packetfall { public: packetfall() {} void ingest_packet(const be13::packet_info &pi); void render(cairo_t *cr, const plot_view::bounds_t &bounds); }; #endif tcpflow/src/netviz/legend_view.cpp0000644000175000017500000000577612263701151016261 0ustar dimadima/** * legend_view.cpp: * Show packets received vs port * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "legend_view.h" using namespace std; const string legend_view::empty_legend_label = "No TCP"; const double legend_view::base_font_size = 6.0; const double legend_view::chip_length = 8.0; const double legend_view::chip_label_space = 4.0; const double legend_view::inter_item_space = 12.0; const double legend_view::padding = 8.0; const double legend_view::border_width = 0.5; const plot_view::rgb_t legend_view::border_color(0.67, 0.67, 0.67); void legend_view::render(cairo_t *cr, const plot_view::bounds_t &bounds) const { double font_size = base_font_size; if(entries.size() == 0) { font_size *= 2.0; } cairo_set_font_size(cr, font_size); double tallest = 0.0; double total_width = 0.0; for(entries_t::const_iterator it = entries.begin(); it != entries.end(); ++it) { cairo_text_extents_t extents; cairo_text_extents(cr, it->label.c_str(), &extents); total_width += chip_length + chip_label_space + extents.width; if(it + 1 != entries.end()) { total_width += inter_item_space; } if(extents.height > tallest) { tallest = extents.height; } } if(entries.size() == 0) { cairo_text_extents_t extents; cairo_text_extents(cr, empty_legend_label.c_str(), &extents); total_width += extents.width; tallest = extents.height; } double chip_y = bounds.y + ((bounds.height - chip_length) / 2.0); double label_y = bounds.y + ((bounds.height + tallest) / 2.0); double x = bounds.x + ((bounds.width - 
total_width) / 2.0); cairo_set_source_rgb(cr, border_color.r, border_color.g, border_color.b); cairo_set_line_width(cr, border_width); cairo_rectangle(cr, x, bounds.y, total_width + (padding * 2.0), bounds.height); cairo_stroke(cr); x += padding; for(entries_t::const_iterator it = entries.begin(); it != entries.end(); ++it) { cairo_text_extents_t extents; cairo_text_extents(cr, it->label.c_str(), &extents); const plot_view::rgb_t &color = it->color; cairo_set_source_rgb(cr, color.r, color.g, color.b); cairo_rectangle(cr, x, chip_y, chip_length, chip_length); cairo_fill(cr); x += chip_length + chip_label_space; cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_move_to(cr, x, label_y); cairo_show_text(cr, it->label.c_str()); x += extents.width + inter_item_space; } if(entries.size() == 0) { cairo_text_extents_t extents; cairo_text_extents(cr, empty_legend_label.c_str(), &extents); cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_move_to(cr, x, label_y); cairo_show_text(cr, empty_legend_label.c_str()); x += extents.width + inter_item_space; } } #endif tcpflow/src/netviz/time_histogram_view.h0000644000175000017500000000560312263701151017470 0ustar dimadima/** * time_histogram_view.h: * Make fancy time histograms * * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick * */ #ifndef TIME_HISTOGRAM_VIEW_H #define TIME_HISTOGRAM_VIEW_H #include "config.h" #ifdef HAVE_LIBCAIRO #include "plot_view.h" #include "time_histogram.h" #define SECOND_NAME "second" #define MINUTE_NAME "minute" #define HOUR_NAME "hour" #define DAY_NAME "day" #define WEEK_NAME "week" #define MONTH_NAME "month" #define YEAR_NAME "year" class time_histogram_view : public plot_view { public: typedef std::map colormap_t; time_histogram_view(const time_histogram &histogram_, const colormap_t &port_colors_, const rgb_t &default_color_, const rgb_t &cdf_color_); class time_unit { public: time_unit(std::string name_, uint64_t seconds_) : name(name_), seconds(seconds_) {} std::string name; uint64_t seconds; }; class si_prefix { public: si_prefix(std::string prefix_, uint64_t magnitude_) : prefix(prefix_), magnitude(magnitude_) {} std::string prefix; uint64_t magnitude; }; class bucket_view { public: bucket_view(const time_histogram::bucket &bucket_, const colormap_t &color_map_, const rgb_t &default_color_) : bucket(bucket_), color_map(color_map_), default_color(default_color_) {} const time_histogram::bucket &bucket; const colormap_t &color_map; const rgb_t &default_color; void render(cairo_t *cr, const bounds_t &bounds); }; const time_histogram &histogram; const colormap_t port_colors; const rgb_t default_color; const rgb_t cdf_color; static const uint8_t y_tick_count; static const double bar_space_factor; static const double cdf_line_width; static const std::vector time_units; static const std::vector si_prefixes; static const double blank_bar_line_width; static const rgb_t blank_bar_line_color; static const double bar_label_font_size; static const double bar_label_width_factor; static const rgb_t bar_label_normal_color; static const rgb_t bar_label_highlight_color; void render(cairo_t *cr, const bounds_t &bounds); void render_data(cairo_t *cr, const bounds_t &bounds); static std::string next_bar_label(const std::string &unit, unsigned 
&numeric_label, unsigned delta, rgb_t &label_color); private: // for labelling purposes, a bar is s wide std::string bar_time_unit; uint32_t bar_time_value; // if the bar time unit isn't exact, we can't label bars because they'll drift uint32_t bar_time_remainder; static std::vector build_time_units(); static std::vector build_si_prefixes(); }; #endif #endif tcpflow/src/netviz/plot_view.h0000644000175000017500000001215412263701151015432 0ustar dimadima/** * plotview.h: * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * */ #ifndef PLOT_VIEW_H #define PLOT_VIEW_H #ifdef HAVE_LIBCAIRO #ifdef HAVE_CAIRO_H #include #elif defined HAVE_CAIRO_CAIRO_H #include #endif #ifdef HAVE_CAIRO_PDF_H #include #elif defined HAVE_CAIRO_CAIRO_PDF_H #include #endif #include #include #include class plot_view { public: plot_view() : title("graph of things"), subtitle("x vs y"), x_label("x axis"), y_label("y axis"), x_tick_labels(), y_tick_labels(), right_tick_labels(), legend(), width(161.803), height(100.000), title_on_bottom(false), title_font_size(8.0), x_axis_font_size(8.0), y_axis_font_size(8.0), title_max_width_ratio(0.8), title_y_pad_factor(2.0), subtitle_y_pad_factor(0.2), subtitle_font_size_factor(0.4), axis_thickness_factor(0.002), tick_length_factor(0.0124), tick_width_factor(0.002), x_tick_label_pad_factor(4.0), y_tick_label_pad_factor(2.0), right_tick_label_pad_factor(2.0), x_tick_font_size(3.0), y_tick_font_size(3.0), right_tick_font_size(3.0), pad_left_factor(0.148), pad_top_factor(0.2), pad_bottom_factor(0.2), pad_right_factor(0.148), legend_chip_factor(1.2), legend_font_size(2.5), x_axis_decoration(AXIS_NO_DECO), y_axis_decoration(AXIS_NO_DECO) {} typedef enum { AXIS_NO_DECO = 0, AXIS_SPAN_ARROW, AXIS_SPAN_STOP } axis_decoration_t; class rgb_t { public: rgb_t() : r(0.0), g(0.0), b(0.0) {} rgb_t(const double r_, const double g_, const double b_) : r(r_), g(g_), b(b_) {} double r; double g; double b; static const 
double epsilon; // 1/256.0 (not inline due to -Wgnu) }; class legend_entry_t { public: legend_entry_t(const rgb_t color_, const std::string label_) : color(color_), label(label_) {} rgb_t color; std::string label; }; class bounds_t { public: bounds_t() : x(0.0), y(0.0), width(0.0), height(0.0) {} bounds_t(const double x_, const double y_, const double width_, const double height_) : x(x_), y(y_), width(width_), height(height_) {} double x; double y; double width; double height; }; std::string title, subtitle; std::string x_label, y_label; std::vector x_tick_labels, y_tick_labels, right_tick_labels; std::vector legend; // width and height are in pt double width, height; bool title_on_bottom; double title_font_size; double x_axis_font_size, y_axis_font_size; // Title text will be shrunk if needed such that it takes up no more // than this ratio of the image width double title_max_width_ratio; // multiple of title height to be allocated above graph double title_y_pad_factor; // multiple of the subtitle height that will separate the subtitle from // the title double subtitle_y_pad_factor; // multiple of the title font size for the subtitle font size double subtitle_font_size_factor; // axis scale double axis_thickness_factor; // size of scale ticks, in pt double tick_length_factor, tick_width_factor; // multiple of label dummy text length to allocate for spacing double x_tick_label_pad_factor, y_tick_label_pad_factor, right_tick_label_pad_factor; double x_tick_font_size, y_tick_font_size, right_tick_font_size; // non-dynamic padding for the right and bottom of graph double pad_left_factor, pad_top_factor, pad_bottom_factor, pad_right_factor; // legend double legend_chip_factor; double legend_font_size; // axis decoration axis_decoration_t x_axis_decoration, y_axis_decoration; static const double text_line_base_width; static const double span_arrow_angle; static const double span_stop_angle; static const std::vector size_suffixes; virtual ~plot_view() = 0; // render 
everything common to all plots (everything but the data) void render(cairo_t *cr, const bounds_t &bounds); // called by render(); subclass-specific data rendering virtual void render_data(cairo_t *cr, const bounds_t &bounds) = 0; // format a byte count for humans ( 12 MB etc) static std::string pretty_byte_total(uint64_t byte_count, uint8_t precision); static std::string pretty_byte_total(uint64_t byte_count); static std::vector build_size_suffixes(); }; inline plot_view::~plot_view() {} inline bool operator==(const plot_view::rgb_t &a, const plot_view::rgb_t &b) { return fabs(a.r - b.r) < plot_view::rgb_t::epsilon && fabs(a.g - b.g) < plot_view::rgb_t::epsilon && fabs(a.b - b.b) < plot_view::rgb_t::epsilon; } inline bool operator!=(const plot_view::rgb_t &a, const plot_view::rgb_t &b) { return !(a == b); } inline bool operator<(const plot_view::rgb_t &a, const plot_view::rgb_t &b) { return a.r < b.r || a.g < b.g || a.b < b.b; } #endif #endif tcpflow/src/netviz/time_histogram_view.cpp0000644000175000017500000004567212263701151020035 0ustar dimadima/** * time_histogram_view.cpp: * Make fancy time histograms * * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "time_histogram_view.h" time_histogram_view::time_histogram_view(const time_histogram &histogram_, const colormap_t &port_colors_, const rgb_t &default_color_, const rgb_t &cdf_color_) : histogram(histogram_), port_colors(port_colors_), default_color(default_color_), cdf_color(cdf_color_), bar_time_unit(), bar_time_value(), bar_time_remainder() { title = ""; subtitle = ""; pad_left_factor = 0.2; pad_top_factor = 0.1; y_tick_font_size = 5.0; right_tick_font_size = 6.0; x_axis_font_size = 8.0; x_axis_decoration = plot_view::AXIS_SPAN_STOP; y_label = ""; } const uint8_t time_histogram_view::y_tick_count = 5; const double time_histogram_view::bar_space_factor = 1.2; const double time_histogram_view::cdf_line_width = 0.5; const std::vector time_histogram_view::time_units = time_histogram_view::build_time_units(); const std::vector time_histogram_view::si_prefixes = time_histogram_view::build_si_prefixes(); const double time_histogram_view::blank_bar_line_width = 0.25; const time_histogram_view::rgb_t time_histogram_view::blank_bar_line_color(0.0, 0.0, 0.0); const double time_histogram_view::bar_label_font_size = 6.0; const double time_histogram_view::bar_label_width_factor = 0.8; const time_histogram_view::rgb_t time_histogram_view::bar_label_normal_color(0.0, 0.0, 0.0); const time_histogram_view::rgb_t time_histogram_view::bar_label_highlight_color(0.86, 0.08, 0.24); void time_histogram_view::render(cairo_t *cr, const bounds_t &bounds) { // // create x label based on duration of capture // uint64_t bar_interval = histogram.usec_per_bucket() / (1000 * 1000); // add a second to duration; considering a partial second a second makes // edge cases look nicer time_t duration = histogram.end_date() - histogram.start_date() + 1; if(histogram.packet_count() == 0) { x_label = "no packets received"; x_axis_decoration = plot_view::AXIS_SPAN_STOP; } else { std::stringstream ss; // how long does is the 
total capture? if(duration < 1) { ss << "<1 second"; } else { // the total time is represented by the two (or one) coursest appropriate units // example: // 5 hours, 10 minutes // 58 seconds // but never: // 5 hours. 10 minutes, 30 seconds // break the duration down into its constituent parts std::vector duration_values; std::vector duration_names; int remainder = duration; for(std::vector::const_reverse_iterator it = time_units.rbegin(); it != time_units.rend(); it++) { duration_values.push_back(remainder / it->seconds); duration_names.push_back(it->name); remainder %= it->seconds; } int print_count = 0; // find how many time units are worth printing (for comma insertion) for(std::vector::const_iterator it = duration_values.begin(); it != duration_values.end(); it++) { if(*it > 0) { print_count++; } // if we've seen a nonzero unit, and now a zero unit, abort because skipping // a unit is weird (2 months, 1 second) else if(print_count > 0) { break; } } // work back through the values and print the two coursest nonzero print_count = std::min(print_count, 2); int printed = 0; for(size_t ii = 0; ii < time_units.size(); ii++) { std::string name = duration_names.at(ii); uint64_t value = duration_values.at(ii); // skip over insignificant units if(value == 0 && printed == 0) { continue; } printed++; // don't actually print intermediate zero values (no 3 hours, 0 minutes, 30 seconds) if(value > 0) { ss << value << " " << name; } if(value > 1) { ss << "s"; } if(printed < print_count) { ss << ", "; } if(printed == print_count) { break; } } } // how long does each bar represent? 
if(bar_interval < 1 && duration >= 1) { ss << " (<1 second intervals)"; } else if(bar_interval >= 1) { for(std::vector::const_iterator it = time_units.begin(); it != time_units.end(); it++) { if(it + 1 == time_units.end() || bar_interval < (it+1)->seconds) { bar_time_unit = it->name; bar_time_value = bar_interval / it->seconds; bar_time_remainder = bar_interval % it->seconds; break; } } ss << " ("; if(bar_time_remainder != 0) { ss << "~"; } ss << bar_time_value << " " << bar_time_unit << " intervals)"; } x_label = ss.str(); } // // choose y axis tick labels // // scale raw bucket totals uint8_t unit_log_1000 = (uint8_t) (log(histogram.tallest_bar()) / log(1000)); if(unit_log_1000 >= si_prefixes.size()) { unit_log_1000 = 0; } si_prefix unit = si_prefixes.at(unit_log_1000); double y_scale_range = histogram.tallest_bar() / (double) unit.magnitude; double y_scale_interval = y_scale_range / (y_tick_count - 1); uint64_t next_value = 0; for(int ii = 0; ii < y_tick_count; ii++) { uint64_t value = next_value; double next_raw_value = (ii + 1) * y_scale_interval; next_value = (uint64_t) floor(next_raw_value + 0.5); if(value == next_value && ii < y_tick_count - 1) { continue; } std::string label = ssprintf("%d %sB", value, unit.prefix.c_str()); y_tick_labels.push_back(label); } right_tick_labels.push_back("0%"); right_tick_labels.push_back("100%"); plot_view::render(cr, bounds); } void time_histogram_view::render_data(cairo_t *cr, const bounds_t &bounds) { size_t bars = histogram.non_sparse_size(); double bar_allocation = bounds.width / (double) bars; // bar width with spacing double bar_width = bar_allocation / bar_space_factor; // bar width as rendered double bar_leading_pad = (bar_allocation - bar_width) / 2.0; time_histogram::histogram_map::buckets_t::const_iterator it = histogram.begin(); if(it == histogram.end()) { return; } uint32_t first_offset = it->first; double tallest_bar = (double) histogram.tallest_bar(); for(; it != histogram.end(); it++) { double bar_height = 
(double) it->second->sum() / tallest_bar * bounds.height; double bar_x = bounds.x + (it->first - first_offset) * bar_allocation + bar_leading_pad; double bar_y = bounds.y + (bounds.height - bar_height); bounds_t bar_bounds(bar_x, bar_y, bar_width, bar_height); bucket_view bar(*it->second, port_colors, default_color); bar.render(cr, bar_bounds); } unsigned bar_label_numeric = 0; int distinct_label_count = 0; // choose initial bar value if(bar_time_unit.length() > 0) { time_t start = histogram.start_date(); struct tm start_time = *localtime(&start); if(bar_time_unit == SECOND_NAME) { bar_label_numeric = start_time.tm_sec; distinct_label_count = 60; } else if(bar_time_unit == MINUTE_NAME) { bar_label_numeric = start_time.tm_min; distinct_label_count = 60; } else if(bar_time_unit == HOUR_NAME) { bar_label_numeric = start_time.tm_hour; distinct_label_count = 24; } else if(bar_time_unit == DAY_NAME) { bar_label_numeric = start_time.tm_wday; distinct_label_count = 7; } else if(bar_time_unit == MONTH_NAME) { bar_label_numeric = start_time.tm_mon; distinct_label_count = 12; } else if(bar_time_unit == YEAR_NAME) { bar_label_numeric = start_time.tm_year; } // snap label to same alignment of histogram bars bar_label_numeric -= (bar_label_numeric % bar_time_value); } // create bar lables so an appropriate font size can be selected std::vector bar_labels; std::vector bar_label_colors; // if bars are thinner than 10pt, thin out the bar labels appropriately int label_every_n_bars = ((int) (10.0 / bar_allocation)) + 1; unsigned label_bars_offset = 0; // find the offset that will cause the '00' label to appear if(distinct_label_count > 0) { label_bars_offset = ((distinct_label_count - bar_label_numeric) % (bar_time_value * label_every_n_bars)) / bar_time_value; } bar_label_numeric += (label_bars_offset * bar_time_value); double widest_bar_label = 0; double tallest_bar_label = 0; cairo_set_font_size(cr, bar_label_font_size); for(size_t ii = 0; ii < bars; ii++) { if(ii % 
label_every_n_bars != label_bars_offset) { continue; } rgb_t bar_label_color; std::string bar_label = next_bar_label(bar_time_unit, bar_label_numeric, bar_time_value * label_every_n_bars, bar_label_color); cairo_text_extents_t label_extents; cairo_text_extents(cr, bar_label.c_str(), &label_extents); if(label_extents.width > widest_bar_label) { widest_bar_label = label_extents.width; } if(label_extents.height > tallest_bar_label) { tallest_bar_label = label_extents.height; } // add to list for later rendering bar_labels.push_back(bar_label); bar_label_colors.push_back(bar_label_color); } // don't let labels be wider than bars double safe_bar_label_font_size = bar_label_font_size; double bar_label_descent = tallest_bar_label * 1.75; double target_width = bar_width * bar_label_width_factor; if(widest_bar_label > target_width) { double factor = target_width / widest_bar_label; safe_bar_label_font_size *= factor; bar_label_descent *= factor; } // if we're skipping bars for labelling, increase the label size appropriately double label_size_multiplier = pow(1.2, (double) (label_every_n_bars - 1)); safe_bar_label_font_size *= label_size_multiplier; bar_label_descent *= label_size_multiplier; // CDF and bar labels double accumulator = 0.0; double histogram_sum = (double) histogram.packet_count(); cairo_move_to(cr, bounds.x, bounds.y + bounds.height); for(size_t ii = 0; ii < bars; ii++) { const time_histogram::bucket bkt = histogram.at(ii + first_offset); accumulator += (double) bkt.sum() / histogram_sum; double x = bounds.x + ii * bar_allocation; double next_x = x + bar_allocation; double y = bounds.y + (1.0 - accumulator) * bounds.height; // don't draw over the left-hand y axis if(ii == 0) { cairo_move_to(cr, x, y); } else { cairo_line_to(cr, x, y); } cairo_line_to(cr, next_x, y); // draw bar label if(bar_time_unit.length() > 0 && bar_time_remainder == 0 && ii % label_every_n_bars == label_bars_offset) { std::string label = bar_labels.at(ii / label_every_n_bars); rgb_t 
color = bar_label_colors.at(ii / label_every_n_bars); cairo_set_font_size(cr, safe_bar_label_font_size); cairo_set_source_rgb(cr, color.r, color.g, color.b); cairo_text_extents_t label_extents; cairo_text_extents(cr, label.c_str(), &label_extents); double label_x = x + ((bar_allocation - label_extents.width) / 2.0); double label_y = bounds.y + bounds.height + bar_label_descent; cairo_move_to(cr, label_x, label_y); cairo_show_text(cr, label.c_str()); // move back to appropriate place for next CDF step cairo_move_to(cr, next_x, y); } } cairo_set_source_rgb(cr, cdf_color.r, cdf_color.g, cdf_color.b); cairo_set_line_width(cr, cdf_line_width); cairo_stroke(cr); } // create a new bar label based on numeric_label, then increment numeric_label // by delta example: when invoked with ("day", 0, 2), "S" for sunday is // returned and numeric_label is updated to 2 which will return "T" for tuesday // next time std::string time_histogram_view::next_bar_label(const std::string &unit, unsigned &numeric_label, unsigned delta, rgb_t &label_color) { std::string output; if(numeric_label < delta) { label_color = bar_label_highlight_color; } else { label_color = bar_label_normal_color; } if(unit == SECOND_NAME || unit == MINUTE_NAME) { output = ssprintf("%02d", numeric_label); numeric_label = (numeric_label + delta) % 60; } else if(unit == HOUR_NAME) { output = ssprintf("%02d", numeric_label); numeric_label = (numeric_label + delta) % 24; } else if(unit == DAY_NAME) { label_color = bar_label_normal_color; switch(numeric_label) { case 6: case 0: label_color = bar_label_highlight_color; output = "S"; break; case 1: output = "M"; break; case 2: output = "T"; break; case 3: output = "W"; break; case 4: output = "R"; break; case 5: output = "F"; break; } numeric_label = (numeric_label + delta) % 7; } else if(unit == MONTH_NAME) { switch(numeric_label) { case 0: output = "Jan"; break; case 1: output = "Feb"; break; case 2: output = "Mar"; break; case 3: output = "Apr"; break; case 4: output = 
"May"; break; case 5: output = "Jun"; break; case 6: output = "Jul"; break; case 7: output = "Aug"; break; case 8: output = "Sep"; break; case 9: output = "Oct"; break; case 10: output = "Nov"; break; case 11: output = "Dec"; break; } numeric_label = (numeric_label + delta) % 12; } else if(unit == YEAR_NAME) { if(delta > 20) { output = ssprintf("%04d", numeric_label); } else { output = ssprintf("%02d", numeric_label % 100); } numeric_label = (numeric_label + delta); } return output; } std::vector time_histogram_view::build_time_units() { std::vector output; output.push_back(time_unit(SECOND_NAME, 1L)); output.push_back(time_unit(MINUTE_NAME, 60L)); output.push_back(time_unit(HOUR_NAME, 60L * 60L)); output.push_back(time_unit(DAY_NAME, 60L * 60L * 24L)); output.push_back(time_unit(WEEK_NAME, 60L * 60L * 24L * 7L)); output.push_back(time_unit(MONTH_NAME, 60L * 60L * 24L * 30L)); output.push_back(time_unit(YEAR_NAME, 60L * 60L * 24L * 360L)); return output; } std::vector time_histogram_view::build_si_prefixes() { std::vector output; output.push_back(si_prefix("", 1LL)); output.push_back(si_prefix("K", 1000LL)); output.push_back(si_prefix("M", 1000LL * 1000LL)); output.push_back(si_prefix("G", 1000LL * 1000LL * 1000LL)); output.push_back(si_prefix("T", 1000LL * 1000LL * 1000LL * 1000LL)); output.push_back(si_prefix("P", 1000LL * 1000LL * 1000LL * 1000LL * 1000LL)); output.push_back(si_prefix("E", 1000LL * 1000LL * 1000LL * 1000LL * 1000LL * 1000LL)); return output; } // bucket view void time_histogram_view::bucket_view::render(cairo_t *cr, const bounds_t &bounds) { // how far up the bar have we rendered so far? 
double total_height = bounds.y + bounds.height; // if multiple sections of the same color follow, simply accumulate their height double height_accumulator = 0.0; rgb_t next_color = default_color; // The loop below is a bit confusing for(time_histogram::bucket::counts_t::const_iterator it = bucket.counts.begin(); it != bucket.counts.end();) { double height = bounds.height * ((double) it->second / (double) bucket.sum()); // on first section, preload the first color as the 'next' color if(it == bucket.counts.begin()) { colormap_t::const_iterator color_pair = color_map.find(it->first); if(color_pair != color_map.end()) { next_color = color_pair->second; } } // advance to the next color rgb_t color = next_color; next_color = default_color; // if there's a next bucket, get its color for the next color // next consolidate this section with the next if the colors match it++; if(it != bucket.counts.end()) { /* This gets after every bar except the last bar */ colormap_t::const_iterator color_pair = color_map.find(it->first); if(color_pair != color_map.end()) { next_color = color_pair->second; } if(color == next_color) { height_accumulator += height; continue; } } /* this gets run after every bar */ cairo_set_source_rgb(cr, color.r, color.g, color.b); // account for consolidated sections height += height_accumulator; height_accumulator = 0.0; cairo_rectangle(cr, bounds.x, total_height - height, bounds.width, height); cairo_fill(cr); total_height -= height; } // non-TCP packets if(bucket.portless_count > 0) { double height = bounds.height * ((double) bucket.portless_count / (double) bucket.sum()); cairo_set_source_rgb(cr, blank_bar_line_color.r, blank_bar_line_color.g, blank_bar_line_color.b); double offset = blank_bar_line_width / 2; cairo_set_line_width(cr, blank_bar_line_width); cairo_rectangle(cr, bounds.x + offset, total_height - height + offset, bounds.width - blank_bar_line_width, height - blank_bar_line_width); cairo_stroke(cr); } } #endif 
tcpflow/src/netviz/net_map.cpp0000644000175000017500000000200712263701151015374 0ustar dimadima/** * net_map.cpp: * Show map of network traffic by host * * This source file is public domain, as it is not based on the original tcpflow. * * Author: Michael Shick * */ #include "config.h" #ifdef HAVE_LIBCAIRO #include "tcpflow.h" #include "net_map.h" void net_map::ingest_packet(const be13::packet_info &pi) { } void net_map::render(cairo_t *cr, const plot_view::bounds_t &bounds) { cairo_set_source_rgb(cr, 0.67, 0.67, 0.67); cairo_rectangle(cr, bounds.x, bounds.y, bounds.width, bounds.height); cairo_fill(cr); double font_size = 16.0; std::string label = "pretty map"; cairo_text_extents_t extents; cairo_set_font_size(cr, font_size); cairo_set_source_rgb(cr, 0.0, 0.0, 0.0); cairo_text_extents(cr, label.c_str(), &extents); double text_x = bounds.x + (bounds.width - extents.width) / 2.0; double text_y = bounds.y + (bounds.height + extents.height) / 2.0; cairo_move_to(cr, text_x, text_y); cairo_show_text(cr, label.c_str()); } #endif tcpflow/src/netviz/legend_view.h0000644000175000017500000000227312263701151015713 0ustar dimadima/* * This source file is public domain, as it is not based on the original tcpflow. 
* * Author: Michael Shick */ #ifndef LEGEND_VIEW_H #define LEGEND_VIEW_H #include "plot_view.h" class legend_view { public: // legend_view::entry to everyone else class entry_t { public: entry_t(plot_view::rgb_t color_, std::string label_, uint16_t port_) : color(color_), label(label_), port(port_) {} plot_view::rgb_t color; std::string label; uint16_t port; }; typedef std::vector entries_t; legend_view(entries_t entries_) : entries(entries_) {} void render(cairo_t *cr, const plot_view::bounds_t &bounds) const; static const std::string empty_legend_label; static const double base_font_size; static const double chip_length; static const double chip_label_space; static const double inter_item_space; static const double padding; static const double border_width; static const plot_view::rgb_t border_color; private: const entries_t entries; }; inline bool operator<(const legend_view::entry_t &a, const legend_view::entry_t &b) { return a.port < b.port; } #endif tcpflow/src/datalink_wifi.h0000644000175000017500000000307012263701151014705 0ustar dimadima#ifndef DATALINK_WIFI_H #define DATALINK_WIFI_H #include #include #include "wifipcap.h" //#define DEBUG_WIFI class TFCB : public WifipcapCallbacks { private: public: bool opt_check_fcs; typedef struct mac_ssid { mac_ssid(const MAC &mac_,const std::string &ssid_):mac(mac_),ssid(ssid_){} const MAC mac; const std::string ssid; bool operator<(const struct mac_ssid &b) const{ if (mac < b.mac) return true; if (b.mac < mac) return false; return ssid < b.ssid; }; } mac_ssid_t; typedef struct { bool operator() (const struct mac_ssid &a, const struct mac_ssid &b) const { if (a.mac < b.mac) return true; if (b.mac < a.mac) return false; return a.ssid < b.ssid; } } mac_ssid_lt; typedef std::set mac_ssid_set_t; typedef std::map mac_ssid_map_t; mac_ssid_map_t mac_to_ssid; // mapping of macs to SSIDs static TFCB theTFCB; TFCB():opt_check_fcs(true),mac_to_ssid(){} virtual bool Check80211FCS(const WifiPacket &p) { return opt_check_fcs; } 
virtual void Handle80211(const WifiPacket &p,u_int16_t fc, const MAC& sa, const MAC& da, const MAC& ra, const MAC& ta, const u_char *ptr, size_t len) ; void HandleLLC(const WifiPacket &p,const struct llc_hdr_t *hdr, const u_char *rest, size_t len) ; void Handle80211MgmtBeacon(const WifiPacket &p,const mgmt_header_t *hdr, const mgmt_body_t *body) ; }; #endif tcpflow/src/NOTES.txt0000644000175000017500000000011112263701151013351 0ustar dimadima to update be13_api: cd gits/tcpflow/src/be13_api git pull origin master tcpflow/src/scan_md5.cpp0000644000175000017500000000212412263701151014123 0ustar dimadima/** * * scan_md5: * plug-in demonstration that shows how to write a simple plug-in scanner that calculates * the MD5 of each file.. */ #include "config.h" #include "bulk_extractor_i.h" #include "dfxml/src/hash_t.h" #include #include extern "C" void scan_md5(const class scanner_params &sp,const recursion_control_block &rcb) { if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){ std::cerr << "scan_md5 requires sp version " << scanner_params::CURRENT_SP_VERSION << "; " << "got version " << sp.sp_version << "\n"; exit(1); } if(sp.phase==scanner_params::PHASE_STARTUP){ sp.info->name = "md5"; sp.info->flags = scanner_info::SCANNER_DISABLED; return; /* No feature files created */ } #ifdef HAVE_EVP_GET_DIGESTBYNAME if(sp.phase==scanner_params::PHASE_SCAN){ static const std::string hash0(""); static const std::string hash1(""); if(sp.sxml){ (*sp.sxml) << hash0 << md5_generator::hash_buf(sp.sbuf.buf,sp.sbuf.bufsize).hexdigest() << hash1; } return; } #endif } tcpflow/src/tcpdemux.cpp0000644000175000017500000005641212263701151014274 0ustar dimadima/** * * tcpdemux.cpp * A tcpip demultiplier. * * This file is part of tcpflow by Simson Garfinkel, * originally by Jeremy Elson * * This source code is under the GNU Public License (GPL). See * LICENSE for details. 
* */ #include "tcpflow.h" #include "tcpip.h" #include "tcpdemux.h" #include #include #include /* static */ uint32_t tcpdemux::max_saved_flows = 100; /* static */ uint32_t tcpdemux::tcp_timeout = 0; tcpdemux::tcpdemux(): #ifdef HAVE_SQLITE3 db(),insert_flow(), #endif outdir("."),flow_counter(0),packet_counter(0), xreport(0),pwriter(0),max_open_flows(),max_fds(get_max_fds()-NUM_RESERVED_FDS), flow_map(),open_flows(),saved_flow_map(), saved_flows(),start_new_connections(false),opt(),fs() { } void tcpdemux::openDB() { #ifdef HAVE_SQLITE3 int rc = sqlite3_open("test.db", &db); if( rc ){ fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db)); db = 0; } /* Create SQL statement */ const char *sql = "CREATE TABLE connections (" "starttime TEXT NOT NULL," "endtime TEXT NOT NULL," "src_ipn TEXT," "dst_ipn TEXT," "mac_daddr TEXT," "mac_saddr TEXT," "packets INTEGER," "srcport INTEGER," "dstport INTEGER," "hashdigest_md5 TEXT);"; /* Execute SQL statement */ rc = sqlite3_exec(db, sql, callback, 0, &zErrMsg); if( rc != SQLITE_OK ){ fprintf(stderr, "SQL error: %s\n", zErrMsg); sqlite3_free(zErrMsg); sqlite3_close(db); return 0; } const char* zSql = "INSERT INTO connections (starttime,endtime,src_ipn,dst_ipn,mac_daddr,mac_saddr,packets,srcport,dstport,hashdigest_md5) VALUES (?,?,?,?,?,?,?,?,?,?)"; if(sqlite3_prepare_v2(db, zSql, strlen(zSql), &insert_stmt, NULL)!=SQLITE_OK ){ fprintf(stderr, "SQL prepare error"); db = 0; insert_stmt=0; return(0); } #endif } void tcpdemux::write_flow_record(const std::string &starttime,const std::string &endtime, const std::string &src_ipn,const std::string &dst_ipn, const std::string &mac_daddr,const std::string &mac_saddr, uint64_t packets,uint16_t srcport,uint16_t dstport, const std::string &hashdigest_md5) { } /* static */ tcpdemux *tcpdemux::getInstance() { static tcpdemux * theInstance = 0; if(theInstance==0) theInstance = new tcpdemux(); return theInstance; } /** * Implement a list of open_flows, each with an associated file 
descriptor. * When a new file needs to be opened, we can close a flow if necessary. */ void tcpdemux::close_all_fd() { tcpset open_flows_copy(open_flows); // make a copy because we're going to modify it for(tcpset::const_iterator it = open_flows_copy.begin();it!=open_flows_copy.end();it++){ (*it)->close_file(); } assert(open_flows.size()==0); // we've closed them all } /** * find the flow that has been written to in the furthest past and close it. */ void tcpdemux::close_oldest_fd() { tcpip *oldest_tcp=0; for(tcpset::iterator it = open_flows.begin();it!=open_flows.end();it++){ if(oldest_tcp==0 || (*it)->last_packet_number < oldest_tcp->last_packet_number){ oldest_tcp = (*it); } } if(oldest_tcp) oldest_tcp->close_file(); } /* Open a file, closing one of the existing flows f necessary. */ int tcpdemux::retrying_open(const std::string &filename,int oflag,int mask) { while(true){ if(open_flows.size() >= max_fds) close_oldest_fd(); int fd = ::open(filename.c_str(),oflag,mask); DEBUG(2)("retrying_open ::open(fn=%s,oflag=x%x,mask:x%x)=%d",filename.c_str(),oflag,mask,fd); if(fd>=0){ /* Open was successful */ return fd; } DEBUG(2)("retrying_open ::open failed with errno=%d",errno); if (errno != ENFILE && errno != EMFILE){ DEBUG(2)("retrying_open ::open failed with errno=%d (%s)",errno,strerror(errno)); return -1; // wonder what it was } DEBUG(5) ("too many open files -- contracting FD ring (size=%d)", (int)open_flows.size()); close_oldest_fd(); } } /* Find previously a previously created flow state in the database. */ tcpip *tcpdemux::find_tcpip(const flow_addr &flow) { flow_map_t::const_iterator it = flow_map.find(flow); if (it==flow_map.end()){ return NULL; // flow not found } return it->second; } /* Create a new flow state structure for a given flow. * Puts the flow in the map. * Returns a pointer to the new state. * * This is called by tcpdemux::process_tcp(). (Only place it is called) * * @param - pi - first packet seen on this connection. 
* * NOTE: We keep pointers to tcp structures in the map, rather than * the structures themselves. This makes the map slightly more efficient, * since it doesn't need to shuffle entire structures. * * * TK: Note that the flow() is created on the stack and then used in new tcpip(). * This is resulting in an unnecessary copy. */ tcpip *tcpdemux::create_tcpip(const flow_addr &flowa, be13::tcp_seq isn,const be13::packet_info &pi) { /* create space for the new state */ flow flow(flowa,flow_counter++,pi); tcpip *new_tcpip = new tcpip(*this,flow,isn); new_tcpip->nsn = isn+1; // expected sequence number of the first byte DEBUG(5) ("new flow %s. path: %s next seq num (nsn):%d", flowa.str().c_str(),new_tcpip->flow_pathname.c_str(),new_tcpip->nsn); flow_map[flow] = new_tcpip; return new_tcpip; } /** * Remove a flow from the database. * Close the flow file. * Write to the report.xml object. * Save in the sqlite database. * This is the ONLY place where a tcpip object is deleted so there is no chance of finding it again. * * Flows are post-processed when a FIN is received and all bytes are received. * If a FIN is received and bytes are outstanding, they are post-processed when the last byte is received. * When the program shut down, all open flows are post-processed. */ void tcpdemux::post_process(tcpip *tcp) { std::stringstream xmladd; // for this if(opt.post_processing && tcp->file_created && tcp->last_byte>0){ /** * After the flow is finished, if more than a byte was * written, then put it in an SBUF and process it. if we are * doing post-processing. This is called from tcpip::~tcpip() * in tcpip.cpp. 
*/ /* Open the fd if it is not already open */ tcp->open_file(); if(tcp->fd>=0){ sbuf_t *sbuf = sbuf_t::map_file(tcp->flow_pathname,tcp->fd); if(sbuf){ be13::plugin::process_sbuf(scanner_params(scanner_params::PHASE_SCAN,*sbuf,*(fs),&xmladd)); delete sbuf; sbuf = 0; } } } tcp->close_file(); if(xreport) tcp->dump_xml(xreport,xmladd.str()); /** * Before we delete the tcp structure, save information about the saved flow */ save_flow(tcp); delete tcp; } void tcpdemux::remove_flow(const flow_addr &flow) { flow_map_t::iterator it = flow_map.find(flow); if(it!=flow_map.end()){ post_process(it->second); flow_map.erase(it); } } void tcpdemux::remove_all_flows() { for(flow_map_t::iterator it=flow_map.begin();it!=flow_map.end();it++){ post_process(it->second); } flow_map.clear(); } /**************************************************************** *** tcpdemultiplexer ****************************************************************/ /* Try to find the maximum number of FDs this system can have open */ unsigned int tcpdemux::get_max_fds(void) { int max_descs = 0; const char *method=0; /* No longer users OPEN_MAX */ #if defined (HAVE_GETDTABLESIZE) method = "getdtablesize"; max_descs = getdtablesize(); #elif defined(RLIMIT_NOFILE) { struct rlimit limit; memset(&limit,0,sizeof(limit)); method = "rlimit"; if (getrlimit(RLIMIT_NOFILE, &limit) < 0) { perror("getrlimit"); exit(1); } /* set the current to the maximum or specified value */ limit.rlim_cur = limit.rlim_max; #ifdef OPEN_MAX if(limit.rlim_cur > OPEN_MAX) limit.rlim_cur = OPEN_MAX; #endif if (setrlimit(RLIMIT_NOFILE, &limit) < 0) { perror("setrlimit"); exit(1); } max_descs = limit.rlim_max; #ifdef RLIM_INFINITY if (limit.rlim_max == RLIM_INFINITY) max_descs = MAX_FD_GUESS * 4; /* pick a more reasonable max */ #endif } #elif defined (_SC_OPEN_MAX) /* Okay, you don't have getrlimit() and you don't have OPEN_MAX. * Time to try the POSIX sysconf() function. (See Stevens' * _Advanced Programming in the UNIX Environment_). 
*/ method = "POSIX sysconf"; errno = 0; if ((max_descs = sysconf(_SC_OPEN_MAX)) < 0) { if (errno == 0) max_descs = MAX_FD_GUESS * 4; else { perror("calling sysconf"); exit(1); } } /* if everything has failed, we'll just take a guess */ #else method = "MAX_FD_GUESS"; max_descs = MAX_FD_GUESS; #endif /* this must go here, after rlimit code */ DEBUG(10) ("found max FDs to be %d using %s", max_descs, method); return max_descs; } /* * open the packet save flow */ void tcpdemux::save_unk_packets(const std::string &ofname,const std::string &ifname) { pwriter = pcap_writer::open_copy(ofname,ifname); } /** * save information on this flow needed to handle strangling packets */ int c = 0; void tcpdemux::save_flow(tcpip *tcp) { /* First remove the oldest flow if we are in overload */ if(saved_flows.size()>0 && saved_flows.size()>max_saved_flows){ saved_flow *flow0 = saved_flows.at(0); saved_flow_map.erase(flow0->addr); // remove from the map saved_flows.erase(saved_flows.begin()); // remove from the vector delete flow0; // and delete the saved flow } /* Now save the flow */ saved_flow *sf = new saved_flow(tcp); saved_flow_map[sf->addr] = sf; saved_flows.push_back(sf); } /** * process_tcp(): * * Called to processes a tcp packet from either process_ip4() or process_ip6(). * The caller breaks out the ip addresses and finds the start of the tcp header. * * Skips but otherwise ignores TCP options. * * creates a new tcp connection if necessary, then asks the connection to either * print the packet or store it. * * Returns 0 if packet is processed, 1 if it is not processed, -1 if error */ #define FLAG_SET(vector, flag) ((vector) & (flag)) #pragma GCC diagnostic ignored "-Wcast-align" #include "iptree.h" int tcpdemux::process_tcp(const ipaddr &src, const ipaddr &dst,sa_family_t family, const u_char *ip_data, uint32_t ip_payload_len, const be13::packet_info &pi) { if (ip_payload_len < sizeof(struct be13::tcphdr)) { DEBUG(6) ("received truncated TCP segment! 
(%u<%u)", (u_int)ip_payload_len,(u_int)sizeof(struct be13::tcphdr)); return 1; } struct be13::tcphdr *tcp_header = (struct be13::tcphdr *) ip_data; /* fill in the flow_addr structure with info that identifies this flow */ flow_addr this_flow(src,dst,ntohs(tcp_header->th_sport),ntohs(tcp_header->th_dport),family); be13::tcp_seq seq = ntohl(tcp_header->th_seq); bool syn_set = FLAG_SET(tcp_header->th_flags, TH_SYN); bool ack_set = FLAG_SET(tcp_header->th_flags, TH_ACK); bool fin_set = FLAG_SET(tcp_header->th_flags, TH_FIN); /* calculate the total length of the TCP header including options */ u_int tcp_header_len = tcp_header->th_off * 4; /* Find the beginning of the tcp data. */ const u_char *tcp_data = ip_data + tcp_header_len; /* figure out how much tcp data we have, taking into account tcp options */ size_t tcp_datalen = (ip_payload_len > tcp_header_len) ? (ip_payload_len - tcp_header_len) : 0; /* see if we have state about this flow; if not, create it */ int32_t delta = 0; // from current position in tcp connection; must be SIGNED 32 bit! tcpip *tcp = find_tcpip(this_flow); DEBUG(60)("%s%s%s tcp_header_len=%d tcp_datalen=%d seq=%u tcp=%p", (syn_set?"SYN ":""),(ack_set?"ACK ":""),(fin_set?"FIN ":""),(int)tcp_header_len,(int)tcp_datalen,(int)seq,tcp); /* If this_flow is not in the database and the start_new_connections flag is false, just return */ if(tcp==0 && start_new_connections==false) return 0; if(syn_set && tcp && tcp->syn_count>0 && tcp->pos>0){ std::cerr << "SYN TO IGNORE! SYN tcp="<second->isn - 1; bool data_match = false; int fd = open(it->second->saved_filename.c_str(),O_RDONLY | O_BINARY); if(fd>0){ char *buf = (char *)malloc(tcp_datalen); if(buf){ DEBUG(100)("lseek(fd,%" PRId64 ",SEEK_SET)",(int64_t)(offset)); lseek(fd,offset,SEEK_SET); ssize_t r = read(fd,buf,tcp_datalen); data_match = (r==(ssize_t)tcp_datalen) && memcmp(buf,tcp_data,tcp_datalen)==0; free(buf); } close(fd); } DEBUG(60)("Packet matches saved flow. 
offset=%u len=%d filename=%s data match=%d\n", (u_int)offset,(u_int)tcp_datalen,it->second->saved_filename.c_str(),(u_int)data_match); if(data_match) return 0; } } } /* flow is in the database; make sure the gap isn't too big.*/ if(tcp){ /* Compute delta based on next expected sequence number. * If delta will be too much, start a new flow. * * NOTE: I hope we don't get a packet from the old flow when * we are processing the new one. Perhaps we should be able to have * multiple flows at the same time with the same quad, and they are * at different window areas... * */ delta = seq - tcp->nsn; // notice that signed offset is calculated if(abs(delta) > opt.max_seek){ remove_flow(this_flow); tcp = 0; } } /* At this point, tcp may be NULL because: * case 1 - It's a new connection and SYN IS SET; normal case * case 2 - Extra packets on a now-closed connection * case 3 - Packets for which the initial part of the connection was missed * case 4 - It's a connecton that had a huge gap and was expired out of the databsae * * THIS IS THE ONLY PLACE THAT create_tcpip() is called. */ /* q: what if syn is set AND there is data? */ /* q: what if syn is set AND we already know about this connection? */ if (tcp==NULL){ /* Don't process if this is not a SYN and there is no data. */ if(syn_set==false && tcp_datalen==0) return 0; /* Create a new connection. * delta will be 0, because it's a new connection! */ be13::tcp_seq isn = syn_set ? seq : seq-1; tcp = create_tcpip(this_flow, isn, pi); } /* Now tcp is valid */ tcp->myflow.tlast = pi.ts; // most recently seen packet tcp->last_packet_number = packet_counter++; tcp->myflow.packet_count++; /* * 2012-10-24 slg - the first byte is sent at SEQ==ISN+1. * The first byte in POSIX files have an LSEEK of 0. * The original code overcame this issue by introducing an intentional off-by-one * error with the statement tcp->isn++. * * With the new TCP state-machine we simply follow the spec. 
* * The new state machine works by examining the SYN and ACK packets * in accordance with the TCP spec. */ if(syn_set){ /* If the syn is set this is either a SYN or SYN-ACK. We use this information to set the direction * flag, but that's it. The direction flag is only used for coloring. */ if(tcp->syn_count>1){ DEBUG(2)("Multiple SYNs (%d) seen on connection %s",tcp->syn_count,tcp->flow_pathname.c_str()); } tcp->syn_count++; if( !ack_set ){ DEBUG(50) ("packet is handshake SYN"); /* First packet of three-way handshake */ tcp->dir = tcpip::dir_cs; // client->server } else { DEBUG(50) ("packet is handshake SYN/ACK"); /* second packet of three-way handshake */ tcp->dir = tcpip::dir_sc; // server->client } if(tcp_datalen>0){ tcp->violations++; DEBUG(1) ("TCP PROTOCOL VIOLATION: SYN with data! (length=%d)",(int)tcp_datalen); } } if(tcp_datalen==0) DEBUG(50) ("got TCP segment with no data"); // seems pointless to notify /* process any data. * Notice that this typically won't be called for the SYN or SYN/ACK, * since they both have no data by definition. */ if (tcp_datalen>0){ if (opt.console_output) { tcp->print_packet(tcp_data, tcp_datalen); } else { if (opt.store_output){ tcp->store_packet(tcp_data, tcp_datalen, delta); } } } /* Count the FINs. 
* If this is a fin, determine the size of the stream */ if (fin_set){ tcp->fin_count++; if(tcp->fin_count==1){ tcp->fin_size = (seq+tcp_datalen-tcp->isn)-1; } } /* If a fin was sent and we've seen all of the bytes, close the stream */ DEBUG(50)("%d>0 && %d == %d",tcp->fin_count,tcp->seen_bytes(),tcp->fin_size); if (tcp->fin_count>0 && tcp->seen_bytes() == tcp->fin_size){ DEBUG(50)("all bytes have been received; removing flow"); remove_flow(this_flow); // take it out of the map } DEBUG(50)("fin_set=%d seq=%u fin_count=%d seq_count=%d len=%d isn=%u", fin_set,seq,tcp->fin_count,tcp->syn_count,(int)tcp_datalen,tcp->isn); return 0; // successfully processed } #pragma GCC diagnostic warning "-Wcast-align" /* This is called when we receive an IPv4 datagram. We make sure that * it's valid and contains a TCP segment; if so, we pass it to * process_tcp() for further processing. * * Note: we currently don't know how to handle IP fragments. */ #pragma GCC diagnostic ignored "-Wcast-align" int tcpdemux::process_ip4(const be13::packet_info &pi) { /* make sure that the packet is at least as long as the min IP header */ if (pi.ip_datalen < sizeof(struct be13::ip4)) { DEBUG(6) ("received truncated IP datagram!"); return -1; // couldn't process } const struct be13::ip4 *ip_header = (struct be13::ip4 *) pi.ip_data; DEBUG(100)("process_ip4. caplen=%d vlan=%d ip_p=%d",(int)pi.pcap_hdr->caplen,(int)pi.vlan(),(int)ip_header->ip_p); if(debug>200){ sbuf_t sbuf(pos0_t(),(const uint8_t *)pi.ip_data,pi.ip_datalen,pi.ip_datalen,false); sbuf.hex_dump(std::cerr); } /* for now we're only looking for TCP; throw away everything else */ if (ip_header->ip_p != IPPROTO_TCP) { DEBUG(50) ("got non-TCP frame -- IP proto %d", ip_header->ip_p); return -1; // couldn't process } /* check and see if we got everything. NOTE: we must use * ip_total_len after this, because we may have captured bytes * beyond the end of the packet (e.g. ethernet padding). 
*/ size_t ip_len = ntohs(ip_header->ip_len); if (pi.ip_datalen < ip_len) { DEBUG(6) ("warning: captured only %ld bytes of %ld-byte IP datagram", (long) pi.ip_datalen, (long) ip_len); } /* XXX - throw away everything but fragment 0; this version doesn't * know how to do fragment reassembly. */ if (ntohs(ip_header->ip_off) & 0x1fff) { DEBUG(2) ("warning: throwing away IP fragment from X to X"); return -1; } /* figure out where the IP header ends */ size_t ip_header_len = ip_header->ip_hl * 4; /* make sure there's some data */ if (ip_header_len > ip_len) { DEBUG(6) ("received truncated IP datagram!"); return -1; } /* do TCP processing, faking an ipv6 address */ uint16_t ip_payload_len = ip_len - ip_header_len; ipaddr src(ip_header->ip_src.addr); ipaddr dst(ip_header->ip_dst.addr); return process_tcp(src, dst, AF_INET, pi.ip_data + ip_header_len, ip_payload_len, pi); } #pragma GCC diagnostic warning "-Wcast-align" /* This is called when we receive an IPv6 datagram. * * Note: we don't support IPv6 extended headers */ /* These might be defined from an include file, so undef them to be sure */ int tcpdemux::process_ip6(const be13::packet_info &pi) { /* make sure that the packet is at least as long as the IPv6 header */ if (pi.ip_datalen < sizeof(struct be13::ip6_hdr)) { DEBUG(6) ("received truncated IPv6 datagram!"); return -1; } const struct be13::ip6_hdr *ip_header = (struct be13::ip6_hdr *) pi.ip_data; /* for now we're only looking for TCP; throw away everything else */ if (ip_header->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { DEBUG(50) ("got non-TCP frame -- IP proto %d", ip_header->ip6_ctlun.ip6_un1.ip6_un1_nxt); return -1; } /* do TCP processing */ uint16_t ip_payload_len = ntohs(ip_header->ip6_ctlun.ip6_un1.ip6_un1_plen); ipaddr src(ip_header->ip6_src.addr.addr8); ipaddr dst(ip_header->ip6_dst.addr.addr8); return process_tcp(src, dst ,AF_INET6, pi.ip_data + sizeof(struct be13::ip6_hdr),ip_payload_len,pi); } /* This is called when we receive an IPv4 or IPv6 
datagram. * This function calls process_ip4 or process_ip6 * Returns 0 if packet is processed, 1 if it is not processed, -1 if error. */ #pragma GCC diagnostic ignored "-Wcast-align" int tcpdemux::process_pkt(const be13::packet_info &pi) { DEBUG(10)("process_pkt.............................................................................."); int r = 1; // not processed yet switch(pi.ip_version()){ case 4: r = process_ip4(pi); break; case 6: r = process_ip6(pi); break; } if(r!=0){ // packet not processed? /* Write the packet if we didn't process it */ if(pwriter) pwriter->writepkt(pi.pcap_hdr,pi.pcap_data); } /* Process the timeout, if there is any */ if(tcp_timeout){ /* Get a list of the flows that need to be closed. */ std::vector to_close; for(flow_map_t::iterator it = flow_map.begin(); it!=flow_map.end(); it++){ tcpip &tcp = *(it->second); uint32_t age = pi.ts.tv_sec - tcp.myflow.tlast.tv_sec; if (age > tcp_timeout){ to_close.push_back(&tcp.myflow); } } /* Close them. This removes the flows from the flow_map(), which is why we need * to create the list first. */ for(std::vector::iterator it = to_close.begin(); it!=to_close.end(); it++){ remove_flow(*(*it)); } } return r; } #pragma GCC diagnostic warning "-Wcast-align" tcpflow/src/datalink.cpp0000644000175000017500000001760012263701151014226 0ustar dimadima/** * * This file is part of tcpflow. Originally by Jeremy Elson * , rewritten by Simson Garfinkel. * * Initial Release: 7 April 1999. * * This source code is under the GNU Public License (GPL). See * COPYING for details. * * This file contains datalink handlers which are called by the pcap callback. * The purpose of each handler is to make a packet_info() object and then call * process_packet. The packet_info() object contains both the original * MAC-layer (with some of the fields broken out) and the packet data layer. * * For wifi datalink handlers, please see datalink_wifi.cpp */ #include "tcpflow.h" /* The DLT_NULL packet header is 4 bytes long. 
It contains a network * order 32 bit integer that specifies the family, e.g. AF_INET. * DLT_NULL is used by the localhost interface. */ #define NULL_HDRLEN 4 /* Some systems hasn't defined ETHERTYPE_IPV6 */ #ifndef ETHERTYPE_IPV6 # define ETHERTYPE_IPV6 0x86DD #endif int32_t datalink_tdelta = 0; #pragma GCC diagnostic ignored "-Wcast-align" void dl_null(u_char *user, const struct pcap_pkthdr *h, const u_char *p) { u_int caplen = h->caplen; u_int length = h->len; uint32_t family = *(uint32_t *)p; if (length != caplen) { DEBUG(6) ("warning: only captured %d bytes of %d byte null frame", caplen, length); } if (caplen < NULL_HDRLEN) { DEBUG(6) ("warning: received incomplete null frame"); return; } /* make sure this is AF_INET */ if (family != AF_INET && family != AF_INET6) { DEBUG(6)("warning: received null frame with unknown type (type 0x%x) (AF_INET=%x; AF_INET6=%x)", family,AF_INET,AF_INET6); return; } struct timeval tv; be13::packet_info pi(DLT_NULL,h,p,tvshift(tv,h->ts),p+NULL_HDRLEN,caplen - NULL_HDRLEN); be13::plugin::process_packet(pi); } #pragma GCC diagnostic warning "-Wcast-align" static uint64_t counter=0; /* DLT_RAW: just a raw IP packet, no encapsulation or link-layer * headers. Used for PPP connections under some OSs including Linux * and IRIX. */ void dl_raw(u_char *user, const struct pcap_pkthdr *h, const u_char *p) { if (h->caplen != h->len) { DEBUG(6) ("warning: only captured %d bytes of %d byte raw frame", h->caplen, h->len); } struct timeval tv; be13::packet_info pi(DLT_RAW,h,p,tvshift(tv,h->ts),p, h->caplen); counter++; be13::plugin::process_packet(pi); } /* Ethernet datalink handler; used by all 10 and 100 mbit/sec * ethernet. We are given the entire ethernet header so we check to * make sure it's marked as being IP. 
*/ #pragma GCC diagnostic ignored "-Wcast-align" void dl_ethernet(u_char *user, const struct pcap_pkthdr *h, const u_char *p) { u_int caplen = h->caplen; u_int length = h->len; struct be13::ether_header *eth_header = (struct be13::ether_header *) p; /* Variables to support VLAN */ const u_short *ether_type = ð_header->ether_type; /* where the ether type is located */ const u_char *ether_data = p+sizeof(struct be13::ether_header); /* where the data is located */ if (length != caplen) { DEBUG(6) ("warning: only captured %d bytes of %d byte ether frame", caplen, length); } /* Handle basic VLAN packets */ if (ntohs(*ether_type) == ETHERTYPE_VLAN) { //vlan = ntohs(*(u_short *)(p+sizeof(struct ether_header))); ether_type += 2; /* skip past VLAN header (note it skips by 2s) */ ether_data += 4; /* skip past VLAN header */ caplen -= 4; } if (caplen < sizeof(struct be13::ether_header)) { DEBUG(6) ("warning: received incomplete ethernet frame"); return; } /* Create a packet_info structure with ip data and data length */ struct timeval tv; be13::packet_info pi(DLT_IEEE802,h,p,tvshift(tv,h->ts), ether_data, caplen - sizeof(struct be13::ether_header)); switch (ntohs(*ether_type)){ case ETHERTYPE_IP: case ETHERTYPE_IPV6: be13::plugin::process_packet(pi); break; #ifdef ETHERTYPE_ARP case ETHERTYPE_ARP: /* What should we do for ARP? */ break; #endif #ifdef ETHERTYPE_LOOPBACK case ETHERTYPE_LOOPBACK: /* What do do for loopback? */ break; #endif #ifdef ETHERTYPE_REVARP case ETHERTYPE_REVARP: /* What to do for REVARP? */ break; #endif default: /* Unknown Ethernet Frame Type */ DEBUG(6) ("warning: received ethernet frame with unknown type 0x%x", ntohs(eth_header->ether_type)); break; } } #pragma GCC diagnostic warning "-Wcast-align" /* The DLT_PPP packet header is 4 bytes long. We just move past it * without parsing it. 
It is used for PPP on some OSs (DLT_RAW is * used by others; see below) */ #define PPP_HDRLEN 4 void dl_ppp(u_char *user, const struct pcap_pkthdr *h, const u_char *p) { u_int caplen = h->caplen; u_int length = h->len; if (length != caplen) { DEBUG(6) ("warning: only captured %d bytes of %d byte PPP frame", caplen, length); } if (caplen < PPP_HDRLEN) { DEBUG(6) ("warning: received incomplete PPP frame"); return; } struct timeval tv; be13::packet_info pi(DLT_PPP,h,p,tvshift(tv,h->ts),p + PPP_HDRLEN, caplen - PPP_HDRLEN); be13::plugin::process_packet(pi); } #ifdef DLT_LINUX_SLL #define SLL_HDR_LEN 16 #define SLL_ADDRLEN 8 #ifndef ETHERTYPE_MPLS #define ETHERTYPE_MPLS 0x8847 #endif #ifndef ETHERTYPE_MPLS_MULTI #define ETHERTYPE_MPLS_MULTI 0x8848 #endif #pragma GCC diagnostic ignored "-Wcast-align" void dl_linux_sll(u_char *user, const struct pcap_pkthdr *h, const u_char *p) { u_int caplen = h->caplen; u_int length = h->len; if (length != caplen) { DEBUG(6) ("warning: only captured %d bytes of %d byte Linux cooked frame", caplen, length); } if (caplen < SLL_HDR_LEN) { DEBUG(6) ("warning: received incomplete Linux cooked frame"); return; } struct _sll_header { u_int16_t sll_pkttype; /* packet type */ u_int16_t sll_hatype; /* link-layer address type */ u_int16_t sll_halen; /* link-layer address length */ u_int8_t sll_addr[SLL_ADDRLEN]; /* link-layer address */ u_int16_t sll_protocol; /* protocol */ }; _sll_header *sllp = (_sll_header*)p; u_int mpls_sz = 0; if (ntohs(sllp->sll_protocol) == ETHERTYPE_MPLS) { // unwind MPLS stack do { if(caplen < SLL_HDR_LEN + mpls_sz + 4){ DEBUG(6) ("warning: MPLS stack overrun"); return; } mpls_sz += 4; caplen -= 4; } while ((p[SLL_HDR_LEN + mpls_sz - 2] & 1) == 0 ); } struct timeval tv; be13::packet_info pi(DLT_LINUX_SLL,h,p,tvshift(tv,h->ts),p + SLL_HDR_LEN + mpls_sz, caplen - SLL_HDR_LEN); be13::plugin::process_packet(pi); } #endif #ifndef DLT_IEEE802_11_RADIO #define DLT_IEEE802_11_RADIO 127 /* 802.11 plus radiotap radio header */ 
#endif /* List of callbacks for each data link type */ dlt_handler_t handlers[] = { { dl_null, DLT_NULL }, /* Some systems define DLT_RAW as 12, some as 14, and some as 101. * So it is hard-coded here. */ { dl_raw, 12 }, { dl_raw, 14 }, { dl_raw, 101 }, { dl_ethernet, DLT_EN10MB }, { dl_ethernet, DLT_IEEE802 }, { dl_ppp, DLT_PPP }, #ifdef DLT_LINUX_SLL { dl_linux_sll, DLT_LINUX_SLL }, #endif #ifndef WIN32 { dl_ieee802_11_radio, DLT_IEEE802_11 }, { dl_ieee802_11_radio, DLT_IEEE802_11_RADIO }, { dl_prism, DLT_PRISM_HEADER}, #endif { NULL, 0 } }; pcap_handler find_handler(int datalink_type, const char *device) { int i; DEBUG(3) ("looking for handler for datalink type %d for interface %s", datalink_type, device); for (i = 0; handlers[i].handler != NULL; i++){ if (handlers[i].type == datalink_type){ return handlers[i].handler; } } die("sorry - unknown datalink type %d on interface %s", datalink_type, device); return NULL; /* NOTREACHED */ } tcpflow/src/tcpip.cpp0000644000175000017500000003533012263701151013556 0ustar dimadima/* * This file is part of tcpflow by Simson Garfinkel, * originally by Jeremy Elson * * This source code is under the GNU Public License (GPL). See * LICENSE for details. * */ #include "tcpflow.h" #include "tcpip.h" #include "tcpdemux.h" #include #include #pragma GCC diagnostic ignored "-Weffc++" #pragma GCC diagnostic ignored "-Wshadow" /* Create a new tcp object. * * Creating a new object creates a new passive TCP/IP decoder. * It will *NOT* append to a flow that is already on the disk or in memory. 
* * called from tcpdemux::create_tcpip() */ tcpip::tcpip(tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_): demux(demux_),myflow(flow_),dir(unknown),isn(isn_),nsn(0), syn_count(0),fin_count(0),fin_size(0),pos(0), flow_pathname(),fd(-1),file_created(false), seen(new recon_set()), last_byte(), last_packet_number(),out_of_order_count(0),violations(0) { } uint32_t tcpip::seen_bytes() { if(seen) return seen->size(); return 0; } void tcpip::dump_seen() { if(seen){ for(recon_set::const_iterator it = seen->begin(); it!=seen->end(); it++){ std::cerr << *it << ", "; } std::cerr << std::endl; } } void tcpip::dump_xml(class dfxml_writer *xreport,const std::string &xmladd) { static const std::string fileobject_str("fileobject"); static const std::string filesize_str("filesize"); static const std::string filename_str("filename"); static const std::string tcpflow_str("tcpflow"); xreport->push(fileobject_str); if(flow_pathname.size()) xreport->xmlout(filename_str,flow_pathname); xreport->xmlout(filesize_str,last_byte); std::stringstream attrs; attrs << "startime='" << dfxml_writer::to8601(myflow.tstart) << "' "; attrs << "endtime='" << dfxml_writer::to8601(myflow.tlast) << "' "; attrs << "src_ipn='" << myflow.src << "' "; attrs << "dst_ipn='" << myflow.dst << "' "; if(myflow.has_mac_daddr()) attrs << "mac_daddr='" << macaddr(myflow.mac_daddr) << "' "; if(myflow.has_mac_saddr()) attrs << "mac_saddr='" << macaddr(myflow.mac_saddr) << "' "; attrs << "packets='" << myflow.packet_count << "' "; attrs << "srcport='" << myflow.sport << "' "; attrs << "dstport='" << myflow.dport << "' "; attrs << "family='" << (int)myflow.family << "' "; if(out_of_order_count) attrs << "out_of_order_count='" << out_of_order_count << "' "; if(violations) attrs << "violations='" << violations << "' "; xreport->xmlout(tcpflow_str,"",attrs.str(),false); if(xmladd.size()>0) xreport->xmlout("",xmladd,"",false); xreport->pop(); xreport->flush(); } /** * Destructor is called when flow is closed. 
* It implements "after" processing. * This should only be called from remove_flow() or remove_all_flows() * when a flow is deleted. */ tcpip::~tcpip() { assert(fd<0); // file must be closed if(seen) delete seen; } #pragma GCC diagnostic warning "-Weffc++" #pragma GCC diagnostic warning "-Wshadow" /**************************************************************** ** SAVE FILE MANAGEMENT **************************************************************** * * Unlike the tcp/ip object, which is created once, the file can be opened, closed, and * re-opened depending on the availability of file handles. * * Closing the file does not delete the tcp/ip object. */ /* Closes the file belonging to a flow. * Does not take tcpip out of flow database. * Does not change pos. */ void tcpip::close_file() { if (fd>=0){ struct timeval times[2]; times[0] = myflow.tstart; times[1] = myflow.tstart; DEBUG(5) ("%s: closing file in tcpip::close_file", flow_pathname.c_str()); /* close the file and remember that it's closed */ #if defined(HAVE_FUTIMES) if(futimes(fd,times)){ fprintf(stderr,"%s: futimes(fd=%d)\n",strerror(errno),fd); abort(); } #elif defined(HAVE_FUTIMENS) struct timespec tstimes[2]; for(int i=0;i<2;i++){ tstimes[i].tv_sec = times[i].tv_sec; tstimes[i].tv_nsec = times[i].tv_usec * 1000; } if(futimens(fd,tstimes)){ perror("futimens(fd=%d)",fd); } #endif close(fd); fd = -1; } demux.open_flows.erase(this); // we are no longer open } /* * Opens the file transcript file (creating file if necessary). * Called by store_packet() * Does not change pos. 
*/ int tcpip::open_file() { if(fd<0){ //std::cerr << "open_file0 " << ct << " " << *this << "\n"; /* If we don't have a filename, create the flow */ if(flow_pathname.size()==0) { flow_pathname = myflow.new_filename(&fd,O_RDWR|O_BINARY|O_CREAT,0666); file_created = true; // remember we made it DEBUG(5) ("%s: created new file",flow_pathname.c_str()); } else { /* open an existing flow */ fd = demux.retrying_open(flow_pathname,O_RDWR | O_BINARY | O_CREAT,0666); lseek(fd,pos,SEEK_SET); DEBUG(5) ("%s: opening existing file", flow_pathname.c_str()); } /* If the file isn't open at this point, there's a problem */ if (fd < 0 ) { /* we had some problem opening the file -- set FINISHED so we * don't keep trying over and over again to reopen it */ perror(flow_pathname.c_str()); return -1; } /* Remember that we have this open */ demux.open_flows.insert(this); if(demux.open_flows.size() > demux.max_open_flows) demux.max_open_flows = demux.open_flows.size(); //std::cerr << "open_file1 " << *this << "\n"; } return 0; } /*************************************************************************/ /* print the contents of this packet to the console. * This is nice for immediate satisfaction, but it can't handle * out of order packets, etc. */ void tcpip::print_packet(const u_char *data, uint32_t length) { /* green, blue, read */ const char *color[3] = { "\033[0;32m", "\033[0;34m", "\033[0;31m" }; if(demux.opt.max_bytes_per_flow>0){ if(last_byte > demux.opt.max_bytes_per_flow) return; /* too much has been printed */ if(length > demux.opt.max_bytes_per_flow - last_byte){ length = demux.opt.max_bytes_per_flow - last_byte; /* can only output this much */ if(length==0) return; } } #ifdef HAVE_PTHREAD if(semlock){ if(sem_wait(semlock)){ fprintf(stderr,"%s: attempt to acquire semaphore failed: %s\n",progname,strerror(errno)); exit(1); } } #endif if (demux.opt.use_color) fputs(dir==dir_cs ? 
color[1] : color[2], stdout); if (demux.opt.suppress_header == 0){ if(flow_pathname.size()==0) flow_pathname = myflow.filename(0); printf("%s: ", flow_pathname.c_str()); if(demux.opt.output_hex) putchar('\n'); } size_t written = 0; if(demux.opt.output_hex){ const size_t bytes_per_line = 32; size_t max_spaces = 0; for(u_int i=0;imax_spaces) max_spaces=spaces; for(;spaces=' ' && ch<='~') fputc(ch,stdout); else fputc('.',stdout); } fputc('\n',stdout); } written = length; // just fake it. } else if(demux.opt.output_strip_nonprint){ for(const u_char *cc = data;cc::closed(pos,pos+length-1); } } /* store the contents of this packet to its place in its file * This has to handle out-of-order packets as well as writes * past the 4GiB boundary. * * 2012-10-24 Originally this code simply computed the 32-bit offset * from the beginning of the file using the isn. The new version tracks * nsn (the expected next sequence number for the open file). * * A relative seek before the beginning of the file means that we need * to insert. A relative seek more than max_seek means that we have a * different flow that needs to be separately handled. * * called from tcpdemux::process_tcp_packet() */ void tcpip::store_packet(const u_char *data, uint32_t length, int32_t delta) { if(length==0) return; // no need to do anything uint32_t insert_bytes=0; uint64_t offset = pos+delta; // where the data will go in absolute byte positions (first byte is pos=0) if((int64_t)offset < 0){ /* We got bytes before the beginning of the TCP connection. * Either this is a protocol violation, * or else we never saw a SYN and we got the ISN wrong. */ if(syn_count>0){ DEBUG(2)("packet received with offset %" PRId64 "; ignoring",offset); violations++; return; } insert_bytes = -offset; // open up this much space offset = 0; // and write the data here } /* reduce length to write if it goes beyond the number of bytes per flow, * but remember to seek out to the actual position after the truncated write... 
*/ uint32_t wlength = length; // length to write if (demux.opt.max_bytes_per_flow){ if(offset >= demux.opt.max_bytes_per_flow){ wlength = 0; } if(offset < demux.opt.max_bytes_per_flow && offset+length > demux.opt.max_bytes_per_flow){ DEBUG(2) ("packet truncated by max_bytes_per_flow on %s", flow_pathname.c_str()); wlength = demux.opt.max_bytes_per_flow - offset; } } /* if we don't have a file open for this flow, try to open it. * return if the open fails. Note that we don't have to explicitly * save the return value because open_tcpfile() puts the file pointer * into the structure for us. */ if (fd < 0 && wlength>0) { if (open_file()) { DEBUG(1)("unable to open TCP file %s fd=%d wlength=%d", flow_pathname.c_str(),fd,(int)wlength); return; } } /* Shift the file now if we were going shift it */ if(insert_bytes>0){ if(fd>=0) shift_file(fd,insert_bytes); isn -= insert_bytes; // it's really earlier lseek(fd,(off_t)0,SEEK_SET); // put at the beginning pos = 0; nsn = isn+1; out_of_order_count++; DEBUG(25)("%s: insert(0,%d); lseek(%d,0,SEEK_SET) out_of_order_count=%" PRId64, flow_pathname.c_str(), insert_bytes, fd,out_of_order_count); /* TK: If we have seen packets, everything in the recon set needs to be shifted as well.*/ if(seen){ delete seen; seen = 0; } } /* if we're not at the correct point in the file, seek there */ if (offset != pos) { if(fd>=0) lseek(fd,(off_t)delta,SEEK_CUR); if(delta<0) out_of_order_count++; // only increment for backwards seeks DEBUG(25)("%s: lseek(%d,%d,SEEK_CUR) offset=%" PRId64 " pos=%" PRId64 " out_of_order_count=%" PRId64, flow_pathname.c_str(), fd,(int)delta,offset,pos,out_of_order_count); pos += delta; // where we are now nsn += delta; // what we expect the nsn to be now } /* write the data into the file */ DEBUG(25) ("%s: %s write %ld bytes @%" PRId64, flow_pathname.c_str(), fd>=0 ? 
"will" : "won't", (long) wlength, offset); if(fd>=0){ if ((uint32_t)write(fd,data, wlength) != wlength) { DEBUG(1) ("write to %s failed: ", flow_pathname.c_str()); if (debug >= 1) perror(""); } if(wlength != length){ off_t p = lseek(fd,length-wlength,SEEK_CUR); // seek out the space we didn't write DEBUG(100)(" lseek(%" PRId64 ",SEEK_CUR)=%" PRId64,(int64_t)(length-wlength),(int64_t)p); } } /* Update the database of bytes that we've seen */ if(seen) update_seen(seen,pos,length); /* Update the position in the file and the next expected sequence number */ pos += length; nsn += length; // expected next sequence number if(pos>last_byte) last_byte = pos; if(debug>=100){ uint64_t rpos = lseek(fd,(off_t)0,SEEK_CUR); DEBUG(100)(" pos=%" PRId64 " lseek(fd,0,SEEK_CUR)=%" PRId64,pos,rpos); assert(pos==rpos); } #ifdef DEBUG_REOPEN_LOGIC /* For debugging, force this connection closed */ demux.close_tcpip_fd(this); #endif } #pragma GCC diagnostic ignored "-Weffc++" #pragma GCC diagnostic ignored "-Wshadow" /* Note --- Turn off warning so that creating the seen() map doesn't throw an error */ //#pragma GCC diagnostic ignored "-Weffc++" tcpflow/src/hold/0000755000175000017500000000000012263701151012655 5ustar dimadimatcpflow/src/hold/tcp_options.cpp0000644000175000017500000003477412263701151015741 0ustar dimadima/* * tcp option parser. 
* Originally by Doug Madory */ /* These tcp optinos do not have the size octet */ #define ZEROLENOPT(o) ((o) == TCPOPT_EOL || (o) == TCPOPT_NOP) void parse_tcp_opts(std::list& opts, const u_char *cp, u_int hlen) { if (hlen == 0) return; register u_int i, opt, datalen; register u_int len; //putchar(' '); //ch = '<'; while (hlen > 0) { tcp_opt_t tcpopt; //putchar(ch); //TCHECK(*cp); opt = *cp++; if (ZEROLENOPT(opt)) len = 1; else { //TCHECK(*cp); len = *cp++; /* total including type, len */ if (len < 2 || len > hlen) // stop processing on bad opt break; --hlen; /* account for length byte */ } --hlen; /* account for type byte */ datalen = 0; /* Bail if "l" bytes of data are not left or were not captured */ #define LENCHECK(l) { if ((l) > hlen) break; } tcpopt.type = opt; tcpopt.data_raw = cp; switch (opt) { case TCPOPT_MAXSEG: //(void)printf("mss"); datalen = 2; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_16BITS(cp)); tcpopt.data.mss = EXTRACT_16BITS(cp); break; case TCPOPT_EOL: //(void)printf("eol"); break; case TCPOPT_NOP: //(void)printf("nop"); break; case TCPOPT_WSCALE: //(void)printf("wscale"); datalen = 1; LENCHECK(datalen); //(void)printf(" %u", *cp); tcpopt.data.wscale = *cp; break; case TCPOPT_SACKOK: //(void)printf("sackOK"); break; case TCPOPT_SACK: datalen = len - 2; if (datalen % 8 != 0) { //(void)printf("malformed sack"); } else { u_int32_t s, e; //(void)printf("sack %d ", datalen / 8); for (i = 0; i < datalen; i += 8) { LENCHECK(i + 4); s = EXTRACT_32BITS(cp + i); LENCHECK(i + 8); e = EXTRACT_32BITS(cp + i + 4); /* XXX leave application to do this translation? 
if (threv) { s -= thseq; e -= thseq; } else { s -= thack; e -= thack; } (void)printf("{%u:%u}", s, e); */ tcpopt.data_sack.push_back(std::pair(s,e)); } } break; case TCPOPT_ECHO: //(void)printf("echo"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.echo = EXTRACT_32BITS(cp); break; case TCPOPT_ECHOREPLY: //(void)printf("echoreply"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.echoreply = EXTRACT_32BITS(cp); break; case TCPOPT_TIMESTAMP: //(void)printf("timestamp"); datalen = 8; //LENCHECK(4); //(void)printf(" %u", EXTRACT_32BITS(cp)); LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp + 4)); tcpopt.data.timestamp.tsval = EXTRACT_32BITS(cp); tcpopt.data.timestamp.tsecr = EXTRACT_32BITS(cp + 4); break; case TCPOPT_CC: //(void)printf("cc"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.cc = EXTRACT_32BITS(cp); break; case TCPOPT_CCNEW: //(void)printf("ccnew"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.ccnew = EXTRACT_32BITS(cp); break; case TCPOPT_CCECHO: //(void)printf("ccecho"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.ccecho = EXTRACT_32BITS(cp); break; case TCPOPT_SIGNATURE: //(void)printf("md5:"); datalen = TCP_SIGLEN; LENCHECK(datalen); for (i = 0; i < TCP_SIGLEN; ++i) //(void)printf("%02x", cp[i]); tcpopt.data.signature[i] = cp[i]; break; default: //(void)printf("opt-%u:", opt); datalen = len - 2; /* for (i = 0; i < datalen; ++i) { LENCHECK(i); (void)printf("%02x", cp[i]); } */ break; } /* Account for data printed */ cp += datalen; hlen -= datalen; /* Check specification against observed length */ //++datalen; /* option octet */ //if (!ZEROLENOPT(opt)) // ++datalen; /* size octet */ //if (datalen != len) // (void)printf("[len %d]", len); //ch = ','; tcpopt.len = datalen; opts.push_back(tcpopt); if (opt == TCPOPT_EOL) break; } //putchar('>'); 
} void handle_tcp(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct tcphdr *tp; tp = (struct tcphdr *)bp; int hlen; // truncated header if (length < sizeof(*tp)) { cbs->HandleTCP(t, ip4h, ip6h, NULL, NULL, 0, bp, length); return; } hlen = TH_OFF(tp) * 4; // bad header length || missing tcp options if (hlen < (int)sizeof(*tp) || length < (int)sizeof(*tp) || hlen > (int)length) { cbs->HandleTCP(t, ip4h, ip6h, NULL, NULL, 0, bp, length); return; } tcp_hdr_t hdr; hdr.sport = EXTRACT_16BITS(&tp->th_sport); hdr.dport = EXTRACT_16BITS(&tp->th_dport); hdr.seq = EXTRACT_32BITS(&tp->th_seq); hdr.ack = EXTRACT_32BITS(&tp->th_ack); hdr.dataoff = TH_OFF(tp) * 4; hdr.flags = tp->th_flags; hdr.win = EXTRACT_16BITS(&tp->th_win); hdr.cksum = EXTRACT_16BITS(&tp->th_sum); hdr.urgptr = EXTRACT_16BITS(&tp->th_urp); //parse_tcp_opts(hdr.opts, bp+sizeof(*tp), hlen-sizeof(*tp)); cbs->HandleTCP(t, ip4h, ip6h, &hdr, hlen==sizeof(*tp)?NULL:bp+sizeof(*tp), hlen-sizeof(*tp), bp+hlen, length-hlen); } void handle_udp(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct udphdr *uh; uh = (struct udphdr *)bp; if (length < sizeof(struct udphdr)) { // truncated udp header cbs->HandleUDP(t, ip4h, ip6h, NULL, bp, length); return; } udp_hdr_t hdr; hdr.sport = EXTRACT_16BITS(&uh->uh_sport); hdr.dport = EXTRACT_16BITS(&uh->uh_dport); hdr.len = EXTRACT_16BITS(&uh->uh_ulen); hdr.cksum = EXTRACT_16BITS(&uh->uh_sum); cbs->HandleUDP(t, ip4h, ip6h, &hdr, bp+sizeof(struct udphdr), length-sizeof(struct udphdr)); } void handle_icmp(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct icmp *dp; dp = (struct icmp *)bp; if (length < 4) { // truncated icmp header cbs->HandleICMP(t, ip4h, ip6h, -1, -1, bp, length); 
return; } cbs->HandleICMP(t, ip4h, ip6h, dp->icmp_type, dp->icmp_code, bp+4, length-4); } /////////////////////////////////////////////////////////////////////////////// void handle_ip(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len); void handle_ip6(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len); struct ip_print_demux_state { struct ip *ip; const u_char *cp; u_int len, off; u_char nh; int advance; }; void ip_demux(const struct timeval& t, WifipcapCallbacks *cbs, ip4_hdr_t *hdr, struct ip_print_demux_state *ipds, int len) { //struct protoent *proto; //again: switch (ipds->nh) { case IPPROTO_TCP: /* pass on the MF bit plus the offset to detect fragments */ handle_tcp(t, cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_UDP: /* pass on the MF bit plus the offset to detect fragments */ handle_udp(t, cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_ICMP: /* pass on the MF bit plus the offset to detect fragments */ handle_icmp(t, cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_IPV4: /* DVMRP multicast tunnel (ip-in-ip encapsulation) */ //handle_ip(t, cbs, ipds->cp, ipds->len); //break; case IPPROTO_IPV6: /* ip6-in-ip encapsulation */ //handle_ip6(t, cbs, ipds->cp, ipds->len); //break; ///// Jeff: XXX Some day handle these maybe (see tcpdump code) case IPPROTO_AH: /* ipds->nh = *ipds->cp; ipds->advance = ah_print(ipds->cp); if (ipds->advance <= 0) break; ipds->cp += ipds->advance; ipds->len -= ipds->advance; goto again; */ case IPPROTO_ESP: { /* int enh, padlen; ipds->advance = esp_print(ndo, ipds->cp, ipds->len, (const u_char *)ipds->ip, &enh, &padlen); if (ipds->advance <= 0) break; ipds->cp += ipds->advance; ipds->len -= ipds->advance + padlen; ipds->nh = enh & 0xff; goto again; */ } case IPPROTO_IPCOMP: { /* int enh; ipds->advance = ipcomp_print(ipds->cp, &enh); if (ipds->advance <= 0) 
break; ipds->cp += ipds->advance; ipds->len -= ipds->advance; ipds->nh = enh & 0xff; goto again; */ } case IPPROTO_SCTP: /* sctp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len); break; */ case IPPROTO_DCCP: /* dccp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len); break; */ case IPPROTO_PIGP: /* * XXX - the current IANA protocol number assignments * page lists 9 as "any private interior gateway * (used by Cisco for their IGRP)" and 88 as * "EIGRP" from Cisco. * * Recent BSD headers define * IP_PROTO_PIGP as 9 and IP_PROTO_IGRP as 88. * We define IP_PROTO_PIGP as 9 and * IP_PROTO_EIGRP as 88; those names better * match was the current protocol number * assignments say. */ /* igrp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ case IPPROTO_EIGRP: /* eigrp_print(ipds->cp, ipds->len); break; */ case IPPROTO_ND: /* ND_PRINT((ndo, " nd %d", ipds->len)); break; */ case IPPROTO_EGP: /* egp_print(ipds->cp, ipds->len); break; */ case IPPROTO_OSPF: /* ospf_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ case IPPROTO_IGMP: /* igmp_print(ipds->cp, ipds->len); break; */ case IPPROTO_RSVP: /* rsvp_print(ipds->cp, ipds->len); break; */ case IPPROTO_GRE: /* do it */ /* gre_print(ipds->cp, ipds->len); break; */ case IPPROTO_MOBILE: /* mobile_print(ipds->cp, ipds->len); break; */ case IPPROTO_PIM: /* pim_print(ipds->cp, ipds->len); break; */ case IPPROTO_VRRP: /* vrrp_print(ipds->cp, ipds->len, ipds->ip->ip_ttl); break; */ case IPPROTO_PGM: /* pgm_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ default: /* if ((proto = getprotobynumber(ipds->nh)) != NULL) ND_PRINT((ndo, " %s", proto->p_name)); else ND_PRINT((ndo, " ip-proto-%d", ipds->nh)); ND_PRINT((ndo, " %d", ipds->len)); */ cbs->HandleL3Unknown(t, hdr, NULL, ipds->cp, ipds->len); break; } } void handle_ip(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len) { struct ip_print_demux_state ipd; struct ip_print_demux_state *ipds=&ipd; u_int hlen; // 
truncated (in fact, nothing!) if (len == 0) { cbs->HandleIP(t, NULL, NULL, 0, ptr, len); return; } ipds->ip = (struct ip *)ptr; if (IP_V(ipds->ip) != 4) { if (IP_V(ipds->ip) == 6) { // wrong link-layer encap! handle_ip6(t, cbs, ptr, len); return; } } if (len < sizeof (struct ip)) { // truncated! cbs->HandleIP(t, NULL, NULL, 0, ptr, len); return; } hlen = IP_HL(ipds->ip) * 4; ipds->len = EXTRACT_16BITS(&ipds->ip->ip_len); if (len < (int)ipds->len) { // truncated IP // this is ok, we'll just report the truncation later } if (ipds->len < hlen) { // missing some ip options! cbs->HandleIP(t, NULL, NULL, 0, ptr, len); } ipds->len -= hlen; ipds->off = EXTRACT_16BITS(&ipds->ip->ip_off); struct ip4_hdr_t hdr; hdr.ver = IP_V(ipds->ip); hdr.hlen = IP_HL(ipds->ip) * 4; hdr.tos = ipds->ip->ip_tos; hdr.len = EXTRACT_16BITS(&ipds->ip->ip_len); hdr.id = EXTRACT_16BITS(&ipds->ip->ip_id); hdr.df = (bool)((ipds->off & IP_DF) != 0); hdr.mf = (bool)((ipds->off & IP_MF) != 0); hdr.fragoff = (ipds->off & IP_OFFMASK); hdr.ttl = ipds->ip->ip_ttl; hdr.proto = ipds->ip->ip_p; hdr.cksum = EXTRACT_16BITS(&ipds->ip->ip_sum); hdr.src = ipds->ip->ip_src; hdr.dst = ipds->ip->ip_dst; cbs->HandleIP(t, &hdr, hlen==sizeof(struct ip)?NULL:ptr+sizeof(struct ip), hlen-sizeof(struct ip), ptr+hlen, len-hlen); /* * If this is fragment zero, hand it to the next higher * level protocol. */ if ((ipds->off & 0x1fff) == 0) { ipds->cp = (const u_char *)ipds->ip + hlen; ipds->nh = ipds->ip->ip_p; ip_demux(t, cbs, &hdr, ipds, len); } else { // This is a fragment of a previous packet. 
can't demux it return; } } void handle_ip6(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len) { const struct ip6_hdr *ip6; if (len < sizeof (struct ip6_hdr)) { cbs->HandleIP6(t, NULL, ptr, len); return; } ip6 = (const struct ip6_hdr *)ptr; ip6_hdr_t hdr; memcpy(&hdr, ip6, sizeof(&hdr)); hdr.ip6_plen = EXTRACT_16BITS(&ip6->ip6_plen); hdr.ip6_flow = EXTRACT_32BITS(&ip6->ip6_flow); cbs->HandleIP6(t, &hdr, ptr+sizeof(hdr), len-sizeof(hdr)); int nh = ip6->ip6_nxt; switch(nh) { case IPPROTO_TCP: handle_tcp(t, cbs, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr), NULL, &hdr, 0); break; case IPPROTO_UDP: handle_udp(t, cbs, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr), NULL, &hdr, 0); break; default: cbs->HandleL3Unknown(t, NULL, &hdr, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr)); break; } } void handle_arp(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len) { struct arp_pkthdr *ap; //u_short pro, hrd, op; if (len < sizeof(struct arp_pkthdr)) { cbs->HandleARP(t, NULL, ptr, len); return; } ap = (struct arp_pkthdr *)ptr; cbs->HandleARP(t, ap, ptr+ARP_HDRLEN, len-ARP_HDRLEN); } /////////////////////////////////////////////////////////////////////////////// void handle_ether(const struct timeval& t, WifipcapCallbacks *cbs, const u_char *ptr, int len) { ether_hdr_t hdr; hdr.da = ether2MAC(ptr); hdr.sa = ether2MAC(ptr+6); hdr.type = EXTRACT_16BITS(ptr + 12); ptr += 14; len -= 14; cbs->HandleEthernet(t, &hdr, ptr, len); switch (hdr.type) { case ETHERTYPE_IP: handle_ip(t, cbs, ptr, len); return; case ETHERTYPE_IPV6: handle_ip6(t, cbs, ptr, len); return; case ETHERTYPE_ARP: handle_arp(t, cbs, ptr, len); return; default: cbs->HandleL2Unknown(t, hdr.type, ptr, len); return; } } tcpflow/src/tcpflow.h0000644000175000017500000001703012263701151013557 0ustar dimadima/* * This file is part of tcpflow by Simson Garfinkel, * originally by Jeremy Elson * * This source code is under the GNU Public License (GPL). See * LICENSE for details. 
* * * */ #ifndef TCPFLOW_H #define TCPFLOW_H #include "config.h" /* Older versions of autoconf define PACKAGE and VERSION. * Newer versions define PACKAGE_VERSION and PACKAGE_NAME. * We now use the new variables; allow the old ones. */ #ifndef PACKAGE_VERSION #define PACKAGE_VERSION VERSION #endif #ifndef PACKAGE_NAME #define PACKAGE_NAME PACAKGE #endif /**************************************************************** *** Windows/mingw compatability seciton. *** *** If we are compiling for Windows, including the Windows-specific *** include files first and disable pthread support. ***/ #ifdef WIN32 # undef HAVE_PTHREAD_H # undef HAVE_SEMAPHORE_H # undef HAVE_PTHREAD # undef HAVE_INET_NTOP /* it's not there. Really. */ # undef HAVE_EXTERN_PROGNAME // don't work properly on mingw # define MKDIR(a,b) mkdir(a) // MKDIR only takes 1 argument on windows /* Defines not present in Microsoft Windows stack */ #else /*** Unix-specific elements for windows compatibility section ***/ # define MKDIR(a,b) mkdir(a,b) // MKDIR takes 2 arguments on Posix #endif /*** *** end of windows compatibility section ****************************************************************/ /* If we are including inttypes.h, mmake sure __STDC_FORMAT_MACROS is defined */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif /* We want the BSD flavor of defines if possible */ #ifndef __FAVOR_BSD #define __FAVOR_BSD #endif #ifndef __USE_BSD #define __USE_BSD #endif #include /* required per C++ standard - use the C++ versions*/ #include #include #include #include #include #include #include #include #ifndef O_BINARY #define O_BINARY 0 #endif // These are the required include files; they better be present #include #include #ifdef HAVE_SYS_CDEFS_H # include #endif #ifdef HAVE_STRING_H # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_BITYPES_H # include #endif #ifdef TIME_WITH_SYS_TIME # include # 
include #else # if HAVE_SYS_TIME_H # include # else # include # endif #endif #ifdef HAVE_SYS_SOCKET_H # include #endif #ifdef HAVE_NE_IF_VAR_H #include #endif #ifdef HAVE_NET_IF_H # include #endif /* We have given up on keeping track of this all and are just including our own definitions. */ //#ifdef HAVE_NETINET_IN_SYSTM_H //# include //#endif //#ifdef HAVE_NETINET_IP6_H //#include //#endif //#ifdef HAVE_NETINET_IP_VAR_H //# include // FREEBSD //#endif //#ifdef HAVE_NETINET_IF_ETHER_H //# include //#endif //#ifdef HAVE_NETINET_TCP_H //# include //#endif //#ifdef HAVE_NETINET_TCPIP_H //# include // FREEBSD //#endif //#ifdef HAVE_ARPA_INET_H //# include //#endif ///* // * Oracle Enterprise Linux is missing the definition for // * ETHERTYPE_VLAN // */ //#ifndef ETHERTYPE_VLAN //# define ETHERTYPE_VLAN 0x8100 //#endif #ifdef HAVE_SIGNAL_H # include #endif /****************** Ugly System Dependencies ******************************/ /* We always want to refer to RLIMIT_NOFILE, even if what you actually * have is RLIMIT_OFILE */ #if defined(RLIMIT_OFILE) && !defined(RLIMIT_NOFILE) # define RLIMIT_NOFILE RLIMIT_OFILE #endif /* OPEN_MAX is the maximum number of files to open. * Unfortunately, some systems called this FOPEN_MAX... */ #if defined(FOPEN_MAX) && !defined(OPEN_MAX) # define OPEN_MAX FOPEN_MAX #endif /* some systems don't define SEEK_SET... 
sigh */ #ifndef SEEK_SET # define SEEK_SET 0 #endif /* SEEK_SET */ /* These may not be defined on some systems */ #ifndef MAX_IPv4_STR_LEN #define MAX_IPv4_STR_LEN (3*4+3) #endif #ifndef MAX_IPv6_STR_LEN #define MAX_IPv6_STR_LEN 256 #endif #ifndef HAVE_SOCKLEN_T typedef size_t socklen_t; #endif #ifndef IN6_IS_ADDR_V4MAPPED #define IN6_IS_ADDR_V4MAPPED(x) 0 #endif #ifndef IN6_IS_ADDR_V4COMPAT #define IN6_IS_ADDR_V4COMPAT(x) 0 #endif #undef s6_addr #define s6_addr __u6_addr.__u6_addr8 #undef s6_addr16 #define s6_addr16 __u6_addr.__u6_addr16 #undef s6_addr32 #define s6_addr32 __u6_addr.__u6_addr32 /**************************** Constants ***********************************/ #define DEFAULT_DEBUG_LEVEL 1 #define MAX_FD_GUESS 64 #define SNAPLEN 65536 /* largest possible MTU we'll see */ /* Reserve FDs for stdin, stdout, stderr, and the packet filter; one for breathing * room (we open new files before closing old ones), and one more to * be safe. */ #define NUM_RESERVED_FDS 6 /* number of FDs to set aside; allows files to be opened as necessary */ #include "be13_api/bulk_extractor_i.h" /***************************** Main Support *************************************/ /* tcpflow.cpp - CLI */ extern const char *progname; void terminate(int sig) __attribute__ ((__noreturn__)); #ifndef HAVE_INET_NTOP const char *inet_ntop(int af, const void *src,char *dst, socklen_t size); #endif #ifdef HAVE_PTHREAD #include extern sem_t *semlock; #endif #ifndef __MAIN_C__ extern int debug; #endif #define DEBUG(message_level) if (debug >= message_level) debug_real /************************* per-file globals ****************************/ /* datalink.cpp - callback for libpcap */ extern int32_t datalink_tdelta; // time delta to add to each packet pcap_handler find_handler(int datalink_type, const char *device); // callback for pcap typedef struct { pcap_handler handler; int type; } dlt_handler_t; void dl_ieee802_11_radio(u_char *user, const struct pcap_pkthdr *h, const u_char *p); void 
dl_prism(u_char *user, const struct pcap_pkthdr *h, const u_char *p); /** * shift the time value, in line with what the user requested... * previously this returned a structure on the stack, but that * created an optimization problem with gcc 4.7.2 */ inline const timeval &tvshift(struct timeval &tv,const struct timeval &tv_) { tv.tv_sec = tv_.tv_sec + datalink_tdelta; tv.tv_usec = tv_.tv_usec; return tv; } /* util.cpp - utility functions */ extern int debug; std::string ssprintf(const char *fmt,...); std::string comma_number_string(int64_t input); void mkdirs_for_path(std::string path); // creates any directories necessary for the path std::string macaddr(const uint8_t *addr); #define DEBUG_PEDANTIC 0x0001 // check values more rigorously void init_debug(const char *progname,int include_pid); void (*portable_signal(int signo, void (*func)(int)))(int); void debug_real(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); void die(const char *fmt, ...) __attribute__ ((__noreturn__)) __attribute__ ((format (printf, 1, 2))); /* scanners */ extern "C" scanner_t scan_md5; extern "C" scanner_t scan_http; extern "C" scanner_t scan_tcpdemux; extern "C" scanner_t scan_netviz; extern "C" scanner_t scan_wifiviz; #ifndef HAVE_TIMEVAL_OUT #define HAVE_TIMEVAL_OUT inline std::ostream& operator<<(std::ostream& os, const struct timeval *t) { return os << t->tv_sec << "." << std::setw(6) << std::setfill('0') << t->tv_usec; } #endif #endif /* __TCPFLOW_H__ */ tcpflow/src/tcpip.h0000644000175000017500000003120512263701151013220 0ustar dimadima#ifndef TCPIP_H #define TCPIP_H /** On windows, there is no in_addr_t; this is from * /usr/include/netinet/in.h */ #ifndef HAVE_NETINET_IN_H typedef uint32_t in_addr_t; #endif #ifndef HAVE_SA_FAMILY_T typedef unsigned short int sa_family_t; #endif /** * ipaddress class. * represents IPv4 and IPv6 addresses. 
* IPv4 addresses have address in bytes 0..3 and all NULL for bytes 4..11 */ class ipaddr { public:; ipaddr(){ memset(addr,0,sizeof(addr)); } ipaddr(const in_addr_t &a){ // copy operator addr[0] = ((uint8_t *)&a)[0]; // copy the bottom 4 octets and blank the top 12 addr[1] = ((uint8_t *)&a)[1]; addr[2] = ((uint8_t *)&a)[2]; addr[3] = ((uint8_t *)&a)[3]; memset(addr+4,0,12); } ipaddr(const uint8_t a[16]){ // begin wiped memcpy(addr,a,16); } uint8_t addr[16]; // holds v4 or v16 bool bit(int i) const { // get the ith bit; 0 is MSB return (addr[i / 8]) & (1<<(7-i%8)); } uint32_t quad(int i) const { // gets the ith quad as a 32-bit value return (addr[i*4+0]<<24) | (addr[i*4+2]<<16) | (addr[i*4+1]<<8) | (addr[i*4+3]<<0); } uint64_t dquad(int i) const { // gets the first 64-bit half or the second 64-bit half return (uint64_t)(quad(i*2+1))<<32 | (uint64_t)(quad(i*2)); } inline bool operator ==(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))==0; }; inline bool operator <=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))<=0; }; inline bool operator > (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>0; }; inline bool operator >=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>=0; }; inline bool operator < (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(this->addr))<0; } #pragma GCC diagnostic ignored "-Wcast-align" inline bool isv4() const { // is this an IPv6 address? uint32_t *i = (uint32_t *)((uint8_t *)&addr); return i[1]==0 && i[2]==0 && i[3]==0; } #pragma GCC diagnostic warning "-Wcast-align" }; inline std::ostream & operator <<(std::ostream &os,const ipaddr &b) { os << (int)b.addr[0] <<"."<<(int)b.addr[1] << "." << (int)b.addr[2] << "." 
<< (int)b.addr[3]; return os; } inline bool operator ==(const struct timeval &a,const struct timeval &b) { return a.tv_sec==b.tv_sec && a.tv_usec==b.tv_usec; } inline bool operator <(const struct timeval &a,const struct timeval &b) { return (a.tv_secsrc==b.src && this->dst==b.dst && this->sport==b.sport && this->dport==b.dport && this->family==b.family; } inline bool operator <(const flow_addr &b) const { if (this->src < b.src) return true; if (this->src > b.src) return false; if (this->dst < b.dst) return true; if (this->dst > b.dst) return false; if (this->sport < b.sport) return true; if (this->sport > b.sport) return false; if (this->dport < b.dport) return true; if (this->dport > b.dport) return false; if (this->family < b.family) return true; if (this->family > b.family) return false; return false; /* they are equal! */ } std::string str() const { std::stringstream s; s << "flow[" << src << ":" << sport << "->" << dst << ":" << dport << "]"; return s.str(); } }; inline std::ostream & operator <<(std::ostream &os,const flow_addr &f) { os << f.str(); return os; } /* * A flow is a flow_addr that has additional information regarding when it was seen * and how many packets were seen. The address is used to locate the flow in the array. * Notice that it contains no pointers, so it can be copied with the default operator. 
*/ class flow : public flow_addr { public:; static void usage(); // print information on flow notation static std::string filename_template; // static std::string outdir; // where the output gets written flow():id(),vlan(),mac_daddr(),mac_saddr(),tstart(),tlast(),packet_count(){}; flow(const flow_addr &flow_addr_,uint64_t id_,const be13::packet_info &pi): flow_addr(flow_addr_),id(id_),vlan(pi.vlan()), mac_daddr(), mac_saddr(), tstart(pi.ts),tlast(pi.ts), packet_count(0){ if(pi.pcap_hdr){ memcpy(mac_daddr,pi.get_ether_dhost(),sizeof(mac_daddr)); memcpy(mac_saddr,pi.get_ether_shost(),sizeof(mac_saddr)); } } virtual ~flow(){}; uint64_t id; // flow_counter when this flow was created int32_t vlan; // vlan interface we first observed; -1 means no vlan uint8_t mac_daddr[6]; // dst mac address of first packet uint8_t mac_saddr[6]; // source mac address of first packet struct timeval tstart; // when first seen struct timeval tlast; // when last seen uint64_t packet_count; // packet count // return a filename for a flow based on the template and the connection count std::string filename(uint32_t connection_count); // return a new filename for a flow based on the temlate, // optionally opening the file and returning a fd if &fd is provided std::string new_filename(int *fd,int flags,int mode); bool has_mac_daddr(){ return mac_daddr[0] || mac_daddr[1] || mac_daddr[2] || mac_daddr[3] || mac_daddr[4] || mac_daddr[5]; } bool has_mac_saddr(){ return mac_saddr[0] || mac_saddr[1] || mac_saddr[2] || mac_saddr[3] || mac_saddr[4] || mac_saddr[5]; } }; /* * Convenience class for working with TCP headers */ #define PORT_HTTP 80 #define PORT_HTTP_ALT_0 8080 #define PORT_HTTP_ALT_1 8000 #define PORT_HTTP_ALT_2 8888 #define PORT_HTTP_ALT_3 81 #define PORT_HTTP_ALT_4 82 #define PORT_HTTP_ALT_5 8090 #define PORT_HTTPS 443 #define PORT_SSH 22 #define PORT_FTP_DATA 20 #define PORT_FTP_CONTROL 21 class tcp_header_t { public: #pragma GCC diagnostic ignored "-Wcast-align" tcp_header_t(const u_char 
*data): tcp_header((struct be13::tcphdr *)data){}; #pragma GCC diagnostic warning "-Wcast-align" tcp_header_t(const tcp_header_t &b): tcp_header(b.tcp_header){} tcp_header_t &operator=(const tcp_header_t &that) { this->tcp_header = that.tcp_header; return *this; } virtual ~tcp_header_t(){} struct be13::tcphdr *tcp_header; size_t tcp_header_len(){ return tcp_header->th_off * 4; } uint16_t sport() {return ntohs(tcp_header->th_sport);} uint16_t dport() {return ntohs(tcp_header->th_dport);} be13::tcp_seq seq() {return ntohl(tcp_header->th_seq);} bool th_fin() {return tcp_header->th_flags & TH_FIN;} bool th_ack() {return tcp_header->th_flags & TH_ACK;} bool th_syn() {return tcp_header->th_flags & TH_SYN;} }; /* * The tcpip class is a passive tcp/ip implementation. * It can reconstruct flows! * * It includes: * - the flow (as an embedded object) * - Information about where the flow is written. * - Information about how much of the flow has been captured. * Currently flows only go in one direction and do not know about their sibling flow */ #pragma GCC diagnostic ignored "-Weffc++" #pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wall" #pragma GCC diagnostic ignored "-Wmissing-noreturn" #if defined(HAVE_BOOST_ICL_INTERVAL_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_MAP_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_SET_HPP) #include #include #include typedef boost::icl::interval_set recon_set; // Boost interval set of bytes that were reconstructed. #endif #pragma GCC diagnostic warning "-Weffc++" #pragma GCC diagnostic warning "-Wshadow" #pragma GCC diagnostic warning "-Wall" #pragma GCC diagnostic warning "-Wmissing-noreturn" class tcpip { public: /** track the direction of the flow; this is largely unused */ typedef enum { unknown=0, // unknown direction dir_sc, // server-to-client 1 dir_cs // client-to-server 2 } dir_t; private: /*** Begin Effective C++ error suppression *** *** This class does not implement assignment or copying. 
*** ***/ tcpip(const tcpip &t); tcpip &operator=(const tcpip &that); /*** End Effective C++ error suppression */ public:; tcpip(class tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_); /* constructor in tcpip.cpp */ virtual ~tcpip(); // destructor class tcpdemux &demux; // our demultiplexer /* State information for the flow being reconstructed */ flow myflow; /* Description of this flow */ dir_t dir; // direction of flow be13::tcp_seq isn; // Flow's initial sequence number be13::tcp_seq nsn; // fd - expected next sequence number uint32_t syn_count; // number of SYNs seen uint32_t fin_count; // number of FINs received uint32_t fin_size; // length of stream as determined when fin is sent uint64_t pos; // fd - current position+1 (next byte in stream to be written) /* Archiving information */ std::string flow_pathname; // path where flow is saved int fd; // file descriptor for file storing this flow's data bool file_created; // true if file was created /* Stats */ recon_set *seen; // what we've seen; it must be * due to boost lossage uint64_t last_byte; // last byte in flow processed uint64_t last_packet_number; // for finding most recent packet written uint64_t out_of_order_count; // all packets were contigious uint64_t violations; // protocol violation count /* Methods */ void close_file(); // close fd int open_file(); // opens save file; return -1 if failure, 0 if success void print_packet(const u_char *data, uint32_t length); void store_packet(const u_char *data, uint32_t length, int32_t delta); void process_packet(const struct timeval &ts,const int32_t delta,const u_char *data,const uint32_t length); uint32_t seen_bytes(); void dump_seen(); void dump_xml(class dfxml_writer *xmlreport,const std::string &xmladd); }; /* print a tcpip data structure. 
Largely for debugging */ inline std::ostream & operator <<(std::ostream &os,const tcpip &f) { os << "tcpip[" << f.myflow << " dir:" << int(f.dir) << " isn:" << f.isn << " nsn: " << f.nsn << " sc:" << f.syn_count << " fc:" << f.fin_count << " fs:" << f.fin_size << " pos:" << f.pos << " fd: " << f.fd << " cr:" << f.file_created << " lb:" << f.last_byte << " lpn:" << f.last_packet_number << " ooc:" << f.out_of_order_count << "]"; if(f.fd>0) os << " ftell(" << f.fd << ")=" << lseek(f.fd,0L,SEEK_CUR); return os; } /* * An saved_flow is a flow for which all of the packets have been received and tcpip state * has been discarded. The saved_flow allows matches against newly received packets * that are not SYN or ACK packets but have data. We can see if the data matches data that's * been written to disk. To do this we need ot know the filename and the ISN... */ class saved_flow { public: saved_flow(tcpip *tcp):addr(tcp->myflow), saved_filename(tcp->flow_pathname), isn(tcp->isn) {} flow_addr addr; // flow address std::string saved_filename; // where the flow was saved be13::tcp_seq isn; // the flow's ISN virtual ~saved_flow(){}; }; #endif tcpflow/src/tcpflow.cpp0000644000175000017500000006505012263701151014117 0ustar dimadima/* * This file is part of tcpflow by Simson Garfinkel . * Originally by Jeremy Elson . * * This source code is under the GNU Public License (GPL) version 3. * See COPYING for details. 
* */ #define __MAIN_C__ #include "config.h" #include "tcpflow.h" #include "tcpip.h" #include "tcpdemux.h" #include "bulk_extractor_i.h" #include "iptree.h" #include "be13_api/utils.h" #include #include #include #include /* bring in inet_ntop if it is not present */ #define ETH_ALEN 6 #ifndef HAVE_INET_NTOP #include "inet_ntop.c" #endif scanner_info::scanner_config be_config; // system configuration typedef struct { const char *name; const char *dvalue; const char *help; } default_t; default_t defaults[] = { {"tdelta","0","Time delta in seconds"}, {0,0,0} }; #ifdef HAVE_NETINET_IP_H #include #endif const char *progname = 0; // name of the program int debug = DEFAULT_DEBUG_LEVEL; // global variable, not clear why /* semaphore prevents multiple copies from outputing on top of each other */ #ifdef HAVE_PTHREAD #include sem_t *semlock = 0; #endif #define DEFAULT_REPORT_FILENAME "report.xml" /**************************************************************** *** SCANNER PLUG-IN SYSTEM ****************************************************************/ scanner_t *scanners_builtin[] = { scan_md5, scan_http, scan_netviz, scan_tcpdemux, scan_wifiviz, 0}; bool opt_no_promisc = false; // true if we should not use promiscious mode /**************************************************************** *** USAGE ****************************************************************/ static int usage_count = 0; static void usage() { switch(++usage_count){ case 1: std::cout << PACKAGE_NAME << " version " << PACKAGE_VERSION << "\n\n"; std::cout << "usage: " << progname << " [-aBcCDhJpsvVZ] [-b max_bytes] [-d debug_level] \n"; std::cout << " [-[eE] scanner] [-f max_fds] [-F[ctTXMkmg]] [-i iface] [-L semlock]\n"; std::cout << " [-m min_bytes] [-o outdir] [-r file] [-R file] \n"; std::cout << " [-S name=value] [-T template] [-w file] [-x scanner] [-X xmlfile]\n"; std::cout << " [expression]\n\n"; std::cout << " -a: do ALL post-processing.\n"; std::cout << " -b max_bytes: max number of bytes per flow 
to save\n"; std::cout << " -d debug_level: debug level; default is " << DEFAULT_DEBUG_LEVEL << "\n"; std::cout << " -f: maximum number of file descriptors to use\n"; std::cout << " -h: print this help message (-hh for more help)\n"; std::cout << " -H: print detailed information about each scanner\n"; std::cout << " -i: network interface on which to listen\n"; std::cout << " -J: output each flow in alternating colors (note change!)\n"; std::cout << " -l: treat non-flag arguments as input files rather than a pcap expression\n"; std::cout << " -L semlock - specifies that writes are locked using a named semaphore\n"; std::cout << " -p: don't use promiscuous mode\n"; std::cout << " -q: quiet mode - do not print warnings\n"; std::cout << " -r file: read packets from tcpdump pcap file (may be repeated)\n"; std::cout << " -R file: read packets from tcpdump pcap file TO FINISH CONNECTIONS\n"; std::cout << " -v: verbose operation equivalent to -d 10\n"; std::cout << " -V: print version number and exit\n"; std::cout << " -w file: write packets not processed to file\n"; std::cout << " -o outdir : specify output directory (default '.')\n"; std::cout << " -X filename : DFXML output to filename\n"; std::cout << " -m bytes : specifies skip that starts a new stream (default " << (unsigned)tcpdemux::options::MAX_SEEK << ").\n"; std::cout << " -F{p} : filename prefix/suffix (-hh for options)\n"; std::cout << " -T{t} : filename template (-hh for options; default " << flow::filename_template << ")\n"; std::cout << " -Z: do not decompress gzip-compressed HTTP transactions\n"; std::cout << "\nControl of Scanners:\n"; std::cout << " -E scanner - turn off all scanners except scanner\n"; std::cout << " -S name=value Set a configuration parameter (-hh for info)\n"; be13::plugin::info_scanners(false,true,scanners_builtin,'e','x'); std::cout << "Console output options:\n"; std::cout << " -B: binary output, even with -c or -C (normally -c or -C turn it off)\n"; std::cout << " -c: console print 
only (don't create files)\n"; std::cout << " -C: console print only, but without the display of source/dest header\n"; std::cout << " -s: strip non-printable characters (change to '.')\n"; std::cout << " -D: output in hex (useful to combine with -c or -C)\n"; std::cout << "\n"; #ifndef HAVE_LIBCAIRO std::cout << "Rendering not available because Cairo was not installed.\n\n"; #endif std::cout << "expression: tcpdump-like filtering expression\n"; std::cout << "\nSee the man page for additional information.\n\n"; break; case 2: std::cout << "Filename Prefixes:\n"; std::cout << " -Fc : append the connection counter to ALL filenames\n"; std::cout << " -Ft : prepend the time_t timestamp to ALL filenames\n"; std::cout << " -FT : prepend the ISO8601 timestamp to ALL filenames\n"; std::cout << " -FX : Do not output any files (other than report files)\n"; std::cout << " -FM : Calculate the MD5 for every flow (stores in DFXML)\n"; std::cout << " -Fk : Bin output in 1K directories\n"; std::cout << " -Fm : Bin output in 1M directories (2 levels)\n"; std::cout << " -Fg : Bin output in 1G directories (3 levels)\n"; flow::usage(); std::cout << "-S name=value options:\n"; for(int i=0;defaults[i].name;i++){ std::stringstream ss; ss << defaults[i].name << "=" << defaults[i].dvalue; printf(" %-20s %s\n",ss.str().c_str(),defaults[i].help); } std::cout << "\n"; std::cout << "DEBUG Levels (specify with -dNN):\n"; std::cout << "get_max_fds() = " << tcpdemux::getInstance()->get_max_fds() << "\n"; std::cout << "NUM_RESERVED_FDS = " << NUM_RESERVED_FDS << "\n"; break; } } /** * Create the dfxml output */ static void dfxml_create(class dfxml_writer &xreport,const std::string &command_line) { xreport.push("dfxml","xmloutputversion='1.0'"); xreport.push("metadata", "\n xmlns='http://afflib.org/tcpflow/' " "\n xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' " "\n xmlns:dc='http://purl.org/dc/elements/1.1/'" ); xreport.xmlout("dc:type","Feature Extraction","",false); xreport.pop(); 
xreport.add_DFXML_creator(PACKAGE_NAME,PACKAGE_VERSION,"",command_line); } /* String replace. Perhaps not the most efficient, but it works */ void replace(std::string &str,const std::string &from,const std::string &to) { if(from.size()==0) return; bool changed = false; std::stringstream ss; for(unsigned int i=0;i inflaters_t; static inflaters_t *build_inflaters() { inflaters_t *output = new inflaters_t(); // gzip output->push_back(new inflater(".gz", "gunzip -c '%s'")); // zip output->push_back(new inflater(".zip", "unzip -p '%s'")); // bz2 output->push_back(new inflater(".bz2", "bunzip2 -c '%s'")); // xz output->push_back(new inflater(".xz", "unxz -c '%s'")); // lzma output->push_back(new inflater(".lzma", "unlzma -c '%s'")); return output; } #define HAVE_INFLATER #endif /* * process an input file or device * May be repeated. * If start is false, do not initiate new connections */ static inflaters_t *inflaters = 0; static void process_infile(const std::string &expression,const char *device,const std::string &infile) { char error[PCAP_ERRBUF_SIZE]; pcap_t *pd=0; int dlt=0; pcap_handler handler; if(inflaters==0) inflaters = build_inflaters(); if (infile!=""){ std::string file_path = infile; // decompress input if necessary #ifdef HAVE_INFLATER for(inflaters_t::const_iterator it = inflaters->begin(); it != inflaters->end(); it++) { if((*it)->appropriate(infile)) { int fd = (*it)->invoke(infile); file_path = ssprintf("/dev/fd/%d", fd); if(fd < 0) { std::cerr << "decompression of '" << infile << "' failed" << std::endl; exit(1); } if(access(file_path.c_str(), R_OK)) { std::cerr << "decompression of '" << infile << "' is not available on this system" << std::endl; exit(1); } break; } } #endif if ((pd = pcap_open_offline(file_path.c_str(), error)) == NULL){ /* open the capture file */ die("%s", error); } dlt = pcap_datalink(pd); /* get the handler for this kind of packets */ handler = find_handler(dlt, infile.c_str()); } else { /* if the user didn't specify a device, try 
to find a reasonable one */ if (device == NULL){ if ((device = pcap_lookupdev(error)) == NULL){ die("%s", error); } } /* make sure we can open the device */ if ((pd = pcap_open_live(device, SNAPLEN, !opt_no_promisc, 1000, error)) == NULL){ die("%s", error); } #if defined(HAVE_SETUID) && defined(HAVE_GETUID) /* drop root privileges - we don't need them any more */ if(setuid(getuid())){ perror("setuid"); } #endif /* get the handler for this kind of packets */ dlt = pcap_datalink(pd); handler = find_handler(dlt, device); } DEBUG(20) ("filter expression: '%s'",expression.c_str()); /* install the filter expression in libpcap */ struct bpf_program fcode; if (pcap_compile(pd, &fcode, expression.c_str(), 1, 0) < 0){ die("%s", pcap_geterr(pd)); } if (pcap_setfilter(pd, &fcode) < 0){ die("%s", pcap_geterr(pd)); } /* initialize our flow state structures */ /* set up signal handlers for graceful exit (pcap uses onexit to put * interface back into non-promiscuous mode */ portable_signal(SIGTERM, terminate); portable_signal(SIGINT, terminate); #ifdef SIGHUP portable_signal(SIGHUP, terminate); #endif /* start listening or reading from the input file */ if (infile == "") DEBUG(1) ("listening on %s", device); if (pcap_loop(pd, -1, handler, (u_char *)tcpdemux::getInstance()) < 0){ die("%s: %s", infile.c_str(),pcap_geterr(pd)); } } int main(int argc, char *argv[]) { bool didhelp = false; feature_recorder::set_main_threadid(); sbuf_t::set_map_file_delimiter(""); // no delimiter on carving #ifdef BROKEN std::cerr << "WARNING: YOU ARE USING AN EXPERIMENTAL VERSION OF TCPFLOW \n"; std::cerr << "THAT DOES NOT WORK PROPERLY. 
PLEASE USE A RELEASE DOWNLOADED\n"; std::cerr << "FROM http://digitalcorpora.org/downloads/tcpflow\n"; std::cerr << "\n"; #endif bool force_binary_output = false; const char *device = ""; const char *lockname = 0; int need_usage = 0; std::string reportfilename; std::vector Rfiles; // files for finishing std::vector rfiles; // files to read tcpdemux &demux = *tcpdemux::getInstance(); // the demux object we will be using. std::string command_line = dfxml_writer::make_command_line(argc,argv); std::string opt_unk_packets; bool opt_quiet = false; /* Set up debug system */ progname = argv[0]; init_debug(progname,1); /* Make sure that the system was compiled properly */ if(sizeof(struct be13::ip4)!=20 || sizeof(struct be13::tcphdr)!=20){ fprintf(stderr,"COMPILE ERROR.\n"); fprintf(stderr," sizeof(struct ip)=%d; should be 20.\n", (int)sizeof(struct be13::ip4)); fprintf(stderr," sizeof(struct tcphdr)=%d; should be 20.\n", (int)sizeof(struct be13::tcphdr)); fprintf(stderr,"CANNOT CONTINUE\n"); exit(1); } bool trailing_input_list = false; int arg; while ((arg = getopt(argc, argv, "aA:Bb:cCd:DE:e:E:F:f:Hhi:JlL:m:o:pqR:r:S:sT:Vvw:x:X:Z")) != EOF) { switch (arg) { case 'a': demux.opt.post_processing = true; demux.opt.opt_md5 = true; be13::plugin::scanners_enable_all(); break; case 'A': fprintf(stderr,"-AH has been deprecated. Just use -a\n"); need_usage=true; break; case 'b': demux.opt.max_bytes_per_flow = atoi(optarg); if(debug > 1) { std::cout << "capturing max of " << demux.opt.max_bytes_per_flow << " bytes per flow." 
<< std::endl; } break; case 'B': force_binary_output = true; demux.opt.output_strip_nonprint = false; DEBUG(10) ("converting non-printable characters to '.'"); break; case 'C': demux.opt.console_output = true; DEBUG(10) ("printing packets to console only"); demux.opt.suppress_header = 1; DEBUG(10) ("packet header dump suppressed"); break; case 'c': demux.opt.console_output = true; DEBUG(10) ("printing packets to console only"); break; case 'd': if ((debug = atoi(optarg)) < 0) { debug = DEFAULT_DEBUG_LEVEL; DEBUG(1) ("warning: -d flag with 0 debug level '%s'", optarg); } break; case 'D': demux.opt.output_hex = true;DEBUG(10) ("Console output in hex"); demux.opt.output_strip_nonprint = false; DEBUG(10) ("Will not convert non-printablesto '.'"); break; case 'E': be13::plugin::scanners_disable_all(); be13::plugin::scanners_enable(optarg); break; case 'e': be13::plugin::scanners_enable(optarg); demux.opt.post_processing = true; // enable post processing if anything is turned on break; case 'F': for(const char *cc=optarg;*cc;cc++){ switch(*cc){ case 'c': replace(flow::filename_template,"%c","%C"); break; case 'k': flow::filename_template = "%K/" + flow::filename_template; break; case 'm': flow::filename_template = "%M000-%M999/%M%K/" + flow::filename_template; break; case 'g': flow::filename_template = "%G000000-%G999999/%G%M000-%G%M999/%G%M%K/" + flow::filename_template; break; case 't': flow::filename_template = "%tT" + flow::filename_template; break; case 'T': flow::filename_template = "%T" + flow::filename_template; break; case 'X': demux.opt.store_output = false;break; case 'M': demux.opt.opt_md5 = true;break; default: fprintf(stderr,"-F invalid format specification '%c'\n",*cc); need_usage = true; } } break; case 'f': { int mnew = atoi(optarg); DEBUG(1)("changing max_fds from %d to %d",demux.max_fds,mnew); demux.max_fds = mnew; break; } case 'i': device = optarg; break; case 'J': demux.opt.use_color = 1; DEBUG(10) ("using colors"); break; case 'l': 
trailing_input_list = true; break; case 'L': lockname = optarg; break; case 'm': demux.opt.max_seek = atoi(optarg); DEBUG(10) ("max_seek set to %d",demux.opt.max_seek); break; case 'o': demux.outdir = optarg; flow::outdir = optarg; break; case 'p': opt_no_promisc = true; DEBUG(10) ("NOT turning on promiscuous mode"); break; case 'q': opt_quiet = true; break; case 'R': Rfiles.push_back(optarg); break; case 'r': rfiles.push_back(optarg); break; case 'S': { std::vector params = split(optarg,'='); if(params.size()!=2){ std::cerr << "Invalid paramter: " << optarg << "\n"; exit(1); } be_config.namevals[params[0]] = params[1]; continue; } case 's': demux.opt.output_strip_nonprint = 1; DEBUG(10) ("converting non-printable characters to '.'"); break; case 'T': flow::filename_template = optarg; if(flow::filename_template.find("%c")==std::string::npos){ flow::filename_template += std::string("%C%c"); // append %C%c if not present } break; case 'V': std::cout << PACKAGE_NAME << " " << PACKAGE_VERSION << "\n"; exit (1); case 'v': debug = 10; break; case 'w': opt_unk_packets = optarg;break; case 'x': be13::plugin::scanners_disable(optarg);break; case 'X': reportfilename = optarg;break; case 'Z': demux.opt.gzip_decompress = 0; break; case 'H': be13::plugin::info_scanners(true,true,scanners_builtin,'e','x'); didhelp = true; break; case 'h': case '?': usage(); didhelp = true; break; default: DEBUG(1) ("error: unrecognized switch '%c'", arg); need_usage = 1; break; } } if(didhelp) exit(0); if(demux.opt.post_processing && !demux.opt.store_output){ std::cerr << "ERROR: post_processing currently requires storing output.\n"; exit(1); } argc -= optind; argv += optind; /* Load all the scanners and enable the ones we care about */ if(demux.opt.opt_md5) be13::plugin::scanners_enable("md5"); be13::plugin::load_scanners(scanners_builtin,be_config); be13::plugin::scanners_process_enable_disable_commands(); /* If there is no report filename, call it report.xml in the output directory */ if( 
reportfilename.size()==0 ){ reportfilename = demux.outdir + "/" + DEFAULT_REPORT_FILENAME; } /* print help and exit if there was an error in the arguments */ if (need_usage) { usage(); exit(1); } /* remaining arguments are either an input list (-l flag) or a pcap expression (default) */ std::string expression = ""; if(trailing_input_list) { for(int ii = 0; ii < argc; ii++) { rfiles.push_back(argv[ii]); } } else { /* get the user's expression out of remainder of the arg... */ for(int i=0;i0) expression+=" "; expression += argv[i]; } } /* More option processing */ /* was a semaphore provided for the lock? */ if(lockname){ #if defined(HAVE_SEMAPHORE_H) && defined(HAVE_PTHREAD) semlock = sem_open(lockname,O_CREAT,0777,1); // get the semaphore #else fprintf(stderr,"%s: attempt to create lock pthreads not present\n",argv[0]); exit(1); #endif } if(force_binary_output) demux.opt.output_strip_nonprint = false; /* make sure outdir is a directory. If it isn't, try to make it.*/ struct stat stbuf; if(stat(demux.outdir.c_str(),&stbuf)==0){ if(!S_ISDIR(stbuf.st_mode)){ std::cerr << "outdir is not a directory: " << demux.outdir << "\n"; exit(1); } } else { if(MKDIR(demux.outdir.c_str(),0777)){ std::cerr << "cannot create " << demux.outdir << ": " << strerror(errno) << "\n"; exit(1); } } std::string input_fname; if(rfiles.size() > 0) { input_fname = rfiles.at(0); if(rfiles.size() > 1) { input_fname += ssprintf(" + %d more", rfiles.size() - 1); } } /* report file specified? */ if(reportfilename.size()>0){ xreport = new dfxml_writer(reportfilename,false); dfxml_create(*xreport,command_line); demux.xreport = xreport; } if(opt_unk_packets.size()>0){ if(input_fname.size()==0){ std::cerr << "currently the -w option requires the -r option\n"; exit(1); } if(access(input_fname.c_str(),R_OK)) die("cannot read: %s: %s",input_fname.c_str(),strerror(errno)); demux.save_unk_packets(opt_unk_packets,input_fname); } scanner_info si; si.config = &be_config; /* Debug prefix set? 
*/ std::string debug_prefix=progname; si.get_config("debug-prefix",&debug_prefix,"Prefix for debug output"); init_debug(debug_prefix.c_str(),0); DEBUG(10) ("%s version %s ", PACKAGE_NAME, PACKAGE_VERSION); feature_file_names_t feature_file_names; be13::plugin::get_scanner_feature_file_names(feature_file_names); feature_recorder_set fs(0); fs.init(feature_file_names,input_fname.size()>0 ? input_fname : device,demux.outdir); the_fs = &fs; demux.fs = &fs; si.get_config("tdelta",&datalink_tdelta,"Time offset for packets"); /* Record the configuration */ if(xreport){ xreport->push("configuration"); xreport->pop(); // configuration xreport->xmlout("tdelta",datalink_tdelta); } /* Process r files and R files */ if(xreport){ xreport->push("configuration"); } if(rfiles.size()==0 && Rfiles.size()==0){ /* live capture */ #if defined(HAVE_SETUID) && defined(HAVE_GETUID) /* Since we don't need network access, drop root privileges */ if(setuid(getuid())){ perror("setuid"); } #endif demux.start_new_connections = true; process_infile(expression,device,""); input_fname = device; } else { /* first pick up the new connections with -r */ demux.start_new_connections = true; for(std::vector::const_iterator it=rfiles.begin();it!=rfiles.end();it++){ process_infile(expression,device,*it); } /* now pick up the outstanding connection with -R, but don't start new connections */ demux.start_new_connections = false; for(std::vector::const_iterator it=Rfiles.begin();it!=Rfiles.end();it++){ process_infile(expression,device,*it); } } /* -1 causes pcap_loop to loop forever, but it finished when the input file is exhausted. 
*/ DEBUG(2)("Open FDs at end of processing: %d",(int)demux.open_flows.size()); DEBUG(2)("demux.max_open_flows: %d",(int)demux.max_open_flows); DEBUG(2)("Flow map size at end of processing: %d",(int)demux.flow_map.size()); DEBUG(2)("Flows seen: %d",(int)demux.flow_counter); int open_fds = (int)demux.open_flows.size(); int flow_map_size = (int)demux.flow_map.size(); demux.close_all_fd(); std::stringstream ss; be13::plugin::phase_shutdown(fs,xreport ? &ss : 0); /* * Note: funny formats below are a result of mingw problems with PRId64. */ const std::string total_flow_processed("Total flows processed: %" PRId64); const std::string total_packets_processed("Total packets processed: %" PRId64); DEBUG(2)(total_flow_processed.c_str(),demux.flow_counter); DEBUG(2)(total_packets_processed.c_str(),demux.packet_counter); if(xreport){ demux.remove_all_flows(); // empty the map to capture the state xreport->pop(); // fileobjects xreport->xmlout("summary",ss.str(),"",false); xreport->xmlout("open_fds_at_end",open_fds); xreport->xmlout("max_open_flows",demux.max_open_flows); xreport->xmlout("total_flows",demux.flow_counter); xreport->xmlout("flow_map_size",flow_map_size); xreport->xmlout("total_packets",demux.packet_counter); xreport->add_rusage(); xreport->pop(); // bulk_extractor xreport->close(); delete xreport; } if(demux.flow_counter > tcpdemux::WARN_TOO_MANY_FILES){ if(!opt_quiet){ /* Start counting how many files we have in the output directory. * If we find more than 10,000, print the warning, and keep counting... 
*/ uint64_t filecount=0; DIR *dirp = opendir(demux.outdir.c_str()); if(dirp){ struct dirent *dp=0; while((dp=readdir(dirp))!=NULL){ filecount++; if(filecount==10000){ std::cerr << "*** tcpflow WARNING:\n"; std::cerr << "*** Modern operating systems do not perform well \n"; std::cerr << "*** with more than 10,000 entries in a directory.\n"; std::cerr << "***\n"; } } closedir(dirp); } if(filecount>=10000){ std::cerr << "*** tcpflow created " << filecount << " files in output directory " << demux.outdir << "\n"; std::cerr << "***\n"; std::cerr << "*** Next time, specify command-line options: -Fk , -Fm , or -Fg \n"; std::cerr << "*** This will automatically bin output into subdirectories.\n"; std::cerr << "*** type 'tcpflow -hhh' for more information.\n"; } } } exit(0); // return(0) causes crash on Windows } tcpflow/src/flow.cpp0000644000175000017500000001271012263701151013403 0ustar dimadima/** * * flow.cpp: * * The flow class is used to track individual TCP/IP flows (2 per connection). * The class implements the methods that turn a flow into a filename. * * This file is part of tcpflow by Jeremy Elson * Initial Release: 7 April 1999. * * This source code is under the GNU Public License (GPL). See * LICENSE for details. 
* */ #include "tcpflow.h" #include "tcpip.h" #include "tcpdemux.h" #ifdef HAVE_ARPA_INET_H #include // inet_ntop #endif #include #include #include std::string flow::filename_template("%A.%a-%B.%b%V%v%C%c"); std::string flow::outdir("."); void flow::usage() { std::cout << "Filename template format:\n"; std::cout << " %A/%a - source IP address/port; %B/%b - dest IP address/port\n"; std::cout << " %E/%e - source/dest Ethernet Mac address\n"; std::cout << " %V/%v - VLAN number, '--' if no vlan/'' if no vlan\n"; std::cout << " %T/%t - Timestamp in ISO8601 format/unix time_t\n"; std::cout << " %c - connection_count for connections>0 / %# for all connections;"; std::cout << " %C - 'c' if connection_count >0\n"; std::cout << " %N - (connection_number ) % 1000\n"; std::cout << " %K - (connection_number / 1000) % 1000\n"; std::cout << " %M - (connection_number / 1000000) % 1000\n"; std::cout << " %G - (connection_number / 1000000000) % 1000\n"; std::cout << " %% - Output a '%'\n"; std::cout << "\n"; } std::string flow::filename(uint32_t connection_count) { std::stringstream ss; /* Add the outdir */ if(flow::outdir!="." && flow::outdir!=""){ ss << flow::outdir; ss << '/'; } for(unsigned int i=0;i0 if(connection_count>0) ss << "c"; break; case 'c': // connection_count if connection_count >0 if(connection_count>0) ss << connection_count; break; case '#': // always output connection count ss << connection_count; break; case '%': // Output a '%' ss << "%"; break; default: std::cerr << "Invalid filename_template: " << filename_template << "\n"; std::cerr << "unknown character: " << filename_template.at(i+1) << "\n"; exit(1); } if(buf[0]) ss << buf; } } return ss.str(); } /** * Find an unused filename for the flow and optionally open it. * This is called from tcpip::open_file(). 
*/ std::string flow::new_filename(int *fd,int flags,int mode) { /* Loop connection count until we find a file that doesn't exist */ for(uint32_t connection_count=0;;connection_count++){ std::string nfn = filename(connection_count); if(nfn.find('/')!=std::string::npos) mkdirs_for_path(nfn.c_str()); int nfd = tcpdemux::getInstance()->retrying_open(nfn,flags,mode); if(nfd>=0){ *fd = nfd; return nfn; } if(errno!=EEXIST) die("Cannot open: %s",nfn.c_str()); } return std::string("<>"); // error; no file } tcpflow/src/Makefile.am0000644000175000017500000000571312263701323013772 0ustar dimadima# Programs that we compile: bin_PROGRAMS = tcpflow AM_CPPFLAGS = -I${top_srcdir}/src/be13_api -I${top_srcdir}/src/wifipcap include dfxml/src/Makefile.defs include be13_api/Makefile.defs # http://stackoverflow.com/questions/11438613/conditional-subdir-objects AUTOMAKE_OPTIONS = subdir-objects NETVIZ = \ netviz/plot_view.cpp \ netviz/plot_view.h \ netviz/time_histogram_view.cpp \ netviz/time_histogram_view.h \ netviz/time_histogram.cpp \ netviz/time_histogram.h \ netviz/address_histogram_view.cpp \ netviz/address_histogram_view.h \ netviz/address_histogram.cpp \ netviz/address_histogram.h \ netviz/port_histogram_view.cpp \ netviz/port_histogram_view.h \ netviz/port_histogram.cpp \ netviz/port_histogram.h \ netviz/packetfall.cpp \ netviz/packetfall.h \ netviz/net_map.cpp \ netviz/net_map.h \ netviz/legend_view.cpp \ netviz/legend_view.h \ netviz/one_page_report.cpp \ netviz/one_page_report.h WIFI = datalink_wifi.cpp \ datalink_wifi.h \ wifipcap/TimeVal.cpp \ wifipcap/TimeVal.h \ wifipcap/arp.h \ wifipcap/cpack.cpp \ wifipcap/cpack.h \ wifipcap/ether.h \ wifipcap/ethertype.h \ wifipcap/extract.h \ wifipcap/icmp.h \ wifipcap/ieee802_11_radio.h \ wifipcap/ip.h \ wifipcap/ip6.h \ wifipcap/ipproto.h \ wifipcap/llc.h \ wifipcap/os.h \ wifipcap/oui.h \ wifipcap/prism.h \ wifipcap/radiotap.h \ wifipcap/tcp.h \ wifipcap/types.h \ wifipcap/udp.h \ wifipcap/util.h \ wifipcap/wifipcap.cpp \ 
wifipcap/wifipcap.h tcpflow_SOURCES = \ $(DFXML_WRITER) $(NETVIZ) $(BE13_API) $(WIFI) \ datalink.cpp flow.cpp \ tcpflow.cpp \ tcpip.h tcpip.cpp \ tcpdemux.h tcpdemux.cpp \ tcpflow.h util.cpp \ scan_md5.cpp \ scan_http.cpp \ scan_tcpdemux.cpp \ scan_netviz.cpp \ scan_wifiviz.cpp \ pcap_writer.h \ iptree.h \ http-parser/http_parser.c \ http-parser/http_parser.h \ mime_map.cpp \ mime_map.h EXTRA_DIST =\ http-parser/AUTHORS \ http-parser/CONTRIBUTIONS \ http-parser/LICENSE-MIT \ http-parser/Makefile \ http-parser/README.md \ http-parser/http_parser.gyp \ wifipcap/README.txt \ wifipcap/TimeVal.cpp \ wifipcap/TimeVal.h \ wifipcap/arp.h \ wifipcap/ether.h \ wifipcap/ethertype.h \ wifipcap/extract.h \ wifipcap/icmp.h \ wifipcap/ieee802_11_radio.h \ wifipcap/ip.h \ wifipcap/ip6.h \ wifipcap/ipproto.h \ wifipcap/os.h \ wifipcap/oui.h \ wifipcap/prism.h \ wifipcap/radiotap.h \ wifipcap/sample.cpp \ wifipcap/tcp.h \ wifipcap/types.h \ wifipcap/udp.h \ wifipcap/util.cpp \ wifipcap/util.h \ wifipcap/wifipcap.cpp \ wifipcap/wifipcap.h testiph: tcpflow echo Testing the IP histogram for i in 100 1000 10000 ; \ do /bin/rm -f test-$$i out-$$i.txt ; \ ./tcpflow -o test1 -S iphtest=1 -S iphtrim=$$i -r /corp/nps/packets/2008-nitroba/nitroba.pcap > iphtest-nitroba-$$i.txt ; \ done diff ../tests/iphtest-nitroba-100.txt iphtest-nitroba-100.txt diff ../tests/iphtest-nitroba-1000.txt iphtest-nitroba-1000.txt diff ../tests/iphtest-nitroba-10000.txt iphtest-nitroba-10000.txt echo iptree appears okay. 
tcpflow/src/pcap_writer.h0000644000175000017500000000626512263701151014430 0ustar dimadima/* * pcap_writer.h: * * A class for writing pcap files */ #ifndef HAVE_PCAP_WRITER_H #define HAVE_PCAP_WRITER_H class pcap_writer { /* These are not implemented */ pcap_writer &operator=(const pcap_writer &that); pcap_writer(const pcap_writer &t); class write_error: public std::exception { virtual const char *what() const throw() { return "write error in pcap_write"; } }; enum {PCAP_RECORD_HEADER_SIZE = 16, PCAP_MAX_PKT_LEN = 65535, // wire shark may reject larger PCAP_HEADER_SIZE = 4+2+2+4+4+4+4, }; FILE *fcap; // where file is written void write_bytes(const uint8_t * const val, size_t num_bytes) { size_t count = fwrite(val,1,num_bytes,fcap); if (count != num_bytes) throw new write_error(); } void write2(const uint16_t val) { size_t count = fwrite(&val,1,2,fcap); if (count != 2) throw new write_error(); } void write4(const uint32_t val) { size_t count = fwrite(&val,1,4,fcap); if (count != 4) throw new write_error(); } void open(const std::string &fname) { fcap = fopen(fname.c_str(),"wb"); // write the output if(fcap==0) throw new write_error(); } void write_header(){ write4(0xa1b2c3d4); write2(2); // major version number write2(4); // minor version number write4(0); // time zone offset; always 0 write4(0); // accuracy of time stamps in the file; always 0 write4(PCAP_MAX_PKT_LEN); // snapshot length write4(DLT_EN10MB); // link layer encapsulation } void copy_header(const std::string &ifname){ /* assert byte order is correct */ FILE *f2 = fopen(ifname.c_str(),"rb"); if(f2==0) throw new write_error(); u_char buf[PCAP_HEADER_SIZE]; if(fread(buf,1,sizeof(buf),f2)!=sizeof(buf)) throw new write_error(); if((buf[0]!=0xd4) || (buf[1]!=0xc3) || (buf[2]!=0xb2) || (buf[3]!=0xa1)){ std::cout << "pcap file " << ifname << " is in wrong byte order. 
Cannot continue.\n"; throw new write_error(); } if(fwrite(buf,1,sizeof(buf),fcap)!=sizeof(buf)) throw new write_error(); if(fclose(f2)!=0) throw new write_error(); } pcap_writer():fcap(0){} public: static pcap_writer *open_new(const std::string &ofname){ pcap_writer *pcw = new pcap_writer(); pcw->open(ofname); pcw->write_header(); return pcw; } static pcap_writer *open_copy(const std::string &ofname,const std::string &ifname){ pcap_writer *pcw = new pcap_writer(); pcw->open(ofname); pcw->copy_header(ifname); return pcw; } virtual ~pcap_writer(){ if(fcap) fclose(fcap); } void writepkt(const struct pcap_pkthdr *h,const u_char *p) { /* Write a packet */ write4(h->ts.tv_sec); // time stamp, seconds avalue write4(h->ts.tv_usec); // time stamp, microseconds write4(h->caplen); write4(h->len); size_t count = fwrite(p,1,h->caplen,fcap); // the packet if(count!=h->caplen) throw new write_error(); } }; #endif tcpflow/src/datalink_wifi.cpp0000644000175000017500000000270312263701151015242 0ustar dimadima/** * wifi datalink function and callbacks to handle 802.11 * In addition to calling process_packet_info() for the packets, * it maintains some 802.11 specific databases. 
*/

#include "tcpflow.h"
#include "datalink_wifi.h"

/**
 * TFCB --- TCPFLOW callbacks for wifipcap
 */

/* Generic 802.11 frame callback. The body is empty, so nothing is done with these frames here. */
void TFCB::Handle80211(const WifiPacket &p, u_int16_t fc, const MAC& sa, const MAC& da, const MAC& ra, const MAC& ta, const u_char *ptr, size_t len)
{
}

/*
 * LLC callback: wraps the payload (rest,len) in a be13::packet_info and hands
 * it to be13::plugin::process_packet().
 */
void TFCB::HandleLLC(const WifiPacket &p, const struct llc_hdr_t *hdr, const u_char *rest, size_t len)
{
    sbuf_t sb(pos0_t(),rest,len,len,0);         // NOTE(review): sb is not referenced again in this function
    struct timeval tv;                          // passed uninitialized to tvshift(); presumably tvshift() fills it in -- TODO confirm
    be13::packet_info pi(p.header_type,p.header,p.packet,tvshift(tv,p.header->ts),rest,len);
    be13::plugin::process_packet(pi);
}

/*
 * Beacon callback: counts how many beacons were seen for each
 * (source MAC, SSID) pair in mac_to_ssid.
 */
void TFCB::Handle80211MgmtBeacon(const WifiPacket &p, const mgmt_header_t *hdr, const mgmt_body_t *body)
{
#ifdef DEBUG_WIFI
    std::cerr << " " << "802.11 mgmt: " << hdr->sa << " beacon " << body->ssid.ssid << "\"";
#endif
    /* NOTE(review): std::string(const char*) stops at the first NUL; assumes ssid.ssid is NUL-terminated -- confirm */
    mac_ssid bcn(hdr->sa,std::string(body->ssid.ssid));
    mac_to_ssid[bcn] += 1;
}

/* Entrance point */
TFCB TFCB::theTFCB;             // singleton
static Wifipcap theWcap;

/* Packet handler for DLT_IEEE802_11_RADIO captures; forwards to the shared Wifipcap with the TFCB singleton. */
void dl_ieee802_11_radio(u_char *user, const struct pcap_pkthdr *h, const u_char *p)
{
    theWcap.handle_packet(&TFCB::theTFCB,DLT_IEEE802_11_RADIO,h,p);
}

/* Packet handler for DLT_PRISM_HEADER captures; forwards to the shared Wifipcap with the TFCB singleton. */
void dl_prism(u_char *user, const struct pcap_pkthdr *h, const u_char *p)
{
    theWcap.handle_packet(&TFCB::theTFCB,DLT_PRISM_HEADER,h,p);
}
tcpflow/src/tcpdemux.h0000644000175000017500000001620012263701151013730 0ustar dimadima#ifndef TCPDEMUX_H
#define TCPDEMUX_H

/**
 * tcpdemux.h
 *
 * a tcpip demultiplexer.
 *
 * Defines the basic classes used by the tcpflow program.
This includes:
 * - IP, TCP and UDP structures
 * - class ipaddr - IP address (IPv4 and IPv6)
 * - class flow_addr - The flow address (source addr & port; dest addr & port; family)
 * - class flow - All of the information for a flow that's being tracked
 * - class tcp_header_t - convenience class for working with TCP headers
 * - class tcpip - A one-sided TCP implementation
 * - class tcpdemux - Processes individual packets, identifies flows,
 *                    and creates tcpip objects as required
 */

#include "pcap_writer.h"
#include "dfxml/src/dfxml_writer.h"
#include "dfxml/src/hash_t.h"

/* NOTE(review): the system header names on the bare #include lines below are
 * missing from this copy of the file (as are the template arguments of the
 * container typedefs further down); they need to be restored from a pristine copy.
 */
#if defined(HAVE_SQLITE3_H)
#include
#endif

#if defined(HAVE_UNORDERED_MAP)
# include
# include
# undef HAVE_TR1_UNORDERED_MAP           // be sure we don't use it
#else
# if defined(HAVE_TR1_UNORDERED_MAP)
#  include
#  include
# else
#  error Requires or
# endif
#endif

#include

/**
 * the tcp demultiplexer
 * This is a singleton class; we only need a single demultiplexer.
 */
class tcpdemux {
    /* These are not implemented */
    tcpdemux(const tcpdemux &t);
    tcpdemux &operator=(const tcpdemux &that);

    /* see http://mikecvet.wordpress.com/tag/hashing/ */
    /* Hash functor for flow_addr keys; delegates to flow_addr::hash(). */
    typedef struct {
        long operator() (const flow_addr &k) const {return k.hash(); }
    } flow_addr_hash;

    /* Equality functor for flow_addr keys; delegates to operator==. */
    typedef struct {
        bool operator() (const flow_addr &x, const flow_addr &y) const { return x==y;}
    } flow_addr_key_eq;

#ifdef HAVE_TR1_UNORDERED_MAP
    typedef std::tr1::unordered_set tcpset;
    typedef std::tr1::unordered_map flow_map_t; // active flows
    typedef std::tr1::unordered_map saved_flow_map_t; // flows that have been saved
#else
    typedef std::unordered_set tcpset;
    typedef std::unordered_map flow_map_t; // active flows
    typedef std::unordered_map saved_flow_map_t; // flows that have been saved
#endif
    typedef std::vector saved_flows_t; // needs to be ordered

    tcpdemux();                         // private: instances are obtained through getInstance()
    /* NOTE(review): the include above is guarded by HAVE_SQLITE3_H but these members by HAVE_SQLITE3 -- confirm both are defined together */
#ifdef HAVE_SQLITE3
    sqlite3 *db;
    sqlite3_stmt *insert_flow;
#endif

public:
    static uint32_t tcp_timeout;
    static unsigned int get_max_fds(void);      // returns the max
    /* Deletes the DFXML writer and the pcap writer if they were set. */
    virtual ~tcpdemux(){
        if(xreport) delete xreport;
        if(pwriter) delete pwriter;
    }

    /* The pure options class means we can add new options without having to modify the tcpdemux constructor. */
    class options {
    public:;
        enum { MAX_SEEK=1024*1024*16 };
        options():console_output(false),store_output(true),opt_md5(false),
                  post_processing(false),gzip_decompress(true),
                  max_bytes_per_flow(),
                  max_flows(0),suppress_header(0),
                  output_strip_nonprint(true),output_hex(false),use_color(0),max_seek(MAX_SEEK){
        }
        bool    console_output;
        bool    store_output;           // do we output?
        bool    opt_md5;                // do we calculate MD5 on DFXML output?
        bool    post_processing;        // decode headers after tcp connection closes
        bool    gzip_decompress;
        uint64_t max_bytes_per_flow;
        uint32_t max_flows;
        bool    suppress_header;
        bool    output_strip_nonprint;
        bool    output_hex;
        bool    use_color;
        int32_t max_seek;               // signed because we compare with abs()
    };

    enum { WARN_TOO_MANY_FILES=10000};  // warn if more than this number of files in a directory

    std::string outdir;                 /* output directory */
    uint64_t    flow_counter;           // how many flows have we seen?
    uint64_t    packet_counter;         // monotonically increasing
    dfxml_writer *xreport;              // DFXML output file
    pcap_writer  *pwriter;              // where we should write packets
    unsigned int max_open_flows;        // how large did it ever get?
    unsigned int max_fds;               // maximum number of file descriptors for this tcpdemux

    flow_map_t  flow_map;               // db of open tcpip objects, indexed by flow
    tcpset      open_flows;             // the tcpip flows with open files
    saved_flow_map_t saved_flow_map;    // db of saved flows, indexed by flow
    saved_flows_t saved_flows;          // the flows that were saved
    bool        start_new_connections;  // true if we should start new connections
    options     opt;
    class feature_recorder_set *fs;     // where features extracted from each flow should be stored

    static uint32_t max_saved_flows;    // how many saved flows are kept in the saved_flow_map
    static tcpdemux *getInstance();

    /* Database */
    void openDB();                      // open the database file if we are using it in outdir directory.
    void write_flow_record(const std::string &starttime,const std::string &endtime,
                           const std::string &src_ipn,const std::string &dst_ipn,
                           const std::string &mac_daddr,const std::string &mac_saddr,
                           uint64_t packets,uint16_t srcport,uint16_t dstport,
                           const std::string &hashdigest_md5);

    void save_unk_packets(const std::string &wfname,const std::string &ifname);
                                        // save unknown packets at this location
    void post_process(tcpip *tcp);      // just before closing; writes XML and closes fd

    /* management of open fds and in-process tcpip flows*/
    void close_all_fd();
    void close_tcpip_fd(tcpip *);
    void close_oldest_fd();
    void remove_flow(const flow_addr &flow);    // remove a flow from the database, closing open files if necessary
    void remove_all_flows();                    // stop processing all tcpip connections

    /* open a new file, closing an fd in the openflow database if necessary */
    int retrying_open(const std::string &filename,int oflag,int mask);

    /* the flow database holds in-process tcpip connections */
    tcpip *create_tcpip(const flow_addr &flow, be13::tcp_seq isn, const be13::packet_info &pi);
    tcpip *find_tcpip(const flow_addr &flow);

    /* saved flows are completed flows that we remember in case straggling packets
     * show up. Remembering the flows lets us resolve the packets rather than creating
     * new flows.
     */
    void save_flow(tcpip *);

    /** packet processing.
     * Each returns 0 if processed, 1 if not processed, -1 if error.
     */
    int process_tcp(const ipaddr &src, const ipaddr &dst,sa_family_t family,
                    const u_char *tcp_data, uint32_t tcp_length,
                    const be13::packet_info &pi);
    int process_ip4(const be13::packet_info &pi);
    int process_ip6(const be13::packet_info &pi);
    int process_pkt(const be13::packet_info &pi);
};

#endif
tcpflow/src/mime_map.h0000644000175000017500000000060112263701151013661 0ustar dimadima/*
 * This file is part of tcpflow by Simson Garfinkel .
 * Originally by Will Glynn .
 *
 * This source code is under the GNU Public License (GPL) version 3.
 * See COPYING for details.
*
 */

#ifndef MIME_MAP_H
#define MIME_MAP_H

#include

/* Look up the file extension to use for a MIME type.
 * NOTE(review): a caller elsewhere in this archive tests the result with
 * .size(), so an empty string presumably means "no known extension" -- confirm
 * against mime_map.cpp.
 */
std::string get_extension_for_mime_type(const std::string& mime_type);

#endif /* MIME_MAP_H */tcpflow/src/scan_tcpdemux.cpp0000644000175000017500000000337112263701151015274 0ustar dimadima/**
 * tcp demultiplexer scanner.
 *
 * We have a single global tcp demultiplexer because it needs to manage
 * a global resource --- the maximum number of open files. We get the
 * singleton instance and put it in the user argument of the global
 * callback array. We could have designed the callback system to take
 * an instance which is subclassed from an abstract superclass, but
 * that would require a virtual function resolution on every function
 * call, whereas here we simply have a function call with two
 * arguments (which is faster, but less safe.)
 */

#include "config.h"
#include "tcpflow.h"
#include "tcpip.h"
#include "tcpdemux.h"
#include
#include
#include "bulk_extractor_i.h"

/** callback called by process_packet()
 * `user` is the pointer registered as packet_user in scan_tcpdemux(), i.e. the
 * tcpdemux singleton; the packet is forwarded to its process_pkt().
 */
static void packet_handler(void *user,const be13::packet_info &pi)
{
    reinterpret_cast(user)->process_pkt(pi);
}

/*
 * Scanner entry point.
 * - Exits the process if the scanner_params version does not match.
 * - PHASE_STARTUP: registers the scanner name/author, installs packet_handler
 *   with the tcpdemux singleton as its user pointer, and reads "tcp_timeout".
 * - PHASE_SCAN: does nothing.
 */
extern "C"
void scan_tcpdemux(const class scanner_params &sp,const recursion_control_block &rcb)
{
    if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){
        std::cerr << "scan_tcpdemux requires sp version " << scanner_params::CURRENT_SP_VERSION << "; "
                  << "got version " << sp.sp_version << "\n";
        exit(1);
    }

    if(sp.phase==scanner_params::PHASE_STARTUP){
        sp.info->name = "tcpdemux";
        sp.info->author= "Simson Garfinkel";
        sp.info->packet_user = tcpdemux::getInstance();
        sp.info->packet_cb = packet_handler;

        sp.info->get_config("tcp_timeout",&tcpdemux::getInstance()->tcp_timeout,"Timeout for TCP connections");
        return;         /* No feature files created */
    }

    if(sp.phase==scanner_params::PHASE_SCAN){
        /* NOTE(review): hash0 and hash1 are not used anywhere in this function */
        static const std::string hash0("");
        static const std::string hash1("");
        return;
    }
}
tcpflow/src/scan_http.cpp0000644000175000017500000004460512263701151014427 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode:
nil -*- */
/**
 *
 * scan_http:
 * Decodes HTTP responses
 */

#include "config.h"

#include "tcpflow.h"
#include "tcpip.h"
#include "tcpdemux.h"

#include "http-parser/http_parser.h"

#include "mime_map.h"

/* NOTE(review): the header names on the bare #include lines in this file are
 * missing from this copy; restore them from a pristine copy.
 */
#ifdef HAVE_SYS_WAIT_H
#include
#endif

#ifdef HAVE_LIBZ
#  define ZLIB_CONST
#  ifdef GNUC_HAS_DIAGNOSTIC_PRAGMA
#    pragma GCC diagnostic ignored "-Wundef"
#    pragma GCC diagnostic ignored "-Wcast-qual"
#  endif
#  ifdef HAVE_ZLIB_H
#    include
#  endif
#else
#  define z_stream void *               // prevents z_stream from generating an error
#endif

#define MIN_HTTP_BUFSIZE 80             // don't bother parsing smaller than this

#include
#include
#include
#include
#include

#define HTTP_CMD "http_cmd"
#define HTTP_ALERT_FD "http_alert_fd"

/* options */
std::string http_cmd;                   // command to run on each http object
int http_subproc_max = 10;              // how many subprocesses are we allowed?
int http_subproc = 0;                   // how many do we currently have?
int http_alert_fd = -1;                 // where should we send alerts?

/* define a callback object for sharing state between scan_http() and its callbacks
 *
 * One object is created per parser run. The static scan_http_cb_* functions are
 * the callbacks installed in http_parser_settings; each recovers the object
 * from parser->data and forwards to the matching private method.
 */
class scan_http_cbo {
private:
    typedef enum {NOTHING,FIELD,VALUE} last_on_header_t;  // which header callback fired most recently
    scan_http_cbo(const scan_http_cbo& c); // not implemented
    scan_http_cbo &operator=(const scan_http_cbo &c); // not implemented

public:
    virtual ~scan_http_cbo(){
        on_message_complete();          // make sure message was ended
    }
    scan_http_cbo(const std::string& path_,const char *base_,std::stringstream *xmlstream_) :
        path(path_), base(base_),xmlstream(xmlstream_),xml_fo(),request_no(0),
        headers(), last_on_header(NOTHING), header_value(), header_field(),
        output_path(), fd(-1), first_body(true),bytes_written(0),unzip(false),zs(),zinit(false),zfail(false){};
private:

    const std::string path;             // where data gets written
    const char *base;                   // where data started in memory
    std::stringstream *xmlstream;       // if present, where to put the fileobject annotations
    std::stringstream xml_fo;           // xml stream for this file object
    int request_no;                     // request number

    /* parsed headers */
    std::map headers;

    /* placeholders for possibly-incomplete header data */
    last_on_header_t last_on_header;
    std::string header_value, header_field;
    std::string output_path;
    int         fd;                     // fd for writing
    bool        first_body;             // first call to on_body after headers
    uint64_t    bytes_written;

    /* decompression for gzip-encoded streams. */
    bool     unzip;           // should we be decompressing?
    z_stream zs;              // zstream (avoids casting and memory allocation)
    bool     zinit;           // we have initialized the zstream
    bool     zfail;           // zstream failed in some manner, so ignore the rest of this stream

    /* The static functions are callbacks; they wrap the method calls */
#define CBO (reinterpret_cast(parser->data))
public:
    static int scan_http_cb_on_message_begin(http_parser * parser) { return CBO->on_message_begin();}
    static int scan_http_cb_on_url(http_parser * parser, const char *at, size_t length) { return 0;}  // URL is ignored; on_url() is not called
    static int scan_http_cb_on_header_field(http_parser * parser, const char *at, size_t length) { return CBO->on_header_field(at,length);}
    static int scan_http_cb_on_header_value(http_parser * parser, const char *at, size_t length) { return CBO->on_header_value(at,length); }
    static int scan_http_cb_on_headers_complete(http_parser * parser) { return CBO->on_headers_complete();}
    static int scan_http_cb_on_body(http_parser * parser, const char *at, size_t length) { return CBO->on_body(at,length);}
    static int scan_http_cb_on_message_complete(http_parser * parser) {return CBO->on_message_complete();}
#undef CBO
private:
    int on_message_begin();
    int on_url(const char *at, size_t length);
    int on_header_field(const char *at, size_t length);
    int on_header_value(const char *at, size_t length);
    int on_headers_complete();
    int on_body(const char *at, size_t length);
    int on_message_complete();
};

/**
 * on_message_begin:
 * Increment request number. Note that the first request is request_no = 1
 */

int scan_http_cbo::on_message_begin()
{
    request_no ++;
    return 0;
}

/**
 * on_url currently not implemented.
*/ int scan_http_cbo::on_url(const char *at, size_t length) { return 0; } /* Note 1: The state machine is defined in http-parser/README.md * Note 2: All header field names are converted to lowercase. * This is consistent with the RFC. */ int scan_http_cbo::on_header_field(const char *at,size_t length) { std::string field(at,length); std::transform(field.begin(), field.end(), field.begin(), ::tolower); switch(last_on_header){ case NOTHING: // Allocate new buffer and copy callback data into it header_field = field; break; case VALUE: // New header started. // Copy current name,value buffers to headers // list and allocate new buffer for new name headers[header_field] = header_value; header_field = field; break; case FIELD: // Previous name continues. Reallocate name // buffer and append callback data to it header_field.append(field); break; } last_on_header = FIELD; return 0; } int scan_http_cbo::on_header_value(const char *at, size_t length) { const std::string value(at,length); switch(last_on_header){ case FIELD: //Value for current header started. Allocate //new buffer and copy callback data to it header_value = value; break; case VALUE: //Value continues. Reallocate value buffer //and append callback data to it header_value.append(value); break; case NOTHING: // this shouldn't happen DEBUG(10)("Internal error in http-parser"); break; } last_on_header = VALUE; return 0; } /** * called when last header is read. * Determine the filename based on request_no and extension. * Also see if decompressing is happening... */ int scan_http_cbo::on_headers_complete() { tcpdemux *demux = tcpdemux::getInstance(); /* Add the most recently read header to the map, if any */ if (last_on_header==VALUE) { headers[header_field] = header_value; header_field=""; } /* Set output path to -HTTPBODY-nnn.ext for each part. * This is not consistent with tcpflow <= 1.3.0, which supported only one HTTPBODY, * but it's correct... 
*/ std::stringstream os; os << path << "-HTTPBODY-" << std::setw(3) << std::setfill('0') << request_no << std::setw(0); /* See if we can guess a file extension */ std::string extension = get_extension_for_mime_type(headers["content-type"]); if (extension.size()) { os << "." << extension; } output_path = os.str(); /* Choose an output function based on the content encoding */ std::string content_encoding(headers["content-encoding"]); if ((content_encoding == "gzip" || content_encoding == "deflate") && (demux->opt.gzip_decompress)){ #ifdef HAVE_LIBZ DEBUG(10) ( "%s: detected zlib content, decompressing", output_path.c_str()); unzip = true; #else /* We can't decompress, so just give it a .gz */ output_path.append(".gz"); DEBUG(5) ( "%s: refusing to decompress since zlib is unavailable", output_path.c_str() ); #endif } /* Open the output path */ fd = demux->retrying_open(output_path.c_str(), O_WRONLY|O_CREAT|O_BINARY|O_TRUNC, 0644); if (fd < 0) { DEBUG(1) ("unable to open HTTP body file %s", output_path.c_str()); } if(http_alert_fd>=0){ std::stringstream ss; ss << "open\t" << output_path << "\n"; const std::string &sso = ss.str(); if(write(http_alert_fd,sso.c_str(),sso.size())!=(int)sso.size()){ perror("write"); } } first_body = true; // next call to on_body will be the first one /* We can do something smart with the headers here. * * For example, we could: * - Record all headers into the report.xml * - Pick the intended filename if we see Content-Disposition: attachment; name="..." * - Record headers into filesystem extended attributes on the body file */ return 0; } /* Write to fd, optionally decompressing as we go */ int scan_http_cbo::on_body(const char *at,size_t length) { if (fd < 0) return -1; // no open fd? (internal error)x if (length==0) return 0; // nothing to write if(first_body){ // stuff for first time on_body is called xml_fo << " " << output_path << ""; first_body = false; } /* If not decompressing, just write the data and return. 
*/ if(unzip==false){ int rv = write(fd,at,length); if(rv<0) return -1; // write error; that's bad bytes_written += rv; return 0; } #ifndef HAVE_LIBZ assert(0); // shoudln't have gotten here #endif if(zfail) return 0; // stream was corrupt; ignore rest /* set up this round of decompression, using a small local buffer */ /* Call init if we are not initialized */ char decompressed[65536]; // where decompressed data goes if (!zinit) { memset(&zs,0,sizeof(zs)); zs.next_in = (Bytef*)at; zs.avail_in = length; zs.next_out = (Bytef*)decompressed; zs.avail_out = sizeof(decompressed); int rv = inflateInit2(&zs, 32 + MAX_WBITS); /* 32 auto-detects gzip or deflate */ if (rv != Z_OK) { /* fail! */ DEBUG(3) ("decompression failed at stream initialization; rv=%d bad Content-Encoding?",rv); zfail = true; return 0; } zinit = true; // successfully initted } else { zs.next_in = (Bytef*)at; zs.avail_in = length; zs.next_out = (Bytef*)decompressed; zs.avail_out = sizeof(decompressed); } /* iteratively decompress, writing each time */ while (zs.avail_in > 0) { /* decompress as much as possible */ int rv = inflate(&zs, Z_SYNC_FLUSH); if (rv == Z_STREAM_END) { /* are we done with the stream? */ if (zs.avail_in > 0) { /* ...no. */ DEBUG(3) ("decompression completed, but with trailing garbage"); return 0; } } else if (rv != Z_OK) { /* some other error */ DEBUG(3) ("decompression failed (corrupted stream?)"); zfail = true; // ignore the rest of this stream return 0; } /* successful decompression, at least partly */ /* write the result */ int bytes_decompressed = sizeof(decompressed) - zs.avail_out; ssize_t written = write(fd, decompressed, bytes_decompressed); if (written < bytes_decompressed) { DEBUG(3) ("writing decompressed data failed"); zfail= true; return 0; } bytes_written += written; /* reset the buffer for the next iteration */ zs.next_out = (Bytef*)decompressed; zs.avail_out = sizeof(decompressed); } return 0; } /** * called at the conclusion of each HTTP body. 
* Clean out all of the state for this HTTP header/body pair.
 *
 * Also called from the destructor, so it must be safe to call when no message
 * is in progress. Always returns 0.
 */

int scan_http_cbo::on_message_complete()
{
    /* Close the file */
    headers.clear();
    header_field = "";
    header_value = "";
    last_on_header = NOTHING;
    if(fd >= 0) {
        if (::close(fd) != 0) {
            perror("close() of http body");
        }
        fd = -1;
    }

    /* Erase zero-length files and update the DFXML */
    if(bytes_written>0){
        /* Update DFXML */
        if(xmlstream){
            xml_fo << "" << bytes_written << "\n";
            if(xmlstream) *xmlstream << xml_fo.str();   // inner test repeats the enclosing one
        }
        if(http_alert_fd>=0){
            std::stringstream ss;
            ss << "close\t" << output_path << "\n";
            const std::string &sso = ss.str();
            if(write(http_alert_fd,sso.c_str(),sso.size()) != (int)sso.size()){
                perror("write");
            }
        }
        if(http_cmd.size()>0 && output_path.size()>0){
            /* If we are at maximum number of subprocesses, wait for one to exit */
            /* NOTE(review): cmd is passed to system(), i.e. interpreted by a shell,
             * and output_path is appended without quoting -- confirm that paths
             * can never contain shell metacharacters. */
            std::string cmd = http_cmd + " " + output_path;
#ifdef HAVE_FORK
            int status=0;
            pid_t pid = 0;
            while(http_subproc >= http_subproc_max){
                pid = wait(&status);    // NOTE(review): the result is not checked; the counter is decremented even if wait() fails
                http_subproc--;
            }
            /* Fork off a child */
            pid = fork();
            if(pid<0) die("Cannot fork child");
            if(pid==0){
                /* We are the child */
                exit(system(cmd.c_str()));
            }
            http_subproc++;
#else
            system(cmd.c_str());
#endif
        }
    } else {
        /* Nothing written; erase the file */
        if(output_path.size() > 0){
            ::unlink(output_path.c_str());
        }
    }

    /* Erase the state variables for this part */
    xml_fo.str("");
    output_path = "";
    bytes_written=0;
    unzip = false;
    if(zinit){
        inflateEnd(&zs);                // NOTE(review): not guarded by HAVE_LIBZ, unlike the include at the top of the file
        zinit = false;
    }
    zfail = false;
    return 0;
}


/***
 * the HTTP scanner plugin itself
 *
 * - Exits the process if the scanner_params version does not match.
 * - PHASE_STARTUP: registers the scanner (disabled by default) and reads the
 *   http_cmd and http_alert_fd options.
 * - PHASE_SCAN: if the buffer is at least MIN_HTTP_BUFSIZE bytes and starts
 *   with "HTTP/1.", runs http_parser over it repeatedly, with a fresh parser and
 *   callback object each time, until the whole buffer has been consumed, the
 *   parser makes no progress, or an upgrade is detected.
 */

extern "C"
void scan_http(const class scanner_params &sp,const recursion_control_block &rcb)
{
    if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){
        std::cerr << "scan_http requires sp version " << scanner_params::CURRENT_SP_VERSION << "; "
                  << "got version " << sp.sp_version << "\n";
        exit(1);
    }

    if(sp.phase==scanner_params::PHASE_STARTUP){
        sp.info->name = "http";
        sp.info->flags = scanner_info::SCANNER_DISABLED; // default disabled
        sp.info->get_config(HTTP_CMD,&http_cmd,"Command to execute on each HTTP attachment");
        sp.info->get_config(HTTP_ALERT_FD,&http_alert_fd,"File descriptor to send information about completed HTTP attachments");
        return;         /* No feature files created */
    }

    if(sp.phase==scanner_params::PHASE_SCAN){
        /* See if there is an HTTP response */
        if(sp.sbuf.bufsize>=MIN_HTTP_BUFSIZE && sp.sbuf.memcmp(reinterpret_cast("HTTP/1."),0,7)==0){
            /* Smells enough like HTTP to try parsing */
            /* Set up callbacks */
            http_parser_settings scan_http_parser_settings;
            memset(&scan_http_parser_settings,0,sizeof(scan_http_parser_settings)); // in the event that new callbacks get created
            scan_http_parser_settings.on_message_begin          = scan_http_cbo::scan_http_cb_on_message_begin;
            scan_http_parser_settings.on_url                    = scan_http_cbo::scan_http_cb_on_url;
            scan_http_parser_settings.on_header_field           = scan_http_cbo::scan_http_cb_on_header_field;
            scan_http_parser_settings.on_header_value           = scan_http_cbo::scan_http_cb_on_header_value;
            scan_http_parser_settings.on_headers_complete       = scan_http_cbo::scan_http_cb_on_headers_complete;
            scan_http_parser_settings.on_body                   = scan_http_cbo::scan_http_cb_on_body;
            scan_http_parser_settings.on_message_complete       = scan_http_cbo::scan_http_cb_on_message_complete;

            if(sp.sxml) (*sp.sxml) << "\n \n";
            for(size_t offset=0;;){
                /* Set up a parser instance for the next chunk of HTTP responses and data.
                 * This might be repeated several times due to connection re-use and multiple requests.
                 * Note that the parser is not a C++ library but it can pass a "data" to the
                 * callback. We put the address for the scan_http_cbo object in the data and
                 * recover it with a cast in each of the callbacks.
                 */

                /* Make an sbuf for the remaining data.
                 * Note that this may not be necessary, because in our test runs the parser
                 * processed all of the data the first time through...
                 */
                sbuf_t sub_buf(sp.sbuf, offset);

                const char *base = reinterpret_cast(sub_buf.buf);
                http_parser parser;
                http_parser_init(&parser, HTTP_RESPONSE);

                /* cbo's destructor calls on_message_complete() at the end of each iteration */
                scan_http_cbo cbo(sp.sbuf.pos0.path,base,sp.sxml);
                parser.data = &cbo;

                /* Parse */
                size_t parsed = http_parser_execute(&parser, &scan_http_parser_settings, base, sub_buf.size());
                assert(parsed <= sub_buf.size());

                /* Indicate EOF (flushing callbacks) and terminate if we parsed the entire buffer.
                 */
                if (parsed == sub_buf.size()) {
                    http_parser_execute(&parser, &scan_http_parser_settings, NULL, 0);
                    break;
                }

                /* Stop parsing if we parsed nothing, as that means the parser made no progress
                 * and another pass would loop forever.
                 */
                if (parsed == 0) {
                    break;
                }

                /* Stop parsing if we're a connection upgrade (e.g. WebSockets) */
                if (parser.upgrade) {
                    DEBUG(9) ("upgrade connection detected (WebSockets?); cowardly refusing to dump further");
                    break;
                }

                /* Bump the offset for next iteration */
                offset += parsed;
            }
            if(sp.sxml) (*sp.sxml) << " ";
        }
    }
}
tcpflow/src/inet_ntop.c0000644000175000017500000000252412263701151014075 0ustar dimadima/**
 * private implementation of inet_ntop for systems that don't have it.
 * Functionally, correct, this version doesn't do condensing of IPv6 addresses,
 * and is kind of slow.
 *
 * This is included if the OS does not have inet_ntop.
 *
 * PUBLIC DOMAIN.
 * Simson L.
Garfinkel, Jan 20, 2013 */ static const char *inet_ntop4(const struct in_addr *addr, char *buf, socklen_t buflen) { const uint8_t *a = (uint8_t *)addr; snprintf(buf,buflen,"%03d.%03d.%03d.%03d", a[0], a[1], a[2], a[3]); return buf; } static const char *inet_ntop6(const struct private_in6_addr *addr, char *buf, socklen_t buflen) { const char *obuf=buf; const uint8_t *a = (uint8_t *)addr; for(size_t i=0;i<16;i++){ if(buflen<2) return 0; /* can't convert */ snprintf(buf,buflen,"%02x",a[i]); buf+=2; buflen-=2; if(i>0 && i<15 && i%2==1){ if(buflen<1) return 0; buf[0] = ':'; buf++; buflen--; } } if(buflen<1) return 0; buf[0] = 0; return obuf; } const char * inet_ntop(int af, const void *addr, char *buf, socklen_t len) { switch(af){ case AF_INET: return inet_ntop4((const struct in_addr *)addr, buf, len); case AF_INET6: return inet_ntop6((const struct private_in6_addr *)addr, buf, len); } return NULL; } tcpflow/src/template_demo.cpp0000644000175000017500000000126312263701151015254 0ustar dimadima/* * How do we do a template like this? */ #include #include #include template class A { private: T var_; uint64_t count_; public: A(T v):var_(v),count(0){ } uint64_t count() const { return count_;} T var() const { return var_;} void inc_count(); }; template void A::inc_count() { count_++; }; template std::ostream & operator <<(std::ostream &os, const A &e) { os << e.count() << "=" << e.var(); return os; }; int main(int argc,char **argv) { A a(3); a.inc_count(); std::cout << a << "\n"; a.inc_count(); std::cout << a << "\n"; } tcpflow/src/scan_netviz.cpp0000644000175000017500000000431112263701151014755 0ustar dimadima/** * scan_netviz: * * Our first try at a pcap visualization engine. * Requires LIBCAIRO */ #include "config.h" #include #include #include "bulk_extractor_i.h" #ifdef HAVE_LIBCAIRO #include "netviz/one_page_report.h" /* These control the size of the iptable histogram * and whether or not it is dumped. 
The histogram should be kept * either small enough that it is not expensive to maintain, or large * enough so that it never needs to be pruned. */ #define HISTOGRAM_SIZE "netviz_histogram_size" #define HISTOGRAM_DUMP "netviz_histogram_dump" #define DEFAULT_MAX_HISTOGRAM_SIZE 1000 static one_page_report *report=0; static void netviz_process_packet(void *user,const be13::packet_info &pi) { report->ingest_packet(pi); } #endif static int histogram_dump = 0; extern "C" void scan_netviz(const class scanner_params &sp,const recursion_control_block &rcb) { if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){ std::cout << "scan_timehistogram requires sp version " << scanner_params::CURRENT_SP_VERSION << "; " << "got version " << sp.sp_version << "\n"; exit(1); } if(sp.phase==scanner_params::PHASE_STARTUP){ sp.info->name = "netviz"; sp.info->flags = scanner_info::SCANNER_DISABLED; sp.info->author= "Mike Shick"; sp.info->packet_user = 0; #ifdef HAVE_LIBCAIRO sp.info->description = "Performs 1-page visualization of network packets"; sp.info->packet_cb = netviz_process_packet; sp.info->get_config(HISTOGRAM_DUMP,&histogram_dump,"Dumps the histogram"); int max_histogram_size = DEFAULT_MAX_HISTOGRAM_SIZE; sp.info->get_config(HISTOGRAM_SIZE,&max_histogram_size,"Maximum histogram size"); report = new one_page_report(max_histogram_size); #else sp.info->description = "Disabled (compiled without libcairo"; #endif } #ifdef HAVE_LIBCAIRO if(sp.phase==scanner_params::PHASE_SHUTDOWN){ assert(report!=0); if(histogram_dump){ report->src_tree.dump_stats(std::cout); report->dump(histogram_dump); } report->source_identifier = sp.fs.get_input_fname(); report->render(sp.fs.get_outdir()); delete report; report = 0; } #endif } tcpflow/src/http-parser/0000755000175000017500000000000012263701336014205 5ustar dimadimatcpflow/src/radiotap_old.c0000644000175000017500000003676312263701151014553 0ustar dimadima/* * Radiotap parser * * Copyright 2007 Andy Green * Copyright 2009 Johannes Berg * * This 
program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * Alternatively, this software may be distributed under the terms of BSD * license. * * See COPYING for more details. */ /** COPYING FOLLOWS */ /* Copyright (c) 2007-2009 Andy Green Copyright (c) 2007-2009 Johannes Berg Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include #include #include #include "radiotap_iter.h" static uint16_t get_unaligned_le16(const uint8_t *b) { return b[0] | (b[1]<<8); } static uint32_t get_unaligned_le32(const uint8_t *b) { return b[0] | (b[1]<<8) | (b[2]<<16) | (b[3]<<24); } /* function prototypes and related defs are in radiotap_iter.h */ static const struct radiotap_align_size rtap_namespace_sizes[] = { [IEEE80211_RADIOTAP_TSFT] = { .align = 8, .size = 8, }, [IEEE80211_RADIOTAP_FLAGS] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_RATE] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_CHANNEL] = { .align = 2, .size = 4, }, [IEEE80211_RADIOTAP_FHSS] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DBM_ANTNOISE] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_LOCK_QUALITY] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_TX_ATTENUATION] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_DBM_TX_POWER] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_ANTENNA] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DB_ANTNOISE] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_RX_FLAGS] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, }, [IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, }, [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, }, /* * add more here as they are defined in radiotap.h */ }; static const struct ieee80211_radiotap_namespace radiotap_ns = { .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), .align_size = rtap_namespace_sizes, }; /** * ieee80211_radiotap_iterator_init - radiotap parser iterator initialization * 
@iterator: radiotap_iterator to initialize * @radiotap_header: radiotap header to parse * @max_length: total length we can parse into (eg, whole packet length) * * Returns: 0 or a negative error code if there is a problem. * * This function initializes an opaque iterator struct which can then * be passed to ieee80211_radiotap_iterator_next() to visit every radiotap * argument which is present in the header. It knows about extended * present headers and handles them. * * How to use: * call __ieee80211_radiotap_iterator_init() to init a semi-opaque iterator * struct ieee80211_radiotap_iterator (no need to init the struct beforehand) * checking for a good 0 return code. Then loop calling * __ieee80211_radiotap_iterator_next()... it returns either 0, * -ENOENT if there are no more args to parse, or -EINVAL if there is a problem. * The iterator's @this_arg member points to the start of the argument * associated with the current argument index that is present, which can be * found in the iterator's @this_arg_index member. This arg index corresponds * to the IEEE80211_RADIOTAP_... defines. * * Radiotap header length: * You can find the CPU-endian total radiotap header length in * iterator->max_length after executing ieee80211_radiotap_iterator_init() * successfully. * * Alignment Gotcha: * You must take care when dereferencing iterator.this_arg * for multibyte types... the pointer is not aligned. Use * get_unaligned((type *)iterator.this_arg) to dereference * iterator.this_arg for type "type" safely on all arches. 
* * Example code: parse.c */ int ieee80211_radiotap_iterator_init( struct ieee80211_radiotap_iterator *iterator, struct ieee80211_radiotap_header *radiotap_header, int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) return -EINVAL; /* sanity check for allowed length and radiotap length field */ if (max_length < get_unaligned_le16((const uint8_t *)&radiotap_header->it_len)) return -EINVAL; iterator->_rtheader = radiotap_header; iterator->_max_length = get_unaligned_le16((const uint8_t *)&radiotap_header->it_len); iterator->_arg_index = 0; iterator->_bitmap_shifter = get_unaligned_le32((const uint8_t *)&radiotap_header->it_present); iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header); iterator->_reset_on_ext = 0; iterator->_next_bitmap = &radiotap_header->it_present; iterator->_next_bitmap++; iterator->_vns = vns; iterator->current_namespace = &radiotap_ns; iterator->is_radiotap_ns = 1; #ifdef RADIOTAP_SUPPORT_OVERRIDES iterator->n_overrides = 0; iterator->overrides = NULL; #endif /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); /* * check for insanity where the present bitmaps * keep claiming to extend up to or even beyond the * stated radiotap header length */ if ((unsigned long)iterator->_arg - (unsigned long)iterator->_rtheader > (unsigned long)iterator->_max_length) return -EINVAL; } iterator->_arg += sizeof(uint32_t); /* * no need to check again for blowing past stated radiotap * header length, because ieee80211_radiotap_iterator_next * checks it before it is dereferenced */ } iterator->this_arg = iterator->_arg; /* we are all initialized happily */ return 0; } static void find_ns(struct ieee80211_radiotap_iterator *iterator, uint32_t oui, uint8_t subns) { int i; iterator->current_namespace = NULL; if (!iterator->_vns) 
return; for (i = 0; i < iterator->_vns->n_ns; i++) { if (iterator->_vns->ns[i].oui != oui) continue; if (iterator->_vns->ns[i].subns != subns) continue; iterator->current_namespace = &iterator->_vns->ns[i]; break; } } #ifdef RADIOTAP_SUPPORT_OVERRIDES static int find_override(struct ieee80211_radiotap_iterator *iterator, int *align, int *size) { int i; if (!iterator->overrides) return 0; for (i = 0; i < iterator->n_overrides; i++) { if (iterator->_arg_index == iterator->overrides[i].field) { *align = iterator->overrides[i].align; *size = iterator->overrides[i].size; if (!*align) /* erroneous override */ return 0; return 1; } } return 0; } #endif /** * ieee80211_radiotap_iterator_next - return next radiotap parser iterator arg * @iterator: radiotap_iterator to move to next arg (if any) * * Returns: 0 if there is an argument to handle, * -ENOENT if there are no more args or -EINVAL * if there is something else wrong. * * This function provides the next radiotap arg index (IEEE80211_RADIOTAP_*) * in @this_arg_index and sets @this_arg to point to the * payload for the field. It takes care of alignment handling and extended * present fields. @this_arg can be changed by the caller (eg, * incremented to move inside a compound argument like * IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in * little-endian format whatever the endianess of your CPU. * * Alignment Gotcha: * You must take care when dereferencing iterator.this_arg * for multibyte types... the pointer is not aligned. Use * get_unaligned((type *)iterator.this_arg) to dereference * iterator.this_arg for type "type" safely on all arches. 
*/ int ieee80211_radiotap_iterator_next( struct ieee80211_radiotap_iterator *iterator) { while (1) { int hit = 0; int pad, align, size, subns; uint32_t oui; /* if no more EXT bits, that's it */ if ((iterator->_arg_index % 32) == IEEE80211_RADIOTAP_EXT && !(iterator->_bitmap_shifter & 1)) return -ENOENT; if (!(iterator->_bitmap_shifter & 1)) goto next_entry; /* arg not present */ /* get alignment/size of data */ switch (iterator->_arg_index % 32) { case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: case IEEE80211_RADIOTAP_EXT: align = 1; size = 0; break; case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: align = 2; size = 6; break; default: #ifdef RADIOTAP_SUPPORT_OVERRIDES if (find_override(iterator, &align, &size)) { /* all set */ } else #endif if (!iterator->current_namespace || iterator->_arg_index >= iterator->current_namespace->n_bits) { if (iterator->current_namespace == &radiotap_ns) return -ENOENT; align = 0; } else { align = iterator->current_namespace->align_size[iterator->_arg_index].align; size = iterator->current_namespace->align_size[iterator->_arg_index].size; } if (!align) { /* skip all subsequent data */ iterator->_arg = iterator->_next_ns_data; /* give up on this namespace */ iterator->current_namespace = NULL; goto next_entry; } break; } /* * arg is present, account for alignment padding * * Note that these alignments are relative to the start * of the radiotap header. There is no guarantee * that the radiotap header itself is aligned on any * kind of boundary. * * The above is why get_unaligned() is used to dereference * multibyte elements from the radiotap area. 
*/ pad = ((unsigned long)iterator->_arg - (unsigned long)iterator->_rtheader) & (align - 1); if (pad) iterator->_arg += align - pad; if (iterator->_arg_index % 32 == IEEE80211_RADIOTAP_VENDOR_NAMESPACE) { int vnslen; if ((unsigned long)iterator->_arg + size - (unsigned long)iterator->_rtheader > (unsigned long)iterator->_max_length) return -EINVAL; oui = (*iterator->_arg << 16) | (*(iterator->_arg + 1) << 8) | *(iterator->_arg + 2); subns = *(iterator->_arg + 3); find_ns(iterator, oui, subns); vnslen = get_unaligned_le16(iterator->_arg + 4); iterator->_next_ns_data = iterator->_arg + size + vnslen; if (!iterator->current_namespace) size += vnslen; } /* * this is what we will return to user, but we need to * move on first so next call has something fresh to test */ iterator->this_arg_index = iterator->_arg_index; iterator->this_arg = iterator->_arg; iterator->this_arg_size = size; /* internally move on the size of this arg */ iterator->_arg += size; /* * check for insanity where we are given a bitmap that * claims to have more arg content than the length of the * radiotap section. We will normally end up equalling this * max_length on the last arg, never exceeding it. */ if ((unsigned long)iterator->_arg - (unsigned long)iterator->_rtheader > (unsigned long)iterator->_max_length) return -EINVAL; /* these special ones are valid in each bitmap word */ switch (iterator->_arg_index % 32) { case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: iterator->_reset_on_ext = 1; iterator->is_radiotap_ns = 0; /* * If parser didn't register this vendor * namespace with us, allow it to show it * as 'raw. Do do that, set argument index * to vendor namespace. 
*/ iterator->this_arg_index = IEEE80211_RADIOTAP_VENDOR_NAMESPACE; if (!iterator->current_namespace) hit = 1; goto next_entry; case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: iterator->_reset_on_ext = 1; iterator->current_namespace = &radiotap_ns; iterator->is_radiotap_ns = 1; goto next_entry; case IEEE80211_RADIOTAP_EXT: /* * bit 31 was set, there is more * -- move to next u32 bitmap */ iterator->_bitmap_shifter = get_unaligned_le32((const uint8_t *)iterator->_next_bitmap); iterator->_next_bitmap++; if (iterator->_reset_on_ext) iterator->_arg_index = 0; else iterator->_arg_index++; iterator->_reset_on_ext = 0; break; default: /* we've got a hit! */ hit = 1; next_entry: iterator->_bitmap_shifter >>= 1; iterator->_arg_index++; } /* if we found a valid arg earlier, return it now */ if (hit) return 0; } } /**************************************************************** ** this code from parse.c ****************************************************************/ static const struct radiotap_align_size align_size_000000_00[] = { [0] = { .align = 1, .size = 4, }, [52] = { .align = 1, .size = 4, }, }; const struct ieee80211_radiotap_namespace vns_array[] = { { .oui = 0x000000, .subns = 0, .n_bits = sizeof(align_size_000000_00), .align_size = align_size_000000_00, }, }; const struct ieee80211_radiotap_vendor_namespaces radiotap_vns = { .ns = vns_array, .n_ns = sizeof(vns_array)/sizeof(vns_array[0]), }; /* Need to create these */ #define le64toh(x) (x) #define le32toh(x) (x) #define le16toh(x) (x) static int fcshdr = 0; void print_radiotap_namespace(struct ieee80211_radiotap_iterator *iter) { switch (iter->this_arg_index) { case IEEE80211_RADIOTAP_TSFT: printf("\tTSFT: %"PRIu64"\n", le64toh(*(unsigned long long *)iter->this_arg)); break; case IEEE80211_RADIOTAP_FLAGS: printf("\tflags: %02x\n", *iter->this_arg); break; case IEEE80211_RADIOTAP_RATE: printf("\trate: %lf\n", (double)*iter->this_arg/2); break; case IEEE80211_RADIOTAP_CHANNEL: case IEEE80211_RADIOTAP_FHSS: case 
IEEE80211_RADIOTAP_DBM_ANTSIGNAL: case IEEE80211_RADIOTAP_DBM_ANTNOISE: case IEEE80211_RADIOTAP_LOCK_QUALITY: case IEEE80211_RADIOTAP_TX_ATTENUATION: case IEEE80211_RADIOTAP_DB_TX_ATTENUATION: case IEEE80211_RADIOTAP_DBM_TX_POWER: case IEEE80211_RADIOTAP_ANTENNA: case IEEE80211_RADIOTAP_DB_ANTSIGNAL: case IEEE80211_RADIOTAP_DB_ANTNOISE: case IEEE80211_RADIOTAP_TX_FLAGS: break; case IEEE80211_RADIOTAP_RX_FLAGS: if (fcshdr) { printf("\tFCS in header: %.8x\n", le32toh(*(uint32_t *)iter->this_arg)); break; } printf("\tRX flags: %#.4x\n", le16toh(*(uint16_t *)iter->this_arg)); break; case IEEE80211_RADIOTAP_RTS_RETRIES: case IEEE80211_RADIOTAP_DATA_RETRIES: break; break; default: printf("\tBOGUS DATA\n"); break; } } void print_test_namespace(struct ieee80211_radiotap_iterator *iter) { switch (iter->this_arg_index) { case 0: case 52: printf("\t00:00:00-00|%d: %.2x/%.2x/%.2x/%.2x\n", iter->this_arg_index, *iter->this_arg, *(iter->this_arg + 1), *(iter->this_arg + 2), *(iter->this_arg + 3)); break; default: printf("\tBOGUS DATA - vendor ns %d\n", iter->this_arg_index); break; } } static const struct radiotap_override overrides[] = { { .field = 14, .align = 4, .size = 4, } }; tcpflow/src/scan_wifiviz.cpp0000644000175000017500000000261612263701151015133 0ustar dimadima/** * scan_wifiviz: * * Use the wifipcap and do some basic visualizations */ #include "config.h" #include #include #include "bulk_extractor_i.h" #include "datalink_wifi.h" extern "C" void scan_wifiviz(const class scanner_params &sp,const recursion_control_block &rcb) { if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){ std::cout << "scan_timehistogram requires sp version " << scanner_params::CURRENT_SP_VERSION << "; " << "got version " << sp.sp_version << "\n"; exit(1); } if(sp.phase==scanner_params::PHASE_STARTUP){ sp.info->name = "wifiviz"; sp.info->flags = scanner_info::SCANNER_DISABLED; sp.info->author= "Simson Garfinkel"; sp.info->packet_user = 0; sp.info->description = "Performs wifi isualization"; 
sp.info->get_config("check_fcs",&TFCB::theTFCB.opt_check_fcs,"Require valid Frame Check Sum (FCS)"); } if(sp.phase==scanner_params::PHASE_SHUTDOWN){ if(sp.sxml){ (*sp.sxml) << "\n"; for(TFCB::mac_ssid_map_t::const_iterator it=TFCB::theTFCB.mac_to_ssid.begin(); it!=TFCB::theTFCB.mac_to_ssid.end();it++){ (*sp.sxml) << " \n"; } (*sp.sxml) << "\n"; } } } tcpflow/src/util.cpp0000644000175000017500000001344112263701151013413 0ustar dimadima/* * This file is part of tcpflow. * Originally by Jeremy Elson * Now maintained by Simson L. Garfinkel * * This source code is under the GNU Public License (GPL). * See LICENSE for details. * */ #include "tcpflow.h" #include static char *debug_prefix = NULL; /* * STD String sprintf wrapper for sane CPP formatting */ std::string ssprintf(const char *fmt,...) { char buf[65536]; va_list ap; va_start(ap,fmt); vsnprintf(buf,sizeof(buf),fmt,ap); va_end(ap); return std::string(buf); } /* * Insert readability commas into an integer without writing a custom locale facet */ std::string comma_number_string(int64_t input) { std::vector tokens; std::stringstream ss; ss << std::setfill('0'); int sign = 1; if(input < 0) { sign = -1; input *= -1; } while(input >= 1000) { tokens.push_back(input % 1000); input /= 1000; } ss << (input * sign); for(std::vector::const_reverse_iterator it = tokens.rbegin(); it != tokens.rend(); it++) { ss << "," << std::setw(3) << *it; } return ss.str(); } std::string macaddr(const uint8_t *addr) { char buf[256]; snprintf(buf,sizeof(buf),"%02x:%02x:%02x:%02x:%02x:%02x", addr[0],addr[1],addr[2],addr[3],addr[4],addr[5]); return std::string(buf); } /* * Remember our program name and process ID so we can use them later * for printing debug messages * */ void init_debug(const char *pfx,int include_pid) { if(debug_prefix) free(debug_prefix); size_t debug_prefix_size = strlen(pfx) + 16; debug_prefix = (char *)calloc(sizeof(char), debug_prefix_size); if(debug_prefix==0) die("malloc failed"); if(include_pid){ snprintf(debug_prefix, 
debug_prefix_size, "%s[%d]", pfx, (int) getpid()); } else { snprintf(debug_prefix, debug_prefix_size, "%s", pfx); } } /****************************************************************/ /* C++ string splitting code from http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c */ static std::vector &split(const std::string &s, char delim, std::vector &elems) { std::stringstream ss(s); std::string item; while(std::getline(ss, item, delim)) { elems.push_back(item); } return elems; } static std::vector split(const std::string &s, char delim) { std::vector elems; return split(s, delim, elems); } /* mkdir all of the containing directories in path. * keep track of those made so we don't need to keep remaking them. */ void mkdirs_for_path(std::string path) { static std::set made_dirs; // track what we made std::string mpath; // the path we are making if(path.at(0)=='/'){ mpath = "/"; path = path.substr(1); } std::vector parts = split(path,'/'); /* Notice that this won't mkdir for the last part. * That's okay, because it's a filename. */ for(std::vector::const_iterator it=parts.begin();it!=parts.end();it++){ if(made_dirs.find(mpath)==made_dirs.end()){ if(mpath.size()){ int r = MKDIR(mpath.c_str(),0777); if(r<0){ /* Can't make path; see if we can execute it*/ if(access(mpath.c_str(),X_OK)<0){ perror(mpath.c_str()); exit(1); } } made_dirs.insert(mpath); } } if(mpath.size()>0) mpath += "/"; mpath += *it; } } /* * Print a debugging message, given a va_list */ void print_debug_message(const char *fmt, va_list ap) { /* print debug prefix */ fprintf(stderr, "%s: ", debug_prefix); /* print the var-arg buffer passed to us */ vfprintf(stderr, fmt, ap); /* add newline */ fprintf(stderr, "\n"); (void) fflush(stderr); } /* Print a debugging or informational message */ void debug_real(const char *fmt, ...) { va_list ap; va_start(ap, fmt); print_debug_message(fmt, ap); va_end(ap); } /* Print a debugging or informatioal message, then exit */ void die(const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); print_debug_message(fmt, ap); exit(1); } /* An attempt at making signal() portable. * * If we detect sigaction, use that; * otherwise if we have setsig, use that; * otherwise, cross our fingers and hope for the best using plain old signal(). * * Our first choice is sigaction (sigaction() is POSIX; signal() is * not.) Taken from Stevens' _Advanced Programming in the UNIX Environment_. * * 10/6/08 - slg - removed RETSIGTYPE, since it hasn't been needed to 15 years */ void (*portable_signal(int signo, void (*func)(int)))(int) { #if defined(HAVE_SIGACTION) struct sigaction act, oact; memset(&act, 0, sizeof(act)); memset(&oact, 0, sizeof(oact)); act.sa_handler = func; sigemptyset(&act.sa_mask); act.sa_flags = 0; if (sigaction(signo, &act, &oact) < 0) return (SIG_ERR); return (oact.sa_handler); #elif defined(HAVE_SIGSET) return sigset(signo, func); #else return signal(signo, func); #endif /* HAVE_SIGACTION, HAVE_SIGSET */ } /************ *** MMAP *** ************/ #ifdef HAVE_SYS_MMAN_H #include #endif /** * fake implementation of mmap and munmap if we don't have them */ #if !defined(HAVE_MMAP) #define PROT_READ 0 #define MAP_FILE 0 #define MAP_SHARED 0 void *mmap(void *addr,size_t length,int prot, int flags, int fd, off_t offset) { void *buf = (void *)malloc(length); if(!buf) return 0; read(fd,buf,length); // should explore return code return buf; } void munmap(void *buf,size_t size) { free(buf); } #endif tcpflow/src/be13_api/0000755000175000017500000000000012263701331013312 5ustar dimadimatcpflow/src/be13_api/net_ethernet.h0000644000175000017500000000622712263701331016156 0ustar dimadima/* Copyright (C) 1997, 1999, 2001, 2008 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ /* Based on the FreeBSD version of this file. Curiously, that file lacks a copyright in the header. */ #ifndef __NET_ETHERNET_H #define __NET_ETHERNET_H 1 #include #include //#include /* IEEE 802.3 Ethernet constants */ __BEGIN_DECLS /* This is a name for the 48 bit ethernet address available on many systems. 
*/
/* NOTE(review): ETH_ALEN, ETH_HLEN, ETH_ZLEN, ETH_FRAME_LEN and
 * ETH_DATA_LEN are used below but not defined in the visible part of this
 * header; presumably they come from one of the headers included above -
 * confirm. */
struct ether_addr
{
  u_int8_t ether_addr_octet[ETH_ALEN];
} __attribute__ ((__packed__));

/* 10Mb/s ethernet header */
struct ether_header
{
  u_int8_t  ether_dhost[ETH_ALEN];	/* destination eth addr	*/
  u_int8_t  ether_shost[ETH_ALEN];	/* source ether addr	*/
  u_int16_t ether_type;		        /* packet type ID field	*/
} __attribute__ ((__packed__));

/* Ethernet protocol ID's */
#define	ETHERTYPE_PUP		0x0200          /* Xerox PUP */
#define ETHERTYPE_SPRITE	0x0500		/* Sprite */
#define	ETHERTYPE_IP		0x0800		/* IP */
#define	ETHERTYPE_ARP		0x0806		/* Address resolution */
#define	ETHERTYPE_REVARP	0x8035		/* Reverse ARP */
#define ETHERTYPE_AT		0x809B		/* AppleTalk protocol */
#define ETHERTYPE_AARP		0x80F3		/* AppleTalk ARP */
#define	ETHERTYPE_VLAN		0x8100		/* IEEE 802.1Q VLAN tagging */
#define ETHERTYPE_IPX		0x8137		/* IPX */
#define	ETHERTYPE_IPV6		0x86dd		/* IP protocol version 6 */
#define ETHERTYPE_LOOPBACK	0x9000		/* used to test interfaces */


#define	ETHER_ADDR_LEN	ETH_ALEN                 /* size of ethernet addr */
#define	ETHER_TYPE_LEN	2                        /* bytes in type field */
#define	ETHER_CRC_LEN	4                        /* bytes in CRC field */
#define	ETHER_HDR_LEN	ETH_HLEN                 /* total octets in header */
#define	ETHER_MIN_LEN	(ETH_ZLEN + ETHER_CRC_LEN) /* min packet length */
#define	ETHER_MAX_LEN	(ETH_FRAME_LEN + ETHER_CRC_LEN) /* max packet length */

/* make sure ethernet length is valid (both bounds are inclusive) */
#define	ETHER_IS_VALID_LEN(foo)	\
	((foo) >= ETHER_MIN_LEN && (foo) <= ETHER_MAX_LEN)

/*
 * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
 * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
 * by an ETHER type (as given above) and then the (variable-length) header.
*/ #define ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ #define ETHERTYPE_NTRAILER 16 #define ETHERMTU ETH_DATA_LEN #define ETHERMIN (ETHER_MIN_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) __END_DECLS #endif /* net/ethernet.h */ tcpflow/src/be13_api/cppmutex.h0000644000175000017500000000247712263701331015342 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /** * Cppmutex is an easy-to-use mutex class. * Create a cppmutex instance for a mutex. * Create a cppmutex::lock(M) object to get a lock; delete the object to free it. * * BE SURE THAT HAVE_PTHREAD IS DEFINED BEFORE INCLUDING THIS FILE */ #ifndef CPPMUTEX_H #define CPPMUTEX_H #include #include #include #include #include #include class cppmutex { // default copy construction and assignment are meaningless and not implemented cppmutex(const cppmutex &c); cppmutex &operator=(const cppmutex &cp); public: pthread_mutex_t M; public: cppmutex():M(){ if(pthread_mutex_init(&M,NULL)){ std::cerr << "pthread_mutex_init failed: " << strerror(errno) << "\n"; exit(1); } } virtual ~cppmutex(){ pthread_mutex_destroy(&M); } class lock { // get private: cppmutex &myMutex; lock(const lock &l); // copy of locks is meaningless lock &operator=(const lock &l); public: lock(cppmutex &m):myMutex(m){ pthread_mutex_lock(&myMutex.M); } ~lock(){ pthread_mutex_unlock(&myMutex.M); } }; }; #endif tcpflow/src/be13_api/feature_recorder.h0000644000175000017500000003204512263701331017007 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #ifndef FEATURE_RECORDER_H #define FEATURE_RECORDER_H /** * \addtogroup bulk_extractor_APIs * @{ */ /** * feature_recorder.h: * * System for recording features from the scanners into the feature files. * * There is one feature_recorder per feature file. It is used both to record * the features and to perform the histogram calculation. * (That should probably be moved to a different class.) 
It also also previously * had the ability to do a merge sort, but we took that out because it was * not necessary. * * The feature recorders can also check the global alert_list to see * if the feature should be written to the alert file. It's opened on * demand and immediately flushed and closed. A special mutex is used * to protect it. * * Finally, the feature recorder supports the global stop_list, which * is a list of features that are not written to the main file but are * written to a stop list. That is implemented with a second * feature_recorder. * * There is one feature_recorder_set per process. * The file assumes that bulk_extractor.h is being included. */ #include #include #include #include #include #include #include "cppmutex.h" #include "dfxml/src/dfxml_writer.h" #include "dfxml/src/hash_t.h" #include "atomic_set_map.h" /* histogram_def should be within the feature_recorder_set class. Oh well. */ class histogram_def { public: /** * @param feature- the feature file to histogram (no .txt) * @param re - the regular expression to extract * @param require- require this string on the line (usually in context) * @param suffix - the suffix to add to the histogram file after feature name before .txt * @param flags - any flags (see above) */ histogram_def(std::string feature_,std::string re_,std::string suffix_,uint32_t flags_=0): feature(feature_),pattern(re_),require(),suffix(suffix_),flags(flags_){} histogram_def(std::string feature_,std::string re_,std::string require_,std::string suffix_,uint32_t flags_=0): feature(feature_),pattern(re_),require(require_),suffix(suffix_),flags(flags_){ } std::string feature; /* feature file */ std::string pattern; /* extract pattern; "" means use entire feature */ std::string require; /* text required somewhere on the feature line; used for IP histograms */ std::string suffix; /* suffix to append; "" means "histogram" */ uint32_t flags; // defined in histogram.h }; typedef std::set histogram_defs_t; // a set of histogram 
definitions inline bool operator <(class histogram_def h1,class histogram_def h2) { if (h1.featureh2.feature) return false; if (h1.patternh2.pattern) return false; if (h1.suffixh2.suffix) return false; return false; /* equal */ }; inline bool operator !=(class histogram_def h1,class histogram_def h2) { return h1.feature!=h2.feature || h1.pattern!=h2.pattern || h1.suffix!=h2.suffix; }; class feature_recorder { // default copy construction and assignment are meaningless // and not implemented feature_recorder(const feature_recorder &); feature_recorder &operator=(const feature_recorder &); static uint32_t debug; // are we debugging? static pthread_t main_threadid; // main threads ID static void MAINTHREAD(); // called if can only be run in the main thread uint32_t flags; // flags for this feature recorder /****************************************************************/ public: typedef atomic_histogram mhistogram_t; // memory histogram typedef void (dump_callback_t)(void *,const feature_recorder &fr, const std::string &feature,const uint64_t &count); static void set_main_threadid(){ #ifndef WIN32 main_threadid=pthread_self(); #endif }; // set the main static void set_debug(uint32_t ndebug){debug=ndebug;} typedef std::string offset_t; /** * \name Flags that control scanners * @{ * These flags control scanners. Set them with set_flag(). */ /** Disable this recorder. */ static const int FLAG_DISABLED=0x01; // Disabled static const int FLAG_NO_CONTEXT=0x02; // Do not write context. static const int FLAG_NO_STOPLIST=0x04; // Do not honor the stoplist/alertlist. static const int FLAG_NO_ALERTLIST=0x04; // Do not honor the stoplist/alertlist. /** * Normally feature recorders automatically quote non-UTF8 characters * with \x00 notation and quote "\" as \x5C. Specify FLAG_NO_QUOTE to * disable this behavior. */ static const int FLAG_NO_QUOTE=0x08; // do not escape UTF8 codes /** * Use this flag the feature recorder is sending UTF-8 XML. 
* non-UTF8 will be quoted but "\" will not be escaped. */ static const int FLAG_XML = 0x10; // will be sending XML /** * histogram support. */ static const int FLAG_MEM_HISTOGRAM = 0x20; // enable the in-memory histogram static const int FLAG_NO_FEATURES = 0x40; // do not record features (just histogram) /** @} */ static const int max_histogram_files = 10; // don't make more than 10 files in low-memory conditions static const std::string histogram_file_header; static const std::string feature_file_header; static const std::string bulk_extractor_version_header; // These must only be changed in the main thread: static uint32_t opt_max_context_size; static uint32_t opt_max_feature_size; static int64_t offset_add; // added to every reported offset, for use with hadoop static std::string banner_file; // banner for top of every file static std::string extract_feature(const std::string &line); feature_recorder(class feature_recorder_set &fs, const std::string &outdir, const std::string &input_fname,const std::string &name); virtual ~feature_recorder(); virtual void set_flag(uint32_t flags_); virtual void unset_flag(uint32_t flags_); bool flag_set(uint32_t f) const {return flags & f;} bool flag_notset(uint32_t f) const {return !(flags & f);} uint32_t get_flags() const {return flags;} static size_t context_window_default; // global option const std::string outdir; // where output goes (could be static, I guess const std::string input_fname; // image we are analyzing const std::string name; // name of this feature recorder private: std::string ignore_encoding; // encoding to ignore for carving std::fstream ios; // where features are written protected:; histogram_defs_t histogram_defs; // histograms that are to be created for this feature recorder class feature_recorder_set &fs; // the set in which this feature_recorder resides int64_t count_; /* number of records written */ size_t context_window_before; // context window size_t context_window_after; // context window mutable 
cppmutex Mf; // protects the file mutable cppmutex Mr; // protects the redlist protected: mhistogram_t *mhistogram; // if we are building an in-memory-histogram class feature_recorder *stop_list_recorder; // where stopped features get written int64_t file_number_; /* starts at 0; gets incremented by carve(); for binning */ public: /* these are not threadsafe and should only be called in startup */ void set_stop_list_recorder(class feature_recorder *fr){ MAINTHREAD(); stop_list_recorder = fr; } void set_context_window(size_t win){ MAINTHREAD(); context_window_before = win; context_window_after = win; } void set_context_window_before(size_t win){ MAINTHREAD(); context_window_before = win;} void set_context_window_after(size_t win){ MAINTHREAD(); context_window_after = win; } void set_carve_ignore_encoding(const std::string &encoding){ MAINTHREAD();ignore_encoding = encoding;} /* End non-threadsafe */ uint64_t file_number_add(uint64_t i){ #ifdef HAVE___SYNC_ADD_AND_FETCH return __sync_add_and_fetch(&file_number_,i); #else cppmutex::lock lock(Mf); file_number_ += i; return file_number_; #endif } void banner_stamp(std::ostream &os,const std::string &header); // stamp banner, and header /* where stopped items (on stop_list or context_stop_list) get recorded: */ std::string fname_counter(std::string suffix) const; static std::string quote_string(const std::string &feature); // turns unprintable characters to octal escape static std::string unquote_string(const std::string &feature); // turns octal escape back to binary characters /* feature file management */ virtual void open(); virtual void close(); virtual void flush(); static void dump_callback_test(void *user,const feature_recorder &fr, const std::string &str,const uint64_t &count); // test callback for you to use! /* TK: The histogram_def should be provided at the beginning, so it can be used for in-memory histograms. * The callback needs to have the specific atomic set as the callback as well. 
*/ virtual void add_histogram(const class histogram_def &def); // adds a histogram to process virtual void dump_histogram(const class histogram_def &def,void *user,feature_recorder::dump_callback_t cb); typedef void (*xml_notifier_t)(const std::string &xmlstring); virtual void dump_histograms(void *user,feature_recorder::dump_callback_t cb, xml_notifier_t xml_error_notifier); /* Methods to get info */ uint64_t count() const {return count_;} /* Methods to write. * write() is the basic write - you say where, and it does it. * write_buf() writes from a position within the buffer, with context. * It won't write a feature that starts in the margin. * pos0 gives the location and prefix for the beginning of the buffer */ /**************************************************************** *** External entry points. ****************************************************************/ /** * write() actually does the writing to the file. * It uses locks and is threadsafe. * Callers therefore do not need locks. */ virtual void write(const std::string &str); /** * support for writing features */ // only virtual functions may be called by plug-ins // printf() prints to the feature file. virtual void printf(const char *fmt_,...) __attribute__((format(printf, 2, 3))); // // write a feature and its context; the feature may be in the context, but doesn't need to be. // write() calls write0() after histogram, quoting, and stoplist processing virtual void write0(const pos0_t &pos0,const std::string &feature,const std::string &context); // write a feature and its context; the feature may be in the context, but doesn't need to be. // entries processed by write below will be processed by histogram system virtual void write(const pos0_t &pos0,const std::string &feature,const std::string &context); // write a feature located at a given place within an sbuf. 
// Context is written automatically virtual void write_buf(const sbuf_t &sbuf,size_t pos,size_t len); /* writes with context */ /** * support for carving. * Carving writes the filename to the feature file; the context is the file's hash using the provided function. * Automatically de-duplicates. */ enum carve_mode_t { CARVE_NONE=0, CARVE_ENCODED=1, CARVE_ALL=2}; #define CARVE_MODE_DESCRIPTION "0=carve none; 1=carve encoded; 2=carve all" carve_mode_t carve_mode; typedef std::string (*hashing_function_t)(const sbuf_t &sbuf); // returns a hex value void set_carve_mode(carve_mode_t aMode){MAINTHREAD();carve_mode=aMode;} // Carve a file; returns filename of carved file or empty string if nothing carved virtual std::string carve(const sbuf_t &sbuf,size_t pos,size_t len, const std::string &ext, // appended to forensic path const struct be13::hash_def &hasher); // Set the time of the carved file to iso8601 file virtual void set_carve_mtime(const std::string &fname, const std::string &mtime_iso8601); }; // function that can only be called from main thread inline void feature_recorder::MAINTHREAD() { #ifndef WIN32 assert(main_threadid==pthread_self()); #endif }; /** @} */ #endif tcpflow/src/be13_api/word_and_context_list.cpp0000644000175000017500000001065312263701331020417 0ustar dimadima#include "config.h" #include "word_and_context_list.h" #include "beregex.h" //#include "feature_recorder.h" void word_and_context_list::add_regex(const std::string &pat) { patterns.push_back(new beregex(pat,0)); } /** * Insert a feature and context, but only if not already present. * Returns true if added. */ bool word_and_context_list::add_fc(const std::string &f,const std::string &c) { context ctx(f,c); // ctx includes feature, before and after if(c.size()>0 && context_set.find(c) != context_set.end()) return false; // already present context_set.insert(c); // now we've seen it. 
fcmap.insert(std::pair(f,ctx)); if(fcmap.size()%100==0) std::cerr << "fcmap size=" << fcmap.size() << "\n"; return true; } /** returns 0 if success, -1 if fail. */ int word_and_context_list::readfile(const std::string &filename) { std::ifstream i(filename.c_str()); if(!i.is_open()) return -1; printf("Reading context stop list %s\n",filename.c_str()); std::string line; uint64_t total_context=0; uint64_t line_counter = 0; uint64_t features_read = 0; while(getline(i,line)){ line_counter++; if(line.size()==0) continue; // if(line_counter==1 && line.size()>3 // && line[0]==feature_recorder::UTF8_BOM[0] // && line[1]==feature_recorder::UTF8_BOM[1] // && line[2]==feature_recorder::UTF8_BOM[2]){ // line = line.substr(3); // remove the UTF8 BOM // } if(line[0]=='#') continue; // it's a comment if((*line.end())=='\r'){ line.erase(line.end()); /* remove the last character if it is a \r */ } if(line.size()==0) continue; // no line content ++features_read; // If there are two tabs, this is a line from a feature file size_t tab1 = line.find('\t'); if(tab1!=std::string::npos){ size_t tab2 = line.find('\t',tab1+1); if(tab2!=std::string::npos){ size_t tab3 = line.find('\t',tab2+1); if(tab3==std::string::npos) tab3=line.size(); std::string f = line.substr(tab1+1,(tab2-1)-tab1); std::string c = line.substr(tab2+1,(tab3-1)-tab2); if(add_fc(f,c)){ ++total_context; } } else { std::string f = line.substr(tab1+1); add_fc(f,""); // Insert a feature with no context } continue; } // If there is no tab, then this must be a simple item to ignore. 
// If it is a regular expression, add it to the list of REs if(beregex::is_regex(line)){ patterns.push_back(new beregex(line,REG_ICASE)); } else { // Otherwise, add it as a feature with no context fcmap.insert(std::pair(line,context(line))); } } std::cout << "Stop list read.\n"; std::cout << " Total features read: " << features_read << "\n"; std::cout << " List Size: " << fcmap.size() << "\n"; std::cout << " Context Strings: " << total_context << "\n"; std::cout << " Regular Expressions: " << patterns.size() << "\n"; return 0; } /** check() is threadsafe. */ bool word_and_context_list::check(const std::string &probe,const std::string &before,const std::string &after) const { /* First check literals, because they are faster */ for(stopmap_t::const_iterator it =fcmap.find(probe);it!=fcmap.end();it++){ if((rstrcmp((*it).second.before,before)==0) && (rstrcmp((*it).second.after,after)==0) && ((*it).second.feature==probe)){ return true; } } /* Now check the patterns; do this second */ for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){ if((*it)->search(probe,0,0,0)){ return true; // yep } } return false; }; bool word_and_context_list::check_feature_context(const std::string &probe,const std::string &context) const { std::string before; std::string after; context::extract_before_after(probe,context,before,after); return check(probe,before,after); } void word_and_context_list::dump() { std::cout << "dump context list:\n"; for(stopmap_t::const_iterator it =fcmap.begin();it!=fcmap.end();it++){ std::cout << (*it).first << " = " << (*it).second << "\n"; } std::cout << "dump RE list:\n"; for(beregex_vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){ std::cout << (*it)->pat << "\n"; } } #ifdef STAND int main(int argc,char **argv) { cout << "testing contxt_list\n"; word_and_context_list cl; while(--argc){ argv++; if(cl.readfile(*argv)){ err(1,"Cannot read %s",*argv); } } cl.dump(); exit(1); } #endif 
tcpflow/src/be13_api/feature_recorder.cpp0000644000175000017500000006537712263701331017360 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #include "config.h" #include "bulk_extractor_i.h" #include "unicode_escape.h" #include "beregex.h" #include "histogram.h" #include #include #include #ifdef HAVE_STDARG_H #include #endif #ifndef MAXPATHLEN #define MAXPATHLEN 65536 #endif #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef DEBUG_PEDANTIC #define DEBUG_PEDANTIC 0x0001// check values more rigorously #endif #ifndef WIN32 pthread_t feature_recorder::main_threadid = 0; #endif size_t feature_recorder::context_window_default=16; /* number of bytes of context */ int64_t feature_recorder::offset_add = 0; std::string feature_recorder::banner_file; uint32_t feature_recorder::opt_max_context_size=1024*1024; uint32_t feature_recorder::opt_max_feature_size=1024*1024; uint32_t feature_recorder::debug=0; void feature_recorder::banner_stamp(std::ostream &os,const std::string &header) { int banner_lines = 0; if(banner_file!=""){ std::ifstream i(banner_file.c_str()); if(i.is_open()){ std::string line; while(getline(i,line)){ if(line.size()>0 && ((*line.end()=='\r') || (*line.end()=='\n'))){ line.erase(line.end()); /* remove the last character while it is a \n or \r */ } os << "# " << line << "\n"; banner_lines++; } i.close(); } } if(banner_lines==0){ os << "# BANNER FILE NOT PROVIDED (-b option)\n"; } os << bulk_extractor_version_header; os << "# Feature-Recorder: " << name << "\n"; if(input_fname.size()) os << "# Filename: " << input_fname << "\n"; if(debug!=0){ os << "# DEBUG: " << debug << " ("; if(debug & DEBUG_PEDANTIC) os << " DEBUG_PEDANTIC "; os << ")\n"; } os << header; } /** * Create a feature recorder object. Each recorder records a certain * kind of feature. Features are stored in a file. The filename is * permutated based on the total number of threads and the current * thread that's recording. 
Each thread records to a different file, * and thus a different feature recorder, to avoid locking * problems. * * @param outdir_ - where the feature file is written * @param input_fname_ - the file (disk image) that these features were extracted from. * - We should probably have a callback function to annotate the feature file. * @param name - the name of the feature being recorded. */ feature_recorder::feature_recorder(class feature_recorder_set &fs_, const std::string &outdir_,const std::string &input_fname_,const std::string &name_): flags(0), outdir(outdir_),input_fname(input_fname_),name(name_),ignore_encoding(),ios(), histogram_defs(), fs(fs_), count_(0),context_window_before(context_window_default),context_window_after(context_window_default), Mf(),Mr(),mhistogram(), stop_list_recorder(0), file_number_(0),carve_mode(CARVE_ENCODED) { } /* Don't have to delete the stop_list_recorder because it is in the * feature_recorder_set and will be separately deleted. */ feature_recorder::~feature_recorder() { if(ios.is_open()){ ios.close(); } } /** * Return the filename with a counter */ std::string feature_recorder::fname_counter(std::string suffix) const { return outdir + "/" + this->name + (suffix.size()>0 ? (std::string("_") + suffix) : "") + ".txt"; } /** * open a feature recorder file in the specified output directory. 
*/ void feature_recorder::open() { std::string fname = fname_counter(""); ios.open(fname.c_str(),std::ios_base::in|std::ios_base::out|std::ios_base::ate); if(ios.is_open()){ // opened existing stream ios.seekg(0L,std::ios_base::end); while(ios.is_open()){ /* Get current position */ if(int(ios.tellg())==0){ // at beginning of file; stamp and return ios.seekp(0L,std::ios_base::beg); // be sure we are at the beginning of the file return; } ios.seekg(-1,std::ios_base::cur); // backup to once less than the end of the file if (ios.peek()=='\n'){ // we are finally on the \n ios.seekg(1L,std::ios_base::cur); // move the getting one forward ios.seekp(ios.tellg(),std::ios_base::beg); // put the putter at the getter location count_ = 1; // greater than zero return; } } } // Just open the stream for output ios.open(fname.c_str(),std::ios_base::out); if(!ios.is_open()){ std::cerr << "*** feature_recorder::open CANNOT OPEN FEATURE FILE FOR WRITING " << fname << ":" << strerror(errno) << "\n"; exit(1); } } void feature_recorder::close() { if(ios.is_open()){ ios.close(); } } void feature_recorder::flush() { cppmutex::lock lock(Mf); // get the lock; released when object is deallocated. 
ios.flush(); } static inline bool isodigit(char c) { return c>='0' && c<='7'; } /* statics */ const std::string feature_recorder::feature_file_header("# Feature-File-Version: 1.1\n"); const std::string feature_recorder::histogram_file_header("# Histogram-File-Version: 1.1\n"); const std::string feature_recorder::bulk_extractor_version_header("# " PACKAGE_NAME "-Version: " PACKAGE_VERSION " ($Rev: 10844 $)\n"); static inline int hexval(char ch) { if(ch>='0' && ch<='9') return ch-'0'; if(ch>='a' && ch<='f') return ch-'a'+10; if(ch>='A' && ch<='F') return ch-'a'+10; return 0; } /** * Unquote Python or octal-style quoting of a string */ std::string feature_recorder::unquote_string(const std::string &s) { size_t len = s.size(); if(len<4) return s; // too small for a quote std::string out; for(size_t i=0;idump_sorted(static_cast(&mcbo),mhistogram_callback::callback); return; } beregex reg(def.pattern,REG_EXTENDED); std::string ifname = fname_counter(""); // source of features std::ifstream f(ifname.c_str()); if(!f.is_open()){ std::cerr << "Cannot open histogram input file: " << ifname << "\n"; return; } /* Read each line of the feature file and add it to the histogram. * If we run out of memory, dump that histogram to a file and start * on the next histogram. 
*/ for(int histogram_counter = 0;histogram_counter0) real_suffix << histogram_counter; std::string ofname = fname_counter(real_suffix.str()); // histogram name std::ofstream o; o.open(ofname.c_str()); if(!o.is_open()){ std::cerr << "Cannot open histogram output file: " << ofname << "\n"; return; } HistogramMaker::FrequencyReportVector *fr = h.makeReport(); if(fr->size()>0){ banner_stamp(o,histogram_file_header); o << *fr; // sends the entire histogram } delete fr; o.close(); if(f.is_open()==false){ return; // input file was closed } } std::cerr << "Looped " << max_histogram_files << " times on histogram; something seems wrong\n"; } void feature_recorder::add_histogram(const histogram_def &def) { histogram_defs.insert(def); } /* Dump all of our histograms */ void feature_recorder::dump_histograms(void *user,feature_recorder::dump_callback_t cb, feature_recorder_set::xml_notifier_t xml_error_notifier) { /* See if we have an in-memory histograms */ if(flag_set(feature_recorder::FLAG_MEM_HISTOGRAM)){ std::cerr << "***************** " << name << " has a memory histogram\n"; histogram_def d("","","",0); // empty dump_histogram(d,user,cb); } /* Loop through all the histograms */ for(histogram_defs_t::const_iterator it = histogram_defs.begin();it!=histogram_defs.end();it++){ std::cout << std::string(" ") << name << " " << (*it).suffix + "...\n"; std::cout.flush(); try { if(flag_set(feature_recorder::FLAG_MEM_HISTOGRAM)){ std::cerr << name << " cannot have both a regular histogram and a memory histogram\n"; } else { dump_histogram((*it),user,cb); } } catch (const std::exception &e) { std::cerr << "ERROR: " ; std::cerr.flush(); std::cerr << e.what() << " computing histogram " << name << "\n"; if(xml_error_notifier){ std::string error = std::string("p(maxsize); if(p.buf==0) return; va_list ap; va_start(ap,fmt); vsnprintf(p.buf,maxsize,fmt,ap); va_end(ap); this->write(p.buf); } /** * Combine the pos0, feature and context into a single line and write it to the feature file. 
*/ void feature_recorder::write0(const pos0_t &pos0,const std::string &feature,const std::string &context) { std::stringstream ss; ss << pos0.shift(feature_recorder::offset_add).str() << '\t' << feature; if(flag_notset(FLAG_NO_CONTEXT) && (context.size()>0)) ss << '\t' << context; this->write(ss.str()); } /** * the main entry point of writing a feature and its context to the feature file. * processes the stop list */ void feature_recorder::write(const pos0_t &pos0,const std::string &feature_,const std::string &context_) { if(flags & FLAG_DISABLED) return; // disabled if(debug & DEBUG_PEDANTIC){ if(feature_.size() > opt_max_feature_size){ std::cerr << "feature_recorder::write : feature_.size()=" << feature_.size() << "\n"; assert(0); } if(context_.size() > opt_max_context_size){ std::cerr << "feature_recorder::write : context_.size()=" << context_.size() << "\n"; assert(0); } } /* By default quote string that is not UTF-8, and quote backslashes. */ bool escape_bad_utf8 = true; bool escape_backslash = true; if(flags & FLAG_NO_QUOTE){ // don't quote either escape_bad_utf8 = false; escape_backslash = false; } if(flags & FLAG_XML){ // only quote bad utf8 escape_bad_utf8 = true; escape_backslash = false; } std::string feature = validateOrEscapeUTF8(feature_, escape_bad_utf8,escape_backslash); std::string context; if(flag_notset(FLAG_NO_CONTEXT)){ context = validateOrEscapeUTF8(context_,escape_bad_utf8,escape_backslash); } if(feature.size() > opt_max_feature_size) feature.resize(opt_max_feature_size); if(context.size() > opt_max_context_size) context.resize(opt_max_context_size); if(feature.size()==0){ std::cerr << "zero length feature at " << pos0 << "\n"; if(debug & DEBUG_PEDANTIC) assert(0); return; } if(debug & DEBUG_PEDANTIC){ /* Check for tabs or newlines in feature and and context */ for(size_t i=0;icheck_feature_context(feature,context)){ stop_list_recorder->write(pos0,feature,context); return; } } /* The alert list is a special features that are called out. 
* If we have one of those, write it to the redlist. */ if(flag_notset(FLAG_NO_ALERTLIST) && fs.alert_list && fs.alert_list->check_feature_context(feature,context)){ std::string alert_fn = outdir + "/ALERTS_found.txt"; cppmutex::lock lock(Mr); // notce we are locking the redlist std::ofstream rf(alert_fn.c_str(),std::ios_base::app); if(rf.is_open()){ rf << pos0.shift(feature_recorder::offset_add).str() << '\t' << feature << '\t' << "\n"; } } /* Support in-memory histograms */ if(mhistogram){ mhistogram->add(feature,1); } /* Finally write out the feature and the context */ if(flag_notset(FLAG_NO_FEATURES)){ this->write0(pos0,feature,context); } } /** * Given a buffer, an offset into that buffer of the feature, and the length * of the feature, make the context and write it out. This is mostly used * for writing from within the lexical analyzers. */ void feature_recorder::write_buf(const sbuf_t &sbuf,size_t pos,size_t len) { #ifdef DEBUG_SCANNER if(debug & DEBUG_SCANNER){ std::cerr << "*** write_buf " << name << " sbuf=" << sbuf << " pos=" << pos << " len=" << len << "\n"; // for debugging, print Imagine that when pos= the location where the crash is happening. // then set a breakpoint at std::cerr. if(pos==9999999){ std::cerr << "Imagine that\n"; } } #endif /* If we are in the margin, ignore; it will be processed again */ if(pos >= sbuf.pagesize && pos < sbuf.bufsize){ return; } if(pos >= sbuf.bufsize){ /* Sanity checks */ std::cerr << "*** write_buf: WRITE OUTSIDE BUFFER. " << " pos=" << pos << " sbuf=" << sbuf << "\n"; return; } /* Asked to write beyond bufsize; bring it in */ if(pos+len > sbuf.bufsize){ len = sbuf.bufsize - pos; } std::string feature = sbuf.substr(pos,len); std::string context; if((flags & FLAG_NO_CONTEXT)==0){ /* Context write; create a clean context */ size_t p0 = context_window_before < pos ? 
pos-context_window_before : 0; size_t p1 = pos+len+context_window_after; if(p1>sbuf.bufsize) p1 = sbuf.bufsize; assert(p0<=p1); context = sbuf.substr(p0,p1-p0); } this->write(sbuf.pos0+pos,feature,context); #ifdef DEBUG_SCANNER if(debug & DEBUG_SCANNER){ std::cerr << ".\n"; } #endif } /** * replace a character in a string with another */ std::string replace(const std::string &src,char f,char t) { std::string ret; for(size_t i=0;i=128 || ch=='"' || ch=='*' || ch=='+' || ch==',' || ch=='/' || ch==':' || ch==';' || ch=='<' || ch=='=' || ch=='>' || ch=='?' || ch=='\\' || ch=='[' || ch==']' || ch=='|'){ out.push_back('_'); } else { out.push_back(ch); } } return out; } #include /** * @param sbuf - the buffer to carve * @param pos - offset in the buffer to carve * @param len - how many bytes to carve * @param hasher - to compute the hash of the carved object. * */ std::string feature_recorder::carve(const sbuf_t &sbuf,size_t pos,size_t len, const std::string &ext, const be13::hash_def &hasher) { if(flags & FLAG_DISABLED) return std::string(); // disabled /* If we are in the margin, ignore; it will be processed again */ if(pos >= sbuf.pagesize && pos < sbuf.bufsize){ return std::string(); } if(pos >= sbuf.bufsize){ /* Sanity checks */ std::cerr << "*** carve: WRITE OUTSIDE BUFFER. pos=" << pos << " sbuf=" << sbuf << "\n"; return std::string(); } /* Carve to a file depending on the carving mode. The purpose * of CARVE_ENCODED is to allow us to carve JPEGs when they are * embedded in, say, GZIP files, but not carve JPEGs that are * bare. The difficulty arises when you have a tool that can go * into, say, ZIP files. In this case, we don't want to carve * every ZIP file, just the (for example) XORed ZIP files. So the * ZIP carver doesn't carve every ZIP file, just the ZIP files * that are in HIBER files. That is, we want to not carve a path * of ZIP-234234 but we do want to carve a path of * 1000-HIBER-33423-ZIP-2343. This is implemented by having an * ignore_encoding. 
the ZIP carver sets it to ZIP so it won't * carve things that are just found in a ZIP file. This means that * it won't carve disembodied ZIP files found in unallocated * space. You might want to do that. If so, set ZIP's carve mode * to CARVE_ALL. */ switch(carve_mode){ case CARVE_NONE: return std::string(); // carve nothing case CARVE_ENCODED: if(sbuf.pos0.path.size()==0) return std::string(); // not encoded if(sbuf.pos0.alphaPart()==ignore_encoding) return std::string(); // ignore if it is just encoded with this break; // otherwise carve case CARVE_ALL: break; } /* If the directory doesn't exist, make it. * If two threads try to make the directory, * that's okay, because the second one will fail. */ uint64_t this_file_number = file_number_add(1); std::string dirname1 = outdir + "/" + name; std::stringstream ss; ss << dirname1 << "/" << std::setw(3) << std::setfill('0') << (this_file_number / 1000); std::string dirname2 = ss.str(); std::string fname = dirname2 + std::string("/") + valid_dosname(sbuf.pos0.str() + ext); std::string carved_hash_hexvalue = (*hasher.func)(sbuf.buf,sbuf.bufsize); /* Record what was found in the feature file. */ ss.str(std::string()); // clear the stringstream ss << "" << fname << "" << len << "" << "" << carved_hash_hexvalue << ""; this->write(sbuf.pos0+len,fname,ss.str()); /* Make the directory if it doesn't exist. */ if (access(dirname2.c_str(),R_OK)!=0){ #ifdef WIN32 mkdir(dirname1.c_str()); mkdir(dirname2.c_str()); #else mkdir(dirname1.c_str(),0777); mkdir(dirname2.c_str(),0777); #endif } /* Check to make sure that directory is there. We don't just the return code * because there could have been two attempts to make the directory simultaneously, * so the mkdir could fail but the directory could nevertheless exist. We need to * remember the error number because the access() call may clear it. 
*/ int oerrno = errno; // remember error number if (access(dirname2.c_str(),R_OK)!=0){ std::cerr << "Could not make directory " << dirname2 << ": " << strerror(oerrno) << "\n"; return std::string(); } /* Write the file into the directory */ int fd = ::open(fname.c_str(),O_CREAT|O_BINARY|O_RDWR,0666); if(fd<0){ std::cerr << "*** carve: Cannot create " << fname << ": " << strerror(errno) << "\n"; return std::string(); } ssize_t ret = sbuf.write(fd,pos,len); if(ret<0){ std::cerr << "*** carve: Cannot write(pos=" << fd << "," << pos << " len=" << len << "): "<< strerror(errno) << "\n"; } ::close(fd); return fname; } /** * Currently, we need strptime() and utimes() to set the time. */ void feature_recorder::set_carve_mtime(const std::string &fname, const std::string &mtime_iso8601) { if(flags & FLAG_DISABLED) return; // disabled #if defined(HAVE_STRPTIME) && defined(HAVE_UTIMES) if(fname.size()){ struct tm tm; if(strptime(mtime_iso8601.c_str(),"%Y-%m-%dT%H:%M:%S",&tm)){ time_t t = mktime(&tm); if(t>0){ const struct timeval times[2] = {{t,0},{t,0}}; utimes(fname.c_str(),times); } } } #endif } tcpflow/src/be13_api/beregex.h0000644000175000017500000000461012263701331015105 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * beregex.h: * * simple cover for regular expression class. * The class allocates and frees the strings */ #ifndef BEREGEX_H #define BEREGEX_H #ifdef HAVE_TRE_TRE_H # include #else # ifdef HAVE_REGEX_H # include # endif #endif #include #include #include #include #include #include #include class beregex { private: void compile(); beregex & operator=(const beregex&that); // don't use this, please public: /** Bargain-basement detector of things that might be regular expressions. 
*/ static const char *version(); static bool is_regex(const std::string &str); std::string pat; /* our pattern */ int flags; void *nreg_; // would be regex_t *, but that's in regex.h which is included in beregex.c beregex(const beregex &that); beregex(std::string pat_,int flags_); ~beregex(); /** * perform a search for a single hit. If there is a group and something is found, * set *found to be what was found, *offset to be the starting offset, and *len to be * the length. Note that this only handles a single group. */ int search(const std::string &line,std::string *found,size_t *offset,size_t *len) const; int search(const std::string &line,std::string *matches,int REGMAX) const; std::string search(const std::string &line) const; }; typedef std::vector beregex_vector; /** * The regex_list maintains a list of regular expressions. * The list can be read out of a file. * check() returns true if the provided string is inside the list * This should be combined with the word_and_context_list */ class regex_list { public: std::vector patterns; regex_list():patterns(){} size_t size(){ return patterns.size(); } /** * Read a file; returns 0 if successful, -1 if failure. * @param fname - the file to read. */ virtual ~regex_list(){ for(std::vector::iterator it=patterns.begin(); it != patterns.end(); it++){ delete *it; } } void add_regex(const std::string &pat); int readfile(std::string fname); /** check() is threadsafe. 
*/ bool check(const std::string &probe,std::string *found, size_t *offset,size_t *len) const; }; #endif tcpflow/src/be13_api/.gitignore0000644000175000017500000000033112263701331015277 0ustar dimadima*.swp *.swo # http://www.gnu.org/software/automake *~ *.o *.so Makefile.in # http://www.gnu.org/software/autoconf /autom4te.cache /aclocal.m4 /compile /configure /depcomp /install-sh /missing .deps .dirstamp _deps tcpflow/src/be13_api/plugin.cpp0000644000175000017500000006212512263701331015322 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * common.cpp: * bulk_extractor backend stuff, used for both standalone executable and bulk_extractor. */ #include "config.h" #include #include #include #include #ifdef HAVE_ERR_H #include #endif #ifdef HAVE_DLFCN_H #include #endif #include "bulk_extractor_i.h" #include "aftimer.h" #include "../dfxml/src/hash_t.h" uint32_t scanner_def::max_depth = 7; // max recursion depth uint32_t scanner_def::max_ngram = 10; // max recursion depth static int debug; // local debug variable static uint32_t max_depth_seen=0; static cppmutex max_depth_seenM; bool be13::plugin::dup_data_alerts = false; // by default, is disabled uint64_t be13::plugin::dup_data_encountered = 0; // amount that was not processed class scanner_command { public: enum command_t {DISABLE_ALL=0,ENABLE_ALL,DISABLE,ENABLE}; scanner_command(const scanner_command &sc):command(sc.command),name(sc.name){}; scanner_command(scanner_command::command_t c,const std::string &n):command(c),name(n){}; command_t command; std::string name; }; static std::vector scanner_commands; bool scanner_commands_processed = false; /**************************************************************** *** misc support ****************************************************************/ #ifndef HAVE_ERR #include // noreturn attribute to avoid warning with GCC on Linux static void err(int eval,const char *fmt,...) 
__attribute__ ((noreturn)); static void err(int eval,const char *fmt,...) { va_list ap; va_start(ap,fmt); vfprintf(stderr,fmt,ap); va_end(ap); fprintf(stderr,": %s\n",strerror(errno)); exit(eval); } #endif #ifndef HAVE_ERRX #include // noreturn attribute to avoid warning with GCC on Linux static void errx(int eval,const char *fmt,...) __attribute__ ((noreturn)); static void errx(int eval,const char *fmt,...) { va_list ap; va_start(ap,fmt); vfprintf(stderr,fmt,ap); fprintf(stderr,"%s\n",strerror(errno)); va_end(ap); exit(eval); } #endif /**************************************************************** *** SCANNER PLUG-IN SYSTEM ****************************************************************/ /* scanner_params */ scanner_params::PrintOptions scanner_params::no_options; /* vector object for keeping track of packet callbacks */ class packet_plugin_info { public: packet_plugin_info(void *user_,packet_callback_t *callback_):user(user_),callback(callback_){}; void *user; packet_callback_t *callback; }; typedef std::vector packet_plugin_info_vector_t; packet_plugin_info_vector_t packet_handlers; // pcap callback handlers /* plugin */ /** * the vector of current scanners */ be13::plugin::scanner_vector be13::plugin::current_scanners; void be13::plugin::set_scanner_debug(int adebug) { debug = adebug; } /** * return true a scanner is enabled */ /* enable or disable a specific scanner. * enable = 0 - disable that scanner. * enable = 1 - enable that scanner * 'all' is a special scanner that enables all scanners. 
*/ void be13::plugin::set_scanner_enabled(const std::string &name,bool enable) { for(scanner_vector::iterator it = current_scanners.begin();it!=current_scanners.end();it++){ if(name=="all" && (((*it)->info.flags & scanner_info::SCANNER_NO_ALL)==0)){ (*it)->enabled = enable; } if((*it)->info.name==name){ (*it)->enabled = enable; return; } } if(name=="all") return; std::cerr << "Invalid scanner name '" << name << "'\n"; exit(1); } void be13::plugin::set_scanner_enabled_all(bool enable) { for(scanner_vector::const_iterator it = current_scanners.begin();it!=current_scanners.end();it++){ (*it)->enabled = enable; } } /** Name of feature files that should be histogramed. * The histogram should be done in the plug-in */ /**************************************************************** *** scanner plugin loading ****************************************************************/ /** * plugin system phase 0: Load a scanner. * * As part of scanner loading: * - pass configuration to the scanner * - feature files that the scanner requires * - Histograms that the scanner makes * This is called before scanners are enabled or disabled, so the pcap handlers * need to be set afterwards */ void be13::plugin::load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc) { /* If scanner is already loaded, return */ for(scanner_vector::const_iterator it = current_scanners.begin();it!=current_scanners.end();it++){ if((*it)->scanner==scanner) return; } /* Use an empty sbuf and an empty feature recorder set as the parameters for the sp below. * we use static values so that the sbuf is not constantly being created and destroyed. */ static const sbuf_t sbuf; static feature_recorder_set fs(feature_recorder_set::SET_DISABLED); // dummy // // Each scanner's params are stored in a scanner_def object that // is created here and retained for the duration of the run. // The scanner_def includes its own scanner_info structure. 
// We pre-load the structure with the configuration for this scanner // and the global debug variable // // currently every scanner gets the same config. In the future, we might // want to give different scanners different variables. // scanner_params sp(scanner_params::PHASE_STARTUP,sbuf,fs); // scanner_def *sd = new scanner_def(); sd->scanner = scanner; sd->info.config = ≻ sp.info = &sd->info; // Make an empty recursion control block and call the scanner's // initialization function. recursion_control_block rcb(0,""); (*scanner)(sp,rcb); // phase 0 sd->enabled = !(sd->info.flags & scanner_info::SCANNER_DISABLED); current_scanners.push_back(sd); } void be13::plugin::load_scanner_file(std::string fn,const scanner_info::scanner_config &sc) { /* Figure out the function name */ size_t extloc = fn.rfind('.'); if(extloc==std::string::npos){ errx(1,"Cannot find '.' in %s",fn.c_str()); } std::string func_name = fn.substr(0,extloc); size_t slashloc = func_name.rfind('/'); if(slashloc!=std::string::npos) func_name = func_name.substr(slashloc+1); slashloc = func_name.rfind('\\'); if(slashloc!=std::string::npos) func_name = func_name.substr(slashloc+1); std::cout << "Loading: " << fn << " (" << func_name << ")\n"; scanner_t *scanner = 0; #if defined(HAVE_DLOPEN) void *lib=dlopen(fn.c_str(), RTLD_LAZY); if(lib==0){ errx(1,"dlopen: %s\n",dlerror()); } /* Resolve the symbol */ scanner = (scanner_t *)dlsym(lib, func_name.c_str()); if(scanner==0) errx(1,"dlsym: %s\n",dlerror()); #elif defined(HAVE_LOADLIBRARY) /* Use Win32 LoadLibrary function */ /* See http://msdn.microsoft.com/en-us/library/ms686944(v=vs.85).aspx */ HINSTANCE hinstLib = LoadLibrary(TEXT(fn.c_str())); if(hinstLib==0) errx(1,"LoadLibrary(%s) failed",fn.c_str()); scanner = (scanner_t *)GetProcAddress(hinstLib,func_name.c_str()); if(scanner==0) errx(1,"GetProcAddress(%s) failed",func_name.c_str()); #else std::cout << " ERROR: Support for loadable libraries not enabled\n"; return; #endif load_scanner(*scanner,sc); } 
void be13::plugin::load_scanners(scanner_t * const *scanners,const scanner_info::scanner_config &sc) { for(int i=0;scanners[i];i++){ load_scanner(scanners[i],sc); } } void be13::plugin::load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc ) { DIR *dirp = opendir(dirname.c_str()); if(dirp==0){ err(1,"Cannot open directory %s:",dirname.c_str()); } struct dirent *dp; while ((dp = readdir(dirp)) != NULL){ std::string fname = dp->d_name; if(fname.substr(0,5)=="scan_" || fname.substr(0,5)=="SCAN_"){ size_t extloc = fname.rfind('.'); if(extloc==std::string::npos) continue; // no '.' std::string ext = fname.substr(extloc+1); #ifdef WIN32 if(ext!="DLL") continue; // not a DLL #else if(ext!="so") continue; // not a shared library #endif load_scanner_file(dirname+"/"+fname,sc ); } } } void be13::plugin::load_scanner_directories(const std::vector &dirnames, const scanner_info::scanner_config &sc) { for(std::vector::const_iterator it = dirnames.begin();it!=dirnames.end();it++){ load_scanner_directory(*it,sc); } } void be13::plugin::load_scanner_packet_handlers() { for(scanner_vector::const_iterator it = current_scanners.begin(); it!=current_scanners.end(); it++){ if((*it)->enabled){ const scanner_def *sd = (*it); if(sd->info.packet_cb){ packet_handlers.push_back(packet_plugin_info(sd->info.packet_user,sd->info.packet_cb)); } } } } // send every enabled scanner the phase message void be13::plugin::message_enabled_scanners(scanner_params::phase_t phase,feature_recorder_set &fs) { /* make an empty sbuf and feature recorder set */ const sbuf_t sbuf; scanner_params sp(phase,sbuf,fs); for(scanner_vector::iterator it = current_scanners.begin(); it!=current_scanners.end(); it++){ if((*it)->enabled){ recursion_control_block rcb(0,""); // dummy rcb ((*it)->scanner)(sp,rcb); } } } scanner_t *be13::plugin::find_scanner(const std::string &search_name) { for(scanner_vector::const_iterator it = current_scanners.begin();it!=current_scanners.end();it++){ 
if(search_name == (*it)->info.name){ return (*it)->scanner; } } return 0; } // put the enabled scanners into the vector void be13::plugin::get_enabled_scanners(std::vector &svector) { for(scanner_vector::const_iterator it=current_scanners.begin();it!=current_scanners.end();it++){ if((*it)->enabled){ svector.push_back((*it)->info.name); } } } bool be13::plugin::find_scanner_enabled() { for(scanner_vector::const_iterator it = current_scanners.begin(); it!=current_scanners.end(); it++){ if( ((*it)->info.flags & scanner_info::SCANNER_FIND_SCANNER) && ((*it)->enabled)){ return true; } } return false; } void be13::plugin::add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs) { for(scanner_vector::const_iterator it = current_scanners.begin(); it!=current_scanners.end(); it++){ if((*it)->enabled){ const scanner_def *sd = (*it); for(histogram_defs_t::const_iterator i2 = sd->info.histogram_defs.begin(); i2 != sd->info.histogram_defs.end(); i2++){ fs.add_histogram((*i2)); } } } } void be13::plugin::scanners_init(feature_recorder_set &fs) { assert(scanner_commands_processed==true); message_enabled_scanners(scanner_params::PHASE_INIT,fs); // tell all enabled scanners to init } /**************************************************************** *** Scanner Commands (which one is enabled or disabled) ****************************************************************/ void be13::plugin::scanners_disable_all() { assert(scanner_commands_processed==false); scanner_commands.push_back(scanner_command(scanner_command::DISABLE_ALL,std::string(""))); } void be13::plugin::scanners_enable_all() { assert(scanner_commands_processed==false); scanner_commands.push_back(scanner_command(scanner_command::ENABLE_ALL,std::string(""))); } void be13::plugin::scanners_enable(const std::string &name) { assert(scanner_commands_processed==false); scanner_commands.push_back(scanner_command(scanner_command::ENABLE,name)); } void be13::plugin::scanners_disable(const std::string &name) 
{ assert(scanner_commands_processed==false); scanner_commands.push_back(scanner_command(scanner_command::DISABLE,name)); } void be13::plugin::scanners_process_enable_disable_commands() { for(std::vector::const_iterator it=scanner_commands.begin(); it!=scanner_commands.end();it++){ switch((*it).command){ case scanner_command::ENABLE_ALL: set_scanner_enabled_all(true);break; case scanner_command::DISABLE_ALL: set_scanner_enabled_all(false); break; case scanner_command::ENABLE: set_scanner_enabled((*it).name,true);break; case scanner_command::DISABLE: set_scanner_enabled((*it).name,false);break; } } load_scanner_packet_handlers(); // can't do until enable/disable commands are run scanner_commands_processed = true; } /**************************************************************** *** PHASE_SHUTDOWN (formerly phase 2): shut down the scanners ****************************************************************/ void be13::plugin::phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml) { assert(scanner_commands_processed==true); for(scanner_vector::iterator it = current_scanners.begin();it!=current_scanners.end();it++){ if((*it)->enabled){ const sbuf_t sbuf; // empty sbuf scanner_params sp(scanner_params::PHASE_SHUTDOWN,sbuf,fs,sxml); recursion_control_block rcb(0,""); // empty rcb (*(*it)->scanner)(sp,rcb); } } } /**************************************************************** *** PHASE HISTOGRAM (formerly phase 3): Create the histograms ****************************************************************/ /** * Note currently we have two kinds of histograms: * post-processing histograms specified by the histogram library, and in-memory histograms. * that are really only used by scan_bulk. */ /* option processing */ /* Get the config and build the help strings at the same time! 
*/ std::stringstream scanner_info::helpstream; void scanner_info::get_config(const scanner_info::config_t &c, const std::string &n,std::string *val,const std::string &help) { /* Check to see if we are being called as part of a help operation */ helpstream << " -S " << n << "=" << *val << " " << help << " (" << name << ")\n"; scanner_info::config_t::const_iterator it = c.find(n); if(it!=c.end() && val){ *val = it->second; } } void scanner_info::get_config(const std::string &n,std::string *val,const std::string &help) { scanner_info::get_config(config->namevals,n,val,help); } #define GET_CONFIG(T) void scanner_info::get_config(const std::string &n,T *val,const std::string &help) {\ std::stringstream ss;\ ss << *val;\ std::string v(ss.str());\ get_config(n,&v,help);\ ss.str(v);\ ss >> *val;\ } GET_CONFIG(uint64_t) GET_CONFIG(int32_t) // both int32_t and uint32_t GET_CONFIG(uint32_t) GET_CONFIG(uint16_t) #ifdef HAVE_GET_CONFIG_SIZE_T GET_CONFIG(size_t) #endif /* uint8_t needs cast to uint32_t for << * Otherwise it is interpreted as a character. */ void scanner_info::get_config(const std::string &n,uint8_t *val_,const std::string &help) { uint32_t val = *val_; std::stringstream ss; ss << val; std::string v(ss.str()); get_config(n,&v,help); ss.str(v); ss >> val; *val_ = (uint8_t)val; } /* bool needs special processing for YES/NO/TRUE/FALSE */ void scanner_info::get_config(const std::string &n,bool *val,const std::string &help) { std::stringstream ss; ss << ((*val) ? "YES" : "NO"); std::string v(ss.str()); get_config(n,&v,help); switch(v.at(0)){ case 'Y':case 'y':case 'T':case 't':case '1': *val = true; break; default: *val = false; } } /** * Print a list of scanners. * We need to load them to do this, so they are loaded with empty config * Note that scanners can only be loaded once, so this exits. 
*/ void be13::plugin::info_scanners(bool detailed_info, bool detailed_settings, scanner_t * const *scanners_builtin, const char enable_opt,const char disable_opt) { const scanner_info::scanner_config empty_config; load_scanners(scanners_builtin,empty_config); std::cout << "\n"; std::vector enabled_wordlist; std::vector disabled_wordlist; for(scanner_vector::const_iterator it = current_scanners.begin();it!=current_scanners.end();it++){ if(detailed_info){ if ((*it)->info.name.size()) std::cout << "Scanner Name: " << (*it)->info.name << "\n"; std::cout << "flags: " << scanner_info::flag_to_string((*it)->info.flags) << "\n"; std::cout << "Scanner Interface version: " << (*it)->info.si_version << "\n"; if ((*it)->info.author.size()) std::cout << "Author: " << (*it)->info.author << "\n"; if ((*it)->info.description.size()) std::cout << "Description: " << (*it)->info.description << "\n"; if ((*it)->info.url.size()) std::cout << "URL: " << (*it)->info.url << "\n"; if ((*it)->info.scanner_version.size()) std::cout << "Scanner Version: " << (*it)->info.scanner_version << "\n"; std::cout << "Feature Names: "; for(std::set::const_iterator i2 = (*it)->info.feature_names.begin(); i2 != (*it)->info.feature_names.end(); i2++){ std::cout << *i2 << " "; } std::cout << "\n\n"; } if((*it)->info.flags & scanner_info::SCANNER_NO_USAGE) continue; if((*it)->info.flags & scanner_info::SCANNER_DISABLED){ disabled_wordlist.push_back((*it)->info.name); } else { enabled_wordlist.push_back((*it)->info.name); } } if(detailed_settings){ std::cout << "Settable Options (and their defaults): \n"; std::cout << scanner_info::helpstr(); } sort(disabled_wordlist.begin(),disabled_wordlist.end()); sort(enabled_wordlist.begin(),enabled_wordlist.end()); std::cout << "\n"; for(std::vector::const_iterator it = disabled_wordlist.begin(); it!=disabled_wordlist.end();it++){ std::cout << " -" << enable_opt << " " << *it << " - enable scanner " << *it << "\n"; } std::cout << "\n"; for(std::vector::const_iterator 
it = enabled_wordlist.begin();it!=enabled_wordlist.end();it++){ std::cout << " -" << disable_opt << " " << *it << " - disable scanner " << *it << "\n"; } } /** * upperstr - Turns an ASCII string into upper case (should be UTF-8) */ static std::string upperstr(const std::string &str) { std::string ret; for(std::string::const_iterator i=str.begin();i!=str.end();i++){ ret.push_back(toupper(*i)); } return ret; } /* Determine if the sbuf consists of a repeating ngram */ static size_t find_ngram_size(const sbuf_t &sbuf) { for(size_t ngram_size = 1; ngram_size < scanner_def::max_ngram; ngram_size++){ bool ngram_match = true; for(size_t i=ngram_size;i max_depth_seen) max_depth_seen = sp.depth; } /* If we are too deep, error out */ if(sp.depth >= scanner_def::max_depth){ feature_recorder *fr = fs.get_alert_recorder(); if(fr) fr->write(pos0,"process_extract: MAX DEPTH REACHED",""); return; } /* Determine if we have seen this buffer before */ bool seen_before = fs.check_previously_processed(sp.sbuf.buf,sp.sbuf.bufsize); if(seen_before){ md5_t md5 = md5_generator::hash_buf(sp.sbuf.buf,sp.sbuf.bufsize); feature_recorder *alert_recorder = fs.get_alert_recorder(); std::stringstream ss; ss << "" << sp.sbuf.bufsize << ""; if(alert_recorder && dup_data_alerts) alert_recorder->write(sp.sbuf.pos0,"DUP SBUF "+md5.hexdigest(),ss.str()); #ifdef HAVE__SYNC_ADD_AND_FETCH __sync_add_and_fetch(&dup_data_encountered,sp.sbuf.bufsize); #endif } /* Determine if the sbuf consists of a repeating ngram. If so, * it's only passed to the parsers that want ngrams. (By default, * such sbufs are booring.) 
*/ size_t ngram_size = find_ngram_size(sp.sbuf); /**************************************************************** *** CALL EACH OF THE SCANNERS ON THE SBUF ****************************************************************/ for(scanner_vector::iterator it = current_scanners.begin();it!=current_scanners.end();it++){ // Look for reasons not to run a scanner if((*it)->enabled==false) continue; // not enabled if(((*it)->info.flags & scanner_info::SCANNER_WANTS_NGRAMS)==0){ /* If the scanner does not want ngrams, don't run it if we have ngrams or duplicate data */ if(ngram_size > 0) continue; if(seen_before) continue; } if(sp.depth > 0 && ((*it)->info.flags & scanner_info::SCANNER_DEPTH_0)){ // depth >0 and this scanner only run at depth 0 continue; } const std::string &name = (*it)->info.name; try { /* Compute the effective path for stats */ bool inname=false; std::string epath; for(std::string::const_iterator cc=sp.sbuf.pos0.path.begin();cc!=sp.sbuf.pos0.path.end();cc++){ if(isupper(*cc)) inname=true; if(inname) epath.push_back(toupper(*cc)); if(*cc=='-') inname=false; } if(epath.size()>0) epath.push_back('-'); for(std::string::const_iterator cc=name.begin();cc!=name.end();cc++){ epath.push_back(toupper(*cc)); } /* Create a RCB that will recursively call process_sbuf() */ recursion_control_block rcb(process_sbuf,upperstr(name)); /* Call the scanner.*/ { aftimer t; if(debug & DEBUG_PRINT_STEPS){ std::cerr << "sbuf.pos0=" << sp.sbuf.pos0 << " calling scanner " << name << "\n"; } t.start(); ((*it)->scanner)(sp,rcb); t.stop(); if(debug & DEBUG_PRINT_STEPS){ std::cerr << "sbuf.pos0=" << sp.sbuf.pos0 << " scanner " << name << " t=" << t.elapsed_seconds() << "\n"; } sp.fs.add_stats(epath,t.elapsed_seconds()); } } catch (const std::exception &e ) { std::stringstream ss; ss << "std::exception Scanner: " << name << " Exception: " << e.what() << " sbuf.pos0: " << sp.sbuf.pos0 << " bufsize=" << sp.sbuf.bufsize << "\n"; std::cerr << ss.str(); feature_recorder *alert_recorder = 
fs.get_alert_recorder(); if(alert_recorder) alert_recorder->write(sp.sbuf.pos0,"scanner="+name, std::string("")+e.what()+""); } catch (...) { std::stringstream ss; ss << "std::exception Scanner: " << name << " Unknown Exception " << " sbuf.pos0: " << sp.sbuf.pos0 << " bufsize=" << sp.sbuf.bufsize << "\n"; std::cerr << ss.str(); feature_recorder *alert_recorder = fs.get_alert_recorder(); if(alert_recorder) alert_recorder->write(sp.sbuf.pos0,"scanner="+name,""); } } fs.flush_all(); } /** * Process a pcap packet. * Designed to be very efficient because we have so many packets. */ void be13::plugin::process_packet(const be13::packet_info &pi) { for(packet_plugin_info_vector_t::iterator it = packet_handlers.begin(); it != packet_handlers.end(); it++){ (*(*it).callback)((*it).user,pi); } } void be13::plugin::get_scanner_feature_file_names(feature_file_names_t &feature_file_names) { for(scanner_vector::const_iterator it=current_scanners.begin();it!=current_scanners.end();it++){ if((*it)->enabled){ for(std::set::const_iterator fi=(*it)->info.feature_names.begin(); fi!=(*it)->info.feature_names.end(); fi++){ feature_file_names.insert(*fi); } } } } tcpflow/src/be13_api/pcap_fake.cpp0000644000175000017500000001515112263701331015732 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #include "config.h" #ifndef HAVE_LIBPCAP #include "pcap_fake.h" #include #include #include #include #include #ifdef WIN32 #define SET_BINMODE(f) _setmode(_fileno(f), _O_BINARY) #else #define SET_BINMODE(f) /* ignore */ #endif /* pcap_fake's struct pcap just keeps track of the file that was opened and * whether or not it was byteswapped. */ struct pcap { FILE *fp; // input file we are reading from int swapped; // whether magic number was swapped? 
uint32_t linktype; bool error; // an error occured bool break_loop; // break_loop was called bool must_close; char err_buf[128]; uint8_t *pktbuf; }; char *pcap_geterr(pcap_t *p) { snprintf(p->err_buf,sizeof(p->err_buf),"not implemented in pcap_fake"); return p->err_buf; } /** * pcap_open_offline() * -- "The name "-" is a synonym for stdin" (pcap manual) * -- allocate the pcap_t structure * -- open a pcap capture file. */ pcap_t *pcap_open_offline(const char *fname, char *errbuf) { FILE *fp = strcmp(fname,"-")==0 ? stdin : fopen(fname,"rb"); if(!fp){ snprintf(errbuf,PCAP_ERRBUF_SIZE,"%s:%s",fname,strerror(errno)); return 0; } pcap_t *p = pcap_fopen_offline(fp,errbuf); if(p && p->fp!=stdin) p->must_close = true; return p; } char *pcap_lookupdev(char *) // not implemented { fprintf(stderr,"pcap_fake.cpp:pcap_lookupdev: tcpflow was compiled without LIBPCAP. Will not live capture.\n"); return 0; } pcap_t *pcap_open_live(const char *, int, int, int, char *) { fprintf(stderr,"pcap_fake.cpp:pcap_open_live: tcpflow was compiled without LIBPCAP. 
Will not live capture.\n"); return 0; } inline uint32_t swap4(uint32_t x) { return ( ((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) | ((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24)); } inline uint32_t swap2(uint16_t x) { return ( ((x & 0xff00) >> 8) | ((x & 0x00ff) << 8)); } pcap_t *pcap_fopen_offline(FILE *fp, char *errbuf) { SET_BINMODE(fp); bool swapped = false; struct pcap_file_header header; if(fread(&header,sizeof(header),1,fp)!=1){ snprintf(errbuf,PCAP_ERRBUF_SIZE,"Cannot read pcap header"); return 0; // cannot read header } if(header.magic==0xd4c3b2a1){ // check for swap header.magic = swap4(header.magic); header.version_major = swap2(header.version_major); header.version_minor = swap2(header.version_minor); header.thiszone = swap4(header.thiszone); header.sigfigs = swap4(header.sigfigs); header.snaplen = swap4(header.snaplen); header.linktype = swap4(header.linktype); swapped = true; } if(header.magic != 0xa1b2c3d4){ snprintf(errbuf, PCAP_ERRBUF_SIZE,"Cannot decode pcap header 0x%x; swapped=%d", header.magic,swapped); return 0; } if(header.version_major!=PCAP_VERSION_MAJOR || header.version_minor!=PCAP_VERSION_MINOR){ snprintf(errbuf, PCAP_ERRBUF_SIZE,"Cannot read pcap version %d.%d", header.version_major,header.version_minor); return 0; } pcap_t *ret = (pcap_t *)calloc(1,sizeof(pcap_t)); if(ret==0){ snprintf(errbuf, PCAP_ERRBUF_SIZE,"Cannot calloc %u bytes",(unsigned int)sizeof(pcap_t)); return 0; } ret->pktbuf = (uint8_t *)malloc(header.snaplen); if(ret->pktbuf==0) { // did we get the snaplen? 
std::cerr << "Couldn't get header snaplen"; return 0; } //DEBUG(100) ("pcap_fake.cpp DEBUG: header.magic = %x", header.magic); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.version_major = %d", header.version_major); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.version_minor = %d", header.version_minor); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.thiszone = %d", header.thiszone); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.sigfigs = %d", header.sigfigs); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.snaplen = %d", header.snaplen); //DEBUG(100) ("pcap_fake.cpp DEBUG: header.linktype = %d",header.linktype); //DEBUG(100) ("pcap_fake.cpp DEBUG: ret->pktbuf = %s". ret->pktbuf); ret->fp = fp; ret->swapped = swapped; ret->linktype = header.linktype; return ret; } /* * These are not implemented in pcap_fake */ int pcap_compile(pcap_t *p, struct bpf_program *program, const char *expression, int optimize, uint32_t mask) { if(strlen(expression)==0){ program->valid = true; return 0; // we can compile the empty expression } return -1; // we cannot compile otherwise } int pcap_datalink(pcap_t *p) { return p->linktype; } int pcap_setfilter(pcap_t *p, struct bpf_program *prog) { if(prog->valid) return 0; return -1; } int pcap_loop(pcap_t *p, int cnt, pcap_handler callback, uint8_t *user) { while(cnt !=0 && !feof(p->fp) && p->break_loop==false){ uint32_t tv_sec; uint32_t tv_usec; struct pcap_pkthdr hdr; /* Note: struct timeval is 16 bytes on MacOS and not 8 bytes, * so we manually read and set up the structure */ if(fread(&tv_sec,sizeof(uint32_t),1,p->fp)!=1) break; if(fread(&tv_usec,sizeof(uint32_t),1,p->fp)!=1) break; hdr.ts.tv_sec = tv_sec; hdr.ts.tv_usec = tv_usec; if(fread(&hdr.caplen,sizeof(uint32_t),1,p->fp)!=1) break; if(fread(&hdr.len,sizeof(uint32_t),1,p->fp)!=1) break; /* Swap the header if necessary */ if(p->swapped){ hdr.ts.tv_sec = swap4(hdr.ts.tv_sec); hdr.ts.tv_usec = swap4(hdr.ts.tv_usec); hdr.caplen = swap4(hdr.caplen); hdr.len = swap4(hdr.len); } /* Read the packet */ 
if(fread(p->pktbuf,hdr.caplen,1,p->fp)!=1) break; // no more to read //DEBUG(100) ("pcap_fake: read tv_sec.tv_usec=%d.%06d caplen=%d len=%d", // (int)hdr.ts.tv_sec,(int)hdr.ts.tv_usec,hdr.caplen,hdr.len); /* Process the packet */ (*callback)(user,&hdr,p->pktbuf); /* And loop */ if(cnt>0) cnt--; // decrease the packet count } return 0; } void pcap_break_loop(pcap_t *p) { p->break_loop=true; } void pcap_close(pcap_t *p) // close the file { if(p->must_close) fclose(p->fp); free(p->pktbuf); free(p); } #endif tcpflow/src/be13_api/sbuf_stream.cpp0000644000175000017500000001066012263701331016333 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #include "config.h" #include "bulk_extractor_i.h" #include "sbuf_stream.h" /* * Stream interfaces */ sbuf_stream::sbuf_stream(const sbuf_t &sbuf_): sbuf(sbuf_),offset(0) { } sbuf_stream::~sbuf_stream() { } void sbuf_stream::seek(size_t offset_) { offset = offset_; } size_t sbuf_stream::tell() { return offset; } /* * unsigned integers, default little endian */ uint8_t sbuf_stream::get8u() { uint8_t value = sbuf.get8u(offset); offset++; return value; } uint16_t sbuf_stream::get16u() { uint16_t value = sbuf.get16u(offset); offset+=2; return value; } uint32_t sbuf_stream::get32u() { uint32_t value = sbuf.get32u(offset); offset+=4; return value; } uint64_t sbuf_stream::get64u() { uint64_t value = sbuf.get64u(offset); offset+=8; return value; } /* * unsigned integers, big endian */ uint8_t sbuf_stream::get8uBE() { uint8_t value = sbuf.get8uBE(offset); offset++; return value; } uint16_t sbuf_stream::get16uBE() { uint16_t value = sbuf.get16uBE(offset); offset+=2; return value; } uint32_t sbuf_stream::get32uBE() { uint32_t value = sbuf.get32uBE(offset); offset+=4; return value; } uint64_t sbuf_stream::get64uBE() { uint64_t value = sbuf.get64uBE(offset); offset+=8; return value; } /* * unsigned integers, byte order specified */ uint8_t sbuf_stream::get8u(sbuf_t::byte_order_t bo) { uint8_t value = 
sbuf.get8u(offset, bo); offset++; return value; } uint16_t sbuf_stream::get16u(sbuf_t::byte_order_t bo) { uint16_t value = sbuf.get16u(offset, bo); offset+=2; return value; } uint32_t sbuf_stream::get32u(sbuf_t::byte_order_t bo) { uint32_t value = sbuf.get32u(offset, bo); offset+=4; return value; } uint64_t sbuf_stream::get64u(sbuf_t::byte_order_t bo) { uint64_t value = sbuf.get64u(offset, bo); offset+=8; return value; } /* * signed integers, default little endian */ int8_t sbuf_stream::get8i() { int8_t value = sbuf.get8i(offset); offset++; return value; } int16_t sbuf_stream::get16i() { int16_t value = sbuf.get16i(offset); offset+=2; return value; } int32_t sbuf_stream::get32i() { int32_t value = sbuf.get32i(offset); offset+=4; return value; } int64_t sbuf_stream::get64i() { int64_t value = sbuf.get64i(offset); offset+=8; return value; } /* * signed integers, big endian */ int8_t sbuf_stream::get8iBE() { int8_t value = sbuf.get8iBE(offset); offset++; return value; } int16_t sbuf_stream::get16iBE() { int16_t value = sbuf.get16iBE(offset); offset+=2; return value; } int32_t sbuf_stream::get32iBE() { int32_t value = sbuf.get32iBE(offset); offset+=4; return value; } int64_t sbuf_stream::get64iBE() { int64_t value = sbuf.get64iBE(offset); offset+=8; return value; } /* * signed integers, byte order specified */ int8_t sbuf_stream::get8i(sbuf_t::byte_order_t bo) { uint8_t value = sbuf.get8i(offset, bo); offset++; return value; } int16_t sbuf_stream::get16i(sbuf_t::byte_order_t bo) { uint16_t value = sbuf.get16i(offset, bo); offset+=2; return value; } int32_t sbuf_stream::get32i(sbuf_t::byte_order_t bo) { uint32_t value = sbuf.get32i(offset, bo); offset+=4; return value; } int64_t sbuf_stream::get64i(sbuf_t::byte_order_t bo) { uint64_t value = sbuf.get64i(offset, bo); offset+=8; return value; } /* * string readers */ void sbuf_stream::getUTF8WithQuoting(size_t num_octets_requested, string &utf8_string) { sbuf.getUTF8WithQuoting(num_octets_requested, utf8_string); offset 
+= utf8_string.length(); return; } void sbuf_stream::getUTF8WithQuoting(string &utf8_string) { sbuf.getUTF8WithQuoting(offset, utf8_string); size_t num_bytes = utf8_string.length(); if (num_bytes > 0) { // if anything was read then also skip \0 num_bytes ++; } offset += num_bytes; return; } void sbuf_stream::getUTF16(size_t code_units_requested, wstring &utf16_string) { sbuf.getUTF16(offset, code_units_requested, utf16_string); offset += utf16_string.length() * 2; return; } void sbuf_stream::getUTF16(wstring &utf16_string) { sbuf.getUTF16(offset, utf16_string); size_t num_bytes = utf16_string.length() * 2; if (num_bytes > 0) { // if anything was read then also skip \U0000 num_bytes += 2; } offset += num_bytes; return; } tcpflow/src/be13_api/pcap_fake.h0000644000175000017500000000621612263701331015401 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * pcap_fake.h * A fake libpcap implementation that can only read files without a filter. */ #include #include #include #include __BEGIN_DECLS /* * Version number of the current version of the pcap file format. * * NOTE: this is *NOT* the version number of the libpcap library. * To fetch the version information for the version of libpcap * you're using, use pcap_lib_version(). 
*/ #define PCAP_VERSION_MAJOR 2 #define PCAP_VERSION_MINOR 4 #define PCAP_ERRBUF_SIZE 256 struct pcap_file_header { uint32_t magic; // d4 c3 b2 a1 uint16_t version_major; // 02 00 uint16_t version_minor; // 04 00 int32_t thiszone; /* gmt to local correction - 00 00 00 00*/ uint32_t sigfigs; /* accuracy of timestamps */ uint32_t snaplen; /* max length saved portion of each pkt */ uint32_t linktype; /* data link type (LINKTYPE_*) */ } __attribute__((packed)); struct pcap_pkthdr { struct timeval ts; /* time stamp; native */ uint32_t caplen; /* length of portion present */ uint32_t len; /* length this packet (off wire) */ }__attribute__((packed)); /* What we need after opening the file to process each next packet */ typedef struct pcap pcap_t; /* * Taken from pcap-int.h */ //typedef int (*setfilter_op_t)(pcap_t *, struct bpf_program *); typedef void (*pcap_handler)(uint8_t *, const struct pcap_pkthdr *, const uint8_t *); struct bpf_program { int valid; // set true if filter is valid }; char *pcap_lookupdev(char *); // not implemented pcap_t *pcap_open_live(const char *, int, int, int, char *); // not implemented pcap_t *pcap_open_offline(const char *, char *); // open the file; set f pcap_t *pcap_fopen_offline(FILE *fp,char *errbuf); void pcap_close(pcap_t *); // close the file int pcap_loop(pcap_t *, int, pcap_handler, uint8_t *); // read the file and call loopback on each packet int pcap_datalink(pcap_t *); // noop int pcap_setfilter(pcap_t *, struct bpf_program *); // noop int pcap_compile(pcap_t *, struct bpf_program *, const char *, int, uint32_t); // generate error if filter provided char *pcap_geterr(pcap_t *); /* * These are the types that are the same on all platforms, and that * have been defined by for ages. 
*/ #define DLT_NULL 0 /* BSD loopback encapsulation */ #define DLT_EN10MB 1 /* Ethernet (10Mb) */ #define DLT_EN3MB 2 /* Experimental Ethernet (3Mb) */ #define DLT_AX25 3 /* Amateur Radio AX.25 */ #define DLT_PRONET 4 /* Proteon ProNET Token Ring */ #define DLT_CHAOS 5 /* Chaos */ #define DLT_IEEE802 6 /* 802.5 Token Ring */ #define DLT_ARCNET 7 /* ARCNET, with BSD-style header */ #define DLT_SLIP 8 /* Serial Line IP */ #define DLT_PPP 9 /* Point-to-point Protocol */ #define DLT_FDDI 10 /* FDDI */ #define DLT_RAW 101 /* just packets */ __END_DECLS tcpflow/src/be13_api/word_and_context_list.h0000644000175000017500000001103512263701331020057 0ustar dimadima#ifndef WORD_AND_CONTEXT_LIST_H #define WORD_AND_CONTEXT_LIST_H #include "beregex.h" /** * \addtogroup internal_interfaces * @{ * \file * word_and_context_list: * * A re-implementation of the basic stop list, regular expression * stop_list, and context-sensitive stop list. * * Method: * Each entry in the stop list can be represented as: * - a feature that is stopped, with optional context. * - a regular expression * * Context is represented as a std::string before the feature and a std::string after. * * The stop list contains is a map of features that are stopped. * For each feature, there may be no context or a list of context. * If there is no context and the feature is in the list, */ /* * context is a class that records the feature, the text before, and the text after. * Typically this is used for stop lists and alert lists. 
*/ #if defined(HAVE_UNORDERED_SET) #include #else #include #endif /* includes both unordered_map and unordered_multimap */ #if defined(HAVE_UNORDERED_MAP) #include #else #include #endif #include // brings in map and multimap class context { public: static void extract_before_after(const std::string &feature,const std::string &ctx, std::string &before,std::string &after){ if(feature.size() <= ctx.size()){ /* The most simple algorithm is a sliding window */ for(size_t i = 0;i stopmap_t; #else typedef std::tr1::unordered_multimap stopmap_t; #endif stopmap_t fcmap; // maps features to contexts; for finding them #if defined(HAVE_UNORDERED_SET) typedef std::unordered_set< std::string > stopset_t; #else typedef std::tr1::unordered_set< std::string > stopset_t; #endif stopset_t context_set; // presence of a pair in fcmap beregex_vector patterns; public: /** * rstrcmp is like strcmp, except it compares std::strings right-aligned * and only compares the minimum sized std::string of the two. */ static int rstrcmp(const std::string &a,const std::string &b); word_and_context_list():fcmap(),context_set(),patterns(){ } ~word_and_context_list(){ for(beregex_vector::iterator it=patterns.begin(); it != patterns.end(); it++){ delete *it; } } size_t size(){ return fcmap.size() + patterns.size();} void add_regex(const std::string &pat); // not threadsafe bool add_fc(const std::string &f,const std::string &c); // not threadsafe int readfile(const std::string &fname); // not threadsafe // return true if the probe with context is in the list or in the stopmap bool check(const std::string &probe,const std::string &before, const std::string &after) const; // threadsafe bool check_feature_context(const std::string &probe,const std::string &context) const; // threadsafe void dump(); }; inline int word_and_context_list::rstrcmp(const std::string &a,const std::string &b) { size_t alen = a.size(); size_t blen = b.size(); size_t len = alen < blen ? 
alen : blen; for(size_t i=0;i b[bpos]) return 1; } return 0; } #endif tcpflow/src/be13_api/unicode_escape.h0000644000175000017500000000053612263701331016435 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #ifndef UNICODE_ESCAPE_H #define UNICODE_ESCAPE_H #include /** \addtogroup bulk_extractor_APIs * @{ */ /** \file */ extern bool validateOrEscapeUTF8_validate; std::string validateOrEscapeUTF8(const std::string &input, bool escape_bad_UTF8,bool escape_backslash); #endif tcpflow/src/be13_api/sbuf.h0000644000175000017500000005322012263701331014424 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * sbuf.h: * * sbuf ("safer buffer") provides a typesafe means to * refer to binary data within the context of a C++ computer forensics * tool. The sbuf is a const buffer for which the first byte's * position is tracked in the "pos0" variable (the position of * byte[0]). The buffer may come from a disk, a disk image, or be the * result of decompressing or otherwise decoding other data. * * Created and maintained by Simson Garfinkel, 2007--2012. * * sbuf_stream is a stream-oriented interface for reading sbuf data. */ #ifndef SBUF_H #define SBUF_H #ifdef HAVE_STRING_H #include #endif //Don't turn this on; it currently makes scan_net crash. //#define SBUF_TRACK /* required per C++ standard */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include #include #include #include #include /**************************************************************** *** pos0_t ****************************************************************/ /** \addtogroup bulk_extractor_APIs * @{ */ /** \file */ /** * \class pos0_t * The pos0_t structure is used to record the forensic path of the * first byte of an sbuf. The forensic path can include strings associated * with decompressors and ordinals associated with offsets. 
* * e.g., 1000-GZIP-300-BASE64-30 means go 1000 bytes into the stream, * unzip, go 300 bytes into the decompressed stream, un-BASE64, and * go 30 bytes into that. * * pos0_t uses a string to hold the base path and the offset into that path * in a 64-bit number. */ inline int64_t stoi64(std::string str){ int64_t val(0); std::istringstream ss(str); ss >> val; return val; } class pos0_t { public: std::string path; /* forensic path of decoders*/ uint64_t offset; /* location of buf[0] */ explicit pos0_t():path(""),offset(0){} pos0_t(std::string s):path(s),offset(0){} pos0_t(const pos0_t &obj):path(obj.path),offset(obj.offset){ } std::string str() const { // convert to a string, with offset included std::stringstream ss; if(path.size()>0){ ss << path << "-"; } ss << offset; return ss.str(); } bool isRecursive() const { // is there a path? return path.size() > 0; } std::string firstPart() const { // the first part of the path size_t p = path.find('-'); if(p==std::string::npos) return std::string(""); return path.substr(0,p); } std::string lastAddedPart() const { // the last part of the path, before the offset size_t p = path.rfind('-'); if(p==std::string::npos) return std::string(""); return path.substr(p+1); } std::string alphaPart() const { // return the non-numeric parts, with /'s between each std::string desc; bool inalpha = false; /* Now get the std::string part of pos0 */ for(std::string::const_iterator it = path.begin();it!=path.end();it++){ if((*it)=='-'){ if(desc.size()>0 && desc.at(desc.size()-1)!='/') desc += '/'; inalpha=false; } if(isalpha(*it) || (inalpha && isdigit(*it))){ desc += *it; inalpha=true; } } return desc; } /** * Return a new position that's been shifted by an offset */ pos0_t shift(int64_t s) const { if(s==0) return *this; pos0_t ret; size_t p = path.find('-'); if(p==std::string::npos){ // no path ret.path=""; ret.offset = offset + s; return ret; } /* Figure out the value of the shift */ int64_t baseOffset = stoi64(path.substr(0,p-1)); 
std::stringstream ss; ss << (baseOffset+s) << path.substr(p); ret.path = ss.str(); ret.offset = offset; return ret; } }; /** iostream support for the pos0_t */ inline std::ostream & operator <<(std::ostream &os,const class pos0_t &pos0) { os << "(" << pos0.path << "|" << pos0.offset << ")"; return os; } /** Append a string (subdir). * The current offset is a prefix to the subdir. */ inline class pos0_t operator +(pos0_t pos0,const std::string &subdir) { std::stringstream ss; ss << pos0.offset; pos0.path += (pos0.path.size()>0 ? "-" : "") + ss.str() + "-" + subdir; pos0.offset = 0; return pos0; }; /** Adding an offset */ inline class pos0_t operator +(pos0_t pos0,int64_t delta) { pos0.offset += delta; return pos0; }; /** \name Comparision operations * @{ */ inline bool operator <(const class pos0_t &pos0,const class pos0_t & pos1) { if(pos0.path.size()==0 && pos1.path.size()==0) return pos0.offset < pos1.offset; if(pos0.path == pos1.path) return pos0.offset < pos1.offset; return pos0.path < pos1.path; }; inline bool operator >(const class pos0_t & pos0,const class pos0_t &pos1) { if(pos0.path.size()==0 && pos1.path.size()==0) return pos0.offset > pos1.offset; if(pos0.path == pos1.path) return pos0.offset > pos1.offset; return pos0.path > pos1.path; }; inline bool operator ==(const class pos0_t & pos0,const class pos0_t &pos1) { return pos0.path==pos1.path && pos0.offset==pos1.offset; }; /** @} */ /** * \class managed_malloc Like new[], but it automatically gets freed when the object is dropped. * throws std::bad_alloc if no memory. */ template < class TYPE > class managed_malloc { // default construction, copy construction and assignment are meaningless // and not implemented managed_malloc& operator=(const managed_malloc&); managed_malloc(const managed_malloc&); managed_malloc(); public: TYPE *buf; managed_malloc(size_t bytes):buf(new TYPE[bytes]){ } ~managed_malloc(){ if(buf) delete []buf; } }; /** * \class sbuf_t * This class describes the search buffer. 
* The accessors are safe so that no buffer overflow can happen. * Integer readers may throw sbuf_bounds_exception. * * This structure actually holds the data. * We use a pos0_t to maintain the address of the first byte. * * There are lots of ways for allocating an sbuf_t: * - map from a file. * - set from a block of memory. * - a subset of an existing sbuf_t (sbuf+10 gives you 10 bytes in, and therefore 10 bytes shorter) * * The subf_t class remembers how the sbuf_t was allocated and * automatically frees whatever resources are needed when it is freed. * * \warning DANGER: You must delete sbuf_t structures First-In, * Last-out, otherwise bad things can happen. (For example, if you * make a subset sbuf_t from a mapped file and unmap the file, the * subset will now point to unallocated memory.) */ class sbuf_t { private: /* The private structures keep track of memory management */ int fd; /* file this came from if mmapped file */ public:; bool should_unmap; /* munmap buffer when done */ bool should_free; /* should buf be freed when this sbuf is deleted? */ bool should_close; /* close(fd) when done. */ static size_t min(size_t a,size_t b){ return aadd_child(*this); } /** * Make an sbuf from a parent but with a different path. */ explicit sbuf_t(const pos0_t &that_pos0, const sbuf_t &that_sbuf ): fd(0),should_unmap(false),should_free(false),should_close(false), page_number(that_sbuf.page_number),pos0(that_pos0), parent(that_sbuf.highest_parent()),children(0), buf(that_sbuf.buf),bufsize(that_sbuf.bufsize),pagesize(that_sbuf.pagesize){ parent->add_child(*this); } /** * make an sbuf from a parent but with an indent. */ sbuf_t(const sbuf_t &that_sbuf,size_t off): fd(0),should_unmap(false),should_free(false),should_close(false), page_number(that_sbuf.page_number),pos0(that_sbuf.pos0+off), parent(that_sbuf.highest_parent()),children(0), buf(that_sbuf.buf+off), bufsize(that_sbuf.bufsize > off ? that_sbuf.bufsize-off : 0), pagesize(that_sbuf.pagesize > off ? 
that_sbuf.pagesize-off : 0){ } /** Allocate from an existing sbuf. * The allocated buf MUST be freed before the source, since no copy is made... */ explicit sbuf_t(const sbuf_t &sbuf,size_t off,size_t len): fd(0), should_unmap(false), should_free(false), should_close(false), page_number(sbuf.page_number),pos0(sbuf.pos0+off), parent(sbuf.highest_parent()), children(0), buf(sbuf.buf+off), bufsize(off+lenadd_child(*this); }; /**************************************************************** *** Allocators that allocate from memory ****************************************************************/ /* Allocators */ /** Allocate a new buffer of a given size for filling. * This is the one case where buf is written into... * This should probably be a subclass mutable_sbuf_t() for clarity. */ /* Allocate from an existing buffer, optionally freeing that buffer */ explicit sbuf_t(const pos0_t &pos0_,const uint8_t *buf_, size_t bufsize_,size_t pagesize_, int fd_, bool should_unmap_,bool should_free_,bool should_close_): fd(fd_), should_unmap(should_unmap_), should_free(should_free_), should_close(should_close_), page_number(0),pos0(pos0_),parent(0),children(0),buf(buf_),bufsize(bufsize_), pagesize(min(pagesize_,bufsize_)){ }; /* Similar to above, but with no fd */ explicit sbuf_t(const pos0_t &pos0_,const uint8_t *buf_, size_t bufsize_,size_t pagesize_,bool should_free_): fd(0), should_unmap(false), should_free(should_free_), should_close(false), page_number(0),pos0(pos0_),parent(0),children(0),buf(buf_),bufsize(bufsize_), pagesize(min(pagesize_,bufsize_)){ }; /** * the + operator returns a new sbuf that is i bytes in and, therefore, i bytes smaller. * Note: * 1. We assume that pagesize is always smaller than or equal to bufsize. * 2. The child sbuf uses the parent's memory. If the parent gets deleted, the child points * to invalid data. * * 3. If i is bigger than pagesize, then an sbuf is returned with * 0 bytes in the page and all of the margin. 
* * (Because we won't return what's in the margin as page data.) */ sbuf_t operator +(size_t off ) const { return sbuf_t(*this,off); } virtual ~sbuf_t(){ #if defined(SBUF_TRACK) && defined(HAVE___SYNC_ADD_AND_FETCH) assert(__sync_fetch_and_add(&children,0)==0); #endif if(parent) parent->del_child(*this); release(); } /* Allocate a sbuf from a file mapped into memory */ static sbuf_t *map_file(const std::string &fname); static sbuf_t *map_file(const std::string &fname,int fd); // if file is already opened static const std::string U10001C; // default delimeter character in bulk_extractor static std::string map_file_delimiter; // character placed static void set_map_file_delimiter(const std::string &new_delim){ map_file_delimiter = new_delim; } /* Properties */ size_t size() const {return bufsize;} // return the number of bytes size_t left(size_t n) const {return n=0); #endif } /** Find the offset of a byte */ size_t offset(const uint8_t *loc) const { if(locbuf+bufsize) return bufsize; return loc-buf; } /** * asString - returns the sbuf as a string */ std::string asString() const {return std::string((reinterpret_cast(buf)),bufsize);} /**************************************************************** *** range_exception_t *** An sbuf_range_exception object is thrown if the attempted sbuf access is out of range. ****************************************************************/ /** * sbuf_t raises an sbuf_range_exception when an attempt is made to read past the end of buf. */ class range_exception_t: public std::exception { public: virtual const char *what() const throw() { return "Error: Read past end of sbuf"; } }; /**************************************************************** *** The following get functions read integer and string types *** or else throw an sbuf_range_exception if out of range. 
****************************************************************/ /* Search functions --- memcmp at a particular location */ int memcmp(const uint8_t *cbuf,size_t at,size_t len) const; /** * \name unsigned int Intel (littel-endian) readers * @{ * these get functions safely return an unsigned integer value for the offset of i, * in Intel (little-endian) byte order or else throw sbuf_range_exception if out of range. */ uint8_t get8u(size_t i) const; uint16_t get16u(size_t i) const; uint32_t get32u(size_t i) const; uint64_t get64u(size_t i) const; /** @} */ /** * \name unsigned int Motorola (big-endian) readers * @{ * these get functions safely return an unsigned integer value for the offset of i, * in Motorola (big-endian) byte order or else throw sbuf_range_exception if out of range. */ uint8_t get8uBE(size_t i) const; uint16_t get16uBE(size_t i) const; uint32_t get32uBE(size_t i) const; uint64_t get64uBE(size_t i) const; /** @} */ /** * \name signed int Intel (little-endian) readers * @{ * these get functions safely return a signed integer value for the offset of i, * in Intel (little-endian) byte order or else throw sbuf_range_exception if out of range. */ int8_t get8i(size_t i) const; int16_t get16i(size_t i) const; int32_t get32i(size_t i) const; int64_t get64i(size_t i) const; /** @} */ /** * \name signed int Motorola (big-endian) readers * @{ * these get functions safely return a signed integer value for the offset of i, * in Motorola (big-endian) byte order or else throw sbuf_range_exception if out of range. */ int8_t get8iBE(size_t i) const; int16_t get16iBE(size_t i) const; int32_t get32iBE(size_t i) const; int64_t get64iBE(size_t i) const; /** @} */ /** * some get functions take byte_order_t as a specifier to indicate which endian format to use. 
*/ typedef enum {BO_LITTLE_ENDIAN=0,BO_BIG_ENDIAN=1} byte_order_t; /** * \name unsigned int, byte-order specified readers * @{ * these get functions safely return an unsigned integer value for the offset of i, * in the byte order of your choice or else throw sbuf_range_exception if out of range. */ uint8_t get8u(size_t i,byte_order_t bo) const; uint16_t get16u(size_t i,byte_order_t bo) const; uint32_t get32u(size_t i,byte_order_t bo) const; uint64_t get64u(size_t i,byte_order_t bo) const; /** @} */ /** * \name signed int, byte-order specified readers * @{ * these get functions safely return a signed integer value for the offset of i, * in the byte order of your choice or else throw sbuf_range_exception if out of range. */ int8_t get8i(size_t i,byte_order_t bo) const; int16_t get16i(size_t i,byte_order_t bo) const; int32_t get32i(size_t i,byte_order_t bo) const; int64_t get64i(size_t i,byte_order_t bo) const; /** @} */ /** * \name string readers * @{ * These get functions safely read string */ void getUTF8WithQuoting(size_t i, size_t num_octets_requested, std::string &utf8_string) const; void getUTF8WithQuoting(size_t i, std::string &utf8_string) const; /** @} */ /** * \name wstring readers * @{ * These get functions safely read wstring */ void getUTF16(size_t i, size_t num_code_units_requested, std::wstring &utf16_string) const; void getUTF16(size_t i, std::wstring &utf16_string) const; void getUTF16(size_t i, size_t num_code_units_requested, byte_order_t bo, std::wstring &utf16_string) const; void getUTF16(size_t i, byte_order_t bo, std::wstring &utf16_string) const; /** @} */ /** * The [] operator safely returns what's at index [i] or else returns 0 if out of range. * We made a decision that this would not throw the exception * Notice that we don't need to check to see if i<0 because i is unsigned. 
*/ uint8_t operator [](size_t i) const { return (ipagesize,ch); } // Return a pointer to a structure contained within the sbuf if there is // room, otherwise return a null pointer. template const TYPE * get_struct_ptr(uint32_t pos) const { if (pos + sizeof(TYPE) <= bufsize) { return reinterpret_cast (buf+pos); } return NULL; } /** * These are largely for debugging, but they also support the BEViewer. * Dump the sbuf to a stream. */ void raw_dump(std::ostream &os,uint64_t start,uint64_t len) const; void raw_dump(int fd,uint64_t start,uint64_t len) const; // writes to a raw file descriptor void hex_dump(std::ostream &os,uint64_t start,uint64_t len) const; void hex_dump(std::ostream &os) const; /* dump all */ ssize_t write(int fd,size_t loc,size_t len) const; /* write to a file descriptor, returns # bytes written */ ssize_t write(FILE *f,size_t loc,size_t len) const; /* write to a file descriptor, returns # bytes written */ }; std::ostream & operator <<(std::ostream &os,const sbuf_t &sbuf); #include "sbuf_private.h" #endif tcpflow/src/be13_api/CODING_STANDARDS.txt0000644000175000017500000000215712263701331016426 0ustar dimadimaCoding Standards v1.0 Simson L. Garfinkel December 3, 2013 All standards are based on compromise. These standards seem to be a good compromise between a variety of coding styles and existing standards. Executive summary: * No tabs in source code. Legacy code has tabs at 8 characters; they can be freely converted to spaces as necessary. * Indent at 4 spaces. 
* Open braces start on the SAME LINE for: - if statements - inline functions in .h headers - Java function declarations * Open braces start on NEXT LINE for: - C function declarations * We use the following lines/configuration variables to try to enforce the above: For EMACS at the top of c programs: /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ In .emacs files: (setq-default indent-tabs-mode nil) (setq c-basic-offset 4) References: =========== * http://www.emacswiki.org/emacs/NoTabs * http://www.jwz.org/doc/tabs-vs-spaces.html * http://slashdot.org/pollBooth.pl?qid=395&aid=-1 * http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml * http://www.python.org/dev/peps/pep-0008/#maximum-line-length tcpflow/src/be13_api/utils.cpp0000644000175000017500000000565612263701331015172 0ustar dimadima/** * A collection of utility functions that are useful. */ // Just for this module #define _FILE_OFFSET_BITS 64 /* required per C++ standard */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include "config.h" #include "cppmutex.h" #include "utils.h" #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #ifndef HAVE_ERR #include void err(int eval,const char *fmt,...) { va_list ap; va_start(ap,fmt); vfprintf(stderr,fmt,ap); va_end(ap); fprintf(stderr,": %s\n",strerror(errno)); exit(eval); } #endif #ifndef HAVE_ERRX #include void errx(int eval,const char *fmt,...) { va_list ap; va_start(ap,fmt); vfprintf(stderr,fmt,ap); fprintf(stderr,"%s\n",strerror(errno)); va_end(ap); exit(eval); } #endif #ifndef HAVE_WARN #include void warn(const char *fmt, ...) { va_list args; va_start(args,fmt); vfprintf(stderr,fmt, args); fprintf(stderr,": %s\n",strerror(errno)); } #endif #ifndef HAVE_WARNX #include void warnx(const char *fmt,...) { va_list ap; va_start(ap,fmt); vfprintf(stderr,fmt,ap); va_end(ap); } #endif /** Extract a buffer... 
* @param buf - the buffer to extract; * @param buflen - the size of the page to extract * @param pos0 - the byte position of buf[0] */ #ifndef HAVE_LOCALTIME_R /* locking localtime_r implementation */ cppmutex localtime_mutex; void localtime_r(time_t *t,struct tm *tm) { cppmutex::lock lock(localtime_mutex); *tm = *localtime(t); } #endif #ifndef HAVE_GMTIME_R /* locking gmtime_r implementation */ cppmutex gmtime_mutex; void gmtime_r(time_t *t,struct tm *tm) { if(t && tm){ cppmutex::lock lock(gmtime_mutex); struct tm *tmret = gmtime(t); if(tmret){ *tm = *tmret; } else { memset(tm,0,sizeof(*tm)); } } } #endif bool ends_with(const std::string &buf,const std::string &with) { size_t buflen = buf.size(); size_t withlen = with.size(); return buflen>withlen && buf.substr(buflen-withlen,withlen)==with; } bool ends_with(const std::wstring &buf,const std::wstring &with) { size_t buflen = buf.size(); size_t withlen = with.size(); return buflen>withlen && buf.substr(buflen-withlen,withlen)==with; } #include /****************************************************************/ /* C++ string splitting code from http://stackoverflow.com/questions/236129/how-to-split-a-string-in-c */ std::vector &split(const std::string &s, char delim, std::vector &elems) { std::stringstream ss(s); std::string item; while(std::getline(ss, item, delim)) { elems.push_back(item); } return elems; } std::vector split(const std::string &s, char delim) { std::vector elems; return split(s, delim, elems); } tcpflow/src/be13_api/atomic_set_map.h0000644000175000017500000000757312263701331016463 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /** * defines atomic_map and atomic_set * */ #ifndef ATOMIC_SET_MAP_H #define ATOMIC_SET_MAP_H #include "cppmutex.h" #include #include #if defined(HAVE_UNORDERED_MAP) # include # undef HAVE_TR1_UNORDERED_MAP // be sure we don't use it #else # if defined(HAVE_TR1_UNORDERED_MAP) # include # else # error Requires or # endif #endif #if 
defined(HAVE_UNORDERED_SET) #include #undef HAVE_TR1_UNORDERED_SET // be sure we don't use it #else #if defined(HAVE_TR1_UNORDERED_SET) #include #else #error Requires or #endif #endif template class atomic_histogram { #ifdef HAVE_UNORDERED_MAP typedef std::unordered_map hmap_t; #endif #ifdef HAVE_TR1_UNORDERED_MAP typedef std::tr1::unordered_map hmap_t; #endif hmap_t amap; // the locked atomic map mutable cppmutex M; // my lock public: atomic_histogram():amap(),M(){}; typedef void (*dump_callback_t)(void *user,const TYPE &val,const CTYPE &count); // add and return the count // http://www.cplusplus.com/reference/unordered_map/unordered_map/insert/ CTYPE add(const TYPE &val,const CTYPE &count){ cppmutex::lock lock(M); std::pair p = amap.insert(std::make_pair(val,count)); if (!p.second) { p.first->second += count; } return p.first->second; } // Dump the database to a user-provided callback. void dump(void *user,dump_callback_t dump_cb) const{ cppmutex::lock lock(M); for(typename hmap_t::const_iterator it = amap.begin();it!=amap.end();it++){ (*dump_cb)(user,(*it).first,(*it).second); } } struct ReportElement { ReportElement(TYPE aValue,uint64_t aTally):value(aValue),tally(aTally){ } TYPE value; CTYPE tally; static bool compare(const ReportElement *e1, const ReportElement *e2) { if (e1->tally > e2->tally) return true; if (e1->tally < e2->tally) return false; return e1->value < e2->value; } virtual ~ReportElement(){}; }; typedef std::vector< const ReportElement *> element_vector_t; void dump_sorted(void *user,dump_callback_t dump_cb) const{ /* Create a list of new elements, sort it, then report the sorted list */ element_vector_t evect; { cppmutex::lock lock(M); for(typename hmap_t::const_iterator it = amap.begin();it!=amap.end();it++){ evect.push_back( new ReportElement((*it).first, (*it).second)); } } std::sort(evect.begin(),evect.end(),ReportElement::compare); for(typename element_vector_t::const_iterator it = evect.begin();it!=evect.end();it++){ 
(*dump_cb)(user,(*it)->value,(*it)->tally); delete *it; } } uint64_t size_estimate() const; // Estimate the size of the database }; template class atomic_set { cppmutex M; #ifdef HAVE_UNORDERED_SET std::unordered_setmyset; #endif #ifdef HAVE_TR1_UNORDERED_SET std::tr1::unordered_setmyset; #endif public: atomic_set():M(),myset(){} bool contains(const TYPE &s){ cppmutex::lock lock(M); return myset.find(s)!=myset.end(); } void insert(const TYPE &s){ cppmutex::lock lock(M); myset.insert(s); } bool check_for_presence_and_insert(const TYPE &s){ cppmutex::lock lock(M); if(myset.find(s)!=myset.end()) return true; // in the set myset.insert(s); // otherwise insert it return false; // and return that it wasn't } }; #endif tcpflow/src/be13_api/beregex.cpp0000644000175000017500000001220212263701331015434 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #include "config.h" #include "beregex.h" #include #include #include #include #if defined(HAVE_LIBTRE) && defined(HAVE_TRE_REGCOMP) #define REGCOMP tre_regcomp #define REGFREE tre_regfree #define REGEXEC tre_regexec #define nreg (regex_t *)nreg_ #define HAVE_REGULAR_EXPRESSIONS static const char *regex_version = "tre"; #endif /* use regcomp() if tre_regcomp() is not available */ #if defined(HAVE_REGCOMP) && !defined(HAVE_REGULAR_EXPRESSIONS) #define REGCOMP regcomp #define REGFREE regfree #define REGEXEC regexec #define nreg (regex_t *)nreg_ #define HAVE_REGULAR_EXPRESSIONS static const char *regex_version = "system"; #endif #ifndef HAVE_REGULAR_EXPRESSIONS #error bulk_extractor requires tre_regcomp or regcomp to run #error download tre from "http://laurikari.net/tre/download/" #endif const char *beregex::version(){return regex_version;} bool beregex::is_regex(const std::string &str) { return true; // SLG TEST for(std::string::const_iterator it = str.begin();it!=str.end();it++){ switch(*it){ case '?': case '*': case '.': case '+': case '[': case '(': return true; } } return false; } 
beregex::beregex(const beregex &that):pat(that.pat),flags(that.flags),nreg_(0) { compile(); } beregex::beregex(std::string pat_,int flags_):pat(pat_),flags(flags_),nreg_(0) { compile(); } void beregex::compile() // compile the regex { if(pat.size()==0) return; nreg_ = calloc(sizeof(regex_t),1); if(REGCOMP(nreg,pat.c_str(),flags | REG_EXTENDED)!=0){ std::cerr << "regular expression compile error '" << pat << "' flags=" << flags << "\n"; exit(1); } } beregex::~beregex(){ if(nreg_){ REGFREE(nreg); free(nreg_); nreg_ = 0; } } /** * perform a search for a single hit. If there is a group and something is found, * set *found to be what was found, *offset to be the starting offset, and *len to be * the length. Note that this only handles a single group. */ int beregex::search(const std::string &line,std::string *found,size_t *offset,size_t *len) const { static const int REGMAX=2; regmatch_t pmatch[REGMAX]; if(!nreg_) return 0; memset(pmatch,0,sizeof(pmatch)); int r = REGEXEC(nreg,line.c_str(),REGMAX,pmatch,0); if(r==REG_NOMATCH) return 0; if(r!=0) return 0; /* some kind of failure */ /* Make copies of the first group */ if(pmatch[1].rm_so != pmatch[1].rm_eo){ if(found) *found = line.substr(pmatch[1].rm_so,pmatch[1].rm_eo-pmatch[1].rm_so); if(offset) *offset = pmatch[1].rm_so; if(len) *len = pmatch[1].rm_eo-pmatch[1].rm_so; } return 1; /* success */ } /** Perform a search with an array of strings. 
Return 0 if success, return code if fail.*/ int beregex::search(const std::string &line,std::string *matches,int REGMAX) const { regmatch_t *pmatch = (regmatch_t *)calloc(sizeof(regmatch_t),REGMAX+1); if(!nreg) return 0; int r = REGEXEC(nreg,line.c_str(),REGMAX+1,pmatch,0); if(r==0){ for(int i=0;i0 && (*line.end())=='\r'){ line.erase(line.end()); /* remove the last character while it is a \n or \r */ } patterns.push_back(new beregex(line,0)); } f.close(); return 0; } return -1; } void regex_list::add_regex(const std::string &pat) { patterns.push_back(new beregex(pat,0)); } /* Find the FIRST match in buf */ bool regex_list::check(const std::string &buf,std::string *found, size_t *offset,size_t *len) const { /* Now check check pattern */ /* First check literals, because they are faster */ bool first = true; bool fnd = false; for(std::vector::const_iterator it=patterns.begin(); it != patterns.end(); it++){ std::string nfound; size_t noffset=0; size_t nlen=0; if((*it)->search(buf,&nfound,&noffset,&nlen)){ if(first || noffset<*offset){ fnd = true; *found = nfound; *offset = noffset; *len = nlen; first = false; } } } return fnd; } tcpflow/src/be13_api/be13_configure.m40000644000175000017500000000204712263701331016352 0ustar dimadima# # mix-ins for be13 # AC_MSG_NOTICE([Including be13_configure.m4 from be13_api]) AC_CHECK_HEADERS([err.h pwd.h sys/cdefs.h sys/mman.h sys/resource.h sys/utsname.h unistd.h ]) AC_CHECK_FUNCS([ishexnumber isxdigit unistd.h mmap err errx warn warnx pread64 pread strptime _lseeki64 utimes ]) AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wredundant-decls"],[int a=3;], [AC_DEFINE(HAVE_DIAGNOSTIC_REDUNDANT_DECLS,1,[define 1 if GCC supports -Wredundant-decls])] ) AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wcast-align"],[int a=3;], [AC_DEFINE(HAVE_DIAGNOSTIC_CAST_ALIGN,1,[define 1 if GCC supports -Wcast-align])] ) AC_TRY_LINK([#include ], [uint64_t ul; __sync_add_and_fetch(&ul,0);], AC_DEFINE(HAVE___SYNC_ADD_AND_FETCH,1,[define 1 if 
__sync_add_and_fetch works on 64-bit numbers])) # # Figure out which version of unordered_map we are going to use # AC_LANG_PUSH(C++) AC_CHECK_HEADERS([unordered_map unordered_set],[],[ AC_CHECK_HEADERS([tr1/unordered_map tr1/unordered_set])]) AC_LANG_POP() tcpflow/src/be13_api/histogram.h0000644000175000017500000000722012263701331015461 0ustar dimadima#ifndef HISTOGRAM_H #define HISTOGRAM_H /** * \addtogroup internal_interfaces * @{ */ /* C++ Histogram classes. * * Eventually this may become a single class */ #include #include /** * \class CharClass * Examine a block of text and count the number of characters * in various ranges. This is useful for determining if a block of * bytes is coded in BASE16, BASE64, etc. */ class CharClass { public: uint32_t range_0_9; // a range_0_9 character uint32_t range_A_Fi; // a-f or A-F uint32_t range_g_z; // g-z uint32_t range_G_Z; // G-Z CharClass():range_0_9(0),range_A_Fi(0),range_g_z(0),range_G_Z(0){ } void add(uint8_t ch){ if(ch>='a' && ch<='f') range_A_Fi++; if(ch>='A' && ch<='F') range_A_Fi++; if(ch>='g' && ch<='z') range_g_z++; if(ch>='G' && ch<='Z') range_G_Z++; if(ch>='0' && ch<='9') range_0_9++; } void add(uint8_t *buf,size_t len){ for(size_t i=0;i e2.tally.count) return true; if (e1.tally.count < e2.tally.count) return false; return e1.value < e2.value; } virtual ~ReportElement(){}; }; private: /** A HistogramMap holds the histogram while it is being computed. */ typedef std::map HistogramMap; HistogramMap h; // holds the histogram uint32_t flags; // see above public: /** * Determine if a string probably has utf16. */ static bool looks_like_utf16(const std::string &str,bool &little_endian); HistogramMaker(uint32_t flags_):h(),flags(flags_){} void clear(){h.clear();} void add(const std::string &key); // adds a string to the histogram count /** A FrequencyReportVector is a vector of report elements when the report is generatedn. 
*/ typedef std::vector FrequencyReportVector; /** makeReport() makes a report and returns a * FrequencyReportVector. */ FrequencyReportVector *makeReport() const; // return a report with all of them FrequencyReportVector *makeReport(int topN) const; // returns just the topN virtual ~HistogramMaker(){} }; std::ostream & operator <<(std::ostream &os,const HistogramMaker::FrequencyReportVector &rep); #endif tcpflow/src/be13_api/aftimer.h0000644000175000017500000001050712263701331015115 0ustar dimadima#ifndef __AFTIMER_H__ #define __AFTIMER_H__ #ifdef __cplusplus #ifndef WIN32 #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #include #include #endif #include #include #include class aftimer { struct timeval t0; bool running; long total_sec; long total_usec; double lap_time_; // time from when we last did a "stop" public: aftimer():t0(),running(false),total_sec(0),total_usec(0),lap_time_(0){} void start(); // start the timer void stop(); // stop the timer time_t tstart() const { return t0.tv_sec;} // time we started double elapsed_seconds() const; // how long timer has been running, total double lap_time() const; // how long the timer is running this time double eta(double fraction_done) const; // calculate ETA in seconds, given fraction std::string hms(long t) const; // turn a number of seconds into h:m:s std::string elapsed_text() const; /* how long we have been running */ std::string eta_text(double fraction_done) const; // h:m:s std::string eta_time(double fraction_done) const; // the actual time }; /* This code in part from * http://social.msdn.microsoft.com/Forums/en/vcgeneral/thread/430449b3-f6dd-4e18-84de-eebd26a8d668 */ #ifdef WIN32 # include # include # ifndef DELTA_EPOCH_IN_MICROSECS # if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) # define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 # else # define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL # endif # endif #endif inline void timestamp(struct timeval *t) { #ifdef WIN32 FILETIME ft; 
GetSystemTimeAsFileTime(&ft); unsigned __int64 tmpres = 0; tmpres |= ft.dwHighDateTime; tmpres <<= 32; tmpres |= ft.dwLowDateTime; /*converting file time to unix epoch*/ tmpres -= DELTA_EPOCH_IN_MICROSECS; tmpres /= 10; /*convert into microseconds*/ t->tv_sec = (long)(tmpres / 1000000UL); t->tv_usec = (long)(tmpres % 1000000UL); #else gettimeofday(t,NULL); #endif } inline void aftimer::start() { timestamp(&t0); running = 1; } inline void aftimer::stop(){ if(running){ struct timeval t; timestamp(&t); total_sec += t.tv_sec - t0.tv_sec; total_usec += t.tv_usec - t0.tv_usec; lap_time_ = (double)(t.tv_sec - t0.tv_sec) + (double)(t.tv_usec - t0.tv_usec)/1000000.0; running = false; } } inline double aftimer::lap_time() const { return lap_time_; } inline double aftimer::elapsed_seconds() const { double ret = (double)total_sec + (double)total_usec/1000000.0; if(running){ struct timeval t; timestamp(&t); ret += t.tv_sec - t0.tv_sec; ret += (t.tv_usec - t0.tv_usec) / 1000000.0; } return ret; } inline std::string aftimer::hms(long t) const { char buf[64]; int days = t / (60*60*24); t = t % (60*60*24); /* what's left */ int h = t / 3600; int m = (t / 60) % 60; int s = t % 60; buf[0] = 0; switch(days){ case 0: snprintf(buf,sizeof(buf),"%2d:%02d:%02d",h,m,s); break; case 1: snprintf(buf,sizeof(buf),"%d day, %2d:%02d:%02d",days,h,m,s); break; default: snprintf(buf,sizeof(buf),"%d days %2d:%02d:%02d",days,h,m,s); } return std::string(buf); } inline std::string aftimer::elapsed_text() const { return hms((int)elapsed_seconds()); } /** * returns the number of seconds until the job is complete. */ inline double aftimer::eta(double fraction_done) const { double t = elapsed_seconds(); if(t<=0) return -1; // can't figure it out if(fraction_done<=0) return -1; // can't figure it out return (t * 1.0/fraction_done - t); } /** * Retuns the number of hours:minutes:seconds until the job is done. 
*/ inline std::string aftimer::eta_text(double fraction_done) const { double e = eta(fraction_done); if(e<0) return std::string("n/a"); // can't figure it out return hms((long)e); } /** * Returns the time when data is due. */ inline std::string aftimer::eta_time(double fraction_done) const { time_t t = time_t(eta(fraction_done)) + time(0); struct tm tm; #ifdef HAVE_LOCALTIME_R localtime_r(&t,&tm); #else tm = *localtime(&t); #endif char buf[64]; snprintf(buf,sizeof(buf),"%02d:%02d:%02d",tm.tm_hour,tm.tm_min,tm.tm_sec); return std::string(buf); } #endif #endif tcpflow/src/be13_api/feature_recorder_set.cpp0000644000175000017500000001407212263701331020215 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #include "config.h" #include "bulk_extractor_i.h" #include "histogram.h" /**************************************************************** *** feature_recorder_set *** No mutex is needed for the feature_recorder_set because it is never *** modified after it is created, only the contained feature_recorders are modified. ****************************************************************/ const std::string feature_recorder_set::ALERT_RECORDER_NAME = "alerts"; const std::string feature_recorder_set::DISABLED_RECORDER_NAME = "disabled"; /* Create an empty recorder */ feature_recorder_set::feature_recorder_set(uint32_t flags_):flags(flags_),seen_set(),input_fname(), outdir(), frm(),map_lock(), histogram_defs(), alert_list(),stop_list(), scanner_stats() { if(flags & SET_DISABLED){ create_name(DISABLED_RECORDER_NAME,false); frm[DISABLED_RECORDER_NAME]->set_flag(feature_recorder::FLAG_DISABLED); } } /** * Create a properly functioning feature recorder set. * If disabled, create a disabled feature_recorder that can respond to functions as requested. 
*/ void feature_recorder_set::init(const feature_file_names_t &feature_files, const std::string &input_fname_, const std::string &outdir_) { input_fname = input_fname_; outdir = outdir_; create_name(feature_recorder_set::ALERT_RECORDER_NAME,false); // make the alert recorder /* Create the requested feature files */ for(std::set::const_iterator it=feature_files.begin();it!=feature_files.end();it++){ create_name(*it,flags & CREATE_STOP_LIST_RECORDERS); } } void feature_recorder_set::flush_all() { for(feature_recorder_map::iterator i = frm.begin();i!=frm.end();i++){ i->second->flush(); } } void feature_recorder_set::close_all() { for(feature_recorder_map::iterator i = frm.begin();i!=frm.end();i++){ i->second->close(); } } bool feature_recorder_set::has_name(std::string name) const { return frm.find(name) != frm.end(); } /* * Gets a feature_recorder_set. */ feature_recorder *feature_recorder_set::get_name(const std::string &name) { const std::string *thename = &name; if(flags & SET_DISABLED){ // if feature recorder set is disabled, return the disabled recorder. 
thename = &feature_recorder_set::DISABLED_RECORDER_NAME; } if(flags & ONLY_ALERT){ thename = &feature_recorder_set::ALERT_RECORDER_NAME; } cppmutex::lock lock(map_lock); feature_recorder_map::const_iterator it = frm.find(*thename); if(it!=frm.end()) return it->second; return(0); // feature recorder does not exist } feature_recorder *feature_recorder_set::create_name_factory(const std::string &outdir_,const std::string &input_fname_,const std::string &name_){ return new feature_recorder(*this,outdir_,input_fname_,name_); } void feature_recorder_set::create_name(const std::string &name,bool create_stop_file) { if(frm.find(name)!=frm.end()){ std::cerr << "create_name: feature recorder '" << name << "' already exists\n"; return; } feature_recorder *fr = create_name_factory(outdir,input_fname,name); feature_recorder *fr_stopped = 0; frm[name] = fr; if(create_stop_file){ std::string name_stopped = name+"_stopped"; fr_stopped = create_name_factory(outdir,input_fname,name_stopped); fr->set_stop_list_recorder(fr_stopped); frm[name_stopped] = fr_stopped; } if(flags & SET_DISABLED) return; // don't open if we are disabled /* Open the output!*/ fr->open(); if(fr_stopped) fr_stopped->open(); } feature_recorder *feature_recorder_set::get_alert_recorder() { return get_name(feature_recorder_set::ALERT_RECORDER_NAME); } /* * uses md5 to determine if a block was prevously seen. 
*/ bool feature_recorder_set::check_previously_processed(const uint8_t *buf,size_t bufsize) { std::string md5 = md5_generator::hash_buf(buf,bufsize).hexdigest(); return seen_set.check_for_presence_and_insert(md5); } void feature_recorder_set::add_stats(const std::string &bucket,double seconds) { cppmutex::lock lock(map_lock); struct pstats &p = scanner_stats[bucket]; // get the location of the stats p.seconds += seconds; p.calls ++; } void feature_recorder_set::get_stats(void *user,stat_callback_t stat_callback) { for(scanner_stats_map::const_iterator it = scanner_stats.begin();it!=scanner_stats.end();it++){ (*stat_callback)(user,(*it).first,(*it).second.calls,(*it).second.seconds); } } void feature_recorder_set::dump_name_count_stats(dfxml_writer &writer) { cppmutex::lock lock(map_lock); writer.push("feature_files"); for(feature_recorder_map::const_iterator ij = frm.begin(); ij != frm.end(); ij++){ writer.set_oneline(true); writer.push("feature_file"); writer.xmlout("name",ij->second->name); writer.xmlout("count",ij->second->count()); writer.pop(); writer.set_oneline(false); } } static const int LINE_LEN = 80; // keep track of where we are on the line void feature_recorder_set::add_histogram(const histogram_def &def) { feature_recorder *fr = get_name(def.feature); if(fr) fr->add_histogram(def); } void feature_recorder_set::dump_histograms(void *user,feature_recorder::dump_callback_t cb, feature_recorder_set::xml_notifier_t xml_error_notifier) { /* Ask each feature recorder to dump its histograms */ for(feature_recorder_map::const_iterator it = frm.begin(); it!=frm.end(); it++){ feature_recorder *fr = it->second; fr->dump_histograms(user,cb,xml_error_notifier); } } tcpflow/src/be13_api/feature_recorder_set.h0000644000175000017500000001105312263701331017656 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #ifndef FEATURE_RECORDER_SET_H #define FEATURE_RECORDER_SET_H #include "feature_recorder.h" #include "cppmutex.h" #include 
"dfxml/src/dfxml_writer.h" #include "dfxml/src/hash_t.h" #include "word_and_context_list.h" #include #include /** \addtogroup internal_interfaces * @{ */ /** \file */ /** * \class feature_recorder_set * A singleton class that holds a set of recorders. * This used to be done with a set, but now it's done with a map. * */ typedef std::map feature_recorder_map; typedef std::setfeature_file_names_t; class feature_recorder_set { // neither copying nor assignment is implemented feature_recorder_set(const feature_recorder_set &fs); feature_recorder_set &operator=(const feature_recorder_set &fs); uint32_t flags; atomic_set seen_set; // hex hash values of pages that have been seen std::string input_fname; // input file std::string outdir; // where output goes feature_recorder_map frm; // map of feature recorders, by name cppmutex map_lock; // locks frm and scanner_stats_map histogram_defs_t histogram_defs; // histograms that are to be created. public: struct pstats { double seconds; uint64_t calls; }; typedef std::map scanner_stats_map; const word_and_context_list *alert_list; /* shold be flagged */ const word_and_context_list *stop_list; /* should be ignored */ scanner_stats_map scanner_stats; static const std::string ALERT_RECORDER_NAME; // the name of the alert recorder static const std::string DISABLED_RECORDER_NAME; // the fake disabled feature recorder /* flags */ static const uint32_t ONLY_ALERT=0x01; // always return the alert recorder static const uint32_t SET_DISABLED=0x02; // the set is effectively disabled; for path-printer static const uint32_t CREATE_STOP_LIST_RECORDERS=0x04; // virtual ~feature_recorder_set() { for(feature_recorder_map::iterator i = frm.begin();i!=frm.end();i++){ delete i->second; } } std::string get_input_fname() const {return input_fname;} std::string get_outdir() const {return outdir;} void set_stop_list(const word_and_context_list *alist){stop_list=alist;} void set_alert_list(const word_and_context_list *alist){alert_list=alist;} /** 
create an emptry feature recorder set. If disabled, create a disabled recorder. */ feature_recorder_set(uint32_t flags_); /** Initialize a feature_recorder_set. Previously this was a constructor, but it turns out that * virtual functions for the create_name_factory aren't honored in constructors. * * init() is called after all of the scanners have been loaded. It * tells each feature file about its histograms (among other * things) */ void init(const feature_file_names_t &feature_files, const std::string &input_fname,const std::string &outdir); void flush_all(); void close_all(); bool has_name(std::string name) const; /* does the named feature exist? */ void set_flag(uint32_t f){flags|=f;} void clear_flag(uint32_t f){flags|=f;} void add_histogram(const histogram_def &def); // adds it to a local set or to the specific feature recorder typedef void (*xml_notifier_t)(const std::string &xmlstring); void dump_histograms(void *user,feature_recorder::dump_callback_t cb, xml_notifier_t xml_error_notifier); virtual feature_recorder *create_name_factory(const std::string &outdir_, const std::string &input_fname_,const std::string &name_); virtual void create_name(const std::string &name,bool create_stop_also); virtual const std::string &get_outdir(){ return outdir;} void add_stats(const std::string &bucket,double seconds); typedef void (*stat_callback_t)(void *user,const std::string &name,uint64_t calls,double seconds); void get_stats(void *user,stat_callback_t stat_callback); void dump_name_count_stats(dfxml_writer &writer); // Management of previously seen data virtual bool check_previously_processed(const uint8_t *buf,size_t bufsize); // NOTE: // only virtual functions may be called by plugins! 
virtual feature_recorder *get_name(const std::string &name); virtual feature_recorder *get_alert_recorder(); }; #endif tcpflow/src/be13_api/Makefile.defs0000644000175000017500000000205012263701331015667 0ustar dimadimaBE13_API= \ be13_api/atomic_set_map.h \ be13_api/aftimer.h \ be13_api/beregex.cpp \ be13_api/beregex.h \ be13_api/bulk_extractor_i.h \ be13_api/cppmutex.h \ be13_api/feature_recorder.cpp \ be13_api/feature_recorder.h \ be13_api/feature_recorder_set.cpp \ be13_api/feature_recorder_set.h \ be13_api/histogram.h \ be13_api/histogram.cpp \ be13_api/net_ethernet.h \ be13_api/pcap_fake.cpp \ be13_api/pcap_fake.h \ be13_api/plugin.cpp \ be13_api/sbuf.cpp \ be13_api/sbuf.h \ be13_api/sbuf_private.h \ be13_api/sbuf_stream.cpp \ be13_api/sbuf_stream.h \ be13_api/unicode_escape.cpp \ be13_api/unicode_escape.h \ be13_api/utf8.h \ be13_api/utf8/checked.h \ be13_api/utf8/core.h \ be13_api/utf8/unchecked.h \ be13_api/utils.cpp \ be13_api/utils.h \ be13_api/word_and_context_list.cpp \ be13_api/word_and_context_list.h tcpflow/src/be13_api/sbuf_stream.h0000644000175000017500000000355412263701331016004 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #ifndef SBUF_STREAM_H #define SBUF_STREAM_H /* required per C++ standard */ #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif using namespace std; #include #include #include #include #include "sbuf.h" /** \addtogroup bulk_extractor_APIs * @{ */ /** \file */ /** * sbuf_stream provides the get services of sbuf_t but wrapped in a Stream interface. * Note that sbuf_stream is not particularly optimized; it is simply a wrapper. 
*/ class sbuf_stream { private: const sbuf_t sbuf; size_t offset; public: sbuf_stream(const sbuf_t &sbuf_); ~sbuf_stream(); void seek(size_t offset); size_t tell(); /** * \name integer-based stream readers * @{ */ uint8_t get8u(); uint16_t get16u(); uint32_t get32u(); uint64_t get64u(); uint8_t get8uBE(); uint16_t get16uBE(); uint32_t get32uBE(); uint64_t get64uBE(); uint8_t get8u(sbuf_t::byte_order_t bo); uint16_t get16u(sbuf_t::byte_order_t bo); uint32_t get32u(sbuf_t::byte_order_t bo); uint64_t get64u(sbuf_t::byte_order_t bo); int8_t get8i(); int16_t get16i(); int32_t get32i(); int64_t get64i(); int8_t get8iBE(); int16_t get16iBE(); int32_t get32iBE(); int64_t get64iBE(); int8_t get8i(sbuf_t::byte_order_t bo); int16_t get16i(sbuf_t::byte_order_t bo); int32_t get32i(sbuf_t::byte_order_t bo); int64_t get64i(sbuf_t::byte_order_t bo); /** @} */ /** * \name string and wstring stream readers * @{ */ void getUTF8WithQuoting(string &utf8_string); void getUTF8WithQuoting(size_t num_octets_requested, string &utf8_string); void getUTF16(wstring &utf16_string); void getUTF16(size_t num_code_units_requested, wstring &utf16_string); /** @} */ }; #endif tcpflow/src/be13_api/sbuf_private.h0000644000175000017500000001122012263701331016150 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ #ifndef SBUF_PRIVATE_H #define SBUF_PRIVATE_H #include #ifdef HAVE_MMAP #include #endif inline int sbuf_t::memcmp(const uint8_t *cbuf,size_t at,size_t len) const { if(left(at) < len) throw sbuf_t::range_exception_t(); return ::memcmp(this->buf+at,cbuf,len); } /** * Unsigned get interfaces */ inline uint8_t sbuf_t::get8u(size_t i) const { if(i+1>bufsize) throw sbuf_t::range_exception_t(); return this->buf[i]; } inline uint16_t sbuf_t::get16u(size_t i) const { if(i+2>bufsize) throw sbuf_t::range_exception_t(); return 0 | (uint16_t)(this->buf[i+0]<<0) | (uint16_t)(this->buf[i+1]<<8); } inline uint32_t sbuf_t::get32u(size_t i) const { if(i+4>bufsize) throw 
sbuf_t::range_exception_t(); return 0 | (uint32_t)(this->buf[i+0]<<0) | (uint32_t)(this->buf[i+1]<<8) | (uint32_t)(this->buf[i+2]<<16) | (uint32_t)(this->buf[i+3]<<24); } inline uint64_t sbuf_t::get64u(size_t i) const { if(i+8>bufsize) throw sbuf_t::range_exception_t(); return 0 | ((uint64_t)(this->buf[i+0])<<0) | ((uint64_t)(this->buf[i+1])<<8) | ((uint64_t)(this->buf[i+2])<<16) | ((uint64_t)(this->buf[i+3])<<24) | ((uint64_t)(this->buf[i+4])<<32) | ((uint64_t)(this->buf[i+5])<<40) | ((uint64_t)(this->buf[i+6])<<48) | ((uint64_t)(this->buf[i+7])<<56); } inline uint8_t sbuf_t::get8uBE(size_t i) const { if(i+1>bufsize) throw sbuf_t::range_exception_t(); return this->buf[i]; } inline uint16_t sbuf_t::get16uBE(size_t i) const { if(i+2>bufsize) throw sbuf_t::range_exception_t(); return 0 | (uint16_t)(this->buf[i+1]<<0) | (uint16_t)(this->buf[i+0]<<8); } inline uint32_t sbuf_t::get32uBE(size_t i) const { if(i+4>bufsize) throw sbuf_t::range_exception_t(); return 0 | (uint32_t)(this->buf[i+3]<<0) | (uint32_t)(this->buf[i+2]<<8) | (uint32_t)(this->buf[i+1]<<16) | (uint32_t)(this->buf[i+0]<<24); } inline uint64_t sbuf_t::get64uBE(size_t i) const { if(i+8>bufsize) throw sbuf_t::range_exception_t(); return 0 | ((uint64_t)(this->buf[i+7])<<0) | ((uint64_t)(this->buf[i+6])<<8) | ((uint64_t)(this->buf[i+5])<<16) | ((uint64_t)(this->buf[i+4])<<24) | ((uint64_t)(this->buf[i+3])<<32) | ((uint64_t)(this->buf[i+2])<<40) | ((uint64_t)(this->buf[i+1])<<48) | ((uint64_t)(this->buf[i+0])<<56); } inline uint8_t sbuf_t::get8u(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get8u(i) : get8uBE(i); } inline uint16_t sbuf_t::get16u(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get16u(i) : get16uBE(i); } inline uint32_t sbuf_t::get32u(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get32u(i) : get32uBE(i); } inline uint64_t sbuf_t::get64u(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? 
get64u(i) : get64uBE(i); } /** * Signed get interfaces simply call the unsigned interfaces and * the return gets cast. */ inline int8_t sbuf_t::get8i(size_t i) const { return get8u(i);} inline int16_t sbuf_t::get16i(size_t i) const { return get16u(i);} inline int32_t sbuf_t::get32i(size_t i) const { return get32u(i);} inline int64_t sbuf_t::get64i(size_t i) const { return get64u(i);} inline int8_t sbuf_t::get8iBE(size_t i) const { return get8uBE(i);} inline int16_t sbuf_t::get16iBE(size_t i) const { return get16uBE(i);} inline int32_t sbuf_t::get32iBE(size_t i) const { return get32uBE(i);} inline int64_t sbuf_t::get64iBE(size_t i) const { return get64uBE(i);} inline int8_t sbuf_t::get8i(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get8u(i) : get8uBE(i); } inline int16_t sbuf_t::get16i(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get16u(i) : get16uBE(i); } inline int32_t sbuf_t::get32i(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? get32u(i) : get32uBE(i); } inline int64_t sbuf_t::get64i(size_t i,sbuf_t::byte_order_t bo) const { return bo==BO_LITTLE_ENDIAN ? 
get64u(i) : get64uBE(i); } inline void sbuf_t::release() { #ifdef HAVE_MMAP if(should_unmap && buf){ munmap((void *)buf,bufsize); should_unmap = false; buf = 0; } #endif if(should_close && fd>0){ ::close(fd); should_close = false; fd=0; } if(should_free && buf){ free((void *)buf); should_free = false; buf = 0; } page_number = 0; bufsize = 0; pagesize = 0; } #endif tcpflow/src/be13_api/utf8.h0000644000175000017500000000312612263701331014353 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // Copyright 2006 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "utf8/checked.h" #include "utf8/unchecked.h" #endif // header guard tcpflow/src/be13_api/bulk_extractor_i.h0000644000175000017500000010757612263701331017043 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * By design, this file can be read without reading config.h * #include "config.h" must appear as the first line of your .cpp file. */ #ifndef PACKAGE_NAME #error bulk_extractor_i.h included before config.h #endif #ifndef BULK_EXTRACTOR_I_H #define BULK_EXTRACTOR_I_H #define DEBUG_PEDANTIC 0x0001 // check values more rigorously #define DEBUG_PRINT_STEPS 0x0002 // prints as each scanner is started #define DEBUG_SCANNER 0x0004 // dump all feature writes to stderr #define DEBUG_NO_SCANNERS 0x0008 // do not run the scanners #define DEBUG_DUMP_DATA 0x0010 // dump data as it is seen #define DEBUG_INFO 0x0040 // print extra info #define DEBUG_EXIT_EARLY 1000 // just print the size of the volume and exis #define DEBUG_ALLOCATE_512MiB 1002 // Allocate 512MiB, but don't set any flags /* We need netinet/in.h or windowsx.h */ #ifdef HAVE_NETINET_IN_H # include #endif #include #ifdef WIN32 # include # include # include #endif /* If byte_order hasn't been defined, assume its intel */ #if defined(WIN32) || !defined(__BYTE_ORDER) # define __LITTLE_ENDIAN 1234 # define __BIG_ENDIAN 4321 # define __BYTE_ORDER __LITTLE_ENDIAN #endif #if (__BYTE_ORDER == __LITTLE_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) # error Invalid __BYTE_ORDER #endif /** * \addtogroup plugin_module * @{ */ /** * \file * bulk_extractor scanner plug_in architecture. * * Scanners are called with two parameters: * A reference to a scanner_params (SP) object. * A reference to a recursion_control_block (RCB) object. * * On startup, each scanner is called with a special SP and RCB. * The scanners respond by setting fields in the SP and returning. 
* * When executing, once again each scanner is called with the SP and RCB. * This is the only file that needs to be included for a scanner. * * \li \c phase_startup - scanners are loaded and register the names of the feature files they want. * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs. * \li \c phase_shutdown - scanners are given a chance to shutdown */ #ifndef __cplusplus # error bulk_extractor_i.h requires C++ #endif #include "sbuf.h" #include "utf8.h" #include #include #include namespace be13 { struct hash_def { hash_def():name(),func(){}; std::string name; // v3: (input) function to perform hashing with std::string (*func)(const uint8_t *buf,size_t bufsize); // v3: (input) function to perform hashing with }; }; #include "feature_recorder.h" #include "feature_recorder_set.h" /* Network includes */ /**************************************************************** *** pcap.h --- If we don't have it, fake it. --- ***/ #ifdef HAVE_NETINET_IF_ETHER_H # include #endif #ifdef HAVE_NETINET_IN_H # include #endif #ifdef HAVE_NET_ETHERNET_H # include // for freebsd #endif #if defined(HAVE_LIBPCAP) # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS # pragma GCC diagnostic ignored "-Wredundant-decls" # endif # if defined(HAVE_PCAP_PCAP_H) # include # define GOT_PCAP # endif # if defined(HAVE_PCAP_H) && !defined(GOT_PCAP) # include # define GOT_PCAP # endif # if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP) # include # define GOT_PCAP # endif # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS # pragma GCC diagnostic warning "-Wredundant-decls" # endif #else # include "pcap_fake.h" #endif /** * \class scanner_params * The scanner params class is the primary way that the bulk_extractor framework * communicates with the scanners. * @param sbuf - the buffer to be scanned * @param feature_names - if fs==0, add to feature_names the feature file types that this * scanner records.. 
The names can have a /c appended to indicate * that the feature files should have context enabled. Do not scan. * @param fs - where the features should be saved. Must be provided if feature_names==0. **/ /***************************************************************** *** bulk_extractor has a private implementation of IPv4 and IPv6, *** UDP and TCP. *** *** We did this becuase we found slightly different versions on *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin. *** TCP/IP isn't changing anytime soon, and when it changes (as it *** did with IPv6), these different systems all implemented it slightly *** differently, and that caused a lot of problems for us. *** So the BE13 API has a single implementation and it's good enough *** for our uses. ***/ namespace be13 { #ifndef ETH_ALEN # define ETH_ALEN 6 // ethernet address len #endif #ifndef IPPROTO_TCP # define IPPROTO_TCP 6 /* tcp */ #endif struct ether_addr { uint8_t ether_addr_octet[ETH_ALEN]; } __attribute__ ((__packed__)); /* 10Mb/s ethernet header */ struct ether_header { uint8_t ether_dhost[ETH_ALEN]; /* destination eth addr */ uint8_t ether_shost[ETH_ALEN]; /* source ether addr */ uint16_t ether_type; /* packet type ID field */ } __attribute__ ((__packed__)); /* The mess below is becuase these items are typedefs and * structs on some systems and #defines on other systems * So in the interest of portability we need to define *new* * structures that are only used here */ typedef uint32_t ip4_addr_t; // historical // on windows we use the definition that's in winsock struct ip4_addr { ip4_addr_t addr; }; /* * Structure of an internet header, naked of options. 
*/ struct ip4 { #if __BYTE_ORDER == __LITTLE_ENDIAN uint8_t ip_hl:4; /* header length */ uint8_t ip_v:4; /* version */ #endif #if __BYTE_ORDER == __BIG_ENDIAN uint8_t ip_v:4; /* version */ uint8_t ip_hl:4; /* header length */ #endif uint8_t ip_tos; /* type of service */ uint16_t ip_len; /* total length */ uint16_t ip_id; /* identification */ uint16_t ip_off; /* fragment offset field */ #define IP_RF 0x8000 /* reserved fragment flag */ #define IP_DF 0x4000 /* dont fragment flag */ #define IP_MF 0x2000 /* more fragments flag */ #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ uint8_t ip_ttl; /* time to live */ uint8_t ip_p; /* protocol */ uint16_t ip_sum; /* checksum */ struct ip4_addr ip_src, ip_dst; /* source and dest address */ } __attribute__ ((__packed__)); struct ip4_dgram { const struct ip4 *header; const uint8_t *payload; uint16_t payload_len; }; /* * IPv6 header structure */ struct ip6_addr { // our own private ipv6 definition union { uint8_t addr8[16]; // three ways to get the data uint16_t addr16[8]; uint32_t addr32[4]; } addr; /* 128-bit IP6 address */ }; struct ip6_hdr { union { struct ip6_hdrctl { uint32_t ip6_un1_flow; /* 20 bits of flow-ID */ uint16_t ip6_un1_plen; /* payload length */ uint8_t ip6_un1_nxt; /* next header */ uint8_t ip6_un1_hlim; /* hop limit */ } ip6_un1; uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ } ip6_ctlun; struct ip6_addr ip6_src; /* source address */ struct ip6_addr ip6_dst; /* destination address */ } __attribute__((__packed__)); struct ip6_dgram { const struct ip6_hdr *header; const uint8_t *payload; uint16_t payload_len; }; /* * TCP header. * Per RFC 793, September, 1981. 
*/ typedef uint32_t tcp_seq; struct tcphdr { uint16_t th_sport; /* source port */ uint16_t th_dport; /* destination port */ tcp_seq th_seq; /* sequence number */ tcp_seq th_ack; /* acknowledgement number */ # if __BYTE_ORDER == __LITTLE_ENDIAN uint8_t th_x2:4; /* (unused) */ uint8_t th_off:4; /* data offset */ # endif # if __BYTE_ORDER == __BIG_ENDIAN uint8_t th_off:4; /* data offset */ uint8_t th_x2:4; /* (unused) */ # endif uint8_t th_flags; # define TH_FIN 0x01 # define TH_SYN 0x02 # define TH_RST 0x04 # define TH_PUSH 0x08 # define TH_ACK 0x10 # define TH_URG 0x20 uint16_t th_win; /* window */ uint16_t th_sum; /* checksum */ uint16_t th_urp; /* urgent pointer */ }; /* * The packet_info structure records packets after they are read from the pcap library. * It preserves the original pcap information and information decoded from the MAC and * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11 * interfaces. However it does not preserve the full radiotap information. * * packet_info is created to make it easier to write network forensic software. It encapsulates * much of the common knowledge needed to operate on packet-based IP networks. 
* * @param ts - the actual packet time to use (adjusted) * @param pcap_data - Original data offset point from pcap * @param data - the actual packet data, minus the MAC layer * @param datalen - How much data is available at the datalen pointer * */ class packet_info { public: // IPv4 header offsets static const size_t ip4_proto_off = 9; static const size_t ip4_src_off = 12; static const size_t ip4_dst_off = 16; // IPv6 header offsets static const size_t ip6_nxt_hdr_off = 6; static const size_t ip6_plen_off = 4; static const size_t ip6_src_off = 8; static const size_t ip6_dst_off = 24; // TCP header offsets static const size_t tcp_sport_off = 0; static const size_t tcp_dport_off = 2; class frame_too_short : public std::logic_error { public: frame_too_short() : std::logic_error("frame too short to contain requisite network structures") {} }; enum vlan_t {NO_VLAN=-1}; /** create a packet, usually an IP packet. * @param d - start of MAC packet * @param d2 - start of IP data */ packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d, const struct timeval &ts_,const uint8_t *d2,size_t dl2): pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){} packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d): pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){} const int pcap_dlt; // data link type; needed by libpcap, not provided const struct pcap_pkthdr *pcap_hdr; // provided by libpcap const u_char *pcap_data; // provided by libpcap; where the MAC layer begins const struct timeval &ts; // when packet received; possibly modified before packet_info created const uint8_t *const ip_data; // pointer to where ip data begins const size_t ip_datalen; // length of ip data static u_short nshort(const u_char *buf,size_t pos); // return a network byte order short at offset pos int ip_version() const; // returns 4, 6 or 0 u_short ether_type() const; // returns 0 if not IEEE802, otherwise returns ether_type 
int vlan() const; // returns NO_VLAN if not IEEE802 or not VLAN, othererwise VID const uint8_t *get_ether_dhost() const; // returns a pointer to ether dhost if ether packet const uint8_t *get_ether_shost() const; // returns a pointer to ether shost if ether packet // packet typing bool is_ip4() const; bool is_ip6() const; bool is_ip4_tcp() const; bool is_ip6_tcp() const; // packet extraction // IPv4 - return pointers to fields or throws frame_too_short exception const struct in_addr *get_ip4_src() const; const struct in_addr *get_ip4_dst() const; uint8_t get_ip4_proto() const; // IPv6 uint8_t get_ip6_nxt_hdr() const; uint16_t get_ip6_plen() const; const struct ip6_addr *get_ip6_src() const; const struct ip6_addr *get_ip6_dst() const; // TCP uint16_t get_ip4_tcp_sport() const; uint16_t get_ip4_tcp_dport() const; uint16_t get_ip6_tcp_sport() const; uint16_t get_ip6_tcp_dport() const; }; #ifdef DLT_IEEE802 inline u_short packet_info::ether_type() const { if(pcap_dlt==DLT_IEEE802 || pcap_dlt==DLT_EN10MB){ const struct ether_header *eth_header = (struct ether_header *) pcap_data; return ntohs(eth_header->ether_type); } return 0; } #endif #ifndef ETHERTYPE_PUP #define ETHERTYPE_PUP 0x0200 /* Xerox PUP */ #endif #ifndef ETHERTYPE_SPRITE #define ETHERTYPE_SPRITE 0x0500 /* Sprite */ #endif #ifndef ETHERTYPE_IP #define ETHERTYPE_IP 0x0800 /* IP */ #endif #ifndef ETHERTYPE_ARP #define ETHERTYPE_ARP 0x0806 /* Address resolution */ #endif #ifndef ETHERTYPE_REVARP #define ETHERTYPE_REVARP 0x8035 /* Reverse ARP */ #endif #ifndef ETHERTYPE_AT #define ETHERTYPE_AT 0x809B /* AppleTalk protocol */ #endif #ifndef ETHERTYPE_AARP #define ETHERTYPE_AARP 0x80F3 /* AppleTalk ARP */ #endif #ifndef ETHERTYPE_VLAN #define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging */ #endif #ifndef ETHERTYPE_IPX #define ETHERTYPE_IPX 0x8137 /* IPX */ #endif #ifndef ETHERTYPE_IPV6 #define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */ #endif #ifndef ETHERTYPE_LOOPBACK #define ETHERTYPE_LOOPBACK 
0x9000 /* used to test interfaces */ #endif inline u_short packet_info::nshort(const u_char *buf,size_t pos) { return (buf[pos]<<8) | (buf[pos+1]); } inline int packet_info::vlan() const { if(ether_type()==ETHERTYPE_VLAN){ return nshort(pcap_data,sizeof(struct ether_header)); } return -1; } inline int packet_info::ip_version() const { /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */ if (ip_datalen >= sizeof(struct ip4)) { const struct ip4 *ip_header = (struct ip4 *) ip_data; switch(ip_header->ip_v){ case 4: return 4; case 6: return 6; } } return 0; } // packet typing inline bool packet_info::is_ip4() const { return ip_version() == 4; } inline bool packet_info::is_ip6() const { return ip_version() == 6; } inline bool packet_info::is_ip4_tcp() const { if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) { return false; } return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP; return false; } inline bool packet_info::is_ip6_tcp() const { if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) { return false; } return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP; } // packet extraction // precondition: the apropriate packet type function must return true before using these functions. // example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport() // Get ether addresses; should this handle vlan and such? 
inline const uint8_t *packet_info::get_ether_dhost() const { if(pcap_hdr->caplen < sizeof(struct ether_addr)){ throw new frame_too_short(); } return ((const struct ether_header *)pcap_data)->ether_dhost; } inline const uint8_t *packet_info::get_ether_shost() const { if(pcap_hdr->caplen < sizeof(struct ether_addr)){ throw new frame_too_short(); } return ((const struct ether_header *)pcap_data)->ether_shost; } // IPv4 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN # pragma GCC diagnostic ignored "-Wcast-align" # endif inline const struct in_addr *packet_info::get_ip4_src() const { if(ip_datalen < sizeof(struct ip4)) { throw new frame_too_short(); } return (const struct in_addr *) ip_data + ip4_src_off; } inline const struct in_addr *packet_info::get_ip4_dst() const { if(ip_datalen < sizeof(struct ip4)) { throw new frame_too_short(); } return (const struct in_addr *) ip_data + ip4_dst_off; } # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN # pragma GCC diagnostic warning "-Wcast-align" # endif inline uint8_t packet_info::get_ip4_proto() const { if(ip_datalen < sizeof(struct ip4)) { throw new frame_too_short(); } return *((uint8_t *) (ip_data + ip4_proto_off)); } // IPv6 inline uint8_t packet_info::get_ip6_nxt_hdr() const { if(ip_datalen < sizeof(struct ip6_hdr)) { throw new frame_too_short(); } return *((uint8_t *) (ip_data + ip6_nxt_hdr_off)); } inline uint16_t packet_info::get_ip6_plen() const { if(ip_datalen < sizeof(struct ip6_hdr)) { throw new frame_too_short(); } //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off))); return nshort(ip_data,ip6_plen_off); } # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN # pragma GCC diagnostic ignored "-Wcast-align" # endif inline const struct ip6_addr *packet_info::get_ip6_src() const { if(ip_datalen < sizeof(struct ip6_hdr)) { throw new frame_too_short(); } return (const struct ip6_addr *) ip_data + ip6_src_off; } inline const struct ip6_addr *packet_info::get_ip6_dst() const { if(ip_datalen < sizeof(struct ip6_hdr)) { throw new frame_too_short(); } return (const 
struct ip6_addr *) ip_data + ip6_dst_off; } # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN # pragma GCC diagnostic warning "-Wcast-align" # endif // TCP inline uint16_t packet_info::get_ip4_tcp_sport() const { if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) { throw new frame_too_short(); } //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off))); return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off); } inline uint16_t packet_info::get_ip4_tcp_dport() const { if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) { throw new frame_too_short(); } //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off))); return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); // } inline uint16_t packet_info::get_ip6_tcp_sport() const { if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) { throw new frame_too_short(); } //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off))); return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); // } inline uint16_t packet_info::get_ip6_tcp_dport() const { if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) { throw new frame_too_short(); } //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off))); return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); // } }; typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb); typedef void process_t(const class scanner_params &sp); typedef void packet_callback_t(void *user,const be13::packet_info &pi); /** scanner_info gets filled in by the scanner to tell the caller about the scanner. * */ class scanner_info { private: static std::stringstream helpstream; // where scanner info help messages are saved. 
// default copy construction and assignment are meaningless // and not implemented scanner_info(const scanner_info &i); scanner_info &operator=(const scanner_info &i); public: static std::string helpstr(){return helpstream.str();} typedef std::map config_t; // configuration for scanner passed in /* scanner flags */ static const int SCANNER_DISABLED = 0x001; // v1: enabled by default static const int SCANNER_NO_USAGE = 0x002; // v1: do not show scanner in usage static const int SCANNER_NO_ALL = 0x004; // v2: do not enable with -eall static const int SCANNER_FIND_SCANNER = 0x008; // v2: this scanner uses the find_list static const int SCANNER_RECURSE = 0x010; // v3: this scanner will recurse static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size static const int SCANNER_WANTS_NGRAMS = 0x040; // v3: Scanner gets buffers that are constant n-grams static const int SCANNER_FAST_FIND = 0x080; // v3: This scanner is a very fast FIND scanner static const int SCANNER_DEPTH_0 = 0x100; // v3: scanner only runs at detph 0 by default static const int CURRENT_SI_VERSION = 4; static const std::string flag_to_string(const int flag){ std::string ret; if(flag==0) ret += "NONE "; if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED "; if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE "; if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL "; if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER "; if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE "; if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND "; if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS "; return ret; } /* Global config is passed to each scanner as a pointer when it is loaded. * Scanner histograms are added to 'histograms' by machinery. 
*/ struct scanner_config { scanner_config():namevals(),debug(),hasher() /* ,histograms() */{}; virtual ~scanner_config(){} config_t namevals; // v3: (input) name=val map int debug; // v3: (input) current debug level struct be13::hash_def hasher; // v3: (input) hasher to use }; // never change the order or delete old fields, or else you will // break backwards compatability scanner_info():si_version(CURRENT_SI_VERSION), name(),author(),description(),url(),scanner_version(),flags(0),feature_names(), histogram_defs(),packet_user(),packet_cb(),config(){} /* PASSED FROM SCANNER to API: */ int si_version; // version number for this structure std::string name; // v1: (output) scanner name std::string author; // v1: (output) who wrote me? std::string description; // v1: (output) what do I do? std::string url; // v1: (output) where I come from std::string scanner_version; // v1: (output) version for the scanner uint64_t flags; // v1: (output) flags std::set feature_names; // v1: (output) features I need histogram_defs_t histogram_defs; // v1: (output) histogram definition info void *packet_user; // v2: (output) data for network callback packet_callback_t *packet_cb; // v2: (output) callback for processing network packets, or NULL /* PASSED FROM API TO SCANNER; access with functions below */ const scanner_config *config; // v3: (intput to scanner) config // These methods are implemented in the plugin system for the scanner to get config information. 
// The get_config methods should be called on the si object during PHASE_STARTUP virtual void get_config(const scanner_info::config_t &c, const std::string &name,std::string *val,const std::string &help); virtual void get_config(const std::string &name,std::string *val,const std::string &help); virtual void get_config(const std::string &name,uint64_t *val,const std::string &help); virtual void get_config(const std::string &name,int32_t *val,const std::string &help); virtual void get_config(const std::string &name,uint32_t *val,const std::string &help); virtual void get_config(const std::string &name,uint16_t *val,const std::string &help); virtual void get_config(const std::string &name,uint8_t *val,const std::string &help); #ifdef __APPLE__ virtual void get_config(const std::string &name,size_t *val,const std::string &help); #define HAVE_GET_CONFIG_SIZE_T #endif virtual void get_config(const std::string &name,bool *val,const std::string &help); virtual ~scanner_info(){}; }; #include /** * The scanner_params class is a way for sending the scanner parameters * for this particular sbuf to be scanned. 
*/ class scanner_params { public: enum print_mode_t {MODE_NONE=0,MODE_HEX,MODE_RAW,MODE_HTTP}; static const int CURRENT_SP_VERSION=3; typedef std::map PrintOptions; static print_mode_t getPrintMode(const PrintOptions &po){ PrintOptions::const_iterator p = po.find("print_mode_t"); if(p != po.end()){ if(p->second=="MODE_NONE") return MODE_NONE; if(p->second=="MODE_HEX") return MODE_HEX; if(p->second=="MODE_RAW") return MODE_RAW; if(p->second=="MODE_HTTP") return MODE_HTTP; } return MODE_NONE; } static void setPrintMode(PrintOptions &po,int mode){ switch(mode){ default: case MODE_NONE:po["print_mode_t"]="MODE_NONE";return; case MODE_HEX:po["print_mode_t"]="MODE_HEX";return; case MODE_RAW:po["print_mode_t"]="MODE_RAW";return; case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return; } } // phase_t specifies when the scanner is being called typedef enum { PHASE_NONE = -1, PHASE_STARTUP = 0, // called in main thread when scanner loads; called on EVERY scanner (called for help) PHASE_INIT = 3, // called in main thread for every ENABLED scanner after all scanners loaded PHASE_THREAD_BEFORE_SCAN = 4, // called in worker thread for every ENABLED scanner before first scan PHASE_SCAN = 1, // called in worker thread for every ENABLED scanner to scan an sbuf PHASE_SHUTDOWN = 2, // called in main thread for every ENABLED scanner when scanner is shutdown } phase_t ; static PrintOptions no_options; // in common.cpp /******************** *** CONSTRUCTORS *** ********************/ /* A scanner params with all of the instance variables, typically for scanning */ scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_, PrintOptions &print_options_): sp_version(CURRENT_SP_VERSION), phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){ } /* A scanner params with no print options */ scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_): sp_version(CURRENT_SP_VERSION), 
phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){ } /* A scanner params with no print options but an xmlstream */ scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd): sp_version(CURRENT_SP_VERSION), phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){ } /** Construct a scanner_params for recursion from an existing sp and a new sbuf. * Defaults to phase1 */ scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new): sp_version(CURRENT_SP_VERSION),phase(sp_existing.phase), sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1), print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){ assert(sp_existing.sp_version==CURRENT_SP_VERSION); }; /** * A scanner params with an empty info */ /************************** *** INSTANCE VARIABLES *** **************************/ const int sp_version; /* version number of this structure */ const phase_t phase; /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */ const sbuf_t &sbuf; /* v1: what to scan / only valid in SCAN_PHASE */ class feature_recorder_set &fs; /* v1: where to put the results / only valid in SCAN_PHASE */ const uint32_t depth; /* v1: how far down are we? / only valid in SCAN_PHASE */ PrintOptions &print_options; /* v1: how to print / NOT USED IN SCANNERS */ scanner_info *info; /* v2: set/get parameters on startup */ std::stringstream *sxml; /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */ }; inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){ os << "scanner_params(" << sp.sbuf << ")"; return os; }; class recursion_control_block { public: /** * @param callback_ - the function to call back * @param partName_ - the part of the forensic path processed by this scanner. 
*/ recursion_control_block(process_t *callback_,std::string partName_): callback(callback_),partName(partName_){} process_t *callback; std::string partName; /* eg "ZIP", "GZIP" */ }; /* plugin.cpp. This will become a class... */ class scanner_def { public:; static uint32_t max_depth; // maximum depth to scan for the scanners static uint32_t max_ngram; // maximum ngram size to change scanner_def():scanner(0),enabled(false),info(),pathPrefix(){}; scanner_t *scanner; // pointer to the primary entry point bool enabled; // is enabled? scanner_info info; // info block sent to and returned by scanner std::string pathPrefix; /* path prefix for recursive scanners */ }; namespace be13 { /* plugin.cpp */ struct plugin { typedef std::vector scanner_vector; static scanner_vector current_scanners; // current scanners static bool dup_data_alerts; // notify when duplicate data is not processed static uint64_t dup_data_encountered; // amount of dup data encountered static void set_scanner_debug(int debug); static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc); // load a scanner from a file static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory static void load_scanner_directories(const std::vector &dirnames,const scanner_info::scanner_config &sc); static void load_scanner_packet_handlers(); // send every enabled scanner the phase message static void message_enabled_scanners(scanner_params::phase_t phase,feature_recorder_set &fs); // returns the named scanner, or 0 if no scanner of that name static scanner_t *find_scanner(const std::string &name); static void get_enabled_scanners(std::vector &svector); // put the enabled scanners into 
the vector static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs); static bool find_scanner_enabled(); // return true if a find scanner is enabled // print info about the scanners: static void scanners_disable_all(); // saves a command to disable all static void scanners_enable_all(); // enable all of them static void set_scanner_enabled(const std::string &name,bool enable); static void set_scanner_enabled_all(bool enable); static void scanners_enable(const std::string &name); // saves a command to enable this scanner static void scanners_disable(const std::string &name); // saves a command to disable this scanner static void scanners_process_enable_disable_commands(); // process the enable/disable and config commands static void scanners_init(feature_recorder_set &fs); // init the scanners static void info_scanners(bool detailed_info, bool detailed_settings, scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt); /* Run the phases on the scanners */ static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown static uint32_t get_max_depth_seen(); static void process_sbuf(const class scanner_params &sp); /* process for feature extraction */ static void process_packet(const be13::packet_info &pi); /* recorders */ static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names); }; }; inline std::string itos(int i){ std::stringstream ss; ss << i;return ss.str();} inline std::string dtos(double d){ std::stringstream ss; ss << d;return ss.str();} inline std::string utos(unsigned int i){ std::stringstream ss; ss << i;return ss.str();} inline std::string utos(uint64_t i){ std::stringstream ss; ss << i;return ss.str();} inline std::string utos(uint16_t i){ std::stringstream ss; ss << i;return ss.str();} inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up std::string utf8_line; try { 
utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line)); } catch(utf8::invalid_utf16){ /* Exception thrown: bad UTF16 encoding */ utf8_line = ""; } return utf8_line; } // truncate string at the matching char inline void truncate_at(std::string &line, char ch) { size_t pos = line.find(ch); if(pos != std::string::npos) line.resize(pos); } #ifndef HAVE_ISXDIGIT inline int isxdigit(int c) { return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); } #endif /* Useful functions for scanners */ #define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000 #define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL /* * 11644473600 is the number of seconds between the Win32 epoch * and the Unix epoch. * * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992 * gmtime_r() is Linux-specific. You'll find a copy in util.cpp for Windows. */ #ifndef HAVE_GMTIME_R void gmtime_r(time_t *t,struct tm *tm); #endif inline std::string microsoftDateToISODate(const uint64_t &time) { time_t tmp = (time / ONE_HUNDRED_NANO_SEC_TO_SECONDS) - SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH; struct tm time_tm; gmtime_r(&tmp, &time_tm); char buf[256]; strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm); // Zulu time return std::string(buf); } /* Many internal windows and Linux structures require a valid printable name in ASCII */ inline bool validASCIIName(const std::string &name) { for(size_t i = 0; i< name.size(); i++){ if(((u_char)name[i]) & 0x80) return false; // high bit should not be set if(((u_char)name[i]) < ' ') return false; // should not be control character } return true; } #endif tcpflow/src/be13_api/README.md0000644000175000017500000000426212263701331014575 0ustar dimadimabe13_api ======== API for bulk_extractor version 1.3 Set remote origin: http://stackoverflow.com/questions/7259535/git-setting-up-a-remote-origin git remote add origin git@github.com:simsong/be13_api.git http://stackoverflow.com/questions/5828324/update-git-submodule Update to this repository to 
master: git pull origin master Push changes in this repository to master: git push origin master If you get this error: error: failed to push some refs to 'git@github.com:simsong/be13_api.git' hint: Updates were rejected because a pushed branch tip is behind its remote hint: counterpart. If you did not intend to push that branch, you may want to hint: specify branches to push or set the 'push.default' configuration hint: variable to 'current' or 'upstream' to push only the current branch. $ Do this: $ git checkout -b tmp ; git checkout master ; git merge tmp ; git branch -d tmp ; git push git@github.com:simsong/be13_api.git master Extended: $ git checkout -b tmp Switched to a new branch 'tmp' $ git checkout master Switched to branch 'master' Your branch is behind 'origin/master' by 8 commits, and can be fast-forwarded. $ git merge tmp Updating 0dbc904..74aca46 Fast-forward CODING_STANDARDS.txt | 4 ++++ README.md | 12 +++++++++++- bulk_extractor_i.h | 14 ++++++++++++-- pcap_fake.cpp | 13 ++++++++++++- pcap_fake.h | 5 +++++ sbuf.cpp | 36 +++++++++++++++++++++--------------- sbuf.h | 41 ++++++++++++++++++++++++----------------- 7 files changed, 89 insertions(+), 36 deletions(-) $ git branch -d tmp Deleted branch tmp (was 74aca46). $ git push git@github.com:simsong/be13_api.git master Counting objects: 7, done. Delta compression using up to 4 threads. Compressing objects: 100% (4/4), done. Writing objects: 100% (4/4), 562 bytes, done. 
Total 4 (delta 2), reused 0 (delta 0) To git@github.com:simsong/be13_api.git 2d14a08..74aca46 master -> master $ Summary: $ git checkout -b newbranch $ git checkout master $ git merge newbranch $ git branch -d newbranch or: $ git checkout -b tmp ; git checkout master ; git merge tmp ; git branch -d tmp ; git push git@github.com:simsong/be13_api.git master tcpflow/src/be13_api/histogram.cpp0000644000175000017500000001477312263701331016027 0ustar dimadima/** * histogram.cpp: * Maintain a histogram for Unicode strings provided as UTF-8 and UTF-16 encodings. * Track number of each coding provided. * */ #include "config.h" #include "bulk_extractor_i.h" #include "unicode_escape.h" #include "histogram.h" #include "utf8.h" using namespace std; ostream & operator << (ostream &os, const HistogramMaker::FrequencyReportVector &rep){ for(HistogramMaker::FrequencyReportVector::const_iterator i = rep.begin(); i!=rep.end();i++){ os << "n=" << i->tally.count << "\t" << validateOrEscapeUTF8(i->value, true, true); if(i->tally.count16>0) os << "\t(utf16=" << i->tally.count16<<")"; os << "\n"; } return os; } HistogramMaker::FrequencyReportVector *HistogramMaker::makeReport() const { FrequencyReportVector *rep = new FrequencyReportVector(); for(HistogramMap::const_iterator it = h.begin(); it != h.end(); it++){ rep->push_back(ReportElement(it->first,it->second)); } sort(rep->begin(),rep->end(),ReportElement::compare); return rep; } HistogramMaker::FrequencyReportVector *HistogramMaker::makeReport(int topN) const { HistogramMaker::FrequencyReportVector *r2 = makeReport(); // gets a new report HistogramMaker::FrequencyReportVector::iterator i = r2->begin(); while(topN>0 && i!=r2->end()){ // iterate through the first set i++; topN--; } r2->erase(i,r2->end()); return r2; } bool HistogramMaker::looks_like_utf16(const std::string &str,bool &little_endian) { if((uint8_t)str[0]==0xff && (uint8_t)str[1]==0xfe){ little_endian = true; return true; // begins with FFFE } if((uint8_t)str[0]==0xfe && 
(uint8_t)str[1]==0xff){ little_endian = false; return true; // begins with FFFE } /* If none of the even characters are NULL and some of the odd characters are NULL, it's UTF-16 */ uint32_t even_null_count = 0; uint32_t odd_null_count = 0; for(size_t i=0;i+11){ little_endian = true; return true; } if(odd_null_count==0 && even_null_count>1){ little_endian = false; return true; } return false; } /** * Takes a string (the key) and adds it to the histogram. * automatically determines if the key is UTF-16 and converts * it to UTF8 if so. */ uint32_t HistogramMaker::debug_histogram_malloc_fail_frequency = 0; void HistogramMaker::add(const std::string &key) { if(key.size()==0) return; // don't deal with zero-length keys /** * "key" passed in is a const reference. * But we might want to change it. So keyToAdd points to what will be added. * If we need to change key, we allocate more memory, and make keyToAdd * point to the memory that was allocated. This way we only make a copy * if we need to make a copy. */ const std::string *keyToAdd = &key; // should be a reference, but that doesn't work std::string *tempKey = 0; // place to hold UTF8 key bool found_utf16 = false; bool little_endian=false; if(looks_like_utf16(*keyToAdd,little_endian)){ /* re-image this string as UTF16*/ found_utf16 = true; std::wstring utf16; for(size_t i=0;isize()>0) { size_t nullpos = tempKey->find('\000'); if(nullpos==string::npos) break; tempKey->erase(nullpos,1); } keyToAdd = tempKey; } catch(utf8::invalid_utf16){ /* Exception; bad UTF16 encoding */ delete tempKey; tempKey = 0; // give up on temp key; otherwise its invalidated below } } /* If any conversion is necessary AND we have not converted key from UTF-16 to UTF-8, * then the original key is still in 'key'. Allocate tempKey and copy key to tempKey. 
*/ if(flags & (FLAG_LOWERCASE |FLAG_NUMERIC)){ if(tempKey==0){ tempKey = new std::string(key); keyToAdd = tempKey; } } /* Apply the flags */ // See: http://stackoverflow.com/questions/1081456/wchar-t-vs-wint-t if(flags & FLAG_LOWERCASE){ /* keyToAdd is UTF-8; convert to UTF-16, downcase, and convert back to UTF-8 */ try{ std::wstring utf16key; utf8::utf8to16(tempKey->begin(),tempKey->end(),std::back_inserter(utf16key)); for(std::wstring::iterator it = utf16key.begin();it!=utf16key.end();it++){ *it = towlower(*it); } /* erase tempKey and copy the utf16 back into tempKey as utf8 */ tempKey->clear(); // erase the characters utf8::utf16to8(utf16key.begin(),utf16key.end(),std::back_inserter(*tempKey)); } catch(utf8::exception){ /* Exception thrown during utf8 or 16 conversions. * So the string we thought was valid utf8 wasn't valid utf8 afterall. * tempKey will remain unchanged. */ } } if(flags & FLAG_NUMERIC){ /* keyToAdd is UTF-8; convert to UTF-16, extract digits, and convert back to UTF-8 */ std::string originalTempKey(*tempKey); try{ std::wstring utf16key; std::wstring utf16digits; utf8::utf8to16(tempKey->begin(),tempKey->end(),std::back_inserter(utf16key)); for(std::wstring::iterator it = utf16key.begin();it!=utf16key.end();it++){ if(iswdigit(*it) || *it==static_cast('+')){ utf16digits.push_back(*it); } } /* convert it back */ tempKey->clear(); // erase the characters utf8::utf16to8(utf16digits.begin(),utf16digits.end(),std::back_inserter(*tempKey)); } catch(utf8::exception){ /* Exception during utf8 or 16 conversions*. * So the string wasn't utf8. 
Fall back to just extracting the digits */ tempKey->clear(); for(std::string::iterator it = originalTempKey.begin(); it!=originalTempKey.end(); it++){ if(isdigit(*it)){ tempKey->push_back(*it); } } } } /* For debugging low-memory handling logic, * specify DEBUG_MALLOC_FAIL to make malloc occasionally fail */ if(debug_histogram_malloc_fail_frequency){ if((h.size() % debug_histogram_malloc_fail_frequency)==(debug_histogram_malloc_fail_frequency-1)){ throw bad_alloc(); } } h[*keyToAdd].count++; if(found_utf16) h[*keyToAdd].count16++; // track how many UTF16s were converted if(tempKey){ // if we allocated tempKey, free it delete tempKey; } } tcpflow/src/be13_api/utils.h0000644000175000017500000000407712263701331014633 0ustar dimadima/**************************************************************** *** utils.h *** *** To use utils.c/utils.h, be sure this is in your configure.ac file: m4_include([be13_api/be13_configure.m4]) *** ****************************************************************/ #ifndef UTILS_H #define UTILS_H #include #include #include #if defined(__cplusplus) #include #include bool ends_with(const std::string &buf,const std::string &with); bool ends_with(const std::wstring &buf,const std::wstring &with); std::vector &split(const std::string &s, char delim, std::vector &elems); std::vector split(const std::string &s, char delim); #endif #ifndef __BEGIN_DECLS #if defined(__cplusplus) #define __BEGIN_DECLS extern "C" { #define __END_DECLS } #else #define __BEGIN_DECLS #define __END_DECLS #endif #endif __BEGIN_DECLS #ifdef HAVE_ERR_H #include #else void err(int eval,const char *fmt,...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__)); void errx(int eval,const char *fmt,...) __attribute__((format(printf, 2, 0))) __attribute__ ((__noreturn__)); void warn(const char *fmt, ...) __attribute__((format(printf, 1, 0))); void warnx(const char *fmt,...) 
__attribute__((format(printf, 1, 0))); #endif #ifndef HAVE_LOCALTIME_R void localtime_r(time_t *t,struct tm *tm); #endif // gmtime.h definition moved to bulk_extractor_i.h //#ifndef HAVE_GMTIME_R //void gmtime_r(time_t *t,struct tm *tm); //#endif int64_t get_filesize(int fd); #ifndef HAVE_ISHEXNUMBER int ishexnumber(int c); inline int ishexnumber(int c) { switch(c){ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return 1; } return 0; } #endif __END_DECLS #endif tcpflow/src/be13_api/utf8/0000755000175000017500000000000012263701331014200 5ustar dimadimatcpflow/src/be13_api/utf8/core.h0000644000175000017500000002677112263701331015316 0ustar dimadima// Copyright 2006 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include namespace utf8 { // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers // You may need to change them to match your system. // These typedefs have the same names as ones from cstdint, or boost/cstdint typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; // Helper code - not intended to be directly called by the library users. May be changed at any time namespace internal { // Unicode constants // Leading (high) surrogates: 0xd800 - 0xdbff // Trailing (low) surrogates: 0xdc00 - 0xdfff const uint16_t LEAD_SURROGATE_MIN = 0xd800u; const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // Maximum valid value for a Unicode code point const uint32_t CODE_POINT_MAX = 0x0010ffffu; template inline uint8_t mask8(octet_type oc) { return static_cast(0xff & oc); } template inline uint16_t mask16(u16_type oc) { return static_cast(0xffff & oc); } template inline bool is_trail(octet_type oc) { return ((mask8(oc) >> 6) == 0x2); } template inline bool is_lead_surrogate(u16 cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); } template inline bool is_trail_surrogate(u16 cp) { return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } template inline bool is_surrogate(u16 cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } template inline bool 
is_code_point_valid(u32 cp) { return (cp <= CODE_POINT_MAX && !is_surrogate(cp)); } template inline typename std::iterator_traits::difference_type sequence_length(octet_iterator lead_it) { uint8_t lead = mask8(*lead_it); if (lead < 0x80) return 1; else if ((lead >> 5) == 0x6) return 2; else if ((lead >> 4) == 0xe) return 3; else if ((lead >> 3) == 0x1e) return 4; else return 0; } template inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) { if (cp < 0x80) { if (length != 1) return true; } else if (cp < 0x800) { if (length != 2) return true; } else if (cp < 0x10000) { if (length != 3) return true; } return false; } enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; /// get_sequence_x functions decode utf-8 sequences of the length x template utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t* code_point) { if (it != end) { if (code_point) *code_point = mask8(*it); return UTF8_OK; } return NOT_ENOUGH_ROOM; } template utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t* code_point) { utf_error ret_code = NOT_ENOUGH_ROOM; if (it != end) { uint32_t cp = mask8(*it); if (++it != end) { if (is_trail(*it)) { cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); if (code_point) *code_point = cp; ret_code = UTF8_OK; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } return ret_code; } template utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t* code_point) { utf_error ret_code = NOT_ENOUGH_ROOM; if (it != end) { uint32_t cp = mask8(*it); if (++it != end) { if (is_trail(*it)) { cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff); if (++it != end) { if (is_trail(*it)) { cp += (*it) & 0x3f; if (code_point) *code_point = cp; ret_code = UTF8_OK; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } return 
ret_code; } template utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t* code_point) { utf_error ret_code = NOT_ENOUGH_ROOM; if (it != end) { uint32_t cp = mask8(*it); if (++it != end) { if (is_trail(*it)) { cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); if (++it != end) { if (is_trail(*it)) { cp += (mask8(*it) << 6) & 0xfff; if (++it != end) { if (is_trail(*it)) { cp += (*it) & 0x3f; if (code_point) *code_point = cp; ret_code = UTF8_OK; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } else ret_code = INCOMPLETE_SEQUENCE; } else ret_code = NOT_ENOUGH_ROOM; } return ret_code; } template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point) { // Save the original value of it so we can go back in case of failure // Of course, it does not make much sense with i.e. stream iterators octet_iterator original_it = it; uint32_t cp = 0; // Determine the sequence length based on the lead octet typedef typename std::iterator_traits::difference_type octet_difference_type; octet_difference_type length = sequence_length(it); if (length == 0) return INVALID_LEAD; // Now that we have a valid sequence length, get trail octets and calculate the code point utf_error err = UTF8_OK; switch (length) { case 1: err = get_sequence_1(it, end, &cp); break; case 2: err = get_sequence_2(it, end, &cp); break; case 3: err = get_sequence_3(it, end, &cp); break; case 4: err = get_sequence_4(it, end, &cp); break; } if (err == UTF8_OK) { // Decoding succeeded. Now, security checks... if (is_code_point_valid(cp)) { if (!is_overlong_sequence(cp, length)){ // Passed! Return here. 
if (code_point) *code_point = cp; ++it; return UTF8_OK; } else err = OVERLONG_SEQUENCE; } else err = INVALID_CODE_POINT; } // Failure branch - restore the original value of the iterator it = original_it; return err; } template inline utf_error validate_next(octet_iterator& it, octet_iterator end) { return validate_next(it, end, 0); } } // namespace internal /// The library API - functions intended to be called by the users // Byte order mark const uint8_t bom[] = {0xef, 0xbb, 0xbf}; template octet_iterator find_invalid(octet_iterator start, octet_iterator end) { octet_iterator result = start; while (result != end) { internal::utf_error err_code = internal::validate_next(result, end); if (err_code != internal::UTF8_OK) return result; } return result; } template inline bool is_valid(octet_iterator start, octet_iterator end) { return (find_invalid(start, end) == end); } template inline bool starts_with_bom (octet_iterator it, octet_iterator end) { return ( ((it != end) && (internal::mask8(*it++)) == bom[0]) && ((it != end) && (internal::mask8(*it++)) == bom[1]) && ((it != end) && (internal::mask8(*it)) == bom[2]) ); } //Deprecated in release 2.3 template inline bool is_bom (octet_iterator it) { return ( (internal::mask8(*it++)) == bom[0] && (internal::mask8(*it++)) == bom[1] && (internal::mask8(*it)) == bom[2] ); } } // namespace utf8 #endif // header guard tcpflow/src/be13_api/utf8/unchecked.h0000644000175000017500000002070012263701331016301 0ustar dimadima// Copyright 2006 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, 
including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "core.h" namespace utf8 { namespace unchecked { template octet_iterator append(uint32_t cp, octet_iterator result) { if (cp < 0x80) // one octet *(result++) = static_cast(cp); else if (cp < 0x800) { // two octets *(result++) = static_cast((cp >> 6) | 0xc0); *(result++) = static_cast((cp & 0x3f) | 0x80); } else if (cp < 0x10000) { // three octets *(result++) = static_cast((cp >> 12) | 0xe0); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } else { // four octets *(result++) = static_cast((cp >> 18) | 0xf0); *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } return result; } template uint32_t next(octet_iterator& it) { uint32_t cp = internal::mask8(*it); typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); switch (length) { case 1: break; case 2: it++; cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); break; case 3: ++it; cp = ((cp << 12) & 
0xffff) + ((internal::mask8(*it) << 6) & 0xfff); ++it; cp += (*it) & 0x3f; break; case 4: ++it; cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff); ++it; cp += (internal::mask8(*it) << 6) & 0xfff; ++it; cp += (*it) & 0x3f; break; } ++it; return cp; } template uint32_t peek_next(octet_iterator it) { return next(it); } template uint32_t prior(octet_iterator& it) { while (internal::is_trail(*(--it))) ; octet_iterator temp = it; return next(temp); } // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) template inline uint32_t previous(octet_iterator& it) { return prior(it); } template void advance (octet_iterator& it, distance_type n) { for (distance_type i = 0; i < n; ++i) next(it); } template typename std::iterator_traits::difference_type distance (octet_iterator first, octet_iterator last) { typename std::iterator_traits::difference_type dist; for (dist = 0; first < last; ++dist) next(first); return dist; } template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) { while (start != end) { uint32_t cp = internal::mask16(*start++); // Take care of surrogate pairs first if (internal::is_lead_surrogate(cp)) { uint32_t trail_surrogate = internal::mask16(*start++); cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; } result = append(cp, result); } return result; } template u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { while (start < end) { uint32_t cp = next(start); if (cp > 0xffff) { //make a surrogate pair *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); } else *result++ = static_cast(cp); } return result; } template octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) { while (start != end) result = append(*(start++), result); return result; } template u32bit_iterator utf8to32 
(octet_iterator start, octet_iterator end, u32bit_iterator result) { while (start < end) (*result++) = next(start); return result; } // The iterator class template class iterator : public std::iterator { octet_iterator it; public: iterator () {}; explicit iterator (const octet_iterator& octet_it): it(octet_it) {} // the default "big three" are OK octet_iterator base () const { return it; } uint32_t operator * () const { octet_iterator temp = it; return next(temp); } bool operator == (const iterator& rhs) const { return (it == rhs.it); } bool operator != (const iterator& rhs) const { return !(operator == (rhs)); } iterator& operator ++ () { std::advance(it, internal::sequence_length(it)); return *this; } iterator operator ++ (int) { iterator temp = *this; std::advance(it, internal::sequence_length(it)); return temp; } iterator& operator -- () { prior(it); return *this; } iterator operator -- (int) { iterator temp = *this; prior(it); return temp; } }; // class iterator } // namespace utf8::unchecked } // namespace utf8 #endif // header guard tcpflow/src/be13_api/utf8/checked.h0000644000175000017500000002730112263701331015742 0ustar dimadima// Copyright 2006 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "core.h" #include namespace utf8 { // Base for the exceptions that may be thrown from the library class exception : public std::exception { }; // Exceptions that may be thrown from the library functions. class invalid_code_point : public exception { uint32_t cp; public: invalid_code_point(uint32_t cp_) : cp(cp_) {} virtual const char* what() const throw() { return "Invalid code point"; } uint32_t code_point() const {return cp;} }; class invalid_utf8 : public exception { uint8_t u8; public: invalid_utf8 (uint8_t u) : u8(u) {} virtual const char* what() const throw() { return "Invalid UTF-8"; } uint8_t utf8_octet() const {return u8;} }; class invalid_utf16 : public exception { uint16_t u16; public: invalid_utf16 (uint16_t u) : u16(u) {} virtual const char* what() const throw() { return "Invalid UTF-16"; } uint16_t utf16_word() const {return u16;} }; class not_enough_room : public exception { public: virtual const char* what() const throw() { return "Not enough space"; } }; /// The library API - functions intended to be called by the users template output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) { while (start != end) { octet_iterator sequence_start = start; internal::utf_error err_code = internal::validate_next(start, end); switch (err_code) { case internal::UTF8_OK : for (octet_iterator 
it = sequence_start; it != start; ++it) *out++ = *it; break; case internal::NOT_ENOUGH_ROOM: throw not_enough_room(); case internal::INVALID_LEAD: append (replacement, out); ++start; break; case internal::INCOMPLETE_SEQUENCE: case internal::OVERLONG_SEQUENCE: case internal::INVALID_CODE_POINT: append (replacement, out); ++start; // just one replacement mark for the sequence while (internal::is_trail(*start) && start != end) ++start; break; } } return out; } template inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) { static const uint32_t replacement_marker = internal::mask16(0xfffd); return replace_invalid(start, end, out, replacement_marker); } template octet_iterator append(uint32_t cp, octet_iterator result) { if (!internal::is_code_point_valid(cp)) throw invalid_code_point(cp); if (cp < 0x80) // one octet *(result++) = static_cast(cp); else if (cp < 0x800) { // two octets *(result++) = static_cast((cp >> 6) | 0xc0); *(result++) = static_cast((cp & 0x3f) | 0x80); } else if (cp < 0x10000) { // three octets *(result++) = static_cast((cp >> 12) | 0xe0); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } else { // four octets *(result++) = static_cast((cp >> 18) | 0xf0); *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } return result; } template uint32_t next(octet_iterator& it, octet_iterator end) { uint32_t cp = 0; internal::utf_error err_code = internal::validate_next(it, end, &cp); switch (err_code) { case internal::UTF8_OK : break; case internal::NOT_ENOUGH_ROOM : throw not_enough_room(); case internal::INVALID_LEAD : case internal::INCOMPLETE_SEQUENCE : case internal::OVERLONG_SEQUENCE : throw invalid_utf8(*it); case internal::INVALID_CODE_POINT : throw invalid_code_point(cp); } return cp; } template uint32_t peek_next(octet_iterator it, 
octet_iterator end) { return next(it, end); } template uint32_t prior(octet_iterator& it, octet_iterator start) { // can't do much if it == start if (it == start) throw not_enough_room(); octet_iterator end = it; // Go back until we hit either a lead octet or start while (internal::is_trail(*(--it))) if (it == start) throw invalid_utf8(*it); // error - no lead byte in the sequence return peek_next(it, end); } /// Deprecated in versions that include "prior" template uint32_t previous(octet_iterator& it, octet_iterator pass_start) { octet_iterator end = it; while (internal::is_trail(*(--it))) if (it == pass_start) throw invalid_utf8(*it); // error - no lead byte in the sequence octet_iterator temp = it; return next(temp, end); } template void advance (octet_iterator& it, distance_type n, octet_iterator end) { for (distance_type i = 0; i < n; ++i) next(it, end); } template typename std::iterator_traits::difference_type distance (octet_iterator first, octet_iterator last) { typename std::iterator_traits::difference_type dist; for (dist = 0; first < last; ++dist) next(first, last); return dist; } template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) { while (start != end) { uint32_t cp = internal::mask16(*start++); // Take care of surrogate pairs first if (internal::is_lead_surrogate(cp)) { if (start != end) { uint32_t trail_surrogate = internal::mask16(*start++); if (internal::is_trail_surrogate(trail_surrogate)) cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; else throw invalid_utf16(static_cast(trail_surrogate)); } else throw invalid_utf16(static_cast(cp)); } // Lone trail surrogate else if (internal::is_trail_surrogate(cp)) throw invalid_utf16(static_cast(cp)); result = append(cp, result); } return result; } template u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { while (start != end) { uint32_t cp = next(start, end); if (cp > 0xffff) { //make a surrogate 
pair *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); } else *result++ = static_cast(cp); } return result; } template octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) { while (start != end) result = append(*(start++), result); return result; } template u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { while (start != end) (*result++) = next(start, end); return result; } // The iterator class template class iterator : public std::iterator { octet_iterator it; octet_iterator range_start; octet_iterator range_end; public: iterator () {}; explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start_, const octet_iterator& range_end_) : it(octet_it), range_start(range_start_), range_end(range_end_) { if (it < range_start || it > range_end) throw std::out_of_range("Invalid utf-8 iterator position"); } // the default "big three" are OK octet_iterator base () const { return it; } uint32_t operator * () const { octet_iterator temp = it; return next(temp, range_end); } bool operator == (const iterator& rhs) const { if (range_start != rhs.range_start || range_end != rhs.range_end) throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); return (it == rhs.it); } bool operator != (const iterator& rhs) const { return !(operator == (rhs)); } iterator& operator ++ () { next(it, range_end); return *this; } iterator operator ++ (int) { iterator temp = *this; next(it, range_end); return temp; } iterator& operator -- () { prior(it, range_start); return *this; } iterator operator -- (int) { iterator temp = *this; prior(it, range_start); return temp; } }; // class iterator } // namespace utf8 #endif //header guard tcpflow/src/be13_api/sbuf.cpp0000644000175000017500000002566612263701331014774 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 
#include "config.h" #include #include #include #include "bulk_extractor_i.h" #include "unicode_escape.h" /**************************************************************** *** SBUF_T ****************************************************************/ #ifndef O_BINARY #define O_BINARY 0 #endif /** * Map a file; falls back to read if mmap is not available */ const std::string sbuf_t::U10001C("\xf4\x80\x80\x9c"); std::string sbuf_t::map_file_delimiter(sbuf_t::U10001C); sbuf_t *sbuf_t::map_file(const std::string &fname) { int fd = open(fname.c_str(),O_RDONLY|O_BINARY,0); if(fd<0) return 0; /* cannot open file */ sbuf_t *sbuf = sbuf_t::map_file(fname,fd); if(sbuf) { sbuf->should_close = true; // be sure to close the file } return sbuf; } /* Map a file when we are given an open fd. * The fd is not closed when the file is unmapped. * If there is no mmap, just allocate space and read the file */ sbuf_t *sbuf_t::map_file(const std::string &fname,int fd) { struct stat st; if(fstat(fd,&st)){ close(fd); return 0; /* cannot stat */ } #ifdef HAVE_MMAP uint8_t *buf = (uint8_t *)mmap(0,st.st_size,PROT_READ,MAP_FILE|MAP_SHARED,fd,0); bool should_free = false; bool should_unmap = true; #else uint8_t *buf = (uint8_t *)malloc(st.st_size); if(buf==0){ /* malloc failed */ return 0; } lseek(fd,0,SEEK_SET); // go to beginning of file size_t r = (size_t)read(fd,(void *)buf,st.st_size); if(r!=(size_t)st.st_size){ free((void *)buf); /* read failed */ return 0; } close(fd); fd = 0; bool should_free = true; bool should_unmap = false; #endif sbuf_t *sbuf = new sbuf_t(pos0_t(fname+sbuf_t::map_file_delimiter), buf, st.st_size, st.st_size, fd, should_unmap, should_free, false); // the caller's job is to close return sbuf; } /* * Returns self or the highest parent of self, whichever is higher */ const sbuf_t *sbuf_t::highest_parent() const { const sbuf_t *hp = this; while(hp->parent != 0){ hp = hp->parent; } return hp; } /** * rawdump the sbuf to an ostream. 
*/ void sbuf_t::raw_dump(std::ostream &os,uint64_t start,uint64_t len) const { for(uint64_t i=start;ibufsize-start) len=bufsize-start; // maximum left uint64_t written = ::write(fd2,buf+start,len); if(written!=len){ std::cerr << "write: cannot write sbuf.\n"; } } static std::string hexch(unsigned char ch) { char buf[4]; snprintf(buf,sizeof(buf),"%02x",ch); return std::string(buf); } /** * hexdump the sbuf. */ void sbuf_t::hex_dump(std::ostream &os,uint64_t start,uint64_t len) const { const size_t bytes_per_line = 32; size_t max_spaces = 0; for(uint64_t i=start;imax_spaces) max_spaces=spaces; for(;spaces=' ' && ch<='~') os << ch; else os << '.'; } os << "\n"; } } /* Write to a file descriptor */ ssize_t sbuf_t::write(int fd_,size_t loc,size_t len) const { if(loc>=bufsize) return 0; // cannot write if(loc+len>bufsize) len=bufsize-loc; // clip at the end return ::write(fd_,buf+loc,len); } /* Write to a FILE */ ssize_t sbuf_t::write(FILE *f,size_t loc,size_t len) const { if(loc>=bufsize) return 0; // cannot write if(loc+len>bufsize) len=bufsize-loc; // clip at the end return ::fwrite(buf+loc,1,len,f); } /* Return a substring */ std::string sbuf_t::substr(size_t loc,size_t len) const { if(loc>=bufsize) return std::string(""); // cannot write if(loc+len>bufsize) len=bufsize-loc; // clip at the end return std::string((const char *)buf+loc,len); } bool sbuf_t::is_constant(size_t off,size_t len,uint8_t ch) const // verify that it's constant { while(len>0){ if(((*this)[off])!=ch) return false; off++; len--; } return true; } void sbuf_t::hex_dump(std::ostream &os) const { hex_dump(os,0,bufsize); } /** * Convert a binary blob to a hex representation */ #ifndef NSRL_HEXBUF_UPPERCASE #define NSRL_HEXBUF_UPPERCASE 0x01 #define NSRL_HEXBUF_SPACE2 0x02 #define NSRL_HEXBUF_SPACE4 0x04 #endif static int hexcharvals[256] = {-1,0}; static const char *hexbuf(char *dst,int dst_len,const unsigned char *bin,int bytes,int flag) { int charcount = 0; const char *start = dst; // remember where 
the start of the string is const char *fmt = (flag & NSRL_HEXBUF_UPPERCASE) ? "%02X" : "%02x"; if(hexcharvals[0]==-1){ /* Need to initialize this */ for(int i=0;i<256;i++){ hexcharvals[i] = 0; } for(int i=0;i<10;i++){ hexcharvals['0'+i] = i; } for(int i=10;i<16;i++){ hexcharvals['A'+i-10] = i; hexcharvals['a'+i-10] = i; } } *dst = 0; // begin with null termination while(bytes>0 && dst_len > 3){ sprintf(dst,fmt,*bin); // convert the next byte dst += 2; bin += 1; dst_len -= 2; bytes--; charcount++; // how many characters if((flag & NSRL_HEXBUF_SPACE2) || ((flag & NSRL_HEXBUF_SPACE4) && charcount%2==0)) *dst++ = ' '; *dst = '\000'; dst_len -= 1; } return start; // return the start } std::ostream & operator <<(std::ostream &os,const sbuf_t &t){ char hex[17]; hexbuf(hex,sizeof(hex),t.buf,8,0); os << "sbuf[page_number=" << t.page_number << " pos0=" << t.pos0 << " " << "buf[0..8]=0x" << hex << " bufsize=" << t.bufsize << " pagesize=" << t.pagesize << "]"; return os; } /** * Read the requested number of UTF-8 format string octets including any \0. */ void sbuf_t::getUTF8WithQuoting(size_t i, size_t num_octets_requested, std::string &utf8_string) const { // clear any residual value utf8_string = ""; if(i>=bufsize) { // past EOF return; } if(i+num_octets_requested>bufsize) { // clip at EOF num_octets_requested = bufsize - i; } utf8_string = std::string((const char *)buf+i,num_octets_requested); // validate or escape utf8_string utf8_string = validateOrEscapeUTF8(utf8_string, true, true); } /** * Read UTF-8 format code octets into string up to but not including \0. */ void sbuf_t::getUTF8WithQuoting(size_t i, std::string &utf8_string) const { // clear any residual value utf8_string = ""; // read octets for (size_t off=i; off=bufsize) { // past EOF return; } if(i+num_code_units_requested*2+1>bufsize) { // clip at EOF num_code_units_requested = ((bufsize-1)-i)/2; } // NOTE: we can't use wstring constructor because we require 16 bits, // not whatever sizeof(wchar_t) is. 
// utf16_string = std::wstring((const char *)buf+i,num_code_units_requested); // get code units individually for (size_t j = 0; j < num_code_units_requested; j++) { utf16_string.push_back(get16u(i + j)); } } /** * Read UTF-16 format code units into wstring up to but not including \U0000. */ void sbuf_t::getUTF16(size_t i, std::wstring &utf16_string) const { // clear any residual value utf16_string = std::wstring(); // read the code units size_t off; for (off=i; off=bufsize) { // past EOF return; } if(i+num_code_units_requested*2+1>bufsize) { // clip at EOF num_code_units_requested = ((bufsize-1)-i)/2; } // NOTE: we can't use wstring constructor because we require 16 bits, // not whatever sizeof(wchar_t) is. // utf16_string = std::wstring((const char *)buf+i,num_code_units_requested); // get code units individually for (size_t j = 0; j < num_code_units_requested; j++) { utf16_string.push_back(get16u(i + j, bo)); } } /** * Read UTF-16 format code units using the specified byte order into wstring up to but not including \U0000. */ void sbuf_t::getUTF16(size_t i, byte_order_t bo, std::wstring &utf16_string) const { // clear any residual value utf16_string = std::wstring(); // read the code units size_t off; for (off=i; off #include #include #include #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #ifdef HAVE_STDINT_H #include #endif #define IS_IN_RANGE(c, f, l) (((c) >= (f)) && ((c) <= (l))) #include "utf8.h" //extern int debug; std::string hexesc(unsigned char ch) { char buf[10]; snprintf(buf,sizeof(buf),"\\x%02X",ch); return std::string(buf); } /** returns true if this is a UTF8 continuation character */ bool utf8cont(unsigned char ch) { return ((ch&0x80)==0x80) && ((ch & 0x40)==0); } /** * After a UTF-8 sequence is decided, this function is called * to determine if the character is invalid. The UTF-8 spec now * says that if a UTF-8 decoding produces an invalid character, or * a surrogate, it is not valid. 
(There were some nasty security * vulnerabilities that were exploited before this came out.) * So we do a lot of checks here. */ bool valid_utf8codepoint(uint32_t unichar) { // Check for invalid characters in the bmp switch(unichar){ case 0xfffe: return false; // reversed BOM case 0xffff: return false; default: break; } if(unichar >= 0xd800 && unichar <=0xdfff) return false; // high and low surrogates if(unichar < 0x10000) return true; // looks like it is in the BMP // check some regions outside the bmp // Plane 1: if(unichar > 0x13fff && unichar < 0x16000) return false; if(unichar > 0x16fff && unichar < 0x1b000) return false; if(unichar > 0x1bfff && unichar < 0x1d000) return false; // Plane 2 if(unichar > 0x2bfff && unichar < 0x2f000) return false; // Planes 3--13 are unassigned if(unichar >= 0x30000 && unichar < 0xdffff) return false; // Above Plane 16 is invalid if(unichar > 0x10FFFF) return false; // above plane 16? return true; // must be valid } /** * validateOrEscapeUTF8 * Input: UTF8 string (possibly corrupt) * Input: do_escape, indicating whether invalid encodings shall be escaped. * Note: * - if not escaping but an invalid encoding is present and DEBUG_PEDANTIC is set, then assert() is called. * - DO NOT USE wchar_t because it is 16-bits on Windows and 32-bits on Unix. * Output: * - UTF8 string. If do_escape is set, then corruptions are escaped in \xFF notation where FF is a hex character. */ //int count=0; bool validateOrEscapeUTF8_validate=false; std::string validateOrEscapeUTF8(const std::string &input, bool escape_bad_utf8,bool escape_backslash) { // // skip the validation if not escaping and not DEBUG_PEDANTIC if (escape_bad_utf8==false && escape_backslash==false && !validateOrEscapeUTF8_validate){ return input; } // validate or escape input std::string output; for(std::string::size_type i =0; i< input.length(); ) { uint8_t ch = (uint8_t)input.at(i); // utf8 1 byte prefix (0xxx xxxx) if((ch & 0x80)==0x00){ // 00 .. 
0x7f if(ch=='\\' && escape_backslash){ // escape the escape character as \x92 output += hexesc(ch); i++; continue; } if( ch < ' '){ // not printable are escaped output += hexesc(ch); i++; continue; } output += ch; // printable is not escaped i++; continue; } // utf8 2 bytes (110x xxxx) prefix if(((ch & 0xe0)==0xc0) // 2-byte prefix && (i+1 < input.length()) && utf8cont((uint8_t)input.at(i+1))){ uint32_t unichar = (((uint8_t)input.at(i) & 0x1f) << 6) | (((uint8_t)input.at(i+1) & 0x3f)); // check for valid 2-byte encoding if(valid_utf8codepoint(unichar) && ((uint8_t)input.at(i)!=0xc0) && (unichar >= 0x80)){ output += (uint8_t)input.at(i++); // byte1 output += (uint8_t)input.at(i++); // byte2 continue; } } // utf8 3 bytes (1110 xxxx prefix) if(((ch & 0xf0) == 0xe0) && (i+2 < input.length()) && utf8cont((uint8_t)input.at(i+1)) && utf8cont((uint8_t)input.at(i+2))){ uint32_t unichar = (((uint8_t)input.at(i) & 0x0f) << 12) | (((uint8_t)input.at(i+1) & 0x3f) << 6) | (((uint8_t)input.at(i+2) & 0x3f)); // check for a valid 3-byte code point if(valid_utf8codepoint(unichar) && unichar>=0x800){ output += (uint8_t)input.at(i++); // byte1 output += (uint8_t)input.at(i++); // byte2 output += (uint8_t)input.at(i++); // byte3 continue; } } // utf8 4 bytes (1111 0xxx prefix) if((( ch & 0xf8) == 0xf0) && (i+3 < input.length()) && utf8cont((uint8_t)input.at(i+1)) && utf8cont((uint8_t)input.at(i+2)) && utf8cont((uint8_t)input.at(i+3))){ uint32_t unichar =( (((uint8_t)input.at(i) & 0x07) << 18) |(((uint8_t)input.at(i+1) & 0x3f) << 12) |(((uint8_t)input.at(i+2) & 0x3f) << 6) |(((uint8_t)input.at(i+3) & 0x3f))); if(valid_utf8codepoint(unichar) && unichar>=0x1000000){ output += (uint8_t)input.at(i++); // byte1 output += (uint8_t)input.at(i++); // byte2 output += (uint8_t)input.at(i++); // byte3 output += (uint8_t)input.at(i++); // byte4 continue; } } if (escape_bad_utf8) { // Just escape the next byte and carry on output += hexesc((uint8_t)input.at(i++)); } else { // fatal if we are debug 
pedantic, otherwise just ignore // note: we shouldn't be here anyway, since if we are not escaping and we are not // pedantic we should have returned above if(validateOrEscapeUTF8_validate){ std::ofstream os("bad_unicode.txt"); os << input << "\n"; os.close(); std::cerr << "INTERNAL ERROR: bad unicode stored in bad_unicode.txt\n"; assert(0); } } } return output; } #ifdef STANDALONE void show(const std::string &ugly) { for(size_t j=0;j, which may declare some of these, we don't * get a bunch of complaints from the C compiler about redefinitions * of these values. * * We declare all of them here so that no file has to include * if all it needs are ETHERTYPE_ values. */ #ifndef ETHERTYPE_LEN #define ETHERTYPE_LEN 2 #endif #ifndef ETHERTYPE_GRE_ISO #define ETHERTYPE_GRE_ISO 0x00FE /* not really an ethertype only used in GRE */ #endif #ifndef ETHERTYPE_PUP #define ETHERTYPE_PUP 0x0200 /* PUP protocol */ #endif #ifndef ETHERTYPE_IP #define ETHERTYPE_IP 0x0800 /* IP protocol */ #endif #ifndef ETHERTYPE_ARP #define ETHERTYPE_ARP 0x0806 /* Addr. resolution protocol */ #endif #ifndef ETHERTYPE_REVARP #define ETHERTYPE_REVARP 0x8035 /* reverse Addr. 
resolution protocol */ #endif #ifndef ETHERTYPE_NS #define ETHERTYPE_NS 0x0600 #endif #ifndef ETHERTYPE_SPRITE #define ETHERTYPE_SPRITE 0x0500 #endif #ifndef ETHERTYPE_TRAIL #define ETHERTYPE_TRAIL 0x1000 #endif #ifndef ETHERTYPE_MOPDL #define ETHERTYPE_MOPDL 0x6001 #endif #ifndef ETHERTYPE_MOPRC #define ETHERTYPE_MOPRC 0x6002 #endif #ifndef ETHERTYPE_DN #define ETHERTYPE_DN 0x6003 #endif #ifndef ETHERTYPE_LAT #define ETHERTYPE_LAT 0x6004 #endif #ifndef ETHERTYPE_SCA #define ETHERTYPE_SCA 0x6007 #endif #ifndef ETHERTYPE_LANBRIDGE #define ETHERTYPE_LANBRIDGE 0x8038 #endif #ifndef ETHERTYPE_DECDNS #define ETHERTYPE_DECDNS 0x803c #endif #ifndef ETHERTYPE_DECDTS #define ETHERTYPE_DECDTS 0x803e #endif #ifndef ETHERTYPE_VEXP #define ETHERTYPE_VEXP 0x805b #endif #ifndef ETHERTYPE_VPROD #define ETHERTYPE_VPROD 0x805c #endif #ifndef ETHERTYPE_ATALK #define ETHERTYPE_ATALK 0x809b #endif #ifndef ETHERTYPE_AARP #define ETHERTYPE_AARP 0x80f3 #endif #ifndef ETHERTYPE_8021Q #define ETHERTYPE_8021Q 0x8100 #endif #ifndef ETHERTYPE_IPX #define ETHERTYPE_IPX 0x8137 #endif #ifndef ETHERTYPE_IPV6 #define ETHERTYPE_IPV6 0x86dd #endif #ifndef ETHERTYPE_PPP #define ETHERTYPE_PPP 0x880b #endif #ifndef ETHERTYPE_SLOW #define ETHERTYPE_SLOW 0x8809 #endif #ifndef ETHERTYPE_MPLS #define ETHERTYPE_MPLS 0x8847 #endif #ifndef ETHERTYPE_MPLS_MULTI #define ETHERTYPE_MPLS_MULTI 0x8848 #endif #ifndef ETHERTYPE_PPPOED #define ETHERTYPE_PPPOED 0x8863 #endif #ifndef ETHERTYPE_PPPOES #define ETHERTYPE_PPPOES 0x8864 #endif #ifndef ETHERTYPE_JUMBO #define ETHERTYPE_JUMBO 0x8870 #endif #ifndef ETHERTYPE_EAPOL #define ETHERTYPE_EAPOL 0x888e #endif #ifndef ETHERTYPE_LOOPBACK #define ETHERTYPE_LOOPBACK 0x9000 #endif #ifndef ETHERTYPE_VMAN #define ETHERTYPE_VMAN 0x9100 /* Extreme VMAN Protocol */ #endif #ifndef ETHERTYPE_ISO #define ETHERTYPE_ISO 0xfefe /* nonstandard - used in Cisco HDLC encapsulation */ #endif //extern const struct tok ethertype_values[]; #endif 
tcpflow/src/wifipcap/prism.h0000644000175000017500000000201112263701151015026 0ustar dimadima #include "os.h" /* // prism header: added (from wlan-ng) #define WLAN_DEVNAMELEN_MAX 16 typedef struct { uint32_t did; uint16_t status; uint16_t len; uint32_t data; } __attribute__((__packed__)) p80211item_uint32_t; typedef struct { uint32_t msgcode; uint32_t msglen; uint8_t devname[WLAN_DEVNAMELEN_MAX]; p80211item_uint32_t hosttime; p80211item_uint32_t mactime; p80211item_uint32_t channel; p80211item_uint32_t rssi; p80211item_uint32_t sq; p80211item_uint32_t signal; p80211item_uint32_t noise; p80211item_uint32_t rate; p80211item_uint32_t istx; p80211item_uint32_t frmlen; } __attribute__((__packed__)) prism2_pkthdr; */ #ifdef _WIN32 #pragma pack(push, 1) #endif struct prism2_pkthdr { u_int32_t host_time; u_int32_t mac_time; u_int32_t channel; u_int32_t rssi; u_int32_t sq; int signal; int noise; u_int32_t rate; u_int32_t istx; u_int32_t frmlen; } _PACKED_; #ifdef _WIN32 #pragma pack(pop) #endif tcpflow/src/wifipcap/extract.h0000644000175000017500000001302512263701151015355 0ustar dimadima/* * Copyright (c) 1992, 1993, 1994, 1995, 1996 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that: (1) source code distributions * retain the above copyright notice and this paragraph in its entirety, (2) * distributions including binary code include the above copyright notice and * this paragraph in its entirety in the documentation or other materials * provided with the distribution, and (3) all advertising materials mentioning * features or use of this software display the following acknowledgement: * ``This product includes software developed by the University of California, * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of * the University nor the names of its contributors may be used to endorse * or promote products derived from this software without specific prior * written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * @(#) $Header: /home/cvs/wifitools/wifipcap/extract.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ #ifndef UNI_EXTRACT_H #define UNI_EXTRACT_H /* * Macros to extract possibly-unaligned big-endian integral values. */ #ifdef LBL_ALIGN /* * The processor doesn't natively handle unaligned loads. */ #ifdef HAVE___ATTRIBUTE__ /* * We have __attribute__; we assume that means we have __attribute__((packed)). * Declare packed structures containing a u_int16_t and a u_int32_t, * cast the pointer to point to one of those, and fetch through it; * the GCC manual doesn't appear to explicitly say that * __attribute__((packed)) causes the compiler to generate unaligned-safe * code, but it apppears to do so. * * We do this in case the compiler can generate, for this instruction set, * better code to do an unaligned load and pass stuff to "ntohs()" or * "ntohl()" than the code to fetch the bytes one at a time and * assemble them. 
(That might not be the case on a little-endian platform, * where "ntohs()" and "ntohl()" might not be done inline.) */ typedef struct { u_int16_t val; } __attribute__((packed)) unaligned_u_int16_t; typedef struct { u_int32_t val; } __attribute__((packed)) unaligned_u_int32_t; #define EXTRACT_16BITS(p) \ ((u_int16_t)ntohs(((const unaligned_u_int16_t *)(p))->val)) #define EXTRACT_32BITS(p) \ ((u_int32_t)ntohl(((const unaligned_u_int32_t *)(p))->val)) #define EXTRACT_64BITS(p) \ ((u_int64_t)(((u_int64_t)ntohl(((const unaligned_u_int32_t *)(p) + 0)->val)) << 32 | \ ((u_int64_t)ntohl(((const unaligned_u_int32_t *)(p) + 1)->val)) << 0)) #else /* HAVE___ATTRIBUTE__ */ /* * We don't have __attribute__, so do unaligned loads of big-endian * quantities the hard way - fetch the bytes one at a time and * assemble them. */ #define EXTRACT_16BITS(p) \ ((u_int16_t)((u_int16_t)*((const u_int8_t *)(p) + 0) << 8 | \ (u_int16_t)*((const u_int8_t *)(p) + 1))) #define EXTRACT_32BITS(p) \ ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 0) << 24 | \ (u_int32_t)*((const u_int8_t *)(p) + 1) << 16 | \ (u_int32_t)*((const u_int8_t *)(p) + 2) << 8 | \ (u_int32_t)*((const u_int8_t *)(p) + 3))) #define EXTRACT_64BITS(p) \ ((u_int64_t)((u_int64_t)*((const u_int8_t *)(p) + 0) << 56 | \ (u_int64_t)*((const u_int8_t *)(p) + 1) << 48 | \ (u_int64_t)*((const u_int8_t *)(p) + 2) << 40 | \ (u_int64_t)*((const u_int8_t *)(p) + 3) << 32 | \ (u_int64_t)*((const u_int8_t *)(p) + 4) << 24 | \ (u_int64_t)*((const u_int8_t *)(p) + 5) << 16 | \ (u_int64_t)*((const u_int8_t *)(p) + 6) << 8 | \ (u_int64_t)*((const u_int8_t *)(p) + 7))) #endif /* HAVE___ATTRIBUTE__ */ #else /* LBL_ALIGN */ /* * The processor natively handles unaligned loads, so we can just * cast the pointer and fetch through it. 
*/ #define EXTRACT_16BITS(p) \ ((u_int16_t)ntohs(*(const u_int16_t *)(p))) #define EXTRACT_32BITS(p) \ ((u_int32_t)ntohl(*(const u_int32_t *)(p))) #define EXTRACT_64BITS(p) \ ((u_int64_t)(((u_int64_t)ntohl(*((const u_int32_t *)(p) + 0))) << 32 | \ ((u_int64_t)ntohl(*((const u_int32_t *)(p) + 1))) << 0)) #endif /* LBL_ALIGN */ #define EXTRACT_24BITS(p) \ ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 0) << 16 | \ (u_int32_t)*((const u_int8_t *)(p) + 1) << 8 | \ (u_int32_t)*((const u_int8_t *)(p) + 2))) /* * Macros to extract possibly-unaligned little-endian integral values. * XXX - do loads on little-endian machines that support unaligned loads? */ #define EXTRACT_LE_8BITS(p) (*(p)) #define EXTRACT_LE_16BITS(p) \ ((u_int16_t)((u_int16_t)*((const u_int8_t *)(p) + 1) << 8 | \ (u_int16_t)*((const u_int8_t *)(p) + 0))) #define EXTRACT_LE_32BITS(p) \ ((u_int32_t)((u_int32_t)*((const u_int8_t *)(p) + 3) << 24 | \ (u_int32_t)*((const u_int8_t *)(p) + 2) << 16 | \ (u_int32_t)*((const u_int8_t *)(p) + 1) << 8 | \ (u_int32_t)*((const u_int8_t *)(p) + 0))) #define EXTRACT_LE_64BITS(p) \ ((u_int64_t)((u_int64_t)*((const u_int8_t *)(p) + 7) << 56 | \ (u_int64_t)*((const u_int8_t *)(p) + 6) << 48 | \ (u_int64_t)*((const u_int8_t *)(p) + 5) << 40 | \ (u_int64_t)*((const u_int8_t *)(p) + 4) << 32 | \ (u_int64_t)*((const u_int8_t *)(p) + 3) << 24 | \ (u_int64_t)*((const u_int8_t *)(p) + 2) << 16 | \ (u_int64_t)*((const u_int8_t *)(p) + 1) << 8 | \ (u_int64_t)*((const u_int8_t *)(p) + 0))) #endif tcpflow/src/wifipcap/udp.h0000644000175000017500000000747412263701151014506 0ustar dimadima/* @(#) $Header: /home/cvs/wifitools/wifipcap/udp.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp.h 8.1 (Berkeley) 6/10/93 */ /* * Udp protocol header. * Per RFC 768, September, 1981. 
*/ struct udphdr { u_int16_t uh_sport; /* source port */ u_int16_t uh_dport; /* destination port */ u_int16_t uh_ulen; /* udp length */ u_int16_t uh_sum; /* udp checksum */ }; #define TFTP_PORT 69 /*XXX*/ #define KERBEROS_PORT 88 /*XXX*/ #define SUNRPC_PORT 111 /*XXX*/ #define SNMP_PORT 161 /*XXX*/ #define NTP_PORT 123 /*XXX*/ #define SNMPTRAP_PORT 162 /*XXX*/ #define ISAKMP_PORT 500 /*XXX*/ #define SYSLOG_PORT 514 /* rfc3164 */ #define TIMED_PORT 525 /*XXX*/ #define RIP_PORT 520 /*XXX*/ #define LDP_PORT 646 #define AODV_PORT 654 /*XXX*/ #define KERBEROS_SEC_PORT 750 /*XXX*/ #define L2TP_PORT 1701 /*XXX*/ #define SIP_PORT 5060 #define ISAKMP_PORT_NATT 4500 /* rfc3948 */ #define ISAKMP_PORT_USER1 7500 /*XXX - nonstandard*/ #define ISAKMP_PORT_USER2 8500 /*XXX - nonstandard*/ #define RX_PORT_LOW 7000 /*XXX*/ #define RX_PORT_HIGH 7009 /*XXX*/ #define NETBIOS_NS_PORT 137 #define NETBIOS_DGRAM_PORT 138 #define CISCO_AUTORP_PORT 496 /*XXX*/ #define RADIUS_PORT 1645 #define RADIUS_NEW_PORT 1812 #define RADIUS_ACCOUNTING_PORT 1646 #define RADIUS_NEW_ACCOUNTING_PORT 1813 #define HSRP_PORT 1985 /*XXX*/ #define LMP_PORT 701 /* rfc4204 */ #define LWRES_PORT 921 #define ZEPHYR_SRV_PORT 2103 #define ZEPHYR_CLT_PORT 2104 #define MPLS_LSP_PING_PORT 3503 /* draft-ietf-mpls-lsp-ping-02.txt */ #define BFD_CONTROL_PORT 3784 /* draft-katz-ward-bfd-v4v6-1hop-00.txt */ #define BFD_ECHO_PORT 3785 /* draft-katz-ward-bfd-v4v6-1hop-00.txt */ #ifdef INET6 #define RIPNG_PORT 521 /*XXX*/ #define DHCP6_SERV_PORT 546 /*XXX*/ #define DHCP6_CLI_PORT 547 /*XXX*/ #endif /* Jeff: endian-fixed udp header */ struct udp_hdr_t { u_int16_t sport; /* source port */ u_int16_t dport; /* destination port */ u_int16_t len; /* udp length */ u_int16_t cksum; /* udp checksum */ }; tcpflow/src/wifipcap/types.h0000644000175000017500000000401112263701151015042 0ustar dimadima//////////////////////////////////////////////////////////////////////////////// // Mercury and Colyseus Software Distribution // // Copyright 
(C) 2004-2005 Ashwin Bharambe (ashu@cs.cmu.edu) // 2004-2005 Jeffrey Pang (jeffpang@cs.cmu.edu) // 2004 Mukesh Agrawal (mukesh@cs.cmu.edu) // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 // USA //////////////////////////////////////////////////////////////////////////////// #ifndef __TYPES__H #define __TYPES__H // basic types and utility macros typedef unsigned char byte; typedef unsigned char ubyte; typedef unsigned char uint8; typedef unsigned short uint16; typedef unsigned int uint32; typedef unsigned long long uint64; typedef char sbyte; typedef char sint8; typedef short sint16; typedef int sint32; typedef long long sint64; typedef float real; typedef float real32; typedef double real64; //typedef long double real64; typedef unsigned int guint; typedef uint32 guint32; typedef uint16 guint16; typedef uint8 guint8; #ifndef MAX #define MAX(x,y) ((x)>(y)?(x):(y)) #endif #ifndef MIN #define MIN(x,y) ((x)<(y)?(x):(y)) #endif #endif // vim: set sw=4 sts=4 ts=8 noet: // Local Variables: // Mode: c++ // c-basic-offset: 4 // tab-width: 8 // indent-tabs-mode: t // End: tcpflow/src/wifipcap/radiotap.h0000644000175000017500000000203512263701151015505 0ustar dimadima #include "os.h" #ifdef _WIN32 #pragma pack(push, 1) #endif struct radiotap_hdr { bool has_channel; int channel; bool has_fhss; int fhss_fhset; int fhss_fhpat; bool has_rate; int 
rate; bool has_signal_dbm; int signal_dbm; bool has_noise_dbm; int noise_dbm; bool has_signal_db; int signal_db; bool has_noise_db; int noise_db; bool has_quality; int quality; bool has_txattenuation; int txattenuation; bool has_txattenuation_db; int txattenuation_db; bool has_txpower_dbm; int txpower_dbm; bool has_flags; bool flags_cfp; bool flags_short_preamble; bool flags_wep; bool flags_fragmented; bool flags_badfcs; bool has_antenna; int antenna; bool has_tsft; u_int64_t tsft; bool has_rxflags; int rxflags; bool has_txflags; int txflags; bool has_rts_retries; int rts_retries; bool has_data_retries; int data_retries; } _PACKED_; #ifdef _WIN32 #pragma pack(pop) #endif tcpflow/src/wifipcap/wifipcap.cpp0000644000175000017500000015374712263701151016060 0ustar dimadima/********************************************************************** * Log: * 2006-03-12: Parts originally authored by Doug Madory as wifi_parser.c * 2013-03-15: Substantially modified by Simson Garfinkel for inclusion into tcpflow * 2013-11-18: reworked static calls to be entirely calls to a class. Changed TimeVal pointer to an instance variable that includes the full packet header. 
**********************************************************************/ //*do 11-18 #include "config.h" // pull in HAVE_ defines #define __STDC_FORMAT_MACROS #include #include #include #include #include #include #include #include #ifdef HAVE_NET_ETHERNET_H #include #endif #ifdef HAVE_ARPA_INET_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #pragma GCC diagnostic ignored "-Wcast-align" #include "wifipcap.h" #include "cpack.h" #include "extract.h" #include "oui.h" #include "ethertype.h" #include "icmp.h" #include "ipproto.h" /* wifipcap uses a MAC class which is somewhat lame, but works */ MAC MAC::broadcast(0xffffffffffffULL); MAC MAC::null((uint64_t)0); int WifiPacket::debug=0; int MAC::print_fmt(MAC::PRINT_FMT_COLON); std::ostream& operator<<(std::ostream& out, const MAC& mac) { const char *fmt = MAC::print_fmt == MAC::PRINT_FMT_COLON ? "%02x:%02x:%02x:%02x:%02x:%02x" : "%02x%02x%02x%02x%02x%02x"; char buf[24]; sprintf(buf, fmt, (int)((mac.val>>40)&0xff), (int)((mac.val>>32)&0xff), (int)((mac.val>>24)&0xff), (int)((mac.val>>16)&0xff), (int)((mac.val>>8)&0xff), (int)((mac.val)&0xff) ); out << buf; return out; } std::ostream& operator<<(std::ostream& out, const struct in_addr& ip) { out << inet_ntoa(ip); return out; } struct tok { int v; /* value */ const char *s; /* string */ }; static const struct tok ethertype_values[] = { { ETHERTYPE_IP, "IPv4" }, { ETHERTYPE_MPLS, "MPLS unicast" }, { ETHERTYPE_MPLS_MULTI, "MPLS multicast" }, { ETHERTYPE_IPV6, "IPv6" }, { ETHERTYPE_8021Q, "802.1Q" }, { ETHERTYPE_VMAN, "VMAN" }, { ETHERTYPE_PUP, "PUP" }, { ETHERTYPE_ARP, "ARP"}, { ETHERTYPE_REVARP, "Reverse ARP"}, { ETHERTYPE_NS, "NS" }, { ETHERTYPE_SPRITE, "Sprite" }, { ETHERTYPE_TRAIL, "Trail" }, { ETHERTYPE_MOPDL, "MOP DL" }, { ETHERTYPE_MOPRC, "MOP RC" }, { ETHERTYPE_DN, "DN" }, { ETHERTYPE_LAT, "LAT" }, { ETHERTYPE_SCA, "SCA" }, { ETHERTYPE_LANBRIDGE, "Lanbridge" }, { ETHERTYPE_DECDNS, "DEC DNS" }, { ETHERTYPE_DECDTS, "DEC DTS" }, { ETHERTYPE_VEXP, "VEXP" }, { 
ETHERTYPE_VPROD, "VPROD" }, { ETHERTYPE_ATALK, "Appletalk" }, { ETHERTYPE_AARP, "Appletalk ARP" }, { ETHERTYPE_IPX, "IPX" }, { ETHERTYPE_PPP, "PPP" }, { ETHERTYPE_SLOW, "Slow Protocols" }, { ETHERTYPE_PPPOED, "PPPoE D" }, { ETHERTYPE_PPPOES, "PPPoE S" }, { ETHERTYPE_EAPOL, "EAPOL" }, { ETHERTYPE_JUMBO, "Jumbo" }, { ETHERTYPE_LOOPBACK, "Loopback" }, { ETHERTYPE_ISO, "OSI" }, { ETHERTYPE_GRE_ISO, "GRE-OSI" }, { 0, NULL} }; /*max length of an IEEE 802.11 packet*/ #ifndef MAX_LEN_80211 #define MAX_LEN_80211 3000 #endif /* from ethereal packet-prism.c */ #define pletohs(p) ((u_int16_t) \ ((u_int16_t)*((const u_int8_t *)(p)+1)<<8| \ (u_int16_t)*((const u_int8_t *)(p)+0)<<0)) #define pntohl(p) ((u_int32_t)*((const u_int8_t *)(p)+0)<<24| \ (u_int32_t)*((const u_int8_t *)(p)+1)<<16| \ (u_int32_t)*((const u_int8_t *)(p)+2)<<8| \ (u_int32_t)*((const u_int8_t *)(p)+3)<<0) #define COOK_FRAGMENT_NUMBER(x) ((x) & 0x000F) #define COOK_SEQUENCE_NUMBER(x) (((x) & 0xFFF0) >> 4) /* end ethereal code */ /* Sequence number gap */ #define SEQ_GAP(current, last)(0xfff & (current - last)) /* In the following three arrays, even though the QoS subtypes are listed, in the rest of the program * the QoS subtypes are treated as "OTHER_TYPES". The file "ieee802_11.h" currently doesn't account for * the existence of QoS subtypes. The QoS subtypes might need to be accomodated there in the future. 
*/ #if 0 static const char * mgmt_subtype_text[] = { "AssocReq", "AssocResp", "ReAssocReq", "ReAssocResp", "ProbeReq", "ProbeResp", "", "", "Beacon", "ATIM", "Disassoc", "Auth", "DeAuth", "Action", /*QoS mgmt_subtype*/ "", "" }; static const char * ctrl_subtype_text[] = { "", "", "", "", "", "", "", "", "BlockAckReq", /*QoS ctrl_subtype*/ "BlockAck", /*QoS ctrl_subtype*/ "PS-Poll", "RTS", "CTS", "ACK", "CF-End", "CF-End+CF-Ack" }; static const char * data_subtype_text[] = { "Data", "Data+CF-Ack", "Data+CF-Poll", "Data+CF-Ack+CF-Poll", "Null(no_data)", "CF-Ack(no_data)", "CF-Poll(no_data)", "CF-Ack+CF-Poll(no_data)", "QoS_Data", /*QoS data_subtypes from here on*/ "QoS_Data+CF-Ack", "QoS_Data+CF-Poll", "QoS_Data+CF-Ack+CF-Poll", "QoS_Null(no_data)", "", "QoS_CF-Poll(no_data)", "QoS_CF-Ack+CF-Poll(no_data)" }; #endif /////////////////////////////////////////////////////////////////////////////// // crc32 implementation needed for wifi checksum /* crc32.c * CRC-32 routine * * $Id: crc32.cpp,v 1.1 2007/02/14 00:05:50 jpang Exp $ * * Ethereal - Network traffic analyzer * By Gerald Combs * Copyright 1998 Gerald Combs * * Copied from README.developer * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Credits: * * Table from Solomon Peachy * Routine from Chris Waters */ /* * Table for the AUTODIN/HDLC/802.x CRC. 
* * Polynomial is * * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^8 + x^7 + * x^5 + x^4 + x^2 + x + 1 */ static const uint32_t crc32_ccitt_table[256] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 0xfed41b76, 
0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; #define CRC32_CCITT_SEED 0xFFFFFFFF static uint32_t crc32_ccitt_seed(const uint8_t *buf, size_t len, uint32_t seed); static uint32_t crc32_ccitt(const uint8_t *buf, size_t len) { return ( crc32_ccitt_seed(buf, len, CRC32_CCITT_SEED) ); } static uint32_t crc32_ccitt_seed(const uint8_t *buf, size_t len, uint32_t seed) { uint32_t crc32 = seed; for (unsigned int i = 0; i < len; i++){ crc32 = crc32_ccitt_table[(crc32 ^ buf[i]) & 0xff] ^ (crc32 >> 8); } return ( ~crc32 ); } /* * IEEE 802.x version (Ethernet and 802.11, at least) - byte-swap * the result of "crc32()". 
* * XXX - does this mean we should fetch the Ethernet and 802.11 * Frame Checksum (FCS) with "tvb_get_letohl()" rather than "tvb_get_ntohl()", * or is fetching it big-endian and byte-swapping the CRC done * to cope with 802.x sending stuff out in reverse bit order? */ static uint32_t crc32_802(const unsigned char *buf, size_t len) { uint32_t c_crc; c_crc = crc32_ccitt(buf, len); /* Byte reverse. */ c_crc = ((unsigned char)(c_crc>>0)<<24) | ((unsigned char)(c_crc>>8)<<16) | ((unsigned char)(c_crc>>16)<<8) | ((unsigned char)(c_crc>>24)<<0); return ( c_crc ); } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* Translate Ethernet address, as seen in struct ether_header, to type MAC. */ /* Extract header length. */ static size_t extract_header_length(u_int16_t fc) { switch (FC_TYPE(fc)) { case T_MGMT: return MGMT_HDRLEN; case T_CTRL: switch (FC_SUBTYPE(fc)) { case CTRL_PS_POLL: return CTRL_PS_POLL_HDRLEN; case CTRL_RTS: return CTRL_RTS_HDRLEN; case CTRL_CTS: return CTRL_CTS_HDRLEN; case CTRL_ACK: return CTRL_ACK_HDRLEN; case CTRL_CF_END: return CTRL_END_HDRLEN; case CTRL_END_ACK: return CTRL_END_ACK_HDRLEN; default: return 0; } case T_DATA: return (FC_TO_DS(fc) && FC_FROM_DS(fc)) ? 30 : 24; default: return 0; } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// #pragma GCC diagnostic ignored "-Wcast-align" void WifiPacket::handle_llc(const mac_hdr_t &mac,const u_char *ptr, size_t len,u_int16_t fc) { if (len < 7) { // truncated header! 
cbs->HandleLLC(*this,NULL, ptr, len); return; } // http://www.wildpackets.com/resources/compendium/wireless_lan/wlan_packets llc_hdr_t hdr; hdr.dsap = EXTRACT_LE_8BITS(ptr); // Destination Service Access point hdr.ssap = EXTRACT_LE_8BITS(ptr + 1); // Source Service Access Point hdr.control= EXTRACT_LE_8BITS(ptr + 2); // ignored by most protocols hdr.oui = EXTRACT_24BITS(ptr + 3); hdr.type = EXTRACT_16BITS(ptr + 6); /* "When both the DSAP and SSAP are set to 0xAA, the type is * interpreted as a protocol not defined by IEEE and the LSAP is * referred to as SubNetwork Access Protocol (SNAP). In SNAP, the * 5 bytes that follow the DSAP, SSAP, and control byte are called * the Protocol Discriminator." */ if(hdr.dsap==0xAA && hdr.ssap==0xAA){ cbs->HandleLLC(*this,&hdr,ptr+8,len-8); return; } if (hdr.oui == OUI_ENCAP_ETHER || hdr.oui == OUI_CISCO_90) { cbs->HandleLLC(*this,&hdr, ptr+8, len-8); return; } cbs->HandleLLCUnknown(*this,ptr, len); } void WifiPacket::handle_wep(const u_char *ptr, size_t len) { // Jeff: XXX handle TKIP/CCMP ? how can we demultiplex different // protection protocols? struct wep_hdr_t hdr; u_int32_t iv; if (len < IEEE802_11_IV_LEN + IEEE802_11_KID_LEN) { // truncated! 
cbs->HandleWEP(*this,NULL, ptr, len); return; } iv = EXTRACT_LE_32BITS(ptr); hdr.iv = IV_IV(iv); hdr.pad = IV_PAD(iv); hdr.keyid = IV_KEYID(iv); cbs->HandleWEP(*this,&hdr, ptr, len); } /////////////////////////////////////////////////////////////////////////////// static const char *auth_alg_text[]={"Open System","Shared Key","EAP"}; #define NUM_AUTH_ALGS (sizeof auth_alg_text / sizeof auth_alg_text[0]) static const char *status_text[] = { "Succesful", /* 0 */ "Unspecified failure", /* 1 */ "Reserved", /* 2 */ "Reserved", /* 3 */ "Reserved", /* 4 */ "Reserved", /* 5 */ "Reserved", /* 6 */ "Reserved", /* 7 */ "Reserved", /* 8 */ "Reserved", /* 9 */ "Cannot Support all requested capabilities in the Capability Information field", /* 10 */ "Reassociation denied due to inability to confirm that association exists", /* 11 */ "Association denied due to reason outside the scope of the standard", /* 12 */ "Responding station does not support the specified authentication algorithm ", /* 13 */ "Received an Authentication frame with authentication transaction " \ "sequence number out of expected sequence", /* 14 */ "Authentication rejected because of challenge failure", /* 15 */ "Authentication rejected due to timeout waiting for next frame in sequence", /* 16 */ "Association denied because AP is unable to handle additional associated stations", /* 17 */ "Association denied due to requesting station not supporting all of the " \ "data rates in BSSBasicRateSet parameter", /* 18 */ }; #define NUM_STATUSES (sizeof status_text / sizeof status_text[0]) static const char *reason_text[] = { "Reserved", /* 0 */ "Unspecified reason", /* 1 */ "Previous authentication no longer valid", /* 2 */ "Deauthenticated because sending station is leaving (or has left) IBSS or ESS", /* 3 */ "Disassociated due to inactivity", /* 4 */ "Disassociated because AP is unable to handle all currently associated stations", /* 5 */ "Class 2 frame received from nonauthenticated station", /* 6 */ "Class 3 frame 
received from nonassociated station", /* 7 */ "Disassociated because sending station is leaving (or has left) BSS", /* 8 */ "Station requesting (re)association is not authenticated with responding station", /* 9 */ }; #define NUM_REASONS (sizeof reason_text / sizeof reason_text[0]) const char *Wifipcap::WifiUtil::MgmtAuthAlg2Txt(uint v) { return v < NUM_AUTH_ALGS ? auth_alg_text[v] : "Unknown"; } const char *Wifipcap::WifiUtil::MgmtStatusCode2Txt(uint v) { return v < NUM_STATUSES ? status_text[v] : "Reserved"; } const char *Wifipcap::WifiUtil::MgmtReasonCode2Txt(uint v) { return v < NUM_REASONS ? reason_text[v] : "Reserved"; } /////////////////////////////////////////////////////////////////////////////// // Jeff: HACK -- tcpdump uses a global variable to check truncation #define TTEST2(_p, _l) ((const u_char *)&(_p) - p + (_l) <= (ssize_t)len) void WifiPacket::parse_elements(struct mgmt_body_t *pbody, const u_char *p, int offset, size_t len) { /* * We haven't seen any elements yet. */ pbody->challenge_status = NOT_PRESENT; pbody->ssid_status = NOT_PRESENT; pbody->rates_status = NOT_PRESENT; pbody->ds_status = NOT_PRESENT; pbody->cf_status = NOT_PRESENT; pbody->tim_status = NOT_PRESENT; for (;;) { if (!TTEST2(*(p + offset), 1)) return; switch (*(p + offset)) { case E_SSID: /* Present, possibly truncated */ pbody->ssid_status = TRUNCATED; if (!TTEST2(*(p + offset), 2)) return; memcpy(&pbody->ssid, p + offset, 2); offset += 2; if (pbody->ssid.length != 0) { if (pbody->ssid.length > sizeof(pbody->ssid.ssid) - 1) return; if (!TTEST2(*(p + offset), pbody->ssid.length)) return; memcpy(&pbody->ssid.ssid, p + offset, pbody->ssid.length); offset += pbody->ssid.length; } pbody->ssid.ssid[pbody->ssid.length] = '\0'; /* Present and not truncated */ pbody->ssid_status = PRESENT; break; case E_CHALLENGE: /* Present, possibly truncated */ pbody->challenge_status = TRUNCATED; if (!TTEST2(*(p + offset), 2)) return; memcpy(&pbody->challenge, p + offset, 2); offset += 2; if 
(pbody->challenge.length != 0) { if (pbody->challenge.length > sizeof(pbody->challenge.text) - 1) return; if (!TTEST2(*(p + offset), pbody->challenge.length)) return; memcpy(&pbody->challenge.text, p + offset, pbody->challenge.length); offset += pbody->challenge.length; } pbody->challenge.text[pbody->challenge.length] = '\0'; /* Present and not truncated */ pbody->challenge_status = PRESENT; break; case E_RATES: /* Present, possibly truncated */ pbody->rates_status = TRUNCATED; if (!TTEST2(*(p + offset), 2)) return; memcpy(&(pbody->rates), p + offset, 2); offset += 2; if (pbody->rates.length != 0) { if (pbody->rates.length > sizeof pbody->rates.rate) return; if (!TTEST2(*(p + offset), pbody->rates.length)) return; memcpy(&pbody->rates.rate, p + offset, pbody->rates.length); offset += pbody->rates.length; } /* Present and not truncated */ pbody->rates_status = PRESENT; break; case E_DS: /* Present, possibly truncated */ pbody->ds_status = TRUNCATED; if (!TTEST2(*(p + offset), 3)) return; memcpy(&pbody->ds, p + offset, 3); offset += 3; /* Present and not truncated */ pbody->ds_status = PRESENT; break; case E_CF: /* Present, possibly truncated */ pbody->cf_status = TRUNCATED; if (!TTEST2(*(p + offset), 8)) return; memcpy(&pbody->cf, p + offset, 8); offset += 8; /* Present and not truncated */ pbody->cf_status = PRESENT; break; case E_TIM: /* Present, possibly truncated */ pbody->tim_status = TRUNCATED; if (!TTEST2(*(p + offset), 2)) return; memcpy(&pbody->tim, p + offset, 2); offset += 2; if (!TTEST2(*(p + offset), 3)) return; memcpy(&pbody->tim.count, p + offset, 3); offset += 3; if (pbody->tim.length <= 3) break; if (pbody->rates.length > sizeof pbody->tim.bitmap) return; if (!TTEST2(*(p + offset), pbody->tim.length - 3)) return; memcpy(pbody->tim.bitmap, p + (pbody->tim.length - 3), (pbody->tim.length - 3)); offset += pbody->tim.length - 3; /* Present and not truncated */ pbody->tim_status = PRESENT; break; default: #ifdef DEBUG_WIFI printf("(1) unhandled 
element_id (%d) ", *(p + offset) ); #endif if (!TTEST2(*(p + offset), 2)) return; if (!TTEST2(*(p + offset + 2), *(p + offset + 1))) return; offset += *(p + offset + 1) + 2; break; } } } /********************************************************************************* * Print Handle functions for the management frame types *********************************************************************************/ int WifiPacket::handle_beacon( const struct mgmt_header_t *pmh, const u_char *p, size_t len) { struct mgmt_body_t pbody; int offset = 0; memset(&pbody, 0, sizeof(pbody)); if (!TTEST2(*p, IEEE802_11_TSTAMP_LEN + IEEE802_11_BCNINT_LEN + IEEE802_11_CAPINFO_LEN)) return 0; memcpy(&pbody.timestamp, p, IEEE802_11_TSTAMP_LEN); offset += IEEE802_11_TSTAMP_LEN; pbody.beacon_interval = EXTRACT_LE_16BITS(p+offset); offset += IEEE802_11_BCNINT_LEN; pbody.capability_info = EXTRACT_LE_16BITS(p+offset); offset += IEEE802_11_CAPINFO_LEN; parse_elements(&pbody, p, offset, len); /* PRINT_SSID(pbody); PRINT_RATES(pbody); printf(" %s", CAPABILITY_ESS(pbody.capability_info) ? 
"ESS" : "IBSS"); PRINT_DS_CHANNEL(pbody); */ cbs->Handle80211MgmtBeacon(*this, pmh, &pbody); return 1; } int WifiPacket::handle_assoc_request( const struct mgmt_header_t *pmh, const u_char *p, size_t len) { struct mgmt_body_t pbody; int offset = 0; memset(&pbody, 0, sizeof(pbody)); if (!TTEST2(*p, IEEE802_11_CAPINFO_LEN + IEEE802_11_LISTENINT_LEN)) return 0; pbody.capability_info = EXTRACT_LE_16BITS(p); offset += IEEE802_11_CAPINFO_LEN; pbody.listen_interval = EXTRACT_LE_16BITS(p+offset); offset += IEEE802_11_LISTENINT_LEN; parse_elements(&pbody, p, offset, len); /* PRINT_SSID(pbody); PRINT_RATES(pbody); */ cbs->Handle80211MgmtAssocRequest(*this, pmh, &pbody); return 1; } int WifiPacket::handle_assoc_response( const struct mgmt_header_t *pmh, const u_char *p, size_t len, bool reassoc) { struct mgmt_body_t pbody; int offset = 0; memset(&pbody, 0, sizeof(pbody)); if (!TTEST2(*p, IEEE802_11_CAPINFO_LEN + IEEE802_11_STATUS_LEN + IEEE802_11_AID_LEN)) return 0; pbody.capability_info = EXTRACT_LE_16BITS(p); offset += IEEE802_11_CAPINFO_LEN; pbody.status_code = EXTRACT_LE_16BITS(p+offset); offset += IEEE802_11_STATUS_LEN; pbody.aid = EXTRACT_LE_16BITS(p+offset); offset += IEEE802_11_AID_LEN; parse_elements(&pbody, p, offset, len); /* printf(" AID(%x) :%s: %s", ((u_int16_t)(pbody.aid << 2 )) >> 2 , CAPABILITY_PRIVACY(pbody.capability_info) ? " PRIVACY " : "", (pbody.status_code < NUM_STATUSES ? 
status_text[pbody.status_code] : "n/a"));
    */
    /* same decoded body, different callback depending on the frame subtype */
    if (!reassoc)
        cbs->Handle80211MgmtAssocResponse(*this, pmh, &pbody);
    else
        cbs->Handle80211MgmtReassocResponse(*this, pmh, &pbody);
    return 1;
}

/*
 * Decode a reassociation request body (capability info, listen interval,
 * current AP address, then elements) and report it through
 * Handle80211MgmtReassocRequest.
 *
 * Returns 0 when the TTEST2 check on the fixed fields fails, 1 otherwise.
 */
int WifiPacket::handle_reassoc_request( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    struct mgmt_body_t pbody;
    int offset = 0;

    memset(&pbody, 0, sizeof(pbody));

    if (!TTEST2(*p, IEEE802_11_CAPINFO_LEN + IEEE802_11_LISTENINT_LEN + IEEE802_11_AP_LEN))
        return 0;
    pbody.capability_info = EXTRACT_LE_16BITS(p);
    offset += IEEE802_11_CAPINFO_LEN;
    pbody.listen_interval = EXTRACT_LE_16BITS(p+offset);
    offset += IEEE802_11_LISTENINT_LEN;
    memcpy(&pbody.ap, p+offset, IEEE802_11_AP_LEN);
    offset += IEEE802_11_AP_LEN;

    parse_elements(&pbody, p, offset, len);

    /*
    PRINT_SSID(pbody);
    printf(" AP : %s", etheraddr_string( pbody.ap ));
    */
    cbs->Handle80211MgmtReassocRequest(*this, pmh, &pbody);
    return 1;
}

/*
 * Decode a reassociation response by delegating to handle_assoc_response()
 * with reassoc = true.
 */
int WifiPacket::handle_reassoc_response( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    /* Same as an Association Response */
    return handle_assoc_response(pmh, p, len, true);
}

/*
 * Decode a probe request body. No fixed fields are read here; the whole
 * body is passed to parse_elements() starting at offset 0, and the result
 * is reported through Handle80211MgmtProbeRequest. Always returns 1.
 */
int WifiPacket::handle_probe_request( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    struct mgmt_body_t pbody;
    int offset = 0;

    memset(&pbody, 0, sizeof(pbody));

    parse_elements(&pbody, p, offset, len);

    /*
    PRINT_SSID(pbody);
    PRINT_RATES(pbody);
    */
    cbs->Handle80211MgmtProbeRequest(*this, pmh, &pbody);
    return 1;
}

/*
 * Decode a probe response body. It reads the same fixed fields as
 * handle_beacon() (timestamp, beacon interval, capability info), then the
 * elements, and reports through Handle80211MgmtProbeResponse.
 *
 * Returns 0 when the TTEST2 check on the fixed fields fails, 1 otherwise.
 */
int WifiPacket::handle_probe_response( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    struct mgmt_body_t pbody;
    int offset = 0;

    memset(&pbody, 0, sizeof(pbody));

    if (!TTEST2(*p, IEEE802_11_TSTAMP_LEN + IEEE802_11_BCNINT_LEN + IEEE802_11_CAPINFO_LEN))
        return 0;

    memcpy(&pbody.timestamp, p, IEEE802_11_TSTAMP_LEN);
    offset += IEEE802_11_TSTAMP_LEN;
    pbody.beacon_interval = EXTRACT_LE_16BITS(p+offset);
    offset += IEEE802_11_BCNINT_LEN;
    pbody.capability_info = EXTRACT_LE_16BITS(p+offset);
    offset += IEEE802_11_CAPINFO_LEN;

    parse_elements(&pbody, p, offset, len);

    /*
    PRINT_SSID(pbody);
    PRINT_RATES(pbody);
    PRINT_DS_CHANNEL(pbody);
    */
    cbs->Handle80211MgmtProbeResponse(*this, pmh, &pbody);
    return 1;
}

/*
 * Report an ATIM frame through Handle80211MgmtATIM. p and len are not
 * examined. Always returns 1.
 */
int WifiPacket::handle_atim( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    /* the frame body for ATIM is null. */
    cbs->Handle80211MgmtATIM(*this, pmh);
    return 1;
}

/*
 * Decode a disassociation body (a single 16-bit reason code) and report it
 * through Handle80211MgmtDisassoc.
 *
 * Returns 0 when the TTEST2 check on the reason code fails, 1 otherwise.
 */
int WifiPacket::handle_disassoc( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    struct mgmt_body_t pbody;

    memset(&pbody, 0, sizeof(pbody));

    if (!TTEST2(*p, IEEE802_11_REASON_LEN))
        return 0;
    pbody.reason_code = EXTRACT_LE_16BITS(p);
    /*
    printf(": %s",
           (pbody.reason_code < NUM_REASONS) ? reason_text[pbody.reason_code] : "Reserved" );
    */
    cbs->Handle80211MgmtDisassoc(*this, pmh, &pbody);
    return 1;
}

/*
 * Decode an authentication body: three 16-bit little-endian fields
 * (algorithm, transaction sequence number, status code) followed by
 * elements. The result is reported through Handle80211MgmtAuth.
 *
 * Returns 0 when the TTEST2 check on the 6 fixed bytes fails, 1 otherwise.
 */
int WifiPacket::handle_auth( const struct mgmt_header_t *pmh, const u_char *p, size_t len)
{
    struct mgmt_body_t pbody;
    int offset = 0;

    memset(&pbody, 0, sizeof(pbody));

    if (!TTEST2(*p, 6))
        return 0;
    pbody.auth_alg = EXTRACT_LE_16BITS(p);
    offset += 2;
    pbody.auth_trans_seq_num = EXTRACT_LE_16BITS(p + offset);
    offset += 2;
    pbody.status_code = EXTRACT_LE_16BITS(p + offset);
    offset += 2;

    parse_elements(&pbody, p, offset, len);

    /*
    if ((pbody.auth_alg == 1) &&
        ((pbody.auth_trans_seq_num == 2) || (pbody.auth_trans_seq_num == 3))) {
        printf(" (%s)-%x [Challenge Text] %s",
               (pbody.auth_alg < NUM_AUTH_ALGS) ? auth_alg_text[pbody.auth_alg] : "Reserved",
               pbody.auth_trans_seq_num,
               ((pbody.auth_trans_seq_num % 2) ?
                ((pbody.status_code < NUM_STATUSES) ? status_text[pbody.status_code] : "n/a") : ""));
        return 1;
    }
    printf(" (%s)-%x: %s",
           (pbody.auth_alg < NUM_AUTH_ALGS) ? auth_alg_text[pbody.auth_alg] : "Reserved",
           pbody.auth_trans_seq_num,
           (pbody.auth_trans_seq_num % 2) ? ((pbody.status_code < NUM_STATUSES) ? 
status_text[pbody.status_code] : "n/a") : ""); */ cbs->Handle80211MgmtAuth(*this, pmh, &pbody); return 1; } int WifiPacket::handle_deauth( const struct mgmt_header_t *pmh, const u_char *p, size_t len) { struct mgmt_body_t pbody; int offset = 0; //const char *reason = NULL; memset(&pbody, 0, sizeof(pbody)); if (!TTEST2(*p, IEEE802_11_REASON_LEN)) return 0; pbody.reason_code = EXTRACT_LE_16BITS(p); offset += IEEE802_11_REASON_LEN; /* reason = (pbody.reason_code < NUM_REASONS) ? reason_text[pbody.reason_code] : "Reserved"; if (eflag) { printf(": %s", reason); } else { printf(" (%s): %s", etheraddr_string(pmh->sa), reason); } */ cbs->Handle80211MgmtDeauth(*this, pmh, &pbody); return 1; } /********************************************************************************* * Print Body funcs *********************************************************************************/ /** Decode a management request. * @return 0 - failure, non-zero success * * NOTE — this function and all that it calls should be handled as methods in WifipcapCallbacks */ int WifiPacket::decode_mgmt_body(u_int16_t fc, struct mgmt_header_t *pmh, const u_char *p, size_t len) { if(debug) std::cerr << "decode_mgmt_body FC_SUBTYPE(fc)="<<(int)FC_SUBTYPE(fc)<<" "; switch (FC_SUBTYPE(fc)) { case ST_ASSOC_REQUEST: return handle_assoc_request(pmh, p, len); case ST_ASSOC_RESPONSE: return handle_assoc_response(pmh, p, len); case ST_REASSOC_REQUEST: return handle_reassoc_request(pmh, p, len); case ST_REASSOC_RESPONSE: return handle_reassoc_response(pmh, p, len); case ST_PROBE_REQUEST: return handle_probe_request(pmh, p, len); case ST_PROBE_RESPONSE: return handle_probe_response(pmh, p, len); case ST_BEACON: return handle_beacon(pmh, p, len); case ST_ATIM: return handle_atim(pmh, p, len); case ST_DISASSOC: return handle_disassoc(pmh, p, len); case ST_AUTH: if (len < 3) { return 0; } if ((p[0] == 0 ) && (p[1] == 0) && (p[2] == 0)) { //printf("Authentication (Shared-Key)-3 "); cbs->Handle80211MgmtAuthSharedKey(*this, 
pmh, p, len); return 0; } return handle_auth(pmh, p, len); case ST_DEAUTH: return handle_deauth(pmh, p, len); break; default: return 0; } } int WifiPacket::decode_mgmt_frame(const u_char * ptr, size_t len, u_int16_t fc, u_int8_t hdrlen) { mgmt_header_t hdr; u_int16_t seq_ctl; hdr.da = MAC::ether2MAC(ptr + 4); hdr.sa = MAC::ether2MAC(ptr + 10); hdr.bssid = MAC::ether2MAC(ptr + 16); hdr.duration = EXTRACT_LE_16BITS(ptr+2); seq_ctl = pletohs(ptr + 22); hdr.seq = COOK_SEQUENCE_NUMBER(seq_ctl); hdr.frag = COOK_FRAGMENT_NUMBER(seq_ctl); cbs->Handle80211(*this, fc, hdr.sa, hdr.da, MAC::null, MAC::null, ptr, len); int ret = decode_mgmt_body(fc, &hdr, ptr+MGMT_HDRLEN, len-MGMT_HDRLEN); if (ret==0) { cbs->Handle80211Unknown(*this, fc, ptr, len); return 0; } return 0; } int WifiPacket::decode_data_frame(const u_char * ptr, size_t len, u_int16_t fc) { mac_hdr_t hdr; hdr.fc = fc; hdr.duration = EXTRACT_LE_16BITS(ptr+2); hdr.seq_ctl = pletohs(ptr + 22); hdr.seq = COOK_SEQUENCE_NUMBER(hdr.seq_ctl); hdr.frag = COOK_FRAGMENT_NUMBER(hdr.seq_ctl); if(FC_TYPE(fc)==2 && FC_SUBTYPE(fc)==8){ // quality of service? 
hdr.qos = 1; } size_t hdrlen=0; const MAC address1 = MAC::ether2MAC(ptr+4); const MAC address2 = MAC::ether2MAC(ptr+10); const MAC address3 = MAC::ether2MAC(ptr+16); /* call the 80211 callback data callback */ if (FC_TO_DS(fc)==0 && FC_FROM_DS(fc)==0) { /* ad hoc IBSS */ hdr.da = address1; hdr.sa = address2; hdr.bssid = address3; hdrlen = DATA_HDRLEN; if(hdr.qos) hdrlen+=2; cbs->Handle80211( *this, fc, hdr.sa, hdr.da, hdr.ra, hdr.ta, ptr, len); cbs->Handle80211DataIBSS( *this, hdr, ptr+hdrlen, len-hdrlen); } else if (FC_TO_DS(fc)==0 && FC_FROM_DS(fc)) { /* from AP to STA */ hdr.da = address1; hdr.bssid = address2; hdr.sa = address3; hdrlen = DATA_HDRLEN; if(hdr.qos) hdrlen+=2; cbs->Handle80211( *this, fc, hdr.sa, hdr.da, hdr.ra, hdr.ta, ptr, len); cbs->Handle80211DataFromAP( *this, hdr, ptr+hdrlen, len-hdrlen); } else if (FC_TO_DS(fc) && FC_FROM_DS(fc)==0) { /* frame from STA to AP */ hdr.bssid = address1; hdr.sa = address2; hdr.da = address3; hdrlen = DATA_HDRLEN; if(hdr.qos) hdrlen+=2; cbs->Handle80211( *this, fc, hdr.sa, hdr.da, hdr.ra, hdr.ta, ptr, len); cbs->Handle80211DataToAP( *this, hdr, ptr+hdrlen, len-hdrlen); } else if (FC_TO_DS(fc) && FC_FROM_DS(fc)) { /* WDS */ const MAC address4 = MAC::ether2MAC(ptr+18); hdr.ra = address1; hdr.ta = address2; hdr.da = address3; hdr.sa = address4; hdrlen = DATA_WDS_HDRLEN; if(hdr.qos) hdrlen+=2; cbs->Handle80211( *this, fc, hdr.sa, hdr.da, hdr.ra, hdr.ta, ptr, len); cbs->Handle80211DataWDS( *this, hdr, ptr+hdrlen, len-hdrlen); } /* Handle either the WEP or the link layer. 
This handles the data itself */ if (FC_WEP(fc)) { handle_wep(ptr+hdrlen, len-hdrlen-4 ); } else { handle_llc(hdr, ptr+hdrlen, len-hdrlen-4, fc); } return 0; } int WifiPacket::decode_ctrl_frame(const u_char * ptr, size_t len, u_int16_t fc) { u_int16_t du = EXTRACT_LE_16BITS(ptr+2); //duration switch (FC_SUBTYPE(fc)) { case CTRL_PS_POLL: { ctrl_ps_poll_t hdr; hdr.fc = fc; hdr.aid = du; hdr.bssid = MAC::ether2MAC(ptr+4); hdr.ta = MAC::ether2MAC(ptr+10); cbs->Handle80211( *this, fc, MAC::null, MAC::null, MAC::null, hdr.ta, ptr, len); cbs->Handle80211CtrlPSPoll( *this, &hdr); break; } case CTRL_RTS: { ctrl_rts_t hdr; hdr.fc = fc; hdr.duration = du; hdr.ra = MAC::ether2MAC(ptr+4); hdr.ta = MAC::ether2MAC(ptr+10); cbs->Handle80211( *this, fc, MAC::null, MAC::null, hdr.ra, hdr.ta, ptr, len); cbs->Handle80211CtrlRTS( *this, &hdr); break; } case CTRL_CTS: { ctrl_cts_t hdr; hdr.fc = fc; hdr.duration = du; hdr.ra = MAC::ether2MAC(ptr+4); cbs->Handle80211( *this, fc, MAC::null, MAC::null, hdr.ra, MAC::null, ptr, len); cbs->Handle80211CtrlCTS( *this, &hdr); break; } case CTRL_ACK: { ctrl_ack_t hdr; hdr.fc = fc; hdr.duration = du; hdr.ra = MAC::ether2MAC(ptr+4); cbs->Handle80211( *this, fc, MAC::null, MAC::null, hdr.ra, MAC::null, ptr, len); cbs->Handle80211CtrlAck( *this, &hdr); break; } case CTRL_CF_END: { ctrl_end_t hdr; hdr.fc = fc; hdr.duration = du; hdr.ra = MAC::ether2MAC(ptr+4); hdr.bssid = MAC::ether2MAC(ptr+10); cbs->Handle80211( *this, fc, MAC::null, MAC::null, hdr.ra, MAC::null, ptr, len); cbs->Handle80211CtrlCFEnd( *this, &hdr); break; } case CTRL_END_ACK: { ctrl_end_ack_t hdr; hdr.fc = fc; hdr.duration = du; hdr.ra = MAC::ether2MAC(ptr+4); hdr.bssid = MAC::ether2MAC(ptr+10); cbs->Handle80211( *this, fc, MAC::null, MAC::null, hdr.ra, MAC::null, ptr, len); cbs->Handle80211CtrlEndAck( *this, &hdr); break; } default: { cbs->Handle80211( *this, fc, MAC::null, MAC::null, MAC::null, MAC::null, ptr, len); cbs->Handle80211Unknown( *this, fc, ptr, len); return -1; //add the 
case statements for QoS control frames once ieee802_11.h is updated } } return 0; } #ifndef roundup2 #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ #endif void WifiPacket::handle_80211(const u_char * pkt, size_t len /* , int pad */) { if (debug) std::cerr << "handle_80211(len= " << len << " "; if (len < 2) { cbs->Handle80211( *this, 0, MAC::null, MAC::null, MAC::null, MAC::null, pkt, len); cbs->Handle80211Unknown( *this, -1, pkt, len); return; } u_int16_t fc = EXTRACT_LE_16BITS(pkt); //frame control size_t hdrlen = extract_header_length(fc); /* if (pad) { hdrlen = roundup2(hdrlen, 4); } */ if (debug) std::cerr << "FC_TYPE(fc)= " << FC_TYPE(fc) << " "; if (len < IEEE802_11_FC_LEN || len < hdrlen) { cbs->Handle80211Unknown( *this, fc, pkt, len); return; } /* Always calculate the frame checksum, but only process the packets if the FCS or if we are ignoring it */ if (len >= hdrlen + 4) { // assume fcs is last 4 bytes (?) u_int32_t fcs_sent = EXTRACT_32BITS(pkt+len-4); u_int32_t fcs = crc32_802(pkt, len-4); /* if (fcs != fcs_sent) { cerr << "bad fcs: "; fprintf (stderr, "%08x != %08x\n", fcs_sent, fcs); } */ fcs_ok = (fcs == fcs_sent); } if (cbs->Check80211FCS(*this) && fcs_ok==false){ cbs->Handle80211Unknown(*this,fc,pkt,len); return; } // fill in current_frame: type, sn switch (FC_TYPE(fc)) { case T_MGMT: if(decode_mgmt_frame(pkt, len, fc, hdrlen)<0) return; break; case T_DATA: if(decode_data_frame(pkt, len, fc)<0) return; break; case T_CTRL: if(decode_ctrl_frame(pkt, len, fc)<0) return; break; default: cbs->Handle80211( *this, fc, MAC::null, MAC::null, MAC::null, MAC::null, pkt, len); cbs->Handle80211Unknown( *this, fc, pkt, len); return; } } int WifiPacket::print_radiotap_field(struct cpack_state *s, u_int32_t bit, int *pad, radiotap_hdr *hdr) { union { int8_t i8; u_int8_t u8; int16_t i16; u_int16_t u16; u_int32_t u32; u_int64_t u64; } u, u2, u3; int rc; switch (bit) { case IEEE80211_RADIOTAP_FLAGS: rc = cpack_uint8(s, &u.u8); if (u.u8 
& IEEE80211_RADIOTAP_F_DATAPAD) *pad = 1; break; case IEEE80211_RADIOTAP_RATE: case IEEE80211_RADIOTAP_DB_ANTSIGNAL: case IEEE80211_RADIOTAP_DB_ANTNOISE: case IEEE80211_RADIOTAP_ANTENNA: rc = cpack_uint8(s, &u.u8); break; case IEEE80211_RADIOTAP_DBM_ANTSIGNAL: case IEEE80211_RADIOTAP_DBM_ANTNOISE: rc = cpack_int8(s, &u.i8); break; case IEEE80211_RADIOTAP_CHANNEL: rc = cpack_uint16(s, &u.u16); if (rc != 0) break; rc = cpack_uint16(s, &u2.u16); break; case IEEE80211_RADIOTAP_FHSS: case IEEE80211_RADIOTAP_LOCK_QUALITY: case IEEE80211_RADIOTAP_TX_ATTENUATION: rc = cpack_uint16(s, &u.u16); break; case IEEE80211_RADIOTAP_DB_TX_ATTENUATION: rc = cpack_uint8(s, &u.u8); break; case IEEE80211_RADIOTAP_DBM_TX_POWER: rc = cpack_int8(s, &u.i8); break; case IEEE80211_RADIOTAP_TSFT: rc = cpack_uint64(s, &u.u64); break; case IEEE80211_RADIOTAP_RX_FLAGS: rc = cpack_uint16(s, &u.u16); break; case IEEE80211_RADIOTAP_TX_FLAGS: rc = cpack_uint16(s, &u.u16); break; case IEEE80211_RADIOTAP_RTS_RETRIES: rc = cpack_uint8(s, &u.u8); break; case IEEE80211_RADIOTAP_DATA_RETRIES: rc = cpack_uint8(s, &u.u8); break; // simson add follows: case IEEE80211_RADIOTAP_XCHANNEL: rc = cpack_uint8(s, &u.u8); // simson guess break; case IEEE80211_RADIOTAP_MCS: rc = cpack_uint8(s, &u.u8) || cpack_uint8(s, &u2.u8) || cpack_uint8(s, &u3.u8); // simson guess break; // simson end default: /* this bit indicates a field whose * size we do not know, so we cannot * proceed. 
*/ //printf("[0x%08x] ", bit); fprintf(stderr, "wifipcap: unknown radiotap bit: %d (%d)\n", bit,IEEE80211_RADIOTAP_XCHANNEL); return -1 ; } if (rc != 0) { //printf("[|802.11]"); fprintf(stderr, "wifipcap: truncated radiotap header for bit: %d\n", bit); return rc ; } switch (bit) { case IEEE80211_RADIOTAP_CHANNEL: //printf("%u MHz ", u.u16); if (u2.u16 != 0) //printf("(0x%04x) ", u2.u16); hdr->has_channel = true; hdr->channel = u2.u16; break; case IEEE80211_RADIOTAP_FHSS: //printf("fhset %d fhpat %d ", u.u16 & 0xff, (u.u16 >> 8) & 0xff); hdr->has_fhss = true; hdr->fhss_fhset = u.u16 & 0xff; hdr->fhss_fhpat = (u.u16 >> 8) & 0xff; break; case IEEE80211_RADIOTAP_RATE: //PRINT_RATE("", u.u8, " Mb/s "); hdr->has_rate = true; hdr->rate = u.u8; break; case IEEE80211_RADIOTAP_DBM_ANTSIGNAL: //printf("%ddB signal ", u.i8); hdr->has_signal_dbm = true; hdr->signal_dbm = u.i8; break; case IEEE80211_RADIOTAP_DBM_ANTNOISE: //printf("%ddB noise ", u.i8); hdr->has_noise_dbm = true; hdr->noise_dbm = u.i8; break; case IEEE80211_RADIOTAP_DB_ANTSIGNAL: //printf("%ddB signal ", u.u8); hdr->has_signal_db = true; hdr->signal_db = u.u8; break; case IEEE80211_RADIOTAP_DB_ANTNOISE: //printf("%ddB noise ", u.u8); hdr->has_noise_db = true; hdr->noise_db = u.u8; break; case IEEE80211_RADIOTAP_LOCK_QUALITY: //printf("%u sq ", u.u16); hdr->has_quality = true; hdr->quality = u.u16; break; case IEEE80211_RADIOTAP_TX_ATTENUATION: //printf("%d tx power ", -(int)u.u16); hdr->has_txattenuation = true; hdr->txattenuation = -(int)u.u16; break; case IEEE80211_RADIOTAP_DB_TX_ATTENUATION: //printf("%ddB tx power ", -(int)u.u8); hdr->has_txattenuation_db = true; hdr->txattenuation_db = -(int)u.u8; break; case IEEE80211_RADIOTAP_DBM_TX_POWER: //printf("%ddBm tx power ", u.i8); hdr->has_txpower_dbm = true; hdr->txpower_dbm = u.i8; break; case IEEE80211_RADIOTAP_FLAGS: hdr->has_flags = true; if (u.u8 & IEEE80211_RADIOTAP_F_CFP) //printf("cfp "); hdr->flags_cfp = true; if (u.u8 & IEEE80211_RADIOTAP_F_SHORTPRE) 
//printf("short preamble "); hdr->flags_short_preamble = true; if (u.u8 & IEEE80211_RADIOTAP_F_WEP) //printf("wep "); hdr->flags_wep = true; if (u.u8 & IEEE80211_RADIOTAP_F_FRAG) //printf("fragmented "); hdr->flags_fragmented = true; if (u.u8 & IEEE80211_RADIOTAP_F_BADFCS) //printf("bad-fcs "); hdr->flags_badfcs = true; break; case IEEE80211_RADIOTAP_ANTENNA: //printf("antenna %d ", u.u8); hdr->has_antenna = true; hdr->antenna = u.u8; break; case IEEE80211_RADIOTAP_TSFT: //printf("%" PRIu64 "us tsft ", u.u64); hdr->has_tsft = true; hdr->tsft = u.u64; break; case IEEE80211_RADIOTAP_RX_FLAGS: hdr->has_rxflags = true; hdr->rxflags = u.u16; break; case IEEE80211_RADIOTAP_TX_FLAGS: hdr->has_txflags = true; hdr->txflags = u.u16; break; case IEEE80211_RADIOTAP_RTS_RETRIES: hdr->has_rts_retries = true; hdr->rts_retries = u.u8; break; case IEEE80211_RADIOTAP_DATA_RETRIES: hdr->has_data_retries = true; hdr->data_retries = u.u8; break; } return 0 ; } void WifiPacket::handle_radiotap(const u_char *p,size_t caplen) { #define BITNO_32(x) (((x) >> 16) ? 16 + BITNO_16((x) >> 16) : BITNO_16((x))) #define BITNO_16(x) (((x) >> 8) ? 8 + BITNO_8((x) >> 8) : BITNO_8((x))) #define BITNO_8(x) (((x) >> 4) ? 4 + BITNO_4((x) >> 4) : BITNO_4((x))) #define BITNO_4(x) (((x) >> 2) ? 2 + BITNO_2((x) >> 2) : BITNO_2((x))) #define BITNO_2(x) (((x) & 2) ? 1 : 0) #define BIT(n) (1 << n) #define IS_EXTENDED(__p) (EXTRACT_LE_32BITS(__p) & BIT(IEEE80211_RADIOTAP_EXT)) != 0 // If caplen is too small, just give it a try and carry on. 
if (caplen < sizeof(struct ieee80211_radiotap_header)) { cbs->HandleRadiotap( *this, NULL, p, caplen); return; } struct ieee80211_radiotap_header *hdr = (struct ieee80211_radiotap_header *)p; size_t len = EXTRACT_LE_16BITS(&hdr->it_len); // length of radiotap header if (caplen < len) { //printf("[|802.11]"); cbs->HandleRadiotap( *this, NULL, p, caplen); return;// caplen; } uint32_t *last_presentp=0; for (last_presentp = &hdr->it_present; IS_EXTENDED(last_presentp) && (u_char*)(last_presentp + 1) <= p + len; last_presentp++){ } /* are there more bitmap extensions than bytes in header? */ if (IS_EXTENDED(last_presentp)) { //printf("[|802.11]"); cbs->HandleRadiotap( *this, NULL, p, caplen); return;// caplen; } const u_char *iter = (u_char*)(last_presentp + 1); struct cpack_state cpacker; if (cpack_init(&cpacker, (u_int8_t*)iter, len - (iter - p)) != 0) { /* XXX */ //printf("[|802.11]"); cbs->HandleRadiotap( *this, NULL, p, caplen); return;// caplen; } radiotap_hdr ohdr; memset(&ohdr, 0, sizeof(ohdr)); /* Assume no Atheros padding between 802.11 header and body */ int pad = 0; uint32_t *presentp; int bit0=0; for (bit0 = 0, presentp = &hdr->it_present; presentp <= last_presentp; presentp++, bit0 += 32) { u_int32_t present, next_present; for (present = EXTRACT_LE_32BITS(presentp); present; present = next_present) { /* clear the least significant bit that is set */ next_present = present & (present - 1); /* extract the least significant bit that is set */ enum ieee80211_radiotap_type bit = (enum ieee80211_radiotap_type) (bit0 + BITNO_32(present ^ next_present)); /* print the next radiotap field */ int r = print_radiotap_field(&cpacker, bit, &pad, &ohdr); /* If we got an error, break both loops */ if(r!=0) goto done; } } done:; cbs->HandleRadiotap( *this, &ohdr, p, caplen); //return len + ieee802_11_print(p + len, length - len, caplen - len, pad); #undef BITNO_32 #undef BITNO_16 #undef BITNO_8 #undef BITNO_4 #undef BITNO_2 #undef BIT handle_80211(p+len, caplen-len); } void 
WifiPacket::handle_prism(const u_char *pc, size_t len) { prism2_pkthdr hdr; /* get the fields */ hdr.host_time = EXTRACT_LE_32BITS(pc+32); hdr.mac_time = EXTRACT_LE_32BITS(pc+44); hdr.channel = EXTRACT_LE_32BITS(pc+56); hdr.rssi = EXTRACT_LE_32BITS(pc+68); hdr.sq = EXTRACT_LE_32BITS(pc+80); hdr.signal = EXTRACT_LE_32BITS(pc+92); hdr.noise = EXTRACT_LE_32BITS(pc+104); hdr.rate = EXTRACT_LE_32BITS(pc+116)/2; hdr.istx = EXTRACT_LE_32BITS(pc+128); cbs->HandlePrism( *this, &hdr, pc + 144, len - 144); handle_80211(pc+144,len-144); } /////////////////////////////////////////////////////////////////////////////// /// /// handle_*: /// handle each of the packet types /// void WifiPacket::handle_ether(const u_char *ptr, size_t len) { #if 0 ether_hdr_t hdr; hdr.da = MAC::ether2MAC(ptr); hdr.sa = MAC::ether2MAC(ptr+6); hdr.type = EXTRACT_16BITS(ptr + 12); ptr += 14; len -= 14; cbs->HandleEthernet(*this, &hdr, ptr, len); switch (hdr.type) { case ETHERTYPE_IP: handle_ip(ptr, len); return; case ETHERTYPE_IPV6: handle_ip6(ptr, len); return; case ETHERTYPE_ARP: handle_arp( ptr, len); return; default: cbs->HandleL2Unknown(*this, hdr.type, ptr, len); return; } #endif } /////////////////////////////////////////////////////////////////////////////// /* These are all static functions */ #if 0 void Wifipcap::dl_prism(const PcapUserData &data, const struct pcap_pkthdr *header, const u_char * packet) { WifipcapCallbacks *cbs = data.cbs; if(header->caplen < 144) return; // prism header cbs->PacketBegin( packet, header->caplen, header->len); handle_prism(cbs,packet+144,header->caplen-144); cbs->PacketEnd(); } void Wifipcap::dl_prism(u_char *user, const struct pcap_pkthdr *header, const u_char * packet) { PcapUserData *data = reinterpret_cast(user); Wifipcap::dl_prism(*data,header,packet); } #endif #if 0 void Wifipcap::dl_ieee802_11_radio(const PcapUserData &data, const struct pcap_pkthdr *header, const u_char * packet) { data.cbs->PacketBegin( packet, header->caplen, header->len); 
handle_radiotap(packet, header->caplen); data.cbs->PacketEnd(); } #endif void Wifipcap::dl_ieee802_11_radio(const u_char *user, const struct pcap_pkthdr *header, const u_char * packet) { const PcapUserData *data = reinterpret_cast(user); WifiPacket pkt(data->cbs,data->header_type,header,packet); data->cbs->PacketBegin(pkt,packet,header->caplen,header->len); pkt.handle_radiotap(packet,header->caplen); data->cbs->PacketEnd(pkt); //Wifipcap::dl_ieee802_11_radio(*data,header,packet); } /////////////////////////////////////////////////////////////////////////////// /* None of these are used in tcpflow */ bool Wifipcap::InitNext() { if (morefiles.size() < 1){ return false; } if (descr) { pcap_close(descr); } Init(morefiles.front(), false); morefiles.pop_front(); return true; } void Wifipcap::Init(const char *name, bool live) { if (verbose){ std::cerr << "wifipcap: initializing '" << name << "'" << std::endl; } if (!live) { #ifdef _WIN32 std::cerr << "Trace replay is unsupported in windows." << std::endl; exit(1); #else // mini hack: handle gziped files since all our traces are in // this format int slen = strlen(name); bool gzip = !strcmp(name+slen-3, ".gz"); bool bzip = !strcmp(name+slen-4, ".bz2"); char cmd[256]; char errbuf[256]; if (gzip) sprintf(cmd, "zcat %s", name); else if (bzip) sprintf(cmd, "bzcat %s", name); else // using cat here instead of pcap_open or fopen is intentional // neither of these may be able to handle large files (>2GB files) // but cat uses the linux routines to allow it to sprintf(cmd, "cat %s", name); FILE *pipe = popen(cmd, "r"); if (pipe == NULL) { printf("popen(): %s\n", strerror(errno)); exit(1); } descr = pcap_fopen_offline(pipe, errbuf); if(descr == NULL) { printf("pcap_open_offline(): %s\n", errbuf); exit(1); } #endif } else { char errbuf[256]; descr = pcap_open_live(name,BUFSIZ,1,-1,errbuf); if(descr == NULL) { printf("pcap_open_live(): %s\n", errbuf); exit(1); } } datalink = pcap_datalink(descr); if (datalink != DLT_PRISM_HEADER && 
datalink != DLT_IEEE802_11_RADIO && datalink != DLT_IEEE802_11) { if (datalink == DLT_EN10MB) { printf("warning: ethernet datalink type: %s\n", pcap_datalink_val_to_name(datalink)); } else { printf("warning: unrecognized datalink type: %s\n", pcap_datalink_val_to_name(datalink)); } } } /* object-oriented version of pcap callback. Called with the callbacks object, * the DLT type, the header and the packet. * This is the main packet processor. * It records some stats and then dispatches to the appropriate callback. */ void Wifipcap::handle_packet(WifipcapCallbacks *cbs,int header_type, const struct pcap_pkthdr *header, const u_char * packet) { /* Record start time if we don't have it */ if (startTime == TIME_NONE) { startTime = header->ts; lastPrintTime = header->ts; } /* Print stats if necessary */ if (header->ts.tv_sec > lastPrintTime.tv_sec + Wifipcap::PRINT_TIME_INTERVAL) { if (verbose) { int hours = (header->ts.tv_sec - startTime.tv_sec)/3600; int days = hours/24; int left = hours%24; fprintf(stderr, "wifipcap: %2d days %2d hours, %10" PRId64 " pkts\n", days, left, packetsProcessed); } lastPrintTime = header->ts; } packetsProcessed++; /* Create the packet object and call the appropriate callbacks */ WifiPacket pkt(cbs,header_type,header,packet); /* Notify callback */ cbs->PacketBegin(pkt, packet, header->caplen, header->len); //int frameLen = header->caplen; switch(header_type) { case DLT_PRISM_HEADER: pkt.handle_prism(packet,header->caplen); break; case DLT_IEEE802_11_RADIO: pkt.handle_radiotap(packet,header->caplen); break; case DLT_IEEE802_11: pkt.handle_80211(packet,header->caplen); break; case DLT_EN10MB: pkt.handle_ether(packet,header->caplen); break; default: #if 0 // try handling it as default IP assuming framing is ethernet // (this is for testing) pkt.handle_ip(packet,header->caplen); #endif break; } cbs->PacketEnd(pkt); } /* The raw callback from pcap; jump back into the object-oriented domain */ /* note: u_char *user may not be const according to 
spec */ void Wifipcap::handle_packet_callback(u_char *user, const struct pcap_pkthdr *header, const u_char * packet) { Wifipcap::PcapUserData *data = reinterpret_cast(user); data->wcap->handle_packet(data->cbs,data->header_type,header,packet); } const char *Wifipcap::SetFilter(const char *filter) { struct bpf_program fp; bpf_u_int32 netp=PCAP_NETMASK_UNKNOWN; if(pcap_compile(descr,&fp,(char *)filter,0,netp) == -1) { return "Error calling pcap_compile"; } if(pcap_setfilter(descr,&fp) == -1) { return "Error setting filter"; } return NULL; } void Wifipcap::Run(WifipcapCallbacks *cbs, int maxpkts) { /* NOTE: This needs to be fixed so that the correct handle_packet is called */ packetsProcessed = 0; do { PcapUserData data(this,cbs,DLT_IEEE802_11_RADIO); pcap_loop(descr, maxpkts > 0 ? maxpkts - packetsProcessed : 0, Wifipcap::handle_packet_callback, reinterpret_cast(&data)); } while ( InitNext() ); } /////////////////////////////////////////////////////////////////////////////// tcpflow/src/wifipcap/arp.h0000644000175000017500000000672412263701151014475 0ustar dimadima/* * Copyright (c) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that: (1) source code distributions * retain the above copyright notice and this paragraph in its entirety, (2) * distributions including binary code include the above copyright notice and * this paragraph in its entirety in the documentation or other materials * provided with the distribution, and (3) all advertising materials mentioning * features or use of this software display the following acknowledgement: * ``This product includes software developed by the University of California, * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of * the University nor the names of its contributors may be used to endorse * or promote products derived from this software without specific prior * written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ /* * Address Resolution Protocol. * * See RFC 826 for protocol description. ARP packets are variable * in size; the arphdr structure defines the fixed-length portion. * Protocol type values are the same as those for 10 Mb/s Ethernet. * It is followed by the variable-sized fields ar_sha, arp_spa, * arp_tha and arp_tpa in that order, according to the lengths * specified. Field names used correspond to RFC 826. 
*/ struct arp_pkthdr { u_short ar_hrd; /* format of hardware address */ #define ARPHRD_ETHER 1 /* ethernet hardware format */ #define ARPHRD_IEEE802 6 /* token-ring hardware format */ #define ARPHRD_ARCNET 7 /* arcnet hardware format */ #define ARPHRD_FRELAY 15 /* frame relay hardware format */ #define ARPHRD_STRIP 23 /* Ricochet Starmode Radio hardware format */ #define ARPHRD_IEEE1394 24 /* IEEE 1394 (FireWire) hardware format */ u_short ar_pro; /* format of protocol address */ u_char ar_hln; /* length of hardware address */ u_char ar_pln; /* length of protocol address */ u_short ar_op; /* one of: */ #define ARPOP_REQUEST 1 /* request to resolve address */ #define ARPOP_REPLY 2 /* response to previous request */ #define ARPOP_REVREQUEST 3 /* request protocol address given hardware */ #define ARPOP_REVREPLY 4 /* response giving protocol address */ #define ARPOP_INVREQUEST 8 /* request to identify peer */ #define ARPOP_INVREPLY 9 /* response identifying peer */ /* * The remaining fields are variable in size, * according to the sizes above. 
*/ #ifdef COMMENT_ONLY u_char ar_sha[]; /* sender hardware address */ u_char ar_spa[]; /* sender protocol address */ u_char ar_tha[]; /* target hardware address */ u_char ar_tpa[]; /* target protocol address */ #endif #define ar_sha(ap) (((const u_char *)((ap)+1))+0) #define ar_spa(ap) (((const u_char *)((ap)+1))+ (ap)->ar_hln) #define ar_tha(ap) (((const u_char *)((ap)+1))+ (ap)->ar_hln+(ap)->ar_pln) #define ar_tpa(ap) (((const u_char *)((ap)+1))+2*(ap)->ar_hln+(ap)->ar_pln) }; #define ARP_HDRLEN 8 #define HRD(ap) EXTRACT_16BITS(&(ap)->ar_hrd) #define HLN(ap) ((ap)->ar_hln) #define PLN(ap) ((ap)->ar_pln) #define OP(ap) EXTRACT_16BITS(&(ap)->ar_op) #define PRO(ap) EXTRACT_16BITS(&(ap)->ar_pro) #define SHA(ap) (ar_sha(ap)) #define SPA(ap) (ar_spa(ap)) #define THA(ap) (ar_tha(ap)) #define TPA(ap) (ar_tpa(ap)) tcpflow/src/wifipcap/ether.h0000644000175000017500000000007412263701151015012 0ustar dimadima struct ether_hdr_t { MAC sa, da; uint16_t type; }; tcpflow/src/wifipcap/os.h0000644000175000017500000000056312263701151014327 0ustar dimadima#pragma once #ifdef _WIN32 #define _PACKED_ #include #define u_int8_t UCHAR #define u_int16_t USHORT #define u_int32_t ULONG #define u_int64_t ULONGLONG #define int8_t CHAR #define int16_t SHORT #define int32_t LONG #define int64_t LONGLONG #define u_char UCHAR #else #define _PACKED_ __attribute__((__packed__)) #endif tcpflow/src/wifipcap/llc.h0000644000175000017500000000730612263701151014462 0ustar dimadima/* * Copyright (c) 1993, 1994, 1997 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that: (1) source code distributions * retain the above copyright notice and this paragraph in its entirety, (2) * distributions including binary code include the above copyright notice and * this paragraph in its entirety in the documentation or other materials * provided with the distribution, and (3) all advertising materials mentioning * features or use of this software display the following acknowledgement: * ``This product includes software developed by the University of California, * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of * the University nor the names of its contributors may be used to endorse * or promote products derived from this software without specific prior * written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * @(#) $Header: /home/cvs/wifitools/wifipcap/llc.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* * Definitions for information in the LLC header. 
*/ #ifndef UNI_LLC_H #define UNI_LLC_H #define LLC_U_FMT 3 #define LLC_GSAP 1 #define LLC_IG 1 /* Individual / Group */ #define LLC_S_FMT 1 #define LLC_U_POLL 0x10 #define LLC_IS_POLL 0x0100 #define LLC_XID_FI 0x81 #define LLC_U_CMD(u) ((u) & 0xef) #define LLC_UI 0x03 #define LLC_UA 0x63 #define LLC_DISC 0x43 #define LLC_DM 0x0f #define LLC_SABME 0x6f #define LLC_TEST 0xe3 #define LLC_XID 0xaf #define LLC_FRMR 0x87 #define LLC_S_CMD(is) (((is) >> 2) & 0x03) #define LLC_RR 0x0001 #define LLC_RNR 0x0005 #define LLC_REJ 0x0009 #define LLC_IS_NR(is) (((is) >> 9) & 0x7f) #define LLC_I_NS(is) (((is) >> 1) & 0x7f) #ifndef LLCSAP_NULL #define LLCSAP_NULL 0x00 #endif #ifndef LLCSAP_GLOBAL #define LLCSAP_GLOBAL 0xff #endif #ifndef LLCSAP_8021B_I #define LLCSAP_8021B_I 0x02 #endif #ifndef LLCSAP_8021B_G #define LLCSAP_8021B_G 0x03 #endif #ifndef LLCSAP_SNA #define LLCSAP_SNA 0x04 #endif #ifndef LLCSAP_IP #define LLCSAP_IP 0x06 #endif #ifndef LLCSAP_PROWAYNM #define LLCSAP_PROWAYNM 0x0e #endif #ifndef LLCSAP_8021D #define LLCSAP_8021D 0x42 #endif #ifndef LLCSAP_RS511 #define LLCSAP_RS511 0x4e #endif #ifndef LLCSAP_ISO8208 #define LLCSAP_ISO8208 0x7e #endif #ifndef LLCSAP_PROWAY #define LLCSAP_PROWAY 0x8e #endif #ifndef LLCSAP_SNAP #define LLCSAP_SNAP 0xaa #endif #ifndef LLCSAP_IPX #define LLCSAP_IPX 0xe0 #endif #ifndef LLCSAP_NETBEUI #define LLCSAP_NETBEUI 0xf0 #endif #ifndef LLCSAP_ISONS #define LLCSAP_ISONS 0xfe #endif /* * PIDs for use with OUI_CISCO. */ #define PID_CISCO_CDP 0x2000 /* Cisco Discovery Protocol */ /* * PIDs for use with OUI_RFC2684. 
*/ #define PID_RFC2684_ETH_FCS 0x0001 /* Ethernet, with FCS */ #define PID_RFC2684_ETH_NOFCS 0x0007 /* Ethernet, without FCS */ #define PID_RFC2684_802_4_FCS 0x0002 /* 802.4, with FCS */ #define PID_RFC2684_802_4_NOFCS 0x0008 /* 802.4, without FCS */ #define PID_RFC2684_802_5_FCS 0x0003 /* 802.5, with FCS */ #define PID_RFC2684_802_5_NOFCS 0x0009 /* 802.5, without FCS */ #define PID_RFC2684_FDDI_FCS 0x0004 /* FDDI, with FCS */ #define PID_RFC2684_FDDI_NOFCS 0x000a /* FDDI, without FCS */ #define PID_RFC2684_802_6_FCS 0x0005 /* 802.6, with FCS */ #define PID_RFC2684_802_6_NOFCS 0x000b /* 802.6, without FCS */ #define PID_RFC2684_BPDU 0x000e /* BPDUs */ /* Jeff: endian-fixed llc/snap header + ethernet type */ struct llc_hdr_t { uint8_t dsap; uint8_t ssap; uint8_t control; uint16_t oui; uint16_t type; }; #endif tcpflow/src/wifipcap/ip6.h0000644000175000017500000001752112263701151014406 0ustar dimadima/* @(#) $Header: /home/cvs/wifitools/wifipcap/ip6.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* $NetBSD: ip6.h,v 1.9 2000/07/13 05:34:21 itojun Exp $ */ /* $KAME: ip6.h,v 1.9 2000/07/02 21:01:32 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip.h 8.1 (Berkeley) 6/10/93 */ #ifndef _NETINET_IP6_H_ #define _NETINET_IP6_H_ /* * Definition for internet protocol version 6. * RFC 2460 */ struct ip6_hdr { union { struct ip6_hdrctl { u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ u_int16_t ip6_un1_plen; /* payload length */ u_int8_t ip6_un1_nxt; /* next header */ u_int8_t ip6_un1_hlim; /* hop limit */ } ip6_un1; u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ } ip6_ctlun; struct in6_addr ip6_src; /* source address */ struct in6_addr ip6_dst; /* destination address */ }; #define ip6_vfc ip6_ctlun.ip6_un2_vfc #define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow #define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen #define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt #define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim #define ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim /* in network endian */ #define IPV6_FLOWINFO_MASK ((u_int32_t)htonl(0x0fffffff)) /* flow info (28 bits) */ #define IPV6_FLOWLABEL_MASK ((u_int32_t)htonl(0x000fffff)) /* flow label (20 bits) */ #if 1 /* ECN bits proposed by Sally Floyd */ #define IP6TOS_CE 0x01 /* congestion experienced */ #define IP6TOS_ECT 0x02 /* ECN-capable transport */ #endif /* * Extension Headers */ struct ip6_ext { u_char 
ip6e_nxt; u_char ip6e_len; }; /* Hop-by-Hop options header */ /* XXX should we pad it to force alignment on an 8-byte boundary? */ struct ip6_hbh { u_int8_t ip6h_nxt; /* next header */ u_int8_t ip6h_len; /* length in units of 8 octets */ /* followed by options */ }; /* Destination options header */ /* XXX should we pad it to force alignment on an 8-byte boundary? */ struct ip6_dest { u_int8_t ip6d_nxt; /* next header */ u_int8_t ip6d_len; /* length in units of 8 octets */ /* followed by options */ }; /* Option types and related macros */ #define IP6OPT_PAD1 0x00 /* 00 0 00000 */ #define IP6OPT_PADN 0x01 /* 00 0 00001 */ #define IP6OPT_JUMBO 0xC2 /* 11 0 00010 = 194 */ #define IP6OPT_JUMBO_LEN 6 #define IP6OPT_ROUTER_ALERT 0x05 /* 00 0 00101 */ #define IP6OPT_RTALERT_LEN 4 #define IP6OPT_RTALERT_MLD 0 /* Datagram contains an MLD message */ #define IP6OPT_RTALERT_RSVP 1 /* Datagram contains an RSVP message */ #define IP6OPT_RTALERT_ACTNET 2 /* contains an Active Networks msg */ #define IP6OPT_MINLEN 2 #define IP6OPT_BINDING_UPDATE 0xc6 /* 11 0 00110 */ #define IP6OPT_BINDING_ACK 0x07 /* 00 0 00111 */ #define IP6OPT_BINDING_REQ 0x08 /* 00 0 01000 */ #define IP6OPT_HOME_ADDRESS 0xc9 /* 11 0 01001 */ #define IP6OPT_EID 0x8a /* 10 0 01010 */ #define IP6OPT_TYPE(o) ((o) & 0xC0) #define IP6OPT_TYPE_SKIP 0x00 #define IP6OPT_TYPE_DISCARD 0x40 #define IP6OPT_TYPE_FORCEICMP 0x80 #define IP6OPT_TYPE_ICMP 0xC0 #define IP6OPT_MUTABLE 0x20 /* Routing header */ struct ip6_rthdr { u_int8_t ip6r_nxt; /* next header */ u_int8_t ip6r_len; /* length in units of 8 octets */ u_int8_t ip6r_type; /* routing type */ u_int8_t ip6r_segleft; /* segments left */ /* followed by routing type specific data */ }; /* Type 0 Routing header */ struct ip6_rthdr0 { u_int8_t ip6r0_nxt; /* next header */ u_int8_t ip6r0_len; /* length in units of 8 octets */ u_int8_t ip6r0_type; /* always zero */ u_int8_t ip6r0_segleft; /* segments left */ u_int8_t ip6r0_reserved; /* reserved field */ u_int8_t 
ip6r0_slmap[3]; /* strict/loose bit map */ struct in6_addr ip6r0_addr[1]; /* up to 23 addresses */ }; /* Fragment header */ struct ip6_frag { u_int8_t ip6f_nxt; /* next header */ u_int8_t ip6f_reserved; /* reserved field */ u_int16_t ip6f_offlg; /* offset, reserved, and flag */ u_int32_t ip6f_ident; /* identification */ }; #define IP6F_OFF_MASK 0xfff8 /* mask out offset from ip6f_offlg */ #define IP6F_RESERVED_MASK 0x0006 /* reserved bits in ip6f_offlg */ #define IP6F_MORE_FRAG 0x0001 /* more-fragments flag */ #endif /* not _NETINET_IP6_H_ */ /* Jeff: endian-fixed, decoded version passed to apps (XXX TODO) */ struct ip6_hdr_t { union { struct ip6_hdrctl { u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ u_int16_t ip6_un1_plen; /* payload length */ u_int8_t ip6_un1_nxt; /* next header */ u_int8_t ip6_un1_hlim; /* hop limit */ } ip6_un1; u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ } ip6_ctlun; struct in6_addr ip6_src; /* source address */ struct in6_addr ip6_dst; /* destination address */ }; tcpflow/src/wifipcap/README.txt0000644000175000017500000000375012263701151015234 0ustar dimadimaMAINTAINER ========== Simson L. Garfinkel ACKNOWLEDGEMENTS ================ Thanks to: * Jeffrey Pang, for the radiotap implementation * Doug Madory, for the Wifi parser * Jeremy Elson, for the original idea and initial tcp/ip implementation Title: Wifipcap Library Authors: Jeff Pang, Simson L. Garfinkel Description: ============ A simple C++ wrapper around libpcap that allows applications to selectively demultiplex 802.11 frames, and the most common layer 2 and layer 3 protocols contained within them. Basically, the wifipcap library handles all the parsing of 802.11 frames (and/or layer 2/3 packets) from the pcap file (or stream). Some of the code is derived from tcpdump. This program somewhat reworked by Simson Garfinkel Linux: Requires libpcap >= 0.9.4 on Linux. 
Windows: Requires WinPcap >= 4.0.2 and AirPcap for 802.11 capture See: http://www.cacetech.com/support/downloads.htm Usage: ====== For an overview see wifipcap.h. For an example, see sample.cpp. (0) Compile wifipcap. In Linux: Enter this directory and type: make In Windows: Open wifipcap.sln in Visual Studio and build it. You will need to have the winpcap include and library files in the appropriate search paths. (1) Include the header "wifipcap.h" in your application C++ file(s). (2) Implement a subclass of WifipcapCallbacks. This class has one member function for each type of 802.11 frame and layer 2/3 packets. Each of these functions will be called as a frame/packet is parsed. (3) Create an instance of Wifipcap with either a pcap trace file or a live device to capture packets from. (4) Call Wifipcap::Run with your instance of WifipcapCallbacks. (5) Compile your program linking to libpcap and wifipcap.a. On Linux: g++ -o myprogram myprogram.c /path/to/wifipcap.a -lpcap On Windows: Link the following libraries: wpcap.lib ws2_32.lib WINMM.LIB wifipcap.lib Make sure wifipcap.lib is in the library path. tcpflow/src/wifipcap/TimeVal.cpp0000644000175000017500000000326612263701151015605 0ustar dimadima//////////////////////////////////////////////////////////////////////////////// // Mercury and Colyseus Software Distribution // // Copyright (C) 2004-2005 Ashwin Bharambe (ashu@cs.cmu.edu) // 2004-2005 Jeffrey Pang (jeffpang@cs.cmu.edu) // 2004 Mukesh Agrawal (mukesh@cs.cmu.edu) // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. 
// // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 // USA //////////////////////////////////////////////////////////////////////////////// /* -*- Mode:c++; c-basic-offset:4; tab-width:4; indent-tabs-mode:t -*- */ /************************************************************************** TimeVal.cpp begin : Oct 16, 2003 version : $Id: TimeVal.cpp,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ copyright : (C) 2003 Jeff Pang ( jeffpang@cs.cmu.edu ) (C) 2003 Justin Weisz ( jweisz@cs.cmu.edu ) ***************************************************************************/ #include #include #include "TimeVal.h" //using namespace std; TimeVal TIME_NONE = {0,0}; tcpflow/src/wifipcap/cpack.cpp0000644000175000017500000000712512263701151015323 0ustar dimadima/*- * Copyright (c) 2003, 2004 David Young. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of David Young may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DAVID * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ #ifndef WIN32 #include #include "cpack.h" #include "extract.h" static u_int8_t * cpack_next_boundary(u_int8_t *buf, u_int8_t *p, size_t alignment) { size_t misalignment = (size_t)(p - buf) % alignment; if (misalignment == 0) return p; return p + (alignment - misalignment); } /* Advance to the next wordsize boundary. Return NULL if fewer than * wordsize bytes remain in the buffer after the boundary. Otherwise, * return a pointer to the boundary. */ static u_int8_t * cpack_align_and_reserve(struct cpack_state *cs, size_t wordsize) { u_int8_t *next; /* Ensure alignment. */ next = cpack_next_boundary(cs->c_buf, cs->c_next, wordsize); /* Too little space for wordsize bytes? */ if (next - cs->c_buf + wordsize > cs->c_len) return 0; return next; } int cpack_init(struct cpack_state *cs, u_int8_t *buf, size_t buflen) { memset(cs, 0, sizeof(*cs)); cs->c_buf = buf; cs->c_len = buflen; cs->c_next = cs->c_buf; return 0; } /* Unpack a 64-bit unsigned integer. */ int cpack_uint64(struct cpack_state *cs, u_int64_t *u) { u_int8_t *next; if ((next = cpack_align_and_reserve(cs, sizeof(*u))) == NULL) return -1; *u = EXTRACT_LE_64BITS(next); /* Move pointer past the u_int64_t. */ cs->c_next = next + sizeof(*u); return 0; } /* Unpack a 32-bit unsigned integer. */ int cpack_uint32(struct cpack_state *cs, u_int32_t *u) { u_int8_t *next; if ((next = cpack_align_and_reserve(cs, sizeof(*u))) == NULL) return -1; *u = EXTRACT_LE_32BITS(next); /* Move pointer past the u_int32_t. 
*/ cs->c_next = next + sizeof(*u); return 0; } /* Unpack a 16-bit unsigned integer. */ int cpack_uint16(struct cpack_state *cs, u_int16_t *u) { u_int8_t *next; if ((next = cpack_align_and_reserve(cs, sizeof(*u))) == NULL) return -1; *u = EXTRACT_LE_16BITS(next); /* Move pointer past the u_int16_t. */ cs->c_next = next + sizeof(*u); return 0; } /* Unpack an 8-bit unsigned integer. */ int cpack_uint8(struct cpack_state *cs, u_int8_t *u) { /* No space left? */ if ((size_t)(cs->c_next - cs->c_buf) >= cs->c_len) return -1; *u = *cs->c_next; /* Move pointer past the u_int8_t. */ cs->c_next++; return 0; } #endif tcpflow/src/wifipcap/wifipcap_tcpdemux.cpp0000644000175000017500000003521412263701151017755 0ustar dimadima/////////////////////////////////////////////////////////////////////////////// /* These tcp optinos do not have the size octet */ #define ZEROLENOPT(o) ((o) == TCPOPT_EOL || (o) == TCPOPT_NOP) #if USING_PARSE_TCP_OPTS static void parse_tcp_opts(std::list& opts, const u_char *cp, u_int hlen) { if (hlen == 0) return; register u_int i, opt, datalen; register u_int len; //putchar(' '); //ch = '<'; while (hlen > 0) { tcp_opt_t tcpopt; //putchar(ch); //TCHECK(*cp); opt = *cp++; if (ZEROLENOPT(opt)) len = 1; else { //TCHECK(*cp); len = *cp++; /* total including type, len */ if (len < 2 || len > hlen) // stop processing on bad opt break; --hlen; /* account for length byte */ } --hlen; /* account for type byte */ datalen = 0; /* Bail if "l" bytes of data are not left or were not captured */ #define LENCHECK(l) { if ((l) > hlen) break; } tcpopt.type = opt; tcpopt.data_raw = cp; switch (opt) { case TCPOPT_MAXSEG: //(void)printf("mss"); datalen = 2; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_16BITS(cp)); tcpopt.data.mss = EXTRACT_16BITS(cp); break; case TCPOPT_EOL: //(void)printf("eol"); break; case TCPOPT_NOP: //(void)printf("nop"); break; case TCPOPT_WSCALE: //(void)printf("wscale"); datalen = 1; LENCHECK(datalen); //(void)printf(" %u", *cp); tcpopt.data.wscale 
= *cp; break; case TCPOPT_SACKOK: //(void)printf("sackOK"); break; case TCPOPT_SACK: datalen = len - 2; if (datalen % 8 != 0) { //(void)printf("malformed sack"); } else { u_int32_t s, e; //(void)printf("sack %d ", datalen / 8); for (i = 0; i < datalen; i += 8) { LENCHECK(i + 4); s = EXTRACT_32BITS(cp + i); LENCHECK(i + 8); e = EXTRACT_32BITS(cp + i + 4); /* XXX leave application to do this translation? if (threv) { s -= thseq; e -= thseq; } else { s -= thack; e -= thack; } (void)printf("{%u:%u}", s, e); */ tcpopt.data_sack.push_back(std::pair(s,e)); } } break; case TCPOPT_ECHO: //(void)printf("echo"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.echo = EXTRACT_32BITS(cp); break; case TCPOPT_ECHOREPLY: //(void)printf("echoreply"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.echoreply = EXTRACT_32BITS(cp); break; case TCPOPT_TIMESTAMP: //(void)printf("timestamp"); datalen = 8; //LENCHECK(4); //(void)printf(" %u", EXTRACT_32BITS(cp)); LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp + 4)); tcpopt.data.timestamp.tsval = EXTRACT_32BITS(cp); tcpopt.data.timestamp.tsecr = EXTRACT_32BITS(cp + 4); break; case TCPOPT_CC: //(void)printf("cc"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.cc = EXTRACT_32BITS(cp); break; case TCPOPT_CCNEW: //(void)printf("ccnew"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.ccnew = EXTRACT_32BITS(cp); break; case TCPOPT_CCECHO: //(void)printf("ccecho"); datalen = 4; LENCHECK(datalen); //(void)printf(" %u", EXTRACT_32BITS(cp)); tcpopt.data.ccecho = EXTRACT_32BITS(cp); break; case TCPOPT_SIGNATURE: //(void)printf("md5:"); datalen = TCP_SIGLEN; LENCHECK(datalen); for (i = 0; i < TCP_SIGLEN; ++i) //(void)printf("%02x", cp[i]); tcpopt.data.signature[i] = cp[i]; break; default: //(void)printf("opt-%u:", opt); datalen = len - 2; /* for (i = 0; i < datalen; ++i) { 
LENCHECK(i); (void)printf("%02x", cp[i]); } */ break; } /* Account for data printed */ cp += datalen; hlen -= datalen; /* Check specification against observed length */ //++datalen; /* option octet */ //if (!ZEROLENOPT(opt)) // ++datalen; /* size octet */ //if (datalen != len) // (void)printf("[len %d]", len); //ch = ','; tcpopt.len = datalen; opts.push_back(tcpopt); if (opt == TCPOPT_EOL) break; } //putchar('>'); } #endif void handle_tcp(WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct tcphdr *tp; tp = (struct tcphdr *)bp; int hlen; // truncated header if (length < sizeof(*tp)) { cbs->HandleTCP(ip4h, ip6h, NULL, NULL, 0, bp, length); return; } hlen = TH_OFF(tp) * 4; // bad header length || missing tcp options if (hlen < (int)sizeof(*tp) || length < (int)sizeof(*tp) || hlen > (int)length) { cbs->HandleTCP(ip4h, ip6h, NULL, NULL, 0, bp, length); return; } tcp_hdr_t hdr; hdr.sport = EXTRACT_16BITS(&tp->th_sport); hdr.dport = EXTRACT_16BITS(&tp->th_dport); hdr.seq = EXTRACT_32BITS(&tp->th_seq); hdr.ack = EXTRACT_32BITS(&tp->th_ack); hdr.dataoff = TH_OFF(tp) * 4; hdr.flags = tp->th_flags; hdr.win = EXTRACT_16BITS(&tp->th_win); hdr.cksum = EXTRACT_16BITS(&tp->th_sum); hdr.urgptr = EXTRACT_16BITS(&tp->th_urp); #if USING_PARSE_TCP_OPTS parse_tcp_opts(hdr.opts, bp+sizeof(*tp), hlen-sizeof(*tp)); #endif cbs->HandleTCP(ip4h, ip6h, &hdr, hlen==sizeof(*tp)?NULL:bp+sizeof(*tp), hlen-sizeof(*tp), bp+hlen, length-hlen); } void handle_udp( WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct udphdr *uh; uh = (struct udphdr *)bp; if (length < sizeof(struct udphdr)) { // truncated udp header cbs->HandleUDP(ip4h, ip6h, NULL, bp, length); return; } udp_hdr_t hdr; hdr.sport = EXTRACT_16BITS(&uh->uh_sport); hdr.dport = EXTRACT_16BITS(&uh->uh_dport); hdr.len = EXTRACT_16BITS(&uh->uh_ulen); hdr.cksum = EXTRACT_16BITS(&uh->uh_sum); 
cbs->HandleUDP(ip4h, ip6h, &hdr, bp+sizeof(struct udphdr), length-sizeof(struct udphdr)); } void handle_icmp( WifipcapCallbacks *cbs, const u_char *bp, u_int length, struct ip4_hdr_t *ip4h, struct ip6_hdr_t *ip6h, int fragmented) { struct icmp *dp; dp = (struct icmp *)bp; if (length < 4) { // truncated icmp header cbs->HandleICMP(ip4h, ip6h, -1, -1, bp, length); return; } cbs->HandleICMP(ip4h, ip6h, dp->icmp_type, dp->icmp_code, bp+4, length-4); } /////////////////////////////////////////////////////////////////////////////// struct ip_print_demux_state { struct ip *ip; const u_char *cp; u_int len, off; u_char nh; int advance; }; void ip_demux( WifipcapCallbacks *cbs, ip4_hdr_t *hdr, struct ip_print_demux_state *ipds, u_int len) { //struct protoent *proto; //again: switch (ipds->nh) { case IPPROTO_TCP: /* pass on the MF bit plus the offset to detect fragments */ handle_tcp(cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_UDP: /* pass on the MF bit plus the offset to detect fragments */ handle_udp(cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_ICMP: /* pass on the MF bit plus the offset to detect fragments */ handle_icmp(cbs, ipds->cp, ipds->len, hdr, NULL, ipds->off & (IP_MF|IP_OFFMASK)); break; case IPPROTO_IPV4: /* DVMRP multicast tunnel (ip-in-ip encapsulation) */ //handle_ip(t, cbs, ipds->cp, ipds->len); //break; case IPPROTO_IPV6: /* ip6-in-ip encapsulation */ //handle_ip6(t, cbs, ipds->cp, ipds->len); //break; ///// Jeff: XXX Some day handle these maybe (see tcpdump code) case IPPROTO_AH: /* ipds->nh = *ipds->cp; ipds->advance = ah_print(ipds->cp); if (ipds->advance <= 0) break; ipds->cp += ipds->advance; ipds->len -= ipds->advance; goto again; */ case IPPROTO_ESP: { /* int enh, padlen; ipds->advance = esp_print(ndo, ipds->cp, ipds->len, (const u_char *)ipds->ip, &enh, &padlen); if (ipds->advance <= 0) break; ipds->cp += ipds->advance; ipds->len -= ipds->advance + padlen; 
ipds->nh = enh & 0xff; goto again; */ } case IPPROTO_IPCOMP: { /* int enh; ipds->advance = ipcomp_print(ipds->cp, &enh); if (ipds->advance <= 0) break; ipds->cp += ipds->advance; ipds->len -= ipds->advance; ipds->nh = enh & 0xff; goto again; */ } case IPPROTO_SCTP: /* sctp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len); break; */ case IPPROTO_DCCP: /* dccp_print(ipds->cp, (const u_char *)ipds->ip, ipds->len); break; */ case IPPROTO_PIGP: /* * XXX - the current IANA protocol number assignments * page lists 9 as "any private interior gateway * (used by Cisco for their IGRP)" and 88 as * "EIGRP" from Cisco. * * Recent BSD headers define * IP_PROTO_PIGP as 9 and IP_PROTO_IGRP as 88. * We define IP_PROTO_PIGP as 9 and * IP_PROTO_EIGRP as 88; those names better * match was the current protocol number * assignments say. */ /* igrp_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ case IPPROTO_EIGRP: /* eigrp_print(ipds->cp, ipds->len); break; */ case IPPROTO_ND: /* ND_PRINT((ndo, " nd %d", ipds->len)); break; */ case IPPROTO_EGP: /* egp_print(ipds->cp, ipds->len); break; */ case IPPROTO_OSPF: /* ospf_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ case IPPROTO_IGMP: /* igmp_print(ipds->cp, ipds->len); break; */ case IPPROTO_RSVP: /* rsvp_print(ipds->cp, ipds->len); break; */ case IPPROTO_GRE: /* do it */ /* gre_print(ipds->cp, ipds->len); break; */ case IPPROTO_MOBILE: /* mobile_print(ipds->cp, ipds->len); break; */ case IPPROTO_PIM: /* pim_print(ipds->cp, ipds->len); break; */ case IPPROTO_VRRP: /* vrrp_print(ipds->cp, ipds->len, ipds->ip->ip_ttl); break; */ case IPPROTO_PGM: /* pgm_print(ipds->cp, ipds->len, (const u_char *)ipds->ip); break; */ default: /* if ((proto = getprotobynumber(ipds->nh)) != NULL) ND_PRINT((ndo, " %s", proto->p_name)); else ND_PRINT((ndo, " ip-proto-%d", ipds->nh)); ND_PRINT((ndo, " %d", ipds->len)); */ cbs->HandleL3Unknown(hdr, NULL, ipds->cp, ipds->len); break; } } void handle_ip6( WifipcapCallbacks *cbs, 
const u_char *ptr, u_int len); void handle_ip( WifipcapCallbacks *cbs, const u_char *ptr, u_int len) { struct ip_print_demux_state ipd; struct ip_print_demux_state *ipds=&ipd; u_int hlen; // truncated (in fact, nothing!) if (len == 0) { cbs->HandleIP(NULL, NULL, 0, ptr, len); return; } ipds->ip = (struct ip *)ptr; if (IP_V(ipds->ip) != 4) { if (IP_V(ipds->ip) == 6) { // wrong link-layer encap! handle_ip6(cbs, ptr, len); return; } } if (len < sizeof (struct ip)) { // truncated! cbs->HandleIP(NULL, NULL, 0, ptr, len); return; } hlen = IP_HL(ipds->ip) * 4; ipds->len = EXTRACT_16BITS(&ipds->ip->ip_len); if (len < ipds->len) { // truncated IP // this is ok, we'll just report the truncation later } if (ipds->len < hlen) { // missing some ip options! cbs->HandleIP(NULL, NULL, 0, ptr, len); } ipds->len -= hlen; ipds->off = EXTRACT_16BITS(&ipds->ip->ip_off); struct ip4_hdr_t hdr; hdr.ver = IP_V(ipds->ip); hdr.hlen = IP_HL(ipds->ip) * 4; hdr.tos = ipds->ip->ip_tos; hdr.len = EXTRACT_16BITS(&ipds->ip->ip_len); hdr.id = EXTRACT_16BITS(&ipds->ip->ip_id); hdr.df = (bool)((ipds->off & IP_DF) != 0); hdr.mf = (bool)((ipds->off & IP_MF) != 0); hdr.fragoff = (ipds->off & IP_OFFMASK); hdr.ttl = ipds->ip->ip_ttl; hdr.proto = ipds->ip->ip_p; hdr.cksum = EXTRACT_16BITS(&ipds->ip->ip_sum); hdr.src = ipds->ip->ip_src; hdr.dst = ipds->ip->ip_dst; cbs->HandleIP(&hdr, hlen==sizeof(struct ip)?NULL:ptr+sizeof(struct ip), hlen-sizeof(struct ip), ptr+hlen, len-hlen); /* * If this is fragment zero, hand it to the next higher * level protocol. */ if ((ipds->off & 0x1fff) == 0) { ipds->cp = (const u_char *)ipds->ip + hlen; ipds->nh = ipds->ip->ip_p; ip_demux(cbs, &hdr, ipds, len); } else { // This is a fragment of a previous packet. 
can't demux it return; } } void handle_ip6( WifipcapCallbacks *cbs, const u_char *ptr, u_int len) { const struct ip6_hdr *ip6; if (len < sizeof (struct ip6_hdr)) { cbs->HandleIP6(NULL, ptr, len); return; } ip6 = ( const struct ip6_hdr *)ptr; ip6_hdr_t hdr; memcpy(&hdr, ip6, sizeof(hdr)); hdr.ip6_plen = EXTRACT_16BITS(&ip6->ip6_plen); hdr.ip6_flow = EXTRACT_32BITS(&ip6->ip6_flow); cbs->HandleIP6(&hdr, ptr+sizeof(hdr), len-sizeof(hdr)); int nh = ip6->ip6_nxt; switch(nh) { case IPPROTO_TCP: handle_tcp(cbs, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr), NULL, &hdr, 0); break; case IPPROTO_UDP: handle_udp(cbs, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr), NULL, &hdr, 0); break; default: cbs->HandleL3Unknown(NULL, &hdr, ptr+sizeof(ip6_hdr), len-sizeof(ip6_hdr)); break; } } void handle_arp( WifipcapCallbacks *cbs, const u_char *ptr, u_int len) { struct arp_pkthdr *ap; //u_short pro, hrd, op; if (len < sizeof(struct arp_pkthdr)) { cbs->HandleARP(NULL, ptr, len); return; } ap = (struct arp_pkthdr *)ptr; cbs->HandleARP(ap, ptr+ARP_HDRLEN, len-ARP_HDRLEN); } tcpflow/src/wifipcap/util.h0000644000175000017500000000202312263701151014654 0ustar dimadima#ifndef __WIFIPCAP_UTIL_H_ #define __WIFIPCAP_UTIL_H_ #include typedef unsigned char uint8_t; typedef unsigned short uint16_t; #ifdef _WIN32 typedef unsigned long long uint64_t; #endif #if 0 struct MAC { uint64_t val; MAC() {} MAC(const uint8_t *stream); MAC(uint64_t val); MAC(const char *str); MAC(const MAC& o); bool operator==(const MAC& o) const { return val == o.val; } bool operator!=(const MAC& o) const { return val != o.val; } bool operator<(const MAC& o) const { return val < o.val; } enum { PRINT_FMT_COLON, PRINT_FMT_PLAIN }; static MAC broadcast; static MAC null; static int print_fmt; }; std::ostream& operator<<(std::ostream& out, const MAC& mac); std::ostream& operator<<(std::ostream& out, const struct in_addr& ip); #endif char *va(const char *format, ...); struct tok { int v; /* value */ const char *s; /* string */ }; extern const 
char * tok2str(register const struct tok *lp, register const char *fmt, register int v); #endif tcpflow/src/wifipcap/sample.cpp0000644000175000017500000000741412263701151015524 0ustar dimadima#include #include "wifipcap.h" /* Demonstration of how to process pcap packets with a simple callback class */ class TestCB : public WifipcapCallbacks { public: TestCB(){} virtual ~TestCB(){}; virtual const char *name() {return "TestCB";} // override with your own name! virtual void PacketBegin(const WifiPacket &p, const u_char *pkt, size_t len, int origlen) { TimeVal t(p.header->ts); std::cout << &t << " {"; } virtual void PacketEnd(const WifiPacket &p ) { std::cout << "}" << std::endl; } virtual bool Check80211FCS(const WifiPacket &p ) { return true; } // please calculate FCS virtual void Handle80211DataFromAP(const WifiPacket &p, const mac_hdr_t *hdr, const u_char *rest, u_int len) { std::cout << "802.11 data:\t" << hdr->sa << " -> " << hdr->da << "\t" << len ; } virtual void Handle80211DataToAP(const WifiPacket &p, const mac_hdr_t *hdr, const u_char *rest, u_int len) { std::cout << "802.11 data:\t" << hdr->sa << " -> " << hdr->da << "\t" << len ; } virtual void Handle80211MgmtProbeRequest(const WifiPacket &p, const mgmt_header_t *hdr, const mgmt_body_t *body) { std::cout << "802.11 mgmt:\t" << hdr->sa << "\tprobe\t\"" << body->ssid.ssid << "\"" ; } virtual void Handle80211MgmtBeacon(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body) { std::cout << "802.11 mgmt:\t" << hdr->sa << "\tbeacon\t\"" << body->ssid.ssid << "\"" ; } virtual void HandleTCP(const WifiPacket &p, const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, const tcp_hdr_t *hdr, const u_char *options, int optlen, const u_char *rest, u_int len) { if (ip4h && hdr) std::cout << "tcp/ip: \t" << ip4h->src << ":" << hdr->sport << " -> " << ip4h->dst << ":" << hdr->dport << "\t" << ip4h->len ; else std::cout << "tcp/ip: \t" << "[truncated]" ; } virtual void HandleUDP(const WifiPacket &p, 
const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, const udp_hdr_t *hdr, const u_char *rest, u_int len) { if (ip4h && hdr) std::cout << "udp/ip: \t" << ip4h->src << ":" << hdr->sport << " -> " << ip4h->dst << ":" << hdr->dport << "\t" << ip4h->len ; else std::cout << " " << "udp/ip: \t" << "[truncated]" ; } }; /** * usage: test */ int main(int argc, char **argv) { if (argc == 1) { pcap_if_t *alldevs; pcap_if_t *d; int i=0; char errbuf[PCAP_ERRBUF_SIZE]; /* Retrieve the device list from the local machine */ if (pcap_findalldevs(&alldevs, errbuf) == -1) { fprintf(stderr,"Error in pcap_findalldevs_ex: %s\n", errbuf); exit(1); } /* Print the list */ for(d= alldevs; d != NULL; d= d->next) { printf("%d. %s", ++i, d->name); if (d->description) printf(" (%s)\n", d->description); else printf(" (No description available)\n"); } if (i == 0) { printf("\nNo interfaces found! Make sure WinPcap is installed.\n"); return 1; } /* We don't need any more the device list. Free it */ pcap_freealldevs(alldevs); return 1; } bool live = argc == 3 && atoi(argv[2]) == 1; Wifipcap *wcap = new Wifipcap(argv[1], live); wcap->Run(new TestCB()); return 0; } tcpflow/src/wifipcap/TimeVal.h0000644000175000017500000001120012263701151015235 0ustar dimadima//////////////////////////////////////////////////////////////////////////////// // Mercury and Colyseus Software Distribution // // Copyright (C) 2004-2005 Ashwin Bharambe (ashu@cs.cmu.edu) // 2004-2005 Jeffrey Pang (jeffpang@cs.cmu.edu) // 2004 Mukesh Agrawal (mukesh@cs.cmu.edu) // 2013 Simson L. Garfinkel (simsong@acm.org) // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 // USA //////////////////////////////////////////////////////////////////////////////// /* -*- Mode:c++; c-basic-offset:4; tab-width:4; indent-tabs-mode:t -*- */ /************************************************************************** TimeVal.h begin : Oct 16, 2003 version : $Id: TimeVal.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ copyright : (C) 2003 Jeff Pang ( jeffpang@cs.cmu.edu ) (C) 2003 Justin Weisz ( jweisz@cs.cmu.edu ) (C) 2013 Simson Garfinkel ( simsong@acm.org ) ***************************************************************************/ #ifndef __TIME_VAL_H__ #define __TIME_VAL_H__ #include #include #ifndef _WIN32 #include #include #else #include #endif #include "types.h" typedef struct timeval TimeVal; #define MSEC_IN_SEC 1000 #define USEC_IN_SEC 1000000 #define USEC_IN_MSEC 1000 inline bool operator<(struct timeval a, struct timeval b) { return (a.tv_sec < b.tv_sec) || ((a.tv_sec == b.tv_sec) && (a.tv_usec < b.tv_usec)); } inline bool operator>(struct timeval a, struct timeval b) { return (a.tv_sec > b.tv_sec) || ((a.tv_sec == b.tv_sec) && (a.tv_usec > b.tv_usec)); } inline bool operator==(struct timeval a, struct timeval b) { return (a.tv_sec == b.tv_sec) && (a.tv_usec == b.tv_usec); } inline bool operator<=(struct timeval a, struct timeval b) { return a < b || a == b; } inline bool operator>=(struct timeval a, struct timeval b) { return a > b || a == b; } inline bool operator!=(struct timeval a, struct timeval b) { return !(a == b); } inline struct timeval operator+(struct timeval a, double add_msec) { struct timeval ret; // convert into sec/usec parts sint32 sec_part = (sint32)(add_msec/MSEC_IN_SEC); sint32 usec_part = (sint32)((add_msec - sec_part * MSEC_IN_SEC)*USEC_IN_MSEC); // do the 
initial addition ret.tv_sec = a.tv_sec + sec_part; ret.tv_usec = a.tv_usec + usec_part; // perform a carry if necessary if (ret.tv_usec > USEC_IN_SEC) { ret.tv_sec++; ret.tv_usec = ret.tv_usec % USEC_IN_SEC; } else if (ret.tv_usec < 0) { ret.tv_sec--; ret.tv_usec = USEC_IN_SEC + ret.tv_usec; } return ret; } inline int64_t operator-(struct timeval a, struct timeval b) { return ((sint64)a.tv_sec - (sint64)b.tv_sec)*USEC_IN_SEC + ((sint64)a.tv_usec - (sint64)b.tv_usec); } inline float timeval_to_float (struct timeval a) { return (float) a.tv_sec + ((float) a.tv_usec / USEC_IN_SEC); } inline std::ostream& operator<<(std::ostream& os, const TimeVal& t) { return os << &t; } #ifndef HAVE_TIMEVAL_OUT #define HAVE_TIMEVAL_OUT inline std::ostream& operator<<(std::ostream& os, const TimeVal* t) { return os << t->tv_sec << "." << std::setw(6) << std::setfill('0') << t->tv_usec; } #endif //bool operator<(struct timeval a, struct timeval b); //bool operator<=(struct timeval a, struct timeval b); //bool operator>(struct timeval a, struct timeval b); //bool operator>=(struct timeval a, struct timeval b); //bool operator==(struct timeval a, struct timeval b); //bool operator!=(struct timeval a, struct timeval b); //struct timeval operator+(struct timeval a, double add_msec); //sint64 operator-(struct timeval a, struct timeval b); /* usec result */ //float timeval_to_float (struct timeval a); extern TimeVal TIME_NONE; //std::ostream& operator<<(std::ostream& os, const TimeVal &t); //std::ostream& operator<<(std::ostream& os, const TimeVal *t); ////////////////////////////////////////////////////////////////////////////// #endif tcpflow/src/wifipcap/tcp.h0000644000175000017500000001157112263701151014475 0ustar dimadima/* @(#) $Header: /home/cvs/wifitools/wifipcap/tcp.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp.h 8.1 (Berkeley) 6/10/93 */ #ifndef TCP_H #define TCP_H typedef u_int32_t tcp_seq; /* * TCP header. * Per RFC 793, September, 1981. 
*/

/*
 * TCP header with BSD-style th_* field names.  The TH_* macros defined inside
 * the struct are ordinary preprocessor macros; they are placed next to the
 * field they describe.
 * NOTE(review): presumably this mirrors the on-the-wire layout, with
 * tcp_hdr_t (further below) being the decoded form -- confirm against the
 * code that fills them in.
 */
struct tcphdr {
    u_int16_t th_sport;     /* source port */
    u_int16_t th_dport;     /* destination port */
    tcp_seq th_seq;         /* sequence number */
    tcp_seq th_ack;         /* acknowledgement number */
    u_int8_t th_offx2;      /* data offset, rsvd */
/* data offset is the upper four bits of th_offx2 */
#define TH_OFF(th) (((th)->th_offx2 & 0xf0) >> 4)
    u_int8_t th_flags;
#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_PUSH 0x08
#define TH_ACK 0x10
#define TH_URG 0x20
#define TH_ECNECHO 0x40     /* ECN Echo */
#define TH_CWR 0x80         /* ECN Cwnd Reduced */
    u_int16_t th_win;       /* window */
    u_int16_t th_sum;       /* checksum */
    u_int16_t th_urp;       /* urgent pointer */
};

/* TCP option kinds (TCPOPT_*) and the fixed lengths of some of them (TCPOLEN_*). */
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOLEN_MAXSEG 4
#define TCPOPT_WSCALE 3             /* window scale factor (rfc1323) */
#define TCPOPT_SACKOK 4             /* selective ack ok (rfc2018) */
#define TCPOPT_SACK 5               /* selective ack (rfc2018) */
#define TCPOPT_ECHO 6               /* echo (rfc1072) */
#define TCPOPT_ECHOREPLY 7          /* echo (rfc1072) */
#define TCPOPT_TIMESTAMP 8          /* timestamp (rfc1323) */
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_CC 11                /* T/TCP CC options (rfc1644) */
#define TCPOPT_CCNEW 12             /* T/TCP CC options (rfc1644) */
#define TCPOPT_CCECHO 13            /* T/TCP CC options (rfc1644) */
#define TCPOPT_SIGNATURE 19         /* Keyed MD5 (rfc2385) */
#define TCPOLEN_SIGNATURE 18
#define TCP_SIGLEN 16               /* length of an option 19 digest */
#define TCPOPT_AUTH 20              /* Enhanced AUTH option */

/* The four option bytes NOP, NOP, TIMESTAMP, length packed into one 32-bit value. */
#define TCPOPT_TSTAMP_HDR \
    (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)

/* NOTE(review): the header names of these two #include lines are missing in
 * this copy; the class below uses std::list and std::pair. */
#include
#include

/*
 * One TCP option: its kind (`type`), its length (`len`), a pointer to its raw
 * bytes (`data_raw`) and, in `data`, the value for the option kinds that have
 * a fixed layout.  `data_sack` is a separate list outside the union.
 *
 * Copy construction is supported; assignment is declared private and left
 * unimplemented, so objects cannot be assigned.
 */
class tcp_opt_t {
    tcp_opt_t &operator=(const tcp_opt_t &); // not implemented
public:
    tcp_opt_t(const tcp_opt_t &t):type(t.type),len(t.len),data_raw(t.data_raw),data(t.data),data_sack(t.data_sack){};
    tcp_opt_t():type(),len(),data_raw(),data(),data_sack(){};
    u_int type;
    u_int len;
    const u_char *data_raw;
    /* Only the member that corresponds to `type` is meaningful. */
    union {
        u_int16_t mss;
        u_int8_t wscale;
        u_int32_t echo;
        u_int32_t echoreply;
        u_int32_t cc;
        u_int32_t ccnew;
        u_int32_t ccecho;
        struct {
            u_int32_t tsval;
            u_int32_t tsecr;
        } timestamp;
        u_int8_t signature[TCP_SIGLEN];
    } data;
    /* NOTE(review): the template arguments of std::pair are missing in this copy. */
    std::list< std::pair > data_sack;
};

/* Jeff: endian-fixed, fully decoded tcp header */
struct tcp_hdr_t {
    u_int16_t sport;        /* source port */
    u_int16_t dport;        /* destination port */
    tcp_seq seq;            /* sequence number */
    tcp_seq ack;            /* acknowledgement number */
    u_int8_t dataoff;       /* data offset */
    u_int8_t flags;         /* flags (see #defines under tcphdr::th_flags above) */
    u_int16_t win;          /* window */
    u_int16_t cksum;        /* checksum */
    u_int16_t urgptr;       /* urgent pointer */

    //std::list opts;
};

#endif
tcpflow/src/wifipcap/wifipcap.h0000644000175000017500000007156212263701151015517 0ustar dimadima
/**
 * Include this header in applications using wifipcap.
 * Released under GPLv3.
 * Some code (c) Jeffrey Pang , 2003
 * (C) Simson Garfinkel 2012-
 */

#ifndef _WIFIPCAP_H_
#define _WIFIPCAP_H_

/* NOTE(review): the header names of the five bare #include lines below are
 * missing in this copy. */
#include
#include
#include
#include
#include

#include "arp.h"
#include "ip.h"
#include "ip6.h"
#include "tcp.h"
#include "udp.h"
#include "TimeVal.h"

/* Lengths of 802.11 header components. */
#define IEEE802_11_FC_LEN 2
#define IEEE802_11_DUR_LEN 2
#define IEEE802_11_DA_LEN 6
#define IEEE802_11_SA_LEN 6
#define IEEE802_11_BSSID_LEN 6
#define IEEE802_11_RA_LEN 6
#define IEEE802_11_TA_LEN 6
#define IEEE802_11_SEQ_LEN 2
#define IEEE802_11_IV_LEN 3
#define IEEE802_11_KID_LEN 1

/* Frame check sequence length. */
#define IEEE802_11_FCS_LEN 4

/* Lengths of beacon components.
*/
#define IEEE802_11_TSTAMP_LEN 8
#define IEEE802_11_BCNINT_LEN 2
#define IEEE802_11_CAPINFO_LEN 2
#define IEEE802_11_LISTENINT_LEN 2

#define IEEE802_11_AID_LEN 2
#define IEEE802_11_STATUS_LEN 2
#define IEEE802_11_REASON_LEN 2

/* Length of previous AP in reassociation frame */
#define IEEE802_11_AP_LEN 6

/* Frame type values, i.e. the two-bit field that FC_TYPE() extracts from the
 * frame control word. */
#define T_MGMT 0x0  /* management */
#define T_CTRL 0x1  /* control */
#define T_DATA 0x2  /* data */
#define T_RESV 0x3  /* reserved */

/* Subtype values (the four-bit field that FC_SUBTYPE() extracts); presumably
 * these apply to management frames -- confirm against the frame decoder. */
#define ST_ASSOC_REQUEST 0x0
#define ST_ASSOC_RESPONSE 0x1
#define ST_REASSOC_REQUEST 0x2
#define ST_REASSOC_RESPONSE 0x3
#define ST_PROBE_REQUEST 0x4
#define ST_PROBE_RESPONSE 0x5
/* RESERVED 0x6 */
/* RESERVED 0x7 */
#define ST_BEACON 0x8
#define ST_ATIM 0x9
#define ST_DISASSOC 0xA
#define ST_AUTH 0xB
#define ST_DEAUTH 0xC
/* RESERVED 0xD */
/* RESERVED 0xE */
/* RESERVED 0xF */

/* Subtype values; presumably for control frames -- confirm as above. */
#define CTRL_PS_POLL 0xA
#define CTRL_RTS 0xB
#define CTRL_CTS 0xC
#define CTRL_ACK 0xD
#define CTRL_CF_END 0xE
#define CTRL_END_ACK 0xF

/* Subtype values; presumably for data frames -- confirm as above. */
#define DATA_DATA 0x0
#define DATA_DATA_CF_ACK 0x1
#define DATA_DATA_CF_POLL 0x2
#define DATA_DATA_CF_ACK_POLL 0x3
#define DATA_NODATA 0x4
#define DATA_NODATA_CF_ACK 0x5
#define DATA_NODATA_CF_POLL 0x6
#define DATA_NODATA_CF_ACK_POLL 0x7

/*
 * Bits in the frame control field.
*/ #define FC_VERSION(fc) ((fc) & 0x3) #define FC_TYPE(fc) (((fc) >> 2) & 0x3) #define FC_SUBTYPE(fc) (((fc) >> 4) & 0xF) #define FC_TO_DS(fc) ((fc) & 0x0100) #define FC_FROM_DS(fc) ((fc) & 0x0200) #define FC_MORE_FLAG(fc) ((fc) & 0x0400) #define FC_RETRY(fc) ((fc) & 0x0800) #define FC_POWER_MGMT(fc) ((fc) & 0x1000) #define FC_MORE_DATA(fc) ((fc) & 0x2000) #define FC_WEP(fc) ((fc) & 0x4000) #define FC_ORDER(fc) ((fc) & 0x8000) #define MGMT_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \ IEEE802_11_DA_LEN+IEEE802_11_SA_LEN+ \ IEEE802_11_BSSID_LEN+IEEE802_11_SEQ_LEN) #define CAPABILITY_ESS(cap) ((cap) & 0x0001) #define CAPABILITY_IBSS(cap) ((cap) & 0x0002) #define CAPABILITY_CFP(cap) ((cap) & 0x0004) #define CAPABILITY_CFP_REQ(cap) ((cap) & 0x0008) #define CAPABILITY_PRIVACY(cap) ((cap) & 0x0010) struct MAC { enum { PRINT_FMT_COLON, PRINT_FMT_PLAIN }; uint64_t val; MAC():val() {} MAC(uint64_t val_):val(val_){} MAC(const MAC& o):val(o.val){} MAC(const uint8_t *ether):val( ((uint64_t)(ether[0]) << 40) | ((uint64_t)(ether[1]) << 32) | ((uint64_t)(ether[2]) << 24) | ((uint64_t)(ether[3]) << 16) | ((uint64_t)(ether[4]) << 8) | ((uint64_t)(ether[5]) << 0)){} MAC(const char *str):val(){ int ether[6]; int ret = sscanf(str, "%02x:%02x:%02x:%02x:%02x:%02x", ðer[0], ðer[1], ðer[2], ðer[3], ðer[4], ðer[5]); if (ret != 6) { ret = sscanf(str, "%02X:%02X:%02X:%02X:%02X:%02X", ðer[0], ðer[1], ðer[2], ðer[3], ðer[4], ðer[5]); } if (ret != 6) { std::cerr << "bad mac address: " << str << std::endl; val = 0; return; } val = ((uint64_t)(ether[0]) << 40) | ((uint64_t)(ether[1]) << 32) | ((uint64_t)(ether[2]) << 24) | ((uint64_t)(ether[3]) << 16) | ((uint64_t)(ether[4]) << 8) | ((uint64_t)(ether[5]) << 0); } bool operator==(const MAC& o) const { return val == o.val; } bool operator!=(const MAC& o) const { return val != o.val; } bool operator<(const MAC& o) const { return val < o.val; } static MAC ether2MAC(const uint8_t * ether) { return MAC(ether); } static MAC broadcast; static MAC null; 
static int print_fmt; }; typedef enum { NOT_PRESENT, PRESENT, TRUNCATED } elem_status_t; struct ssid_t { ssid_t():element_id(),length(),ssid(){}; u_int8_t element_id; u_int8_t length; char ssid[33]; /* 32 + 1 for null */ }; struct rates_t { rates_t():element_id(),length(),rate(){}; u_int8_t element_id; u_int8_t length; u_int8_t rate[16]; }; struct challenge_t { challenge_t():element_id(),length(),text(){}; u_int8_t element_id; u_int8_t length; u_int8_t text[254]; /* 1-253 + 1 for null */ }; struct fh_t { fh_t():element_id(),length(),dwell_time(),hop_set(),hop_pattern(),hop_index(){}; u_int8_t element_id; u_int8_t length; u_int16_t dwell_time; u_int8_t hop_set; u_int8_t hop_pattern; u_int8_t hop_index; }; struct ds_t { u_int8_t element_id; u_int8_t length; u_int8_t channel; }; struct cf_t { u_int8_t element_id; u_int8_t length; u_int8_t count; u_int8_t period; u_int16_t max_duration; u_int16_t dur_remaing; }; struct tim_t { u_int8_t element_id; u_int8_t length; u_int8_t count; u_int8_t period; u_int8_t bitmap_control; u_int8_t bitmap[251]; }; #define E_SSID 0 #define E_RATES 1 #define E_FH 2 #define E_DS 3 #define E_CF 4 #define E_TIM 5 #define E_IBSS 6 /* reserved 7 */ /* reserved 8 */ /* reserved 9 */ /* reserved 10 */ /* reserved 11 */ /* reserved 12 */ /* reserved 13 */ /* reserved 14 */ /* reserved 15 */ /* reserved 16 */ #define E_CHALLENGE 16 /* reserved 17 */ /* reserved 18 */ /* reserved 19 */ /* reserved 16 */ /* reserved 16 */ // XXX Jeff: no FCS fields are filled in right now #define CTRL_RTS_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \ IEEE802_11_RA_LEN+IEEE802_11_TA_LEN) struct ctrl_cts_t { ctrl_cts_t():fc(),duration(),ra(),fcs(){}; u_int16_t fc; u_int16_t duration; MAC ra; u_int8_t fcs[4]; }; #define CTRL_CTS_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+IEEE802_11_RA_LEN) struct ctrl_ack_t { ctrl_ack_t():fc(),duration(),ra(),fcs(){}; u_int16_t fc; u_int16_t duration; MAC ra; u_int8_t fcs[4]; }; #define CTRL_ACK_HDRLEN 
(IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+IEEE802_11_RA_LEN)

/* PS-Poll control frame: frame control, association id, BSSID, transmitter
 * address, FCS bytes. */
struct ctrl_ps_poll_t {
    ctrl_ps_poll_t():fc(),aid(),bssid(),ta(),fcs(){};
    u_int16_t fc;
    u_int16_t aid;
    MAC bssid;
    MAC ta;
    u_int8_t fcs[4];
};

#define CTRL_PS_POLL_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_AID_LEN+ \
                             IEEE802_11_BSSID_LEN+IEEE802_11_TA_LEN)

/* CF-End control frame. */
struct ctrl_end_t {
    ctrl_end_t():fc(),duration(),ra(),bssid(),fcs(){}
    u_int16_t fc;
    u_int16_t duration;
    MAC ra;
    MAC bssid;
    u_int8_t fcs[4];
};

#define CTRL_END_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \
                         IEEE802_11_RA_LEN+IEEE802_11_BSSID_LEN)

/* CF-End + ACK control frame; same fields as ctrl_end_t. */
struct ctrl_end_ack_t {
    ctrl_end_ack_t():fc(),duration(),ra(),bssid(),fcs(){};
    u_int16_t fc;
    u_int16_t duration;
    MAC ra;
    MAC bssid;
    u_int8_t fcs[4];
};

#define CTRL_END_ACK_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \
                             IEEE802_11_RA_LEN+IEEE802_11_BSSID_LEN)

/* Fields packed into a 32-bit WEP IV word: 24-bit IV, 6 pad bits, 2-bit key id. */
#define IV_IV(iv) ((iv) & 0xFFFFFF)
#define IV_PAD(iv) (((iv) >> 24) & 0x3F)
#define IV_KEYID(iv) (((iv) >> 30) & 0x03)

struct mac_hdr_t {              // unified 80211 header
    mac_hdr_t():fc(),duration(),seq_ctl(),seq(),frag(),da(),sa(),ta(),ra(),bssid(),qos(){}
    uint16_t fc;                // frame control
    uint16_t duration;
    uint16_t seq_ctl;
    uint16_t seq;               // sequence number
    uint8_t frag;               // fragment number?
    MAC da;                     // destination address // address1
    MAC sa;                     // source address // address2
    MAC ta;                     // transmitter // address3
    MAC ra;                     // receiver // address4
    MAC bssid;                  // BSSID
    bool qos;                   // has quality of service
};

/* Disabled per-mode data headers; mac_hdr_t above is used instead. */
#if 0
struct data_hdr_ibss_t {        // 80211 Independent Basic Service Set - e.g. ad hoc mode
    data_hdr_ibss_t():fc(),duration(),seq(),frag(),fcs(){};
    u_int16_t fc;
    u_int16_t duration;
    u_int16_t seq;
    u_int8_t frag;
    u_int8_t fcs[4];
};

struct data_hdr_t {
    data_hdr_t():fc(),duration(),seq(),frag(),sa(),da(),bssid(),fcs(){}
    u_int16_t fc;               //
    u_int16_t duration;         // ?
    u_int16_t seq;              // sequence #?
    u_int8_t frag;              // fragment #?
    MAC sa;                     // sender address
    MAC da;                     // destination address
    MAC bssid;                  // base station ID
    u_int8_t fcs[4];            // frame check sequence
};

struct data_hdr_wds_t {         // 80211 Wireless Distribution System
    data_hdr_wds_t():fc(),duration(),seq(),frag(),ra(),ta(),sa(),da(),fcs(){}
    u_int16_t fc;
    u_int16_t duration;
    u_int16_t seq;
    u_int8_t frag;
    MAC ra;
    MAC ta;
    MAC sa;
    MAC da;
    u_int8_t fcs[4];
};
#endif

#define DATA_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \
                     IEEE802_11_SA_LEN+IEEE802_11_DA_LEN+ \
                     IEEE802_11_BSSID_LEN+IEEE802_11_SEQ_LEN)

#define DATA_WDS_HDRLEN (IEEE802_11_FC_LEN+IEEE802_11_DUR_LEN+ \
                         IEEE802_11_RA_LEN+IEEE802_11_TA_LEN+ \
                         IEEE802_11_SA_LEN+IEEE802_11_DA_LEN+IEEE802_11_SEQ_LEN)

/* Jeff: added for fully-decoded wep info */
struct wep_hdr_t {
    wep_hdr_t():iv(),pad(),keyid(){};
    u_int32_t iv;
    u_int32_t pad;
    u_int32_t keyid;
};

/* prism header */
/* On Windows the structs down to the matching pack(pop) are byte-packed with
 * #pragma pack; prism2_pkthdr and radiotap_hdr additionally carry the GCC
 * packed attribute. */
#ifdef _WIN32
#pragma pack(push, 1)
#endif
struct prism2_pkthdr {
    uint32_t host_time;
    uint32_t mac_time;
    uint32_t channel;
    uint32_t rssi;
    uint32_t sq;
    int32_t signal;
    int32_t noise;
    uint32_t rate;
    uint32_t istx;
    uint32_t frmlen;
} __attribute__((__packed__));

/* Radiotap information: each has_* flag says whether the value that follows
 * it is set. */
struct radiotap_hdr {
    bool has_channel;
    int channel;
    bool has_fhss;
    int fhss_fhset;
    int fhss_fhpat;
    bool has_rate;
    int rate;
    bool has_signal_dbm;
    int signal_dbm;
    bool has_noise_dbm;
    int noise_dbm;
    bool has_signal_db;
    int signal_db;
    bool has_noise_db;
    int noise_db;
    bool has_quality;
    int quality;
    bool has_txattenuation;
    int txattenuation;
    bool has_txattenuation_db;
    int txattenuation_db;
    bool has_txpower_dbm;
    int txpower_dbm;
    bool has_flags;
    bool flags_cfp;
    bool flags_short_preamble;
    bool flags_wep;
    bool flags_fragmented;
    bool flags_badfcs;
    bool has_antenna;
    int antenna;

    bool has_tsft;
    u_int64_t tsft;

    bool has_rxflags;
    int rxflags;

    bool has_txflags;
    int txflags;

    bool has_rts_retries;
    int rts_retries;

    bool has_data_retries;
    int data_retries;
} __attribute__((__packed__));

/* Ethernet header: source and destination address plus the type field. */
struct ether_hdr_t {
    ether_hdr_t():sa(),da(),type(){};
    MAC sa, da;
    uint16_t type;
};

/* Management frame header. */
struct mgmt_header_t {
    mgmt_header_t():fc(),duration(),da(),sa(),bssid(),seq(),frag(){};
    u_int16_t fc;
    u_int16_t duration;
    MAC da;
    MAC sa;
    MAC bssid;
    u_int16_t seq;
    u_int8_t frag;
};

/*
 * Management frame body.  Each information element is paired with an
 * elem_status_t member (`*_status`) that records whether it is NOT_PRESENT,
 * PRESENT or TRUNCATED.
 */
struct mgmt_body_t {
    mgmt_body_t():timestamp(),beacon_interval(),listen_interval(),status_code(),aid(),ap(),reason_code(),
                  auth_alg(),auth_trans_seq_num(),challenge_status(),challenge(),capability_info(),
                  ssid_status(),ssid(),rates_status(),rates(),ds_status(),ds(),cf_status(),cf(),
                  fh_status(),fh(),tim_status(),tim(){};
    u_int8_t timestamp[IEEE802_11_TSTAMP_LEN];
    u_int16_t beacon_interval;
    u_int16_t listen_interval;
    u_int16_t status_code;
    u_int16_t aid;
    u_char ap[IEEE802_11_AP_LEN];
    u_int16_t reason_code;
    u_int16_t auth_alg;
    u_int16_t auth_trans_seq_num;
    elem_status_t challenge_status;
    struct challenge_t challenge;
    u_int16_t capability_info;
    elem_status_t ssid_status;
    struct ssid_t ssid;
    elem_status_t rates_status;
    struct rates_t rates;
    elem_status_t ds_status;
    struct ds_t ds;
    elem_status_t cf_status;
    struct cf_t cf;
    elem_status_t fh_status;
    struct fh_t fh;
    elem_status_t tim_status;
    struct tim_t tim;
};

/* RTS control frame: frame control, duration, receiver and transmitter
 * address, FCS bytes. */
struct ctrl_rts_t {
    ctrl_rts_t():fc(),duration(),ra(),ta(),fcs(){}
    u_int16_t fc;
    u_int16_t duration;
    MAC ra;
    MAC ta;
    u_int8_t fcs[4];
};

#ifdef _WIN32
#pragma pack(pop)
#endif

/**
 * Applications should implement a subclass of this interface and pass
 * it to Wifipcap::Run(). Each time pcap reads a packet, Wifipcap will
 * call:
 *
 * (1) PacketBegin()
 *
 * (2) Each Handle*() callback in order from layer 1 to layer 3 (or as
 * far as it is able to demultiplex the packet). The time values
 * are the same in all these calls. The 'len' argument passed to
 * functions refers to the amount of captured data available
 * (e.g., in the 'rest' variable), not necessarily the original
 * length of the packet (to get that, look inside appropriate
 * packet headers, or during PacketBegin()).
* * (3) PacketEnd() * * If the header for a layer was truncated, the appropriate function * will be called with the header == NULL and the rest == the start of * the packet. For truncated 802.11 headers, 80211Unknown will be * called with fc == -1; for truncated ICMP headers, type == code == * -1. * * All structures passed to the application will have fields in host * byte-order. For details about each header structure, see the * obvious header (e.g., ieee802_11.h for 802.11 stuff, ip.h for IPv4, * tcp.h for TCP, etc.). Note that there may be structures with * similar names that are only used internally; don't confuse them. * * For help parsing other protocols, the tcpdump source code will be * helpful. See the print-X.c file for help parsing protocol X. * The entry function is usually called X_print(...). */ struct WifiPacket; struct WifipcapCallbacks; class Wifipcap; extern std::ostream& operator<<(std::ostream& out, const MAC& mac); extern std::ostream& operator<<(std::ostream& out, const struct in_addr& ip); /////////////////////////////////////////////////////////////////////////////// /* * This class decodes a specific packet */ struct WifiPacket { /* Some instance variables */ /** 48-bit MACs in 64-bit ints */ static int debug; // prints callback before they are called WifiPacket(WifipcapCallbacks *cbs_,const int header_type_,const struct pcap_pkthdr *header_,const u_char *packet_): cbs(cbs_),header_type(header_type_),header(header_),packet(packet_),fcs_ok(false){} void parse_elements(struct mgmt_body_t *pbody, const u_char *p, int offset, size_t len); int handle_beacon(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_assoc_request(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_assoc_response(const struct mgmt_header_t *pmh, const u_char *p, size_t len, bool reassoc = false); int handle_reassoc_request(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_reassoc_response(const struct 
mgmt_header_t *pmh, const u_char *p, size_t len); int handle_probe_request(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_probe_response(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_atim(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_disassoc(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_auth(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int handle_deauth(const struct mgmt_header_t *pmh, const u_char *p, size_t len); int decode_mgmt_body(u_int16_t fc, struct mgmt_header_t *pmh, const u_char *p, size_t len); int decode_mgmt_frame(const u_char * ptr, size_t len, u_int16_t fc, u_int8_t hdrlen); int decode_data_frame(const u_char * ptr, size_t len, u_int16_t fc); int decode_ctrl_frame(const u_char * ptr, size_t len, u_int16_t fc); /* Handle the individual packet types based on DTL callback switch */ void handle_llc(const mac_hdr_t &hdr,const u_char *ptr, size_t len,u_int16_t fc); void handle_wep(const u_char *ptr, size_t len); void handle_prism(const u_char *ptr, size_t len); void handle_ether(const u_char *ptr, size_t len); void handle_ip(const u_char *ptr, size_t len); void handle_80211(const u_char *ptr, size_t len); int print_radiotap_field(struct cpack_state *s, u_int32_t bit, int *pad, radiotap_hdr *hdr); void handle_radiotap(const u_char *ptr, size_t caplen); /* And finally the data for each packet */ WifipcapCallbacks *cbs; // the callbacks to use with this packet const int header_type; // DLT const struct pcap_pkthdr *header; // the actual pcap headers const u_char *packet; // the actual packet data bool fcs_ok; // was it okay? 
}; struct WifipcapCallbacks { /**************************************************************** *** Data Structures for each Packet Follow ****************************************************************/ WifipcapCallbacks(){}; virtual ~WifipcapCallbacks(){}; virtual const char *name() const {return "WifipcapCallbacks";} // override with your own name! /* Instance variables --- for a specific packet. * (Previously all of the functions had these parameters as the arguments, which made no sense) */ /** * @param t the time the packet was captured * @param pkt the entire packet captured * @param len the length of the data captured * @param origlen the original length of the data (before truncated by pcap) */ virtual void PacketBegin(const WifiPacket &p, const u_char *pkt, size_t len, int origlen){} virtual void PacketEnd(const WifiPacket &p ){} // If a Prism or RadioTap packet is found, call these, and then call Handle80211() virtual void HandlePrism(const WifiPacket &p, struct prism2_pkthdr *hdr, const u_char *rest, size_t len){} virtual void HandleRadiotap(const WifiPacket &p, struct radiotap_hdr *hdr, const u_char *rest, size_t len){} // 802.11 MAC (see ieee802_11.h) // // This method is called for every 802.11 frame just before the // specific functions below are called. This allows you to have // one entry point to easily do something with all 802.11 packets. // // The MAC addresses will be MAC::null unless applicable to the // particular type of packet. For unknown 802.11 packets, all // MAC addresses will be MAC::null and if the packet is truncated, // so that fc was not decoded, it will be 0. // // fcs_ok will be true if the frame had a valid fcs (frame // checksum) trailer and Check80211FCS() returns true. virtual void Handle80211(const WifiPacket &p, u_int16_t fc, const MAC& sa, const MAC& da, const MAC& ra, const MAC& ta, const u_char *ptr, size_t len){} // if this returns true, we'll check the fcs on every frame. 
// Note: if frames are truncated, the fcs check will fail, so you need // a complete packet capture for this to be meaningful virtual bool Check80211FCS(const WifiPacket &p ) { return false; } // Management virtual void Handle80211MgmtBeacon(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body) {puts("Handle80211MgmtBeacon");} virtual void Handle80211MgmtAssocRequest(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtAssocResponse(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtReassocRequest(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtReassocResponse(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtProbeRequest(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtProbeResponse(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtATIM(const WifiPacket &p, const struct mgmt_header_t *hdr){} virtual void Handle80211MgmtDisassoc(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtAuth(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} virtual void Handle80211MgmtAuthSharedKey(const WifiPacket &p, const struct mgmt_header_t *hdr, const u_char *rest, size_t len){} virtual void Handle80211MgmtDeauth(const WifiPacket &p, const struct mgmt_header_t *hdr, const struct mgmt_body_t *body){} // Control virtual void Handle80211CtrlPSPoll(const WifiPacket &p, const struct ctrl_ps_poll_t *hdr){} virtual void Handle80211CtrlRTS(const WifiPacket &p, const struct ctrl_rts_t *hdr){} virtual void Handle80211CtrlCTS(const WifiPacket &p, const struct ctrl_cts_t 
*hdr){} virtual void Handle80211CtrlAck(const WifiPacket &p, const struct ctrl_ack_t *hdr){} virtual void Handle80211CtrlCFEnd(const WifiPacket &p, const struct ctrl_end_t *hdr){} virtual void Handle80211CtrlEndAck(const WifiPacket &p, const struct ctrl_end_ack_t *hdr){} // Data - Each data packet results in a call to Handle80211Data and one of the others virtual void Handle80211Data(const WifiPacket &p, u_int16_t fc, const struct mac_hdr_t &hdr, const u_char *rest, size_t len){} virtual void Handle80211DataIBSS(const WifiPacket &p, const struct mac_hdr_t &hdr, const u_char *rest, size_t len){} virtual void Handle80211DataFromAP(const WifiPacket &p, const struct mac_hdr_t &hdr, const u_char *rest, size_t len){} virtual void Handle80211DataToAP(const WifiPacket &p, const struct mac_hdr_t &hdr, const u_char *rest, size_t len){} virtual void Handle80211DataWDS(const WifiPacket &p, const struct mac_hdr_t &hdr, const u_char *rest, size_t len){} // Erroneous Frames/Truncated Frames // Also called if Check80211FCS() returns true and the checksum is bad virtual void Handle80211Unknown(const WifiPacket &p, int fc, const u_char *rest, size_t len){} // LLC/SNAP (const WifiPacket &p, see llc.h) virtual void HandleLLC(const WifiPacket &p, const struct llc_hdr_t *hdr, const u_char *rest, size_t len){} virtual void HandleLLCUnknown(const WifiPacket &p, const u_char *rest, size_t len){} virtual void HandleWEP(const WifiPacket &p, const struct wep_hdr_t *hdr, const u_char *rest, size_t len){} // for non-802.11 ethernet traces virtual void HandleEthernet(const WifiPacket &p, const struct ether_hdr_t *hdr, const u_char *rest, size_t len){} ///// Layer 2 (see arp.h, ip.h, ip6.h) virtual void HandleARP(const WifiPacket &p, const arp_pkthdr *hdr, const u_char *rest, size_t len){} virtual void HandleIP(const WifiPacket &p, const ip4_hdr_t *hdr, const u_char *options, int optlen, const u_char *rest, size_t len){} virtual void HandleIP6(const WifiPacket &p, const ip6_hdr_t *hdr, const 
u_char *rest, size_t len){} virtual void HandleL2Unknown(const WifiPacket &p, uint16_t ether_type, const u_char *rest, size_t len){} ///// Layer 3 (see icmp.h, tcp.h, udp.h) // IP headers are included for convenience. one of ip4h, ip6h will // be non-NULL. Only the first fragment in a fragmented packet // will be decoded. The other fragments will not be passed to any // of these functions. // Jeff: XXX icmp callback will probably eventually change to // parse the entire icmp packet virtual void HandleICMP(const WifiPacket &p, const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, int type, int code, const u_char *rest, size_t len){} virtual void HandleTCP(const WifiPacket &p, const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, const tcp_hdr_t *hdr, const u_char *options, int optlen, const u_char *rest, size_t len){} virtual void HandleUDP(const WifiPacket &p, const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, const udp_hdr_t *hdr, const u_char *rest, size_t len){} virtual void HandleL3Unknown(const WifiPacket &p, const ip4_hdr_t *ip4h, const ip6_hdr_t *ip6h, const u_char *rest, size_t len){} }; /** * Applications create an instance of this to start processing a pcap * trace. Example: * * Wifipcap *wp = new Wifipcap("/path/to/mytrace.cap"); * wp->Run(new MyCallbacks()); */ class Wifipcap { // these are not implemented Wifipcap(const Wifipcap &t); Wifipcap &operator=(const Wifipcap &that); public: /** * Utility functions for 802.11 fields. */ class WifiUtil { public: // some functions to convert codes to ascii names static const char *MgmtAuthAlg2Txt(uint v); static const char *MgmtStatusCode2Txt(uint v); static const char *MgmtReasonCode2Txt(uint v); static const char *EtherType2Txt(uint t); }; /** * Initialize the lib. Exits with error message upon failure. * * @param name the device if live = true, else the file name of * the trace. If the file name ends in '.gz', we assume its a * gzipped trace and will pipe it through zcat before parsing it. 
* @param live true if reading from a device, otherwise a trace */ Wifipcap():descr(),datalink(),morefiles(),verbose(),startTime(),lastPrintTime(),packetsProcessed(){ }; Wifipcap(const char *name, bool live_ = false, bool verbose_ = false): descr(NULL), datalink(),morefiles(),verbose(verbose_), startTime(TIME_NONE), lastPrintTime(TIME_NONE), packetsProcessed(0) { Init(name, live_); } /** * Initialize with nfiles. Will run on all of them in order. */ Wifipcap(const char* const *names, int nfiles_, bool verbose_ = false): descr(NULL), datalink(),morefiles(),verbose(verbose_), startTime(TIME_NONE), lastPrintTime(TIME_NONE), packetsProcessed(0) { for (int i=0; i morefiles; public: bool verbose; struct timeval startTime; struct timeval lastPrintTime; uint64_t packetsProcessed; static const int PRINT_TIME_INTERVAL = 6*60*60; // sec }; /////////////////////////////////////////////////////////////////////////////// #include "ieee802_11_radio.h" #include "llc.h" #endif tcpflow/src/wifipcap/cpack.h0000644000175000017500000000444112263701151014766 0ustar dimadima/*- * Copyright (c) 2003, 2004 David Young. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of David Young may not be used to endorse or promote * products derived from this software without specific prior * written permission. 
* * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */

#ifndef _CPACK_H
#define _CPACK_H

#include
#include

/*
 * State shared by the cpack_* routines declared below.
 * NOTE(review): presumably c_buf is the start of the caller's buffer,
 * c_next the current read position and c_len the buffer length in bytes;
 * confirm against the implementation (not visible in this header).
 */
struct cpack_state {
    u_int8_t *c_buf;
    u_int8_t *c_next;
    size_t c_len;
};

/*
 * Attach a byte buffer of the given length to a cpack_state.
 * NOTE(review): the meaning of the int return value is not visible here;
 * presumably 0 on success. Verify before relying on it.
 */
int cpack_init(struct cpack_state *, uint8_t *, size_t);

/*
 * Fetch one unsigned value of the named width through the state into the
 * location given by the second argument.
 * NOTE(review): byte order and alignment handling are defined by the
 * implementation, not by this header.
 */
int cpack_uint8(struct cpack_state *, uint8_t *);
int cpack_uint16(struct cpack_state *, uint16_t *);
int cpack_uint32(struct cpack_state *, uint32_t *);
int cpack_uint64(struct cpack_state *, uint64_t *);

/*
 * Signed variants: each one casts its output pointer to the unsigned type
 * of the same width and forwards to the matching cpack_uintN(), returning
 * that function's result unchanged.
 */
inline int cpack_int8(struct cpack_state *s, int8_t *p) {return cpack_uint8(s,(uint8_t *)p);}
inline int cpack_int16(struct cpack_state *s, int16_t *p) {return cpack_uint16(s,(uint16_t *)p);}
inline int cpack_int32(struct cpack_state *s, int32_t *p) {return cpack_uint32(s,(uint32_t *)p);}
inline int cpack_int64(struct cpack_state *s, int64_t *p) {return cpack_uint64(s,(uint64_t *)p);}

#endif /* _CPACK_H */
tcpflow/src/wifipcap/util.cpp0000644000175000017500000000366712263701151015226 0ustar dimadima#include "os.h" #include #include #include #include #ifndef _WIN32 #include #include #include #else #define snprintf sprintf_s #endif #include "util.h" #include "ethertype.h" #include "wifipcap.h" //std::ostream& operator<<(std::ostream& out, const WifipcapCallbacks::MAC& mac) { // const char *fmt = WifipcapCallbacks::MAC::print_fmt ==
WifipcapCallbacks::MAC::PRINT_FMT_COLON ? // "%02x:%02x:%02x:%02x:%02x:%02x" : // "%02x%02x%02x%02x%02x%02x"; // char buf[24]; // sprintf(buf, fmt, // (int)((mac.val>>40)&0xff), // (int)((mac.val>>32)&0xff), // (int)((mac.val>>24)&0xff), // (int)((mac.val>>16)&0xff), // (int)((mac.val>>8)&0xff), // (int)((mac.val)&0xff) // ); // out << buf; // return out; //} // //std::ostream& operator<<(std::ostream& out, const struct in_addr& ip) { // out << inet_ntoa(ip); // return out; //} #if 0 char *va(const char *format, ...) { va_list argptr; static int index = 0; static char buf[8][512]; char *b = *(buf + index); va_start (argptr, format); vsprintf (b, format,argptr); va_end (argptr); index = (index + 1) % 8; return b; } /* * Convert a token value to a string; use "fmt" if not found. */ const char * tok2strbuf(register const struct tok *lp, register const char *fmt, register int v, char *buf, size_t bufsize) { if (lp != NULL) { while (lp->s != NULL) { if (lp->v == v) return (lp->s); ++lp; } } if (fmt == NULL) fmt = "#%d"; (void)snprintf(buf, bufsize, fmt, v); return (const char *)buf; } /* * Convert a token value to a string; use "fmt" if not found. */ const char * tok2str(register const struct tok *lp, register const char *fmt, register int v) { static char buf[4][128]; static int idx = 0; char *ret; ret = buf[idx]; idx = (idx+1) & 3; return tok2strbuf(lp, fmt, v, ret, sizeof(buf[0])); } #endif tcpflow/src/wifipcap/ieee802_11_radio.h0000644000175000017500000002340412263701151016525 0ustar dimadima/*- * Copyright (c) 2003, 2004 David Young. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of David Young may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID * YOUNG BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ /* Copyright 2013 Simson L. Garfinkel * Cleaned up. */ #ifndef _NET_IF_IEEE80211RADIOTAP_H_ #define _NET_IF_IEEE80211RADIOTAP_H_ #include "os.h" /* A generic radio capture format is desirable. There is one for * Linux, but it is neither rigidly defined (there were not even * units given for some fields) nor easily extensible. * * I suggest the following extensible radio capture format. It is * based on a bitmap indicating which fields are present. * * I am trying to describe precisely what the application programmer * should expect in the following, and for that reason I tell the * units and origin of each measurement (where it applies), or else I * use sufficiently weaselly language ("is a monotonically nondecreasing * function of...") that I cannot set false expectations for lawyerly * readers. 
*/

/* Fallback definition of the link type, used only when it is not already defined. */
#ifndef DLT_IEEE802_11_RADIO
#define DLT_IEEE802_11_RADIO 127	/* 802.11 plus WLAN header */
#endif

/*
 * The header below is declared packed: via #pragma pack(push, 1) on
 * Windows builds, and via the _PACKED_ macro, which is defined outside
 * this file (NOTE(review): presumably in os.h, included above; confirm).
 * Its four fixed fields add up to 8 bytes.
 */
#ifdef _WIN32
#pragma pack(push, 1)
#endif

/* The radio capture header precedes the 802.11 header. */
struct ieee80211_radiotap_header {
	u_int8_t	it_version;	/* Version 0. Only increases
					 * for drastic changes,
					 * introduction of compatible
					 * new fields does not count.
					 */
	u_int8_t	it_pad;		/* NOTE(review): presumably unused
					 * padding; confirm against the
					 * radiotap specification.
					 */
	u_int16_t       it_len;         /* length of the whole
					 * header in bytes, including
					 * it_version, it_pad,
					 * it_len, and data fields.
					 */
	u_int32_t       it_present;     /* A bitmap telling which
					 * fields are present. Set bit 31
					 * (0x80000000) to extend the
					 * bitmap by another 32 bits.
					 * Additional extensions are made
					 * by setting bit 31.
					 */
} _PACKED_;

#ifdef _WIN32
#pragma pack(pop)
#endif

/* Name                                 Data type       Units
 * ----                                 ---------       -----
 *
 * IEEE80211_RADIOTAP_TSFT              u_int64_t       microseconds
 *
 *      Value in microseconds of the MAC's 64-bit 802.11 Time
 *      Synchronization Function timer when the first bit of the
 *      MPDU arrived at the MAC. For received frames, only.
 *
 * IEEE80211_RADIOTAP_CHANNEL           2 x u_int16_t   MHz, bitmap
 *
 *      Tx/Rx frequency in MHz, followed by flags (see below).
 *
 * IEEE80211_RADIOTAP_FHSS              u_int16_t       see below
 *
 *      For frequency-hopping radios, the hop set (first byte)
 *      and pattern (second byte).
 *
 * IEEE80211_RADIOTAP_RATE              u_int8_t        500kb/s
 *
 *      Tx/Rx data rate
 *
 * IEEE80211_RADIOTAP_DBM_ANTSIGNAL     int8_t          decibels from
 *                                                      one milliwatt (dBm)
 *
 *      RF signal power at the antenna, decibel difference from
 *      one milliwatt.
 *
 * IEEE80211_RADIOTAP_DBM_ANTNOISE      int8_t          decibels from
 *                                                      one milliwatt (dBm)
 *
 *      RF noise power at the antenna, decibel difference from one
 *      milliwatt.
 *
 * IEEE80211_RADIOTAP_DB_ANTSIGNAL      u_int8_t        decibel (dB)
 *
 *      RF signal power at the antenna, decibel difference from an
 *      arbitrary, fixed reference.
 *
 * IEEE80211_RADIOTAP_DB_ANTNOISE       u_int8_t        decibel (dB)
 *
 *      RF noise power at the antenna, decibel difference from an
 *      arbitrary, fixed reference point.
* * IEEE80211_RADIOTAP_LOCK_QUALITY u_int16_t unitless * * Quality of Barker code lock. Unitless. Monotonically * nondecreasing with "better" lock strength. Called "Signal * Quality" in datasheets. (Is there a standard way to measure * this?) * * IEEE80211_RADIOTAP_TX_ATTENUATION u_int16_t unitless * * Transmit power expressed as unitless distance from max * power set at factory calibration. 0 is max power. * Monotonically nondecreasing with lower power levels. * * IEEE80211_RADIOTAP_DB_TX_ATTENUATION u_int16_t decibels (dB) * * Transmit power expressed as decibel distance from max power * set at factory calibration. 0 is max power. Monotonically * nondecreasing with lower power levels. * * IEEE80211_RADIOTAP_DBM_TX_POWER int8_t decibels from * one milliwatt (dBm) * * Transmit power expressed as dBm (decibels from a 1 milliwatt * reference). This is the absolute power level measured at * the antenna port. * * IEEE80211_RADIOTAP_FLAGS u_int8_t bitmap * * Properties of transmitted and received frames. See flags * defined below. * * IEEE80211_RADIOTAP_ANTENNA u_int8_t antenna index * * Unitless indication of the Rx/Tx antenna for this packet. * The first antenna is antenna 0. * * IEEE80211_RADIOTAP_FCS u_int32_t data * * FCS from frame in network byte order. 
*/

/*
 * Radiotap field identifiers.
 *
 * Each enumerator is a bit number within the it_present bitmap of
 * struct ieee80211_radiotap_header: IEEE80211_RADIOTAP_EXT is 31, the
 * same bit that it_present documents as "extend the bitmap by another
 * 32 bits".  The values are not contiguous (21-28 and 30 are not named
 * here), so do not iterate over the enum as if it were dense.
 */
enum ieee80211_radiotap_type {
	IEEE80211_RADIOTAP_TSFT = 0,
	IEEE80211_RADIOTAP_FLAGS = 1,
	IEEE80211_RADIOTAP_RATE = 2,
	IEEE80211_RADIOTAP_CHANNEL = 3,
	IEEE80211_RADIOTAP_FHSS = 4,
	IEEE80211_RADIOTAP_DBM_ANTSIGNAL = 5,
	IEEE80211_RADIOTAP_DBM_ANTNOISE = 6,
	IEEE80211_RADIOTAP_LOCK_QUALITY = 7,
	IEEE80211_RADIOTAP_TX_ATTENUATION = 8,
	IEEE80211_RADIOTAP_DB_TX_ATTENUATION = 9,
	IEEE80211_RADIOTAP_DBM_TX_POWER = 10,
	IEEE80211_RADIOTAP_ANTENNA = 11,
	IEEE80211_RADIOTAP_DB_ANTSIGNAL = 12,
	IEEE80211_RADIOTAP_DB_ANTNOISE = 13,
	IEEE80211_RADIOTAP_RX_FLAGS = 14,
	IEEE80211_RADIOTAP_TX_FLAGS = 15,
	IEEE80211_RADIOTAP_RTS_RETRIES = 16,
	IEEE80211_RADIOTAP_DATA_RETRIES = 17,
	IEEE80211_RADIOTAP_XCHANNEL = 18,	/* Unofficial, used by FreeBSD */
	IEEE80211_RADIOTAP_MCS = 19,
	IEEE80211_RADIOTAP_AMPDU_STATUS = 20,

	/* valid in every it_present bitmap, even vendor namespaces */
	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
	IEEE80211_RADIOTAP_EXT = 31
};

/* Channel flags. */
#define IEEE80211_CHAN_TURBO    0x0010  /* Turbo channel */
#define IEEE80211_CHAN_CCK      0x0020  /* CCK channel */
#define IEEE80211_CHAN_OFDM     0x0040  /* OFDM channel */
#define IEEE80211_CHAN_2GHZ     0x0080  /* 2 GHz spectrum channel.
*/ #define IEEE80211_CHAN_5GHZ 0x0100 /* 5 GHz spectrum channel */ #define IEEE80211_CHAN_PASSIVE 0x0200 /* Only passive scan allowed */ #define IEEE80211_CHAN_DYN 0x0400 /* Dynamic CCK-OFDM channel */ #define IEEE80211_CHAN_GFSK 0x0800 /* GFSK channel (FHSS PHY) */ /* For IEEE80211_RADIOTAP_FLAGS */ #define IEEE80211_RADIOTAP_F_CFP 0x01 /* sent/received * during CFP */ #define IEEE80211_RADIOTAP_F_SHORTPRE 0x02 /* sent/received * with short * preamble */ #define IEEE80211_RADIOTAP_F_WEP 0x04 /* sent/received * with WEP encryption */ #define IEEE80211_RADIOTAP_F_FRAG 0x08 /* sent/received * with fragmentation */ #define IEEE80211_RADIOTAP_F_FCS 0x10 /* frame includes FCS */ #define IEEE80211_RADIOTAP_F_DATAPAD 0x20 /* frame has padding between * 802.11 header and payload * (to 32-bit boundary) */ #define IEEE80211_RADIOTAP_F_BADFCS 0x40 /* does not pass FCS check */ /* For IEEE80211_RADIOTAP_RX_FLAGS */ #define IEEE80211_RADIOTAP_F_RX_BADPLCP 0x0002 /* bad PLCP */ /* For IEEE80211_RADIOTAP_TX_FLAGS */ #define IEEE80211_RADIOTAP_F_TX_FAIL 0x0001 /* failed due to excessive * retries */ #define IEEE80211_RADIOTAP_F_TX_CTS 0x0002 /* used cts 'protection' */ #define IEEE80211_RADIOTAP_F_TX_RTS 0x0004 /* used rts/cts handshake */ /* For IEEE80211_RADIOTAP_AMPDU_STATUS */ #define IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN 0x0001 #define IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN 0x0002 #define IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN 0x0004 #define IEEE80211_RADIOTAP_AMPDU_IS_LAST 0x0008 #define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR 0x0010 #define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN 0x0020 /* For IEEE80211_RADIOTAP_MCS */ #define IEEE80211_RADIOTAP_MCS_HAVE_BW 0x01 #define IEEE80211_RADIOTAP_MCS_HAVE_MCS 0x02 #define IEEE80211_RADIOTAP_MCS_HAVE_GI 0x04 #define IEEE80211_RADIOTAP_MCS_HAVE_FMT 0x08 #define IEEE80211_RADIOTAP_MCS_HAVE_FEC 0x10 #define IEEE80211_RADIOTAP_MCS_BW_MASK 0x03 #define IEEE80211_RADIOTAP_MCS_BW_20 0 #define IEEE80211_RADIOTAP_MCS_BW_40 1 #define 
IEEE80211_RADIOTAP_MCS_BW_20L 2 #define IEEE80211_RADIOTAP_MCS_BW_20U 3 #define IEEE80211_RADIOTAP_MCS_SGI 0x04 #define IEEE80211_RADIOTAP_MCS_FMT_GF 0x08 #define IEEE80211_RADIOTAP_MCS_FEC_LDPC 0x10 #endif /* _NET_IF_IEEE80211RADIOTAP_H_ */ tcpflow/src/wifipcap/icmp.h0000644000175000017500000001626612263701151014645 0ustar dimadima/* * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that: (1) source code distributions * retain the above copyright notice and this paragraph in its entirety, (2) * distributions including binary code include the above copyright notice and * this paragraph in its entirety in the documentation or other materials * provided with the distribution, and (3) all advertising materials mentioning * features or use of this software display the following acknowledgement: * ``This product includes software developed by the University of California, * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of * the University nor the names of its contributors may be used to endorse * or promote products derived from this software without specific prior * written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ /* * Interface Control Message Protocol Definitions. * Per RFC 792, September 1981. */ /* * Structure of an icmp header. 
*/

/*
 * Layout: three fixed fields (type, code, checksum) followed by two
 * unions.  icmp_hun overlays the per-type header words and icmp_dun
 * overlays the per-type data; the #define aliases inside the struct give
 * flat names (icmp_id, icmp_seq, icmp_ip, ...) for the union members.
 * The struct uses struct in_addr and struct ip, so their definitions
 * must be visible before this one.
 */
struct icmp {
	u_int8_t  icmp_type;		/* type of message, see below */
	u_int8_t  icmp_code;		/* type sub code */
	u_int16_t icmp_cksum;		/* ones complement cksum of struct */
	union {
		u_int8_t ih_pptr;		/* ICMP_PARAMPROB */
		struct in_addr ih_gwaddr;	/* ICMP_REDIRECT */
		struct ih_idseq {
			u_int16_t icd_id;
			u_int16_t icd_seq;
		} ih_idseq;
		u_int32_t ih_void;

		/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
		struct ih_pmtu {
			u_int16_t ipm_void;
			u_int16_t ipm_nextmtu;
		} ih_pmtu;
	} icmp_hun;
#define	icmp_pptr	icmp_hun.ih_pptr
#define	icmp_gwaddr	icmp_hun.ih_gwaddr
#define	icmp_id		icmp_hun.ih_idseq.icd_id
#define	icmp_seq	icmp_hun.ih_idseq.icd_seq
#define	icmp_void	icmp_hun.ih_void
#define	icmp_pmvoid	icmp_hun.ih_pmtu.ipm_void
#define	icmp_nextmtu	icmp_hun.ih_pmtu.ipm_nextmtu
	union {
		struct id_ts {
			u_int32_t its_otime;
			u_int32_t its_rtime;
			u_int32_t its_ttime;
		} id_ts;
		struct id_ip  {
			struct ip idi_ip;
			/* options and then 64 bits of data */
		} id_ip;
		struct mpls_ext {
			u_int8_t legacy_header[128]; /* extension header starts 128 bytes after ICMP header */
			u_int8_t version_res[2];
			u_int8_t checksum[2];
			u_int8_t data[1];	/* one-element array used as the start of variable-length data */
		} mpls_ext;
		u_int32_t id_mask;
		u_int8_t id_data[1];
	} icmp_dun;
#define	icmp_otime	icmp_dun.id_ts.its_otime
#define	icmp_rtime	icmp_dun.id_ts.its_rtime
#define	icmp_ttime	icmp_dun.id_ts.its_ttime
#define	icmp_ip		icmp_dun.id_ip.idi_ip
#define	icmp_mask	icmp_dun.id_mask
#define	icmp_data	icmp_dun.id_data
#define	icmp_mpls_ext_version	icmp_dun.mpls_ext.version_res
#define	icmp_mpls_ext_checksum	icmp_dun.mpls_ext.checksum
#define	icmp_mpls_ext_data	icmp_dun.mpls_ext.data
};

/* Extracts the high nibble of x as the MPLS extension version number. */
#define ICMP_MPLS_EXT_EXTRACT_VERSION(x) (((x)&0xf0)>>4)
#define ICMP_MPLS_EXT_VERSION 2

/*
 * Lower bounds on packet lengths for various types.
 * For the error advice packets must first ensure that the
 * packet is large enough to contain the returned ip header.
* Only then can we do the check to see if 64 bits of packet * data have been returned, since we need to check the returned * ip header length. */ #define ICMP_MINLEN 8 /* abs minimum */ #define ICMP_EXTD_MINLEN (156 - sizeof (struct ip)) /* draft-bonica-icmp-mpls-02 */ #define ICMP_TSLEN (8 + 3 * sizeof (u_int32_t)) /* timestamp */ #define ICMP_MASKLEN 12 /* address mask */ #define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */ #define ICMP_ADVLEN(p) (8 + (IP_HL(&(p)->icmp_ip) << 2) + 8) /* N.B.: must separately check that ip_hl >= 5 */ /* * Definition of type and code field values. */ #define ICMP_ECHOREPLY 0 /* echo reply */ #define ICMP_UNREACH 3 /* dest unreachable, codes: */ #define ICMP_UNREACH_NET 0 /* bad net */ #define ICMP_UNREACH_HOST 1 /* bad host */ #define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ #define ICMP_UNREACH_PORT 3 /* bad port */ #define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ #define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ #define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ #define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ #define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ #define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ #define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ #define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ #define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ #define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ #define ICMP_REDIRECT 5 /* shorter route, codes: */ #define ICMP_REDIRECT_NET 0 /* for network */ #define ICMP_REDIRECT_HOST 1 /* for host */ #define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ #define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ #define ICMP_ECHO 8 /* echo service */ #define ICMP_ROUTERADVERT 9 /* router advertisement */ #define ICMP_ROUTERSOLICIT 10 /* router solicitation */ #define ICMP_TIMXCEED 11 /* time exceeded, code: */ #define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ #define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ #define 
ICMP_PARAMPROB 12 /* ip header bad */ #define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ #define ICMP_TSTAMP 13 /* timestamp request */ #define ICMP_TSTAMPREPLY 14 /* timestamp reply */ #define ICMP_IREQ 15 /* information request */ #define ICMP_IREQREPLY 16 /* information reply */ #define ICMP_MASKREQ 17 /* address mask request */ #define ICMP_MASKREPLY 18 /* address mask reply */ #define ICMP_MAXTYPE 18 #define ICMP_INFOTYPE(type) \ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) #define ICMP_MPLS_EXT_TYPE(type) \ ((type) == ICMP_UNREACH || (type) == ICMP_TIMXCEED) /* rfc1700 */ #ifndef ICMP_UNREACH_NET_UNKNOWN #define ICMP_UNREACH_NET_UNKNOWN 6 /* destination net unknown */ #endif #ifndef ICMP_UNREACH_HOST_UNKNOWN #define ICMP_UNREACH_HOST_UNKNOWN 7 /* destination host unknown */ #endif #ifndef ICMP_UNREACH_ISOLATED #define ICMP_UNREACH_ISOLATED 8 /* source host isolated */ #endif #ifndef ICMP_UNREACH_NET_PROHIB #define ICMP_UNREACH_NET_PROHIB 9 /* admin prohibited net */ #endif #ifndef ICMP_UNREACH_HOST_PROHIB #define ICMP_UNREACH_HOST_PROHIB 10 /* admin prohibited host */ #endif #ifndef ICMP_UNREACH_TOSNET #define ICMP_UNREACH_TOSNET 11 /* tos prohibited net */ #endif #ifndef ICMP_UNREACH_TOSHOST #define ICMP_UNREACH_TOSHOST 12 /* tos prohibited host */ #endif /* rfc1716 */ #ifndef ICMP_UNREACH_FILTER_PROHIB #define ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohibited filter */ #endif #ifndef ICMP_UNREACH_HOST_PRECEDENCE #define ICMP_UNREACH_HOST_PRECEDENCE 14 /* host precedence violation */ #endif #ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF #define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* precedence cutoff */ #endif tcpflow/src/wifipcap/oui.h0000644000175000017500000000645012263701151014503 0ustar dimadima/* @(#) $Header: 
/home/cvs/wifitools/wifipcap/oui.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that: (1) source code * distributions retain the above copyright notice and this paragraph * in its entirety, and (2) distributions including binary code include * the above copyright notice and this paragraph in its entirety in * the documentation or other materials provided with the distribution. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND * WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT * LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. * * Original code by Hannes Gredler (hannes@juniper.net) */ #ifndef UNI_OUT_H #define UNI_OUT_H extern struct tok oui_values[]; extern struct tok smi_values[]; #define OUI_ENCAP_ETHER 0x000000 /* encapsulated Ethernet */ #define OUI_CISCO 0x00000c /* Cisco protocols */ #define OUI_NORTEL 0x000081 /* Nortel SONMP */ #define OUI_CISCO_90 0x0000f8 /* Cisco bridging */ #define OUI_RFC2684 0x0080c2 /* RFC 2427/2684 bridged Ethernet */ #define OUI_ATM_FORUM 0x00A03E /* ATM Forum */ #define OUI_CABLE_BPDU 0x00E02F /* DOCSIS spanning tree BPDU */ #define OUI_APPLETALK 0x080007 /* Appletalk */ #define OUI_JUNIPER 0x009069 /* Juniper */ #define OUI_HP 0x080009 /* Hewlett-Packard */ /* * These are SMI Network Management Private Enterprise Codes for * organizations; see * * http://www.iana.org/assignments/enterprise-numbers * * for a list. * * List taken from Ethereal's epan/sminmpec.h. */ #define SMI_IETF 0 /* reserved - used by the IETF in L2TP? 
*/ #define SMI_ACC 5 #define SMI_CISCO 9 #define SMI_HEWLETT_PACKARD 11 #define SMI_SUN_MICROSYSTEMS 42 #define SMI_MERIT 61 #define SMI_SHIVA 166 #define SMI_ERICSSON 193 #define SMI_CISCO_VPN5000 255 #define SMI_LIVINGSTON 307 #define SMI_MICROSOFT 311 #define SMI_3COM 429 #define SMI_ASCEND 529 #define SMI_BAY 1584 #define SMI_FOUNDRY 1991 #define SMI_VERSANET 2180 #define SMI_REDBACK 2352 #define SMI_JUNIPER 2636 #define SMI_APTIS 2637 #define SMI_CISCO_VPN3000 3076 #define SMI_COSINE 3085 #define SMI_SHASTA 3199 #define SMI_NETSCREEN 3224 #define SMI_NOMADIX 3309 #define SMI_SIEMENS 4329 #define SMI_CABLELABS 4491 #define SMI_UNISPHERE 4874 #define SMI_CISCO_BBSM 5263 #define SMI_THE3GPP2 5535 #define SMI_IP_UNPLUGGED 5925 #define SMI_ISSANNI 5948 #define SMI_QUINTUM 6618 #define SMI_INTERLINK 6728 #define SMI_COLUBRIS 8744 #define SMI_COLUMBIA_UNIVERSITY 11862 #define SMI_THE3GPP 10415 #define SMI_GEMTEK_SYSTEMS 10529 #define SMI_WIFI_ALLIANCE 14122 #endif tcpflow/src/wifipcap/ip.h0000644000175000017500000001516512263701151014322 0ustar dimadima/* @(#) $Header: /home/cvs/wifitools/wifipcap/ip.h,v 1.1.1.1 2006/12/14 01:22:11 jpang Exp $ (LBL) */ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. 
* 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip.h 8.2 (Berkeley) 6/1/94 */ /* * Definitions for internet protocol version 4. * Per RFC 791, September 1981. */ #define IPVERSION 4 /* * Structure of an internet header, naked of options. * * We declare ip_len and ip_off to be short, rather than u_short * pragmatically since otherwise unsigned comparisons can result * against negative integers quite easily, and fail in subtle ways. 
*/ struct ip { u_int8_t ip_vhl; /* header length, version */ #define IP_V(ip) (((ip)->ip_vhl & 0xf0) >> 4) #define IP_HL(ip) ((ip)->ip_vhl & 0x0f) u_int8_t ip_tos; /* type of service */ u_int16_t ip_len; /* total length */ u_int16_t ip_id; /* identification */ u_int16_t ip_off; /* fragment offset field */ #define IP_DF 0x4000 /* dont fragment flag */ #define IP_MF 0x2000 /* more fragments flag */ #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ u_int8_t ip_ttl; /* time to live */ u_int8_t ip_p; /* protocol */ u_int16_t ip_sum; /* checksum */ struct in_addr ip_src,ip_dst; /* source and dest address */ }; #define IP_MAXPACKET 65535 /* maximum packet size */ /* * Definitions for IP type of service (ip_tos) */ #define IPTOS_LOWDELAY 0x10 #define IPTOS_THROUGHPUT 0x08 #define IPTOS_RELIABILITY 0x04 /* * Definitions for IP precedence (also in ip_tos) (hopefully unused) */ #define IPTOS_PREC_NETCONTROL 0xe0 #define IPTOS_PREC_INTERNETCONTROL 0xc0 #define IPTOS_PREC_CRITIC_ECP 0xa0 #define IPTOS_PREC_FLASHOVERRIDE 0x80 #define IPTOS_PREC_FLASH 0x60 #define IPTOS_PREC_IMMEDIATE 0x40 #define IPTOS_PREC_PRIORITY 0x20 #define IPTOS_PREC_ROUTINE 0x00 /* * Definitions for options. */ #define IPOPT_COPIED(o) ((o)&0x80) #define IPOPT_CLASS(o) ((o)&0x60) #define IPOPT_NUMBER(o) ((o)&0x1f) #define IPOPT_CONTROL 0x00 #define IPOPT_RESERVED1 0x20 #define IPOPT_DEBMEAS 0x40 #define IPOPT_RESERVED2 0x60 #define IPOPT_EOL 0 /* end of option list */ #define IPOPT_NOP 1 /* no operation */ #define IPOPT_RR 7 /* record packet route */ #define IPOPT_TS 68 /* timestamp */ #define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ #define IPOPT_LSRR 131 /* loose source route */ #define IPOPT_SATID 136 /* satnet id */ #define IPOPT_SSRR 137 /* strict source route */ #define IPOPT_RA 148 /* router-alert, rfc2113 */ /* * Offsets to fields in options other than EOL and NOP. 
*/ #define IPOPT_OPTVAL 0 /* option ID */ #define IPOPT_OLEN 1 /* option length */ #define IPOPT_OFFSET 2 /* offset within option */ #define IPOPT_MINOFF 4 /* min value of above */ /* * Time stamp option structure. */ struct ip_timestamp { u_int8_t ipt_code; /* IPOPT_TS */ u_int8_t ipt_len; /* size of structure (variable) */ u_int8_t ipt_ptr; /* index of current entry */ u_int8_t ipt_oflwflg; /* flags, overflow counter */ #define IPTS_OFLW(ip) (((ipt)->ipt_oflwflg & 0xf0) >> 4) #define IPTS_FLG(ip) ((ipt)->ipt_oflwflg & 0x0f) union ipt_timestamp { u_int32_t ipt_time[1]; struct ipt_ta { struct in_addr ipt_addr; u_int32_t ipt_time; } ipt_ta[1]; } ipt_timestamp; }; /* flag bits for ipt_flg */ #define IPOPT_TS_TSONLY 0 /* timestamps only */ #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ #define IPOPT_TS_PRESPEC 3 /* specified modules only */ /* bits for security (not byte swapped) */ #define IPOPT_SECUR_UNCLASS 0x0000 #define IPOPT_SECUR_CONFID 0xf135 #define IPOPT_SECUR_EFTO 0x789a #define IPOPT_SECUR_MMMM 0xbc4d #define IPOPT_SECUR_RESTR 0xaf13 #define IPOPT_SECUR_SECRET 0xd788 #define IPOPT_SECUR_TOPSECRET 0x6bc5 /* * Internet implementation parameters. 
*/ #define MAXTTL 255 /* maximum time to live (seconds) */ #define IPDEFTTL 64 /* default ttl, from RFC 1340 */ #define IPFRAGTTL 60 /* time to live for frags, slowhz */ #define IPTTLDEC 1 /* subtracted when forwarding */ #define IP_MSS 576 /* default maximum segment size */ /* in print-ip.c */ extern u_int32_t ip_finddst(const struct ip *); /////////////////////////////////////////////////////////////////////////////// /* Jeff: will pass this endian-fixed, fully decoded version to applications */ struct ip4_hdr_t { u_int8_t ver; /* ip version */ u_int16_t hlen; /* header length (in bytes) */ u_int8_t tos; /* type of service */ u_int16_t len; /* total length (in bytes) */ u_int16_t id; /* identification */ bool df; /* don't fragment flag */ bool mf; /* more fragments flag */ u_int16_t fragoff; /* fragment offset */ u_int8_t ttl; /* time to live */ u_int8_t proto; /* protocol */ u_int16_t cksum; /* header checksum */ struct in_addr src, dst; /* source and dest address */ }; tcpflow/src/dfxml/0000755000175000017500000000000012263701334013044 5ustar dimadimatcpflow/src/dfxml/python/0000755000175000017500000000000012263701334014365 5ustar dimadimatcpflow/src/dfxml/python/filesdb.py0000644000175000017500000000725712263701334016362 0ustar dimadima# # filesdb # a module that holds a database of DFXML files # import dfxml from collections import defaultdict import sys class filesdb: def __init__(self,fname=None): self.sha1db = defaultdict(list) # fi's by hashdb self.md5db = defaultdict(list) # fi's by hashdb self.fnamedb = defaultdict(list) # fi's by fname self.dirs = defaultdict(list) # fi's by directory name self.fis = [] self.prefix = None self.delfix = None if fname: self.read(fname) def __iter__(self): """The iterator for filesdb iterates through all the files""" return self.fis.__iter__() def read(self,f): if type(f)==str: self.fname = f f = open(f,'rb') dfxml.read_dfxml(xmlfile=f,callback=self.pass1) def read_with_prefix(self,fname): if ':' in fname: (fmt,fname) = 
fname.split(':') if fmt[0]=='+': self.prefix = fmt[1:] if fmt[0]=='=': self.delfix = fmt[1:] if fmt[0]!='+' and fmt[0]!='=': self.prefix = fmt self.read(fname) def pass1(self,fi): """First pass for reading fi objects""" import os self.fis.append(fi) if fi.sha1(): self.sha1db[fi.sha1()].append(fi) if fi.md5(): self.md5db[fi.md5()].append(fi) if fi.filename(): fname = fi.filename() if self.delfix: if fname.startswith(self.delfix): fname = fname[len(self.delfix):] if self.prefix: fname = self.prefix + fname self.sha1db[fname].append(fi) self.dirs[os.path.dirname(fname)].append(fi) def print_stats(self,f=sys.stdout): """Returns a text string of the stats""" ret = [ ['Total directories',len(self.dirs)], ['Total files',len(self.fis)], ['Total bytes',sum([int(fi.filesize()) for fi in self.fis])], ['Total sha1s',len(self.sha1db)], ['Total md5s',len(self.md5db)], ] print("\n".join(["{:20}: {:14,}".format(a[0],a[1]) for a in ret])) mtime_min = [fi.mtime() for fi in self.fis] #print('mtime=',len(mtime_min)) #flt = list(filter(lambda a:a!=None,mtime_min)) #print('flt=',flt,len(flt)) #print('mtime_min=',mtime_min) #print(['ctime range',mtime_min]) #exit(0) def del_dirs(self,targetdb): """Given a targetdb, provide the dirs to get there.""" return set(self.dirs.keys()).difference(set(targetdb.dirs.keys())) def del_files(self,targetdb): """Given an targetdb, provide the files needed to get there.""" return set(self.filesdb).difference(set(db.filesdb)) def new_dirs(self,db): """Given an older db, provide the dirs that are new.""" return set(db.dirs.keys()).difference(set(self.dirs.keys())) def search(self,mfi,hash=False,name=False): """Return the matching fis""" if hash and not name: return self.md5db[mfi.md5()] if name and not hash: return self.fnamedb[mfi.filename()] if hash and name: return filter(lambda fi:fi.filename()==mfi.filename(),self.md5db[mfi.md5()]) return [] # # test program. Reads a database and dumps it. 
# if __name__=="__main__": from argparse import ArgumentParser parser = ArgumentParser(description='Test the files database with one or more DFXML files') parser.add_argument('xmlfiles',help='XML files to process',nargs='+') args = parser.parse_args() db = filesdb() for fn in args.xmlfiles: db.read(fn) print(db.stats()) tcpflow/src/dfxml/python/test_idifference.py0000755000175000017500000000314112263701334020243 0ustar dimadima#!/usr/bin/env python """ Test script. Evaluates idifference.py on a sequence of disk images. """ import sys, os, subprocess if __name__ == "__main__": from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] dfxml_sequence_list.txt output_zip' #parser.add_option("-z", "--zap", help="Zap output directory (erases if present)" dest="zap") parser.add_option("-p", "--prefix", help="prepend prefix to every test image path", dest="prefix") parser.add_option("-v", "--verbose", help="verbose output: print call to difference program", dest="verbose", action="store_true") parser.add_option("-d", "--diff-program", help="use this path to the diff program", dest="diff_program") (options,args) = parser.parse_args() if len(args) < 2: parser.print_help() sys.exit(1) prefix = "" if options.prefix: prefix = options.prefix #Convert file contents to list files = [prefix + x.strip() for x in open(args[0],"r")] #Verify we'll run at least one difference if len(files) < 2: sys.stderr.write("Differencing requires 2 or more files.\n") #Check that the list lines actually point to files for f in files: assert os.path.isfile(f) #Run differences if options.diff_program: diff_program = options.diff_program else: diff_program = os.path.dirname(sys.argv[0]) + "/idifference.py" diff_command = ["python", diff_program, "--zipfile=" + args[1], "--imagefile"] + files if options.verbose: print(" ".join(diff_command)) subprocess.call(diff_command)tcpflow/src/dfxml/python/validate_dfxml.py0000644000175000017500000000131412263701334017721 0ustar 
dimadimaimport sys, fiwalk, os.path from optparse import OptionParser from sys import stdout def demo_dfxml_time_bug(filename): parser = OptionParser() parser.usage = '%prog% [options] xmlfile ' (options,args) = parser.parse_args() for fi in fiwalk.fileobjects_using_sax(xmlfile=open(filename,"rb")): fsize = fi.filesize() try: mt = fi.mtime() print('Type of mt:',type(mt)) print('Normal mtime:') print(mt) except KeyboardInterrupt: raise except: raise print('Abnormal mtime for file with size',fsize) if __name__=="__main__": filename = sys.argv[1] demo_dfxml_time_bug(filename) tcpflow/src/dfxml/python/dfxinfo.py0000644000175000017500000000405112263701334016374 0ustar dimadima#!/usr/bin/env python # coding=UTF-8 """dfxinfo.py: Generates a report about what up with a DFXML file. """ import platform,os,os.path,sys,time if platform.python_version_tuple() < ('3','2','0'): raise RuntimeError('This script now requires Python 3.2 or above') try: import dfxml, fiwalk except ImportError: raise ImportError('This script requires the dfxml and fiwalk modules for Python.') __version__='0.0.1' import fiwalk,dfxml from histogram import histogram class DiskSet: """DiskSet maintains a database of the file objects within a disk. 
The entire database is maintained in memory.""" def __init__(self): self.ext_histogram = histogram() self.ext_histogram_distinct = histogram() self.fi_by_md5 = dict() # a dictionary of lists def pass1(self,fi): if fi.is_virtual(): return if fi.is_file(): self.fi_by_md5.setdefault(fi.md5(),[]).append(fi) def print_dups_report(self): print("Duplicates:") # First extract the dups, then sort them dups = filter(lambda a:len(a[1])>1,self.fi_by_md5.items(),) dup_bytes = 0 for (md5hash,fis) in sorted(dups,key=lambda a:a[1][0].filesize(),reverse=True): for fi in fis: print("{:>16,} {:32} {}".format(fi.filesize(),fi.md5(),fi.filename())) print() dup_bytes += fis[0].filesize() * (len(fis)-1) print("Total duplicate bytes: {:,}".format(dup_bytes)) if __name__=="__main__": from argparse import ArgumentParser from copy import deepcopy parser = ArgumentParser(description='Report information about a DFXML file') parser.add_argument('xmlfiles',help='XML files to process',nargs='+') parser.add_argument("--imagefile",help="specifies imagefile to examine; automatically runs fiwalk",nargs='+') args = parser.parse_args() ds = DiskSet() for fn in args.xmlfiles: print("Processing {}".format(fn)) dfxml.read_dfxml(xmlfile=open(fn,'rb'),callback=ds.pass1) ds.print_dups_report() tcpflow/src/dfxml/python/tcpdiff.py0000644000175000017500000000363512263701334016365 0ustar dimadima#!/usr/bin/env python """tcpdiff.py Generates a report about what's different between two tcp DFXML files produced by tcpflow. Process: """ import sys,time if sys.version_info < (3,1): raise RuntimeError("rdifference.py requires Python 3.1 or above") import fiwalk,dfxml,dfxml_html def ptime(t): """Print the time in the requested format. 
T is a dfxml time value""" global options if t is None: return None elif options.timestamp: return str(t.timestamp()) else: return str(t.iso8601()) def dprint(x): "Debug print" global options if options.debug: print(x) # # This program keeps track of the current and previous TCP connections in a single # object called "FlowState". Another way to do that would have been to have # the instance built from the XML file and then have another function that compares # them. # class FlowState: def __init__(self,fname): self.options = options self.connections = set() self.process(fname) def process(self,fname): self.fname = fname dfxml.read_dfxml(xmlfile=open(fname,'rb'), callback=self.process_fi) def process_fi(self,fi): self.connections.add(fi) def report(self): dfxml_html.header() dfxml_html.h1("DFXML file:"+self.current_fname) dfxml_html.table(['Total Connections',str(len(self.connections))]) if __name__=="__main__": from optparse import OptionParser from copy import deepcopy global options parser = OptionParser() parser.usage = '%prog [options] file1 file2 (files MUST be tcpflow DFXML files)' parser.add_option("-d","--debug",help="debug",action='store_true') (options,args) = parser.parse_args() if len(args)!=2: parser.print_help() sys.exit(1) a = FlowState(fname=args[0]) a.report() b = FlowState(fname=args[1]) b.report() print("Difference:") tcpflow/src/dfxml/python/demo_mac_timeline_iter.py0000755000175000017500000000147112263701334021422 0ustar dimadima#!/usr/bin/env python # produce a MAC-times timeline using the iterative DFXML interface. 
# works under either Python2 or Python3 import dfxml, sys def main(): if len(sys.argv) < 2: print("Usage: {} ".format(sys.argv[0])) exit(1) timeline = [] for fi in dfxml.iter_dfxml( xmlfile=open(sys.argv[1],"rb") ): if fi.mtime()!=None: timeline.append([fi.mtime(),fi.filename()," modified"]) if fi.crtime()!=None: timeline.append([fi.crtime(),fi.filename()," created"]) if fi.ctime()!=None: timeline.append([fi.ctime(),fi.filename()," changed"]) if fi.atime()!=None: timeline.append([fi.atime(),fi.filename()," accessed"]) timeline.sort() for record in timeline: print("\t".join( map(str, record)) ) if __name__ == "__main__": main() tcpflow/src/dfxml/python/dfxml.py0000644000175000017500000020016212263701334016052 0ustar dimadima#!/usr/bin/env python # # dfxml.py # Digital Forensics XML classes """Digital Forensics XML classes. This module contains a number of classes for dealing with dfxml files, both using the XML DOM model and using the EXPAT model. The following moduel functions are defined: isone(x) - returns true if something is equal to 1 (useful for 1 safeInt(x) - converts something to an int but never raises an exception The following classes are defined in this module: byte_run - the class for representing a run on the disk dftime - represents time. Can be in either Unix timestamp or ISO8601. Interconverts as necessary. fileobject - represents a DFXML fileobject. byte_runs() is function that returns an array of byterun objects. Each object has the attributes: file_offset - offset from the beginning of the file img_offset - offset from the beginning of the image len - the number of bytes fs_offset - offset from the beginning of the file system where encoding, if present, is 0 for raw, 1 for NTFS compressed. 
""" import sys import re from sys import stderr from subprocess import Popen,PIPE import base64 import hashlib import datetime __version__ = "1.0.1" tsk_virtual_filenames = set(['$FAT1','$FAT2']) XMLNS_DFXML = "http://www.forensicswiki.org/wiki/Category:Digital_Forensics_XML" XMLNS_DELTA = "http://www.forensicswiki.org/wiki/Forensic_Disk_Differencing" def isone(x): """Return true if something is one (number or string)""" try: return int(x)==1; except TypeError: return False def safeInt(x): """Return an integer or False. False is returned, rather than None, because you can divide False by 3 but you can't divide None by 3. NOTE: This function could be written as: def safeInt(x): return int(x) if x else False but that doesn't work on older version of Python.""" if x: return int(x) return False def timestamp2iso8601(ts): import time return time.strftime("%FT%TZ",time.gmtime(ts)) from datetime import tzinfo,timedelta class GMTMIN(tzinfo): def __init__(self,minoffset): # DST starts last Sunday in March self.minoffset = minoffset def utcoffset(self, dt): return timedelta(minutes=self.minoffset) def dst(self, dt): return timedelta(0) def tzname(self,dt): return "GMT+%02d%02d" % (self.minoffset/60,self.minoffset%60) def parse_iso8601(ts): Z = ts.find('Z') if Z>0: return datetime.datetime.strptime(ts[:Z],"%Y-%m-%dT%H:%M:%S") raise RuntimeError("parse_iso8601: ISO8601 format {} not recognized".format(ts)) rx_iso8601 = re.compile("(\d\d\d\d)-(\d\d)-(\d\d)[T ](\d\d):(\d\d):(\d\d)(\.\d+)?(Z|[-+]\d\d:?\d\d)?") def iso8601Tdatetime(s): """SLG's conversion of ISO8601 to datetime""" m = rx_iso8601.search(s) if not m: raise ValueError("Cannot parse: "+s) # Get the microseconds try: microseconds = int(float(m.group(7)) * 1000000) except TypeError: microseconds = 0 # Figure tz offset offset = None minoffset = None if m.group(8): if m.group(8)=="Z": minoffset = 0 elif m.group(8)[0:1] in "-+": minoffset = int(m.group(8)[0:3]) * 60 + int(m.group(8)[-2:]) z = s.find("Z") if z>=0: offset = 
0 # Build the response if minoffset: return datetime.datetime(int(m.group(1)),int(m.group(2)),int(m.group(3)), int(m.group(4)),int(m.group(5)),int(m.group(6)), microseconds,GMTMIN(minoffset)) elif offset: return datetime.datetime(int(m.group(1)),int(m.group(2)),int(m.group(3)), int(m.group(4)),int(m.group(5)),int(m.group(6)), microseconds,GMTMIN(offset)) else: return datetime.datetime(int(m.group(1)),int(m.group(2)),int(m.group(3)), int(m.group(4)),int(m.group(5)),int(m.group(6)), microseconds) #This format is as specified in RFC 822, section 5.1, and matches the adjustments in RFC 1123, section 5.2.14. It appears in email and HTTP headers. rx_rfc822datetime = re.compile("(?P\d{1,2}) (?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?P\d{4}) (?P\d\d):(?P\d\d):(?P\d\d) (?PZ|[-+]\d\d:?\d\d)") three_letter_month_dict = { "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12 } def rfc822Tdatetime(s): """ AJN's conversion of times occurring in RFC 822 data to datetime. Follows SLG's pattern. """ m = rx_rfc822datetime.search(s) if not m: raise ValueError("Cannot parse as an RFC 822 timestamp: %r." % s) mgd = m.groupdict() # Figure tz offset offset = None minoffset = None match_timezone = mgd.get("timezone") if match_timezone: if match_timezone == "Z": minoffset = 0 elif match_timezone[0] in "-+": minoffset = int(match_timezone[0:-2]) * 60 + int(match_timezone[-2:]) #TODO Find a reason to use the 'offset' variable? (Hour offset, vs. minute offset?) 
if minoffset: return datetime.datetime( int(mgd["year"]), three_letter_month_dict[mgd["month"]], int(mgd["day"]), int(mgd["hours"]), int(mgd["minutes"]), int(mgd["seconds"]), 0, GMTMIN(minoffset) ) else: return datetime.datetime( int(mgd["year"]), three_letter_month_dict[mgd["month"]], int(mgd["day"]), int(mgd["hours"]), int(mgd["minutes"]), int(mgd["seconds"]), 0 ) ################################################################ ### ### byte_run class ### class byte_run: """The internal representation for a byte run. byte_runs have the following attributes: .img_offset = offset of the byte run from the image start, in bytes .len = the length of the run, in bytes (prevoiusly called 'bytes') .sector_size = sector size of the underlying media Originally this was an array, which is faster than an attributed object. But this approach is more expandable, and it's only 70% the speed of an array under Python3.0. Note that Python 3 removed the __cmp__ class method: """ # declaring slots prevents other attributes from appearing, # but that prevents the code from working with new XML that has new fields. 
# __slots__ = ["file_offset","img_offset","len","fill","sector_size"] def __init__(self,img_offset=None,len=None,file_offset=None): self.img_offset = img_offset self.file_offset = file_offset self.len = len self.sector_size = 512 # default self.hashdigest = dict() # def __lt__(self,other): if self.img_offset is not None and other.img_offset is not None: return self.img_offset < other.img_offset elif self.file_offset is not None and other.file_offset is not None: return self.file_offset < other.file_offset else: raise ValueError("Byte run objects are incomparable") def __eq__(self,other): if self.img_offset is not None and other.img_offset is not None: return self.img_offset == other.img_offset elif self.file_offset is not None and other.file_offset is not None: return self.file_offset == other.file_offset else: raise ValueError("Byte run objects are incomparable") def __str__(self): try: return "byte_run[img_offset={0}; file_offset={1} len={2}] ".format( self.img_offset,self.file_offset,self.len) except (AttributeError, TypeError): #Catch attributes that are missing or mis-typed (e.g. NoneType) pass try: return "byte_run[file_offset={0}; fill={1}; len={2}]".format( self.file_offset,self.fill,self.len) except AttributeError: pass try: return "byte_run[file_offset={0}; uncompressed_len={1}]".format( self.file_offset,self.uncompressed_len) except AttributeError: return "byte_run"+str(dir(self)) def start_sector(self): return self.img_offset // self.sector_size def sector_count(self): return self.len // self.sector_size def has_sector(self,s): if self.sector_size==0: raise ValueError("%s: sector_size cannot be 0" % (self)) try: return self.img_offset <= s * self.sector_size < self.img_offset+self.len except AttributeError: # Doesn't have necessary attributes to answer true. 
# Usually this happens with runs of a constant value return False def extra_len(self): return self.len % self.sector_size def decode_xml_attributes(self,attr): for (key,value) in attr.items(): try: setattr(self,key,int(value)) except ValueError: setattr(self,key,value) def decode_sax_attributes(self,attr): for (key,value) in attr.items(): if key=='bytes': key=='len' # tag changed name; provide backwards compatiability try: setattr(self,key,int(value)) except ValueError: setattr(self,key,value) class ComparableMixin(object): """ Comparator "Abstract" class. Classes inheriting this must define a _cmpkey() method. Credit to Lennart Regebro for the total implementation of this class, found equivalently from: http://regebro.wordpress.com/2010/12/13/python-implementing-rich-comparison-the-correct-way/ http://stackoverflow.com/questions/6907323/comparable-classes-in-python-3/6913420#6913420 """ def _compare(self, other, method): try: return method(self._cmpkey(), other._cmpkey()) except (AttributeError, TypeError): # _cmpkey not implemented, or return different type, # so I can't compare with "other". return NotImplemented def __lt__(self, other): return self._compare(other, lambda s, o: s < o) def __le__(self, other): return self._compare(other, lambda s, o: s <= o) def __eq__(self, other): return self._compare(other, lambda s, o: s == o) def __ge__(self, other): return self._compare(other, lambda s, o: s >= o) def __gt__(self, other): return self._compare(other, lambda s, o: s > o) def __ne__(self, other): return self._compare(other, lambda s, o: s != o) class dftime(ComparableMixin): """Represents a DFXML time. Automatically converts between representations and caches the results as necessary..""" UTC = GMTMIN(0) def ts2datetime(self,ts): import datetime return datetime.datetime.utcfromtimestamp(ts).replace(tzinfo=dftime.UTC) def __init__(self,val): #'unicode' is not a type in Python 3; 'basestring' is not a type in Python 2. 
if sys.version_info >= (3,0): _basestring = str else: _basestring = basestring if isinstance(val, str) or isinstance(val,_basestring): # #Test for ISO 8601 format - "YYYY-MM-DD" should have hyphen at val[4] if len(val)>5 and val[4]=="-": self.iso8601_ = val elif len(val) > 15 and ":" in val[13:15]: #Maybe the data are instead the timestamp format found in email headers? #(The check for 13:15 gets the 14th and 15th characters, since the day can be single- or double-digit.) self.datetime_ = rfc822Tdatetime(val) else: #Maybe the data are a string-wrapped int? #If this fails, the string format is completely unexpected, so just raise an error. self.timestamp_ = int(val) elif type(val)==int or type(val)==float: self.timestamp_ = val elif isinstance(val, datetime.datetime): self.datetime_ = val #TODO Unit-test this with a timezone-less datetime elif val==None: self.timestamp_ = None self.iso8601_ = None elif isinstance(val, dftime): #If we instead use .timestamp_, we risk having a timezone conversion error self.iso8601_ = val.iso8601() else: raise ValueError("Unknown type '%s' for DFXML time value" % (str(type(val)))) def __str__(self): return self.iso8601() or "" def __repr__(self): return self.iso8601() or "None" def __le__(self,b): if b is None: return None return self.iso8601().__le__(b.iso8601()) def __gt__(self,b): if b is None: return None return self.iso8601().__gt__(b.iso8601()) def _cmpkey(self): """Provide a key to use for comparisons; for use with ComparableMixin parent class.""" return self.timestamp() def __eq__(self,b): if b == None: #This will always be False - if self were None, we wouldn't be in this __eq__ method. return False return self.timestamp()==b.timestamp() def iso8601(self): # Do we have a cached representation? import time try: return self.iso8601_ except AttributeError: pass # Do we have a datetime representation? 
try: self.iso8601_ = self.datetime_.isoformat() return self.iso8601_ except AttributeError: # We better have a Unix timestamp representation? self.iso8601_ = time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(self.timestamp_)) return self.iso8601_ def timestamp(self): import time # Do we have a cached representation? try: return self.timestamp_ except AttributeError: pass # Do we have a datetime_ object? try: self.timestamp_ = time.mktime(self.datetime_.timetuple()) return self.timestamp_ except AttributeError: self.datetime_ = iso8601Tdatetime(self.iso8601_) self.timestamp_ = time.mktime(self.datetime_.timetuple()) return self.timestamp_ def datetime(self): import datetime # return the datetime from parsing either iso8601 or from parsing timestamp try: self.datetime_ = self.ts2datetime(self.timestamp_) # This needs to be in UTC offset. How annoying. return self.datetime_ except AttributeError: self.datetime_ = iso8601Tdatetime(self.iso8601_) return self.datetime_ class registry_object: def __init__(self): self.object_index = {} self._mtime = None """Keep handy a handle on the registry object""" self.registry_handle = self def mtime(self): return self._mtime class registry_cell_object: def __init__(self): self._byte_runs = [] """This is a pointer to a registry_key_object. The root node has no parent key.""" self.parent_key = None self._name = None self._full_path = None """Keys have two types: "root" (0x2c,0xac) and not-root. Values have several more types.""" self._type = None """Keep handy a handle on the registry object""" self.registry_handle = None """Name the cell type, for str() and repr().""" self._cell_type = "(undefined cell object type)" """Only applicable to values.""" self._sha1 = None def name(self): """This is the name of the present key or value.""" return self._name def full_path(self): """ This is the full path from the root of the hive, with keys acting like directories and the value name acting like the basename. 
Unlike DFXML, registry paths are delimited with a backslash due to the forward slash being a legal and commonly observed character in cell names. """ return self._full_path def type(self): """ This is the data type of the cell. Keys can be root or not-root; values have several types, like UTF-8, binary, etc. Presently, this exports as a string representation of the type, not the numeric type code. """ return self._type def _myname(self): """This function is called by repr and str, due to (vague memories of) the possibility of an infinite loop if __repr__ calls __self__.""" if len(self._byte_runs) > 0: addr = str(self._byte_runs[0].file_offset) else: addr = "(unknown)" return "".join(["<", self._cell_type, " for hive file offset ", addr, ">"]) def __repr__(self): return self._myname() def __str__(self): return self._myname() def mtime(self): raise NotImplementedError("registry_cell_object.mtime() not over-ridden!") def byte_runs(self): """Returns a sorted array of byte_run objects.""" #If this idiom is confusing, see: http://henry.precheur.org/python/copy_list ret = list(self._byte_runs) return ret def sha1(self): """ Return None. Meant to be overwritten. """ return None def md5(self): """ Return None. Meant to be overwritten. 
""" return None class registry_key_object(registry_cell_object): def __init__(self): registry_cell_object.__init__(self) self._mtime = None self.values = {} self.used = True #TODO Add toggling logic for when hivexml (eventually) processes recovered keys self._cell_type = "registry_key_object" def mtime(self): return self._mtime def root(self): if self.type() is None: return None return self.type() == "root" class registry_value_object(registry_cell_object): def __init__(self): registry_cell_object.__init__(self) self.value_data = None self._cell_type = "registry_value_object" #TODO Replace to be in line with fileobjects: fileobject.hashdigest is a dictionary self._hashcache = dict() """List for the string-list type of value.""" self.strings = None def mtime(self): """Return nothing. Alternatively, we might return mtime of parent key in the future.""" return None # if self.parent_key: # return self.parent_key.mtime() # else: # return None def _hash(self, hashfunc): """ Return cached hash, populating cache if necessary. hashfunc expected values: The functions hashlib.sha1, hashlib.md5. If self.value_data is None, or there are no strings in a "string-list" type, this should return None. Interpretation: Registry values of type "string-list" are hashed by feeding each element of the list into the hash .update() function. All other Registry values are fed in the same way, as a 1-element list. For example, a string type value cell with data "a" fed into this function returns md5("a") (if hashlib.md5 were requested). A string-list type value cell with data ["a","b"] returns md5("ab"). This is a simplification to deal with Registry string encodings, and may change in the future. 
""" if self._hashcache.get(repr(hashfunc)) is None: feed_list = [] if self.type() == "string-list": feed_list = self.strings elif not self.value_data is None: feed_list.append(self.value_data) #Normalize to hash .update() required type for (elemindex, elem) in enumerate(feed_list): if type(elem) == type(""): #String data take a little extra care: #"The bytes in your ... file are being automatically decoded to Unicode by Python 3 as you read from the file" #http://stackoverflow.com/a/7778340/1207160 feed_list[elemindex] = elem.encode("utf-8") #Hash if there's data to hash if len(feed_list) > 0: h = hashfunc() for elem in feed_list: h.update(elem) self._hashcache[repr(hashfunc)] = h.hexdigest() return self._hashcache.get(repr(hashfunc)) def sha1(self): return self._hash(hashlib.sha1) def md5(self): return self._hash(hashlib.md5) class fileobject: """The base class for file objects created either through XML DOM or EXPAT""" TIMETAGLIST=['atime','mtime','ctime','dtime','crtime'] def __init__(self,imagefile=None): self.imagefile = imagefile self.hashdigest = dict() def __str__(self): try: fn = self.filename() except KeyError: fn = "???" 
class fileobject:
    """The base class for file objects created either through XML DOM or EXPAT.

    Subclasses supply tag(name), has_tag(name) and byte_runs(); the accessors
    below are written purely in terms of those three calls.
    """

    TIMETAGLIST = ['atime', 'mtime', 'ctime', 'dtime', 'crtime']

    def __init__(self, imagefile=None):
        self.imagefile = imagefile
        self.hashdigest = dict()

    def __str__(self):
        try:
            fn = self.filename()
        except KeyError:
            fn = "???"
        return "fileobject %s byte_runs: %s" % (fn, " ".join([str(x) for x in self.byte_runs()]))

    def partition(self):
        """Partition number of the file"""
        return self.tag("partition")

    def filename(self):
        """Complete name of the file (sometimes called pathname)"""
        return self.tag("filename")

    def ext(self):
        """Extension without the leading '.', or None when there is none.

        The extension is returned with the case it has in the filename."""
        import os
        (base, ext) = os.path.splitext(self.filename())
        if ext == '':
            return None
        return ext[1:]

    def filesize(self):
        """Size of the file, in bytes"""
        return safeInt(self.tag("filesize"))

    def uid(self):
        """UID of the file"""
        return safeInt(self.tag("uid"))

    def gid(self):
        """GID of the file"""
        return safeInt(self.tag("gid"))

    def meta_type(self):
        """Meta-type of the file"""
        return safeInt(self.tag("meta_type"))

    def mode(self):
        """Mode of the file"""
        return safeInt(self.tag("mode"))

    def _time_tag(self, name):
        """Return the time tag NAME as a dftime, or None if absent or empty."""
        t = self.tag(name)
        if t:
            return dftime(t)
        return None

    def ctime(self):
        """Metadata Change Time (sometimes Creation Time), as a dftime or None"""
        return self._time_tag("ctime")

    def atime(self):
        """Access time, as a dftime or None"""
        return self._time_tag("atime")

    def crtime(self):
        """CR time, as a dftime or None"""
        return self._time_tag("crtime")

    def mtime(self):
        """Modify time, as a dftime or None"""
        return self._time_tag("mtime")

    def dtime(self):
        """ext2 dtime, as a dftime or None"""
        return self._time_tag("dtime")

    def times(self):
        """Return a dictionary of all times that the system has"""
        ret = {}
        for tag in self.TIMETAGLIST:
            if self.has_tag(tag):
                try:
                    ret[tag] = dftime(self.tag(tag))
                except TypeError:
                    # Unparseable time values are left out of the result.
                    pass
        return ret

    def sha1(self):
        """Returns the SHA1 in hex"""
        return self.tag("sha1")

    def md5(self):
        """Returns the MD5 in hex"""
        return self.tag("md5")

    def fragments(self):
        """Returns number of file fragments"""
        return len(self.byte_runs())

    def name_type(self):
        """Return the contents of the name_type tag"""
        return self.tag("name_type")

    def is_virtual(self):
        """Returns true if the fi entry is a TSK virtual entry"""
        return self.filename() in tsk_virtual_filenames

    def is_dir(self):
        """Returns true if file is a directory"""
        return self.name_type() == 'd'

    def is_file(self):
        """Returns true if file is a file (name_type 'r', or no name_type at all)"""
        name_type = self.name_type()
        return name_type == 'r' or name_type is None

    def inode(self):
        """Inode; may be a number or SleuthKit x-y-z format"""
        return self.tag("inode")

    def allocated(self):
        """Returns True if the file is allocated, False if it was not
        (that is, if it was deleted or is an orphan).
        Note that we need to be tolerant of mixed case, as it was changed.
        We also need to tolerate the case of the unalloc tag being used.
        """
        if self.filename() == "$OrphanFiles":
            return False
        return isone(self.tag("alloc")) or isone(self.tag("ALLOC")) or not isone(self.tag("unalloc"))

    def compressed(self):
        """True if the file is flagged compressed (either tag case)."""
        # The original tested the lower-case tag twice, so a file carrying
        # only the upper-case tag was always reported as not compressed.
        if not self.has_tag("compressed") and not self.has_tag("COMPRESSED"):
            return False
        return isone(self.tag("compressed")) or isone(self.tag("COMPRESSED"))

    def encrypted(self):
        """True if the file is flagged encrypted (either tag case)."""
        if not self.has_tag("encrypted") and not self.has_tag("ENCRYPTED"):
            return False
        return isone(self.tag("encrypted")) or isone(self.tag("ENCRYPTED"))

    def file_present(self, imagefile=None):
        """Returns true if the file is present in the disk image

        The stored md5 (or, failing that, sha1) is compared with a hash of
        the bytes currently in the image.  Raises ValueError when the file
        has neither hash recorded."""
        if self.filesize() == 0:
            return False               # empty files are never present
        if imagefile is None:
            imagefile = self.imagefile  # use this one
        for hashname in ['md5', 'sha1']:
            oldhash = self.tag(hashname)
            if oldhash:
                newhash = hashlib.new(hashname, self.contents(imagefile=imagefile)).hexdigest()
                return oldhash == newhash
        raise ValueError("Cannot process file " + self.filename() + ": no hash in " + str(self))

    def has_contents(self):
        """True if the file has one or more bytes"""
        return len(self.byte_runs()) > 0

    def has_sector(self, s):
        """True if sector s is contained in one of the byte_runs."""
        for run in self.byte_runs():
            if run.has_sector(s):
                return True
        return False

    def libmagic(self):
        """Returns libmagic string if the string is specified
        in the xml, or None otherwise"""
        return self.tag("libmagic")

    def content_for_run(self, run=None, imagefile=None):
        """Returns the content for a specific run, as a byte string.

        This is a convenience feature which does not touch the file object
        if an imagefile is provided.  Raises ValueError if no run is given."""
        if imagefile is None:
            imagefile = self.imagefile
        if run is None:
            raise ValueError("content_for_run called without a 'run' argument.")
        if run.len == -1:
            # Multiplying by -1 yields an empty result, exactly as before.
            return b"\0" * run.len
        elif hasattr(run, 'fill'):
            # Build bytes rather than text so the result can be joined with
            # data read from a binary image file on both Python 2 and 3.
            return bytes(bytearray([run.fill]) * run.len)
        else:
            imagefile.seek(run.img_offset)
            return imagefile.read(run.len)

    def contents(self, imagefile=None, icat_fallback=True):
        """Returns the contents of all the runs concatenated together.
        For allocated files this should be the original file contents.

        Compressed files, and files inside .aff/.E01 images, are extracted
        by running the external 'icat' program when icat_fallback is true.
        Raises ValueError when the image is unknown, the file is encrypted,
        icat reports an error, or the compressed data cannot be read."""
        if imagefile is None:
            imagefile = self.imagefile
        if imagefile is None:
            raise ValueError("imagefile is unknown")
        if self.encrypted():
            raise ValueError("Cannot generate content for encrypted files")
        if self.compressed() or imagefile.name.endswith(".aff") or imagefile.name.endswith(".E01"):
            if icat_fallback:
                #
                # For now, compressed files rely on icat rather than python interface
                #
                offset = safeInt(self.volume.offset)
                block_size = safeInt(self.volume.block_size)
                if block_size == 0:
                    block_size = 512
                inode = self.inode()
                if inode:
                    # NOTE(review): the block size is forced to 512 here even
                    # when the volume reported another value; kept as found.
                    block_size = 512
                    fstype = self.volume.ftype_str()
                    cmd = ['icat']
                    if fstype is not None:
                        cmd.append('-f' + fstype)
                    cmd += ['-b', str(block_size), '-o', str(offset // block_size),
                            imagefile.name, str(inode)]
                    (data, err) = Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()
                    # Check for an error
                    if len(err) > 0:
                        raise ValueError("icat error (" + str(err).strip() + "): " + " ".join(cmd))
                    return data
                else:
                    raise ValueError("Inode missing from file in compressed format.")
            raise ValueError("Cannot read raw bytes in compressed disk image")
        res = []
        for run in self.byte_runs():
            res.append(self.content_for_run(run=run, imagefile=imagefile))
        return b"".join(res)

    def tempfile(self, calcMD5=False, calcSHA1=False):
        """Return the contents of imagefile in a named temporary file. If
        calcMD5 or calcSHA1 are set TRUE, then the object returned has a
        hashlib object as self.md5 or self.sha1 with the requested hash."""
        import tempfile
        tf = tempfile.NamedTemporaryFile()
        if calcMD5:
            tf.md5 = hashlib.md5()
        if calcSHA1:
            tf.sha1 = hashlib.sha1()
        for run in self.byte_runs():
            self.imagefile.seek(run.img_offset)
            count = run.len
            while count > 0:
                xfer_len = min(count, 1024 * 1024)   # transfer up to a megabyte at a time
                buf = self.imagefile.read(xfer_len)
                if len(buf) == 0:
                    break
                tf.write(buf)
                if calcMD5:
                    tf.md5.update(buf)
                if calcSHA1:
                    tf.sha1.update(buf)
                count -= xfer_len
        tf.flush()
        return tf

    def savefile(self, filename=None):
        """Saves the file."""
        with open(filename, "wb") as f:
            for run in self.byte_runs():
                self.imagefile.seek(run.img_offset)
                count = run.len
                while count > 0:
                    xfer_len = min(count, 1024 * 1024)   # transfer up to a megabyte at a time
                    buf = self.imagefile.read(xfer_len)
                    if len(buf) == 0:
                        break
                    f.write(buf)
                    count -= xfer_len

    def frag_start_sector(self, fragment):
        """Return the 512-byte sector number at which the given fragment starts."""
        # Floor division keeps the result an integer on Python 3 as well.
        return self.byte_runs()[fragment].img_offset // 512
class saxobject:
    """Mix-in that makes it easy to turn XML tags into functions.

    Tag values live in the _tags dictionary, which is filled by the
    _end_element() method of the reader that owns the object.  When a tag is
    registered with register_sax_tag(), a method named after the tag is
    created whose call returns the stored value.  For fileobjects all tags
    are remembered.
    """

    def __init__(self):
        self._tags = dict()

    def tag(self, name):
        """Returns the XML text for a given NAME, or None if it was never stored."""
        try:
            return self._tags[name]
        except KeyError:
            return None

    def has_tag(self, name):
        """True if a value has been stored for NAME."""
        present = name in self._tags
        return present


def register_sax_tag(tagclass, name):
    """Give TAGCLASS a method called NAME that returns self.tag(NAME)."""
    def accessor(self):
        return self.tag(name)
    setattr(tagclass, name, accessor)
def safe_b64decode(b64data):
    """Decode base64-encoded XML data to text on both Python 2 and 3.

    B64DATA may be a text string or a byte string.  Python 3's b64decode
    operates on bytes, so text input is first encoded as ASCII.  The decoded
    bytes are then interpreted with the "unicode_escape" codec.
    """
    # Test the type directly instead of comparing str(type(...)) against a
    # type-name literal.
    if isinstance(b64data, bytes):
        to_decode = b64data
    else:
        to_decode = b64data.encode("ascii")
    return base64.b64decode(to_decode).decode("unicode_escape")


class xml_reader:
    """Base class for the expat-driven readers.

    Subclasses define _start_element(name, attrs) and _end_element(name);
    character data is accumulated in self.cdata whenever it is not None.
    """

    def __init__(self):
        self.cdata = None
        self.tagstack = ['xml']

    def _char_data(self, data):
        """Handles XML data"""
        if self.cdata is not None:
            self.cdata += data

    def process_xml_stream(self, xml_stream, callback, preserve_fis=False):
        """Run the reader on a given XML input stream

        CALLBACK and PRESERVE_FIS are stored on the reader for the element
        handlers to use; fi_history is reset for every run."""
        self.callback = callback
        self.preserve_fis = preserve_fis
        self.fi_history = []
        import xml.parsers.expat
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self._start_element
        p.EndElementHandler = self._end_element
        p.CharacterDataHandler = self._char_data
        p.ParseFile(xml_stream)
""" new_object = None if name in ["msregistry","hive"]: new_object = registry_object() self.objectstack.append(new_object) self.registry_object = new_object elif name in ["key","node"]: new_object = registry_key_object() #Note these two tests for root and parent _are_ supposed to be independent tests. if attrs.get("root",None) == "1": new_object._type = "root" else: new_object._type = "" if len(self.objectstack) > 1: new_object.parent_key = self.objectstack[-1] #Sanity check: root key implies no parent if new_object.type() == "root": assert new_object.parent_key == None #Sanity check: no parent implies root key --OR-- recovered key if new_object.parent_key == None: assert new_object.used == False or new_object.type() == "root" #Define new_object.name #Force a name for keys. If the key has no recorded name, apply artificial name prefix to nonce. name_data = attrs.get("name") if name_data == None: new_object._name = "__DFXML_NONCE_" + str(self.nonce) self.nonce += 1 else: enc = attrs.get("name_encoding") if enc == "base64": new_object._name = safe_b64decode(name_data) else: new_object._name = name_data if new_object.parent_key == None: new_object._full_path = "\\" + new_object.name() # TODO need a name scheme for orphan references, when we start processing orphans else: new_object._full_path = new_object.parent_key.full_path() + "\\" + new_object.name() self.objectstack.append(new_object) elif name in ["value"]: new_object = registry_value_object() new_object.parent_key = self.objectstack[-1] new_object._type = attrs.get("type",None) if new_object.type() == "string-list": new_object.strings = [] #Store decoded name if attrs.get("default",None) == "1": new_object._name = "Default" if attrs.get("name",attrs.get("key",None)) is not None: #TODO Notify: concurrently set name attribute and default-name flag pass else: enc = attrs.get("name_encoding",attrs.get("key_encoding")) name_data = attrs.get("name",attrs.get("key",None)) if enc == "base64": try: new_object._name = 
def decoded_value(self, attrs):
    """Return the value carried in a value element's attributes.

    Gives None when there is no "value" attribute or it is empty, the
    base64-decoded bytes when the encoding attribute is "base64", and the
    attribute text unchanged otherwise.
    """
    raw = attrs.get("value", None)
    if not raw:
        return None
    # TODO adjust hivexml to not use a plain "encoding" attribute
    fallback_encoding = attrs.get("value_encoding")
    declared_encoding = attrs.get("encoding", fallback_encoding)
    if declared_encoding != "base64":
        return raw
    if sys.version_info.major > 2:
        # Python 3's b64decode wants bytes rather than text.
        raw = bytes(raw, encoding='ascii')
    return base64.b64decode(raw)
""" # TODO sanity-check the objectstack if name in ["msregistry","hive"]: pass elif name in ["key","node"]: finished_object = self.objectstack.pop() #Add finished object to object index if finished_object.full_path() in self.registry_object.object_index: raise ValueError("regxml_reader._end_element: Same key path found more than once: " + finished_object.full_path()) self.registry_object.object_index[finished_object.full_path()] = finished_object self.callback(finished_object) elif name in ["mtime"]: self.objectstack[-1]._mtime = dftime(self.cdata) self.cdata = None elif name in ["value"]: finished_object = self.objectstack.pop() #TODO Simplify once hivexml is patched to have value/@value instead of value/[cdata] if finished_object.value_data == None: finished_object.value_data = self.cdata self.callback(finished_object) elif name in ["string"]: value_object = self.objectstack[-1] if value_object.strings == None: raise ValueError("regxml_reader._end_element: parsing error, string element found, but parent's type can't support a string list.") value_object.strings.append(self.cdata) self.cdata = None elif name in ["byte_runs","byte_run"]: pass else: raise ValueError("regxml_reader._end_element: Don't know how to end element %s.\n" % name) class fileobject_reader(xml_reader): """Class which uses the SAX expat-based XML reader. 
Reads an FIWALK XML input file and automatically creates volumeobject_sax and fileobject_sax objects, but just returns the filoeobject objects..""" def __init__(self,imagefile=None,flags=None): self.creator = None self.volumeobject = None self.fileobject = None self.imageobject = imageobject_sax() self.imagefile = imagefile self.flags = flags xml_reader.__init__(self) def _start_element(self, name, attrs): """ Handles the start of an element for the XPAT scanner""" self.tagstack.append(name) self.cdata = "" # new element, so reset the data if name=="volume": self.volumeobject = volumeobject_sax() self.volumeobject.block_size = 512 # reasonable default self.volumeobject.image = self.imageobject if "offset" in attrs: self.volumeobject.offset = int(attrs["offset"]) return if name=="block_size": pass if name=="fileobject": self.fileobject = fileobject_sax(imagefile=self.imagefile) self.fileobject.volume = self.volumeobject return if name=='hashdigest': self.hashdigest_type = attrs['type'] if self.fileobject and (name=="run" or name=="byte_run"): b = byte_run() b.decode_sax_attributes(attrs) self.fileobject._byte_runs.append(b) return def _end_element(self, name): """Handles the end of an eleement for the XPAT scanner""" assert(self.tagstack.pop()==name) # make sure that the stack matches if name=="volume": self.volumeobject = None return if name=="block_size" and len(self.tagstack) > 1 : if self.tagstack[-1] == "volume" : self.volumeobject.block_size = int(self.cdata) self.cdata=None return if name=="fileobject": self.callback(self.fileobject) if self.preserve_fis: self.fi_history.append(self.fileobject) self.fileobject = None return if name=='hashdigest' and len(self.tagstack)>0: top = self.tagstack[-1] # what the hash was for alg = self.hashdigest_type.lower() # name of the hash algorithm used if top=='byte_run': self.fileobject._byte_runs[-1].hashdigest[alg] = self.cdata if top=="fileobject": self.fileobject._tags[alg] = self.cdata # legacy 
self.fileobject.hashdigest[alg] = self.cdata self.cdata = None return if self.fileobject: # in a file object, all tags are remembered self.fileobject._tags[name] = self.cdata self.cdata = None return # Special case: fn # gets put in fn if name in ['image_filename','imagefile'] and self.tagstack[-1]=='source': self.imageobject._tags['image_filename'] = self.cdata class volumeobject_reader(xml_reader): """Reads just the section of a DFXML file""" def __init__(self): self.volumeobject = False xml_reader.__init__(self) self.imageobject = imageobject_sax() def _start_element(self, name, attrs): """ Handles the start of an element for the XPAT scanner""" self.tagstack.append(name) if name=="volume": self.volumeobject = volumeobject_sax() self.volumeobject.image = self.imageobject return if name=="fileobject": self.cdata = None # don't record this return self.cdata = "" # new element; otherwise data is ignored def _end_element(self, name): """Handles the end of an eleement for the XPAT scanner""" assert(self.tagstack.pop()==name) if name=="volume": self.callback(self.volumeobject) self.volumeobject = None return if self.tagstack[-1]=='volume' and self.volumeobject: # in the volume self.volumeobject._tags[name] = self.cdata self.cdata = None return if self.tagstack[-1] in ['fiwalk','dfxml']: self.imageobject._tags[name] = self.cdata return # Special case: fn gets put in fn if name in ['image_filename','imagefile'] and self.tagstack[-1]=='source': self.imageobject._tags['image_filename'] = self.cdata return class FinishedReadingCreator(Exception): """Class to indicate that creator object has been read""" class creatorobject_reader(xml_reader): """Reads the section of a DFXML file""" def __init__(self): self.creatorobject = False xml_reader.__init__(self) def _start_element(self, name, attrs): """ Handles the start of an element for the XPAT scanner""" self.tagstack.append(name) if name=="creator": self.creatorobject = creatorobject_sax() return if self.creatorobject: 
def combine_runs(runs):
    """Given a sequence of byte_run elements, combine the runs and return a new list.

    A run that begins exactly where the previous one ends in the image is
    merged with it into a fresh byte_run carrying only img_offset and len;
    every other run is passed through unchanged.  Any iterable is accepted,
    and an empty one gives an empty list.
    """
    ret = []
    for run in runs:
        if ret:
            last = ret[-1]
            # if the last one ends where this run begins, just extend
            if last.img_offset + last.len == run.img_offset:
                ret[-1] = byte_run(img_offset=last.img_offset,
                                   len=last.len + run.len)
                continue
        # otherwise append
        ret.append(run)
    return ret
Each extent is represented as a byte_run object""" def __init__(self,sectorsize=512): self.db = [] # the database of runs self.sectorsize = 512 pass def report(self,f): """Print information about the database""" f.write("sectorsize: %d\n" % self.sectorsize) for run in sorted(self.db): f.write(" [@%8d ; %8d]\n" % (run.img_offset,run.len)) f.write("total entries in database: %d\n\n" % len(r)) def sectors_for_bytes(self,count): """Returns the number of sectors necessary to hold COUNT bytes""" return (count+self.sectorsize-1)//self.sectorsize def sectors_for_run(self,run): """Returns an array of the sectors for a given run""" start_sector = run.img_offset/self.sectorsize sector_count = self.sectors_for_bytes(run.len) return range(start_sector,start_sector+sector_count) def run_for_sector(self,sector_number,count=1): """Returns the run for a specified sector, and optionally a count of sectors""" return byte_run(len=count*self.sectorsize,img_offset=sector_number * self.sectorsize) def intersects(self,extent): """Returns the intersecting extent, or None if there is none""" if extent.len==0: return True # 0 length intersects with everything if extent.len<0: raise ValueError("Length cannot be negative:"+str(extent)) start = extent.img_offset stop = extent.img_offset+extent.len for d in self.db: if d.img_offset <= start < d.img_offset+d.len: return d if d.img_offset < stop < d.img_offset+d.len: return d if startdb warn = "" if result != want: warn = " (!)" print("a=%s b=%s want=%s greater=%s%s" % (da,db,want,result,warn)) if options.regress: print("Testing unicode value parsing.") #Test base64 encoding of the "Registered" symbol, encountered in a key name in the M57-Patents corpus. 
test_unicode_string = "\xae" if sys.version_info.major == 2: #The test string doesn't quite get defined right that way in Python 2 test_unicode_string = unicode(test_unicode_string, encoding="latin-1") test_unicode_string_escaped = test_unicode_string.encode("unicode_escape") test_base64_bytes = base64.b64encode(test_unicode_string_escaped) elif sys.version_info.major == 3: test_unicode_string_escaped = test_unicode_string.encode("unicode_escape") test_base64_bytes = base64.b64encode(test_unicode_string_escaped) else: #Just hard-code value, no examples yet for this language version. test_base64_bytes = b'XHhhZQ==' test_base64_string = test_base64_bytes.decode("ascii") #test_base64_string is the kind of string data you'd expect to encounter in base64-encoded values processing RegXML. assert test_unicode_string == safe_b64decode(test_base64_bytes) assert test_unicode_string == safe_b64decode(test_base64_string) print("Unicode value parsing good!") print("Testing time string parsing") test_rfc822tdatetime = rfc822Tdatetime("26 Jun 2012 22:34:58 -0700") assert test_rfc822tdatetime.tzinfo is not None print("Time string parsing good!") print("Testing dftime values") #check_equal("1900-01-02T02:03:04Z",-2208895016,True) #AJN time.mktime doesn't seem to support old times any more a_pacific_dftime = dftime("26 Jun 2012 22:34:58 -0700") assert 0.0 == dftime(a_pacific_dftime.iso8601()).timestamp() - a_pacific_dftime.timestamp() check_equal("2000-01-02T02:03:04Z","2000-01-02T03:03:04-0100",False) check_equal("2000-01-02T02:03:04-0100","2000-01-02T02:03:04-0100",True) check_equal("2000-01-02T02:03:04-0100","2000-01-02T02:03:04-0200",False) check_equal("2000-01-02T02:03:04-0100","2000-01-02T01:03:04-0200",True) check_greater("2000-01-02T04:04:05-0100","2000-01-02T03:04:05-0100",True) check_greater("2000-01-02T03:04:05-0200","2000-01-02T03:04:05-0100",True) check_greater("2009-11-17T00:33:30.9375Z","2009-11-17T00:33:30Z",True) 
check_equal("2009-11-17T00:33:30.9375Z","2009-11-17T00:33:30Z",False) check_equal("2009-11-17T00:33:30.0000Z","2009-11-17T00:33:30Z",True) check_equal("27 Jun 2012 06:02:00 -0000","27 Jun 2012 05:02:00 -0100",True) check_equal("27 Jun 2012 06:02:00 -0000","2012-06-27T06:02:00Z",True) check_equal("26 Jun 2012 22:34:58 -0700","2012-06-27T05:34:58Z", True) print("dftime values passed.") print("Testing byte_run overlap engine:") db = extentdb() a = byte_run(img_offset=0,len=5) db.add(a) b = byte_run(5,5) db.add(b) try: assert db.intersects(byte_run(0,5))==byte_run(0,5) except: print(type(cmp)) print(db.intersects(byte_run(0,5))) print(byte_run(0,5)) raise assert db.intersects(byte_run(0,1)) assert db.intersects(byte_run(2,3)) assert db.intersects(byte_run(4,1)) assert db.intersects(byte_run(5,1)) assert db.intersects(byte_run(6,1)) assert db.intersects(byte_run(9,1)) assert db.intersects(byte_run(-1,5)) assert db.intersects(byte_run(-1,10)) assert db.intersects(byte_run(-1,11)) assert db.intersects(byte_run(-1,1))==None assert db.intersects(byte_run(10,1))==None print("Overlap engine good!") assert re.sub(rx_xmlns, "", """""") == "" assert re.sub(rx_xmlns, "", """""") == "" assert re.sub(rx_xmlns, "", """""") == """""" assert re.sub(rx_xmlns, "", """""") == """""" print("XML namespace regex good!") tcpflow/src/dfxml/python/iredact.py0000644000175000017500000003305012263701334016353 0ustar dimadima#!/usr/bin/python """Redact an image file using a ruleset... Image Redaction Project. This program redacts disk image files. inputs: * The disk image file * A set of rules that describe what to redact, and how to redact it. Rule File format: The readaction command file consists of commands. 
def convert_fileglob_to_re(fileglob):
    """Compile a shell-style file glob into a regular expression.

    '*' matches any run of characters and '?' matches exactly one character;
    every other character is matched literally.  The expression is not
    anchored, so callers that use .search() get substring matching.
    """
    translated = []
    for ch in fileglob:
        if ch == "*":
            translated.append(".*")
        elif ch == "?":
            translated.append(".")
        else:
            translated.append(re.escape(ch))
    return re.compile("".join(translated))


def _needle_for(text, haystack):
    """Return TEXT in the string type of HAYSTACK so that 'in'/find() work."""
    if isinstance(haystack, str) or not hasattr(text, "encode"):
        return text
    return text.encode("utf-8")


class _redact_run(object):
    """A matched span inside a byte run, exposing the attributes the actions read."""

    def __init__(self, file_offset, length, img_offset):
        self.file_offset = file_offset
        self.len = length
        self.img_offset = img_offset

    def __iter__(self):
        # Unpacks as (file_offset, len, img_offset).
        return iter((self.file_offset, self.len, self.img_offset))

    def __str__(self):
        return "redact_run[img_offset=%s; file_offset=%s len=%s]" % (
            self.img_offset, self.file_offset, self.len)


class redact_rule:
    """ Instances of this class are objects that can decide whether or not to redact."""

    def __init__(self, line):
        self.line = line
        self.complete = True    # by default, redacts everything

    def should_redact(self, fileobject):
        """Returns True if this fileobject should be redacted"""
        raise ValueError("redact method of redact_rule super class should not be called")

    def __str__(self):
        return "action<" + self.line + ">"

    def runs_to_redact(self, fi):
        """Returns the byte_runs of the source which match the rule.
        By default this is the entire object."""
        return fi.byte_runs()


class redact_rule_md5(redact_rule):
    """ redact if the MD5 matches"""

    def __init__(self, line, val):
        redact_rule.__init__(self, line)
        self.md5val = val.lower()

    def should_redact(self, fi):
        return self.md5val == fi.tag('md5')


class redact_rule_sha1(redact_rule):
    """ redact if the SHA1 matches"""

    def __init__(self, line, val):
        redact_rule.__init__(self, line)
        self.sha1val = val.lower()

    def should_redact(self, fi):
        return self.sha1val == fi.tag('sha1')


class redact_rule_filepat(redact_rule):
    """ redact if the file name matches the glob pattern"""

    def __init__(self, line, filepat):
        redact_rule.__init__(self, line)
        # convert fileglobbing to regular expression
        self.filepat_re = convert_fileglob_to_re(filepat)
        print("adding rule to redact path " + self.filepat_re.pattern)

    def should_redact(self, fileobject):
        return self.filepat_re.search(fileobject.filename())


class redact_rule_filename(redact_rule):
    """ redact if the last path component equals the given name"""

    def __init__(self, line, filename):
        redact_rule.__init__(self, line)
        self.filename = filename
        print("adding rule to redact filename " + self.filename)

    def should_redact(self, fileobject):
        # posixpath forces Unix filename conventions on every platform
        # without touching the process-wide os.path.sep.
        import posixpath
        return self.filename == posixpath.basename(fileobject.filename())


class redact_rule_dirname(redact_rule):
    """ redact if the file lives directly in the given directory"""

    def __init__(self, line, dirname):
        redact_rule.__init__(self, line)
        self.dirname = dirname

    def should_redact(self, fileobject):
        import posixpath
        return self.dirname == posixpath.dirname(fileobject.filename())


class redact_rule_contains(redact_rule):
    """ redact the whole file if its contents include the text"""

    def __init__(self, line, text):
        redact_rule.__init__(self, line)
        self.text = text

    def should_redact(self, fileobject):
        data = fileobject.contents()
        return _needle_for(self.text, data) in data


class redact_rule_string(redact_rule):
    """ redact only the occurrences of the text inside the file"""

    def __init__(self, line, text):
        redact_rule.__init__(self, line)
        self.text = text
        self.complete = False   # doesn't redact the entire file

    def should_redact(self, fileobject):
        data = fileobject.contents()
        return _needle_for(self.text, data) in data

    def runs_to_redact(self, fi):
        """Overridden to return the byte runs of just the given text"""
        ret = []
        for run in fi.byte_runs():
            run_content = fi.content_for_run(run)
            needle = _needle_for(self.text, run_content)
            # Now find all the places inside "run"
            # where the text "self.text" appears
            print("looking for '{}' in '{}'".format(self.text, run))
            offset = run_content.find(needle)
            while offset >= 0:
                ret.append(_redact_run(run.file_offset + offset,
                                       len(needle),
                                       run.img_offset + offset))
                offset = run_content.find(needle, offset + 1)
        return ret


class ignore_rule():
    """Not actually a redact rule, but rather a rule for global ignores"""

    def __init__(self):
        self.ignore_patterns = []

    def ignore(self, ignore):
        """Ignores specified files based on a file glob; returns self for chaining"""
        self.ignore_patterns.append(convert_fileglob_to_re(ignore))
        return self

    def should_ignore(self, fi):
        """True if the file name matches any registered ignore pattern"""
        for ig in self.ignore_patterns:
            if ig.search(fi.filename()):
                return True
        return False


################################################################
class redact_action():
    """Instances of this class are objects that specify how a redaction should be done."""

    def redact(self, rule, fileobject, rc):
        """Performs the redaction"""
        raise ValueError("redact method of redact_action super class should not be called")


class redact_action_fill(redact_action):
    """ Perform redaction by filling"""

    def __init__(self, val):
        self.fillvalue = val

    def redact(self, rule, fi, rc):
        """Overwrite every run selected by RULE with the fill byte.

        The image is only written when rc.commit is true."""
        for run in rule.runs_to_redact(fi):
            print("   Current run %s " % (run,))
            rc.imagefile.seek(run.img_offset)
            runlen = run.len
            print("\tFile info - \n\t\tname: %s  \n\t\tclosed: %s \n\t\tposition: %d \n\t\tmode: %s" %
                  (rc.imagefile.name, rc.imagefile.closed, rc.imagefile.tell(), rc.imagefile.mode))
            print("   Filling at offset {}, {} bytes with pattern {}".format(
                run.img_offset, runlen, hex(self.fillvalue)))
            if rc.commit:
                rc.imagefile.seek(run.img_offset)
                # The image is opened in binary mode, so write bytes.
                rc.imagefile.write(bytes(bytearray([self.fillvalue]) * run.len))
                print("   >>COMMIT\n")


class redact_action_encrypt(redact_action):
    """ Perform redaction by encrypting"""

    def redact(self, rule, fileobject, rc):
        """Not implemented: raises ValueError for the first run it is asked to encrypt."""
        for run in rule.runs_to_redact(fileobject):
            print("   encrypting at offset {}, {} bytes with cipher".format(run.img_offset, run.len))
            raise ValueError("Whoops; Didn't write this yet")


class redact_action_fuzz(redact_action):
    """ Perform redaction by fuzzing x86 instructions """

    def redact(self, rule, fileobject, rc):
        '''
        The net effect of this function is that bytes 127-255 are "fuzzed" over
        the range of 159-191, with each series of four bytes
        (e.g. 128-131) to one byte value (e.g. 160).
        '''
        def fuzz(o):
            # o is the integer value of one byte; values below 127 pass through.
            if o < 127:
                return o
            return ((o >> 2) + 128) % 256

        print("Redacting with FUZZ:  %s" % (fileobject,))
        for run in rule.runs_to_redact(fileobject):
            try:
                print("   Fuzzing at offset: %d, can fuzz up to %d bytes " % (run.img_offset, run.len))
                rc.imagefile.seek(run.img_offset)

                # Previously redacted only first 10 bytes, now redacts entire sequence
                run_bytes = bytearray(rc.imagefile.read(run.len))

                print("\tFile info - \n\t\tname: %s  \n\t\tclosed: %s \n\t\tposition: %d \n\t\tmode: %s" %
                      (rc.imagefile.name, rc.imagefile.closed, rc.imagefile.tell(), rc.imagefile.mode))
                print("    Fuzzing %d bytes - should be %d" % (len(run_bytes), run.len))
                newbytes = bytearray(fuzz(x) for x in run_bytes)
                assert(len(newbytes) == run.len)
                if rc.commit:
                    rc.imagefile.seek(run.img_offset)
                    rc.imagefile.write(bytes(newbytes))
                    print("\n   >>COMMIT")
            except AttributeError:
                # Best effort, as before: a run lacking the expected
                # attributes is reported and skipped.
                print("!AttributeError: no byte run?")
################################################################ class RedactConfig: """Class to read and parse a redaction config file""" def __init__(self,fn): self.cmds = [] self.commit = False self.filename = None self.xmlfile = None self.ignore_rule = ignore_rule() for line in open(fn,"r"): if line[0] in '#;': continue # comment line line = line.strip() if line=="": continue atoms = line.split(" ") while "" in atoms: atoms.remove("") # take care of extra spaces cmd = atoms[0].lower() rule = None action = None # First look for simple commands if cmd=='key': self.key = atoms[1] continue if cmd=="commit": self.commit = True continue if cmd=="imagefile": self.imagefile = open(atoms[1],"r+b") continue if cmd=="xmlfile": self.xmlfile = open(atoms[1],"r") continue if cmd=='ignore': self.ignore_rule.ignore(atoms[1]) continue # Now look for commands that are rules if cmd=='md5': rule = redact_rule_md5(line,atoms[1]) if cmd=='sha1': rule = redact_rule_sha1(line,atoms[1]) if cmd=='filename': rule = redact_rule_filename(line,atoms[1]) if cmd=='filepat': rule = redact_rule_filepat(line,atoms[1]) if cmd=='contains': rule = redact_rule_contains(line,atoms[1]) if cmd=='string': rule = redact_rule_string(line,atoms[1]) if rule: if atoms[2].lower()=='fill': action = redact_action_fill(eval(atoms[3])) if atoms[2].lower()=='encrypt': action = redact_action_encrypt() if atoms[2].lower()=='fuzz': action = redact_action_fuzz() if not rule or not action: print "atoms:",atoms print "rule:",rule print "action:",action raise ValueError,"Cannot parse: '%s'" % line self.cmds.append((rule,action)) def need_md5(self): for (rule,action) in self.cmds: if rule.__class__==redact_rule_md5: return True return False def need_sha1(self): for (rule,action) in self.cmds: if rule.__class__==redact_rule_sha1: return True return False def fiwalk_opts(self): "Returns the options that fiwalk needs given the redaction requested." 
opts = "-x" if self.need_sha1(): opts = opts+"1" if self.need_md5(): opts = opts+"m" return opts def process_file(self,fileinfo): for (rule,action) in self.cmds: if rule.should_redact(fileinfo): print "Processing file: %s" % fileinfo.filename() if self.ignore_rule.should_ignore(fileinfo): print "(Ignoring %s)" % fileinfo.filename() return print "" print "Redacting ",fileinfo.filename() print "Reason:",str(rule) print "Action:",action action.redact(rule,fileinfo,self) if rule.complete: return # only need to redact once! def close_files(self): if self.imagefile and self.imagefile.closed == False: print "Closing file: %s" % self.imagefile.name self.imagefile.close() if self.xmlfile and self.xmlfile.closed == False: print "Closing file: %s" % self.xmlfile.name self.xmlfile.close() if __name__=="__main__": import sys,time from optparse import OptionParser from subprocess import Popen,PIPE global options parser = OptionParser() parser.usage = "%prog [options] config-file" parser.add_option("-d","--debug",help="prints debugging info",dest="debug") (options,args) = parser.parse_args() t0 = time.time() # Read the redaction configuration file rc = RedactConfig(args[0]) if not rc.imagefile: print "Error: a filename must be specified in the redaction config file" sys.exit(1) fiwalk.fiwalk_using_sax(imagefile=rc.imagefile,xmlfile=rc.xmlfile,callback=rc.process_file) t1 = time.time() rc.close_files() print "Time to run: %d seconds" % (t1-t0) tcpflow/src/dfxml/python/demo_mac_timeline.py0000755000175000017500000000142712263701334020400 0ustar dimadima#!/usr/bin/env python # produce a MAC-times timeline. 
# works under either Python2 or Python3 import dfxml, sys timeline = [] def process(fi): if fi.mtime()!=None: timeline.append([fi.mtime(),fi.filename()," modified"]) if fi.crtime()!=None: timeline.append([fi.crtime(),fi.filename()," created"]) if fi.ctime()!=None: timeline.append([fi.ctime(),fi.filename()," changed"]) if fi.atime()!=None: timeline.append([fi.atime(),fi.filename()," accessed"]) def main(): if len(sys.argv) < 2: print("Usage: {} ".format(sys.argv[0])) exit(1) dfxml.read_dfxml(xmlfile=open(sys.argv[1],"rb"), callback=process) timeline.sort() for record in timeline: print("\t".join( map(str, record)) ) if __name__ == "__main__": main() tcpflow/src/dfxml/python/idifference.py0000644000175000017500000006164012263701334017211 0ustar dimadima#!/usr/bin/env python """idifference. Generates a report about what's different between two disk images. Process: 1. A map is kept of all filenames->sha1 codes and all sha1->filenames. 2. For each image, read all of the fileobject objects: - create new maps - Note when things change. - Delete each file in the old map as it is processed. 3. Report files left in map; that's the files that were deleted! 4. Replace the old maps with the new maps """ __version__ = "0.2.0rfc4" import sys,fiwalk,dfxml,time import copy if sys.version_info < (3,1): raise RuntimeError("idifference.py now requires Python 3.1 or above") #Global variable, to be adjusted later options = None def ignore_filename(fn, include_dotdirs=False): """ Ignores particular file name patterns output by TSK. Detecting new and renamed files becomes difficult if there are 3+ names for an inode (i.e. "dir", "dir/.", and "dir/child/.."). """ return (not include_dotdirs and (fn.endswith("/.") or fn.endswith("/.."))) or fn in set(['$FAT1','$FAT2']) def ptime(t): """Print the time in the requested format. T is a dfxml time value. 
If T is null, return 'null'.""" global options if t is None: return "null" if options and options.timestamp: return str(t.timestamp()) else: return str(t.iso8601()) def dprint(x): global options if options and options.debug: print(x) def header(): global options if options and options.html: print(""" """) def h1(title): global options if options and options.html: print("

%s

" % title) return print("\n\n%s\n" % title) def h2(title): global options if options and options.html: print("

%s

" % title) return print("\n\n%s\n%s" % (title,"="*len(title))) def table(rows,styles=None,break_on_change=False): import sys global options def alldigits(x): if type(x)!=str: return False for ch in x: if ch.isdigit()==False: return False return True def fmt(x): if x==None: return "" if type(x)==int or alldigits(x): return "{0:>12}".format(x) return str(x) if options and options.html: print("") for row in rows: print("") if not styles: styles = [""]*len(rows) for (col,style) in zip(row,styles): sys.stdout.write("" % (style,col)) print("") print("
%s
") return lastRowCol0 = None for row in rows: # Insert a blank line if this row[0] is not the same as last row[0] if row[0]!=lastRowCol0 and break_on_change: sys.stdout.write("\n") lastRowCol0 = row[0] # Write the row. # This won't generate a unicode encoding error because the strings are valid unicode. sys.stdout.write("\t".join([fmt(col) for col in row])) sys.stdout.write("\n") # # This program keeps track of the current and previous diskstate in a single # object called "DiskState". Another way to do that would have been to have # the instance built from the XML file and then have another function that compares # them. # class DiskState: global options def __init__(self,notimeline=False,summary=False,include_dotdirs=False): self.current_fname = None # This class field is the name of the current disk image, whereas other fnames are in-image file names self.new_fnames = dict() # maps from fname -> fi self.new_inodes = dict() # maps from (partition, inode_number) -> fi self.new_fi_tally = 0 self.notimeline = notimeline self.summary = summary self.include_dotdirs = include_dotdirs self.changed_mtime_tally = 0 self.changed_atime_tally = 0 self.changed_ctime_tally = 0 self.changed_crtime_tally = 0 self.changed_dir_sha1_tally = 0 self.changed_file_sha1_tally = 0 self.changed_filesize_tally = 0 self.changed_first_byterun_tally = 0 self.next() def next(self): """Called when the next image is processed.""" global options self.fnames = self.new_fnames self.inodes = self.new_inodes self.fi_tally = self.new_fi_tally self.new_fnames = dict() self.new_inodes = dict() #Reset sets self.new_files = set() # set of file objects self.renamed_files = set() # set of (oldfile,newfile) file objects self.changed_content = set() # set of (oldfile,newfile) file objects self.changed_properties = set() # list of (oldfile,newfile) file objects #Reset counters self.new_fi_tally = 0 if self.notimeline: self.timeline = None else: self.timeline = set() self.changed_mtime_tally = 0 
self.changed_atime_tally = 0 self.changed_ctime_tally = 0 self.changed_crtime_tally = 0 self.changed_dir_sha1_tally = 0 self.changed_file_sha1_tally = 0 self.changed_filesize_tally = 0 self.changed_first_byterun_tally = 0 def process_fi(self,fi): global options dprint("processing %s" % str(fi)) # See if the filename changed its hash code changed = False if not fi.allocated(): return # only look at allocated files # Filter out specific filenames create by TSK that are not of use if ignore_filename(fi.filename(), self.include_dotdirs): return # Remember the file for the next generation self.new_fnames[fi.filename()] = fi self.new_inodes[(fi.partition(), fi.inode())] = fi self.new_fi_tally += 1 # See if a file with this filename had its contents change or properties changed ofi = self.fnames.get(fi.filename(),None) if ofi: dprint(" found ofi") any_diff = False if ofi.sha1()!=fi.sha1(): dprint(" >>> sha1 changed") self.changed_content.add((ofi,fi)) any_diff = True elif ofi.atime() != fi.atime() or \ ofi.mtime() != fi.mtime() or \ ofi.crtime() != fi.crtime() or \ ofi.ctime() != fi.ctime(): dprint(" >>> time changed") self.changed_properties.add((ofi,fi)) any_diff = True if any_diff: #Count the types of changes that happened if ofi.filesize() != fi.filesize(): self.changed_filesize_tally += 1 if ofi.sha1() != fi.sha1(): if ofi.is_dir(): self.changed_dir_sha1_tally += 1 elif ofi.is_file(): self.changed_file_sha1_tally += 1 if ofi.mtime() != fi.mtime(): self.changed_mtime_tally += 1 if ofi.atime() != fi.atime(): self.changed_atime_tally += 1 if ofi.ctime() != fi.ctime(): self.changed_ctime_tally += 1 if ofi.crtime() != fi.crtime(): self.changed_crtime_tally += 1 if ofi.byte_runs() and fi.byte_runs(): brdiff = 0 ofirstbr = ofi.byte_runs()[0] nfirstbr = fi.byte_runs()[0] try: if ofirstbr.file_offset == nfirstbr.file_offset: brdiff = 1 if ofirstbr.img_offset == nfirstbr.img_offset: brdiff = 1 if ofirstbr.fs_offset == nfirstbr.fs_offset: brdiff = 1 except: pass 
self.changed_first_byterun_tally += brdiff # If a new file, note that (and optionally add to the timeline) if not ofi: self.new_files.add(fi) if self.timeline: create_time = fi.crtime() if not create_time: create_time = fi.ctime() if not create_time: create_time = fi.mtime() self.timeline.add((create_time,fi.filename(),"created")) # Delete files we have seen (so we can find out the files that were deleted) if fi.filename() in self.fnames: del self.fnames[fi.filename()] # Look for files that were renamed ofi = self.inodes.get((fi.partition(), fi.inode()),None) if ofi and ofi.filename() != fi.filename() and ofi.sha1()==fi.sha1(): #Never consider current-directory or parent-directory for rename operations. Because we match on partition+inode numbers, these trivially match. if not (fi.filename().endswith("/.") or fi.filename().endswith("/..") or ofi.filename().endswith("/.") or ofi.filename().endswith("/..")): self.renamed_files.add((ofi,fi)) def process(self,fname): self.prior_fname = self.current_fname self.current_fname = fname if fname.endswith("xml"): with open(fname,'rb') as xmlfile: for fi in dfxml.iter_dfxml(xmlfile, preserve_elements=True): self.process_fi(fi) else: fiwalk.fiwalk_using_sax(imagefile=open(fname,'rb'), flags=fiwalk.ALLOC_ONLY, callback=self.process_fi) def print_fis(self,title,fis): h2(title) def fidate(fi): try: return str(ptime(fi.mtime())) except TypeError: return "n/a" res = [(fidate(fi),str(fi.filesize()),fi.filename()) for fi in fis] if res: table(sorted(res)) def print_fi2(self,title,fi2s): def prtime(t): return "%d (%s)" % (t,ptime(t)) h2(title) res = set() for(ofi,fi) in fi2s: if ofi.filename() != fi.filename(): res.add((ofi.filename(),"renamed to",fi.filename())) # Don't know when it was renamed if ofi.filesize() != fi.filesize(): res.add((ofi.filename(),"resized",ofi.filesize(),"->",fi.filesize())) if self.timeline: self.timeline.add((fi.mtime(),fi.filename(),"resized",ofi.filesize(),"->",fi.filesize())) if ofi.sha1() != fi.sha1(): 
res.add((ofi.filename(),"SHA1 changed",ofi.sha1(),"->",fi.sha1())) if self.timeline: self.timeline.add((fi.mtime(),fi.filename(),"SHA1 changed",ofi.sha1(),"->",fi.sha1())) if ofi.atime() != fi.atime(): if not options.noatime: res.add((ofi.filename(),"atime changed",ptime(ofi.atime()),"->",ptime(fi.atime()))) if self.timeline: self.timeline.add((fi.atime(),fi.filename(),"atime changed",prtime(ofi.atime()),"->",prtime(fi.atime()))) if ofi.mtime() != fi.mtime(): res.add((ofi.filename(),"mtime changed",ptime(ofi.mtime()),"->",ptime(fi.mtime()))) if self.timeline: self.timeline.add((fi.mtime(),fi.filename(),"mtime changed",prtime(ofi.mtime()),"->",prtime(fi.mtime()))) if ofi.ctime() != fi.ctime(): res.add((ofi.filename(),"ctime changed",ptime(ofi.ctime()),"->",ptime(fi.ctime()))) if self.timeline: self.timeline.add((fi.ctime(),fi.filename(),"ctime changed",prtime(ofi.ctime()),"->",prtime(fi.ctime()))) if ofi.crtime() != fi.crtime(): res.add((ofi.filename(),"crtime changed",ptime(ofi.crtime()),"->",ptime(fi.crtime()))) if self.timeline: self.timeline.add((fi.crtime(),fi.filename(),"crtime changed",prtime(ofi.crtime()),"->",prtime(fi.crtime()))) if res: table(sorted(res),break_on_change=True) def print_timeline(self): prt = [] # Make the dates printable for line in sorted(self.timeline): prt.append([ptime(line[0])]+list(line[1:])) h2("Timeline") table(prt) def to_xml(self): import xml.etree.ElementTree as ET ET.register_namespace("delta", dfxml.XMLNS_DELTA) if not options.xmlfilename: sys.stderr.write("XML output filename not specified.\n") exit(1) metadict = dict() metadict["XMLNS_DFXML"] = dfxml.XMLNS_DFXML metadict["XMLNS_DELTA"] = dfxml.XMLNS_DELTA metadict["program"] = sys.argv[0] metadict["version"] = __version__ metadict["commandline"] = " ".join(sys.argv) metadict["priorf"] = self.prior_fname metadict["currentf"] = self.current_fname xmlfile = open(options.xmlfilename, "w") xmlfile.write("""\ Disk Image Difference Manifest %(program)s %(version)s %(commandline)s 
%(priorf)s %(currentf)s """ % metadict) def _annotate_changes(tmpel, ofi, fi): """ Adds "delta:changed_property" attributes to elements that changed their values. Returns number of annotations needed. """ retval = 0 def _xpaths(xp): """ Returns a list of xpaths: First, with an xmlns; second, as input. @param xp An xpath expression where all elements and attributes needing a namespace declaration are prefixed with "{0}" (for Python string formatting). """ retval = [] for nsprefix in ["{" + dfxml.XMLNS_DFXML + "}", ""]: retval.append(xp.format(nsprefix)) return retval # Triplets: Old value, new value, XPaths to find element to annotate for (oval, nval, xpaths) in [ (ofi.filename(), fi.filename(), _xpaths("./{0}filename")), (ofi.sha1(), fi.sha1(), _xpaths("./{0}hashdigest[@type='sha1']")), (ofi.md5(), fi.md5(), _xpaths("./{0}hashdigest[@type='md5']")), (ofi.mtime(), fi.mtime(), _xpaths("./{0}mtime")), (ofi.atime(), fi.atime(), _xpaths("./{0}atime")), (ofi.ctime(), fi.ctime(), _xpaths("./{0}ctime")), (ofi.crtime(), fi.crtime(), _xpaths("./{0}crtime")), (ofi.filesize(), fi.filesize(), _xpaths("./{0}filesize")) ]: #Find and flag the changed properties #Skip null-null comparisons if oval is None and nval is None: continue if oval != nval: retval += 1 #Find first namespace match for the property element for xp in xpaths: propertyel = tmpel.find(xp) if not propertyel is None: break if propertyel is None: comment = ET.Comment("Warning: Tried to note a changed property with the XPath queries %r; however, could not find the element." 
% xpaths) tmpel.insert(0, comment) else: propertyel.attrib["delta:changed_property"] = "1" return retval #List new files for fi in self.new_files: #xmlfile.write(" \n" % fi.filename()) xmlfile.write(" ") tmpel = copy.copy(fi.xml_element) tmpel.attrib["delta:new_file"] = "1" xmlfile.write(dfxml.ET_tostring(tmpel, encoding="unicode")) xmlfile.write("\n") #List deleted files for fi in self.fnames.values(): #xmlfile.write("\n" % fi.filename()) xmlfile.write(" ") tmpel = ET.Element("fileobject") tmpel.attrib["delta:deleted_file"] = "1" tmpchild = copy.copy(fi.xml_element) tmpchild.tag = "delta:original_fileobject" tmpel.insert(-1, tmpchild) xmlfile.write(dfxml.ET_tostring(tmpel, encoding="unicode")) xmlfile.write("\n") #List renamed files for (ofi, fi) in self.renamed_files: #xmlfile.write("\n" % (ofi.filename(), fi.filename())) tmpel = copy.copy(fi.xml_element) annos = _annotate_changes(tmpel, ofi, fi) tmpoldel = copy.copy(ofi.xml_element) tmpoldel.tag = "delta:original_fileobject" tmpel.append(tmpoldel) tmpel.attrib["delta:renamed_file"] = "1" if annos > 1: tmpel.attrib["delta:changed_file"] = "1" xmlfile.write(dfxml.ET_tostring(tmpel, encoding="unicode")) xmlfile.write("\n") #List files with with modified data or metadata changed_files = set.union(set(self.changed_content), set(self.changed_properties)) for (ofi, fi) in changed_files: #xmlfile.write("\n" % fi.filename()) xmlfile.write(" ") tmpel = copy.copy(fi.xml_element) _annotate_changes(tmpel, ofi, fi) tmpoldel = copy.copy(ofi.xml_element) tmpoldel.tag = "delta:original_fileobject" tmpel.append(tmpoldel) tmpel.attrib["delta:changed_file"] = "1" xmlfile.write(dfxml.ET_tostring(tmpel, encoding="unicode")) xmlfile.write("\n") xmlfile.write("\n") xmlfile.close() def report(self): header() h1("Disk image:"+self.current_fname) self.print_fis("New Files:",self.new_files) self.print_fis("Deleted Files:",self.fnames.values()) self.print_fi2("Renamed Files:",self.renamed_files) self.print_fi2("Files with modified 
content:",self.changed_content) self.print_fi2("Files with changed file properties:",self.changed_properties) if self.summary: h2("Summary:") table([ ("Prior image's file (file object) tally", str(self.fi_tally)), ("Prior image's file (inode) tally", str(len(self.inodes))), ("Current image's file (file object) tally", str(self.new_fi_tally)), ("Current image's file (inode) tally", str(len(self.new_inodes))), ("New files", str(len(self.new_files))), ("Deleted files", str(len(self.fnames))), ("Renamed files", str(len(self.renamed_files))), ("Files with modified content", str(len(self.changed_content))), ("Files with changed file properties", str(len(self.changed_properties))) ]) if self.timeline: self.print_timeline() def output_archive(self,imagefile=None,tarname=None,zipname=None): """Write the changed and/or new files to a tarfile or a ZIP file. """ import zipfile, tarfile, StringIO, datetime tfile = None zfile = None to_archive = self.new_files.copy() to_archive = to_archive.union(set([val[1] for val in self.changed_content])) to_archive = to_archive.union(set([val[1] for val in self.changed_properties])) # Make sure we are just writing out inodes that have file contents to_archive = filter( lambda fi:fi.allocated() and fi.has_tag("inode") and fi.has_contents() and (fi.name_type()=='' or fi.name_type()=='r'), to_archive) if len(to_archive)==0: print("No archive created, as no allocated files created or modified") return if tarname: print(">>> Creating tar file: %s" % tarname) tfile = tarfile.TarFile(tarname,mode="w") if zipname: print(">>> Creating zip file: %s" % zipname) zfile = zipfile.ZipFile(zipname,mode="w",allowZip64=True) files_written=set() content_error_log = [] for fi in to_archive: filename = fi.filename() fncount = 1 while filename in files_written: filename = "%s.%d" % (fi.filename(),fnperm) fncount+= 1 contents = None try: contents = fi.contents(imagefile) except ValueError as ve: if ve.message.startswith("icat error"): #Some files cannot be 
recovered, even from images that do not seem corrupted; log the icat command that failed. content_error_log.append(ve.message) else: #This is a more interesting error, so have process die to report immediately. raise if contents: if tfile: info = tarfile.TarInfo(name=filename) info.mtime = fi.mtime() info.atime = fi.atime() info.ctime = fi.ctime() info.uid = fi.uid() info.gid = fi.gid() info.size = fi.filesize() # addfile requires a 'file', so let's make one string = StringIO.StringIO() string.write(contents) string.seek(0) tfile.addfile(tarinfo=info,fileobj=string) if zfile: mtimestamp = fi.mtime().timestamp() info = zipfile.ZipInfo(filename) if mtimestamp: #mtime might be null info.date_time = datetime.datetime.fromtimestamp(mtimestamp).utctimetuple() info.internal_attr = 1 info.external_attr = 2175008768 # specifies mode 0644 zfile.writestr(info,contents) if tfile: tfile.close() if zfile: zfile.close() if len(content_error_log) > 0: sys.stderr.write("Errors retrieving file contents:\n") sys.stderr.write("\n".join(content_error_log)) sys.stderr.write("\n") if __name__=="__main__": from optparse import OptionParser global options parser = OptionParser() parser.usage = '%prog [options] file1 file2 [file3...] (files can be xml or image files)' parser.add_option("-x","--xml",help="specify output file for XML",dest="xmlfilename") parser.add_option("--html",help="specify output in HTML",action="store_true") parser.add_option("-n","--notimeline",help="do not generate a timeline",action="store_true") parser.add_option("-d","--debug",help="debug",action='store_true') parser.add_option("-T","--tararchive",help="create tar archive file of new/changed files",dest="tarfile") parser.add_option("-Z","--zipfile",help="create ZIP64 archive file of new/changed files",dest="zipfile") parser.add_option("--include-dotdirs",help="include files with names ending in '/.' 
and '/..'",action="store_true", dest="include_dotdirs", default=False) parser.add_option("--summary",help="output summary statistics of file system changes",action="store_true", default=False) parser.add_option("--timestamp",help="output all times in Unix timestamp format; otherwise use ISO 8601",action="store_true") parser.add_option("--imagefile",help="specifies imagefile or file2 is an XML file and you are archiving") parser.add_option("--noatime",help="Do not include atime changes",action="store_true") (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) if (options.tarfile or options.zipfile) and args[1].endswith("xml") and not options.imagefile: print("ERROR: %s is NOT an XML file and no imagefile specified" % args[1]) parser.print_help() exit(1) s = DiskState(notimeline=options.notimeline, summary=options.summary, include_dotdirs=options.include_dotdirs) for infile in args: print(">>> Reading %s" % infile) s.process(infile) if infile!=args[0]: # Not the first file. 
Report and optionally archive if options.tarfile or options.zipfile: imagefilename = infile if imagefilename.endswith("xml"): imagefilename = options.imagefile s.output_archive(imagefile=open(imagefilename),tarname=options.tarfile,zipname=options.zipfile) if options.xmlfilename: s.to_xml() else: s.report() s.next() tcpflow/src/dfxml/python/iredact-config.txt0000644000175000017500000000076212263701334020011 0ustar dimadima# # Paths to the disk image and fiwalk XML output # IMAGEFILE /home/bcadmin/Desktop/jowork.raw.raw XMLFILE /home/bcadmin/Desktop/jofiwalk.xml # # Redaction patterns # #FILEPAT *.dll FUZZ #FILEPAT *.com FUZZ FILEPAT *.exe FUZZ # # Other examples # #KEY 100200300400 #MD5 db06069ef1c9f40986ffa06db4fe8fd7 FILL 0x44 #FILENAME file3.txt ENCRYPT #FILEPAT file*.txt ENCRYPT #CONTAINS This FILL 0x44 #FILEPAT *Spotlight* FILL 0x44 # # Uncomment this line to actually commit the redaction: # COMMIT tcpflow/src/dfxml/python/demo_readtimes.py0000644000175000017500000000517012263701334017723 0ustar dimadima#!/usr/bin/python """Reads an fiwalk XML file and reports how many of the files are still in the image...""" import sys,os sys.path.append(os.getenv("DOMEX_HOME") + "/src/lib/") # add the library sys.path.append("../lib/") # add the library sys.path.append("../fiwalk/") import fiwalk import time def calc_jumps(fis,title): print title print "Count: %d" % (len(fis)) from histogram import histogram h = histogram() pos = 0 backwards = 0 prev_frag_count = 0 for fi in fis: for i in range(0,len(fi.byte_runs())): run = fi.byte_runs()[i] try: sector = run.start_sector() if sector2: return False if hasattr(fi.byte_runs()[0],"uncompressed_len"): return False if not hasattr(fi.byte_runs()[0],"img_offset"): return False return True fis = filter(resident_file,fis) print "Native order: " calc_jumps(fis,"Native Order") def sort_function(a,b): a0 = a.byte_runs()[0].start_sector() b0 = b.byte_runs()[0].start_sector() if a0 < b0 : return -1 if a0 == b0 : return 0 return 1 
fis.sort(sort_function) calc_jumps(fis,"Sorted Order") tcpflow/src/dfxml/python/imicrosoft_redact.py0000644000175000017500000001172112263701334020441 0ustar dimadima#!/usr/bin/python """This is a small program written with the python fiwalk framework to break the microsoft executables from the m57 corpus. It does this by changing characters in the first 4096 bytes of the executable that are over hex 80 to hex FF""" import os.path,sys from subprocess import Popen,call,PIPE sys.path.append(os.getenv("DOMEX_HOME") + "/src/lib/") # add the library sys.path.append(os.getenv("DOMEX_HOME") + "/src/fiwalk/python/") # add the library import fiwalk,hashlib import xml.parsers.expat redact_extensions = set([".dll",".exe",".com"]) redact_filenames = set() redact_max_size = 4096 def should_redact(fi): if fi.filename() in redact_filenames: return True fnl = fi.filename().lower() (root,ext) = os.path.splitext(fnl) if options.debug: print "\r",fnl, if ext in redact_extensions and fnl.startswith("windows"): try: content = fi.contents(icat_fallback=False) except ValueError: if options.debug: print " *** can't redact --- is compressed *** " return False if not content: if options.debug: print " *** can't redact --- no content ***" return False if "Microsoft" in content: return True if "\0M\0i\0c\0r\0o\0s\0o\0f\0t" in content: return True if options.debug: print " *** won't redact --- no Microsoft ***" return False return False def redact(fi): from xml.sax.saxutils import escape global xml_out,options if not should_redact(fi): return # Get the first byterun br = fi.byte_runs()[0] if br.img_offset==0: return # this run isn't on the disk if br.bytes==0: return # too small to redact content = fi.contents() # before redaction redact_bytes = min(redact_max_size,br.bytes) fi.imagefile.seek(br.img_offset) sector = fi.imagefile.read(redact_bytes) # Redact the sector # Read the data def redact_function(ch): if ch<'~': return ch return '0xff' sector = "".join(map(redact_function,sector)) # Now 
write it back if options.commit: fi.imagefile.seek(br.img_offset) fi.imagefile.write(sector) redacted_content = fi.contents() # after redaction xml_out.write("\n%s\n" % (escape(fi.filename()))) xml_out.write(" %d\n" % (len(content))) xml_out.write(" %s\n" % (fi.inode())) xml_out.write(" %d\n" % (br.img_offset)) xml_out.write(" %d\n" % (redact_bytes)) xml_out.write(" \n") xml_out.write(" %s\n" % (hashlib.md5(content).hexdigest())) xml_out.write(" %s\n" % (hashlib.sha1(content).hexdigest())) xml_out.write(" \n") xml_out.write(" \n") xml_out.write(" %s\n" % (hashlib.md5(redacted_content).hexdigest())) xml_out.write(" %s\n" % (hashlib.sha1(redacted_content).hexdigest())) xml_out.write(" \n") xml_out.write("\n") if __name__=="__main__": import sys,time from optparse import OptionParser from subprocess import Popen,PIPE global options,xml_out from glob import glob parser = OptionParser() parser.usage = "%prog [options] imagefile" parser.add_option("-d","--debug",help="prints debugging info",dest="debug",action="store_true") parser.add_option("-c","--commit",help="Really do the redaction",action="store_true") parser.add_option("--all",help="Do all",action="store_true") (options,args) = parser.parse_args() # First read all of the redaction files for fn in glob("*redacted.xml*"): try: fiwalk.fiwalk_using_sax(xmlfile=open(fn),callback=lambda fi:redact_filenames.add(fi.filename())) except xml.parsers.expat.ExpatError: print "Invalid XML file:",fn print "number of filenames in redaction XML:",len(redact_filenames) if options.all: for fn in glob("*.aff"): raw = fn.replace(".aff",".raw") if not os.path.exists(raw): print "%s --> %s" % (fn,raw) if call(['afconvert','-e','raw',fn])!=0: raise RuntimeError,"afconvert of %s failed" % fn fns = glob("*.raw") else: fns = args for fn in fns: if fn.endswith(".aff"): raise ValueError,"Cannot redact AFF files" print "Redacting %s" % fn xml_out = open(fn.replace(".raw","-redacted.xml"),"w") xml_out.write("\n") xml_out.write("\n") mode = "rb" 
if options.commit: mode="r+b" fiwalk.fiwalk_using_sax(imagefile=open(args[0],mode),callback=redact) xml_out.write("\n") tcpflow/src/dfxml/python/iverify.py0000644000175000017500000000326612263701334016423 0ustar dimadima#!/usr/bin/python """Reads an fiwalk XML file and reports how many of the files are still in the image...""" import hashlib, os.path, fiwalk, sys present = [] not_present = [] def process_fi(fi): print "process file",fi.filename() if fi.filesize()==0: return try: if fi.file_present(): present.append(fi) return else: not_present.append(fi) return except ValueError,e: sys.stderr.write(str(e)+"\n") ################################################################ if __name__=="__main__": import sys from optparse import OptionParser from subprocess import Popen,PIPE global options parser = OptionParser() parser.add_option("-d","--debug",help="prints debugging info",dest="debug") parser.add_option("-g","--ground",help="ground truth XML file",dest="ground") parser.usage = '%prog [options] image.iso' (options,args) = parser.parse_args() if not options.ground: parser.print_help() sys.exit(1) # Read the XML file reader = fiwalk.fileobject_reader() reader.set_imagefilename(args[0]) reader.process_xml_stream(open(options.ground,"r"),process_fi) if len(present)==0: print "None of the files are present in the image" sys.exit(0) if len(not_present)==0: print "All of the files are present in the image" sys.exit(0) print "\n\n" print "Present in image:" print "=================" print "\n".join([fi.filename() for fi in present]) print "\n" print "Not Present or altered in image:" print "=====================" for fi in not_present: print fi.filename() tcpflow/src/dfxml/python/ChangeLog0000644000175000017500000001134012263701334016136 0ustar dimadima2013-11-02 * idifference.py: Imported null-variable tests, and corrected a variable reference, to help idifference to be used as a module * idifference.py: Imported extra, granular counters on disk state changes 
2013-11-01 * cat_fileobjects.py: Allow for differential DFXML repeating * Unit tests: Adjusted cleanup, and tested differential DFXML repeating 2013-10-24 * dfxml.py: Add null-argument test on content_for_run * dfxml.py: Add image reference to iterative reader * idifference.py: Promote delta namespace to DFXML library 2013-09-19 * idifference: Remove redundant, faulty annotation attempt 2013-09-18 * Python: Run timeline test on differencing input XML; add hand-validated line count check * fiwalk.py: Correct Fiwalk fallback call 2013-09-17 * dfxml: Add an ElementTree tostring() function wrapper that removes redundant namespace declarations, with unit tests for new regular expression * idifference: Update unit tests to include vetted count of fileobjects generated * idifference: Correct a fileobject counting bug (where a counter reset was forgotten) * idifference, icat: Add some XML namespace handling logic, found necessary but missed until the last unit test updates * cat_fileobjects.py: Add debug flag 2013-09-13 * idifference.py: Switch XML namespace for differencing to forensicswiki page 2013-09-12 * idifference.py: Adjust XML output, along with sample data, to validate against DFXML schema (v1.1.0rfc0; at least, everything except the differential annotations not yet in the schema validates) 2013-08-15 * Regression testing: Make demo_mac_timeline.py into regression check * dfxml.py: Add iterative interface for fileobjects * Regression testing: Add iterative version of demo_mac_timeline.py * Python: Add demo program for repeating fileobject with XML Python objects * idifference.py: Have idifference.py --xml generate DFXML with differential annotations * idifference.py: Add test for XML output of idifference 2013-08-13 * idifference.py: Remove requirement for a fileobject's partition to be defined 2013-06-24 * fiwalk.py: Give standalone usage slightly nicer error handling 2013-06-21 * dfxml_tool.py: Assign a version to this script (as it works pretty well now, I 
chose "1.0.0") - necessary for some provenance information * dfxml_tool.py: Optionally include provenance information (similar to Fiwalk) * dfxml_tool.py: Clean file-not-found error output (found on broken symlinks) * Python: Add 'make check' invocation for unit tests 2013-06-12 * dfxml_tool.py: Report file creation and metadata-change time, when available * dfxml_tool.py: Optionally output file system timestamps in ISO-8601 * dfxml_tool.py: Optionally include directories in output listing * dfxml_tool.py: Include additional file metadata (partition and inode numbers) * dfxml_tool.py: Add alternative filename simplifier 2013-06-11 * dfxml_tool.py: Report file read failures 2013-05-28 * python: Integrated three changesets from end of 2012 * dfxml.py, rdifference.py: Change interface of RegXML cell type * dfxml.py: Revise method of hashing data of Registry "value" cells * dfxml.py: Trim superfluous code 2013-04-23 * idifference.py: Relax input name requirements; passing a ".dfxml" file silently failed. 2013-04-08 * dfxml_tool.py (class fileobject): add filename simplifier 2012-12-18 * dfxml.py (class fileobject): correct file extraction in Python 3 2012-12-17 * dfxml.py (class dftime): add parser and tests for email timestamp format * dfxml.py (class fileobject): correct 'allocation' interpretation 2012-11-24 * dfxml.py (class byte_runs): change comparator methods (Python 3 doesn't use __cmp__) 2012-10-31 * deidentify_xml.py (xml_sanitizer._start_element): fixed problem quoting attributes 2012-04-03 Simson Garfinkel * dfxml.py: globally changed .data to .cdata for consistency. 2012-02-23 Simson Garfinkel * dfxml.py (fileobject_reader._start_element): claned up handling of byte runs within fileobjects tcpflow/src/dfxml/python/rdifference.py0000644000175000017500000003053412263701334017220 0ustar dimadima#!/usr/bin/env python """rdifference.py Generates a report about what's different between two Windows Registry hives. Process: 1. 
For each regxml file, read all of the fileobject objects: - create new maps - Note when things change. - Delete each file in the old map as it is processed. 2. Report cells left in map; those are the cells that were deleted! 3. Replace the old maps with the new maps """ #AJN This script does not call out duplicate paths, but they are reported. import sys,fiwalk,dfxml,time if sys.version_info < (3,1): raise RuntimeError("rdifference.py requires Python 3.1 or above") def ptime(t): """Print the time in the requested format. T is a dfxml time value""" global options if t is None: return None elif "options" in globals() and options.timestamp: return str(t.timestamp()) else: return str(t.iso8601()) def dprint(x): global options if "options" in globals() and options.debug: print(x) def header(): if options.html: print(""" """) def h1(title): global options if "options" in globals() and options.html: print("

%s

" % title) return print("\n\n%s\n" % title) def h2(title): global options if "options" in globals() and options.html: print("

%s

" % title) return print("\n\n%s\n%s" % (title,"="*len(title))) def table(rows,styles=None,break_on_change=False): import sys global options def alldigits(x): if type(x)!=str: return False for ch in x: if ch.isdigit()==False: return False return True def fmt(x): if x==None: return "" if type(x)==int or alldigits(x): return "{0:>12}".format(x) return str(x) if "options" in globals() and options.html: print("") for row in rows: print("") if not styles: styles = [""]*len(rows) for (col,style) in zip(row,styles): sys.stdout.write("" % (style,col)) print("") print("
%s
") return lastRowCol0 = None for row in rows: # Insert a blank line if this row[0] is not the same as last row[0] if row[0]!=lastRowCol0 and break_on_change: sys.stdout.write("\n") lastRowCol0 = row[0] # Write the row. # This won't generate a unicode encoding error because the strings are valid unicode. sys.stdout.write("\t".join([fmt(col) for col in row])) sys.stdout.write("\n") # # This program keeps track of the current and previous hivestate in a single # object called "HiveState". Another way to do that would have been to have # the instance built from the XML file and then have another function that compares # them. # class HiveState: global options def __init__(self,notimeline=False): self.new_cnames = dict() # maps from cell full path -> cell self.notimeline = notimeline self.next() def next(self): """Called when the next image is processed.""" self.cnames = self.new_cnames self.new_cnames = dict() self.new_files = set() # set of file objects self.changed_content = set() # set of (oldfile,newfile) file objects self.changed_properties = set() # list of (oldfile,newfile) file objects if self.notimeline: self.timeline = None else: self.timeline = set() def process_cell(self,cell): dprint("processing %s" % str(cell)) # See if the filename changed its hash code changed = False # Remember the file for the next generation self.new_cnames[cell.full_path()] = cell #new_inodes from idifference translates to ... well, there is no 'inode' in the Registry. There's hive identifier and full path. Values are small and thus more likely to appear in multiple places. 
Skip translating # See if this filename changed or was resized ocell = self.cnames.get(cell.full_path(),None) if ocell: dprint(" found ocell: " + cell.full_path()) if ocell.sha1()!=cell.sha1(): dprint(" >>> sha1 changed") self.changed_content.add((ocell,cell)) if ocell.mtime() != cell.mtime(): dprint(" >>> Mtime changed") self.changed_properties.add((ocell,cell)) elif ocell.type() != cell.type(): dprint(" >>> Cell content type changed") self.changed_properties.add((ocell,cell)) elif type(ocell) != type(cell): dprint(" >>> Cell structural type changed") self.changed_properties.add((ocell,cell)) elif ( (ocell.parent_key and ocell.parent_key.mtime()) or None ) != ( (cell.parent_key and cell.parent_key.mtime()) or None ): dprint(" >>> Parent mtimes changed") self.changed_properties.add((ocell,cell)) # If a new file, note that (and optionally add to the timeline) if not ocell: self.new_files.add(cell) if self.timeline: modify_time = cell.mtime() self.timeline.add((modify_time,cell.full_path(),"modified")) # Delete files we have seen (so we can find out the files that were deleted) if cell.full_path() in self.cnames: del self.cnames[cell.full_path()] def process(self,fname): self.current_fname = fname if fname.endswith(".regxml"): reader = dfxml.read_regxml(xmlfile=open(fname,'rb'), callback=self.process_cell) def print_cells(self,title,cells): h2(title) def cdate(cell): try: return str(ptime(cell.mtime())) except TypeError: return "n/a" res = [(cdate(cell),cell.full_path()) for cell in cells] if res: table(sorted(res)) def print_cell2(self,title,cell2s): def prtime(t): return "%d (%s)" % (t,ptime(t)) h2(title) res = set() for(ocell,cell) in cell2s: if ocell.sha1() != cell.sha1(): res.add((ocell.full_path(),"SHA1 changed",ocell.sha1(),"->",cell.sha1())) if self.timeline: self.timeline.add((cell.mtime(),cell.full_path(),"SHA1 changed",ocell.sha1(),"->",cell.sha1())) if ocell.mtime() != cell.mtime(): res.add((ocell.full_path(),"mtime 
changed",ptime(ocell.mtime()),"->",ptime(cell.mtime()))) if self.timeline: self.timeline.add((cell.mtime(),cell.full_path(),"mtime changed",prtime(ocell.mtime()),"->",prtime(cell.mtime()))) if ocell.type() != cell.type(): res.add((ocell.full_path(),"cell type changed",ocell.type(),"->",cell.type())) if self.timeline: self.timeline.add((cell.mtime(),cell.full_path(),"cell type changed",prtime(ocell.mtime()),"->",prtime(cell.mtime()))) if res: table(sorted(res),break_on_change=True) def print_timeline(self): prt = [] # Make the dates printable for line in sorted(self.timeline): prt.append([ptime(line[0])]+list(line[1:])) h2("Timeline") table(prt) def report(self): header() h1("RegXML file:"+self.current_fname) self.print_cells("New Files:",self.new_files) self.print_cells("Deleted Files:",self.cnames.values()) self.print_cell2("Files with modified content:",self.changed_content) self.print_cell2("Files with changed file properties:",self.changed_properties) if self.timeline: self.print_timeline() def output_archive(self,tarname=None,zipname=None): """Write the changed and/or new files to a tarfile or a ZIP file. 
""" import zipfile, tarfile, StringIO, datetime tfile = None zfile = None to_archive = self.new_files.copy() to_archive = to_archive.union(set([val[1] for val in self.changed_content])) to_archive = to_archive.union(set([val[1] for val in self.changed_properties])) if len(to_archive)==0: print("No archive created, as no allocated files created or modified") return if tarname: print(">>> Creating tar file: %s" % tarname) tfile = tarfile.TarFile(tarname,mode="w") if zipname: print(">>> Creating zip file: %s" % zipname) zfile = zipfile.ZipFile(zipname,mode="w",allowZip64=True) files_written=set() content_error_log = [] for fi in to_archive: filename = fi.filename() fncount = 1 while filename in files_written: filename = "%s.%d" % (fi.filename(),fnperm) fncount+= 1 contents = None try: contents = fi.contents(imagefile) except ValueError as ve: if ve.message.startswith("icat error"): #Some files cannot be recovered, even from images that do not seem corrupted; log the icat command that failed. content_error_log.append(ve.message) else: #This is a more interesting error, so have process die to report immediately. 
raise if contents: if tfile: info = tarfile.TarInfo(name=filename) info.mtime = fi.mtime() info.atime = fi.atime() info.ctime = fi.ctime() info.uid = fi.uid() info.gid = fi.gid() info.size = fi.filesize() # addfile requires a 'file', so let's make one string = StringIO.StringIO() string.write(contents) string.seek(0) tfile.addfile(tarinfo=info,fileobj=string) if zfile: mtimestamp = fi.mtime().timestamp() info = zipfile.ZipInfo(filename) if mtimestamp: #mtime might be null info.date_time = datetime.datetime.fromtimestamp(mtimestamp).utctimetuple() info.internal_attr = 1 info.external_attr = 2175008768 # specifies mode 0644 zfile.writestr(info,contents) if tfile: tfile.close() if zfile: zfile.close() if len(content_error_log) > 0: sys.stderr.write("Errors retrieving file contents:\n") sys.stderr.write("\n".join(content_error_log)) sys.stderr.write("\n") if __name__=="__main__": from optparse import OptionParser from copy import deepcopy parser = OptionParser() parser.usage = '%prog [options] file1 file2 [file3...] (files can be xml or image files)' parser.add_option("-x","--xml",help="specify output file for XML",dest="xmlfilename") parser.add_option("--html",help="specify output in HTML",action="store_true") parser.add_option("-n","--notimeline",help="do not generate a timeline",action="store_true") parser.add_option("-d","--debug",help="debug",action='store_true') parser.add_option("-T","--tararchive",help="create tar archive file of new/changed files",dest="tarfile") parser.add_option("-Z","--zipfile",help="create ZIP64 archive file of new/changed files",dest="zipfile") parser.add_option("--timestamp",help="output all times in Unix timestamp format; otherwise use ISO 8601",action="store_true") (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) s = HiveState(notimeline=options.notimeline) for infile in args: print(">>> Reading %s" % infile) s.process(infile) if infile!=args[0]: # Not the first file. 
Report and optionally archive if options.tarfile or options.zipfile: s.output_archive(tarname=options.tarfile,zipname=options.zipfile) s.report() s.next() tcpflow/src/dfxml/python/icarvingtruth.py0000644000175000017500000001751612263701334017642 0ustar dimadima#!/usr/bin/python """ground truth program. Inputs: Multiple image files. Outputs: List of all the files and partial files in the last file. Process: 1. Read current disk image. Map out all allocated files. Add each fileinfo to the ground truth. 2. Read all of the previous disk images (or XML files). - If the a previously allocated file is present in the current image, add it to the ground truth. 3. Read all of the previous disk images. - For all of the sectors not used in the final version, note which sectors of the original files survive. """ import dfxml,fiwalk import sys import xml.dom.minidom from xml.dom.minidom import parseString # http://wiki.python.org/moin/MiniDom def make_residual(fi=None,image=None,runs=None): """Take a regular fileobject XML document and move the byte_runs into an section and add a section.""" fidoc = fi.doc dom = parseString("") newdoc = deepcopy(fidoc) original = dom.createElement("original") newdoc.appendChild(original) original.appendChild(newdoc.getElementsByTagName("byte_runs")[0]) # Create the residual residual = dom.createElement("residual") newdoc.appendChild(residual) # Create a new byte_runs new_byte_runs = dom.createElement("byte_runs") residual.appendChild(new_byte_runs) if image: x = dom.createElement("image") txt = dom.createTextNode(image) x.appendChild(txt) original.appendChild(x) for run in runs: x = dom.createElement("run") x.setAttribute("img_offset",run.img_offset) x.setAttribute("len",run.bytes) new_byte_runs.appendChild(x) return newdoc def annotate(xml,tag,value): from xml.dom.minidom import parseString dom = parseString("") x = dom.createElement(tag) txt = dom.createTextNode(value) x.appendChild(txt) xml.appendChild(x) def 
add_reference(doc,image="",reason=""): d2 = parseString("%s%s" % (image,reason)) doc.appendChild(d2.getElementsByTagName("reference")[0]) def print_ground_truth_fi(fi,out=sys.stdout): import os.path reference = fi.doc.getElementsByTagName("reference")[0] image = reference.getElementsByTagName("image")[0].firstChild.wholeText try: out.write("Original Filename: %s in: %s\nSHA 1: %s\n" % (fi.filename(),os.path.basename(image),fi.sha1())) except KeyError: print "***",fi print "***",fi.filename() print "***",os.path.basename(image) desc = "Location: " sector_size = 512 for run in fi.byte_runs(): eblurb = "" if run.extra_bytes()>0: eblurb = "and %3d bytes " % run.extra_bytes() out.write("%10s @ sector %8d ; %5d %4d-byte sectors %14s (%7d bytes total)\n" % (desc,run.start_sector(),run.sector_count(),run.sector_size,eblurb,run.bytes)) desc = "" out.write("\n") def print_ground_truth_report(doc,out=sys.stdout): def sort_by_runs(a,b): if len(a.byte_runs())==0: return -1 if len(b.byte_runs())==0: return 1 if a.byte_runs()[0].img_offset < b.byte_runs()[0].img_offset: return -1 if a.byte_runs()[0].img_offset > b.byte_runs()[0].img_offset: return 1 return 0 filist = [dfxml.fileobject_dom(x) for x in doc.getElementsByTagName("fileobject")] for fi in sorted(filist,sort_by_runs): print_ground_truth_fi(fi,out=out) def sector_from_file(imagefile,sector_number,sectorsize = 512): imagefile.seek(sector_number * sectorsize) return imagefile.read(sectorsize) if __name__=="__main__": from optparse import OptionParser from copy import deepcopy parser = OptionParser() parser.usage = '%prog [options] [mapfile1.iso mapfile2.iso ...] 
masterfile.iso' parser.add_option("-x","--xml",help="specify output file for XML",dest="xmlfilename") parser.add_option("-r","--report",help="specify output file for the report",dest="reportfilename") parser.add_option("-d","--debug",help="debug") (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) masterfn = args[-1] refs = args[:-1] master_imagefile = open(masterfn,"r") db = dfxml.extentdb(sectorsize=512) (doc,fileobjects) = fiwalk.fileobjects_using_dom(imagefile=master_imagefile, flags=fiwalk.ALLOC_ONLY) groundtruth = parseString("") # First, add all relevant metadata elements from the master file's # XML to the to the groundtruth file's XML. for node in doc.childNodes[0].childNodes: if node.nodeType==node.ELEMENT_NODE and \ node.nodeName not in ["volume","fileobject"]: groundtruth.childNodes[0].appendChild(node.cloneNode(node)) # Next, add the file object elements for fi in fileobjects: if options.debug: print "adding ",fi newdoc = fi.doc.cloneNode(fi) add_reference(newdoc,image=masterfn,reason='resident file') groundtruth.childNodes[0].appendChild(newdoc) db.add_runs(fi.byte_runs()) # For each map file, see if any of the allocated files are # in the ground truth file but not previously discovered for ref in refs: if options.debug: print "check files in ",ref (d2,fobj2) = fiwalk.fileobjects_using_dom(imagefile=open(ref,"r"), flags=fiwalk.ALLOC_ONLY) for fi in fobj2: runs = fi.byte_runs() if not db.intersects_runs(runs): db.add_runs(runs) newdoc = fi.doc.cloneNode(fi) add_reference(newdoc,image=ref,reason='residual file') groundtruth.childNodes[0].appendChild(newdoc) # Now, for each file, get a list of the sectors that are in unallocated # space and report which of them (if any) are in the final file. 
for ref in refs: if options.debug: print "check residual data in ",ref ref_imagefile = open(ref,"r") (d2,fobj2) = fiwalk.fileobjects_using_dom(imagefile=ref_imagefile, flags=fiwalk.ALLOC_ONLY) for fi in fobj2: for run in fi.byte_runs(): sectors_to_check = db.sectors_not_in_db(run) # For each sector to check, see if the value in the current image file # is the same as in the report imagefile def check_sector(n): return sector_from_file(ref_imagefile,n) \ == sector_from_file(master_imagefile,n) sectors_that_match = filter(check_sector,sectors_to_check) if sectors_that_match: if options.debug: print(fi.filename(), "run sectors:", db.sectors_for_bytes(run_len), "total sectors: ",len(sectors_to_check), "matching:",len(sectors_that_match)) runs = db.runs_for_sectors(sectors_that_match) if options.debug: print("runs:",runs) db.add_sectors(sectors_that_match) residual_doc = make_residual(fi=fi, image=ref, runs=runs) residual = residual_doc.childNodes[0] groundtruth.childNodes[0].appendChild(residual) try: xmlfile = open(options.xmlfilename,"w") except TypeError: xmlfile = sys.stdout print("Here is the XML:") xmlfile.write(groundtruth.toxml()) try: reportfile = open(options.reportfilename,"w") except TypeError: reportfile = sys.stdout print("\n\nHere is the report:") print_ground_truth_report(groundtruth,out=reportfile) sys.exit(0) tcpflow/src/dfxml/python/README.txt0000644000175000017500000000062012263701334016061 0ustar dimadimaThe following DFXML tools are provided: deidentify.py - Removes PII from filenames in a DFXML file dfxinfo.py - Print information about a DFXML file - Prints a summary about all of the files, duplicate files, histograms of file types. iredact.py - Image redaction tool using the langauge described in the file. iverify: - Reads an XML file and image and verifies that the files are present. 
tcpflow/src/dfxml/python/iexport.py0000644000175000017500000000510612263701334016433 0ustar dimadima#!/usr/bin/env python """iexport.py: export the unallocated spaces.""" class Run: """Keeps track of a single run""" def __init__(self,start,len): self.start = start self.len = len self.end = start+len-1 def __str__(self): return "Run<%d--%d> (len %d)" % (self.start,self.end,self.len) def contains(self,b): """Returns true if b is inside self.""" print "%d <= %d <= %d = %s" % (self.start,b,self.end,(self.start <= b <= self.end)) return self.start <= b <= self.end def intersects_run(self,r): """Return true if self intersects r. This may be because r.start is inside the run, r.end is inside the run, or self is inside the run.""" return self.contains(r.start) or self.contains(r.end) or r.contains(self.start) def contains_run(self,r): """Returns true if self completely contains r""" return self.contains(r.start) and self.contains(r.end) class RunDB: """The RunDB maintains a list of all the runs in a disk image. The RunDB is created with a single run that represnts all of the sectors in the disk image. Runs can then be removed, which causes existing runs to be split. Finally all of the remaining runs can be removed.""" def __init__(self,start,len): self.runs = [ Run(start,len) ] def __str__(self): return "RunDB\n" + "\n".join([str(p) for p in self.runs]) def intersecting_runs(self,r): """Return a list of all the Runs that intersect with r. 
This may be because r.start is inside the run, r.end is inside the run, because the run completely encloses r, or becuase r completely encloses the run.""" return filter(lambda x:x.intersects_run(r) , self.runs) def remove(self,r): """Remove run r""" for p in self.intersecting_runs(r): self.runs.remove(p) # if P is completely inside r, just remove it if r.contains_run(p): continue # Split p into before and after r; add the non-zero pieces before_len = r.start - p.start if before_len>0: self.runs.append(Run(p.start,before_len)) after_len = p.end - r.end if after_len>0: self.runs.append(Run(r.end,after_len)) if __name__=="__main__": r1 = Run(0,1000) r2 = Run(50,60) assert r1.intersects_run(r2) assert r2.intersects_run(r1) disk = RunDB(0,1000) print disk disk.remove(Run(50,60)) disk.remove(Run(0,10)) disk.remove(Run(40,20)) print disk tcpflow/src/dfxml/python/exp_slack.py0000644000175000017500000000333512263701334016714 0ustar dimadima#!/usr/bin/env python3.2 # # exp_slack.py: experiment on the slack space # quantify slack space # # (c) Martin Mulazzani, 2012 mmulazzani@sba-research.org # Additions by Simson Garfinkel import re import os import fiwalk import sys def proc(fi): # Skip the virtual files? if fi.filename()[0:1] in ['$']: return if fi.has_contents() and fi.is_file(): outstring = str(fi.partition())+"\t"+fi.filename()+"\t"+str(fi.filesize())+"\t"+str(fi.times())+"\n" f_out.write(outstring) if __name__=="__main__": if len(sys.argv) != 2: print('usage: ./fast_slack.py ') sys.exit(1) #input file_name = sys.argv[1] f = open(file_name, "rb") #output is to stdout outfile = sys.stdout #find partition information, blocksize and filesystem #1st partition has no. 
1, to correspond to fiwalk output partitioncounter = 0 f.write("********************************** PARTITIONS **********************************") f.write("\nNo\tBlocksize\tFilesystem\n") for line in f: if re.search("block_size", line): partitioncounter += 1 f_out.write(str(partitioncounter)) f_out.write("\t") f_out.write(re.split(">|<", line)[2]) if re.search("ftype_str", line): f_out.write("\t\t") f_out.write(re.split(">|<", line)[2]) f_out.write("\n") f_out.write("\n\n************************************* DATA *************************************\n") f_out.write("Partition\tFilename\tSize\tTimestamps\n") f.close() #re-open file for binary reading #file processing f = open(file_name, "rb") fiwalk.fiwalk_using_sax(xmlfile=f,callback=proc) tcpflow/src/dfxml/python/test_idifference_to_dfxml.sh0000755000175000017500000000241212263701334022121 0ustar dimadima#!/bin/bash . _pick_pythons.sh XMLLINT=`which xmllint` #Halt on error set -e #Display all executed commands set -x #Ensure the non-XML output doesn't fail, first. "$PYTHON3" idifference.py --summary ../samples/difference_test_[01].xml > idifference_test.txt #Generate XML output. "$PYTHON3" idifference.py --xml idifference_test.dfxml ../samples/difference_test_[01].xml if [ ! -x "$XMLLINT" ]; then echo "Error: xmllint not found. Can't check for if generated DFXML is valid XML. Install libxml2 (possibly xmlutils) to complete these unit tests." >&2 exit 1 fi xmllint --format idifference_test.dfxml >idifference_test_formatted.dfxml _check_counts() { #Check expected number of fileobjects appears test 4 == $(grep ' idifference_test_cat.dfxml xmllint --format idifference_test_cat.dfxml >idifference_test_cat_formatted.dfxml _check_counts idifference_test_cat_formatted.dfxml tcpflow/src/dfxml/python/imap.py0000644000175000017500000000405212263701334015666 0ustar dimadima#!/usr/bin/python """Usage: imap imagefile0.iso imagefile1.iso imagefile2.iso ... 
Produces a map of imagefile0.iso, using the other image files as "hints" for missing data. Only reports files that have been allocated; deleted files are reported only if they can be found allocated in another file. """ import fiwalk ################################################################ if __name__=="__main__": import sys from sys import stdout from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] image.iso ' parser.add_option("-d","--debug",help="debug",action="store_true") (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) imagefile = open(args[0],"r") annotated_runs = [] if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name)) def cb(fi): if options.debug: print("Read "+str(fi)) fragment_num = 1 for run in fi.byte_runs(): annotated_runs.append((run.img_offset,run,fragment_num,fi)) fragment_num += 1 fiwalk.fiwalk_using_sax(imagefile=imagefile,callback=cb) next_sector = 0 for (ip,run,fragment_num,fi) in sorted(annotated_runs): extra = "" fragment = "" start_sector = run.img_offset/512 sector_count = int(run.bytes/512) partial = run.bytes % 512 if not fi.allocated(): print("***") if not fi.file_present(): # it's not here! 
continue if partial>0: sector_count += 1 extra = "(%3d bytes slack)" % (512-partial) if fi.fragments()>2: fragment = "fragment %d" % fragment_num if next_sector != start_sector: print " <-- %5d unallocated sectors @ sector %5d -->" % (start_sector-next_sector,next_sector) print("[ %6d -> %6d sectors %18s ] %s %s " % (start_sector,sector_count,extra,fi.filename(),fragment)) next_sector = start_sector + sector_count tcpflow/src/dfxml/python/corpus_sync.py0000644000175000017500000000354112263701334017311 0ustar dimadima#!/usr/bin/env python3.2 # # sync corpus based on DFXML files import dfxml, fiwalk from collections import defaultdict class CorpusDB: def __init__(self): self.all = [] self.md5db = defaultdict(list) # maps from self.pathdb = dict() def process_fi(self,fi): self.all.append(fi) self.md5db[fi.md5()].append(fi) self.pathdb[fi.filename()] = fi def ingest_dfxml(self,fname): fiwalk.fiwalk_using_sax(xmlfile=open(fname,'rb'),flags=fiwalk.ALLOC_ONLY,callback=self.process_fi) def __iter__(self): return self.all.__iter__() def __delitem__(self,fi): self.all.remove(fi) self.md5db[fi.md5()].remove(fi) del self.pathdb[fi.filename()] if __name__=="__main__": from optparse import OptionParser from copy import deepcopy parser = OptionParser() (options,args) = parser.parse_args() (fn1,fn2) = args[0:2] print("# Reading B - the master {}".format(fn1)) b = CorpusDB() b.ingest_dfxml(fn1) print("# Reading A - the current system {}".format(fn2)) a = CorpusDB() a.ingest_dfxml(fn2) print("# Files in A that should not be in B:") rmlist = [afi for afi in a if (afi.md5() not in b.md5db)] for afi in rmlist: print("rm {}".format(afi.filename())) del a[afi] fixups = [] for bfi in b: if bfi.filename() in a.pathdb and bfi.md5()==a.pathdb[bfi.filename()].md5(): continue if bfi.md5() not in a.md5db: print("get {}".format(bfi.filename())) continue afi = a.md5db[bfi.md5()][0] nfn = bfi.filename()+".new" print("ln {} {}".format(afi.filename(),nfn)) fixups.append((nfn,bfi.filename())) for 
(nfn,bfi_filename) in fixups: print("mv {} {}".format(nfn,bfi_filename)) tcpflow/src/dfxml/python/Makefile.am0000644000175000017500000000247112263701334016425 0ustar dimadimaEXTRA_DIST = demo_plot_times.py demo_readtimes.py demo_sizes.py \ dfxml.py fiwalk.py histogram.py\ iblkfind.py icarvingtruth.py idifference.py iextract.py igrep.py \ imap.py imicrosoft_redact.py iredact-config.txt iredact.py ireport.py\ dfxml_tool.py iverify.py iverify2.py sanitize_xml.py xml2body.py CLEANFILES=g1.xml g2.xml g3.xml g4.xml nps-2009-canon2-gen1.xml IMAGES=/corp/images/nps/nps-2009-canon2/ GEN6=$(IMAGES)/nps-2009-canon2-gen6.raw iredact-test: python iredact.py config.txt igt: python igroundtruth.py $(IMAGES)/*[1-5].raw $(IMAGES)/*6.raw test2: python iverify2.py -g foo $(RDIR)/gen4.raw ~/domex/src/NIST/canon2/gen4.xml map: python imap.py $(RDIR)/gen4.raw test: cp -f small.dmg small.iso python iredact.py -r config.txt small.iso cmp -b small.dmg small.iso test3: g1.xml g2.xml g3.xml g4.xml test4: python isectorfind.py $(GEN6) 47520 49217 50690 clean: /bin/rm -f $(CLEANFILES) *~ *.bak nps-2009-canon2-gen1.xml: $(IMAGES)/nps-2009-canon2-gen1.raw fiwalk -Xg1.xml -1m $(IMAGES)/nps-2009-canon2-gen1.raw fiwalk -Xg2.xml -1m $(IMAGES)/nps-2009-canon2-gen2.raw fiwalk -Xg3.xml -1m $(IMAGES)/nps-2009-canon2-gen3.raw fiwalk -Xg4.xml -1m $(IMAGES)/nps-2009-canon2-gen4.raw fiwalk -Xg5.xml -1m $(IMAGES)/nps-2009-canon2-gen5.raw fiwalk -Xg6.xml -1m $(IMAGES)/nps-2009-canon2-gen6.raw tcpflow/src/dfxml/python/demo_sizes.py0000644000175000017500000000144412263701334017103 0ustar dimadima#!/usr/bin/env python3.2 # # Demo program that shows how to calculate the average size of file objects in a DFXML file # import dfxml,math,sys import collections sums = collections.Counter() sum_of_squares= collections.Counter() count = collections.Counter() def func(fi): ext = fi.ext() count[ext] += 1 sums[ext] += fi.filesize() sum_of_squares[ext] = fi.filesize() ** 2 
dfxml.read_dfxml(xmlfile=open(sys.argv[1],'rb'),callback=func) fmt = "{:8} {:8} {:8} {:8} {:8}" print(fmt.format("Ext","Count","Total","Average","StdDev")) for ext in sums.keys(): print(fmt.format(ext, count[ext], sums[ext], sums[ext]/count[ext], math.sqrt(sum_of_squares[ext]/count[ext] - (sums[ext]/count[ext])**2))) tcpflow/src/dfxml/python/nsrl_rds.py0000644000175000017500000000266212263701334016573 0ustar dimadima#!/usr/bin/env python # # Demonstrates how to communicate with NPS NSRL RDS # RDS_SERVER = "https://domex.nps.edu/www-noauth/nsrl_rds.cgi" import xmlrpclib if __name__=="__main__": print("Demonstration of NSRL RDS service at %s\n" % RDS_SERVER) print("") p = xmlrpclib.ServerProxy(RDS_SERVER) try: avail = p.available() except xmlrpclib.ProtocolError as e: print("Cannot access "+RDS_SERVER) print(e) raise RuntimeError print("Available RDS sets: %s " % avail) md5_val = "EB714443AA2FC1A3D16E39EB8007A0B2" # Build a search term search = {"db":avail[0], # pick the first search term "md5":md5_val } print("Here are the files with a md5 of "+md5_val) ret = p.search(search) fields = ret['fields'] for row in ret['result']: for(a,b) in zip(fields,row): print a,"=",b print "" print("Now we will do a query for multiple MD5 values. You can do this by specifying\n"+ "a value as an array.") searchm = {"db":avail[0], "md5":["EB714443AA2FC1A3D16E39EB8007A0B2", "9B3702B0E788C6D62996392FE3C9786A"]} print "sending:",searchm ret = p.search(searchm) print "got:",ret fields = ret['fields'] for row in ret['result']: for(a,b) in zip(fields,row): print a,"=",b print "" tcpflow/src/dfxml/python/dfxml_html.py0000644000175000017500000000405312263701334017077 0ustar dimadimahtml = False def header(): if html: print(""" """) def h1(title): global options if html: print("

%s

" % title) return print("\n\n%s\n" % title) def h2(title): global options if html: print("

%s

" % title) return print("\n%s\n" % title) def table(rows,styles=None,break_on_change=False): import sys global options def alldigits(x): if type(x)!=str and type(x)!=unicode: return False for ch in x: if ch.isdigit()==False: return False return True def fmt(x): if x==None: return "" if type(x)==int: return "%12d" % x if alldigits(x): return "%12d" % int(x) if type(x)==unicode: return x return unicode(x) if html: print("") for row in rows: print("") if not styles: styles = [""]*len(rows) for (col,style) in zip(row,styles): sys.stdout.write("" % (style,col)) print("") print("
%s
") return lastRowCol0 = None for row in rows: if row[0]!=lastRowCol0: sys.stdout.write("\n") lastRowCol0 = row[0] try: line = "\t".join([fmt(col) for col in row]) sys.stdout.write(line) sys.stdout.write("\n") except UnicodeEncodeError: # Fall back to manual join for col in row: for ch in fmt(col): try: sys.stdout.write(ch) except UnicodeEncodeError: sys.stdout.write('?'); sys.stdout.write("\t") print("(UNICODE ERROR)") tcpflow/src/dfxml/python/iblkfind.py0000644000175000017500000000221112263701334016515 0ustar dimadima#!/usr/bin/python """Usage: iblkfind imagefile.iso s1 [s2 s3 ...] ... Reports the files in which sectors s1, s2, s3... are located. """ import dfxml,sys if __name__=="__main__": from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] imagefile-or-xmlfile s1 [s2 s3 s3 ...]' parser.add_option("--offset",help="values are byte offsets, not sectors",action="store_true") parser.add_option("--blocksize",help="specify sector blockszie",default=512) (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) fn = args[0] print(args) print("Processing %s" % fn) print("Searching for %s" % ", ".join(args[1:])) divisor = 1 if options.offset: divisor = options.blocksize sectors = set([int(s)/divisor for s in args[1:]]) def process(fi): for s in sectors: if fi.has_sector(s): print("%d\t%s" % (s,fi.filename())) if not fn.endswith(".xml"): print("iblkfind requires an XML file") exit(1) dfxml.read_dfxml(xmlfile=open(args[0],"rb"),callback=process) tcpflow/src/dfxml/python/dedup.py0000644000175000017500000000137112263701334016042 0ustar dimadima#!/usr/bin/python # # dedup import os class dedup: def __init__(self,options): self.seen = set() self.options = options def process(fi): if fi.md5() in self.seen: if self.verbose: print("rm {}".format(fi.filename())) if self.commit: #os.unlink(fi.filename()) continue if __name__=="__main__": from optparse import OptionParser from copy import deepcopy global options 
parser.add_option("--commit",action="store_true") parser.add_option("--verbose",action="store_true") parser = OptionParser() (options,args) = parser.parse_args() dobj = dedup(options) dfxml.read_dfxml(open(args[0],'rb'),callback=dobj.process) tcpflow/src/dfxml/python/igrep.py0000644000175000017500000000133212263701334016044 0ustar dimadima#!/usr/bin/python """Usage: igrep imagefile.iso string ... Reports the files in which files have the string. """ import fiwalk,dfxml if __name__=="__main__": import sys from optparse import OptionParser parser = OptionParser() parser.usage = '%prog [options] image.iso s1' parser.add_option("-d","--debug",help="debug",action="store_true") (options,args) = parser.parse_args() if len(args)!=2: parser.print_help() sys.exit(1) (imagefn,data) = args def process(fi): offset = fi.contents().find(data) if offset>0: print "%s (offset=%d)" % (fi.filename(),offset) fiwalk.fiwalk_using_sax(imagefile=open(imagefn),callback=process) tcpflow/src/dfxml/python/test_regxml.sh0000644000175000017500000000030612263701334017255 0ustar dimadima#!/bin/sh python2.7 demo_registry_timeline.py ../tests/m57-charlie-2009-11-20-charlie-ntuser.dat.regxml python3.2 demo_registry_timeline.py ../tests/m57-charlie-2009-11-20-charlie-ntuser.dat.regxml tcpflow/src/dfxml/python/demo_piecewise.py0000644000175000017500000000261312263701334017722 0ustar dimadima#!/usr/bin/env python3.2 # # Demo program that prints piecewise hashes and reports on co-occurance of hashes. 
# # Multimap from http://stackoverflow.com/questions/1731971/is-there-multimap-implementation-in-python import dfxml,math,sys,collections class SectorCorrelator: def __init__(self): self.hashdb = collections.defaultdict(list) # key is the MD5 code, value is a list of matches self.files = 0 self.sectors = 0 def process(self,fi): """ Process the objects as they are read from the XML file""" self.files += 1 print(fi.filename()) for br in fi.byte_runs(): self.sectors += 1 self.hashdb[br.hashdigest['md5']].append((fi.filename(),br.file_offset)) def print_report(self): print("Files processed: {}".format(self.files)) print("Sectors processed: {}".format(self.sectors)) print("") print("The following duplicates were found:") print("Hash Filename Offset in file") for (hash,ents) in self.hashdb.items(): if len(ents)>1: print("{} -- {} copies found".format(hash,len(ents))) for e in sorted(ents): print(" {} {:8,}".format(e[0],e[1])) print("") sc = SectorCorrelator() dfxml.read_dfxml(xmlfile=open(sys.argv[1],'rb'),callback=sc.process) sc.print_report() tcpflow/src/dfxml/python/test_hfsj.sh0000644000175000017500000000140212263701334016707 0ustar dimadima#!/bin/sh hdiutil create -size 10m -fs HFS+J -nospotlight -attach -volname image -ov -layout NONE \ -imagekey diskimage-class=CRawDiskImage image.dmg echo "This is file 1 - snarf" > /Volumes/image/file1.txt echo "This is file 2 - snarf" > /Volumes/image/file2.txt sync hdiutil detach /Volumes/image cp image.dmg image.gen0.dmg echo "look for file1 and file2:" strings -o image.dmg | grep snarf echo "mount the disk and overwrite the contents of file2" hdiutil attach image.dmg echo "New file 1 contents - snarf" | dd of=/Volumes/image/file1.txt echo "" echo "===file1.txt===" cat /Volumes/image/file1.txt echo "" echo "===file2.txt===" cat /Volumes/image/file2.txt echo "" hdiutil detach /Volumes/image cp image.dmg image.gen1.dmg strings -o image.dmg | grep snarf 
tcpflow/src/dfxml/python/xmirror.py0000644000175000017500000000423512263701334016445 0ustar dimadima# # Using two XML files make the current system look like the master # from filesdb import filesdb import dfxml import sys # # test program. Reads a database and dumps it. # if __name__=="__main__": from argparse import ArgumentParser parser = ArgumentParser(description='Make the local system look like the master') parser.add_argument('--commit',help='Actually do the job',action='store_true') parser.add_argument('--local',help='speciies an XML file that describes the local system (required)') parser.add_argument('masterfiles', help='XML files to process. Files may be prefixed with an [xml] path', nargs='+') args = parser.parse_args() if not args.local: parser.print_help() exit(1) masterdb = filesdb() for fn in args.masterfiles: masterdb.read_with_prefix(fn) print("Master stats:") masterdb.print_stats(sys.stdout) print("\n") print("Local mirror stats:") ldb = filesdb() ldb.read_with_prefix(args.local) # Create new directories if needed for newdir in ldb.new_dirs(masterdb): print("mkdir {}".format(newdir)) keep_files = [] mv_files = [] rm_files = [] def process_fi(fi): # If hash is same and name is the same, ignore: for nfi in masterdb.search(fi,hash=True,name=True): keep_files.append(fi.filename()) return # in the database # If hash is same and name is different, move it for nfi in masterdb.search(fi,hash=True): mv_files.append((fi.filename(),nfi.filename())) return # If name is same and hash is different, erase it for nfi in masterdb.search(fi,name=True): rm_files.append(fi.filename()) return # Otherwise, erase the hash rm_files.append(fi.filename()) return # Rename files that need to be renamed for fi in ldb: process_fi(fi) print("Files to keep: {:12,}".format(len(keep_files))) print("Files to rm: {:12,}".format(len(rm_files))) print("Files to mv: {:12,}".format(len(mv_files))) tcpflow/src/dfxml/python/fiwalk.py0000644000175000017500000001341212263701334016215 
0ustar dimadima#!/usr/bin/env python ### DO NOT MODIFY THIS FILE ### ### DOWNLOAD NEW FILE FROM https://raw.github.com/simsong/dfxml/master/python/fiwalk.py # # fiwalk version 0.6.3 # # %%% BEGIN NO FILL """fiwalk module This is the part of dfxml that is dependent on fiwalk.py """ import dfxml from sys import stderr from subprocess import Popen,PIPE ALLOC_ONLY = 1 fiwalk_cached_installed_version = None def fiwalk_installed_version(fiwalk='fiwalk'): """Return the current version of fiwalk that is installed""" global fiwalk_cached_installed_version if fiwalk_cached_installed_version: return fiwalk_cached_installed_version from subprocess import Popen,PIPE import re for line in Popen([fiwalk,'-V'],stdout=PIPE).stdout.read().decode('utf-8').split("\n"): g = re.search("^FIWalk Version:\s+(.*)$",line) if g: fiwalk_cached_installed_version = g.group(1) return fiwalk_cached_installed_version g = re.search("^SleuthKit Version:\s+(.*)$",line) if g: fiwalk_cached_installed_version = g.group(1) return fiwalk_cached_installed_version return None class XMLDone(Exception): def __init__(self,value): self.value = value class version: def __init__(self): self.cdata = "" self.in_element = [] self.version = None def start_element(self,name,attrs): if(name=='volume'): # too far? 
raise XMLDone(None) self.in_element += [name] self.cdata = "" def end_element(self,name): if ("fiwalk" in self.in_element) and ("creator" in self.in_element) and ("version" in self.in_element): raise XMLDone(self.cdata) if ("fiwalk" in self.in_element) and ("fiwalk_version" in self.in_element): raise XMLDone(self.cdata) if ("version" in self.in_element) and ("dfxml" in self.in_element) and ("creator" in self.in_element): raise XMLDone(self.cdata) self.in_element.pop() self.cdata = "" def char_data(self,data): self.cdata += data def get_version(self,fn): import xml.parsers.expat p = xml.parsers.expat.ParserCreate() p.StartElementHandler = self.start_element p.EndElementHandler = self.end_element p.CharacterDataHandler = self.char_data try: p.ParseFile(open(fn,'rb')) except XMLDone as e: return e.value except xml.parsers.expat.ExpatError: return None # XML error def fiwalk_xml_version(filename=None): """Returns the fiwalk version that was used to create an XML file. Uses the "quick and dirty" approach to getting to getting out the XML version.""" p = version() return p.get_version(filename) ################################################################ def E01_glob(fn): import os.path """If the filename ends .E01, then glob it. Currently only handles E01 through EZZ""" ret = [fn] if fn.endswith(".E01") and os.path.exists(fn): fmt = fn.replace(".E01",".E%02d") for i in range(2,100): f2 = fmt % i if os.path.exists(f2): ret.append(f2) else: return ret # Got through E99, now do EAA through EZZ fmt = fn.replace(".E01",".E%c%c") for i in range(0,26): for j in range(0,26): f2 = fmt % (i+ord('A'),j+ord('A')) if os.path.exists(f2): ret.append(f2) else: return ret return ret # don't do F01 through F99, etc. 
return ret def fiwalk_xml_stream(imagefile=None,flags=0,fiwalk="fiwalk",fiwalk_args=""): """ Returns an fiwalk XML stream given a disk image by running fiwalk.""" if flags & ALLOC_ONLY: fiwalk_args += "-O" from subprocess import call,Popen,PIPE # Make sure we have a valid fiwalk try: res = Popen([fiwalk,'-V'],stdout=PIPE).communicate()[0] except OSError: raise RuntimeError("Cannot execute fiwalk executable: "+fiwalk) cmd = [fiwalk,'-x'] if fiwalk_args: cmd += [fiwalk_args] p = Popen(cmd + E01_glob(imagefile.name),stdout=PIPE) return p.stdout def fiwalk_using_sax(imagefile=None,xmlfile=None,fiwalk="fiwalk",flags=0,callback=None,fiwalk_args=""): """Processes an image using expat, calling a callback for every file object encountered. If xmlfile is provided, use that as the xmlfile, otherwise runs fiwalk.""" import dfxml if xmlfile==None: xmlfile = fiwalk_xml_stream(imagefile=imagefile,flags=flags,fiwalk=fiwalk,fiwalk_args=fiwalk_args) r = dfxml.fileobject_reader(flags=flags) r.imagefile = imagefile r.process_xml_stream(xmlfile,callback) def fileobjects_using_sax(imagefile=None,xmlfile=None,fiwalk="fiwalk",flags=0): ret = [] fiwalk_using_sax(imagefile=imagefile,xmlfile=xmlfile,fiwalk=fiwalk,flags=flags, callback = lambda fi:ret.append(fi)) return ret def fileobjects_using_dom(imagefile=None,xmlfile=None,fiwalk="fiwalk",flags=0,callback=None): """Processes an image using expat, calling a callback for every file object encountered. 
If xmlfile is provided, use that as the xmlfile, otherwise runs fiwalk.""" import dfxml if xmlfile==None: xmlfile = fiwalk_xml_stream(imagefile=imagefile,flags=flags,fiwalk=fiwalk) return dfxml.fileobjects_dom(xmlfile=xmlfile,imagefile=imagefile,flags=flags) ctr = 0 def cb_count(fn): global ctr ctr += 1 if __name__=="__main__": import sys for fn in sys.argv[1:]: print("{} contains fiwalk version {}".format(fn,fiwalk_xml_version(fn))) # Count the number of files fiwalk_using_sax(xmlfile=open(fn,'rb'),callback=cb_count) print("Files: {}".format(ctr)) tcpflow/src/dfxml/python/test_mac_timelines.sh0000755000175000017500000000176212263701334020602 0ustar dimadima#!/bin/bash . _pick_pythons.sh #Halt on error set -e #Display all executed commands set -x "$PYTHON2" demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p2.txt test 12 == $(cat demo_mac_timeline_simple_p2.txt | wc -l) "$PYTHON3" demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p3.txt test 12 == $(cat demo_mac_timeline_simple_p3.txt | wc -l) "$PYTHON2" demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p2.txt test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l) "$PYTHON3" demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p3.txt test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l) "$PYTHON3" demo_mac_timeline.py ../samples/difference_test_1.xml >demo_mac_timeline_dt1.txt test 9 == $(cat demo_mac_timeline_dt1.txt | wc -l) "$PYTHON3" demo_mac_timeline_iter.py ../samples/difference_test_1.xml >demo_mac_timeline_iter_dt1.txt test 9 == $(cat demo_mac_timeline_iter_dt1.txt | wc -l) tcpflow/src/dfxml/python/test_redact.sh0000644000175000017500000000207212263701334017223 0ustar dimadima#!/bin/sh /bin/rm -f testdisk.dmg redact.cfg hdiutil create -size 1m -fs MS-DOS -nospotlight -attach -volname testdisk testdisk.dmg echo "This is the zero file. FILE0001." 
> /Volumes/TESTDISK/file0.txt echo "This is the first file. FILE0001." > /Volumes/TESTDISK/file1.txt echo "This is the second file. FILE0002." > /Volumes/TESTDISK/file2.txt echo "This is the third file. FILE0003." > /Volumes/TESTDISK/file3.txt echo "This is the fourth file. FILE0004." > /Volumes/TESTDISK/file4.txt echo "This is the fifth file. FILE0005." > /Volumes/TESTDISK/file5.txt echo "This is the dixth file. FILE0006." > /Volumes/TESTDISK/file6.txt hdiutil detach /Volumes/TESTDISK cat > redact.cfg <&2 exit 1 fi fi fi tcpflow/src/dfxml/python/dfxml_tool.py0000644000175000017500000003162212263701334017112 0ustar dimadima#!/usr/bin/python # # generate MD5s for a directory in Digital Forensics XML Output # Uses dublin core. # Find out more at http://www.dublincore.org/documents/dc-xml-guidelines/ # http://dublincore.org/documents/dc-citation-guidelines/ # http://jedmodes.sourceforge.net/doc/DC-Metadata/dcmi-terms-for-jedmodes.html # http://www.ukoln.ac.uk/metadata/dcmi/mixing-matching-faq/ __version__ = '1.0.0' import sys import os.path import hashlib from xml.sax.saxutils import escape xmloutputversion = '0.3' dfxml_ns = {'xmlns':'http://afflib.org/fiwalk/', 'xmlns:xsi':'http://www.w3.org/2001/XMLSchema-instance', 'xmlns:dc':'http://purl.org/dc/elements/1.1/'} class xml: def __init__(self): self.stack = [] def set_outfilename(self,fn): self.outfilename = fn def open(self,f): if type(f)==file: self.f = f if type(f)==str or type(f)==unicode: self.f = open(f,'w') self.write("\n") def dublin_core(self,dc_record): self.push('metadata',dfxml_ns,attrib_delim='\n ') for (n,v) in dc_record.iteritems(): if v!=None: self.xmlout(n,v) self.pop('metadata') self.write('\n') def provenance(self): global args if args.allprovenance or \ args.commandline or \ args.pythonversion: self.push('creator') self.xmlout('program', os.path.basename(sys.argv[0])) self.xmlout('version', __version__) self.push('execution_environment') if args.allprovenance or args.commandline: 
self.xmlout('command_line', ' '.join(sys.argv)) if args.allprovenance or args.pythonversion: self.xmlout('python_version', sys.version) self.pop('execution_environment') self.pop('creator') self.f.write('\n') def push(self,tag,attribs={},attrib_delim=' '): """Enter an XML block, with optional attributes on the tag""" self.tagout(tag,attribs=attribs,attrib_delim=attrib_delim,newline=True) self.stack.append(tag) def pop(self,v=None): """Leave an XML block""" if v: assert v==self.stack[-1] self.tagout("/"+self.stack.pop(),newline=True) def tagout(self,tag,attribs={},attrib_delim=' ',newline=None): """Outputs a plain XML tag and optional attributes""" self.f.write("<%s" % tag) if attribs: self.f.write(" ") count = len(attribs) for (n,v) in attribs.iteritems(): self.f.write("%s='%s'" % (n,escape(v))) count -= 1 if count>0: self.f.write(attrib_delim) self.f.write(">") if newline: self.f.write("\n") def xmlout(self,tag,value,attribs={}): """Output an XML tag and its value""" self.tagout(tag,attribs,newline=False) self.write(escape(str(value))) self.write("\n" % tag) def write(self,s): self.f.write(s) def xmlout_times(fn,x,fistat): global args for (time_tag, time_field) in [ ("mtime", "st_mtime"), ("atime", "st_atime"), ("ctime", "st_ctime"), ("crtime", "st_birthtime") ]: if time_field in dir(fistat): attrs_dict = dict() time_data = getattr(fistat,time_field) #Format timestamp data if args.iso_8601: import dfxml text_out = str(dfxml.dftime(time_data)) else: attrs_dict["format"] = "time_t" text_out = str(time_data) x.xmlout(time_tag, text_out, attrs_dict) def emit_directory(fn,x,partno=None): x.push("fileobject") if not args.nofilenames: if args.stripprefix and fn.startswith(args.stripprefix): x.xmlout("filename",fn[ len(args.stripprefix) : ]) elif args.stripleaddirs and args.stripleaddirs > 0: x.xmlout("filename","/".join(fn.split("/")[args.stripleaddirs:])) else: x.xmlout("filename",fn) if not args.nometadata: fistat = os.stat(fn) if partno: x.xmlout("partition",partno) 
x.xmlout("inode",fistat.st_ino) x.xmlout("filesize",fistat.st_size) xmlout_times(fn,x,fistat) x.xmlout("name_type", "d") if args.addfixml: x.write(args.addxml) x.pop("fileobject") x.write("\n") def hash_file(fn,x,partno=None): import hashlib try: f = open(fn) except IOError,e: sys.stderr.write("%s: %s\n" % (fn,str(e))) return x.push("fileobject") if not args.nofilenames: if args.stripprefix and fn.startswith(args.stripprefix): x.xmlout("filename",fn[ len(args.stripprefix) : ]) elif args.stripleaddirs and args.stripleaddirs > 0: x.xmlout("filename","/".join(fn.split("/")[args.stripleaddirs:])) else: x.xmlout("filename",fn) if not args.nometadata: fistat = os.stat(fn) if partno: x.xmlout("partition",partno) x.xmlout("inode",fistat.st_ino) x.xmlout("filesize",fistat.st_size) xmlout_times(fn,x,fistat) #Distinguish regular files from directories, if directories are requested if args.includedirs: x.xmlout("name_type", "r") if args.addfixml: x.write(args.addxml) if args.md5: md5_all = hashlib.md5() if args.sha1: sha1_all = hashlib.sha1() if args.sha256: sha256_all = hashlib.sha256() chunk_size = 65536 # default chunk size if args.piecewise: chunk_size = args.piecewise if args.piecewise: x.push("byte_runs") offset = 0 read_error = False while True: buf = "" try: buf = f.read(chunk_size) except: warning = "Warning: read() failed. Cannot produce hash." 
read_error = True x.write("\n") sys.stderr.write("%s File: %r\n" % (warning, fn)) buf = "" if buf=="": break if args.md5: md5_all.update(buf) if args.sha1: sha1_all.update(buf) if args.sha256: sha256_all.update(buf) if args.piecewise: x.write("" % (offset,len(buf))) if args.md5: md5_part = hashlib.md5() md5_part.update(buf) x.write("%s" % md5_part.hexdigest()) if args.sha1: sha1_part = hashlib.sha1() sha1_part.update(buf) x.write("%s" % sha1_part.hexdigest()) if args.sha256: sha256_part = hashlib.sha256() sha256_part.update(buf) x.write("%s" % sha256_part.hexdigest()) x.write("\n") offset += len(buf) if args.piecewise: x.pop("byte_runs") if not read_error: if args.md5: x.write("%s\n" % (md5_all.hexdigest())) if args.sha1: x.write("%s\n" % (sha1_all.hexdigest())) if args.sha256: x.write("%s\n" % (sha256_all.hexdigest())) x.pop("fileobject") x.write("\n") def extract(fn): out = sys.stdout cdata = None def start_element(name,attr): global cdata if name=='hashdigest': try: kind = attr['type'].upper() except KeyError: kind = 'MD5' if ((kind=='MD5' and args.md5 ) or (kind=='SHA1' and args.sha1) or (kind=='SHA256' and args.sha256)): cdata = "" else: cdata = None def char_data(data): global cdata if cdata!=None: cdata += data def end_element(name): global cdata if cdata!=None: out.write(cdata) out.write("\n") cdata = None import xml.parsers.expat p = xml.parsers.expat.ParserCreate() p.StartElementHandler = start_element p.EndElementHandler = end_element p.CharacterDataHandler = char_data p.ParseFile(open(fn)) if(__name__=='__main__'): from argparse import ArgumentParser global args parser = ArgumentParser() parser.usage =\ """ prog [args] file1 [file2...] --- hash files and produce DFXML [args] dir1 [dir2...] --- hash dirs and produce DFXML You can also extract a set of hashes to stdout with: [--md5 | --sha1 | --sha256] --extract=filename.xml Note: MD5 output is assumed unless another hash algorithm is specified. 
""" parser.add_argument('--piecewise',help='Specifies size of piecewise hashes',default=0,type=int) parser.add_argument('--addfixml',help='Specifies XML to add to each file object (for labeling)') parser.add_argument('--sha1',help='Generate sha1 hashes',action='store_true') parser.add_argument('--md5',help='Generate MD5 hashes',action='store_true') parser.add_argument('--sha256',help='Generate sha256 hashes',action='store_true') parser.add_argument('--output',help='Specify output filename (default stdout)') parser.add_argument('--extract',help='Specify a DFXML to extract a hash set from') parser.add_argument('--iso-8601',help='Format timestamps as ISO-8601 in metadata',action='store_true') parser.add_argument('--nometadata',help='Do not include file metadata (times & size) in XML',action='store_true') parser.add_argument('--nofilenames',help='Do not include filenames in XML',action='store_true') parser.add_argument('--stripprefix',help='Remove matching prefix string from filenames (e.g. "/mnt/diskname" would reduce "/mnt/diskname/foo" to "/foo", and would not affect "/run/mnt/diskname/foo")') parser.add_argument('--stripleaddirs',help='Remove N leading directories from filenames (e.g. 
1 would reduce "/mnt/diskname/foo" to "mnt/diskname/foo", 2 would reduce the same to "diskname/foo")',default=0,type=int) parser.add_argument('--includedirs',help='Include directories alongside files in file system walk output',action='store_true') provenance_group = parser.add_argument_group('provenance', 'Options to record execution environment details in the output.') provenance_group.add_argument('--allprovenance',help='Include all provenance information requestable in this option group',action='store_true') provenance_group.add_argument('--commandline', help='Record command line in output',action='store_true') provenance_group.add_argument('--pythonversion', help='Record Python version in output',action='store_true') parser.add_argument('--title',help='HASHSET Title') parser.add_argument('--description',help='HASHSET Description') parser.add_argument('--publisher',help='HASHSET Publisher') parser.add_argument('--identifier',help='HASHSET Identifier') parser.add_argument('--creator',help='HASHSET Author or Creator') parser.add_argument('--accessRights',help='HASHSET Access Rights') parser.add_argument('--dateSubmitted',help='HASHSET Submission Date') parser.add_argument('--abstract',help='HASHSET Abstract') parser.add_argument('--classification',help='HASHSET Classification') parser.add_argument('--contact',help='HASHSET Contact if found') parser.add_argument('targets',help='What to parse',nargs='+') args = parser.parse_args() if args.extract: extract(args.extract) exit(0) x = xml() if args.output: x.open(open(args.output)) else: x.open(sys.stdout) # Start the DFXML x.push("dfxml",{'xmloutputversion':xmloutputversion}) x.dublin_core({'dc:type':'Hash Set', 'dc:title':args.title, 'dc:description':args.description, 'dc:publisher':args.publisher, 'dc:identifier':args.identifier, 'dc:creator':args.creator, 'dc:accessRights':args.accessRights, 'dc:dateSubmitted':args.dateSubmitted, 'dc:abstract':args.abstract, 'classification':args.classification, 
'contactIfFound':args.contact } ) x.provenance() # Generate the hashes for (fn_no, fn) in enumerate(args.targets): if os.path.isdir(fn): for (dirpath,dirnames,filenames) in os.walk(fn): if args.includedirs: for dn in dirnames: emit_directory(os.path.join(dirpath,dn),x, fn_no+1) for fn in filenames: hash_file(os.path.join(dirpath,fn),x, fn_no+1) else: hash_file(fn,x) x.pop("dfxml") tcpflow/src/dfxml/python/xdiff.py0000644000175000017500000000132712263701334016042 0ustar dimadima# # Report the difference between two dfxml files # from filesdb import filesdb import dfxml import sys # # test program. Reads a database and dumps it. # if __name__=="__main__": from argparse import ArgumentParser parser = ArgumentParser(description='Test the files database with one or more DFXML files') parser.add_argument('xmlfiles',help='XML files to process',nargs='+') args = parser.parse_args() db0 = None for fn in args.xmlfiles: db1 = filesdb() db1.fname = fn db1.read(fn) print("{} stats:".format(fn)) db1.print_stats(sys.stdout) if db0: print("") print("Difference from {}".format(db0.fname)) db0 = db1 tcpflow/src/dfxml/python/deidentify_xml.py0000644000175000017500000000675312263701334017756 0ustar dimadima#!/usr/bin/python # # deidentify_xml.py: # Given XML for a disk, remove information that might be personally identifying from filenames. # remember the mapping so that directory names don't get changed. 
# # 2012-10-27 slg - updated to Python3 private_dirs = ["home/","usr/home","Users"] ok_top_paths_win = ["program files/","System","Windows"] ok_top_paths_mac = ["bin/","usr","etc","private","applications","developer",'bin','sbin','lib','dev'] ok_top_paths = ok_top_paths_win + ok_top_paths_mac + ['$orphanfiles'] acceptable_extensions = ["exe","dll","sys","com","hlp"] import os.path, os, sys partdir = {} def sanitize_part(part): """Sanitize a part of a pathname in a consistent manner""" if part not in partdir: partdir[part] = "P%07d" % (len(partdir)+1) return partdir[part] def sanitize_filename(fname): """Given a filename, sanitize each part and return it.""" ofn = fname jfn = fname if jfn[0]=='/': jfn=jfn[1:] pathok = False for p in ok_top_paths: if jfn.lower().startswith(p): pathok = True if not pathok: # if the path is not okay, replace all of the parts # and the name up to the .ext parts = fname.split("/") parts[:-1] = [sanitize_part(s) for s in parts[:-1]] (root,ext) = os.path.splitext(parts[-1]) if ext not in acceptable_extensions: parts[-1] = sanitize_part(root) + ext fname = "/".join(parts) if ofn[0]=='/' and fname[0]!='/': fname = "/" + fname return fname class xml_sanitizer: """Read and write the XML, but sanitize the filename elementss.""" def __init__(self,out): self.out = out self.cdata = "" def _start_element(self, name, attrs): """ Handles the start of an element for the XPAT scanner""" s = ['<',name] if attrs: for (a,v) in attrs.items(): if '"' not in v: s += [' ',a,'="',v,'"'] else: s += [" ",a,"='",v,"'"] s += ['>'] self.out.write("".join(s)) self.cdata = "" # new element def _end_element(self, name): """Handles the end of an element for the XPAT scanner""" if name=="filename": self.cdata = sanitize_filename(self.cdata) if self.cdata=="\n": self.cdata="" self.out.write("".join([self.cdata,''])) self.cdata = "" def _char_data(self, data): """Handles XML data""" self.cdata += data def process_xml_stream(self,xml_stream): "Run the reader on a given XML 
input stream" import xml.parsers.expat p = xml.parsers.expat.ParserCreate() p.StartElementHandler = self._start_element p.EndElementHandler = self._end_element p.CharacterDataHandler = self._char_data p.ParseFile(xml_stream) if __name__=="__main__": from optparse import OptionParser global options parser = OptionParser() parser.add_option("-t","--test",help='Test a specific pathanme to sanitize') (options,args) = parser.parse_args() if options.test: if os.path.isdir(options.test): for (dirpath,dirnames,filenames) in os.walk(options.test): for filename in filenames: fn = dirpath+"/"+filename print("%s\n %s" % (fn,sanitize_filename(fn))) x = xml_sanitizer(sys.stdout) x.process_xml_stream(open(args[0],'rb')) tcpflow/src/dfxml/python/xml2body.py0000755000175000017500000000750412263701334016510 0ustar dimadima#!/usr/bin/env python """xml2body.py Generate a Sluethkit 'body' file from fiwalk XML files. Dave Dittrich """ import sys,time import fiwalk,dfxml,idifference # We are re-using code from idifference.py and over-riding # the process_fi method in the DiskState class. 
from idifference import DiskState def dprint(x): global options if options.debug: print(x) import stat def is_suid(mode): return(mode & stat.S_ISUID == stat.S_ISUID) def is_sgid(mode): return(mode & stat.S_ISGID == stat.S_ISGID) def is_svtx(mode): return(mode & stat.S_ISVTX == stat.S_ISVTX) def is_read(mode): return(mode & stat.S_IREAD == stat.S_IREAD) def is_write(mode): return(mode & stat.S_IWRITE == stat.S_IWRITE) def is_exec(mode): return(mode & stat.S_IEXEC == stat.S_IEXEC) def is_rwxu(mode): return(mode & stat.S_IRWXU == stat.S_IRWXU) def is_rusr(mode): return(mode & stat.S_IRUSR == stat.S_IRUSR) def is_wusr(mode): return(mode & stat.S_IWUSR == stat.S_IWUSR) def is_xusr(mode): return(mode & stat.S_IXUSR == stat.S_IXUSR) def is_rwxg(mode): return(mode & stat.S_IRWXG == stat.S_IRWXG) def is_rgrp(mode): return(mode & stat.S_IRGRP == stat.S_IRGRP) def is_wgrp(mode): return(mode & stat.S_IWGRP == stat.S_IWGRP) def is_xgrp(mode): return(mode & stat.S_IXGRP == stat.S_IXGRP) def is_rwxo(mode): return(mode & stat.S_IRWXO == stat.S_IRWXO) def is_roth(mode): return(mode & stat.S_IROTH == stat.S_IROTH) def is_woth(mode): return(mode & stat.S_IWOTH == stat.S_IWOTH) def is_xoth(mode): return(mode & stat.S_IXOTH == stat.S_IXOTH) # Rather than convert every single permission mode, cache # each one after generating and re-use it next time. 
_modecache = dict() def make_perms(mode): omode = "%o" % mode try: return _modecache[omode] except: pass buf = list("---------") # user perms if is_rusr(mode): buf[0] = 'r' if is_wusr(mode): buf[1] = 'w' if is_suid(mode): if is_xusr(mode): buf[2] = 's' else: buf[2] = 'S' elif is_xusr(mode): buf[2] = 'x' # group perms if is_rgrp(mode): buf[3] = 'r' if is_wgrp(mode): buf[4] = 'w' # set gid if is_sgid(mode): if is_xgrp(mode): buf[5] = 's' else: buf[5] = 'S' elif is_xgrp(mode): buf[5] = 'x' # other perms if is_roth(mode): buf[6] = 'r' if is_woth(mode): buf[7] = 'w' # sticky bit if is_svtx(mode): if is_xoth(mode): buf[8] = 't' else: buf[8] = 'T' elif is_xoth(mode): buf[8] = 'x' perms= "".join([i for i in buf]) _modecache[omode] = perms return perms def process_fi(self,fi): global options dprint("processing %s" % str(fi)) # Is this a directory, or a file of some type? if fi.meta_type() == 2: itype = "d" else: itype = "-" # Concatenate inode meta_type and permissions in human-readable form. perms = itype + make_perms(fi.mode()) print "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s" % ( fi.md5(), fi.filename(), fi.inode(), perms, fi.uid(), fi.gid(), fi.filesize(), fi.atime(), fi.mtime(), fi.ctime(), fi.crtime()) DiskState.process_fi = process_fi if __name__=="__main__": from optparse import OptionParser from copy import deepcopy global options parser = OptionParser() parser.usage = '%prog [options] file1 file2 [file3...] (files can be xml or image files)' parser.add_option("-d","--debug",help="debug",action='store_true') (options,args) = parser.parse_args() if len(args)<1: parser.print_help() sys.exit(1) s = DiskState() for infile in args: dprint(">>> Reading %s" % infile) s.process(infile) tcpflow/src/dfxml/python/Makefile0000644000175000017500000000100612263701334016022 0ustar dimadimacheck: \ check-dfxml \ check-cat_fileobjects \ check-dfxml_tool \ check-idifference-dfxml \ check-mac_timelines @echo "Tests passed!" 
check-dfxml: ./test_dfxml.sh check-cat_fileobjects: ./test_cat_fileobjects.sh check-dfxml_tool: ./test_dfxml_tool.sh check-idifference-dfxml: ./test_idifference_to_dfxml.sh check-mac_timelines: ./test_mac_timelines.sh clean: rm -f dfxml_tool_*xml rm -f cat_test_*.dfxml rm -f idifference_test.txt rm -f idifference_test*.dfxml rm -f demo_mac_timeline*.txt tcpflow/src/dfxml/python/demo_plot_times.py0000644000175000017500000000176212263701334020130 0ustar dimadima#!/usr/bin/python import fiwalk import time if __name__=="__main__": import sys from optparse import OptionParser from sys import stdout parser = OptionParser() parser.usage = '%prog [options] (xmlfile or imagefile)' (options,args) = parser.parse_args() if not args: parser.print_usage() exit(1) sizes = [] dates = {} def callback(fi): sizes.append(fi.filesize()) for (tag,val) in (fi.times().iteritems()): date = val.datetime() dates[date] = dates.get(date,0)+1 fn = args[0] if fn.endswith(".xml"): fiwalk.fiwalk_using_sax(xmlfile=open(fn),callback=callback) else: fiwalk.fiwalk_using_sax(imagefile=open(fn),callback=callback) print("Here is the dates array:") for d in sorted(dates.keys()): print("{} {}".format(d,dates[d])) tcpflow/src/dfxml/python/iextract.py0000755000175000017500000000237712263701334016576 0ustar dimadima#!/usr/bin/env python import dfxml,fiwalk import zipfile,sys,os,os.path,datetime if __name__=="__main__": from optparse import OptionParser parser = OptionParser() parser.add_option("-x", "--xml", dest="xmlfilename", help="Already-created DFXML file for imagefile") parser.usage = '%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... 
class histogram(dict):
    """Manage a histogram.

    A histogram is really a dictionary where the keys are the items being
    counted and the values are the counts.
    """

    def __init__(self):
        # The constructor takes no arguments: a histogram always starts empty.
        dict.__init__(self)

    def add(self, a, count=1):
        """Increase the count of item *a* by *count* (default 1)."""
        self[a] = self.get(a, 0) + count

    def average(self):
        """Return the mean count per distinct item, or 0.0 when empty."""
        if not self:
            return 0.0
        # Divide by the number of distinct items; float() keeps the result
        # a true mean under Python 2 as well.
        return float(sum(self.values())) / len(self)

    def add_array(self, ary):
        """Add each element of *ary* to the histogram."""
        for a in ary:
            self.add(a)

    def add_array_unique(self, ary):
        """Add each distinct element of *ary* to the histogram exactly once."""
        for a in set(ary):
            self.add(a)

    def sortedValues(self):
        """Return a list of (item, count) tuples sorted by item."""
        # items() rather than the Python 2-only iteritems().
        return sorted(self.items())

    def unique_names(self):
        """Return the distinct items that have been counted."""
        return self.keys()

    def unique_count(self):
        """Return the number of distinct items."""
        return len(self.keys())

    def total_names(self):
        # NOTE(review): despite the name this returns the (item, count)
        # pairs; left unchanged because callers may rely on it.
        return self.items()

    def total_count(self):
        """Return the sum of all counts."""
        return sum(self.values())

    def max_count(self):
        """Return the largest single count (raises ValueError when empty)."""
        return max(self.values())

    def names_for_value(self, value):
        """Return every item whose count equals *value*."""
        return [k for k in self.keys() if self[k] == value]

    def topn(self, n=-1):
        """Return [(count1,[item,item]),(count2,[item,item])], highest first.

        Items that share a count are grouped together and sorted.  At most
        *n* count groups are returned; a negative *n* (the default) or None
        means "no limit".
        """
        counts = sorted(set(self.values()), reverse=True)
        # A plain counts[0:n] with the default n=-1 would silently drop the
        # lowest count group, so the "no limit" case is handled explicitly.
        if n is not None and n >= 0:
            counts = counts[:n]
        return [(count, sorted(self.names_for_value(count))) for count in counts]

    def print_topn(self, topn=None, func=False, title=""):
        """Print a ranked table of *topn*, a list as returned by topn().

        When *topn* is omitted the complete ranking of this histogram is
        printed.  If *func* is given, func(item) is appended to each row.
        """
        if topn is None:
            topn = self.topn()
        print(title)
        print(" Rank Count Value(s):")
        print(" ============================")
        rank = 1
        total_count = 0
        for (count, vals) in topn:
            for val in vals:
                fout = ""
                if func:
                    fout = func(val)
                # Only the first row of a tied group shows the rank number.
                if val == vals[0]:
                    srank = "%5d" % rank
                else:
                    srank = "%5s" % ""
                print(" %s %7d %s %s" % (srank, count, val, fout))
                total_count += count
            rank += 1
        print("")
        total = sum(self.values())
        # NOTE(review): this prints the grand total of the histogram, not
        # only the rows shown above; the remainder is reported below.
        print("Total items printed: %d" % total)
        if total - total_count > 0:
            print("Values not printed: %d " % (total - total_count))

    def print_top(self, n=-1, func=False, title=""):
        """Print the *n* highest count groups (all of them when n is -1)."""
        if n != -1:
            print("top %d " % (n))
        topn = self.topn(n)
        self.print_topn(func=func, title=title, topn=topn)

    def print_info(self, n=-1):
        """Print the total count, the unique count and the top-*n* table."""
        print("total count: ", self.total_count())
        print("unique count: ", self.unique_count())
        print("")
        self.print_top(n=n)

    def filter_more(self, n):
        """Return the names that have counts equal to or greater than n."""
        return [k for k in self.keys() if self[k] >= n]

    def make_graph(self, figureTitle='Bargraph', binTitle='', countTitle='Count',
                   saveas='Barchart', reverse=False, horizontal=False,
                   sortValues=False, backend=None):
        """Create a bar chart from the histogram and save it as a PDF.

        Bars are ordered by key unless *sortValues* is true, in which case
        they are ordered by count; *reverse* flips the chosen order.
        *figureTitle*, *binTitle* and *countTitle* label the figure and its
        axes, *horizontal* selects horizontal bars, and the figure is
        written to ``saveas + '.pdf'``.  *backend*, when given, selects the
        matplotlib backend before pyplot is imported.
        """
        import matplotlib
        if backend and matplotlib.get_backend() != backend:
            matplotlib.use(backend)
        import matplotlib.pyplot as plot
        import numpy as np

        # A new figure is needed on every call, or each graph would simply
        # be drawn on top of the previous one.
        fig = plot.figure()

        # add_axes takes fractions of the figure width and height; the 1st
        # and 3rd values must sum to <= 1.0 or the graph runs off the image.
        ax = fig.add_axes([0.25, 0.1, 0.7, 0.8])

        # Sort according to the order requested by the user.
        vals = self.sortedValues()
        if sortValues:
            # key= replaces the Python 2-only cmp-style sort; the sort is
            # stable, so equal counts stay in key order.
            vals.sort(key=lambda pair: pair[1])
        if reverse:
            vals.reverse()

        names = [pair[0] for pair in vals]
        counts = [pair[1] for pair in vals]
        numbins = len(vals)
        barHeight = 0.6
        positions = np.arange(numbins) + 1.5 * barHeight / 2

        # Without an explicit upper limit the edge bar gets cut off from
        # the graph.  The limit is passed positionally (lower bound left
        # unchanged) so no version-specific keyword name is needed.
        plot.ylim(None, (barHeight * 1.5 * numbins) + (1.5 * barHeight / 2))
        plot.title(figureTitle)

        # Make the tick label fonts small.
        for label in ax.yaxis.get_ticklabels():
            label.set_fontsize(6)
        for label in ax.xaxis.get_ticklabels():
            label.set_fontsize(7)

        # Set the font sizes for the axis labels.
        ax.xaxis.get_label().set_fontsize(8.5)
        ax.yaxis.get_label().set_fontsize(8.5)

        # Create and save the graph.  Bar positions and lengths are passed
        # positionally so no version-specific keyword name is needed for them.
        if horizontal:
            plot.ylabel(binTitle)
            plot.xlabel(countTitle)
            plot.yticks(positions, names)
            plot.barh(positions, counts, height=barHeight, align='center')
        else:
            plot.xlabel(binTitle)
            plot.ylabel(countTitle)
            plot.xticks(positions, names)
            plot.bar(positions, counts, width=barHeight, align='center')

        plot.savefig(saveas + '.pdf', format='pdf')
#!/bin/bash
# Run dfxml_tool.py once for every entry of DT_OPTIONS against the parent
# directory, writing each run's output to dfxml_tool_p2_<iteration>.dfxml
# in the current directory.

# Sourced helper; presumably it defines PYTHON2 and PYTHON3 -- confirm there.
. _pick_pythons.sh

#Halt on error
set -e
#Display all executed commands
set -x

#Flags listed here in alphabetical order
# Entry 0 is empty, so the first iteration runs the tool with no extra flag.
DT_OPTIONS[0]=
DT_OPTIONS[1]=--allprovenance
DT_OPTIONS[2]=--commandline
DT_OPTIONS[3]=--includedirs
DT_OPTIONS[4]=--iso-8601
DT_OPTIONS[5]=--md5
DT_OPTIONS[6]=--nofilenames
DT_OPTIONS[7]=--nometadata
DT_OPTIONS[8]=--pythonversion
DT_OPTIONS[9]=--sha1
DT_OPTIONS[10]=--sha256
DT_OPTIONS[11]="--stripleaddirs 1"
DT_OPTIONS[12]="--stripprefix .."

iter=0
for x in "${DT_OPTIONS[@]}"; do
  echo "Iteration $iter: Testing $x" >&2
  # $x is left unquoted, so an entry such as "--stripleaddirs 1" is
  # word-split into a flag and its value, and the empty entry expands to
  # no argument at all.
  "$PYTHON2" dfxml_tool.py $x .. > dfxml_tool_p2_${iter}.dfxml
  # The Python 3 run below is disabled.
#  "$PYTHON3" dfxml_tool.py "--$x" .. > dfxml_tool_p3_${iter}.dfxml
  iter=$(($iter+1))
done
statbag() delta_hist = histogram() delta_re = re.compile("(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?") for i in filter( (lambda(f): f.ext()==ftype and f.fragments==2),all_files): runs = False if(hasattr(i,'block_runs')): runs = i.block_runs if(hasattr(i,'sector_runs')): runs = i.sector_runs if not runs: continue m = delta_re.search(runs) r = [] for j in range(1,5): try: r.append(int(m.group(j))) except TypeError: r.append(int(m.group(j-1))) len1 = r[1] - r[0] + 1 len2 = r[3] - r[2] + 1 delta = r[2]-r[1] len1stats.addx(len1) len2stats.addx(len2) delta_hist.add(delta) if len1stats.count()>0: print "\n\n" print "fstype:",fstype," ftype:",ftype print "len1 average: %f stddev: %f" % (len1stats.average(),len1stats.stddev()) print "len2 average: %f stddev: %f" % (len2stats.average(),len2stats.stddev()) print "delta average: %f" % delta_hist.average() print "delta histogram:" delta_hist.print_top(10) exit(0) print("Partition histogram:") partition_histogram.print_top(n=100) print("Counts by extension:") extension_len_histogram.print_top(n=100) print("Fragments by extension:") extension_fragments_histogram.print_top(n=100) exit(0) for fstype in fstypes: if fstype=='(unrecognized)': continue print fstype,"Partitions:" def isfstype(x): return x.fstype==fstype these_parts = filter(isfstype,all_parts) these_files = [] for part in these_parts: these_files.extend(part.files) print fragmentation_table(these_files) exit(0) sys.exit(0) # # Typeset information about file extensions # hist_exts = histogram2d() hist_exts.topn = 20 for i in all_files: if i.size>0 and i.fragments>0: hist_exts.add(i.ext(),i.size) tab = table() tab.header = "File extension popularity and average size (suppressing 0-len files)" tab.col_headings = ['Ext','Count','Average Size','Max','Std Dev'] tab.omit_row = [[0,'']] hist_exts.statcol = ['iaverage','maxx','istddev'] print hist_exts.typeset(t=tab) hist_exts = histogram2d() hist_exts.topn = 20 for i in all_files: if i.fragments>0: hist_exts.add(i.ext(),i.fragments) tab = 
def get_dates_and_counts(times):
    """Return a date-sorted list of (date, count) pairs for *times*.

    Every entry of *times* is converted with time.gmtime() and reduced to
    its UTC calendar day; the count is the number of entries that fall on
    that day.
    """
    from datetime import date

    tally = {}
    for stamp in times:
        utc = time.gmtime(stamp)
        day = date(utc.tm_year, utc.tm_mon, utc.tm_mday)
        if day in tally:
            tally[day] += 1
        else:
            tally[day] = 1

    # Sorting the (date, count) tuples orders them by date, because every
    # date occurs only once as a key.
    return sorted(tally.items())
def version3(times):
    """Plot the number of files per day as a bar chart and save hist.pdf.

    *times* is the list of timestamps handed to get_dates_and_counts().
    The chart has one bar per calendar day, yearly ticks on the x axis, and
    is written to "hist.pdf" in the current directory.  Nothing is plotted
    when *times* is empty.
    """
    import datetime
    import matplotlib.pyplot as pyplot
    import matplotlib.dates as mdates

    dates_and_counts = get_dates_and_counts(times)
    if not dates_and_counts:
        # zip(*[]) cannot be unpacked into two names, and min()/max() below
        # would fail on empty input.
        print("no timestamps to plot")
        return
    dates, counts = zip(*dates_and_counts)

    years = mdates.YearLocator()   # every year
    yearsFmt = mdates.DateFormatter('%Y')

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.bar(dates,counts)
    ax.set_ylabel('file count')
    ax.set_xlabel('file modification time (mtime)')

    # Format the ticks
    ax.xaxis.set_major_locator(years)
    ax.xaxis.set_major_formatter(yearsFmt)

    # Show whole years.  The upper limit is January 1st of the year *after*
    # the newest date; stopping at January 1st of the newest year would cut
    # every bar of that final year out of the plot.
    datemin = datetime.date(min(dates).year, 1, 1)
    datemax = datetime.date(max(dates).year + 1, 1, 1)
    ax.set_xlim(datemin, datemax)
    ax.set_ylim(0,max(counts))

    # Format the coords message box: x is a date, y is a file count.
    def count_label(y):
        return '%d' % y
    ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
    ax.format_ydata = count_label
    ax.grid(True)

    # rotates and right aligns the x labels, and moves the bottom of the
    # axes up to make room for them
    fig.autofmt_xdate()

    # Save through the figure object itself instead of a module-level
    # "plt" name that this function never imports.
    fig.savefig("hist.pdf",format='pdf')
    print("dates:",dates)
    print("num dates:",len(dates))
len(sys.argv) < 2: print("Usage: {} ".format(sys.argv[0])) exit(1) dfxml.read_regxml(xmlfile=open(sys.argv[1],"rb"), callback=process) timeline.sort() for record in timeline: print("\t".join( map(str, record)) ) if __name__ == "__main__": main() tcpflow/src/dfxml/python/test_cat_fileobjects.sh0000755000175000017500000000212312263701334021101 0ustar dimadima#!/bin/bash . _pick_pythons.sh XMLLINT=`which xmllint` #Halt on error set -e #Display all executed commands set -x #NOTE: Python2's ETree does not understand the "unicode" output encoding. #"$PYTHON2" cat_fileobjects.py ../samples/simple.xml "$PYTHON3" cat_fileobjects.py --debug ../samples/simple.xml >cat_test_nocache.dfxml "$PYTHON3" cat_fileobjects.py --debug --cache ../samples/simple.xml >cat_test_cache.dfxml #This checks that the XML structure wasn't changed by cache cleaning. Only the tail is hashed because the head contains metadata. test "x$(tail -n 10 cat_test_nocache.dfxml | openssl dgst -sha1 -)" == "x$(tail -n 10 cat_test_cache.dfxml | openssl dgst -sha1 -)" if [ -x "$XMLLINT" ]; then "$PYTHON3" cat_fileobjects.py ../samples/simple.xml | "$XMLLINT" - else echo "Warning: xmllint not found. Skipped check for if generated DFXML is valid XML." 
/**
 * Thin C++ wrapper around a pthread mutex.
 *
 * The mutex is initialised in the constructor and destroyed in the
 * destructor.  The nested class cppmutex::lock acquires the mutex when it
 * is constructed and releases it when it goes out of scope.
 */
class cppmutex {
    // default copy construction and assignment are meaningless
    // and not implemented
    cppmutex(const cppmutex &c);
    cppmutex &operator=(const cppmutex &cp);
public:
    pthread_mutex_t M;                  // the wrapped pthread mutex
public:
    /** Initialise the mutex with default attributes; exits the process on failure. */
    cppmutex():M(){
        // pthread functions report failure through their return value and
        // do not set errno, so the returned code is what must be decoded.
        const int rc = pthread_mutex_init(&M,NULL);
        if(rc!=0){
            std::cerr << "pthread_mutex_init failed: " << strerror(rc) << "\n";
            exit(1);
        }
    }
    virtual ~cppmutex(){
        pthread_mutex_destroy(&M);
    }
    /** Scoped lock: holds the given cppmutex for the lifetime of the object. */
    class lock {
    private:
        cppmutex &myMutex;
        // Copying a lock is not allowed (it would unlock the mutex twice).
        lock(const lock &lock_):myMutex(lock_.myMutex){}
    public:
        /** Block until mutex m has been acquired. */
        lock(cppmutex &m):myMutex(m){
            pthread_mutex_lock(&myMutex.M);
        }
        /** Release the mutex acquired by the constructor. */
        ~lock(){
            pthread_mutex_unlock(&myMutex.M);
        }
    };
};
/** @brief One byte_run entry taken from a dfxml file
 *
 * Holds the offsets, the length and the owning file name of a run.
 * Runs are ordered by their offset in the image and then by their length.
 */
class byte_run_t {
public:
    ///@{ @name Data Items
    uint64_t    file_offset;
    uint64_t    img_offset;
    uint64_t    len;
    std::string filename;
    ///@}

public:
    ///@{ @name Constructors
    /** @brief Build a byte_run from its four fields
     *
     * @param _file_offset value stored in file_offset
     * @param _img_offset  value stored in img_offset
     * @param _len         value stored in len
     * @param _filename    value stored in filename
     */
    byte_run_t(uint64_t _file_offset, uint64_t _img_offset, uint64_t _len,
               const std::string& _filename)
        : file_offset(_file_offset),
          img_offset(_img_offset),
          len(_len),
          filename(_filename)
    {
    }
    ///@}

public:
    ///@{ @name Methods
    /** @brief Strict ordering: image offset first, run length second
     *
     * @param rhs run to compare against
     * @return true when this run sorts before rhs
     */
    inline bool operator<(const byte_run_t& rhs) const {
        if (img_offset != rhs.img_offset) {
            return img_offset < rhs.img_offset;
        }
        return len < rhs.len;
    }

    /** @brief Write a run as "[file_offset img_offset len filename]"
     *
     * @param[in,out] out  Stream to write to
     * @param[in]     item Run to write
     * @return The stream passed in
     */
    friend std::ostream& operator<<(std::ostream& out, const byte_run_t& item) {
        out << "[";
        out << item.file_offset << " ";
        out << item.img_offset << " ";
        out << item.len << " ";
        out << item.filename;
        out << "]";
        return out;
    }
    ///@}
};
*/ class extents_t { private: std::string dfxml_filename; public: //using extents_set_t = std::set; typedef std::set extents_set_t; extents_set_t extents; public: ///@{ @name Constructors /** @brief Load the byte_runs from a provided dfxml file * * @param[in] dfxml file name */ extents_t(const std::string& p_dfxml_filename) : dfxml_filename(p_dfxml_filename) { dfxml::file_object_reader::read_dfxml( dfxml_filename, [&] (dfxml::file_object& fi) { // lambda function to process byte_runs into the extents set for(const auto& item : fi.byte_runs) { extents.emplace(item.file_offset,item.img_offset, item.len, fi.filename()); } }); } ///@} public: ///@{ @name Member Functions /** @brief find the byte_run (if any) containing the provided offset * * Returns a reference to the byte_run or throws a std::range_error * if no byte_run found. * * @param[in] offset */ const byte_run_t& find(uint64_t offset) { auto it = extents.upper_bound(byte_run_t(0,offset,std::numeric_limits::max(),"")); if (it == extents.begin()) { throw std::range_error("Item not in dataset");; } --it; if (it->img_offset + it->len > offset) { return *it; } else { throw std::range_error("Item not in dataset");; } } ///@} }; int main(int argc, char** argv) { // load the extents from the dfxml extents_t extents(argv[1]); // Print them back out again //for (const auto& extent : extents.extents) { // std::cout << extent << std::endl; //} // A test list of offsets to look up std::vector offsets = { 2098176, 2621440, 3146752, 3163136, 3195904, 3212288, 3687424, 4195328, 8908800, 8913920, 9044992, 9208832, 9305088, 9306112, 9307136, 9388032, 9389056, 9418752, 9418751, 0 }; // Try to look up each of the offsets: for (uint64_t offset : offsets) { try { std::cerr << extents.find(offset) << " (" << offset << ")" << std::endl; } catch (const std::runtime_error& e) { std::cerr << "Not in file: " << offset << " : " << e.what() << "\n"; } } return 0; } 
tcpflow/src/dfxml/src/depcomp0000755000175000017500000005601612263701637015226 0ustar dimadima#! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2013-05-30.07; # UTC # Copyright (C) 1999-2013 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. Environment variables: depmode Dependency tracking mode. source Source file read by 'PROGRAMS ARGS'. object Object file output by 'PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputting dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac # Get the directory component of the given path, and save it in the # global variables '$dir'. 
Note that this directory component will # be either empty or ending with a '/' character. This is deliberate. set_dir_from () { case $1 in */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; *) dir=;; esac } # Get the suffix-stripped basename of the given path, and save it the # global variable '$base'. set_base_from () { base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` } # If no dependency file was actually created by the compiler invocation, # we still have to create a dummy depfile, to avoid errors with the # Makefile "include basename.Plo" scheme. make_dummy_depfile () { echo "#dummy" > "$depfile" } # Factor out some common post-processing of the generated depfile. # Requires the auxiliary global variable '$tmpdepfile' to be set. aix_post_process_depfile () { # If the compiler actually managed to produce a dependency file, # post-process it. if test -f "$tmpdepfile"; then # Each line is of the form 'foo.o: dependency.h'. # Do two passes, one to just change these to # $object: dependency.h # and one to simply output # dependency.h: # which is needed to avoid the deleted-header problem. { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" } > "$depfile" rm -f "$tmpdepfile" else make_dummy_depfile fi } # A tabulation character. tab=' ' # A newline character. nl=' ' # Character ranges might be problematic outside the C locale. # These definitions help. upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ lower=abcdefghijklmnopqrstuvwxyz digits=0123456789 alpha=${upper}${lower} if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. 
depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Avoid interferences from the environment. gccflag= dashmflag= # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi cygpath_u="cygpath -u -f -" if test "$depmode" = msvcmsys; then # This is just like msvisualcpp but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvisualcpp fi if test "$depmode" = msvc7msys; then # This is just like msvc7 but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvc7 fi if test "$depmode" = xlc; then # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. gccflag=-qmakedep=gcc,-MF depmode=gcc fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. 
for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. ## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. ## (see the conditional assignment to $gccflag above). ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). Also, it might not be ## supported by the other compilers which use the 'gcc' depmode. ## - Using -M directly means running the compiler twice (even worse ## than renaming). if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The second -e expression handles DOS-style file names with drive # letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the "deleted header file" problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. ## Some versions of gcc put a space before the ':'. On the theory ## that the space means something, we add a space to the output as ## well. hp depmode also adds that space, but also prefixes the VPATH ## to the object. 
Take care to not repeat it in the output. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # the IRIX cc adds comments like '#:fec' to the end of the # dependency line. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ | tr "$nl" ' ' >> "$depfile" echo >> "$depfile" # The second pass generates a dummy entry for each header file. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" ;; xlc) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. Also, the AIX compiler puts '$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. 
set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.u tmpdepfile2=$base.u tmpdepfile3=$dir.libs/$base.u "$@" -Wc,-M else tmpdepfile1=$dir$base.u tmpdepfile2=$dir$base.u tmpdepfile3=$dir$base.u "$@" -M fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done aix_post_process_depfile ;; tcc) # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 # FIXME: That version still under development at the moment of writing. # Make that this statement remains true also for stable, released # versions. # It will wrap lines (doesn't matter whether long or short) with a # trailing '\', as in: # # foo.o : \ # foo.c \ # foo.h \ # # It will put a trailing '\' even on the last line, and will use leading # spaces rather than leading tabs (at least since its commit 0394caf7 # "Emit spaces for -MD"). "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. # We have to change lines of the first kind to '$object: \'. sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" # And for each line of the second kind, we have to emit a 'dep.h:' # dummy dependency, to avoid the deleted-header problem. sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" rm -f "$tmpdepfile" ;; ## The order of this option in the case statement is important, since the ## shell code in configure will try each of these formats in the order ## listed in this file. A plain '-MD' option would be understood by many ## compilers, so we must ensure this comes after the gcc and icc options. pgcc) # Portland's C compiler understands '-MD'. # Will always output deps to 'file.d' where file is the root name of the # source file under compilation, even if file resides in a subdirectory. 
# The object file name does not affect the name of the '.d' file. # pgcc 10.2 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using '\' : # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... set_dir_from "$object" # Use the source, not the object, to determine the base name, since # that's sadly what pgcc will do too. set_base_from "$source" tmpdepfile=$base.d # For projects that build the same source file twice into different object # files, the pgcc approach of using the *source* file root name can cause # problems in parallel builds. Use a locking strategy to avoid stomping on # the same $tmpdepfile. lockdir=$base.d-lock trap " echo '$0: caught signal, cleaning up...' >&2 rmdir '$lockdir' exit 1 " 1 2 13 15 numtries=100 i=$numtries while test $i -gt 0; do # mkdir is a portable test-and-set. if mkdir "$lockdir" 2>/dev/null; then # This process acquired the lock. "$@" -MD stat=$? # Release the lock. rmdir "$lockdir" break else # If the lock is being held by a different process, wait # until the winning process is done or we timeout. while test -d "$lockdir" && test $i -gt 0; do sleep 1 i=`expr $i - 1` done fi i=`expr $i - 1` done trap - 1 2 13 15 if test $i -le 0; then echo "$0: failed to acquire lock after $numtries attempts" >&2 echo "$0: check lockdir '$lockdir'" >&2 exit 1 fi if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form `foo.o: dependent.h', # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this invocation # correctly. Breaking it into two sed invocations is a workaround. 
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. # Much of this is similar to the tru64 case; see comments there. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" # Add 'dependent.h:' lines. sed -ne '2,${ s/^ *// s/ \\*$// s/$/:/ p }' "$tmpdepfile" >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in 'foo.d' instead, so we check for that too. # Subdirectories are respected. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then # Libtool generates 2 separate objects for the 2 libraries. These # two compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. 
tmpdepfile1=$dir$base.o.d # libtool 1.5 tmpdepfile2=$dir.libs/$base.o.d # Likewise. tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d "$@" -MD fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done # Same post-processing that is required for AIX mode. aix_post_process_depfile ;; msvc7) if test "$libtool" = yes; then showIncludes=-Wc,-showIncludes else showIncludes=-showIncludes fi "$@" $showIncludes > "$tmpdepfile" stat=$? grep -v '^Note: including file: ' "$tmpdepfile" if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The first sed program below extracts the file names and escapes # backslashes for cygpath. The second sed program outputs the file # name when reading, but also accumulates all include files in the # hold buffer in order to output them again at the end. This only # works with sed implementations that can handle large buffers. sed < "$tmpdepfile" -n ' /^Note: including file: *\(.*\)/ { s//\1/ s/\\/\\\\/g p }' | $cygpath_u | sort -u | sed -n ' s/ /\\ /g s/\(.*\)/'"$tab"'\1 \\/p s/.\(.*\) \\/\1:/ H $ { s/.*/'"$tab"'/ G p }' >> "$depfile" echo >> "$depfile" # make sure the fragment doesn't end with a backslash rm -f "$tmpdepfile" ;; msvc7msys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. 
if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for ':' # in the target name. This is to cope with DOS-style filenames: # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. "$@" $dashmflag | sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this sed invocation # correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? # Remove any Libtool call if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # X makedepend shift cleared=no eat=no for arg do case $cleared in no) set ""; shift cleared=yes ;; esac if test $eat = yes; then eat=no continue fi case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. -arch) eat=yes ;; -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix=`echo "$object" | sed 's/^.*\././'` touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" # makedepend may prepend the VPATH from the source file name to the object. # No need to regex-escape $object, excess matching of '.' is harmless. 
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process the last invocation # correctly. Breaking it into two sed invocations is a workaround. sed '1,2d' "$tmpdepfile" \ | tr ' ' "$nl" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E \ | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi IFS=" " for arg do case "$arg" in -o) shift ;; $object) shift ;; "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E 2>/dev/null | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" echo "$tab" >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; msvcmsys) # This case exists only to let depend.m4 do its work. 
It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: tcpflow/src/dfxml/src/configure.ac0000644000175000017500000001156512263701334016131 0ustar dimadima# -*- Autoconf -*- # Process this file with autoconf to produce a configure script. # Order is largely irrevellant, although it must start with AC_INIT and end with AC_OUTPUT # See http://autotoolset.sourceforge.net/tutorial.html # and http://www.openismus.com/documents/linux/automake/automake.shtml AC_PREREQ(2.57) AC_INIT(DFXML, 0.0.1, bugs@afflib.org) AC_CONFIG_MACRO_DIR(m4) AC_CONFIG_FILES([Makefile]) AC_CONFIG_HEADER([config.h]) AM_INIT_AUTOMAKE AM_MAINTAINER_MODE # Programs we will be using AC_PROG_CC AC_PROG_CXX AM_PROG_CC_C_O dnl allow per-produce flags AC_PROG_INSTALL # use C++11 mode if available; HAVE_CXX11 is defined in config.h if so. Don't # use the GNU C++11 extensions for portability's sake (noext). 
AC_LANG_PUSH(C++) AX_CXX_COMPILE_STDCXX_11(noext, optional) AC_LANG_POP() m4_include([dfxml_configure.m4]) ################################################################ ## Expat ## Required for S3 and Digital Signatures ## AC_ARG_WITH(expat, AS_HELP_STRING([--with-expat=PATH], [where libexpat is compiled (if it isn't installed); required for S3 and Digital Signatures]), [LDFLAGS="-L${with_expat} $LDFLAGS" ; CPPFLAGS="-I${with_expat}/lib $CPPFLAGS"]) have_expat=yes AC_CHECK_HEADER([expat.h]) AC_CHECK_LIB([expat],[XML_ParserCreate],,[have_expat="no ";AC_MSG_WARN([expat not found; S3 and Digital Signatures not enabled])]) ################################################################ # # Enable all the compiler debugging we can find # # This is originally from PhotoRec, but modified substantially by Simson # Figure out which flags we can use with the compiler. # # These I don't like: # -Wdeclaration-after-statement -Wconversion # doesn't work: -Wunreachable-code # causes configure to crash on gcc-4.2.1: -Wsign-compare-Winline # causes warnings with unistd.h: -Wnested-externs # Just causes too much annoyance: -Wmissing-format-attribute # Check GCC WARNINGS_TO_TEST="-MD -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes \ -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return \ -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization \ -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn " if test x"${mingw}" != "xyes" ; then # add the warnings we do not want to do on mingw WARNINGS_TO_TEST="$WARNINGS_TO_TEST -Wall -Wstrict-prototypes" fi echo "Warnings to test: $WARNINGS_TO_TEST" for option in $WARNINGS_TO_TEST do SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $option" AC_MSG_CHECKING([whether gcc understands $option]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [has_option=yes], [has_option=no; CFLAGS="$SAVE_CFLAGS"]) AC_MSG_RESULT($has_option) unset has_option unset SAVE_CFLAGS if test $option = "-Wmissing-format-attribute" ; 
then AC_DEFINE(HAVE_MISSING_FORMAT_ATTRIBUTE_WARNING,1, [Indicates that we have the -Wmissing-format-attribute G++ warning]) fi done unset option # Check G++ # We don't use these warnings: # -Waggregate-return -- aggregate returns are GOOD; they simplify code design # We can use these warnings after ZLIB gets upgraded: # -Wundef --- causes problems with zlib # -Wcast-qual # -Wmissing-format-attribute --- Just too annoying AC_LANG_PUSH(C++) WARNINGS_TO_TEST="-Wall -MD -Wpointer-arith \ -Wshadow -Wwrite-strings -Wcast-align \ -Wredundant-decls -Wdisabled-optimization \ -Wfloat-equal -Wmultichar -Wmissing-noreturn \ -Wstrict-null-sentinel -Woverloaded-virtual -Wsign-promo \ -funit-at-a-time" if test x"${mingw}" != "xyes" ; then # add the warnings we don't want to do on mingw WARNINGS_TO_TEST="$WARNINGS_TO_TEST -Weffc++" fi echo "Warnings to test: $WARNINGS_TO_TEST" for option in $WARNINGS_TO_TEST do SAVE_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $option" AC_MSG_CHECKING([whether g++ understands $option]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [has_option=yes], [has_option=no; CXXFLAGS="$SAVE_CXXFLAGS"]) AC_MSG_RESULT($has_option) unset has_option unset SAVE_CXXFLAGS done unset option AC_LANG_POP() ################################################################ # Specify our other headers AC_HEADER_STDC AC_CHECK_HEADERS([\ string.h \ strings.h sys/cdefs.h sys/resource.h sys/types.h \ sys/socket.h \ unistd.hsys/bitypes.h \ arpa/inet.h \ fcntl.h \ semaphore.h \ inttypes.h \ netinet/in.h netinet/tcp.h netinet/in_systm.h netinet/ip.h \ netinet/if_ether.h \ pcap.h \ pcap/pcap.h \ linux/if_ether.h signal.h]) ############## drop optimization flags if requested ################ AC_ARG_WITH(noopt, AC_HELP_STRING([--with-noopt], [Drop -O C flags])) if test x"${AFF_NOOPT}" != "x" ; then with_noopt="yes"; fi if test "${with_noopt}" = "yes" ; then CFLAGS=`echo "$CFLAGS" | sed s/-O[[0-9]]//` # note the double quoting! 
CXXFLAGS=`echo "$CXXFLAGS" | sed s/-O[[0-9]]//` fi AC_OUTPUT tcpflow/src/dfxml/src/dfxml_writer.cpp0000644000175000017500000005021312263701334017046 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /** * implementation for C++ XML generation class * * The software provided here is released by the Naval Postgraduate * School, an agency of the U.S. Department of Navy. The software * bears no warranty, either expressed or implied. NPS does not assume * legal liability nor responsibility for a User's use of the software * or the results of such use. * * Please note that within the United States, copyright protection, * under Section 105 of the United States Code, Title 17, is not * available for any work of the United States Government and/or for * any works created by United States Government employees. User * acknowledges that this software contains work which was created by * NPS government employees and is therefore in the public domain and * not subject to copyright. 
*/ #include "config.h" #ifdef WIN32 #include #endif #include #include #ifdef HAVE_SQLITE3_H #include #endif #ifdef HAVE_BOOST_VERSION_HPP #include #endif #ifdef HAVE_PTHREAD #define MUTEX_INIT(M) pthread_mutex_init(M,NULL); #define MUTEX_LOCK(M) pthread_mutex_lock(M) #define MUTEX_UNLOCK(M) pthread_mutex_unlock(M) #else #define MUTEX_INIT(M) {} #define MUTEX_LOCK(M) {} #define MUTEX_UNLOCK(M) {} #endif #ifdef HAVE_TRE_TRE_H #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef _MSC_VER # include #else # include #endif using namespace std; #include "dfxml_writer.h" static const char *xml_header = "\n"; // Implementation of mkstemp for windows found on pan-devel mailing // list archive // @http://www.mail-archive.com/pan-devel@nongnu.org/msg00294.html #ifndef _S_IREAD #define _S_IREAD 256 #endif #ifndef _S_IWRITE #define _S_IWRITE 128 #endif #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef _O_SHORT_LIVED #define _O_SHORT_LIVED 0 #endif #ifndef HAVE_MKSTEMP int mkstemp(char *tmpl) { int ret=-1; mktemp(tmpl); ret=open(tmpl,O_RDWR|O_BINARY|O_CREAT|O_EXCL|_O_SHORT_LIVED, _S_IREAD|_S_IWRITE); return ret; } #endif #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef _O_SHORT_LIVED #define _O_SHORT_LIVED 0 #endif std::string dfxml_writer::xml_PRId32("%" PRId32); // gets around compiler bug std::string dfxml_writer::xml_PRIu32("%" PRIu32); // gets around compiler bug std::string dfxml_writer::xml_PRId64("%" PRId64); // gets around compiler bug std::string dfxml_writer::xml_PRIu64("%" PRIu64); // gets around compiler bug static const char *cstr(const string &str){ return str.c_str(); } // XML escapes static string xml_lt("<"); static string xml_gt(">"); static string xml_am("&"); static string xml_ap("'"); static string xml_qu("""); // % encodings static string encoding_null("%00"); static string encoding_r("%0D"); static string encoding_n("%0A"); static string encoding_t("%09"); std::string 
dfxml_writer::xmlescape(const string &xml) { string ret; for(string::const_iterator i = xml.begin(); i!=xml.end(); i++){ switch(*i){ // XML escapes case '>': ret += xml_gt; break; case '<': ret += xml_lt; break; case '&': ret += xml_am; break; case '\'': ret += xml_ap; break; case '"': ret += xml_qu; break; // % encodings case '\000': ret += encoding_null; break; // retain encoded nulls case '\r': ret += encoding_r; break; case '\n': ret += encoding_n; break; case '\t': ret += encoding_t; break; default: ret += *i; } } return ret; } /** * Strip an XML string as necessary for a tag name. */ std::string dfxml_writer::xmlstrip(const string &xml) { string ret; for(string::const_iterator i = xml.begin(); i!=xml.end(); i++){ if(isprint(*i) && !strchr("<>\r\n&'\"",*i)){ ret += isspace(*i) ? '_' : tolower(*i); } } return ret; } /** * xmlmap: * Turns a map into a blob of XML. */ std::string dfxml_writer::xmlmap(const dfxml_writer::strstrmap_t &m,const std::string &outer,const std::string &attrs) { std::stringstream ss; ss << "<" << outer; if(attrs.size()>0) ss << " " << attrs; ss << ">"; for(std::map::const_iterator it=m.begin();it!=m.end();it++){ ss << "<" << (*it).first << ">" << xmlescape((*it).second) << ""; } ss << ""; return ss.str(); } /* This goes to stdout */ dfxml_writer::dfxml_writer():M(),outf(),out(&cout),tags(),tag_stack(),tempfilename(),tempfile_template("/tmp/xml_XXXXXXXX"), t0(),t_last_timestamp(),make_dtd(false),outfilename(),oneline() { #ifdef HAVE_PTHREAD pthread_mutex_init(&M,NULL); #endif gettimeofday(&t0,0); gettimeofday(&t_last_timestamp,0); *out << xml_header; } /* This should be rewritten so that the temp file is done on close, not on open */ dfxml_writer::dfxml_writer(const std::string &outfilename_,bool makeDTD): M(),outf(outfilename_.c_str(),ios_base::out), out(),tags(),tag_stack(),tempfilename(),tempfile_template(outfilename_+"_tmp_XXXXXXXX"), t0(),t_last_timestamp(),make_dtd(false),outfilename(outfilename_),oneline() { MUTEX_INIT(&M); 
gettimeofday(&t0,0); gettimeofday(&t_last_timestamp,0); if(!outf.is_open()){ perror(outfilename_.c_str()); exit(1); } out = &outf; // use this one instead *out << xml_header; } void dfxml_writer::set_tempfile_template(const std::string &temp) { tempfile_template = temp; } void dfxml_writer::close() { MUTEX_LOCK(&M); outf.close(); if(make_dtd){ /* If we are making the DTD, then we should close the file, * scan the output file for the tags, write to a temp file, and then * close the temp file and have it overwrite the outfile. */ std::ifstream in(cstr(tempfilename)); if(!in.is_open()){ cerr << tempfilename << strerror(errno) << ":Cannot re-open for input\n"; exit(1); } outf.open(cstr(outfilename),ios_base::out); if(!outf.is_open()){ cerr << outfilename << " " << strerror(errno) << ": Cannot open for output; will not delete " << tempfilename << "\n"; exit(1); } // copy over first line --- the XML header std::string line; getline(in,line); outf << line; write_dtd(); // write the DTD while(!in.eof()){ getline(in,line); outf << line << endl; } in.close(); unlink(cstr(tempfilename)); outf.close(); } MUTEX_UNLOCK(&M); } void dfxml_writer::write_dtd() { *out << "::const_iterator it = tags.begin(); it != tags.end(); it++){ *out << "\n"; } *out << "\n"; *out << "\n"; *out << "\n"; *out << "]>\n"; } /** * make sure that a tag is valid and, if so, add it to the list of tags we use */ void dfxml_writer::verify_tag(string tag) { if(tag[0]=='/') tag = tag.substr(1); if(tag.find(" ") != string::npos){ cerr << "tag '" << tag << "' contains space. Cannot continue.\n"; exit(1); } tags.insert(tag); } void dfxml_writer::puts(const string &v) { *out << v; } void dfxml_writer::spaces() { for(unsigned int i=0;i0) *out << " " << attribute; *out << ">"; } #if (!defined(HAVE_VASPRINTF)) || defined(_WIN32) #ifndef _WIN32 #define ms_printf __print #define __MINGW_ATTRIB_NONNULL(x) #endif extern "C" { /** * We do not have vasprintf. 
* We have determined that vsnprintf() does not perform properly on windows. * So we just allocate a huge buffer and then strdup() and hope! */ int vasprintf(char **ret,const char *fmt,va_list ap) __attribute__((__format__(ms_printf, 2, 0))) __MINGW_ATTRIB_NONNULL(2) ; int vasprintf(char **ret,const char *fmt,va_list ap) { /* Figure out how long the result will be */ char buf[65536]; int size = vsnprintf(buf,sizeof(buf),fmt,ap); if(size<0) return size; /* Now allocate the memory */ *ret = (char *)strdup(buf); return size; } } #endif void dfxml_writer::printf(const char *fmt,...) { va_list ap; va_start(ap, fmt); /** printf to stream **/ char *ret = 0; if(vasprintf(&ret,fmt,ap) < 0){ *out << "dfxml_writer::xmlprintf: " << strerror(errno); exit(EXIT_FAILURE); } *out << ret; free(ret); /** end printf to stream **/ va_end(ap); } void dfxml_writer::push(const string &tag,const string &attribute) { spaces(); tag_stack.push(tag); tagout(tag,attribute); if(!oneline) *out << '\n'; } void dfxml_writer::pop() { assert(tag_stack.size()>0); string tag = tag_stack.top(); tag_stack.pop(); spaces(); tagout("/"+tag,""); *out << '\n'; } void dfxml_writer::set_oneline(bool v) { if(v==true) spaces(); if(v==false) *out << "\n"; oneline = v; } void dfxml_writer::cpuid(uint32_t op, unsigned long *eax, unsigned long *ebx, unsigned long *ecx, unsigned long *edx) { #if defined(HAVE_ASM_CPUID) && defined(__i386__) #if defined(__PIC__) __asm__ __volatile__("pushl %%ebx \n\t" /* save %ebx */ "cpuid \n\t" "movl %%ebx, %1 \n\t" /* save what cpuid just put in %ebx */ "popl %%ebx \n\t" /* restore the old %ebx */ : "=a"(*eax), "=r"(*ebx), "=c"(*ecx), "=d"(*edx) : "a"(op) : "cc"); #else __asm__ __volatile__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "a"(op) : "cc"); #endif #endif } void dfxml_writer::add_cpuid() { #if defined(__i386__) #ifndef __WORDSIZE #define __WORDSIZE 32 #endif #define BFIX(val, base, end) ((val << (__WORDSIZE-end-1)) >> (__WORDSIZE-end+base-1)) char buf[256]; 
unsigned long eax=0, ebx=0, ecx=0, edx=0; // =0 avoids a compiler warning cpuid(0, &eax, &ebx, &ecx, &edx); snprintf(buf,sizeof(buf),"%.4s%.4s%.4s", (char *)&ebx, (char *)&edx, (char *)&ecx); push("cpuid"); xmlout("identification",buf); cpuid(1, &eax, &ebx, &ecx, &edx); xmlout("family", (int64_t) BFIX(eax, 8, 11)); xmlout("model", (int64_t) BFIX(eax, 4, 7)); xmlout("stepping", (int64_t) BFIX(eax, 0, 3)); xmlout("efamily", (int64_t) BFIX(eax, 20, 27)); xmlout("emodel", (int64_t) BFIX(eax, 16, 19)); xmlout("brand", (int64_t) BFIX(ebx, 0, 7)); xmlout("clflush_size", (int64_t) BFIX(ebx, 8, 15) * 8); xmlout("nproc", (int64_t) BFIX(ebx, 16, 23)); xmlout("apicid", (int64_t) BFIX(ebx, 24, 31)); cpuid(0x80000006, &eax, &ebx, &ecx, &edx); xmlout("L1_cache_size", (int64_t) BFIX(ecx, 16, 31) * 1024); pop(); #undef BFIX #endif } void dfxml_writer::add_DFXML_execution_environment(const std::string &command_line) { push("execution_environment"); #if defined(HAVE_ASM_CPUID) && defined(__i386__) add_cpuid(); #endif #ifdef HAVE_SYS_UTSNAME_H struct utsname name; if(uname(&name)==0){ xmlout("os_sysname",name.sysname); xmlout("os_release",name.release); xmlout("os_version",name.version); xmlout("host",name.nodename); xmlout("arch",name.machine); } #else #ifdef UNAMES xmlout("os_sysname",UNAMES,"",false); #endif #ifdef HAVE_GETHOSTNAME { char hostname[1024]; if(gethostname(hostname,sizeof(hostname))==0){ xmlout("host",hostname); } } #endif #endif xmlout("command_line", command_line); // quote it! 
#ifdef HAVE_GETUID xmlprintf("uid","","%d",getuid()); #ifdef HAVE_GETPWUID xmlout("username",getpwuid(getuid())->pw_name); #endif #endif #define TM_FORMAT "%Y-%m-%dT%H:%M:%SZ" char buf[256]; time_t t = time(0); strftime(buf,sizeof(buf),TM_FORMAT,gmtime(&t)); xmlout("start_time",buf); pop(); // } #ifdef WIN32 #include "psapi.h" #endif void dfxml_writer::add_rusage() { #ifdef WIN32 /* Note: must link -lpsapi for this */ PROCESS_MEMORY_COUNTERS_EX pmc; memset(&pmc,0,sizeof(pmc)); GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc)); push("PROCESS_MEMORY_COUNTERS"); xmlout("cb",(int64_t)pmc.cb); xmlout("PageFaultCount",(int64_t)pmc.PageFaultCount); xmlout("WorkingSetSize",(int64_t)pmc.WorkingSetSize); xmlout("QuotaPeakPagedPoolUsage",(int64_t)pmc.QuotaPeakPagedPoolUsage); xmlout("QuotaPagedPoolUsage",(int64_t)pmc.QuotaPagedPoolUsage); xmlout("QuotaPeakNonPagedPoolUsage",(int64_t)pmc.QuotaPeakNonPagedPoolUsage); xmlout("PagefileUsage",(int64_t)pmc.PagefileUsage); xmlout("PeakPagefileUsage",(int64_t)pmc.PeakPagefileUsage); xmlout("PrivateUsage",(int64_t)pmc.PrivateUsage); pop(); #endif #ifdef HAVE_GETRUSAGE struct rusage ru; memset(&ru,0,sizeof(ru)); if(getrusage(RUSAGE_SELF,&ru)==0){ push("rusage"); xmlout("utime",ru.ru_utime); xmlout("stime",ru.ru_stime); xmloutl("maxrss",(long)ru.ru_maxrss); xmloutl("minflt",(long)ru.ru_minflt); xmloutl("majflt",(long)ru.ru_majflt); xmloutl("nswap",(long)ru.ru_nswap); xmloutl("inblock",(long)ru.ru_inblock); xmloutl("oublock",(long)ru.ru_oublock); struct timeval t1; gettimeofday(&t1,0); struct timeval t; t.tv_sec = t1.tv_sec - t0.tv_sec; if(t1.tv_usec > t0.tv_usec){ t.tv_usec = t1.tv_usec - t0.tv_usec; } else { t.tv_sec--; t.tv_usec = (t1.tv_usec+1000000) - t0.tv_usec; } xmlout("clocktime",t); pop(); } #endif } void dfxml_writer::add_timestamp(const std::string &name) { struct timeval t1; gettimeofday(&t1,0); struct timeval t; // timestamp delta against t_last_timestamp t.tv_sec = t1.tv_sec - 
t_last_timestamp.tv_sec; if(t1.tv_usec > t_last_timestamp.tv_usec){ t.tv_usec = t1.tv_usec - t_last_timestamp.tv_usec; } else { t.tv_sec--; t.tv_usec = (t1.tv_usec+1000000) - t_last_timestamp.tv_usec; } char delta[16]; snprintf(delta, 16, "%d.%06d", (int)t.tv_sec, (int)t.tv_usec); // reset t_last_timestamp for the next invocation gettimeofday(&t_last_timestamp,0); // timestamp total t.tv_sec = t1.tv_sec - t0.tv_sec; if(t1.tv_usec > t0.tv_usec){ t.tv_usec = t1.tv_usec - t0.tv_usec; } else { t.tv_sec--; t.tv_usec = (t1.tv_usec+1000000) - t0.tv_usec; } char total[16]; snprintf(total, 16, "%d.%06d", (int)t.tv_sec, (int)t.tv_usec); // prepare attributes std::stringstream ss; ss << "name='" << name << "' delta='" << delta << "' total='" << total << "'"; // add named timestamp xmlout("timestamp", "",ss.str(), true); } /**************************************************************** *** THESE ARE THE ONLY THREADSAFE ROUTINES ****************************************************************/ void dfxml_writer::comment(const string &comment_) { MUTEX_LOCK(&M); *out << "\n"; out->flush(); MUTEX_UNLOCK(&M); } void dfxml_writer::xmlprintf(const std::string &tag,const std::string &attribute, const char *fmt,...) { MUTEX_LOCK(&M); spaces(); tagout(tag,attribute); va_list ap; va_start(ap, fmt); /** printf to stream **/ char *ret = 0; if(vasprintf(&ret,fmt,ap) < 0){ cerr << "dfxml_writer::xmlprintf: " << strerror(errno) << "\n"; exit(EXIT_FAILURE); } *out << ret; free(ret); /** end printf to stream **/ va_end(ap); tagout("/"+tag,""); *out << '\n'; out->flush(); MUTEX_UNLOCK(&M); } void dfxml_writer::xmlout(const string &tag,const string &value,const string &attribute,bool escape_value) { MUTEX_LOCK(&M); spaces(); if(value.size()==0){ if(tag.size()) tagout(tag,attribute+"/"); } else { if(tag.size()) tagout(tag,attribute); *out << (escape_value ? 
xmlescape(value) : value); if(tag.size()) tagout("/"+tag,""); } *out << "\n"; out->flush(); MUTEX_UNLOCK(&M); } #ifdef HAVE_LIBEWF_H #include #endif #if defined(HAVE_EXIV2) && defined(HAVE_EXIV2_IMAGE_HPP) #ifdef DFXML_GNUC_HAS_DIAGNOSTIC_PRAGMA #pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Weffc++" #endif #include #include #include #endif #ifdef HAVE_HASHID #include #endif #ifdef HAVE_ZMQ_H #include #endif #ifdef HAVE_AFFLIB_AFFLIB_H #pragma GCC diagnostic ignored "-Wreserved-user-defined-literal" // required for C11 #include #endif /* These support Digital Forensics XML and require certain variables to be defined */ void dfxml_writer::add_DFXML_build_environment() { /* __DATE__ formats as: Apr 30 2011 */ struct tm tm; memset(&tm,0,sizeof(tm)); push("build_environment"); #ifdef __GNUC__ // See http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html xmlprintf("compiler","","%d.%d.%d (%s)",__GNUC__, __GNUC_MINOR__,__GNUC_PATCHLEVEL__,__VERSION__); #endif #ifdef CPPFLAGS xmlout("CPPFLAGS",CPPFLAGS,"",true); #endif #ifdef CFLAGS xmlout("CFLAGS",CFLAGS,"",true); #endif #ifdef CXXFLAGS xmlout("CXXFLAGS",CXXFLAGS,"",true); #endif #ifdef LDFLAGS xmlout("LDFLAGS",LDFLAGS,"",true); #endif #ifdef LIBS xmlout("LIBS",LIBS,"",true); #endif #if defined(__DATE__) && defined(__TIME__) && defined(HAVE_STRPTIME) if(strptime(__DATE__,"%b %d %Y",&tm)){ char buf[64]; snprintf(buf,sizeof(buf),"%4d-%02d-%02dT%s",tm.tm_year+1900,tm.tm_mon+1,tm.tm_mday,__TIME__); xmlout("compilation_date",buf); } #endif #ifdef BOOST_VERSION { char buf[64]; snprintf(buf,sizeof(buf),"%d",BOOST_VERSION); xmlout("library", "", std::string("name=\"boost\" version=\"") + buf + "\"",false); } #endif #ifdef HAVE_LIBTSK3 xmlout("library", "", std::string("name=\"tsk\" version=\"") + tsk_version_get_str() + "\"",false); #endif #ifdef HAVE_LIBAFFLIB xmlout("library", "", std::string("name=\"afflib\" version=\"") + af_version() +"\"",false); #endif #ifdef HAVE_LIBEWF 
xmlout("library", "", std::string("name=\"libewf\" version=\"") + libewf_get_version() + "\"",false); #endif #ifdef HAVE_EXIV2 xmlout("library", "", std::string("name=\"exiv2\" version=\"") + Exiv2::version() + "\"",false); #endif #if defined(HAVE_LIBTRE) && defined(HAVE_TRE_VERSION) xmlout("library", "", std::string("name=\"tre\" version=\"") + tre_version() + "\"",false); #endif #ifdef HAVE_HASHID xmlout("library", "", std::string("name=\"hashdb\" version=\"") + hashdb_version() + "\"",false); #endif #ifdef SQLITE_VERSION xmlout("library", "", "name=\"sqlite\" version=\"" SQLITE_VERSION "\" source_id=\"" SQLITE_SOURCE_ID "\"",false); #endif #ifdef HAVE_ZMQ_VERSION { int zmq_major, zmq_minor, zmq_patch; zmq_version (&zmq_major, &zmq_minor, &zmq_patch); stringstream zmq_ss; zmq_ss << zmq_major << "." << zmq_minor << "." << zmq_patch; xmlout("library", "", std::string("name=\"zmq\" version=\"") + zmq_ss.str() + "\"",false); } #endif #ifdef HAVE_GNUEXIF // gnuexif does not have a programmatically obtainable version. 
xmlout("library","","name=\"gnuexif\" version=\"?\"",false); #endif #ifdef GIT_COMMIT xmlout("git", "", "commit=\"" GIT_COMMIT "\"",false); #endif pop(); } tcpflow/src/dfxml/src/cpu_info.c0000644000175000017500000000170012263701334015577 0ustar dimadima#include #include #include #define cpuid(id) __asm__( "cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(id), "b"(0), "c"(0), "d"(0)) #define b(val, base, end) ((val << (__WORDSIZE-end-1)) >> (__WORDSIZE-end+base-1)) int main(int argc, char **argv) { unsigned long eax, ebx, ecx, edx; cpuid(0); printf("identification: \"%.4s%.4s%.4s\"\n", (char *)&ebx, (char *)&edx, (char *)&ecx); printf("cpu information:\n"); cpuid(1); printf(" family %ld model %ld stepping %ld efamily %ld emodel %ld\n", b(eax, 8, 11), b(eax, 4, 7), b(eax, 0, 3), b(eax, 20, 27), b(eax, 16, 19)); printf(" brand %ld cflush sz %ld*8 nproc %ld apicid %ld\n", b(ebx, 0, 7), b(ebx, 8, 15), b(ebx, 16, 23), b(ebx, 24, 31)); cpuid(0x80000006); printf("L1 cache size (per core): %ld KB\n", b(ecx, 16, 31)); return(0); } tcpflow/src/dfxml/src/dfxml_configure.m40000644000175000017500000000360712263701334017256 0ustar dimadima# # mix-ins for dfxml # Support for hash_t as well. # AC_MSG_NOTICE([Including dfxml_configure.m4 from dfxml]) AC_CHECK_HEADERS([afflib/afflib.h err.h expat.h libewf.h pwd.h sys/cdefs.h sys/mman.h sys/resource.h sys/utsname.h unistd.h ]) AC_CHECK_FUNCS([fork localtime_r getuid gethostname getwpuid getrusage mkstemp vasprintf regcomp ]) AC_LANG_PUSH(C++) AC_CHECK_HEADERS([exiv2/image.hpp]) AC_LANG_POP() # Determine UTC date offset CPPFLAGS="$CPPFLAGS -DUTC_OFFSET=`date +%z`" # Get the GIT commit into the GIT_COMMIT variable AC_CHECK_PROG([git],[git],[yes],[no]) AM_CONDITIONAL([FOUND_GIT],[test "x$git" = xyes]) AM_COND_IF([FOUND_GIT], [GIT_COMMIT=`git describe --dirty --always` AC_MSG_NOTICE([git commit $GIT_COMMIT])], [AC_MSG_WARN([git not found])]) # Do we have the CPUID instruction? 
AC_TRY_COMPILE([#define cpuid(id) __asm__( "cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(id), "b"(0), "c"(0), "d"(0))], [unsigned long eax, ebx, ecx, edx;cpuid(0);], have_cpuid=yes, have_cpuid=no) if test "$have_cpuid" = yes; then AC_DEFINE(HAVE_ASM_CPUID, 1, [define to 1 if __asm__ CPUID is available]) fi # Does GCC have the diagnostic pragma? AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wredundant-decls"], [], AC_DEFINE([DFXML_GNUC_HAS_DIAGNOSTIC_PRAGMA],[1],[GCC supports #pragma GCC diagnostic]), ) ################################################################ ## OpenSSL Support is now required (for hash_t) AC_CHECK_HEADERS([openssl/aes.h openssl/bio.h openssl/evp.h openssl/hmac.h openssl/md5.h openssl/pem.h openssl/rand.h openssl/rsa.h openssl/sha.h openssl/pem.h openssl/x509.h]) # OpenSSL has been installed under at least two different names... AC_CHECK_LIB([crypto],[EVP_get_digestbyname]) AC_CHECK_LIB([ssl],[SSL_library_init]) AC_CHECK_FUNCS([EVP_get_digestbyname],, AC_MSG_ERROR([SSL/OpenSSL support required])) tcpflow/src/dfxml/src/dfxml_writer.h0000644000175000017500000001723312263701334016520 0ustar dimadima/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* * Simson's XML output class. * Include this AFTER your config file with the HAVE statements. * Optimized for DFXML generation. 
/**
 * dfxml_writer -- XML output class optimized for DFXML generation.
 *
 * Output is written through `out`; a stack of open tags (`tag_stack`) is
 * maintained by push()/pop(). The xmlout()/xmlprintf()/comment() family is
 * documented below as thread-safe; the remaining members are not marked so.
 *
 * NOTE(review): the template arguments of the three typedefs and of
 * `tag_stack`, and the loop header in make_command_line(), were missing from
 * the text this block was reconstructed from. They are restored here from
 * how the members are used (string tags, string->string maps); confirm
 * `tagid_set_t` against the upstream header.
 */
class dfxml_writer {
private:
    /*** neither copying nor assignment is implemented                         ***
     *** We do this by making them private constructors that throw exceptions. ***/
    dfxml_writer(const dfxml_writer &);
    dfxml_writer &operator=(const dfxml_writer &);
    /****************************************************************/

public:
    typedef std::map<std::string,std::string> strstrmap_t;
    typedef std::set<std::string> stringset;
    typedef std::set<std::string> tagid_set_t;

private:
#ifdef HAVE_PTHREAD
    pthread_mutex_t M;                  // mutex protecting out
#else
    int M;                              // placeholder
#endif
    std::fstream outf;
    std::ostream *out;                  // where it is being written; defaults to stdout
    stringset tags;                     // XML tags
    std::stack<std::string> tag_stack;
    std::string tempfilename;
    std::string tempfile_template;
    struct timeval t0;
    struct timeval t_last_timestamp;    // for creating delta timestamps
    bool make_dtd;
    std::string outfilename;
    void write_doctype(std::fstream &out);
    void write_dtd();
    void verify_tag(std::string tag);
    void spaces();                      // print spaces corresponding to tag stack
    static std::string xml_PRId32;      // for compiler bug
    static std::string xml_PRIu32;      // for compiler bug
    static std::string xml_PRId64;      // for compiler bug
    static std::string xml_PRIu64;      // for compiler bug
    bool oneline;

public:
    /**
     * Join argv[0..argc-1] into one space-separated string.
     * An argument that contains a space is wrapped in double quotes;
     * quote characters inside an argument are not escaped.
     */
    static std::string make_command_line(int argc,char * const *argv){
        std::string command_line;
        for(int i=0;i<argc;i++){
            if(i>0) command_line.push_back(' ');
            if (strchr(argv[i],' ') != NULL) {
                // the argument has a space, so quote the argument
                command_line.append("\"");
                command_line.append(argv[i]);
                command_line.append("\"");
            } else {
                // the argument has no space, so append as is
                command_line.append(argv[i]);
            }
        }
        return command_line;
    }

    dfxml_writer();                                             // defaults to stdout
    dfxml_writer(const std::string &outfilename,bool makeDTD);  // write to a file, optionally making a DTD
    virtual ~dfxml_writer(){};
    void set_tempfile_template(const std::string &temp);

    static std::string xmlescape(const std::string &xml);
    static std::string xmlstrip(const std::string &xml);

    /** xmlmap turns a map into an XML block */
    static std::string xmlmap(const strstrmap_t &m,const std::string &outer,const std::string &attrs);

    void close();                       // writes the output to the file

    /* Flush the stream that is actually written to. `outf` is only that
     * stream when writing to a file, so flushing `outf` alone did nothing
     * when `out` points elsewhere (the default is documented as stdout).
     */
    void flush(){ if(out) out->flush(); }

    void tagout( const std::string &tag,const std::string &attribute);
    void push(const std::string &tag,const std::string &attribute);
    void push(const std::string &tag) {push(tag,"");}

    // writes a std::string as parsed data
    void puts(const std::string &pdata);

    // writes printf-style formatted text as parsed data
    void printf(const char *fmt,...) __attribute__((format(printf, 2, 3))); // "2" because this is "1"
    void pop();                         // close the tag

    void add_timestamp(const std::string &name);
    void add_DFXML_build_environment();
    static void cpuid(uint32_t op, unsigned long *eax, unsigned long *ebx,unsigned long *ecx, unsigned long *edx);
    void add_cpuid();
    void add_DFXML_execution_environment(const std::string &command_line);

    /**
     * Write a complete <creator version='1.0'> element: program name,
     * version, the optional svn revision (omitted when empty), then the
     * build and execution environments.
     */
    void add_DFXML_creator(const std::string &program,const std::string &version,
                           const std::string &svn_r,
                           const std::string &command_line){
        push("creator","version='1.0'");
        xmlout("program",program);
        xmlout("version",version);
        if(svn_r.size()>0) xmlout("svn_version",svn_r);
        add_DFXML_build_environment();
        add_DFXML_execution_environment(command_line);
        pop();                          // creator
    }
    void add_rusage();
    void set_oneline(bool v);

    /********************************
     *** THESE ARE ALL THREADSAFE ***
     ********************************/
    void comment(const std::string &comment);
    void xmlprintf(const std::string &tag,const std::string &attribute,const char *fmt,...)
        __attribute__((format(printf, 4, 5))); // "4" because this is "1";
    void xmlout( const std::string &tag,const std::string &value, const std::string &attribute, const bool escape_value);

    /* These all call xmlout or xmlprintf which already has locking, so these are all threadsafe! */
    void xmlout( const std::string &tag,const std::string &value){ xmlout(tag,value,"",true); }
    // void xmlout( const std::string &tag,const int value){ xmlprintf(tag,"","%d",value); }
    void xmloutl(const std::string &tag,const long value){ xmlprintf(tag,"","%ld",value); }
#ifdef WIN32
    void xmlout( const std::string &tag,const int32_t value){ xmlprintf(tag,"","%I32d",value); }
    void xmlout( const std::string &tag,const uint32_t value){ xmlprintf(tag,"","%I32u",value); }
    void xmlout( const std::string &tag,const int64_t value){ xmlprintf(tag,"","%I64d",value); }
    void xmlout( const std::string &tag,const uint64_t value){ xmlprintf(tag,"","%I64u",value); }
#else
    void xmlout( const std::string &tag,const int32_t value){ xmlprintf(tag,"",xml_PRId32.c_str(),value); }
    void xmlout( const std::string &tag,const uint32_t value){ xmlprintf(tag,"",xml_PRIu32.c_str(),value); }
    void xmlout( const std::string &tag,const int64_t value){ xmlprintf(tag,"",xml_PRId64.c_str(),value); }
    void xmlout( const std::string &tag,const uint64_t value){ xmlprintf(tag,"",xml_PRIu64.c_str(),value); }
#ifdef __APPLE__
    void xmlout( const std::string &tag,const size_t value){ xmlprintf(tag,"",xml_PRIu64.c_str(),value); }
#endif
#endif
    void xmlout( const std::string &tag,const double value){ xmlprintf(tag,"","%f",value); }

    /* Written as seconds.microseconds; both fields are narrowed to int. */
    void xmlout( const std::string &tag,const struct timeval &ts) {
        xmlprintf(tag,"","%d.%06d",(int)ts.tv_sec, (int)ts.tv_usec);
    }

    /**
     * Format ts as an ISO 8601 timestamp, YYYY-MM-DDTHH:MM:SS[.uuuuuu]Z.
     * The fractional part is emitted only when tv_usec is greater than zero.
     *
     * The result ends in "Z", the UTC designator, so the broken-down time
     * is taken in UTC. It was previously taken with localtime, which
     * labelled local time as UTC.
     */
    static std::string to8601(const struct timeval &ts) {
        struct tm tm;
        char buf[64];
        const time_t t = ts.tv_sec;
        memset(&tm,0,sizeof(tm));
#ifdef HAVE_LOCALTIME_R
        /* NOTE(review): only localtime_r is probed by configure; gmtime_r is
         * assumed to be available wherever localtime_r is -- confirm.
         */
        gmtime_r(&t,&tm);
#else
        const struct tm *tmp = gmtime(&t);
        if(tmp) tm = *tmp;
#endif
        strftime(buf,sizeof(buf),"%Y-%m-%dT%H:%M:%S",&tm);
        if(ts.tv_usec>0){
            int len = strlen(buf);
            snprintf(buf+len,sizeof(buf)-len,".%06d",(int)ts.tv_usec);
        }
        strcat(buf,"Z");
        return std::string(buf);
    }
};
necessary if [ ! -e config.guess -o ! -e config.sub -o ! -e install-sh -o ! -e missing ]; then autoheader -f touch NEWS README AUTHORS ChangeLog touch stamp-h aclocal -I m4 autoconf -f #libtoolize || glibtoolize automake --add-missing --copy else autoreconf -f fi tcpflow/src/dfxml/src/ChangeLog0000644000175000017500000000045512263701334015411 0ustar dimadima2013-12-11 Basic * dfxml_reader.h (dfxml): created a DFXML namespace 2013-07-15 Man Page * dfxml_writer.cpp (dfxml_writer::xmlmap): now escapes xmlmap 2013-05-27 Basic * dfxml_generator.cpp: renamed to dfxml_writer tcpflow/src/dfxml/src/cpu_stat.cpp0000644000175000017500000000442712263701334016170 0ustar dimadima/** cpustat.h -- Header for cpustat.cpp. * Copyright (c) 2004 Brad Fish (brad.fish@gmail.com). */ #if !defined(MAIN_H) #define MAIN_H #include // missing Windows processor power information struct typedef struct _PROCESSOR_POWER_INFORMATION { ULONG Number; ULONG MaxMhz; ULONG CurrentMhz; ULONG MhzLimit; ULONG MaxIdleState; ULONG CurrentIdleState; } PROCESSOR_POWER_INFORMATION , *PPROCESSOR_POWER_INFORMATION; int main (int argc, char *argv[]); #endif // MAIN_H #include "cpustat.h" #include #include #include extern "C" { #include } int main (int argc, char *argv[]) { typedef std::vector PPIVector; SYSTEM_INFO sys_info; PPIVector ppis; SYSTEM_POWER_CAPABILITIES spc; // find out how many processors we have in the system GetSystemInfo(&sys_info); ppis.resize(sys_info.dwNumberOfProcessors); // get CPU stats if (CallNtPowerInformation(ProcessorInformation, NULL, 0, &ppis[0], sizeof(PROCESSOR_POWER_INFORMATION) * ppis.size()) != ERROR_SUCCESS) { perror("main: "); return -1; } // print out CPU stats for (PPIVector::iterator it = ppis.begin(); it != ppis.end(); ++it) { std::cout << "stats for CPU " << it->Number << ':' << std::endl; std::cout << " maximum MHz: " << it->MaxMhz << std::endl; std::cout << " current MHz: " << it->CurrentMhz << std::endl; std::cout << " MHz limit: " << it->MhzLimit << std::endl; 
std::cout << " maximum idle state: " << it->MaxIdleState << std::endl; std::cout << " current idle state: " << it->CurrentIdleState << std::endl; } // get system power settings if (!GetPwrCapabilities(&spc)) { perror("main: "); return -2; } // print power settings std::cout << "system power capabilities:" << std::endl; std::cout << " processor throttle: " << (spc.ProcessorThrottle ? "enabled" : "disabled") << std::endl; std::cout << " processor minimum throttle: " << static_cast(spc.ProcessorMinThrottle) << '%' << std::endl; std::cout << " processor maximum throttle: " << static_cast(spc.ProcessorMaxThrottle) << '%' << std::endl; } tcpflow/src/dfxml/src/m4/0000755000175000017500000000000012263701334014153 5ustar dimadimatcpflow/src/dfxml/src/m4/ax_cxx_compile_stdcxx_11.m40000644000175000017500000001076312263701334021324 0ustar dimadima# ============================================================================ # http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html # ============================================================================ # # SYNOPSIS # # AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional]) # # DESCRIPTION # # Check for baseline language coverage in the compiler for the C++11 # standard; if necessary, add switches to CXXFLAGS to enable support. # # The first argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. # -std=c++11). If neither is specified, you get whatever works, with # preference for an extended mode. # # The second argument, if specified 'mandatory' or if left unspecified, # indicates that baseline C++11 support is required and that the macro # should error out if no mode with that support is found. If specified # 'optional', then configuration proceeds regardless, after defining # HAVE_CXX11 if and only if a supporting mode is found. 
# # LICENSE # # Copyright (c) 2008 Benjamin Kosnik # Copyright (c) 2012 Zack Weinberg # Copyright (c) 2013 Roy Stogner # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 3 m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [ template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; typedef check> right_angle_brackets; int a; decltype(a) b; typedef check check_type; check_type c; check_type&& cr = static_cast(c); auto d = a; ]) AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl m4_if([$1], [], [], [$1], [ext], [], [$1], [noext], [], [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl m4_if([$2], [], [ax_cxx_compile_cxx11_required=true], [$2], [mandatory], [ax_cxx_compile_cxx11_required=true], [$2], [optional], [ax_cxx_compile_cxx11_required=false], [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])dnl AC_LANG_PUSH([C++])dnl ac_success=no AC_CACHE_CHECK(whether $CXX supports C++11 features by default, ax_cv_cxx_compile_cxx11, [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [ax_cv_cxx_compile_cxx11=yes], [ax_cv_cxx_compile_cxx11=no])]) if test x$ax_cv_cxx_compile_cxx11 = xyes; then ac_success=yes fi m4_if([$1], [noext], [], [dnl if test x$ac_success = xno; then for switch in -std=gnu++11 -std=gnu++0x; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, $cachevar, [ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [eval $cachevar=yes], [eval $cachevar=no]) CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi]) m4_if([$1], [ext], [], [dnl if test x$ac_success = xno; then for 
switch in -std=c++11 -std=c++0x; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, $cachevar, [ac_save_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], [eval $cachevar=yes], [eval $cachevar=no]) CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then CXXFLAGS="$CXXFLAGS $switch" ac_success=yes break fi done fi]) AC_LANG_POP([C++]) if test x$ax_cxx_compile_cxx11_required = xtrue; then if test x$ac_success = xno; then AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.]) fi else if test x$ac_success = xno; then HAVE_CXX11=0 AC_MSG_NOTICE([No compiler with C++11 support was found]) else HAVE_CXX11=1 AC_DEFINE(HAVE_CXX11,1, [define if the compiler supports basic C++11 syntax]) fi AC_SUBST(HAVE_CXX11) fi ]) tcpflow/src/dfxml/src/INSTALL0000644000175000017500000003660512263701637014704 0ustar dimadimaInstallation Instructions ************************* Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. 
The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. 
When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details.
Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. 
Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. 
On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. HP-UX `make' updates targets which have the same time stamps as their prerequisites, which makes it generally unusable when shipped generated files such as `configure' are involved. Use GNU `make' instead. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file.
The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. 
`configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf limitation. Until the limitation is lifted, you can use this workaround: CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. `--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). 
`--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. tcpflow/src/dfxml/src/Makefile.am0000644000175000017500000000052112263701334015665 0ustar dimadimabin_PROGRAMS = dfxml_demo iblkfind dfxml_demo_SOURCES = dfxml_demo.cpp dfxml_reader.cpp dfxml_reader.h iblkfind_SOURCES = iblkfind.cpp dfxml_reader.cpp dfxml_reader.h EXTRA_DIST = \ beregex.cpp \ beregex.h \ cppmutex.h \ cpu_info.c \ cpu_stat.cpp \ dfxml_generator.cpp \ dfxml_generator.h \ dfxml_reader.cpp \ dfxml_reader.h tcpflow/src/dfxml/src/AUTHORS0000644000175000017500000000000012263701334014671 0ustar dimadimatcpflow/src/dfxml/src/NEWS0000644000175000017500000000000012263701334014320 0ustar dimadimatcpflow/src/dfxml/src/dfxml_demo.cpp0000644000175000017500000000122312263701334016453 0ustar dimadima#include #include #include #include #include #include #include #include #include #include "config.h" #include "dfxml_reader.h" void process(dfxml::file_object &fi) { std::cout << "fi.filename: " << fi.filename() << "\n"; std::cout << " pieces: " << fi.byte_runs.size() << "\n"; for(dfxml::file_object::byte_runs_t::const_iterator it = fi.byte_runs.begin(); it!=fi.byte_runs.end(); it++){ //std::cout << " " << *it << "\n"; } } int main(int argc,char **argv) { dfxml::file_object_reader::read_dfxml(argv[1],process); return 0; } tcpflow/src/dfxml/src/Makefile.defs0000644000175000017500000000031712263701334016214 0ustar dimadimaDFXML_WRITER = dfxml/src/dfxml_writer.cpp \ dfxml/src/dfxml_writer.h \ dfxml/src/hash_t.h DFXML_READER = dfxml/src/dfxml_reader.cpp \ dfxml/src/dfxml_reader.h \ 
dfxml/src/hash_t.h tcpflow/src/dfxml/src/COPYING0000644000175000017500000001773412263701334014702 0ustar dimadimaThis software is a work of the Naval Postgraduate School (NPS) and the National Institute of Standards and Technology (NIST) . As a work of the US Government this work is not subject to copyright law. Neither NPS nor NIST assumes any responsibility whatsoever for its use by other parties, and makes no guarantees, expressed or implied, about its quality, reliability, or any other characteristic. Contributions by non-US government entities herein are covered under the LGPL, which is included below. GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". 
The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. 
b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. 
(If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. 
If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. tcpflow/src/dfxml/src/README0000644000175000017500000000020412263701334014507 0ustar dimadima$ git checkout -b tmp ; git checkout master ; git merge tmp ; git branch -d tmp ; git push git@github.com:simsong/dfxml.git master tcpflow/src/dfxml/src/hash_t.h0000644000175000017500000001613212263701334015255 0ustar dimadima/* * C++ covers for md5, sha1, and sha256 (and sha512 if present) * * hash representation classes: md5_t, sha1_t, sha256_t (sha512_t) * has generators: md5_generator(), sha1_generator(), sha256_generator() * * Generating a hash: * sha1_t val = sha1_generator::hash_buf(buf,bufsize) * sha1_1 generator hasher; * hasher.update(buf,bufsize) * hasher.update(buf,bufsize) * hasher.update(buf,bufsize) * sha1_t val = hasher.final() * * Using the values: * string val.hexdigest() --- return a hext digest * val.size() --- the size of the hash in bytes * uint8_t val.digest[SIZE] --- the buffer of the raw bytes * uint8_t val.final() --- synonym for md.digest * * This can be updated in the future for Mac so that the hash__ class * is then subclassed by a hash__openssl or a hash__commonCrypto class. */ #ifndef HASH_T_H #define HASH_T_H #include #include /** * For reasons that defy explanation (at the moment), this is required. 
*/ #ifdef __APPLE__ #include #undef DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER #endif #include #include #include #include #include #include #include #if defined(HAVE_OPENSSL_HMAC_H) && defined(HAVE_OPENSSL_EVP_H) #include #include #else #error OpenSSL required for hash_t.h #endif #ifdef HAVE_SYS_MMAN_H #include #endif #ifdef HAVE_SYS_MMAP_H #include #endif template class hash__ { public: uint8_t digest[SIZE]; static size_t size() { return(SIZE); } hash__(){ } hash__(const uint8_t *provided){ memcpy(this->digest,provided,size()); } const uint8_t *final() const { return this->digest; } /* python like interface for hexdigest */ static unsigned int hex2int(char ch){ if(ch>='0' && ch<='9') return ch-'0'; if(ch>='a' && ch<='f') return ch-'a'+10; if(ch>='A' && ch<='F') return ch-'A'+10; return 0; } static unsigned int hex2int(char ch0,char ch1){ return (hex2int(ch0)<<4) | hex2int(ch1); } static hash__ fromhex(const std::string &hexbuf) { hash__ res; assert(hexbuf.size()==SIZE*2); for(unsigned int i=0;i+1=3;i++){ snprintf(hexbuf,bufsize,"%02x",this->digest[i]); hexbuf += 2; bufsize -= 2; } return hexbuf_start; } std::string hexdigest() const { std::string ret; char buf[SIZE*2+1]; return std::string(hexdigest(buf,sizeof(buf))); } /** * Convert a hex representation to binary, and return * the number of bits converted. * @param binbuf output buffer * @param binbuf_size size of output buffer in bytes. * @param hex input buffer (in hex) * @return the number of converted bits. 
*/ static int hex2bin(uint8_t *binbuf,size_t binbuf_size,const char *hex) { int bits = 0; while(hex[0] && hex[1] && binbuf_size>0){ *binbuf++ = hex2int(hex[0],hex[1]); hex += 2; bits += 8; binbuf_size -= 1; } return bits; } static const hash__ *new_from_hex(const char *hex) { hash__ *val = new hash__(); if(hex2bin(val->digest,sizeof(val->digest),hex)!=SIZE*8){ std::cerr << "invalid input " << hex << "(" << SIZE*8 << ")\n"; exit(1); } return val; } bool operator<(const hash__ &s2) const { /* Check the first byte manually as a performance hack */ if(this->digest[0] < s2.digest[0]) return true; if(this->digest[0] > s2.digest[0]) return false; return memcmp(this->digest,s2.digest, SIZE) < 0; } bool operator==(const hash__ &s2) const { if(this->digest[0] != s2.digest[0]) return false; return memcmp(this->digest,s2.digest, SIZE) == 0; } friend std::ostream& operator<<(std::ostream& os,const hash__ &s2) { os << s2.hexdigest(); return os; } }; typedef hash__ md5_t; typedef hash__ sha1_t; typedef hash__ sha256_t; #ifdef HAVE_EVP_SHA512 typedef hash__ sha512_t; #endif template class hash_generator__ { /* generates the hash */ EVP_MD_CTX mdctx; /* the context for computing the value */ bool initialized; /* has the context been initialized? */ bool finalized; /* Static function to determine if something is zero */ static bool iszero(const uint8_t *buf,size_t bufsize){ for(unsigned int i=0;i final() { if(finalized){ std::cerr << "currently friendly_geneator does not cache the final value\n"; assert(0); /* code below will never be executed after assert(0) */ } if(!initialized){ init(); /* do it now! 
*/ } hash__ val; unsigned int len = sizeof(val.digest); EVP_DigestFinal(&mdctx,val.digest,&len); finalized = true; return val; } /** Compute a sha1 from a buffer and return the hash */ static hash__ hash_buf(const uint8_t *buf,size_t bufsize){ /* First time through find the SHA1 of 512 NULLs */ hash_generator__ g; g.update(buf,bufsize); return g.final(); } #ifdef HAVE_MMAP /** Static method allocateor */ static hash__ hash_file(const char *fname){ int fd = open(fname,O_RDONLY #ifdef O_BINARY |O_BINARY #endif ); if(fd<0) throw fname; struct stat st; if(fstat(fd,&st)<0){ close(fd); throw fname; } const uint8_t *buf = (const uint8_t *)mmap(0,st.st_size,PROT_READ,MAP_FILE|MAP_SHARED,fd,0); if(buf==0){ close(fd); throw fname; } hash__ s = hash_buf(buf,st.st_size); munmap((void *)buf,st.st_size); close(fd); return s; } #endif }; typedef hash_generator__ md5_generator; typedef hash_generator__ sha1_generator; typedef hash_generator__ sha256_generator; #ifdef HAVE_EVP_SHA512 typedef hash_generator__ sha512_generator; #define HAVE_SHA512_T #endif #endif tcpflow/src/dfxml/src/dfxml_reader.h0000644000175000017500000001235512263701334016446 0ustar dimadima#ifndef _DFXML_READER_H_ #define _DFXML_READER_H_ /** ** NOTE: ** THIS IS NOT A COMPLETE IMPLEMENTATION. ** This is a skeletal implementation of a DFXML reader to solve an immediate problem. ** For a full implementation, please see ../python/dfxml.py ** ** If you want to add support for a specific DFXML tag, please add it ** and submit your patch as a pull request on github. 
**/ #include #include #include #include #include #include #if __cplusplus >= 201103L #include #endif #include #ifdef HAVE_EXPAT_H #include #else #error dfxml_reader.h requires expat.h #endif #ifdef HAVE_MD5_H #include "md5.h" #endif namespace dfxml { class saxobject { public: typedef std::map hashmap_t; typedef std::map tagmap_t; virtual ~saxobject(){} saxobject():hashdigest(),_tags(){} saxobject(const saxobject &that):hashdigest(that.hashdigest),_tags(that._tags){} hashmap_t hashdigest; // any object can have hashes tagmap_t _tags; // any object can tags }; std::ostream & operator <<(std::ostream &os,const dfxml::saxobject::hashmap_t &h); class no_hash:public std::exception { virtual const char *what() const throw() { return "requested hash not found"; } }; class byte_run:public saxobject { public: virtual ~byte_run(){}; byte_run():saxobject(),img_offset(0),file_offset(0),len(0),sector_size(0){} byte_run(const byte_run &that):saxobject(that), img_offset(that.img_offset), file_offset(that.file_offset), len(that.len), sector_size(that.sector_size){} int64_t img_offset; int64_t file_offset; int64_t len; int64_t sector_size; #ifdef HAVE_MD5_H md5_t md5() const { hashmap_t::const_iterator it = hashdigest.find("md5"); if(it==hashdigest.end()) std::cout << "end found\n"; if(it!=hashdigest.end()) std::cout << it-> first << "=" /* << it->second */ << "\n"; if(it!=hashdigest.end()) return md5_t::fromhex(it->second); throw new no_hash(); } #endif }; std::ostream & operator <<(std::ostream &os,const byte_run &b); class imageobject_sax:public saxobject { public: virtual ~imageobject_sax(){}; }; class volumeobject_sax:public saxobject { public:; volumeobject_sax():saxobject(),block_size(),image(){} uint64_t block_size; imageobject_sax image; }; class file_object:public saxobject { public:; file_object():saxobject(),volumeobject(0),byte_runs() { }; file_object(const file_object &that):saxobject(that),volumeobject(that.volumeobject), byte_runs(that.byte_runs) { }; const 
file_object &operator=(const file_object &fo){ this->hashdigest = fo.hashdigest; this->_tags = fo._tags; this->volumeobject = fo.volumeobject; this->byte_runs = fo.byte_runs; return *this; } typedef std::vector byte_runs_t; volumeobject_sax *volumeobject; byte_runs_t byte_runs; std::string filename(){return _tags["filename"];} #ifdef HAVE_MD5_H md5_t md5() const { std::map::const_iterator it = hashdigest.find("md5"); if(it!=hashdigest.end()) return md5_t::fromhex(it->second); throw new no_hash(); } #endif }; #if __cplusplus >= 201103L typedef std::function fileobject_callback_t; #else typedef void (*fileobject_callback_t)(file_object &); #endif class dfxml_reader { public: dfxml_reader():tagstack(),cdata(){} virtual ~dfxml_reader(){} static std::string getattrs(const char **attrs,const std::string &name); static uint64_t getattri(const char **attrs,const std::string &name); std::stack tagstack; std::stringstream cdata; }; class file_object_reader:public dfxml_reader{ private: /*** neither copying nor assignment is implemented *** *** We do this by making them private constructors that throw exceptions. 
***/ file_object_reader(const file_object_reader &); file_object_reader &operator=(const file_object_reader&); public:; static void startElement(void *userData, const char *name_, const char **attrs); static void endElement(void *userData, const char *name_); static void read_dfxml(const std::string &fname,fileobject_callback_t process); static void characterDataHandler(void *userData,const XML_Char *s,int len); virtual ~file_object_reader(){}; file_object_reader(): dfxml_reader(),volumeobject(),fileobject(),callback(),hashdigest_type(){} volumeobject_sax *volumeobject; file_object *fileobject; // the object currently being read fileobject_callback_t callback; std::string hashdigest_type; }; }; #endif tcpflow/src/dfxml/src/dfxml_reader.cpp0000644000175000017500000001242212263701334016774 0ustar dimadima#include #include #include #include #include #include #include #include #include /* We need netinet/in.h or windowsx.h */ #ifdef HAVE_NETINET_IN_H # include #endif #ifdef WIN32 # include # include # include #endif #include #include #include "dfxml_reader.h" std::ostream & operator <<(std::ostream &os,const dfxml::byte_run &b) { os << "byte_run["; if(b.img_offset) os << "img_offset=" << b.img_offset << ";"; if(b.file_offset) os << "file_offset=" << b.file_offset << ";"; if(b.len) os << "len=" << b.len << ";"; if(b.sector_size) os << "sector_size=" << b.sector_size << ";"; //os << b.hashdigest; os << "]"; return os; }; std::ostream & operator <<(std::ostream &os,const dfxml::saxobject::hashmap_t &h) { for(dfxml::saxobject::hashmap_t::const_iterator it = h.begin(); it!=h.end(); it++){ os << it->first << ":" << it->second << " "; } return os; } std::string dfxml::dfxml_reader::getattrs(const char **attrs,const std::string &name) { for(int i=0;attrs[i];i+=2){ if(name==attrs[i]) return std::string(attrs[i+1]); } return std::string(""); } static uint64_t atoi64(const char *str) { std::stringstream ss; ss << str; uint64_t val; ss >> val; return val; } uint64_t 
dfxml::dfxml_reader::getattri(const char **attrs,const std::string &name) { std::stringstream ss; for(int i=0;attrs[i];i+=2){ if(name==attrs[i]){ ss << attrs[i+1]; uint64_t val; ss >> val; return val; } } return 0; } void dfxml::file_object_reader::startElement(void *userData, const char *name_, const char **attrs) { class file_object_reader &self = *(file_object_reader *)userData; std::string name(name_); self.cdata.str(""); self.tagstack.push(name); if(name=="volume"){ self.volumeobject = new volumeobject_sax(); self.volumeobject->block_size = 512; // default } if(name=="block_size"){ /* pass */ } if(name=="fileobject"){ self.fileobject = new file_object(); self.fileobject->volumeobject = self.volumeobject; return; } if(name=="hashdigest"){ self.hashdigest_type = getattrs(attrs,"type"); return; } if(self.fileobject && (name=="run" || name=="byte_run")){ byte_run run; for(int i=0;attrs[i];i+=2){ if(run.img_offset==0 && !strcmp(attrs[i],"img_offset")){run.img_offset = atoi64(attrs[i+1]);continue;} if(run.file_offset==0 && !strcmp(attrs[i],"file_offset")){run.file_offset = atoi64(attrs[i+1]);continue;} if(run.len==0 && !strcmp(attrs[i],"len")){run.len = atoi64(attrs[i+1]);continue;} if(run.sector_size==0 && !strcmp(attrs[i],"sector_size")){run.sector_size = atoi64(attrs[i+1]);continue;} } self.fileobject->byte_runs.push_back(run); // is there a more efficient way to do this? 
} } void dfxml::file_object_reader::endElement(void *userData, const char *name_) { std::string name(name_); file_object_reader &self = *(file_object_reader *)userData; if(self.tagstack.top() != name){ std::cout << "close tag '" << name << "' found; '" << self.tagstack.top() << "' expected.\n"; exit(1); } self.tagstack.pop(); std::string cdata = self.cdata.str(); self.cdata.str(""); if(name=="volume"){ self.volumeobject = 0; return; } if(name=="block_size" && self.tagstack.size()>1){ if(self.tagstack.top()=="volume"){ self.volumeobject->block_size = atoi(cdata.c_str()); } return; } if(name=="fileobject"){ self.callback(*self.fileobject); delete self.fileobject; self.fileobject = 0; return; } if(name=="hashdigest" and self.tagstack.size()>0){ std::string alg = self.hashdigest_type; std::transform(alg.begin(), alg.end(), alg.begin(), ::tolower); if(self.tagstack.top()=="byte_run"){ self.fileobject->byte_runs.back().hashdigest[alg] = cdata; } if(self.tagstack.top()=="fileobject"){ self.fileobject->hashdigest[alg] = cdata; } return; } if(self.fileobject){ self.fileobject->_tags[name] = cdata; return; } } /** * Remember - 's' is NOT null-terminated */ void dfxml::file_object_reader::characterDataHandler(void *userData,const XML_Char *s,int len) { class file_object_reader &self = *(file_object_reader *)userData; self.cdata.write(s,len); } void dfxml::file_object_reader::read_dfxml(const std::string &fname,fileobject_callback_t process) { file_object_reader r; r.callback = process; XML_Parser parser = XML_ParserCreate(NULL); XML_SetUserData(parser, &r); XML_SetElementHandler(parser, startElement, endElement); XML_SetCharacterDataHandler(parser,characterDataHandler); std::fstream in(fname.c_str()); if(!in.is_open()){ std::cout << "Cannot open " << fname << ": " << strerror(errno) << "\n"; exit(1); } try { std::string line; while(getline(in,line)){ if (!XML_Parse(parser, line.c_str(), line.size(), 0)) { std::cout << "XML Error: " << XML_ErrorString(XML_GetErrorCode(parser)) 
<< " at line " << XML_GetCurrentLineNumber(parser) << "\n"; XML_ParserFree(parser); return; } } XML_Parse(parser, "", 0, 1); } catch (const std::exception &e) { std::cout << "ERROR: " << e.what() << "\n"; } XML_ParserFree(parser); } tcpflow/src/dfxml/.gitignore0000644000175000017500000000046412263701334015040 0ustar dimadima*.o *.pyc *~ .deps .dirstamp __pycache__ src/.deps src/Makefile src/Makefile.in src/aclocal.m4 src/autom4te.cache src/config.log src/config.status src/configure src/dfxml_demo src/install-sh src/missing .DS_Store _deps src/config.h src/config.h.in src/iblkfind.cpp stamp-h stamp-h1 src/compile src/iblkfind tcpflow/src/dfxml/README.txt0000644000175000017500000000061412263701334014543 0ustar dimadimaDFXML tools python/ - tools in Python src/ - tools in C ================================================================ To get back on master: Summary: $ git checkout -b newbranch $ git checkout master $ git merge newbranch $ git branch -d newbranch or: $ git checkout -b tmp ; git checkout master ; git merge tmp ; git branch -d tmp ; git push git@github.com:simsong/dfxml.git master tcpflow/src/dfxml/dtd/0000755000175000017500000000000012263701334013617 5ustar dimadimatcpflow/src/dfxml/dtd/dfxml.dtd0000644000175000017500000000302712263701334015430 0ustar dimadima tcpflow/src/dfxml/samples/0000755000175000017500000000000012263701334014510 5ustar dimadimatcpflow/src/dfxml/samples/piecwise.xml0000644000175000017500000006135012263701334017047 0ustar dimadima Hash List MD5DEEP 4.0.0_beta2-002 GCC 4.2 Darwin 11.3.0 Darwin Kernel Version 11.3.0: Thu Jan 12 18:47:41 PST 2012; root:xnu-1699.24.23~1/RELEASE_X86_64 Mucha.local x86_64 md5deep -dp512 /Users/simsong/uploads/einstein template.jpg /Users/simsong/uploads/image1.jpg /Users/simsong/uploads/image2.jpg /Users/simsong/uploads/image3.jpg 502 2012-02-23T16:35:11Z /Users/simsong/uploads/image2.jpg 12833 2012-02-22T03:53:05Z 2012-02-22T03:53:05Z 2012-02-23T16:34:27Z 8f5457a55452412da5c0e3743931e4f8 
d41f71e039466d3f833dbcae949e875b 1ab3f69fca529b16d42f6a56bfa7c5d7 6d975bf907bc62679aef3bcb8669b142 5c735c1953d2c27aa8d485fb48b3d463 84e25e9988b612d7f00824f524ac0303 0cc56da9c790d99fe87921bc71ee0309 6e6621606e1eec456abd497a77c0cdc5 1e6dcaf4b23609cb076782eeebbc42dc 865da6c577c4c72b8fc395f719215f54 5feff808bdda6251963f1c01d8c4a4b8 c62d567a7251dd03f93febda2a966694 b24d94d636603204346cfde95b2b5d5b 8f973423e2ef8cde2b4a7790504df22d 56aeb00814bad806ea95684721b0b5c3 af12f6dab2a4db0b5ceb55009acf152b 22dcb00c48e7e8b1b405e743fa55a5d0 1e5f8c292e80e8e0d1572aa6e988f7f5 3ff02b0308b64fafc2eeae3e244a1d75 7607030a32f98a64b066abb217fa8373 55f663ded85a12a79486c5f6a90c3c73 914437574989cf7428ffcc85f7b87da2 daadacaf1b1994048f0549122c156508 31c68bc45ee0f211e7c0e137df6b3a8a fcba36173dfd95558f8bc6794b6db855 1fe1dfe13928706ca0eb8cbcff7273fa d7ced55e7d7f5b9995fc3cbac7942155 /Users/simsong/uploads/image1.jpg 12551 2012-02-22T03:53:54Z 2012-02-22T03:53:54Z 2012-02-23T16:34:27Z 8f5457a55452412da5c0e3743931e4f8 d41f71e039466d3f833dbcae949e875b b8747777489e64a617f87d7406de9e93 e4d385a2e6347882f7ba929d6c74e0aa 597d96ca2fe3e0427e21ac4710a8655f 61a076c61bfd6e1e814d62caf893ec5a e2d090f45a71c4bdaba1e9d6e7df115a e7bc72cb232f5eaca2afcf1dea4c37e5 0d551551fba5b675abc58da2c9fa1429 5fd32b04cefeae0c0cebbdb83371a108 5c51be0daa2c67899d79ddd0a88d5285 0c4d539f4842bece13776eee4866948b a22091f6c0b78fd26bd3d16c67ab8035 15dca6095762849bdcf609dc8e2a320f 4a0dbf591af1fb53299360682d659692 1a6765b54b02d87bd8c4a7266487e4b5 91cc0febcbe59b6089d0521d30bd8e17 9ada586e87ae2f6309f5f153f50cf627 8df49a9764c544784ee630be2a8a7333 2d6cd302516c4af6708ece430b4c266a 66ccb00ae94d47d799bbe34f747e55fe bc328a637e51ff2fa04257c3e6a2ecef 12e40f90833fcfefb28eabf4a7d1b1a8 6ca38e3efe21d4cc0242c7ef646148e0 d2c2f35cb5f851a3fb811a322bae96f7 3bb144b5abc65312099f79caa69ff94f /Users/simsong/uploads/image3.jpg 12545 2012-02-22T03:55:38Z 2012-02-22T03:55:38Z 2012-02-23T16:34:27Z 8f5457a55452412da5c0e3743931e4f8 d41f71e039466d3f833dbcae949e875b 
ee6bf406ce8b5c7baf7c496212989181 8ffc08745c1967c5631ed3d55289aa0d 18b7b458335d91b82f2042833af06d5a 2e62d373ff99fd28c8668471dd78521a 60aa78cc415622ddec26030195f500df c89b7029c9ddc07fb848dd14856fd8a5 ce5be4f9c8a87c41cf68ddf5103486d4 b0cd77411a7d3496b0d9757196908396 7f4cc6ae1f01ffb9d479ce81d49e12f3 5e8265d60a30517ccc11e655d0af51e5 7e877d6429be5b3e2ffba13d9be2559d 76806e3529e1c318ab37494be5b3e9af 17ef0a326e1f75cbe473bd565364ca09 8e9b587bdc0ac6ea8fca730788dc6301 88b8ba8a644d928ab39068640167a7f3 6ece190b2874acbf9cd08409cb366589 a4a50852a623b3f308f74e499836fe1b 7c6e2f22ef13c6c687c65100a74c2e7d b9022315f72980a757db674001f58181 3c75c4b9f44911519c22bb074abfbf43 a6832c15e1b3ec2666130ecbd1e1657d f7f4fd0c5fd28e2e616ac8f6f8e454dc cf7b3ea00544ca5095828b4b33103185 6377d89ab3165a3fe24b390b513f47d7 /Users/simsong/uploads/einstein template.jpg 43819 2012-02-22T03:54:19Z 2012-02-22T03:54:19Z 2012-02-23T16:34:27Z 805adb2ce40f70642feb6cf77af45d08 17f4ac184dcc2f68afbf045d76c9f64d 6432b81d343ffe58c91d6cb3db92745f c9d3de107041dfac52df904477b10583 8d8670310c013b29903bcc2450032d68 80087bc68f197fa15d9ef5ee47b404ae 3bcc414cf80688e7b385c23b9f4fd362 7e646a8507ad94488ffe1ec4f23c0677 95bd70304069018de9cf826dd1454bc7 0a2145574510093200ee27b9a002eefe afdf02515788461f906c5a4240cc6303 59a1d49826f73fe4092fda670b10e0b9 33eb025d127bb039da9b02d5a83ef2ff ff0ce810556993a0eabb4cca4b8e9475 82b66873a9271abe064b7448dcef93a0 f8f2c9f1df189d2c640227f094788256 1177e0b4b32943afd2c3bb45e7ed8f8b d9c2aa92d9367f87314094bdb10b8a64 1b95c871b8f38708fab8963a44bfa77d fa92b66db919aba7483365f586b2e78d 59f98ea3246a1e9ccdc73bb6e417cca8 23168943c823932a15bfad756eaa9f9c 7b27c8da4c409caccef4876332571f22 823163a07a2c0b07dee5be5d5bc1f390 267750824bcf6e58a4b2d95e6320925b 5864230a443c435cbc67b0ac30c3786e df477fffc448455e021ce430fcdf6cf3 9fd16fefb21fb532cb292b46baaca83d 60863de0392d367ff1cfb89c3e7a3c2f a9adae01e3176988bf81b8a63c3e89ad e85daee152610db6fbbdcc33c68f80ab 7d04cba5cc2ec31c4e150e0d246bc2ce 1557ec786789858e9043fbada6c74c18 
481128961232534124a37ed4b7410fca f5652b39f797fd5f14f96a718ea2be06 e5b3c8151a72a3c80c4c0d0237296b05 c318db58c8800240dc01eeb6ca88ee08 03a05b57e0ee933d0c5dab1e85635285 9e241a3d824d618f8a16d2b5a6becdf2 81a12fc4873147fc4e255e23e6f31ceb 64f592d494f7d77335c6b70ad7947b32 0d0a22e39617a03ae24544ed166c15be d67687e75df0c8731a4b13da16c74455 df978b32e2d6c9996239e4f12ed6e946 0f772dedec2ebe2d3f2253c93ee232f0 43fb50689bd20d4d57510b28005308e3 86ed5f9ddd3e45e57403b31a8acc6ea1 76ed7d3f0a4ba886b86e113e35b2ac7a b57e02328c51ba78d15efcc02a1371af 1da744e63da4723392a661015a097fb1 ab83cc66a087c1afef0ad3f81639d895 059eb208392a17184853ca42a1d49325 d417626e9ad8e62231213417c8ac3a45 8d2e0cdb54a910f7f89498f2cdb530f0 68ed517bb9c3cd14afb01443aebab809 18363a8a531dcff8e11a333155cddd17 0a93b51e3f1b530eb1937b755b60d04e 4cc4795dcf39a34149775dd8b73a8fd4 41679e49e3706537c13e8b7ee2f040e4 699eb5a75501ca8bb160545e985af3a7 f57793eecf88eb8da4515aa6f1bebadb 9e34c82aeb4529ccd0121bf8d9d0f983 292cd63f522ba4184894d8a889e8ef5f d3a730ff16da36ca2d08b3d622da1306 87bb953c0970d05ed97bb61545f20b37 61d8164199c6b75143ba69d9d906dafd f1b9e3bb076bfc9b42fe997cdab24cc5 0d7e55abe3c7e837d07d5ed920de3a21 2444ce723591b1149519014e6976b41d 5fd6d9f2e445cd0cce09b4be2e1e63d4 685ec79dbcdf235b903ec065e3c9e692 c56b0931a4a74a96c531f59ee9d0f27a cf8c0ab5655703b95afe45635dbcb82c a61bd292840c4fb8e67af9e522751a7c 08c78d03701f6143f01da5ef3c6471e5 c273ea1a01d9638c5138612e6eadc4f0 0e946f4b026b60f0c74e045358f61b03 04d5827006513bc1dd75aa5b34d4af58 3b84aa6f8137cd998c408b1ceb849da6 63d613c6dd4905f174643f2614bb5662 6e387f8a636158145bcb69fdc5e7ad60 e27768f23451051d17d408879f7f81f3 207f36bdad0d832819081452c3ee4258 1cfe021561c55b04811f76e170285531 95fd03086b45555d65503a16b2daa377 66f557662e4026bd97901d11d295dd6c 702da00183448a42f5a861c95973f4f3 0.008982 0.003041 1069056 391 0 0 0 0 0.006578 tcpflow/src/dfxml/samples/fileobjectexample.xml0000644000175000017500000000063712263701334020722 0ustar dimadima 
tcpflow/src/dfxml/samples/simple.xml0000644000175000017500000000552312263701334016530 0ustar dimadima Hash List MD5DEEP 4.0.0_beta2-002 GCC 4.2 Darwin 11.3.0 Darwin Kernel Version 11.3.0: Thu Jan 12 18:47:41 PST 2012; root:xnu-1699.24.23~1/RELEASE_X86_64 Mucha.local x86_64 md5deep -dp512 /Users/simsong/uploads/einstein template.jpg /Users/simsong/uploads/image1.jpg /Users/simsong/uploads/image2.jpg /Users/simsong/uploads/image3.jpg 502 2012-02-23T16:35:11Z /Users/simsong/uploads/image2.jpg 12833 2012-02-22T03:53:05Z 2012-02-22T03:53:05Z 2012-02-23T16:34:27Z d7ced55e7d7f5b9995fc3cbac7942155 /Users/simsong/uploads/image1.jpg 12551 2012-02-22T03:53:54Z 2012-02-22T03:53:54Z 2012-02-23T16:34:27Z 3bb144b5abc65312099f79caa69ff94f /Users/simsong/uploads/image3.jpg 12545 2012-02-22T03:55:38Z 2012-02-22T03:55:38Z 2012-02-23T16:34:27Z 6377d89ab3165a3fe24b390b513f47d7 /Users/simsong/uploads/einstein template.jpg 43819 2012-02-22T03:54:19Z 2012-02-22T03:54:19Z 2012-02-23T16:34:27Z 702da00183448a42f5a861c95973f4f3 0.008982 0.003041 1069056 391 0 0 0 0 0.006578 tcpflow/src/dfxml/samples/difference_test_0.xml0000644000175000017500000000406112263701334020603 0ustar dimadima Sample vi 7.3 vi pre.xml i_will_be_deleted.txt r 20 123456 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 3d90d31043ad6158de35eacc7ac39069 2a5630f9e70b4931f6b6fc3d9d7f483007012352 i_will_be_modified.txt r 22 123457 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 7ac390693d90d31043ad6158de35eacc f4830070123522a5630f9e70b4931f6b6fc3d9d7 i_will_be_accessed.txt r 12 123458 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z e35eacc7ac390693d90d31043ad6158d 31f6b6fc3d9d7f4830070123522a5630f9e70b49 tcpflow/src/dfxml/samples/difference_test_1.xml0000644000175000017500000000405012263701334020602 0ustar dimadima Sample vi 7.3 vi post.xml i_am_new.txt r 40 123459 2013-05-16T21:01:00Z 2013-05-16T21:01:00Z 2013-05-16T21:01:00Z 158de35eacc7ac390693d90d31043ad6 
b6fc3d9d7f4830070123522a5630f9e70b4931f6 i_will_be_modified.txt r 23 123457 2013-05-16T20:59:00Z 2013-05-16T20:59:00Z 2013-05-16T20:59:00Z d90d31043ad6158de35eacc7ac390693 070123522a5630f9e70b4931f6b6fc3d9d7f4830 i_will_be_accessed.txt r 12 123458 2013-01-01T00:00:00Z 2013-01-01T00:00:00Z 2013-05-16T21:00:00Z e35eacc7ac390693d90d31043ad6158d 31f6b6fc3d9d7f4830070123522a5630f9e70b49 tcpflow/src/iptree.h0000644000175000017500000005232212263701151013374 0ustar dimadima/* * iptree.h: * * Maintains a count of all IP addresses seen, with limits on the * maximum amount of memory. * * #include this file after config.h (or whatever you are calling it) */ #ifndef IPTREE_H #define IPTREE_H #include #include #include #include #include #ifdef HAVE_ARPA_INET_H #include #endif #define IP4_ADDR_LEN 4 #define IP6_ADDR_LEN 16 /** * the iptree. * * pruning a node means cutting off its leaves (the node remains in the tree). */ /* addrbytes is the number of bytes in the address */ template class iptreet { private:; /** * the node class. * Each node tracks the sum that it currently has and its two children. * A node has pointers to the 0 and 1 children, as well as a sum for everything below. * A short address or prefix being tallied may result in BOTH a sum and one or more PTR values. * If a node is pruned, ptr0=ptr1=0 and tsum>0. * If tsum>0 and ptr0=0 and ptr1=0, then the node cannot be extended. * Nodes need to know their parent so that nodes found through the cache can be made dirty, * which requires knowing their parents. * * Note: currently we get a slightly different answer when the pruning cache is enabled. * Not sure why. It's probably not worth fixing at the moment. 
*/ class node { /** best describes the best node to prune */ public: class best { public: best &operator=(const best &that){ ptr = that.ptr; depth = that.depth; return *this; } const node *ptr; int depth; best():ptr(0),depth(-1){}; // don't use this one best(const node *ptr_,int depth_): ptr(ptr_),depth(depth_){} best(const best &b):ptr(b.ptr),depth(b.depth){ } virtual ~best(){} friend std::ostream & operator<<(std::ostream &os,best const & foo) { os << "node=" << foo.ptr << " depth=" << foo.depth << " "; return os; } }; private: /* Assignment and copy are not implemented */ node &operator=(const iptreet::node &that); node(const node &n); public: class node *parent; class node *ptr0; // 0 bit next class node *ptr1; // 1 bit next private: public:; TYPE tsum; // this node and pruned children. /* Caching system */ mutable bool dirty; // add() has been called and cached data is no longer valid mutable best cached_best; public: node(node *p):parent(p),ptr0(0),ptr1(0),tsum(),dirty(false),cached_best(){ } int children() const {return (ptr0 ? 1 : 0) + (ptr1 ? 1 : 0);} ~node(){ if(ptr0){ delete ptr0; ptr0 = 0; } if(ptr1){ delete ptr1; ptr1 = 0; } }; // a node is leaf if tsum>0 and both ptrs are 0. bool isLeaf() const { if(tsum>0 && ptr0==0 && ptr1==0) return true; return false; } /** * prune(): * Cut this node's children off the tree. * Returns the number removed, which should be larger than 0 (or we shouldn't have been called). */ int prune(class iptreet &tree){ // prune this node //std::cerr << "prune " << this << " ptr0= " << ptr0 << " ptr1=" << ptr1 << " parent= " << parent << "\n"; /* If prune() on a node is called, then both ptr0 and ptr1 nodes, if present, * must not have children. 
* Now delete those that we counted out */ int removed = 0; if(ptr0){ assert(ptr0->isLeaf()); // only prune leaf nodes tsum += ptr0->tsum; tree.cache_remove(ptr0); // remove it from the cache tree.pruned++; delete ptr0; ptr0=0; tree.nodes--; removed++; } if(ptr1){ assert(ptr1->isLeaf()); tsum += ptr1->tsum; tree.cache_remove(ptr1); tree.pruned++; delete ptr1; ptr1=0; tree.nodes--; removed++; } assert(removed>0); assert(isLeaf()); // I am now a leaf! set_dirty(); // should be able to just set parent //std::cerr << " parent dirty=" << parent->dirty << " this=" << this << " isLeaf()=" << isLeaf() << "\n"; //if(parent){ //parent->dirty=true; // parent is dirty, but no need to propigate it up //} return removed; } /** * Return the best node to prune (the node with the leaves to remove) * Possible outputs: * case 1 - no node (if this is a leaf node, it can't be pruned; should not have been called) * case 2 - this node (if all of the children are leaf) * case 3 - the best node of the one child (if there is only one child) * case 4 - the of the non-leaf child (if one child is leaf and one is not) * case 5 - the better node of each child's best node. */ class best best_to_prune(int my_depth) const { if(dirty==false && cached_best.ptr){ //return cached_best; // haven't changed, so return } dirty = false; // we will be cleaning // case 1 - this is a leaf; it was an error to call best_to_prune assert(isLeaf()==0); // case 2 - our only children are leaves; this is the best node if ((ptr0==0 || ptr0->isLeaf()) && (ptr1==0 || ptr1->isLeaf())){ return cached_best = best(this,my_depth); // case 2 } // case 3 - one of our children is a node and not a leaf, // - and the other is a child or not present. 
// - The best to prune is the child's best if ((ptr0==0 || ptr0->isLeaf()) && (ptr1!=0 && !ptr1->isLeaf())){ return cached_best = ptr1->best_to_prune(my_depth+1); // case 3 } if ((ptr1==0 || ptr1->isLeaf()) && (ptr0!=0 && !ptr0->isLeaf())){ return cached_best = ptr0->best_to_prune(my_depth+1); // case 3 } // case 5 - the better node of each child's best node. best ptr0_best = ptr0->best_to_prune(my_depth+1); best ptr1_best = ptr1->best_to_prune(my_depth+1); // The better to prune of two children is the one with a lower sum, // or the one that is deeper if they have the same sum. TYPE ptr0_best_sum = ptr0_best.ptr->sum(); TYPE ptr1_best_sum = ptr1_best.ptr->sum(); if(ptr0_best_sum < ptr1_best_sum || (ptr0_best_sum == ptr1_best_sum && ptr0_best.depth > ptr1_best.depth)){ return cached_best = ptr0_best; } return cached_best = ptr1_best; } /** The nodesum is the sum of just the node. * This exists purely because tsum is a private variable. */ TYPE nodesum() const { return tsum; } /** The sum is the sum of this node and its children (if they exist) */ TYPE sum() const { TYPE s = tsum; if(ptr0) s+=ptr0->sum(); if(ptr1) s+=ptr1->sum(); return s; } /** Increment this node by the given amount */ void add(TYPE val) { tsum+=val; // increment set_dirty(); } void set_dirty() { // make us dirty and our parent dirty if(dirty==false){ dirty = true; if(parent && parent->dirty==false){ parent->set_dirty(); // recurses to the root or the first dirty node. } } } }; /* end of node class */ class node *root; enum {root_depth=0, max_histogram_depth=128, ipv4_bits=32, ipv6_bits=128, }; iptreet &operator=(const iptreet &that); // not implemented protected: size_t nodes; // nodes in tree size_t maxnodes; // how many will we tolerate? 
uint64_t ctr_added; // how many were added uint64_t pruned; public: /**************************************************************** *** static member service routines ****************************************************************/ /* get the ith bit; 0 is the MSB */ static bool bit(const uint8_t *addr,size_t i){ return (addr[i / 8]) & (1<<((7-i)&7)); } /* set the ith bit to 1 */ static void setbit(uint8_t *addr,size_t i){ addr[i / 8] |= (1<<((7-i)&7)); } virtual ~iptreet(){} // required per compiler warnings /* copy is a deep copy */ iptreet(const iptreet &n):root(n.root ? new node(*n.root) : 0), nodes(n.nodes),maxnodes(n.maxnodes),ctr_added(),pruned(),cache(),cachenext(),cache_hits(),cache_misses(){}; /* create an empty tree */ iptreet(int maxnodes_):root(new node(0)),nodes(0),maxnodes(maxnodes_), ctr_added(),pruned(),cache(),cachenext(),cache_hits(),cache_misses(){ for(size_t i=0;isum();}; /* add a node; implementation below */ void add(const uint8_t *addr,size_t addrlen,TYPE val); /**************************************************************** *** cache ****************************************************************/ class cache_element { public: uint8_t addr[ADDRBYTES]; node *ptr; // 0 means cache entry is not in use cache_element(const uint8_t addr_[ADDRBYTES],size_t addrlen,node *p):addr(),ptr(p){ memcpy(addr,addr_,addrlen); } }; enum {cache_size=4}; typedef std::vector cache_t; cache_t cache; size_t cachenext; // which cache element to evict next uint64_t cache_hits; uint64_t cache_misses; void cache_remove(const node *p){ for(size_t i=0;i=cache.size()) cachenext = 0; memcpy(cache[cachenext].addr,addr,addrlen); cache[cachenext].ptr = ptr; } /**************************************************************** *** pruning ****************************************************************/ /* prune the tree, starting at the root. Find the node to prune and then prune it. * node that best_to_prune() returns a const pointer. 
But we want to modify it, so we * do a const_cast (which is completely fine). */ int prune_best_node(){ if(root->isLeaf()) return 0; // leaf nodes can't be pruned class node::best b = root->best_to_prune(root_depth); node *tnode = const_cast(b.ptr); if(tnode){ return tnode->prune(*this); } return 0; } /* Simple implementation to prune the table if over the limit. */ void prune_if_needed(){ while(nodes > maxnodes){ if(prune_best_node()==0) return; // cannot prune } } /**************************************************************** *** historam support ****************************************************************/ class addr_elem { public: addr_elem(const uint8_t *addr_,uint8_t depth_,int64_t count_): addr(),depth(depth_),count(count_){ memcpy((void *)addr,addr_,sizeof(addr)); } addr_elem() : addr(), depth(0), count(0) { memset((void *) addr, 0x00, sizeof(addr)); } addr_elem &operator=(const addr_elem &n){ memcpy((void *)this->addr,n.addr,sizeof(this->addr)); this->count = n.count; this->depth = n.depth; return *this; } virtual ~addr_elem(){} const uint8_t addr[ADDRBYTES]; // maximum size address; v4 addresses have addr[4..15]=0 uint8_t depth; // in bits; /depth TYPE count; bool is4() const { return isipv4(addr,ADDRBYTES);}; std::string str() const { return ipstr(addr,ADDRBYTES,depth); } }; /** get a histogram of the tree, and starting at a particular node * The histogram is reported for every node that has a sum. * This is leaf nodes and inleafediate nodes. * This means that there must be a way for converting TYPE(count) to a boolean. * * @param depth - tracks current depth (in bits) into address. 
* @param ptr - the node currently being queried * @param histogram - where the histogram is written */ typedef std::vector histogram_t; void get_histogram(int depth,const uint8_t *addr,const class node *ptr,histogram_t &histogram) const{ if(ptr->nodesum()){ histogram.push_back(addr_elem(addr,depth,ptr->nodesum())); //return; } if(depth>max_histogram_depth) return; // can't go deeper than this now /* create address with 0 and 1 added */ uint8_t addr0[ADDRBYTES]; uint8_t addr1[ADDRBYTES]; memset(addr0,0,sizeof(addr0)); memcpy(addr0,addr,(depth+7)/8); memset(addr1,0,sizeof(addr1)); memcpy(addr1,addr,(depth+7)/8); setbit(addr1,depth); if(ptr->ptr0) get_histogram(depth+1,addr0,ptr->ptr0,histogram); if(ptr->ptr1) get_histogram(depth+1,addr1,ptr->ptr1,histogram); } void get_histogram(histogram_t &histogram) const { // adds the histogram to the passed in vector uint8_t addr[ADDRBYTES]; memset(addr,0,sizeof(addr)); get_histogram(0,addr,root,histogram); } /**************************************************************** *** output routines ****************************************************************/ // returns true if addr[4..15]==0 static std::string itos(int n){ char buf[64]; snprintf(buf,sizeof(buf),"%d",n); return std::string(buf); } static bool isipv4(const uint8_t *addr,size_t addrlen) { if(addrlen==4) return true; for(u_int i=4;i void iptreet::add(const uint8_t *addr,size_t addrlen,TYPE val) { prune_if_needed(); if(addrlen > ADDRBYTES) addrlen=ADDRBYTES; u_int addr_bits = addrlen * 8; // in bits /* check the cache first */ ssize_t i = cache_search(addr,addrlen); if(i>=0){ cache[i].ptr->add(val); return; } /* descend the radix tree until we run out of bits, or we have a node with no pointers and a non-zero sum. 
*/ node *ptr = root; // start at the root for(u_int depth=0;depth<=addr_bits;depth++){ if(depth==addr_bits){ // reached end of address ptr->add(val); // increment this node (and all of its descendants cache_replace(addr,addrlen,ptr); return; } if((ptr->tsum > 0) && (ptr->ptr0==0) && (ptr->ptr1==0)){ ptr->add(val); cache_replace(addr,addrlen,ptr); return; } /* Not a leaf node, so go down a level based on the next bit, * extending if necessary. */ if(bit(addr,depth)==0){ if(ptr->ptr0==0){ ptr->ptr0 = new node(ptr); nodes++; ctr_added++; } ptr = ptr->ptr0; } else { if(ptr->ptr1==0){ ptr->ptr1 = new node(ptr); nodes++; ctr_added++; } ptr = ptr->ptr1; } } assert(0); // should never happen } /* a structure for a pair of IP addresses */ class ip2tree:public iptreet { public: /* de-interleave a pair of addresses */ static void un_pair(uint8_t *addr1,uint8_t *addr2,size_t addr12len,size_t *depth1,size_t *depth2,const uint8_t *addr,size_t addrlen,size_t depth){ for(size_t i=0;i::bit(addr,i*2)) iptreet::setbit(addr1,i); if(iptreet::bit(addr,i*2+1)) iptreet::setbit(addr2,i); } *depth1 = (depth+1)/2; *depth2 = (depth)/2; } ip2tree(int maxnodes_):iptreet(maxnodes_){} virtual ~ip2tree(){}; /* str requires more work */ static std::string ip2str(const uint8_t *addr,size_t addrlen,size_t depth){ uint8_t addr1[16];memset(addr1,0,sizeof(addr1)); uint8_t addr2[16];memset(addr2,0,sizeof(addr2)); size_t depth1=0,depth2=0; ip2tree::un_pair(addr1,addr2,sizeof(addr1),&depth1,&depth2,addr,addrlen,depth); return ipstr(addr1,sizeof(addr1),depth1) + " " + ipstr(addr2,sizeof(addr2),depth2); } /* 2tree needs its own dump because a different ipstr is called */ std::ostream & dump(std::ostream &os) const { histogram_t histogram; get_histogram(histogram); os << "nodes: " << nodes << " histogram size: " << histogram.size() << "\n"; for(size_t i=0;i::bit(addr1,i)) iptreet::setbit(addr,i*2); if(iptreet::bit(addr2,i)) iptreet::setbit(addr,i*2+1); } add(addr,addrlen*2,val); /* Add it */ } }; typedef 
iptreet iptree; // simple tree for counting; reimplement so val is tcount template std::ostream & operator <<(std::ostream &os,const iptreet &ipt) { return ipt.dump(os); } inline std::ostream & operator <<(std::ostream &os,const ip2tree &ipt) { return ipt.dump(os); } #endif tcpflow/configure.ac0000644000175000017500000004074712263701151013442 0ustar dimadima# -*- Autoconf -*- # tcpflow configure.ac # # Process this file with autoconf to produce a configure script. # Order is largely irrevellant, although it must start with AC_INIT and end with AC_OUTPUT # See http://autotoolset.sourceforge.net/tutorial.html # and http://www.openismus.com/documents/linux/automake/automake.shtml AC_PREREQ(2.57) AC_INIT(TCPFLOW, 1.4.4, bugs@afflib.org) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_FILES([Makefile src/Makefile tests/Makefile doc/Makefile]) AC_CONFIG_FILES([doc/tcpflow.1]) AC_CONFIG_FILES([tcpflow.spec]) AC_CONFIG_HEADER([config.h]) AM_INIT_AUTOMAKE AM_MAINTAINER_MODE AC_PREFIX_PROGRAM(tcpflow) dnl build for same location # Endian check is required for MD5 implementation AC_C_BIGENDIAN # Programs we will be using AC_PROG_CC AC_PROG_CXX AM_PROG_CC_C_O dnl allow per-product flags AC_PROG_INSTALL # use C++11 mode if available; HAVE_CXX11 is defined in config.h if so. Don't # use the GNU C++11 extensions for portability's sake (noext). 
AC_LANG_PUSH(C++) AX_CXX_COMPILE_STDCXX_11(noext, optional) AC_LANG_POP() ################################################################ ## See if we are running on mingw # http://osdir.com/ml/gnu.mingw.devel/2003-09/msg00040.html # Note: Windows 95 WINVER=0x400 # Windows 98 WINVER=0x400 _WIN32_WINDOWS=0x0410 # Windows Me WINVER=0x400 _WIN32_WINDOWS=0x0490 # Windows NT 4.0 WINVER=0x0400 _WIN32_WINNT=0x0400 # Windows NT 4.0 SP3 WINVER=0x0400 _WIN32_WINNT=0x0403 # Windows 2000 WINVER=0x500 _WIN32_WINNT=0x0500 # Windows XP WINVER=0x501 _WIN32_WINNT=0x0501 # Windows Server 2003 WINVER=0x502 _WIN32_WINNT=0x0502 # # mingw32 includes i686-w64-mingw32 and x86_64-w64-mingw32 mingw="no" case $host in *-*-*linux*-*) AC_DEFINE([__LINUX__],1,[Linux operating system functions]) ;; *-*-mingw32*) LIBS="-lpsapi -lws2_32 -lgdi32 $LIBS" CPPFLAGS="-DUNICODE -D_UNICODE -D__MSVCRT_VERSION__=0x0601 -DWINVER=0x0500 -D_WIN32_WINNT=0x0500 -g $CPPFLAGS" CPPFLAGS="$CPPFLAGS --static" CFLAGS="$CFLAGS --static -static-libgcc -static-libstdc++" CXXFLAGS="$CXXFLAGS -Wno-format " # compiler mingw-4.3.0 is broken on I64u formats CXXFLAGS="$CXXFLAGS --static -static-libgcc -static-libstdc++" LDFLAGS="$LDFLAGS --static" mingw="yes" ;; esac if test x"${mingw}" == "xno" ; then # Bring additional directories where things might be found into our # search path. I don't know why autoconf doesn't do this by default for spfx in /usr/local /opt/local /sw /usr/local/ssl /usr/boost/include ; do AC_MSG_NOTICE([checking ${spfx}/include]) if test -d ${spfx}/include; then CPPFLAGS="-I${spfx}/include $CPPFLAGS" LDFLAGS="-L${spfx}/lib $LDFLAGS" AC_MSG_NOTICE([ *** ADDING ${spfx}/include to CPPFLAGS *** ]) AC_MSG_NOTICE([ *** ADDING ${spfx}/lib to LDFLAGS *** ]) fi done AC_MSG_NOTICE([ CPPFLAGS = ${CPPFLAGS} ]) AC_MSG_NOTICE([ LDFLAGS = ${LDFLAGS} ]) fi if test -r /bin/uname.exe ; then if test "`uname -o`" == "Msys" ; then AC_MSG_NOTICE([Compiling with Msys. 
Setting flags appropriately.]) LIBS="$LIBS -lws2_32 -lgdi32" LDFLAGS="$LDFLAGS -Wl,--enable-auto-import" fi fi ################################################################ # # Enable all the compiler debugging we can find # # This is originally from PhotoRec, but modified substantially by Simson # Figure out which flags we can use with the compiler. # # These I don't like: # -Wdeclaration-after-statement -Wconversion # doesn't work: -Wunreachable-code # causes configure to crash on gcc-4.2.1: -Wsign-compare-Winline # causes warnings with unistd.h: -Wnested-externs # Just causes too much annoyance: -Wmissing-format-attribute # Check GCC WARNINGS_TO_TEST="-MD -D_FORTIFY_SOURCE=2 -Wpointer-arith -Wmissing-declarations -Wmissing-prototypes \ -Wshadow -Wwrite-strings -Wcast-align -Waggregate-return \ -Wbad-function-cast -Wcast-qual -Wundef -Wredundant-decls -Wdisabled-optimization \ -Wfloat-equal -Wmultichar -Wc++-compat -Wmissing-noreturn " if test $mingw = "no" ; then # add the warnings we do not want to do on mingw WARNINGS_TO_TEST="$WARNINGS_TO_TEST -Wall -Wstrict-prototypes" fi echo "Warnings to test: $WARNINGS_TO_TEST" for option in $WARNINGS_TO_TEST do SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $option" AC_MSG_CHECKING([whether gcc understands $option]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [has_option=yes], [has_option=no; CFLAGS="$SAVE_CFLAGS"]) AC_MSG_RESULT($has_option) unset has_option unset SAVE_CFLAGS if test $option = "-Wmissing-format-attribute" ; then AC_DEFINE(HAVE_MISSING_FORMAT_ATTRIBUTE_WARNING,1, [Indicates that we have the -Wmissing-format-attribute G++ warning]) fi done unset option # C++ # We don't use these warnings: # -Waggregate-return -- aggregate returns are GOOD; they simplify code design # We can use these warnings after ZLIB gets upgraded: # -Wundef --- causes problems with zlib # -Wcast-qual # -Wmissing-format-attribute --- Just too annoying AC_LANG_PUSH(C++) WARNINGS_TO_TEST="-Wall -MD -D_FORTIFY_SOURCE=2 -Wpointer-arith \ 
-Wshadow -Wwrite-strings -Wcast-align \ -Wredundant-decls -Wdisabled-optimization \ -Wfloat-equal -Wmultichar -Wmissing-noreturn \ -Woverloaded-virtual -Wsign-promo \ -funit-at-a-time" if $CXX -dM -E - < /dev/null | grep -q __clang__ ; then AC_MSG_RESULT([Using clang]) clang='yes' else AC_MSG_RESULT([not using clang]) clang='no' # -Wstrict-null-sentinel is not supported under clang or under Darwin's gcc WARNINGS_TO_TEST="$WARNINGS_TO_TEST -Wstrict-null-sentinel" fi if test $mingw = "no" ; then # add the warnings we don't want to do on mingw WARNINGS_TO_TEST="$WARNINGS_TO_TEST -Weffc++" fi for option in $WARNINGS_TO_TEST do SAVE_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$CXXFLAGS $option" AC_MSG_CHECKING([whether g++ understands $option]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [has_option=yes], [has_option=no; CXXFLAGS="$SAVE_CXXFLAGS"]) AC_MSG_RESULT($has_option) unset has_option unset SAVE_CXXFLAGS done unset option AC_LANG_POP() ################################################################ ## Includes m4_include([src/dfxml/src/dfxml_configure.m4]) m4_include([src/be13_api/be13_configure.m4]) ################################################################ ## # sqlite3 is fun AC_CHECK_HEADERS([sqlite3.h]) AC_CHECK_LIB([sqlite3],[sqlite3_open]) # # ZLIB is required for decompressing # Note you cannot put comments in the AC_MSG_ERROR for some reason AC_CHECK_LIB([z],[uncompress],, AC_MSG_ERROR([zlib libraries not installed; try installing zlib-dev zlib-devel zlib1g-dev or libz-dev])) AC_CHECK_HEADERS([zlib.h]) ################################################################ ## regex support ## there are several options ## tre is better than regex AC_CHECK_HEADER([lightgrep/api.h]) AC_CHECK_LIB([lightgrep], [lg_create_pattern]) AC_CHECK_HEADERS([regex.h tre/tre.h]) AC_CHECK_LIB([regex],[regcomp]) AC_CHECK_LIB([tre],[tre_regcomp]) AC_CHECK_FUNCS([regcomp tre_regcomp tre_version]) ################################################################ ## OpenSSL Support 
(required for AFFLIB and hash_t ) AC_CHECK_HEADERS([openssl/aes.h openssl/bio.h openssl/evp.h openssl/hmac.h openssl/md5.h openssl/pem.h openssl/rand.h openssl/rsa.h openssl/sha.h openssl/pem.h openssl/x509.h]) AC_CHECK_LIB([crypto],[EVP_get_digestbyname]) # if crypto is available, get it AC_CHECK_LIB([md],[MD5]) # if libmd is available, get it AC_CHECK_LIB([ssl],[SSL_library_init],, AC_MSG_ERROR([OpenSSL developer library 'libssl-dev' or 'openssl-devel' not installed])) AC_CHECK_FUNCS([MD5_Init EVP_get_digestbyname]) ################################################################ # PTHREAD support # With special nods to compiling under mingw if test x"$mingw" = x"yes"; then #AC_DEFINE([HAVE_STRUCT_TIMESPEC],1,[Required for mingw]) CFLAGS="$CFLAGS -mthreads " CPPFLAGS="-DPTW32_STATIC_LIB $CPPFLAGS" CXXFLAGS="$CXXFLAGS -mthreads " AC_DEFINE(HAVE_PTHREAD,1,[Defined to POSIX threads for mingw]) #AC_MSG_NOTICE([pthreads now disabled under mingw]) else m4_include([m4/ax_pthread.m4]) AX_PTHREAD([ echo Using settings from [AX_PTHREAD] LIBS="$PTHREAD_LIBS $LIBS" CFLAGS=" $PTHREAD_CFLAGS $CFLAGS" CXXFLAGS="$PTHREAD_CFLAGS $CXXFLAGS " CPPFLAGS="$PTHREAD_CFLAGS $CPPFLAGS " CC="$PTHREAD_CC" ]) fi AC_CHECK_HEADERS([pthread.h]) AC_CHECK_LIB([pthreadGC2],[pthread_create]) ################################################################ # Boost AC_CHECK_HEADERS([boost/version.hpp],, AC_MSG_WARN([tcpflow now requires boost interval_map and interval_set.]) if test x"$mingw" = x"yes" ; then AC_MSG_ERROR([Please install mingw32-boost and mingw64-boost]) else AC_MSG_ERROR([Please install boost-devel or libboost-dev.]) fi ) # Now make sure we have the correct boost version AC_LANG_PUSH(C++) have_interval=yes AC_CHECK_HEADERS([boost/icl/interval.hpp boost/icl/interval_map.hpp boost/icl/interval_set.hpp],,have_interval=no) if test "${have_interval}" != yes ; then AC_MSG_ERROR([ tcpflow requires a version of Boost that has Boost interval_map and interval_set. Your version is too old! 
Please install a current version of Boost from http://www.boost.org/users/download/. Try these commands: wget http://downloads.sourceforge.net/project/boost/boost/1.53.0/boost_1_53_0.tar.gz?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fboost%2Ffiles%2Fboost%2F1.53.0%2F&ts=1362359340&use_mirror=hivelocity tar xfz boost_1_53_0.tar.gz cd boost_1_53_0 sh bootstrap.sh ./b2 sudo ./b2 install ... and then re-run configure! ]) fi AC_LANG_POP() ################################################################ # drawing support via cairo # # Cairo requires these to be explicitly included on mingw (and perhaps others): AC_CHECK_LIB([expat],[XML_ParserCreate]) AC_CHECK_LIB([pixman-1],[pixman_region_init]) AC_CHECK_LIB([bz2],[BZ2_bzDecompress]) AC_CHECK_LIB([freetype],[FT_Init_FreeType]) # requires bz2 AC_CHECK_LIB([fontconfig],[FcBlanksCreate]) # requires freetype expat AC_CHECK_HEADERS([cairo/cairo.h cairo/cairo-pdf.h]) AC_CHECK_HEADERS([cairo.h cairo-pdf.h]) AC_CHECK_LIB([cairo],[cairo_create], , [ AC_MSG_WARN([ *** cairo libraries not detected. *** Please install cairo-devel to get 1-page PDF summary generation. ]) Fmissing_library="cairo-devel $missing_library " Umissing_library="libcairo2-dev $missing_library " Mmissing_library="cairo-devel " ]) ################################################################ # pcap support. A bit more involved than normal due to the error message # pcap=test AC_ARG_ENABLE([pcap],[ --enable-pcap=false to disable libpcap even if present]) if test "${enableval}" = false ; then pcap=false fi if test $pcap = test ; then AC_CHECK_HEADERS(pcap.h pcap/pcap.h ) if test x"$mingw" = x"yes" ; then AC_MSG_WARN([pcap not supported under mingw]) else AC_CHECK_LIB(pcap, pcap_lookupdev, , [ AC_MSG_WARN([ Can't find the pcap library (libpcap.a). tcpflow will not live capture or compile rules without pcap! If you need rules or live capture, you must install the pcap and/or pcap-dev library. 
Please execute this command: UBUNTU: sudo apt-get install libpcap-dev DEBIAN: sudo apt-get install libpcap-dev FEDORA: sudo yum install libpcap-devel MINGW: Sorry! libpcap is not currently available when cross-compiling. If your libpcap is installed in a non-standard location, you will need to use the --with-pcap=directory to specify where your pcap is located.]) Fmissing_library="$Fmissing_library libpcap-dev " Umissing_library="$Umissing_library libpcap-dev " Mmissing_library="$Mmissing_library libpcap " ]) fi fi ################################################################ # Specify our other headers AC_HEADER_STDC AC_CHECK_HEADERS([\ arpa/inet.h \ ctype.h \ fcntl.h \ inttypes.h \ linux/if_ether.h \ net/ethernet.h \ netinet/in.h \ netinet/in_systm.h \ netinet/tcp.h \ regex.h \ semaphore.h \ signal.h \ string.h \ strings.h \ stdio.h \ stdlib.h \ string.h \ syslog.h \ sys/cdefs.h \ sys/mman.h \ sys/resource.h \ sys/socket.h \ sys/types.h \ sys/bitypes.h \ sys/wait.h \ unistd.h \ ]) AC_CHECK_FUNCS([getdtablesize]) # # These all require additional headers. 
See: # http://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Present-But-Cannot-Be-Compiled.html # AC_CHECK_HEADERS([net/if_var.h], [], [], [[ #include #include #include ]]) AC_CHECK_HEADERS([net/if.h], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #if HAVE_NET_IF_VAR_H #include #endif #ifdef HAVE_SYS_SOCKET_H #include #endif ]]) AC_CHECK_HEADERS([netinet/ip_var.h], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #if HAVE_NETINET_IN_H #include #endif ]]) AC_CHECK_HEADERS([netinet/ip.h], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #if HAVE_NETINET_IN_H #include #endif ]]) AC_CHECK_HEADERS([netinet/ip_ether.h], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #if HAVE_NETINET_IN_H #include #endif ]]) AC_CHECK_HEADERS([netinet/tcpip.h], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #if HAVE_NETINET_IN_H #include #endif #if HAVE_NETINET_IP_VAR_H #include #endif #if HAVE_NETINET_TCP_H #include #endif ]]) AC_CHECK_FUNCS([inet_ntop sigaction sigset strnstr setuid setgid mmap futimes futimens ]) AC_CHECK_TYPES([socklen_t], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_ARPA_INET_H #include #endif #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_NETINET_TCP_H #include #endif ]] ) AC_CHECK_TYPES([sa_family_t], [], [], [[ #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_ARPA_INET_H #include #endif #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_NETINET_TCP_H #include #endif ]] ) ### AC_CHECK_TYPES([tcp_seq], [], [], ### [[ ### /* from sysdep.h */ ### #include ### #include ### ### #ifdef HAVE_UNISTD_H ### # include ### #endif ### ### #ifndef __FAVOR_BSD ### #define __FAVOR_BSD ### #endif ### ### #ifndef __USE_BSD ### #define __USE_BSD ### #endif ### ### #ifdef HAVE_ARPA_INET_H ### #include ### #endif ### ### #ifdef HAVE_SYS_SOCKET_H ### #include ### #endif ### ### #ifdef HAVE_NETINET_TCP_H ### #include ### #endif ### ]] ### ) ############## drop optimization flags if requested ################ # Should we disable 
optimization? AC_ARG_WITH([opt], AC_HELP_STRING([--without-opt], [Drop all -O C flags])) # Or maybe just tone it down a bit? AC_ARG_WITH([o3], AC_HELP_STRING([--without-o3], [Do not force O3 optimization; use default level])) if test x"${AFF_NOOPT}" != "x" ; then AC_MSG_NOTICE([Dropping optimization flags because AFF_NOOPT is set.]) with_opt="no"; fi if test "${with_opt}" = "no" ; then AC_MSG_NOTICE([Dropping optimization flags]) CFLAGS=`echo "$CFLAGS" | sed s/-O[[0-9]]//` # note the double quoting! CXXFLAGS=`echo "$CXXFLAGS" | sed s/-O[[0-9]]//` AC_MSG_NOTICE([Removing -D_FORTIFY_SOURCE=2]) CPPFLAGS=`echo $CPPFLAGS | sed s/-D_FORTIFY_SOURCE=2//` CXXFLAGS=`echo $CXXFLAGS | sed s/-D_FORTIFY_SOURCE=2//` CFLAGS=`echo $CFLAGS | sed s/-D_FORTIFY_SOURCE=2//` else # and increase optimizer from -O2 to -O3 if not explicitly forbidden if test "${with_o3}" != "no" ; then CFLAGS=`echo -g "$CFLAGS" | sed s/-O2/-O3/` # note the double quoting! CXXFLAGS=`echo -g "$CXXFLAGS" | sed s/-O2/-O3/` fi fi AC_OUTPUT ## Finally, record the values of CFLAGS, CPPFLAGS, and CXXFLAGS for DFXML echo "#define CPPFLAGS \"$CPPFLAGS\"" >> config.h echo "#define CFLAGS \"$CFLAGS\"" >> config.h echo "#define CXXFLAGS \"$CXXFLAGS\"" >> config.h echo "#define LIBS \"$LIBS\"" >> config.h echo "#define LDFLAGS \"$LDFLAGS\"" >> config.h if test x"$GIT_COMMIT" != "x" ; then echo "#define GIT_COMMIT \"$GIT_COMMIT\"" >> config.h fi if test "x${Fmissing_library}" != "x" ; then AC_MSG_NOTICE([*** You have missing libraries. To install them:]) AC_MSG_NOTICE([*** Fedora: sudo yum install $Fmissing_library]) AC_MSG_NOTICE([*** Ubuntu: sudo apt-get install $Umissing_library]) AC_MSG_NOTICE([*** MacOS: sudo port install $Mmissing_library]) fi tcpflow/doc/0000755000175000017500000000000012263701323011706 5ustar dimadimatcpflow/doc/announce_1_4.txt0000644000175000017500000000725212263701151014725 0ustar dimadimaI'm pleased to announce the release of tcpflow version 1.4.0. 
Key elements in 1.4.0 include: Completely rewritten TCP implementation that: * Handles TCP flows larger than 4GiB * Handles TCP packets sent after a connection is closed. Such packets are compared with the packets from the connection that were already received. If the packets match, they are discarded as retransmissions. If they do not match they are put in new transcript files. Incompatible Changes: * -e (alternate colors of console output) has been renamed -J so that -e and -E can be used in a manner consistent with bulk_extractor Completely rewritten HTTP parser * Handles multiple HTTP objects per connection * Optional http_cmd will run a program or script for each HTTP attachment as it is received. For example, to run the program /bin/echo and provide the filename of the attachment for each attachment, specify: tcpflow -Shttp_cmd=/bin/echo ... * Optional timeout on TCP connections causes processing of HTTP objects before the HTTP connection closes: -Stcp_timeout=10 * Optional alert file descriptor causes the HTTP parser to alert as files are opened and closed by the HTTP parser. 
e.g.: $ ./tcpflow -o out -a -E http -S http_alert_fd=1 -r ../tests/multifile_25_21.pcap open out/038.122.002.045.00080-192.168.123.101.04634-HTTPBODY-001.png close out/038.122.002.045.00080-192.168.123.101.04634-HTTPBODY-001.png open out/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-001.png close out/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-001.png open out/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-002.png close out/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-002.png open out/038.122.002.045.00080-192.168.123.101.04648-HTTPBODY-001.png close out/038.122.002.045.00080-192.168.123.101.04648-HTTPBODY-001.png open out/038.122.002.045.00080-192.168.123.101.04649-HTTPBODY-001.png close out/038.122.002.045.00080-192.168.123.101.04649-HTTPBODY-001.png open out/038.122.002.045.00080-192.168.123.101.04654-HTTPBODY-001 close out/038.122.002.045.00080-192.168.123.101.04654-HTTPBODY-001 open out/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-001 close out/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-001 open out/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-002 close out/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-002 ... New Scalability features: * Automatically creates new directories as necessary for output filenames that include forward slashes ("/"). * -Fk option automatically bins up to a thousand flows in a thousand directories in one directory layer, easily handling up to million flows. * -Fm option automatically bins up to a thousand flows in a million directories (two directory layers), easily handling up to billion flows. * -Fg option automatically bins up to a thousand flows in a billion directories (three directory layers), easily handling up to trillion flows. Additional features: * Produces a one-page visualization (report.pdf) of the packets that were analyzed. * New -w option writes a PCAP file of packets not processed by tcpflow. 
* Better support for decoding and decompressing HTTP objects, including multiple objects sent over a single HTTP stream. * Full support for the bulk_extractor plug-in system You can download version 1.4.0, both source and precompiled windows binaries, from: https://github.com/simsong/tcpflow/downloads (what we didn't get to) The following is scheduled for release 1.5: * Full handling of radiotap files * Passive DNS implementation * top-100 connection map tcpflow/doc/tcpflow.1.in0000644000175000017500000002614712263701151014064 0ustar dimadima.\"edit the file tcpflow.1.in, not tcpflow.1" .\"" .\"" .TH tcpflow 1 "2013-04-13" "tcpflow @VERSION@" "tcpflow @VERSION@" .SH NAME tcpflow \- TCP flow recorder .SH SYNOPSIS .na .B tcpflow [\c .BI \-aBcCDhpsvVZ\fR\c ] [\c .BI \-b \ max_bytes\fR\c ] [\c .BI \-d \ debug_level\fR\c ] [\c .BI \-[eE] \ scanner\fR\c ] [\c .BI \-f \ max_fds\fR\c ] [\c .BI \-F[ctTXMkmg]\fR\c ] [\c .BI \-i \ iface\fR\c ] [\c .BI \-L \ semlock\fR\c ] [\c .BI \-m \ min_bytes\fR\c ] [\c .BI \-o \ outdir\fR\c ] [\c .BI \-r \ file1.pcap\fR\c ] [\c .BI \-R \ file0.pcap\fR\c ] [\c .BI \-S name=value\fR\c ] [\c .BI \-T[filename\ template]\fR\c ] [\c .BI \-w file\fR\c ] [\c .BI -x\ scanner\fR\c ] [\c .BI \-X \ file.xml\fR\c ] [\c .BI expression\fR\c ] .SH DESCRIPTION .LP .B tcpflow is a program that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis or debugging. Rather than showing packet-by-packet information, tcpflow reconstructs the actual data streams and stores each flow in a separate file for later analysis. tcpflow understands TCP sequence numbers and will correctly reconstruct data streams regardless of retransmissions or out-of-order delivery. 
tcpflow provides control over filenames for automatic binning of connections by protocol, IP address or connection number, and has a sophisticated plug-in system for decompressing compressed HTTP connections, undoing MIME encoding, or calling user-provided programs for post-processing. .LP By default tcpflow stores all captured data in files that have names of the form: .in +.5i .nf \fB192.168.101.102.02345-010.011.012.013.45103\fP .fi .in -.5i \,...where the contents of the above file would be data transmitted from host 192.168.101.102 port 2345, to host 10.11.12.13 port 45103. .LP If you want to simply process a few hundred thousand packets and see what you have, try this: .in +.5i .nf \fBtcpflow -a -o outdir -Fk -r packets.pcap\fP .fi .in -.5i This will cause tcpflow to perform (-a) all processing, store the output in a directory called .BI outdir, bin the output in directories of 1000 connections each, and read its input from the file \fBpackets.pcap\fP. More sophisticated processing is possible, of course. .SH OPTIONS .TP .B \-a Enable all processing. Same as .B \-e all. .TP .B \-B Force binary output even when printing to console with .B -C or .B -c. .TP .B \-b \fImax_bytes\fP Specifies the maximum size of a captured flow. Any bytes beyond \fImax_bytes\fP from the first byte captured will be discarded. The default is to store an unlimited number of bytes per flow. \fBNote:\fP previous versions of \fBtcpflow\fP could only store a maximum of 4GiB per flow, but version 1.4 and above can really store an unlimited amount of bytes. Good thing that modern disks are so big, eh? .TP .B \-c Console print. Print the contents of packets to stdout as they are received, without storing any captured data to files (implies .TP .B \-C Console print without the packet source and destination details being printed. 
Print the contents of packets to stdout as they are received, without storing any captured data to files (implies .B \-e When outputting to the console each flow will be output in different colors (blue for client to server flows, red for server to client flows, green for undecided flows). .B -s ). .TP .B \-D Console output should be in hex. .TP .B \-d Debug level. Set the level of debugging messages printed to stderr to \fIdebug_level\fP. Higher numbers produce more messages. .B \-d 0 causes completely silent operation. .B \-d 1 , the default, produces minimal status messages. .B \-d 10 produces verbose output equivalent to .B \-v . Numbers higher than 10 can produce a large amount of debugging information useful only to developers. .TP .B \-E name Disable all scanners and then enable scanner .B name .TP .B \-e name Enable scanner .B name. .TP .B \-e all Enables all scanners. Same as .B \-a .TP .B \-e http Perform HTTP post-processing ("After" processing). If the output file is .in +.5i .nf \fB208.111.153.175.00080-192.168.001.064.37314,\fP .fi .in -.5i Then the post-processing will create the files: .in +.5i .nf \fB208.111.153.175.00080-192.168.001.064.37314-HTTP\fP \fB208.111.153.175.00080-192.168.001.064.37314-HTTPBODY\fP .fi .in -.5i If the HTTPBODY was compressed with GZIP, you may get a third file as well: .in +.5i .nf \fB208.111.153.175.00080-192.168.001.064.37314-HTTPBODY-GZIP\fP .fi .in -.5i Additional information about these streams, such as their MD5 hash value, is also written to the DFXML file .TP .B \-F[format] Specifies format for output filenames. Format specifiers: .B c appends the connection counter to ALL filenames. .B t prepends each filename with a Unix timestamp. .B T prepends each filename with an ISO-8601 timestamp. .B X Do not output any files (other than the .B report.xml report files). .TP .B \-FM Include MD5 of each flow in the DFXML output. .TP .B \-FX Suppresses file output entirely (DFXML file is still produced). 
.TP .B \-Fk bin output in 1K directories .TP .B \-Fm bin output in 1M directories (2 levels) .TP .B \-Fg bin output in 1G directories (3 levels) .TP .B \-T[format] Specifies an arbitrary template for filenames. .B %A expands to source IP address. .B %a expands to source IP port. .B %B expands to destination IP address. .B %b expands to destination IP port. .B %T expands to timestamp in ISO8601 format. .B %t expands to timestamp in Unix time_t format. .B %V expands to "--" if a VLAN is present. .B %v expands to the VLAN number if a VLAN is present. .B %C expands to "c" if the connection count>0. .B %c expands to the connection count if the connection count>0. .B %# always expands to the connection count. .B %% prints a "%". .TP .B \-f\fImax_fds\fP Max file descriptors used. Limit the number of file descriptors used by tcpflow to \fImax_fds\fP. Higher numbers use more system resources, but usually perform better. If the underlying operating system supports the .B setrlimit() system call, the OS will be asked to enforce the requested limit. The default is for tcpflow to use the maximum number of file descriptors allowed by the OS. The .B \-v option will report how many file descriptors tcpflow is using. .TP .B \-h Help. Print usage information and exit. .TP .B \-hh More help. Print more usage information and exit. .TP .B \-i \fIiface\fP Interface name. Capture packets from the network interface named \fIiface\fP. If no interface is specified with .B \-i , a reasonable default will be used by libpcap automatically. .TP .B \-L \fIsemlock_name\fP Specifies that \fIsemlock_name\fP should be used as a Unix semaphore to prevent two different copies of tcpflow running in two different processes but outputting to the same standard output from printing on top of each other. This is an application of Unix named semaphores; bet you have never seen one before. .TP .B \-l Treat the following arguments as filenames with an assumed \fB-r\fP command before each one. 
This allows you to read a lot of files at once with shell globbing. For example, to process all of the pcap files in the current directory, use this: .in +.5i .nf \fBtcpflow -o out -a -l *.pcap\fP .fi .in -.5i .TP .B \-J Output flow information to console in multiple colors. \fBNOTE: This option was changed from tcpflow 1.3.\fP .TP .B \-m \fImin_size\fP Forces a new connection output file when there is a skip in the TCP session of \fImin_size\fP bytes or more. .TP .B \-o \fIoutdir\fP Specifies the output directory where the transcript files will be written. .TP .B \-P No purge. Normally tcpflow removes connections from the hash table after the connection is closed with a FIN. This conserves memory but takes additional CPU time. Selecting this option causes the std::tr1::unordered_map to grow without bounds, as tcpflow did prior to version 1.1. That makes tcpflow run faster if there are fewer than 10 million connections, but can lead to out-of-memory errors. .TP .B \-p No promiscuous mode. Normally, tcpflow attempts to put the network interface into promiscuous mode before capturing packets. The \fB-p\fP option tells tcpflow \fInot\fP to put the interface into promiscuous mode. Note that it might already be in promiscuous mode for some other reason. .TP .B \-q Quiet mode --- don't print warnings. Currently the only warning that \fBtcpflow\fP prints is a warning when more than 10,000 files are created that the user should have provided the \fB-Fk\fP, \fB-Fm\fP, or \fB-Fg\fP options. We might have other warnings in the future. .TP .B \-r Read from file. Read packets from \fIfile\fP, which was created using the .B \-w option of .IR tcpdump (1). This option may be repeated any number of times. Standard input is used if \fIfile\fP is "-". Note that for this option to be useful, tcpdump's .B \-s option should be used to set the snaplen to the MTU of the interface (e.g., 1500) while capturing packets. .TP .B \-R Read from a file, but only to complete TCP flows. 
This option is used when .IR tcpflow is used to process a series of files that are captured over time. For each time period \fIn,\fP file \fIfilen.pcap\fP should be processed with \fBR -r \fIfilen.pcap\fP, while \fIfile(n-1).pcap\fP should be processed with \fIR -R file(n-1).pcap.\fP .TP .B \-S\fIname\fB=\fIvalue\fP Sets a \fIname\fP parameter to be equal to \fIvalue\fP for a plug-in. Use \fB-hh\fP to find out all of the settable parameters. .TP .B \-s Strip non-printables. Convert all non-printable characters to the "." character before printing packets to the console or storing them to a file. .TP .B \-V Print the version number and exit. .TP .B \-v Verbose operation. Verbosely describe tcpflow's operation. Equivalent to \fB \-d 10\fP. .TP .B \-w \fIfilename.pcap\fP Write packets that were not processed to \fIfilename.pcap\fP. Typically this will be UDP packets. .TP .B \-X \fIfilename.xml\fP Write a DFXML report file to \fIfilename.xml\fP. The file contains a record of every tcp connection, how the tcpflow program was compiled, and the computer on which tcpflow was run. .TP .B \-Z Don't decompress gzip-compressed streams. .\"START -- tcpdump excerpt" .SH EXAMPLES .LP To record all packets arriving at or departing from \fIsundown\fP and extract all of the HTTP attachments: .RS .nf \fBtcpflow -e scan_http -o outdir host sundown\fP .fi .RE .LP To record traffic between \fIhelios\fR and either \fIhot\fR or \fIace\fR and bin the results into 1000 files per directory and calculate the MD5 of each flow: .RS .nf \fBtcpflow -X report.xml -e scan_md5 -o outdir -Fk host helios and \\( hot or ace \\)\fP .fi .SH BUGS Please send bug reports to simsong@acm.org. .LP tcpflow currently does not understand IP fragments. Flows containing IP fragments will not be recorded correctly. .SH AUTHORS Originally by Jeremy Elson . Substantially modified and maintained by Simson L. Garfinkel . 
Network visualization code by Michael Shick .LP The current version of this software is available at .RS .I http://www.digitalcorpora.org/downloads/tcpflow/ .LP .RE An announcement mailing list for this program is at: .RS .I http://groups.google.com/group/tcpflow-users .RE .SH "SEE ALSO" tcpdump(1), nit(4P), bpf(4), pcap(3), pcap-savefile(5), pcap-filter(7) tcpflow/doc/announce_1_3.txt0000644000175000017500000000061712263701151014722 0ustar dimadimaI'm pleased to announce the release of tcpflow version 1.3.0. Key elements in 1.3.0 include: - Compiles with mingw32 and mingw64 for 32-bit and 64-bit windows. I am now distributing pre-compiled binaries of some releases. - Better support for DFXML (fixed some bugs) You can download version 1.3.0, both source and precompiled windows binaries, from: https://github.com/simsong/tcpflow/downloads tcpflow/doc/timeline_1.4.txt0000644000175000017500000000602012263701151014634 0ustar dimadimaTimeline for 1.4 ship: + mfs + Document the refactored class hiearchy for one-page-report. + slg will look at before mfs begins refactoring + plot becomes abstract + pure virtual destructor (protected constructor too?) + concrete render(cairo_t, bounds_t) + pure virtual render_data(cairo_t, bounds_t) - This is called by render, which will calculate the bounds within the axes, labels etc. 
- subclasses need only override render_data, plot() owns the space given to it - time_histogram split into time_histogram and time_histogram_plot - time_histogram_plot is a concrete subclass of plot - contains pointer to const time_histogram and probably not too much else - time_histogram replaces dyn_time_histogram - time_histogram now contains a vector of maps and a const pointer to the best fit histogram (best fit map) which starts as the most granular histogram and is updated as histograms are dropped for overflow - implement [], size() directly on time_histogram to hide implementation and selection - port_histogram and address_histogram are similarly split - histogram_bar class is added - map (or fixed bucket uints?) values to counts - render(cairo_t, bounds_t, color_map) will fill region with a proportional bar by counts - with a flexible enough histogram_bar class, time_histogram_plot, port_histogram_plot, and address_histogram_plot can be merged into templated histogram_plot concrete subclass of plot if desired - packet ingestion logic is moved to one_page_report; histograms are simply data structures - could create ingester class instead too Features needed: ================ - Packet Grid - Documentation (NPS Report) What we are not doing: ====================== - Traffic Map - Language identification - Keyword extraction & clustering - Passive DNS Test Plan: ========== - Performance testing Packaging: ---------- - Put relevant boost headers in boost subdirectory Compile testing: ---------------- Make sure that it compiles on these platforms: - FC17 - Ubuntu - OS10.6, 10.8 - cygwin - mingw - Centos 5.8 - Centos 6.0 - SUSE Reliability testing: -------------------- Test for crashing with all scanners on with specific data sets: - no packets - Lincoln Labs ID98 - One day - All of the packets concatenated together - Lincoln Labs ID99 (All of the packets concatenated together) - One day - All of the packets concatenated together - One day from ID98 & One day 
from ID99 - M57 Patents - One day - All packets concatenated together - NGDC 2012 - All packets concatenated together - Cada? Correctness testing: -------------------- Create a file of all MD5s of all TCP streams. Sort the file. Use "diff" Data Sets for comparison: - ID98 one day - M57 one day Compare results of: - tcpflow 1.4 with tcpflow 1.0 - Question: Can Suricata provide the MD5 of tcp streams? Packaging: ---------- - .tar.gz file distributed on digitalcorpora. - windows executables on digitalcorpora - downloadable tag from github Announcements: ------------- tcpflow/doc/Makefile.am0000644000175000017500000000005412263701151013740 0ustar dimadimaman_MANS = tcpflow.1 CLEANFILES = tcpflow.1 tcpflow/doc/Planning-1page.txt0000644000175000017500000000611112263701151015206 0ustar dimadima[mockup of 1 page report] tcpflow input: Date range: ISO-8601 to ISO-8601 Packets Analyzed: COUNT (XXX MB) Protocols: IPv4 (%); IPv6 (%); ARP (%); Other (?) +-------------------------------------------------------------+ | | | | | Bandwidth Histogram | | | +-------------------------------------------------------------+ +-------------------------------------------------------------+ | | | | | MAP | | | +-------------------------------------------------------------+ +-----------------------+ +-----------------------+ | | | | | | | | | | | | | Top Server IPs | -> | Top Client IPs | | | | | | | | | +-----------------------+ +-----------------------+ Top #1: IP (%) Top #1: IP (%) Top #2: IP (%) Top #2: IP (%) Top #3: IP (%) Top #3: IP (%) +-----------------------+ +-----------------------+ | | | | | | | | | | | | | Top Server Ports | -> | Top Client Ports | | | | | | | | | +-----------------------+ +-----------------------+ Top #1: Port (%) Top #1: Port (%) Top #2: Port (%) Top #2: Port (%) Top #3: Port (%) Top #3: Port (%) +-----------------------+ +-----------------------+ | | | | | | | | | | | | | | | Observed Downloaded | | | | Types | | | | | +-----------------------+ 
+-----------------------+ ==== Notes: 1 - DNS Resolution: 1.1 - IP addresses only. 1.2 - Passive DNS 1.3 - Passive DNS augmented by a list of DNS records (in PCAP format) 1.4 - Generate a list of IP addresses requiring resolution tcpflow/doc/make_web.sh0000644000175000017500000000146112263701151014015 0ustar dimadima#!/bin/bash # # Create the files for the tcpflow website CORP=/corp/ DEST=/var/www/digitalcorpora/tcpflow/demo TCPFLOW=../src/tcpflow TMP=/tmp/out$$ if [ ! -d $DEST ]; then mkdir -p $DEST ; fi if [ ! -x $TCPFLOW ]; then (cd .. ; make ) ; fi run() { DPDF=$DEST/$2 DPNG=${DPDF%pdf}png echo DPDF=$DPDF echo DPNG=$DPNG echo $TCPFLOW -o $TMP -x tcpdemux -E netviz $1 $TCPFLOW -o $TMP -x tcpdemux -E netviz $1 if [ ! -r $TMP/report.pdf ]; then echo tcpflow failed exit 1 fi mv $TMP/report.pdf $DPDF /bin/rm -rf $TMP convert -scale 300 $DPDF $DPNG ls -l $DPDF $DPNG } run "-r $CORP/nps/packets/2008-nitroba/nitroba.pcap" nitroba.pdf run "-l $CORP/nps/packets/2009-m57-patents/net-2009*.gz" m57-net.pdf run "-l $CORP/mitll/packets/ideval99/week?/*/outside*gz" id99-outside.pdf tcpflow/tests/0000755000175000017500000000000012263701323012303 5ustar dimadimatcpflow/tests/test-pdfs.sh0000755000175000017500000000111112263701323014545 0ustar dimadima#!/bin/sh # case x"$srcdir" in x) echo No srcdir specified. Assuming $0 is run locally TCPFLOW=../src/tcpflow ;; x.) echo srcdir is . 
Assuming $0 is run locally from make check TCPFLOW=../src/tcpflow ;; *) echo srcdir is $srcdir Assuming $0 is run from make distcheck TCPFLOW=../../_build/src/tcpflow ;; esac cmd() { echo $1; $1; } # create PDFs for all of the pcap files for i in *.pcap do echo $i cmd "$TCPFLOW -Fg -e netviz -o tmp$$ -r $i" cmd "mv tmp$$/report.pdf `basename $i .pcap`.pdf" echo "" /bin/rm -rf tmp$$ done tcpflow/tests/test7-three-flows.pcap0000644000175000017500000000360012263701151016451 0ustar dimadimaÔò¡ÿÿÍ‹°MéJJ Û’y 0þºE<H@@×õ¬¬\.'úÕP† Ðb­´ €ƒPÎÍ‹°M¨éBB0þº Û’y E4@€˜E¬\¬'.P™AúÕP‡€Ð?´Í‹°Màé66 Û’y 0þºE(I@@ج¬\.'úÕP‡P™BP\b™Ó‹°MeRR Û’y 0þºEDJ@@×묬\.'úÕP‡P™BP\bµThis is connection number 1 Ó‹°MÓ<<0þº Û’y E(†g@€ê¬\¬'.P™BúÕP£P\phÔ‹°MÔ#66 Û’y 0þºE(K@@ج¬\.'úÕP£P™BP\b™Ô‹°Mt$<<0þº Û’y E(†h@€é¬\¬'.P™BúÕP¤P\pfÔ‹°M¡$66 Û’y 0þºE(L@@ج¬\.'úÕP¤P™CP\b™šŒ°MŸJJ Û’y 0þºE<¡@@7+¬¬\.'¹ÚБ Ðb­´ €†qƒšŒ°M ŸBB0þº Û’y E4@€˜E¬\¬'.j„@¹ÚÐ’€Й^´šŒ°Mן66 Û’y 0þºE(¡@@7>¬¬\.'¹ÚÐ’j„AP\b™¡Œ°Mðx RR Û’y 0þºED¡@@7!¬¬\.'¹ÚÐ’j„AP\bµThis is connection number 2 ¡Œ°Mdy <<0þº Û’y E(!î@€vc¬\¬'.j„A¹ÚЮP\ð‡¢Œ°M©Þ 66 Û’y 0þºE(¡@@7<¬¬\.'¹ÚЮj„AP\b™¢Œ°MXß <<0þº Û’y E(!ï@€vb¬\¬'.j„A¹ÚЯP\ð…¢Œ°M†ß 66 Û’y 0þºE(¡@@7;¬¬\.'¹ÚЯj„BP\b™\°Mùè JJ Û’y 0þºE<$3@@´ ¬¬\.'p“LÜ Ðb­´ €‰hï\°M{é BB0þº Û’y E4@€˜E¬\¬'.Åp,¬p“LÝ€Ðé´\°M´é 66 Û’y 0þºE($4@@´¬¬\.'p“LÝÅp,­P\b™a°MÏ RR Û’y 0þºED$5@@´¬¬\.'p“LÝÅp,­P\bµThis is connection number 3 a°MH <<0þº Û’y E(°_@€çñ¬\¬'.Åp,­p“LùP\`b°MKÙ 66 Û’y 0þºE($6@@´¬¬\.'p“LùÅp,­P\b™b°MÛÙ <<0þº Û’y E(°`@€çð¬\¬'.Åp,­p“LúP\`b°M Ú 66 Û’y 0þºE($7@@´¬¬\.'p“LúÅp,®P\b™tcpflow/tests/test5-lines.pcap0000644000175000017500000000543612263701151015333 0ustar dimadimaÔò¡ÿÿ¹÷‚P™É NNcñÓ<T[°4E@g‡@@À¨ iE£œ­ÀðP‘çÈǰÿÿ­”´ +Qñ×¹÷‚Pþ BB<T[°4cñÓE 4·Z6èE£œ­À¨ iPÀð(lÆ´‘çÈÈ€9}…´¹÷‚Pûþ 66cñÓ<T[°4E(wÛ@@À¨ iE£œ­ÀðP‘çÈÈ(lƵP@­|¹÷‚PÌÿ ®®cñÓ<T[°4E ì‘@@À¨ iE£œ­ÀðP‘çÈÈ(lƵP@­ôGET /lines.txt HTTP/1.1 User-Agent: Wget/1.14 (darwin12.1.0) Accept: */* Host: simson.net Connection: Keep-Alive ¹÷‚PC <<<T[°4cñÓE (±û6%SE£œ­À¨ iPÀð(lƵ‘çÉ@Psöt¹÷‚PaK êê<T[°4cñÓE ÜœŽ65 E£œ­À¨ 
iPÀð(lƵ‘çÉ@PsægHTTP/1.1 200 OK Date: Sat, 20 Oct 2012 19:12:57 GMT Server: Apache Last-Modified: Sat, 20 Oct 2012 19:10:38 GMT ETag: "636-4cc826029a780" Accept-Ranges: bytes Content-Length: 1590 Vary: Accept-Encoding Keep-Alive: timeout=2, max=100 Connection: Keep-Alive Content-Type: text/plain this is line 0 this is line 1 this is line 2 this is line 3 this is line 4 this is line 5 this is line 6 this is line 7 this is line 8 this is line 9 this is line 10 this is line 11 this is line 12 this is line 13 this is line 14 this is line 15 this is line 16 this is line 17 this is line 18 this is line 19 this is line 20 this is line 21 this is line 22 this is line 23 this is line 24 this is line 25 this is line 26 this is line 27 this is line 28 this is line 29 this is line 30 this is line 31 this is line 32 this is line 33 this is line 34 this is line 35 this is line 36 this is line 37 this is line 38 this is line 39 this is line 40 this is line 41 this is line 42 this is line 43 this is line 44 this is line 45 this is line 46 this is line 47 this is line 48 this is line 49 this is line 50 this is line 51 this is line 52 this is line 53 this is line 54 this is line 55 this is line 56 this is line 57 this is line 58 this is line 59 this is line 60 this is line 61 this is line 62 this is line 63 this is line 64 this is line 65 this is line 66 this is line 67 this is line 68 this is line 69 this is line 70 this is line 71 this is line 72 this is ¹÷‚PŠQ ÞÞ<T[°4cñÓE ÐËJ6 \E£œ­À¨ iPÀð(lÌi‘çÉ@Ps[ðline 73 this is line 74 this is line 75 this is line 76 this is line 77 this is line 78 this is line 79 this is line 80 this is line 81 this is line 82 this is line 83 this is line 84 this is line 85 this is line 86 this is line 87 this is line 88 this is line 89 this is line 90 this is line 91 this is line 92 this is line 93 this is line 94 this is line 95 this is line 96 this is line 97 this is line 98 this is line 99 ¹÷‚PíQ 66cñÓ<T[°4E(-·@@À¨ iE£œ­ÀðP‘çÉ@(lÎP?å­|¹÷‚P)W 
66cñÓ<T[°4E(ô@@À¨ iE£œ­ÀðP‘çÉ@(lÎP@­|¹÷‚Pp–<<<T[°4cñÓE (ÊL6 E£œ­À¨ iPÀð(lΑçÉAPsï¹÷‚P´–66cñÓ<T[°4E('í@@À¨ iE£œ­ÀðP‘çÉA(lÎP@­|tcpflow/tests/test-multifile.sh0000755000175000017500000001734612263701323015624 0ustar dimadima#!/bin/sh # test the multifile case x"$srcdir" in x) echo No srcdir specified. Assuming $0 is run locally DMPDIR=. TCPFLOW=../src/tcpflow ;; x.) echo srcdir is . Assuming $0 is run locally from make check DMPDIR=. TCPFLOW=../src/tcpflow ;; *) echo srcdir is $srcdir Assuming $0 is run from make distcheck DMPDIR=../../tests/ TCPFLOW=../../_build/src/tcpflow ;; esac echo DMPDIR=$DMPDIR echo TCPFLOW=$TCPFLOW # check the results checkmd5() { if [ ! -r $1 ] ; then echo file $1 was not created ls -l exit 1 fi md5val=`openssl md5 $1 | awk '{print $2;}'` if [ x$2 != x$md5val ]; then echo failure: $1 echo expected md5: $2 "(got '$md5val')" echo expected length: $3 ls -l $1 exit 1 fi } testmd5() { md5val=`openssl md5 $1 | awk '{print $2;}'` len=`stat -r $1 | awk '{print $8;}'` echo checkmd5 \"$1\" \"$md5val\" \"$len\" } cmd() { echo $1 if ! $1 ; then echo failed; exit 1; fi } # this test requires MULTIFILE MULTIFILE=/corp/nps/packets/2013-httpxfer/multifile_25_21.pcap if [ ! -r $MULTIFILE ]; then echo missing $MULTIFILE exit 0 fi testlist="1 2 3 4 10 100" deldir=yes if test x$1 != x ; then echo Just testing $1 testlist=$1 deldir=no fi OUT=/tmp/out$$ for maxfds in $testlist do /bin/rm -rf $OUT if test -x $OUT ; then echo out directory not deleted. ls -l $OUT exit 1 fi cmd="$TCPFLOW -f $maxfds -o $OUT -X $OUT/report.xml -r $MULTIFILE -a" $TCPFLOW -V echo $cmd if ! 
$cmd; then echo tcpdump failed; exit 1 ; fi checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04634" "e0971231a9473c40c2de398b73dc0d80" "3183" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04634-HTTPBODY-001.png" "9e7819dcf5f9ebff79a9d2b09caac6fc" "2947" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04637" "e24c1889394a9b693e4211c294476e5d" "6497" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-001.png" "b1ba2f6d2bf1adaa9ffc2208eb383844" "2943" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04637-HTTPBODY-002.png" "e55dcbaf4c9b3437b1af2764721dfcf7" "3082" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04648" "5870e48e497c50487def6714540ab7d3" "3346" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04648-HTTPBODY-001.png" "b5e24b33589a29a73709661ff7f51243" "3110" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04649" "6564a6583bb31f5fc0b97d233450a98e" "3436" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04649-HTTPBODY-001.png" "e27d7c5537b03f08cd8f80b179b9c321" "3200" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04654" "45f8461dab7b145667093aab500600bc" "896" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04654-HTTPBODY-001" "fa5c9a9bf04219147f73e4fd9f72193d" "1473" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04655" "53d401972e8b0600e6e41500dc6da31b" "668" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-001" "230d6a43654bc5cf8891601df7218f19" "32" checkmd5 "$OUT/038.122.002.045.00080-192.168.123.101.04655-HTTPBODY-002" "230d6a43654bc5cf8891601df7218f19" "32" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04646" "7f8700b151e6eb5623993eb7ca80bf7d" "26160818" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04646-HTTPBODY-001" "538ae956097d9ee5813441561ec4ad33" "26160468" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04651" "6a980b667ac975f9ce031b11d7349559" "22751630" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04651-HTTPBODY-001" 
"bc5222e0c58a7be607dc9ce4bf121490" "1290" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04651-HTTPBODY-002" "64576f998dde977627d8131b5aa33ee8" "4000" checkmd5 "$OUT/046.137.228.251.00080-192.168.123.101.04651-HTTPBODY-003" "b94ff046f678a5e89d06007ea24c57ec" "22749412" checkmd5 "$OUT/063.217.232.082.00443-192.168.123.101.04607" "524b5d5853191e976128502cf33f5576" "53" checkmd5 "$OUT/074.125.128.094.00443-192.168.123.101.04587" "6092dbf3a2098fa0fa135db550043c63" "102" checkmd5 "$OUT/074.125.128.125.05222-192.168.123.101.02503" "f7fef5760e6fbc27faccea641f581299" "15165" checkmd5 "$OUT/074.125.128.125.05222-192.168.123.101.04000" "3f0ee6e9d4c523ba8d2362e569e31035" "602" checkmd5 "$OUT/074.125.128.136.00443-192.168.123.101.04657" "8fbfee96d692fdd6c2e18206bc26ef83" "3217" checkmd5 "$OUT/074.125.128.138.00443-192.168.123.101.04586" "001a6a55b70316c68b0dbf7a2ecafe9f" "11210" checkmd5 "$OUT/110.045.186.224.01120-192.168.123.101.04660" "7522c09ef4414d352984f89625da3ef4" "199" checkmd5 "$OUT/110.045.186.224.01120-192.168.123.101.04660-HTTPBODY-001.html" "43c55722039e66f40fd12cf03d68f1e0" "23" checkmd5 "$OUT/110.045.186.224.01120-192.168.123.101.04661" "3b2e761992ea2aaeacf7f783fd7a354f" "178" checkmd5 "$OUT/110.045.186.224.01120-192.168.123.101.04661-HTTPBODY-001.html" "ecaa88f7fa0bf610a5a26cf545dcd3aa" "3" checkmd5 "$OUT/110.045.186.225.01120-192.168.123.101.04658" "d8a9d91e4514d98771bbcccbfa0f8309" "2148" checkmd5 "$OUT/110.045.186.225.01120-192.168.123.101.04658-HTTPBODY-001.html" "b4ec4bc12cf6f200acfeb0a68d373c35" "1970" checkmd5 "$OUT/173.194.038.190.00443-192.168.123.101.04606" "2c99627350d11352ae267b7111b36167" "102" checkmd5 "$OUT/182.162.057.224.00443-192.168.123.101.04595" "3403a3dcb06aeba43d503e3ea5b082f7" "53" checkmd5 "$OUT/182.162.057.224.00443-192.168.123.101.04598" "a8a48f227b7147ae7b47af04ceaa0878" "53" checkmd5 "$OUT/192.168.123.101.02503-074.125.128.125.05222" "ffcf862c8632cd11235ea8d7100fc106" "8445" checkmd5 
"$OUT/192.168.123.101.04000-074.125.128.125.05222" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04586-074.125.128.138.00443" "7231901ed6805790ef9ae1ea8b2b16ea" "5576" checkmd5 "$OUT/192.168.123.101.04587-074.125.128.094.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04591-202.043.063.139.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04595-182.162.057.224.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04598-182.162.057.224.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04606-173.194.038.190.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04607-063.217.232.082.00443" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04615-074.125.128.100.00080" "93b885adfe0da089cdf634904fd59f71" "1" checkmd5 "$OUT/192.168.123.101.04634-038.122.002.045.00080" "a86fc704a0a8e49043a43211c56ac6f4" "749" checkmd5 "$OUT/192.168.123.101.04637-038.122.002.045.00080" "b0e06f173af7d6bed3a1b93358116b1e" "1493" checkmd5 "$OUT/192.168.123.101.04646-046.137.228.251.00080" "a0a547efbcb42b4ac1b2a74334e1be41" "893" checkmd5 "$OUT/192.168.123.101.04648-038.122.002.045.00080" "5c22ffaef694fd09f829563aa8cc9e3b" "752" checkmd5 "$OUT/192.168.123.101.04649-038.122.002.045.00080" "210ee9c362c938ef68630ebab12c4a17" "750" checkmd5 "$OUT/192.168.123.101.04651-046.137.228.251.00080" "49cc6cc8758ec5b605a7d6f62af291af" "2791" checkmd5 "$OUT/192.168.123.101.04654-038.122.002.045.00080" "bde02e78dbdc16949d2580f7c1d91099" "941" checkmd5 "$OUT/192.168.123.101.04655-038.122.002.045.00080" "3b4417ab638ca9120c7fb49bfeb73d4c" "2046" checkmd5 "$OUT/192.168.123.101.04657-074.125.128.136.00443" "fe8a9a4d79ac47ba78464ac835e32d3b" "2095" checkmd5 "$OUT/192.168.123.101.04658-110.045.186.225.01120" "e6493e52f04325f9a06e22dc7f977a04" "297" checkmd5 "$OUT/192.168.123.101.04660-110.045.186.224.01120" "dcd18bf7b6572443215154539a37d75c" "363" 
checkmd5 "$OUT/192.168.123.101.04661-110.045.186.224.01120" "d202ebd7c286d1ea4734bdbef69431c6" "323" checkmd5 "$OUT/202.043.063.139.00443-192.168.123.101.04591" "722c54c6443119b6c411359b9b7a47c2" "53" if test $deldir == "yes" ; then /bin/rm -rf $OUT fi done exit 0 tcpflow/tests/test1.pcap0000644000175000017500000001560712263701151014220 0ustar dimadimaÔò¡ìPêHÂb66ÌþÌÜ[ÇúÒE(ˆ£@@åºÀ¨fúøiÆP‰äB}›† Pÿÿ/|ìPêHÊhFF[ÇúÒÌþÌÜE8cJÿÔÂÀ¨À¨fj5E(ˆ£@?æºÀ¨fúøiÆP‰äB}íPêHXmZZÌþÌÜ[ÇúÒEL/…@gbÀ¨f—{{8†_#ì –—Ì”Í =*CÌ”Íã:ç£Ì”Íã>T›Ì”Ïm9p\ íPêH~ÏZZ[ÇúÒÌþÌÜELÈ85Ù®—À¨f{{8Ž$ï*\˜H…7Ì”ÏC¾—á2Ì”Ïm9p\ Ì”Ïm<Ÿ×¡Ì”Ïm<¢˜EíPêHê..ÌþÌÜ[ÇúÒE ïž@wÀ¨fÀ¨ÁKÀ ¯íPêHqðFF[ÇúÒÌþÌÜE8cKÿÔÁÀ¨À¨f‹ãE ïž@wÀ¨fÀ¨ÁKÀ ¯îPêH> JJÌþÌÜ[ÇúÒEâÀ¨fJ}hÇ Pè6ºø°ÿÿ¸<´ 1jÂÙîPêHÁ¿ JJ[ÇúÒÌþÌÜE<õ7n¶J}hÀ¨fPÇ ÀNXè6ºù (Â=€ ;~¨1jÂÙîPêHõÀ BBÌþÌÜ[ÇúÒE47Å@@ã À¨fJ}hÇ Pè6ºùÀNY€ÿÿý 1jÂÙ;~¨îPêH®Ä ÑÑÌþÌÜ[ÇúÒEÃ3©@@ä˜À¨fJ}hÇ Pè6ºùÀNY€ÿÿûo 1jÂÙ;~¨GET / HTTP/1.1 User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1 Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 Accept-Language: en-us Accept-Encoding: gzip, deflate Cookie: rememberme=true; PREF=ID=509fa92efffbd577:LD=en:NR=100:TM=1210898180:LM=1218831137:L=0veUs8ZfoozNBR3nyhxk:DV=AA:GM=1:IG=1:S=HFBwZ4As7wZ2VuJy; NID=15=rLfzXrsL_QsVJ0v8OYAPxDlzLxugre3bK075NPVsTnY-vAIF-pb4taX0m5sGH9-hRC8LH2_NgSRFRM4DhiKhJU77lHWx7DHoYLNgnFwEeS7P0B_aJRF783hRFvKZ2NNl Connection: keep-alive Host: www.google.com îPêH4! 
BB[ÇúÒÌþÌÜE4õ7n½J}hÀ¨fPÇ ÀNYè6½ˆ€nç ;~Á1jÂÙîPêHS— __[ÇúÒÌþÌÜEQõ7iŸJ}hÀ¨fPÇ ÀNYè6½ˆ€nèn ;~Ü1jÂÙHTTP/1.1 200 OK Cache-Control: private, max-age=0 Date: Mon, 06 Oct 2008 17:54:54 GMT Expires: -1 Content-Type: text/html; charset=UTF-8 Content-Encoding: gzip Server: gws Content-Length: 2589 ‹ÿ­XmWÛ:þ~…k–_;/…Ð8hK¹¥=lK·ÝÓíöȲl ˱ä„4ä¿ïȲ§„ÒËcÉ£g^5šÑ0ãx4ŒòGÃ1H‹„H[d’Ó©«c–’ˆ–˜§D×Ê‘« r'l¹ÐÑp„2N„ûéúuëH 13Ædh«Ñ‹9üó˜?7…o"35­hZ+@cÏ(£(69Jx‹“ŒËê;§?È ÛNïä f1Ë;½Þá!ÆKkRM´Û0\þ"E¾O“pЖ e>ÉZ@£”“Aõ²Ü =”-"BÃH ºÝôÎ)¶bÀŒäz‘ ¿B,tÒ;³˜úÚ~á÷ƒŽ³–²S.ª`ÛNÊ8”%äÁ¢\GbtŸ»õE4è´Û»RjÂÄEø6ÌXžøƒ œB–_ÂL)§©˜"êû$q~´hâ“; Ý. Kñ×¢;k«Hã)=´Þ!.¨ë!Ò¯#u)ÇTæmyL6ôó§,(ʪfIZ3L¯fßÊŠµe‰O‘†âxöëH£÷6|3FYH“VVÌXý;S’ ŠQÜB1 “ –> b†Ä@Zn)½Ñ]ø”§1š¼˜áÛJŒÕ%cÍ: ã%*xÊg·xöjQU—² (¾Ëèoù³ nIXB ˜î b ؆Á¢N .­mµ-†g4£˜›Í¬°Ø:îâöÕÅ@ïÇ/'ÿþ<;¹šñÙÕ¤ïß¼: uóöÕ—+øÜéw^˜~¯sÏ~žGG‡ðýÍå@'‰¾tþòK154…Ïp>†=lÖÄ `À›Æò’5ŽoÝŠ¼é™ØôMbfh,hÐ\­¤cn,¦(ÓKÌ|òéÃÅÍ ¸¿'£”8̴̈́ ImX<înçY|Ì‘{­ï7ñ±Þ`ÔÕ÷Q]7ö›>Laäs¾šƒ QŒIñ®¾@ë- cÏʸ“¦½cýi›°ÐXOýgßM}·{ªW„ºØu?\fDäY¢‰,'`¼Ê'Yîbé4WÖQê{n,9ž5ák"E†+²‡Æj4Ö† X6æ_Û߬Éý=HjMQœL dE \õo­[óëqã›1q¿þ·ñíÏûØæŠ3•^Sº4C€kHÍ'`®‡"üÒX:«8¹i" d­“‹ë*¿è½˜Ž‰î Kn3ïëé¾ô,nhŽ\t_)>F޲EÛá*‹$húêNdÈôÝ•B"^ÅD¾žÎ/ü¦IL7LÚ H Éx*i@ ŒLâÓܱ\é9i´g±Ö 8#HrMó$ËÐ\J•ÍW2*Š+–æé±Nƒ hëÓ©n8UŒO‹ÌéêmfŠÞÑßJQ ïÁ®JS’øg}ˆS‹BÄBf×¥@¡nR&ZhøîPêH™ BBÌþÌÜ[ÇúÒE4Óì@@FäÀ¨fJ}hÇ Pè6½ˆÀSv€ÿ¨ÿr 1jÂÚ;~ÜîPêH XX[ÇúÒÌþÌÜEJõ7n¤J}hÀ¨fPÇ ÀXêè6½ˆ€npx ;~Ü1jÂÙ±uÃ÷œ-üt~BÙO7÷dó¹gÈîPêHÏ NNÌþÌÜ[ÇúÒE@lÊ@@­úÀ¨fJ}hÇ Pè6½ˆÀSv°ÿÿñ™ 1jÂÚ;~Ü ÀXêÀYîPêH¶ž ¶¶[ÇúÒÌþÌÜE¨õ7iGJ}hÀ¨fPÇ ÀSvè6½ˆ€n<_ ;~Ü1jÂÙM KŠS“¤~¥ßLß hÆE±Ø€Íˆ8L%BØTš°Çߺ08Ëš¬©ŸIòžn—Ôä‚ÕÞ‡)pZ_‚ßœð!M²š¼*¨ ç¦é[Ef‚(jÊW«8¯\¿$ý,G…ÛW*_¯(ÞCErÓ Ö€‘û,Z:¥ìñ52†àu¹4š†ùQ%Æ¡­ªY?h^XdSWfS™Pe2v!WË-¦É­`¬MÕèà ƒŽ<8]ŠQ„,c|#s¢³-¹­RWSå®=[}±!`¿Ç,d=+M½¥®ÁÑ£Î'·§©—ҽтGƒÙa4L˜OO+\sÑgâ mo¤ ¡è’Û[—¥×À.Y•'…ÙfÂ(=ŽbH· dÚѵ*$:d¦ˆ(7jè…ø|h£‡ ÆPÕá帎?þ–lÅËm`'3¾)zò4ø{ÀØ >›ÍêØiæobOcŒXšB ðˆah¼iÛuc½u.?ÿŽœ4±M›¥2ع½–R—âY^œgˆ…ȰŠli8åQô¬³æ 1•Æ1†v\ùЦQcçÅáa[î—b(ZG Ñh«ŒSêV—²®çô)CvGÿ’ë·"ËÚg3ÀÂ,ÝtTø4þy²•ÇØí¾wûS¥OãŸJ­ðG,†E50µÉ€?ÍࣂÙÊ" ‰<´ê,Ê©:ò4“×jÕv3A¢âe8zÌÍÞo˜I‚(xTëCHÏ6Œ«ç#!'·Åœå"÷ŠÒyZ”³ü–þÎŽƒÖÎa”Ùp¦Á‘]g„õäYI·3…ƒ£ñêÈoøhò´ÜWl{¨ÁÁÃs« 
ú²<¡~+/B%JÃ'³ ÄyñCA´}PA«Wù()ÿÑñˤ¹F—R“yPkdh’3G–LÔI«u1"­(C\ÕWÿŠBµš$E£©>™U6¹%µ¾îužï®z̶ÖÖ I×5U<Éf¿ª*Í«Ž(m¤rLÃF U¿ÛkLÜGŒîö^Gñnï%Iv»‡œå&0¢a|ÛÈyèž¼þçÍÙû×'£/WAèõý·É—àíþ]?윿|Ó¡>¢Õõ Øì~Ó ü!K„1´²‚Û— Š›cy D“œ<"`£ˆ#L Ô-£ ‡`b" !èbHN.œYÒ q˜B˜@¡–†žO‰¨ke1Õé´5ÕC–añýäò²ªÉd=f…4¨,ÝíJ|õ'»* U&¬VéP×YX£!DtL4èâÒkn»ñáb$™6U·R†~Åå`wÔH<žBð ¿ø È”†ZÂf‚4Is¡F±&oÒ\ÕKiEƒÖªhP.Ø0‰€ ‚BZ‡ŠòT(ÙµŸ)”‰&CÍ=8ЊëµÊVÚÇR7«ëJÿ{O$çJž{c**ÊÍõ?/¹Ø¶äbo¬½&ŠêP»Ìñí–UVPŠ×¬4”ûC Ýê–6+-WÆžü©<ÆüïÊA*ߌNÊÙR´"˜@§­rÈö˜”¥ìèj=óË•1JÂé»`,®_–“ÚµœTa,Õ(Õ´… è"zŠÙ¸q+m;µ­^å3äC.“ªÉ+,Tâ\e,„S%ÓÖzWBÈfS :À¢Óè çÚGy(³âêÎYKÞÑ/ù®­7}¥Iú³g0KçN·Ý>Ú ™ftŠð¼½R£ Ë ]ío[¶k«Ë­2CÌÝEÑã¾~[:eß Ýâ5ñçë•»þX¿§p÷ ˆÊ :A’%ßo¸Øgáß±]œù_ÞeïNØóÓù,{w>?¡3;>|ºmŸ_LîPêH  BBÌþÌÜ[ÇúÒE42Ä@@è À¨fJ}hÇ Pè6½ˆÀY€ÿšùö 1jÂÚ;~ÜîPêH'Ý ƒƒ[ÇúÒÌþÌÜEuõ7nxJ}hÀ¨fPÇ ÀYè6½ˆ€n1 ;~ñ1jÂÚ{™zsl¥Ù6Ö]êOšªfÞJsÕõ*?#lÑÚÍY¿—3÷öà×X>Š ¼¿Ë#~îvêM²¼Èÿ/ТÏîPêHãÞ BBÌþÌÜ[ÇúÒE4«–@@o:À¨fJ}hÇ Pè6½ˆÀYA€ÿÿù; 1jÂÚ;~ñîPêHj OOÌþÌÜ[ÇúÒEAE£@’›À¨fD^œé 5-‡¸³Uclients1googlecomîPêHá §§[ÇúÒÌþÌÜE™ÞO@ù–D^œÀ¨f5é … :³U€clients1googlecomÀ  clientslÀÀ1'J}eÀ1'J}qÀ1'J}dÀ1'J}fîPêHWè NNÌþÌÜ[ÇúÒE@˜@@0À¨fJ}eÇ P"∠°ÿÿ°i´ 1jÂÛîPêH½' JJ[ÇúÒÌþÌÜE<^ˆ7DJ}eÀ¨fPÇ 8ëe"âˆ! 
(œ¶€ ,æ1jÂÛîPêHy+ BBÌþÌÜ[ÇúÒE4Ó@@ûÀ¨fJ}eÇ P"âˆ!8ëf€ÿÿáu 1jÂÛ,æîPêHÔ+ žžÌþÌÜ[ÇúÒE @@ú×À¨fJ}eÇ P"âˆ!8ëf€ÿÿ`ç 1jÂÛ,æGET /gen_204 HTTP/1.1 User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1 Referer: http://www.google.com/ Accept: */* Accept-Language: en-us Accept-Encoding: gzip, deflate Cookie: rememberme=true; PREF=ID=509fa92efffbd577:LD=en:NR=100:TM=1210898180:LM=1218831137:L=0veUs8ZfoozNBR3nyhxk:DV=AA:GM=1:IG=1:S=HFBwZ4As7wZ2VuJy; NID=15=rLfzXrsL_QsVJ0v8OYAPxDlzLxugre3bK075NPVsTnY-vAIF-pb4taX0m5sGH9-hRC8LH2_NgSRFRM4DhiKhJU77lHWx7DHoYLNgnFwEeS7P0B_aJRF783hRFvKZ2NNl Connection: keep-alive Host: clients1.google.com îPêH BB[ÇúÒÌþÌÜE4^‰7KJ}eÀ¨fPÇ 8ëf"âŠ}€lÞ“ ,1jÂÛîPêHù ÊÊ[ÇúÒÌþÌÜE¼^Š7ÂJ}eÀ¨fPÇ 8ëf"âŠ}€l3 ,1jÂÛHTTP/1.1 204 No Content Content-Type: text/html; charset=UTF-8 Date: Mon, 06 Oct 2008 17:54:54 GMT Server: gws Content-Length: 0 îPêH†ú BBÌþÌÜ[ÇúÒE4æ¬@@4'À¨fJ}eÇ P"âŠ}8ëî€ÿÿÞ\ 1jÂÛ,ïPêH1jFF33[ÇúÒ†Ý`:ÿþ€[ÿþÇúÒÿ…Í»[ÇúÒtcpflow/tests/udp.pcap0000644000175000017500000002073712263701151013750 0ustar dimadimaÔò¡ÿÿÉð²Oóà „„&bKýÄ,(M…EvŠKÿÀ¨ À¨õ%5bƒÏp10000000000000000000000000000000ip6arpa Éð²Oâù ››Ä,(M…&bKýE@@·À¨À¨ 5õ%y¬}p€10000000000000000000000000000000ip6arpa À  localhostÉð²Oˆ— „„^ûÄ,(M…Ev _ÿÀ¨ àûééb£!58d482efff30c26c00000000000008efip6arpa Éð²O¹— ˜˜33ûÄ,(M…†Ý`bÿþ€Æ,ÿþ(M…ÿûéébMv58d482efff30c26c00000000000008efip6arpa Éð²O ˜ dd&bKýÄ,(M…EVntÿÀ¨ À¨ùø5Bƒ¯ö»dr_dns-sd_udp01168192in-addrarpa Éð²O˜ OO&bKýÄ,(M…EAŠ-ÿÀ¨ À¨ÅÈ5-ƒšôr_dns-sd_udphome Éð²O,˜ PP&bKýÄ,(M…EBQ6ÿÀ¨ À¨Áo5.ƒ›udr_dns-sd_udphome Éð²Oì› ddÄ,(M…&bKýEV@@·;À¨À¨ 5ùøBæ…ö»ƒdr_dns-sd_udp01168192in-addrarpa Éð²OY OOÄ,(M…&bKýEA@@·PÀ¨À¨ 5ÅÈ-Tôƒr_dns-sd_udphome Éð²Oôž PPÄ,(M…&bKýEB@@·OÀ¨À¨ 5Áo.Ssuƒdr_dns-sd_udphome Éð²O´..ÿÿÿÿÿÿÄ,(M…E sd@À¨ ÿÿÿÿD\D\ ÂÏ{"host_int": 182109364, "version": [1, 8], "displayname": "182109364", "port": 17500, "namespaces": [114480065, 118689634, 123941859, 88349700, 127644752, 119453932, 104545047, 65980972, 
94784816, 88226864, 128979731, 69264279, 125396025, 118460388, 86089726]}Éð²Oÿ..ÿÿÿÿÿÿÄ,(M…E ®@À¨ À¨ÿD\D\ …w{"host_int": 182109364, "version": [1, 8], "displayname": "182109364", "port": 17500, "namespaces": [114480065, 118689634, 123941859, 88349700, 127644752, 119453932, 104545047, 65980972, 94784816, 88226864, 128979731, 69264279, 125396025, 118460388, 86089726]}Éð²O…ÜÜ^ûÄ,(M…EÎ?DÿÀ¨ àûé麣y„58D482EFFF30C26C00000000000008EFip6arpa €x imac3localimac3 _device-info_tcpÀf”model=iMac11,2À /€xÀ Éð²O¸ðð33ûÄ,(M…†Ý`ºÿþ€Æ,ÿþ(M…ÿûééºÑ]„58D482EFFF30C26C00000000000008EFip6arpa €x imac3localimac3 _device-info_tcpÀf”model=iMac11,2À /€xÀ Êð²O<GG&bKýÄ,(M…E9ðÿÀ¨ À¨ߪ5%ƒ’6'ncrnpseduÊð²Oê´WWÄ,(M…&bKýEI@@·HÀ¨À¨ 5ߪ51Á6'€ncrnpseduÀ *0O Íð²OmE÷÷ÿÿÿÿÿÿÄ,(M…Eéó @À¨ À¨ÿwwÕ…@480b05e 3 ipp://192.168.1.10:631/printers/Brother_MFC_9840CDW "" "Brother MFC-9840CDW" "Brother MFC-9840CDW CUPS" job-sheets=none,none lease-duration=300 uuid=urn:uuid:f520e5bf-ce65-3fac-6854-7e1c05678bef Îð²OõJííÿÿÿÿÿÿÄ,(M…E߆U@À¨ À¨ÿwwË…69046 3 ipp://192.168.1.10:631/printers/Brother_PT_2700 "imac3" "Brother PT-2700" "Brother PT-2700 CUPS" job-sheets=none,none lease-duration=300 uuid=urn:uuid:0291e6b8-63ca-34e3-5dd7-45db08e27a9c Ñð²OllÄ,(M…&bKýE^{»ü)öO À¨ Ó ÓJ«šÿÿBñqÒ4+ó):†b”YráîB(bñ¾§+*¨õc5\ÖÞ‡«oB6Y™þzC+q©w)‚ysSx•r Ñð²Oll&bKýÄ,(M…E^Ç@À¨ O Ó ÓJ9ÿÿ|Λ˜Ö0Ü/×?Ô\D6ÆýL¼{,™,<…¯¯ËMW'ë\”¡$ðTr:‘8R¥Ûû‰ £26C»ˆÑð²OhŒŒÄ,(M…&bKýE~Ù¡üÇïO À¨ Ó Ój£*ÿÿ•­4臣³õ*ýµ÷Ô —1Z¼ú~ÔeÊlh*hÔ©XÀ!Ê'*Ø’ÏG* R6z¨ ‡5‰ö&êŠÁ%ÏãƒJ ƒ„ÏÅeÒt°máÔOXB>Vg@îŽ?ðJm{€¦wo>½Œ¥V‚‹ À!šäŽ™‹½>Îì4•ï1Í B‚üð^ûûñ>Nˆ×{4ˆž ·$¸€u¶¹}‹î†©MX¾Ê¤ÝÖ½ÇÉ‚”“jC _±µé6°ç͈•3¦Ö¨þ.>Pù6·KàrŠEñû‡{úÕŽ1{„ýÁ}Éù âŒdßìâÔ0pèþ[bV°,0þæõŒÊ̦0™‹g•÷×pGÎåXÇ#µëk€,pÖO¸Ò¯Ž•cYX‰ô.Á²ÜT}@äNÌe¶ìŽ¿:È`#渟Æ}å Qzw´±qü«ôlú»:1„*ÖkÅòöë‘—òo-wK§E~ô•Ö£ ´Ko«\»‘JÝœ“ªW—î„Æ­ÞO%SN\õ¹ÌîÌ’´ˆ I›á$+þ_|ÀM”ªž³[ïNÚv+@Hµç§XÅœunP#–JÍþŠî[ï—`º‰µöÍbº» »«*@ûù´ ,(ÜjVÚ‘+È’¹Ñ¢èÂr[úÎí©"×Ö´™Ï>úð÷ÄXG&j÷«©]õI¯¶Øjyê4X\è}„‘›'Ûpb¿ŠÒÂwÿ£Ä«ªø++v)}®<ÑGÈ—MÉÊ´Í¥ÑïFÀ-hîùJâGU{«ëƒnj,¯_‘®ÏÒ鱡‚t„r“¦»-ÿ$âAžwéNW+0ZuP×”S¿h³‚Ó+Ñð²O×ll&bKýÄ,(M…E^êV@À¨ O Ó 
ÓJ9ÿÿôÁr®.–ýšzï0;`˜ó¯uX ÀWõAf±…@ÿ}xR‘Ye4”Ï€6æûCŒ{&vE`V¡ßšåüqF×ÕqªòÛ™d핲‹{iB /^V`°ÈG×êù@i–ü󵿭· Ró)õ@ÿ#þEÑ—k{oFœx‚aýk¦,`\Ù#wÂ:#»¤×IÁ\òo)¶E:Pšx¥æ˜“ºõ—ú¯«M]î߃nW‘´†m¹„ï *Œ 2ÉëMq“øc¤¸?ïQƒXn(×ÈN7žKQ9F[•…NqÔg.2¾W»róׇ¾c¼5ö»÷`©˜s€)aGÖø2$Ì•yÞóéTé2ß TÒv#媰»¨,ÈÝz¿Ø Í¦-œÑ,df°­°=Õ‡Ñ!IP㊂~! m¢ù¸¥%õ&´Dß|í:@B7æhaØVÉ~ÃΑ ÇÙpKB£`‰ºô! jUv»`î¾Ó4—ÁqTޚ̓B_ÌÕøèËÃtï¸im÷ÇäÉûî@@“g)5S0QRTùñQíˆú`Á=««r^Z=Nm£ëõøã õÈIYºl+aXñYzµÑe‰êQZ×ÔëUFæ>µÁÉû$©ôð° ‚ÚÆ963Ñ/ûZß"ÏÖÐ[&'x]B5 ¦öÈ«>QzP¦t‘Ú¡^Bµx X0š¼˜ØIq—Üþ;Íd˜¾$å—›8“îgt) –kúáÐ<ÙŸUÜ?Tt;Qt’¸üÿròõÅ=JͤþRíM~÷s[ÚùFÕÙ åö»íœ¶”† ÖOý«íIÖ¶éî²wºÞ“N¿©ka“C3;XÔ» =u—b6A‚¢7? ©c’6Ó—¨Äoræ¡&P¡kA¬^}ˆièÅȃeøl@j6t_ bNÍlÛÝË[ ¿°ÅEø9¨O‚nº‹ö´ËŸÎ-Ù:‡yÑð²OÖBll&bKýÄ,(M…E^1@À¨ O Ó ÓJ9ÿÿóÖú/›Fÿí7»ASÏq»çÉk*3)Ùô3¿8ÈQ‚ãÐã2}ÊîÔO$Ú;“ìÛU« ¦8«ÚΑÑð²O¿sŒŒÄ,(M…&bKýE~ÅŽüÜO À¨ Ó Ój¡ÿÿÚE% ë+Š—) Ttªd|©jæVPð—zDÓì@îg2 nŸ—<ª°L ¦R–8°«]&ófã'@'Lþ ¡×±ú™jâ“á—EUy‡‚± &ÍÈŠ«^»„ir§:{³ëÞg ¤¼»¸ƒf±}¼T¬y+¦™¼ûºzt¾,>{fàÿƒ|êuÿ ½ì„j#¾MtgÑ¥±iS}bCY€Ó_õ›’Lžué%@ùŠ.P¢ÉÒW=÷QÆS•š3•Ý^?é²H²¡ƒO¦ÇìXåå9Ž… qè+6D“’1镱íè,¥gÌ×ãSÛ´¹ŽF‰å‡?Åÿ¼š®øcT5d#Ž 8V·õ®þïþ£p”f³ €ÇÕéyZ‹  „LXO™Ô‚ƺ’Ú€b»X2²%‚>za'=1bˆÅŪ]Ñ$Κ§õÛûs§‹ùz¶:+/ Óæ#>PTî`¾×”n—SRĂȄ¢eZýûwÑÕ)æÀîLD4HªÄ!#¨ïše1wWHGl~¡ÿ/ø‰B7Jm<¨*¿:Î!ƒþq7ò›Ù5vqÕÉ,熟% ¶“| äZˆUa)„ÏsCLÖ©øì’â”yoodŒ_uKæCJ\ãz6ެaËÃBJ‡ÎHoƦ=ŸD ñ’XÍEŒ„óˆ{ÝA ·×~÷™ Ø¿@'. G<­éõ© ËHZíÝí—í—ø ‘%ËKc;•[Ñ.ˆí6ŠÀwíùØ•ÆIðJ>ä¹]j):ÍåK"1PyŒ)@ÑyÍ™F¥6µ:Üog@Qb9e㲦Šù·M2]Y¶÷Ë´ZyÎî”YïDY¨BXš}gAý7P†Â`• aT€=KÔÛf>ALýž{“`Å”ž€I áÒÊ>@†F_¹5 œs¿¡†ðÜ3që’Atò‘Y tB© _wbßÕUN¹ÈÄvp3~T-R4ña­\™Ù—šª½äÍêmׂ£æä^àqG3è°è)ÆZº_QÎüvÅÔ6ûçrŸAžŸ † -£ØYßTòýhùjL?'zóü|ù0[õ¨ö`#Žü8=R~ÑÈêëJŽgÄ,òÉ.á(”½/‰ðÛMy¹9³-±$e¡A”Íî½Ü Ò®GOËK xà Ì`Ñð²OOtll&bKýÄ,(M…E^T@À¨ O Ó ÓJ9ÿÿ-oäM²VgšXýLnÓ>1PÈc7€À>Ô›®_· 0 `³l7{ù×|9ÏÅ ¹[Îω»ýi´oN+3FFÑð²Ox›<<Ä,(M…&bKýE. 
Ðü•O À¨ Ó ÓŠÿÿ§_˜&…™Oƒ¹j'&µ¥ë êΰÄêâCpCÆ…u »¤…5ßÖtû¡ 96{—^EÄ¥üUðuμÍB¢/:*°­>ó Šú‡YáA”ä7÷üÖÅý,Xv$¼ø«ªˆLŸÒu”x‡.ûcKï"\®gðpœd!ç¸ñÐøqM”ÚÔ¶`åÞ¥Œ5î˜ñmç~Æ?iý—(ónrÖ,zœÇ¯¢-ói´.CÅÈ¿˜ŠÀÜg‚P®…ÞnâžWÞjVmPBÌ9ý½i_$¹"ÍÔ"›¡°[ƒÕÎRïíÅîtù¤lb?í™u¬³q¢7£°·™±É'Œ€h¹%WÏ0Ææ1´V.Z![õ{èVä±CÔ×ý«º¶n‰Ê(„`Ä¥ ¢ô‚±bLtMj£eý?×{p/6°ÁTõø†B5Ž…i„âSj'9gT³»Ó[×ï†ü 9u2è®ãðäIAh•ÑZ°å6A}MÕ»t6ªˆ¡¿è> ŸøXU’­[ýé ²” ;½» ב‡6Ö&<ž±iæéZ墥uÕ#tçho)zIö`ß(0áka“ªµà ä0 ͜ʳ ;$æOšWòÖù.Ÿ2,0ŽÜ ¡Bb4KTš ó3ª"î?Ø=j¦(ˆG™š_¬†ðkñ% ‡^CGïö¹×´J™ý®é‡ù׈wð ŠZ>h‰ÿj-B™J$TX{¨p©cT0_àHàRÄ*–à³¾Å(Y¿ã©#ϼB{“Žxi…nkÚg 4êÓ4T . tsÙPT„ÞjØ;%I†šdÂVp@à Çþ–þE=tpMKøröA˜×½ÎÓlMºwúöM¶LF=ºÉBÈ D2–ï+K_I&kõbU} ¦×ÅgHTTP/1.1 200 OK Date: Wed, 15 May 2013 14:29:44 GMT Server: Apache/1.3.34 Ben-SSL/1.57 (Unix) mod_perl/1.29 mod_watch/3.17 Last-Modified: Sun, 05 May 2002 23:46:32 GMT ETag: "8ce054-f04-3cd5c458" Accept-Ranges: bytes Content-Length: 3844 Connection: close Content-Type: text/html AirSnort 0.2.1 Changes

AirSnort Changes

New in AirSnort 0.2.1:

  • Packet capture is done using libpcap. THIS MEANS NETLINK SOCKETS ARE NO LONGER SUPPORTED. This primarily affects users of older wlan-ng drivers. For wlan-ng users, you must use a patched 0.1.13 driver, or a 0.1.14 or later driver.

  • It should be possible to use ANY card that passes monitor mode packets up via the PF_PACKET interface. For wlan-ng and patched Orinoco drivers airsnort will do automatic placement into monitor mode and channel scan at a 0.2 second interÓ™“QÍu  X”k|7Œ´‰aþÀE’_ü@4³Í†¼¢HpP–¨làpTµD*6€€Ð>  ¦×Ågval. For other cards, like Cisco, you will need to manually place the card in monitor mode before airsnort will see any packets. Orinoco users MUST use the the *-packet-* Orinoco driver patch available at http://airsnort.shmoo.com/orinocoinfo.html

  • Minor user interface changes to bring common options to the main page. The preferences dialog is gone. Options are saved and loaded from .airsnortrc in your home directory.

  • Airsnort can save packets in pcap dump format

  • Airsnort can read pcap dump files

  • The gencases tool will generate encrypted packets using weak IVs, and save them to a pcap format dump file. Load the file with Airsnort to observe it crack the password.

  • The decrypt tool opens a pcap dump file and decrypts all packets associated with a specified AP when supplied with the proper password. decrypted packets are saved to a new pcap dump file. As an option, beacon packets can be filtered out of the output file.

New in AirSnort 0.2.0:

  • Packets are sorted based on the SSID of the associated AP, allowing packets from several APs to be captured simultaneously without hindering the crack operation

  • Cracking is attempted in parallel with capture.  There is no need to guess whether you have eÓ™“Q>›  X”k|7Œ´‰aþÀE’`@4«Í†¼¢HpP–¨làu²µD*6€€ÐëD ¦×Åjnough packets to obtain a successful crack. Packet capture for a given AP terminates when that AP is cracked. A couple of cracking parameters are configurable in the Preferences dialog.

  • The GUI may be a bit buggy as I did not take the time to learn about using GTK in a mutli-threading environment.  If anyone wants to look into improving reliability I am all for it.

  • An increased set of IVs that result in a resolved condition is accepted.

  • AirSnort sets the channel to sniff on via direct communication with the nic.  There is no need to place the card in promiscuous mode prior to starting airsnort.  Also, airsnort now has a crude channel scanning capability built in.

  • Orinoco WaveLAN/IEEE cards are now supported, via a patch to the orinoco_cs driver (actually the orinoco.o module) available for the pcmcia-cs-3.1.31 source.

  • Wireless device name is configurable in the Preferences dialog.

  • It is even possible to start a session w/ a prism2 nic, pause it, swap to an orinoco nic, and resume the session, without exiting airsnort.

  • The PF_PACKET interface available with a patch to linux-wlan-ng-0.1.13 and expected to be available in 0.1.14 is supported with a radio button in the preferences dialog.
Ó™“Qù»BBX”k|7Œ´‰aþÀE4`@4 ͆¼¢HpP–¨là{µD*7€€Ïœ ¦×ÅmÔ™“Q{——X”k|7Œ´‰aþÀE‰Uf;ûÆR÷"Hp5»àuó¦€ €airsnortshmoocomÀ  g obfuscationÀÀJ#À hostmasterÀwíÅ*0 :€XÔ™“Q—¦¦X”k|7Œ´‰aþÀE˜Ug;ëÆR÷"Hp5»à„kÝ!€airsnortshmoocomÀ  g obfuscationÀÀ0 h͆¼¢À0ò krustyÀÀ0ò archimedesÀÔ™“QR:NNX”k|7Œ´‰aþÀE@`@4 ì͆¼¢HpP–©jX<ÑUÒ°ÿÿýÄj ¦×ÅyÔ™“Q:NNX”k|7Œ´‰aþÀE@`@4 ë͆¼¢HpP–ª“é¹wýŠ$?°ÿÿ$j ¦×ÅyÔ™“Q…k  X”k|7Œ´‰aþÀE’`@4˜Í†¼¢HpP–©jY<ÑWc€€Ðú“ ¦×Å{HTTP/1.1 200 OK Date: Wed, 15 May 2013 14:29:44 GMT Server: Apache/1.3.34 Ben-SSL/1.57 (Unix) mod_perl/1.29 mod_watch/3.17 Last-Modified: Sun, 10 Mar 2002 21:06:43 GMT ETag: "9d2fed-d2b-3c8bcae3" Accept-Ranges: bytes Content-Length: 3371 Connection: close Content-Type: image/gif GIF89al_÷  !!!"""###$$$%%%&&&'''((()))***+++,,,---...///000111222333444555666777888999:::;;;<<<===>>>???@@@AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ[[[\\\]]]^^^___```aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnooopppqqqrrrssstttuuuvvvwwwxxxyyyzzz{{{|||}}}~~~€€€‚‚‚ƒƒƒ„„„………†††‡‡‡ˆˆˆ‰‰‰ŠŠŠ‹‹‹ŒŒŒŽŽŽ‘‘‘’’’“““”””•••–––———˜˜˜™™™ššš›››œœœžžžŸŸŸ   ¡¡¡¢¢¢£££¤¤¤¥¥¥¦¦¦§§§¨¨¨©©©ªªª«««¬¬¬­­­®®®¯¯¯°°°±±±²²²³³³´´´µµµ¶¶¶···¸¸¸¹¹¹ººº»»»¼¼¼½½½¾¾¾¿¿¿ÀÀÀÁÁÁÂÂÂÃÃÃÄÄÄÅÅÅÆÆÆÇÇÇÈÈÈÉÉÉÊÊÊËËËÌÌÌÍÍÍÎÎÎÏÏÏÐÐÐÑÑÑÒÒÒÓÓÓÔÔÔÕÕÕÖÖÖ×××ØØØÙÙÙÚÚÚÛÛÛÜÜÜÝÝÝÞÞÞßßßàààáááâââãããäääåååæææçççèèèéééêêêëëëìììíííîîîïïïðððñññòòòóóóôôôõõõööö÷÷÷øøøùùùúúúûûûüüüýýýþþþÿÿÿ,l_ÿÿ H° Áƒ*\Ȱ¡Ã‡#JœH±¢Å‹3jÜȱ£Ç CŠyñ™¢“(QŽB©éâ®”)¿}cIr¢8s漉S‘E;:ƒîÚ•Óg͈ª††È©i(œœ»*~Ë!éЗ3¡8g„EvkæÝ¿wfƪ-RåYAM9c\ÝõM`XœiÉšý§¶JÞ±U­UuÖÎZµ„ß>\„ðB¸8«|GµÈRœvøægˆ‚<ƒˆð²WË93›Æ9VgŒ»8¿ÁæœSöæ"c/ mTUÑDY¯.b7gÁË¢ìòÝøæÌ˜Ãkqœ£žkÆ®vT…ÈÝþÿŠÓ­¢Ô9·`rãä˜O=žéÙu¦7ª3Òï:Ô™“Qºk  X”k|7Œ´‰aþÀE’`@4—͆¼¢HpP–©j·<ÑWc€€Ð- ¦×Å{í…PW£ T]ÿT‘“*¡‰–Aª¬…q~•à‚ÿ@]ѵôLNU(XÞ?²&¢|$j—PzÓ݇—@¹ðNŒ (P|ЬÖbŒï¬öÙ?¡Õu^O4þ3$Šy¤Q …ÖtöøYŒvÀTc1~Óá?RBùâ?—ýdŒ'iø ™Š˜©ÐŒZmaG§ r:1™_r´œÏèSpêÅ£&1Rå¡¢E`çVŒ6êè£F*餔Vj饘^Ê=ò¸ó 6Ð<³Œ1Ç  -¥”ÒÉ'¤ ¢ 4ÚhÿSÎ;òà“éDõt3 )Ô¡Å7 à¼gl(PA- E„HŠ.È\ÃÎ>·úsŽ2°P’G+8PÀ4` 5è ÄaŒFnÐAG ‹4" $ƒØ1Çjœ1†^H!'`8 Àèð… ÔâÌ8¶B:"CXPlNP‘É/ßä³O?þø“‘?ùtZÎ5Ò #Š!^P “È€Å!ÉT¼Q/n0R ?EƒÉ`€ ðð…$9ƒD0‰l¡ƒ k€ {ls>•£Na 
Ðî<2±É ÐBÍltÏ-cp€ö{$b2—ŽTqÃ#w;ÿ„KP ìpÇs£@ßY4І \xP˜`3É×8!€ DÛt`, TÔQÉ<žSÔcP=ßð’Š)˜ÀrAàda¼‚Ž)‹€U…KDÅ”ThÔbA`èžÓÔ 4+H €N:|òº@³|€wÔ%Ð.(ÿÞêH4NàäDÛÁ£ .šh/>Ó  ­C*ûd¾´ˆ>v± Ì/(€Â9(²‡`¡G,ÌàP'HBNƒ#˜á EÐ\Nv€°ÀB   /¨“5Té Š°mD¸×BT!(høG:ÿdwà|@<ÇtÃ"Š&hØ¡Aô†  Û°‚Eó)>$[,\Ò¡aäD £ï$‚5¢-½PÈ Üˆ6>D$t¢cP¡-ê19VˆH:ò÷GÑäA!A(¤h‚‘z¬@‘AÉ„B’Éœ€A"'‚d¡GT2'|ŒÈ'> ˜.!ÓHÀ'0 ‰à㟌CîðÉ;Pd…Òãú–uØP‘3XGE:ÑD:ê Y ‰e\䪤#Ì1’‘¬Øäå?ˆ!?7~€ù ÔH€9¯dÚ$/¿±7.Á{IÇzwCÌB þè‡@ªAÿ Bø“†ˆ…<¦h6'f^Ôˆ1¨Ð€ ² åˆ>ÛÑ ˆF<à¤A¦q† \phÐZH¦‘ˆ8àqÁ\1ЂÌc Àz`\ "Žgrl |œ ° r¥Þ@… Ê`Р5˜Á²ð†G ÃF©ÇP…jüÑ8@ `œha AÇ-Ñ,ü`PÁ°ÐDÜ“âÇ>ö¡{Mú,ˆ!à“QƒÀXÉÊ€ôÂ!þÐÇ^õ‘ÎLå›Y.Æ!v´xøG-䦨Å€ùÓ@6RŽ\èìV±&6IV²ÿÐÃF˜@ L&ÎX  @À$°,¶ (ë ’'  Ã8%l;[mîƒBh€Pl=ãHŸÑ `—¬ª$¶ÀÁ`BˆE>¦ûËfÓ È°Â°èàúü®ÑlzÀæ=®àpüC~ÐÀ~Ô™“QÖk  X”k|7Œ´‰aþÀE’`@4–͆¼¢HpP–ª“é¹xýŠ%Ï€€Ð¬  ¦×Å{HTTP/1.1 200 OK Date: Wed, 15 May 2013 14:29:44 GMT Server: Apache/1.3.34 Ben-SSL/1.57 (Unix) mod_perl/1.29 mod_watch/3.17 Last-Modified: Fri, 03 May 2002 16:51:48 GMT ETag: "8ce03d-1057-3cd2c024" Accept-Ranges: bytes Content-Length: 4183 Connection: close Content-Type: image/jpeg ÿØÿàJFIF,,ÿþCreated with The GIMPÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀ]È"ÿÄÿÄ:!1"AQa2q#3b¡±BR’ð$4C‚‘¢ÑÿÄÿÄ+!1A"Q2B¡ÁaÑáðÿÚ ?¿éJá4Ñ[A$óÊ‘C—y‚ª¨$“Øë@s¥U÷[ÿT܉v»p®b®RßSš/æÆ2Éç#œþ‡ cGªï c ±îoµÉË(„S68þ« Î}O§|s8®Æ˜pí”z’-šTÃê°u®áÒ^Ò4Æo,ØÜDxÉb QúÏÖ¦Ö·V÷¶éqk¡¨ÝÀGŒ–ó“é—ë9ø-ŸŠÀÖwuÆæÖ ØÛ¦ÓO_ÝÜ^À:G†?F? ~ìxìy•èÚÍ«ê+´ö³$V:T¾ÔÈ °;.xg'$“ÀÁ88«»£ËrŒž´cý2ÓovÏÔÝÁ³ÒþâóD‚Égglô3ÇÀ8fΩŸÔMè»WGörÇ÷Å×’ÙèóHÃ=€Î;å±Á®¯§šs8Õ÷]Ê·Ú5É„õŒ0µAÓG¡eóÜ*‡Õ÷Úî¥s­ê.îbBB?è 8Æ9>¦¯®:û“àq£}Ù7‘]ÙÕ#Îò\ÊÓ™ï mq<Ó±f c9=É>Ÿ5Óq3¶’®@2H¡prÕ‘m=Í”7ÖÐø×0‰‚ŽYPçä|V+J.uXáËn:ÛÛ'ðÿzÕ«;3ê£en-W?¥öKÁÊî ¶pÙ)þ!TãØrMe¼}O*¡ÉQý+ ïS‘×”·ÿ1ïÿÊæ’ªGuxÔ™“QAŒÔÔX”k|7Œ´‰aþÀEÆ`"@4Y͆¼¢HpP–©j<ÑWc€€Ð ¦×Å~ 5@¶öM§>!Ö „!Õà)‰ÀKV²˜0îb $c â'tÐ<€µT† ’. 
‰ÀÆ_  @¬c5ÀMW|€,@£y0ºÀÔš•ª®Aü± 4€x-ÈÓ!¼  8ïyðdöµÿ ûXà¯JjÇ©ÆpkÄÒèà N!Z˜(±š 0=Dãê8 q‚äC1h8 )-ä`ÿ ’8LaA8È ÊÅÉá;Œ‘Ù @8VÝѸ ò4Aªñh£ ü€Å Ð ,™9`æ4`¬°)ð)eˆAî2xÉS0àdd”°mÛ#D” ùÀ\ˆ¸ ɰàq`BnĆ €ð‰ äH¶Žzß‚àc Lp‡¥õ€ B@ø‡6U±Äœl`–Ö†—…0ÀCÿH3,ˆ”‡ä²•eâaO`Hü‡>ªðƒ×ªÜ !hZYß<ÃvÈ1>4qü ͸€,Hóƒä¢4/4&p…òUS`EeÿÑŽ¬„b8@+ ’ „À¨Z‡Áj_>ÛƒÔb"%92 Ÿg¹¨H=!`d6°AK ²ŠH"µuHÝ bØÂ çèÁ®'{¤b p·H-l‹… ¤5´%šÐã#ëˆ>¼að㳨}<\áòŠxœ;ü€ƒc„BöHìoHÊ>IÕé{gÌR¿úc¿þQ"¯ýî¿&4ñ¾ÿH4$âˆ$é‘Ë\–s~8é jÑ„`Š ˆÅÆ1ãQ‹[4ÿù¯g0°ë· Ñ@9qh¬µA9añ± ¡Ñ ˆ)%1^ qqíÄס.b ˆ`#‚˜r$v  ‚±ññ$ûq'8aé¡ B >±‚)'â'¢ â='âh~¡AÑ¡éáÑ!™Â#0’è¡$ÿußÀ‚#·#¯¡!!-òh¢&fXIB…Èq1’s–‚(•a"ÿñr¨Š2š@'¢€^±(í·éñ$ƒ8Arˆ$QjUŠøˆ‰’8‰”X‰–Ø;Ô™“QdŒ  X”k|7Œ´‰aþÀE’`#@4ŒÍ†¼¢HpP–ª“éÄ4ýŠ%Ï€€Ðû° ¦×Å~Ç)Î9ã Ç™Íu5ÿBøÛ^¹|xþËý–‡ÑM"{cRÜlY-V±@¥ˆzƒ;|`¨ùôõºª)ôã@wdiöÎ&[‰Ð]\¬Ü2Êà\z`ñŠ•Ö9=z|7&×m²œ¾E)J‰@¥)@)JP R”%Äׂ0Šë“ïþøªn]NÛNÝŽÊòB—§Sy ’UuR…TrÞ_nØæ®æUu*À{ƒT‚îkÝí®^j{[Q“QyŒrøa`Ke'¥DŒ@Ç©#$ç·¥BkQ§b„Ó$å–2e zXŒ¯ÅwT}_sXͪÃЊŽnû=Ù¯h¶¦ÓA¼û¦ì†/Yäc©ADo*1Ç9üñ땺ìÆÇ×´]sìi§Ùëp,:•œXðíîB‚ ãR?B}j~ÛÃ3æF6gÁݬËq¹5˜¶¦#G‰âêW Þ(?È?™»~_ªú¹›O·¶úµ¢"âTX§[qÊ!XGÉ·Çæk³<]#bÞnf·7¦¦MÐŒr]˜ôÁøåÔk¯mlË«§ßnØþóÝ7ynNBÈçôË–öϧyG±žé;i,nv…”;3o´n½X/Ûçˆó‘ŸN2IöçÔc'X·ý˜µý‡ÒîŠCK&±qé{©œgÃÏpqÇ®p}jY¶ô†Ñnµl.¢½Ý†—ZÖÜuA§!ó2vö_R2p*µº½³}jêóN¶½M2䣬—ŽRسœõ7ëWÔ“—rßO®ršúN¨£†Ê7¾ÓäšÒâ,”ž)YXêk,DZx¼O%éýâÉ“¡‰Ï” ïè8®ZŽ›·p²( +t¸Ç§rG±ãúÖÛÊÙpqëZ•i¶š>’®-¶jqK,ù5úªÜ_Kd³Ü<÷™SÌ|C…ç=ÇÁ®kchÅad.ñ/ã'Iî>rkœ¾¾žoð¦"_î­qŽQ¬÷mþ2XgØpû÷¢Œ|à¯D_WOoá“ÞÆ;»‰šåQÒA ¼yºq“Œ Ÿjáw§ÙEbn @…pW¨œr§ Ö| -¬¢I-!rÕÓ}$k(ºˆHYâ8>.B¡?¶qüµÇx*ºŠiã7Ò›þ_ƒ½$·ûGÛnÝ,dk™2}Ïz±þ‰ÙÞÜn]KRI¯F›ªÃ‰]ÊK#6r:»ô…ÿÛæ zV÷¾«¤møòVît‰ñÁð—Í!ÿH5ê´EDTE ª0U[‹éÃÈõiW_M1ŠOœ©JU'†)JP R””¥¥)@C¾ Zkz–Š–ú7ŠLw)%ÔËáIsn3Ôˆü`çdd3ÍSÚÚ%ž¢—#KšÞÏßMžÞîÌcÍ!ltºŒç‚+ÒU¬×縇G¸K; /®eFŽ(F<ÄûäþY5Æ´¶t¬Â7ô¾øK¶dÓ%uk½>âD” Â2ÈÆTdþB®0=0G¥gý@ÚPï]£s¤<‹ìD–Ò·d• ü ü_6.Õ}­¢ø7¬—’ˆÄ¥y ¨AõÀ\“êI©,éâFT"·ÃPƒÆÊOmkúîÙÑàÛºþÈ×n®¬€Š¬-¼h¥U>SÕœ `r íž*O™¼·võÖјù¢†A%ìÃÛ¬q?aV P¶2ÌëƒÛ¯5‘L$ç,Í#—{NÒßbj;sC†+$žÎXbÆ¡L{Ô™“Q‰Œ¤¤X”k|7Œ´‰aþÀE–`$@4 ‡Í†¼¢HpP–ª“éÉ’ýŠ%Ï€€Ðca ¦×Å~’IäžkÎòèí™Kͳ«øðázb´yˆã*ËÃõ¯UÒ§¸ø/âólâ¶áòybm±»-ï’[­«Ú.¨D0x¤dò[§='ŽÇšêƒ@ÝP,Â]«­˨[6a‚?ïêÊT½Ù«òSÝûþç•æÚ[›@ˆZêZ ü†Ux¶´Èz€%I\á†H û{s_.vŽìŸO‚ê=±© 
*«'‚|fÿ‡ø‚ñÜQ^©¥=ÉfŹÚ¯{#ʯ¡nG¹‰ÎÕ×|4ÿȾKË5ÂmŸºLjRí­U ’V Û•:t’£•³wÿ-z¶”vI‘³Õ/³ó}ô¥þ’mmIw=Þ·ªéwv1Û[øÉw™Üå˜ÏŸæ«¢”¨6ÛÖc¾éÝc²~X¥)\*¥()JJR€R” ¥()JJR€R” ¥()JJR€R” ¥()JÿÙÔ™“Q¨ªBBX”k|7Œ´‰aþÀE4`%@4 è͆¼¢HpP–©j¨<ÑWd€€Ï­ ¦×Å€Ô™“QâªBBX”k|7Œ´‰aþÀE4`&@4 ç͆¼¢HpP–ª“éÊõýŠ%Ѐ€ÏI¿ ¦×Å€Õ™“Q…‚ŒŒX”k|7Œ´‰aþÀE~¬ø@41%Õ£@+Hp#)ÑWØ›DÖØ2玀¶Â ´Âz÷·Ý Ë nÞöŠ€/@R#fæáiì4˺rûâ} a Å™BK˜¨Ð­:6÷ŸÂCÀœ;®Xü¤øv»úÏÞ†~笔t(-¡~Å¢ƒ k1n¬åúŠçl;ÄŽª-ëEö:6íl"qB³1q½'z]Hb–¤©˜õµ ›¢ÚB¥…%ð»ÜÍNì¤'2Ëù'êgóãÔ5ììË%.tA8•³¸KЮÑäª1ôímõTð/ êL6}GB °Ð"Þ¿Rr0‡¥Ž¯_yVÆ:þBÅä¹üŸ”Ê„ÙÝ"mcÄ-g`ž|Ye-ÃY-ÁºÅÊÂ]é©Á°±Y»Ã¼ü¦kQ 5qɆ¤Z7ï¿î}è͆ðîo6`Mã—5ó¼ÑÍ.ÿé&Ü ÃÓÝ„B˜ý#_茀 »ƒ§ W#â©€çFĶ.év$à^üøKÅ •¬˜¤|€y¤ÃRm)Psàönk¿Âãä} A+¤#„j’ÒÙo¦Á½j@«É£pY幎TˆÒt¸†erØe?u_€³\3"{HÀq‘êïÒIYÏ_¯c[fâ°æ÷²äŒ-)èa keïŽöc}4B„ŽM"Ìù3G„Á‚LÊùû.Œ®ˆ=‘ÛN?÷ fOouŒ•‰Ž=±àÔêÈqÙKÊLÙÚpM=ŽÍI=…Sfãa‡;‡ÿn5 Ðy›ÄÈJE—5í »ûã±%cžG^€R³‹tcpflow/tests/test1-one-packet.pcap0000644000175000017500000000025312263701151016233 0ustar dimadimaÔò¡ÿÿîPêH'Ý ƒƒ[ÇúÒÌþÌÜEuõ7nxJ}hÀ¨fPÇ ÀYè6½ˆ€n1 ;~ñ1jÂÚ{™zsl¥Ù6Ö]êOšªfÞJsÕõ*?#lÑÚÍY¿—3÷öà×X>Š ¼¿Ë#~îvêM²¼Èÿ/ТÏtcpflow/tests/test1.sh0000755000175000017500000000722712263701323013712 0ustar dimadima#!/bin/sh # # test to make sure that we can process the packets normally # case x"$srcdir" in x) echo No srcdir specified. Assuming $0 is run locally DMPDIR=. TCPFLOW=../src/tcpflow ;; x.) echo srcdir is . Assuming $0 is run locally from make check DMPDIR=. TCPFLOW=../src/tcpflow ;; *) echo srcdir is $srcdir Assuming $0 is run from make distcheck DMPDIR=../../tests/ TCPFLOW=../../_build/src/tcpflow ;; esac echo DMPDIR=$DMPDIR echo TCPFLOW=$TCPFLOW # check the results checkmd5() { if [ ! -r $1 ] ; then echo file $1 was not created ls -l exit 1 fi md5val=`openssl md5 $1 | awk '{print $2;}'` if [ x$2 != x$md5val ]; then echo failure: $1 echo expected md5: $2 "(got '$md5val')" echo expected length: $3 ls -l $1 exit 1 fi } cmd() { echo $1 if ! 
$1 ; then echo failed; exit 1; fi } for t in 1 2 3 do echo echo ======== echo check $t echo ======== # Run the program DMPFILE=$DMPDIR/test$t.pcap echo checking $DMPFILE if ! [ -r $DMPFILE ] ; then echo $DMPFILE not found ; fi /bin/rm -rf out cmd "$TCPFLOW -o out -X out/report.xml -r $DMPFILE" case $t in 1) checkmd5 out/"074.125.019.101.00080-192.168.001.102.50956" "ae30a88136feb0655492bdb75e078643" "136" checkmd5 out/"074.125.019.104.00080-192.168.001.102.50955" "61051e417d34e1354559e3a8901d19d3" "2792" checkmd5 out/"192.168.001.102.50955-074.125.019.104.00080" "14e9c335bf54dc4652999e25d99fecfe" "655" checkmd5 out/"192.168.001.102.50956-074.125.019.101.00080" "78b8073093d107207327103e80fbdf43" "604" # Check the times if ! ls -l out/074.125.019.101.00080-192.168.001.102.50956 | grep '2008' >/dev/null ; then echo utimes on packet files not properly set. exit 1 fi ;; 2) checkmd5 out/"010.000.000.001.09999-010.000.000.002.36559--42" "b7d0b9ee8a7c1ea94b6b43b5a3e0da83" checkmd5 out/"010.000.000.002.36559-010.000.000.001.09999--42" "c4b95c552616bda3e21d063e8ee2e332" ;; 3) ;; 4) checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51391-2001:67c:1220:809::93e5:916.00080 2600d38f9524c66f190212bbdb6f3c96 checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51392-2001:67c:1220:809::93e5:916.00080 ea4d328b4c831f6cb54772bcaa206ad1 checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51393-2001:67c:1220:809::93e5:916.00080 775823553ec206c97c079ab054869c80 checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51394-2001:67c:1220:809::93e5:916.00080 4b12431fb1403ed45a0cdd264c555c21 checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51395-2001:67c:1220:809::93e5:916.00080 3a2c8438a3e42e617b0d134ae9bb2f0a checkmd5 out/2001:0:53aa:64c:422:2ece:a29c:9cf6.51396-2001:67c:1220:809::93e5:916.00080 547bdc57f5ac3bac3b6620afc19d5a00 checkmd5 out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51391 2a8f64558ad7a1731e4950a3f7f16913 checkmd5 
out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51392 92e4df1f268a7f7b1244b4ddc67120d3 checkmd5 out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51393 873ce29539afc9bd72d65c11d9aef2f7 checkmd5 out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51394 c043c19025e6ba8278b7ddb6f08d68d3 checkmd5 out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51395 ca32de2d5504c6f8dc32610d94046106 checkmd5 out/2001:67c:1220:809::93e5:916.00080-2001:0:53aa:64c:422:2ece:a29c:9cf6.51396 b4772e037e05aaf315aaad911a59650d ;; esac /bin/rm -f *.[0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9].* /bin/rm -f *.[0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]:* echo Packet file $t completed successfully done /bin/rm -rf out exit 0 tcpflow/tests/test1-part2.pcap0000644000175000017500000000730312263701151015240 0ustar dimadimaÔò¡ìPêHÂb66ÌþÌÜ[ÇúÒE(ˆ£@@åºÀ¨fúøiÆP‰äB}›† Pÿÿ/|îPêH XX[ÇúÒÌþÌÜEJõ7n¤J}hÀ¨fPÇ ÀXêè6½ˆ€npx ;~Ü1jÂÙ±uÃ÷œ-üt~BÙO7÷dó¹gÈîPêHÏ NNÌþÌÜ[ÇúÒE@lÊ@@­úÀ¨fJ}hÇ Pè6½ˆÀSv°ÿÿñ™ 1jÂÚ;~Ü ÀXêÀYîPêH¶ž ¶¶[ÇúÒÌþÌÜE¨õ7iGJ}hÀ¨fPÇ ÀSvè6½ˆ€n<_ ;~Ü1jÂÙM KŠS“¤~¥ßLß hÆE±Ø€Íˆ8L%BØTš°Çߺ08Ëš¬©ŸIòžn—Ôä‚ÕÞ‡)pZ_‚ßœð!M²š¼*¨ ç¦é[Ef‚(jÊW«8¯\¿$ý,G…ÛW*_¯(ÞCErÓ Ö€‘û,Z:¥ìñ52†àu¹4š†ùQ%Æ¡­ªY?h^XdSWfS™Pe2v!WË-¦É­`¬MÕèà ƒŽ<8]ŠQ„,c|#s¢³-¹­RWSå®=[}±!`¿Ç,d=+M½¥®ÁÑ£Î'·§©—ҽтGƒÙa4L˜OO+\sÑgâ mo¤ ¡è’Û[—¥×À.Y•'…ÙfÂ(=ŽbH· dÚѵ*$:d¦ˆ(7jè…ø|h£‡ ÆPÕá帎?þ–lÅËm`'3¾)zò4ø{ÀØ >›ÍêØiæobOcŒXšB ðˆah¼iÛuc½u.?ÿŽœ4±M›¥2ع½–R—âY^œgˆ…ȰŠli8åQô¬³æ 1•Æ1†v\ùЦQcçÅáa[î—b(ZG Ñh«ŒSêV—²®çô)CvGÿ’ë·"ËÚg3ÀÂ,ÝtTø4þy²•ÇØí¾wûS¥OãŸJ­ðG,†E50µÉ€?ÍࣂÙÊ" ‰<´ê,Ê©:ò4“×jÕv3A¢âe8zÌÍÞo˜I‚(xTëCHÏ6Œ«ç#!'·Åœå"÷ŠÒyZ”³ü–þÎŽƒÖÎa”Ùp¦Á‘]g„õäYI·3…ƒ£ñêÈoøhò´ÜWl{¨ÁÁÃs« ú²<¡~+/B%JÃ'³ ÄyñCA´}PA«Wù()ÿÑñˤ¹F—R“yPkdh’3G–LÔI«u1"­(C\ÕWÿŠBµš$E£©>™U6¹%µ¾îužï®z̶ÖÖ I×5U<Éf¿ª*Í«Ž(m¤rLÃF U¿ÛkLÜGŒîö^Gñnï%Iv»‡œå&0¢a|ÛÈyèž¼þçÍÙû×'£/WAèõý·É—àíþ]?윿|Ó¡>¢Õõ Øì~Ó ü!K„1´²‚Û— Š›cy D“œ<"`£ˆ#L Ô-£ ‡`b" !èbHN.œYÒ q˜B˜@¡–†žO‰¨ke1Õé´5ÕC–añýäò²ªÉd=f…4¨,ÝíJ|õ'»* U&¬VéP×YX£!DtL4èâÒkn»ñáb$™6U·R†~Åå`wÔH<žBð ¿ø 
È”†ZÂf‚4Is¡F±&oÒ\ÕKiEƒÖªhP.Ø0‰€ ‚BZ‡ŠòT(ÙµŸ)”‰&CÍ=8ЊëµÊVÚÇR7«ëJÿ{O$çJž{c**ÊÍõ?/¹Ø¶äbo¬½&ŠêP»Ìñí–UVPŠ×¬4”ûC Ýê–6+-WÆžü©<ÆüïÊA*ߌNÊÙR´"˜@§­rÈö˜”¥ìèj=óË•1JÂé»`,®_–“ÚµœTa,Õ(Õ´… è"zŠÙ¸q+m;µ­^å3äC.“ªÉ+,Tâ\e,„S%ÓÖzWBÈfS :À¢Óè çÚGy(³âêÎYKÞÑ/ù®­7}¥Iú³g0KçN·Ý>Ú ™ftŠð¼½R£ Ë ]ío[¶k«Ë­2CÌÝEÑã¾~[:eß Ýâ5ñçë•»þX¿§p÷ ˆÊ :A’%ßo¸Øgáß±]œù_ÞeïNØóÓù,{w>?¡3;>|ºmŸ_LîPêH  BBÌþÌÜ[ÇúÒE42Ä@@è À¨fJ}hÇ Pè6½ˆÀY€ÿšùö 1jÂÚ;~ÜîPêH'Ý ƒƒ[ÇúÒÌþÌÜEuõ7nxJ}hÀ¨fPÇ ÀYè6½ˆ€n1 ;~ñ1jÂÚ{™zsl¥Ù6Ö]êOšªfÞJsÕõ*?#lÑÚÍY¿—3÷öà×X>Š ¼¿Ë#~îvêM²¼Èÿ/ТÏîPêHãÞ BBÌþÌÜ[ÇúÒE4«–@@o:À¨fJ}hÇ Pè6½ˆÀYA€ÿÿù; 1jÂÚ;~ñîPêHj OOÌþÌÜ[ÇúÒEAE£@’›À¨fD^œé 5-‡¸³Uclients1googlecomîPêHá §§[ÇúÒÌþÌÜE™ÞO@ù–D^œÀ¨f5é … :³U€clients1googlecomÀ  clientslÀÀ1'J}eÀ1'J}qÀ1'J}dÀ1'J}fîPêHWè NNÌþÌÜ[ÇúÒE@˜@@0À¨fJ}eÇ P"∠°ÿÿ°i´ 1jÂÛîPêH½' JJ[ÇúÒÌþÌÜE<^ˆ7DJ}eÀ¨fPÇ 8ëe"âˆ! (œ¶€ ,æ1jÂÛîPêHy+ BBÌþÌÜ[ÇúÒE4Ó@@ûÀ¨fJ}eÇ P"âˆ!8ëf€ÿÿáu 1jÂÛ,æîPêHÔ+ žžÌþÌÜ[ÇúÒE @@ú×À¨fJ}eÇ P"âˆ!8ëf€ÿÿ`ç 1jÂÛ,æGET /gen_204 HTTP/1.1 User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; en-us) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1 Referer: http://www.google.com/ Accept: */* Accept-Language: en-us Accept-Encoding: gzip, deflate Cookie: rememberme=true; PREF=ID=509fa92efffbd577:LD=en:NR=100:TM=1210898180:LM=1218831137:L=0veUs8ZfoozNBR3nyhxk:DV=AA:GM=1:IG=1:S=HFBwZ4As7wZ2VuJy; NID=15=rLfzXrsL_QsVJ0v8OYAPxDlzLxugre3bK075NPVsTnY-vAIF-pb4taX0m5sGH9-hRC8LH2_NgSRFRM4DhiKhJU77lHWx7DHoYLNgnFwEeS7P0B_aJRF783hRFvKZ2NNl Connection: keep-alive Host: clients1.google.com îPêH BB[ÇúÒÌþÌÜE4^‰7KJ}eÀ¨fPÇ 8ëf"âŠ}€lÞ“ ,1jÂÛîPêHù ÊÊ[ÇúÒÌþÌÜE¼^Š7ÂJ}eÀ¨fPÇ 8ëf"âŠ}€l3 ,1jÂÛHTTP/1.1 204 No Content Content-Type: text/html; charset=UTF-8 Date: Mon, 06 Oct 2008 17:54:54 GMT Server: gws Content-Length: 0 îPêH†ú BBÌþÌÜ[ÇúÒE4æ¬@@4'À¨fJ}eÇ P"âŠ}8ëî€ÿÿÞ\ 1jÂÛ,ïPêH1jFF33[ÇúÒ†Ý`:ÿþ€[ÿþÇúÒÿ…Í»[ÇúÒtcpflow/tests/local2.pcap0000644000175000017500000006154512263701151014336 0ustar dimadimaÔò¡ñŒO¥ææEâr…@@÷r­óåi €ÿÿþÖ 
5¯âº5¯”³U2FsdGVkX1/GuCxhWI8T+Net7tccYpMJGd28uRkDjO+Jay6MXdBYTN4Jus0Vvc//JzhbXnt3UJMylK/U+0iN6taixu4nZ9PF5SzXH1nYwZGSnDkyxk7wzhJu5nap0ooeCQnQLi8aZhIEpp0qkQU+DurborGLSrMDEmXKf24eNy0=ÿñŒOí88E4ά@@r÷i ­ô“€ÿÿþ( 5¯âº5¯âºñŒO5»»E·ïã@@r÷i ­ô“€ÿÿþ« 5¯âº5¯âºU2FsdGVkX1+D5xHqyRfy+ubRck3gvP6AIX0oFNcN6gzFNCwwMKhLtFMMqO0+PV/QvnOjd/pXtad1xNriCRpiQPrEh3Ny2YEQq5t5YioNiLn2PHxr4uRzjWD9ReNPi2nBÿñŒOq88E4€ß@@÷r­ô“i ‡€ÿÿþ( 5¯âº5¯âºôŒOÜÐDDE@ø @@Ä\;ÇÝw°ÿÿþ4?Ø 5¯ïôŒO ÑDDE@O¼@@Ä\Yê´ç;ÇÝx°ÿÿþ4?Ø 5¯ï5¯ïôŒOÑ88E4Ðæ@@Ä\;ÇÝxYê´è€ÿÿþ( 5¯ï5¯ïôŒO Ñ88E4̈@@Ä\Yê´è;ÇÝx€ÿÿþ( 5¯ï5¯ïôŒO' MMEIç@@Ä\Yê´è;ÇÝx€ÿÿþ= 5¯ï+5¯ïSSH-2.0-OpenSSH_5.6 ôŒOF 88E4¾¡@@Ä\;ÇÝxYê´ý€ÿÿþ( 5¯ï+5¯ï+ôŒOŒ MMEIvý@@Ä\;ÇÝxYê´ý€ÿÿþ= 5¯ï+5¯ï+SSH-2.0-OpenSSH_5.6 ôŒOš 88E4¥—@@Ä\Yê´ý;ÇÝ€ÿÿþ( 5¯ï+5¯ï+ôŒOí ÀÀE¼Lˆ@@Ä\;ÇÝYê´ý€ÿÿ± 5¯ï+5¯ï+„ .­ 9'g:±¼Ò¹xù¬Ñ~diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1,diffie-hellman-group1-sha1ƒssh-rsa-cert-v01@openssh.com,ssh-dss-cert-v01@openssh.com,ssh-rsa-cert-v00@openssh.com,ssh-dss-cert-v00@openssh.com,ssh-rsa,ssh-dssaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96ihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96none,zlib@openssh.com,zlibnone,zlib@openssh.com,zlibôŒOù 88E41ï@@Ä\Yê´ý;Çá€ÿÿþ( 5¯ï+5¯ï+ôŒO HHEDKì@@Ä\Yê´ý;Çá€ÿÿ9 5¯ï,5¯ï+ 
òÀ¤÷a¥`‡ã³ˆO/v™~diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1,diffie-hellman-group1-sha1ssh-rsa,ssh-dssaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96ihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96none,zlib@openssh.comnone,zlib@openssh.comôŒO\ 88E4+ö@@Ä\;ÇáYê¸ €ÿÿþ( 5¯ï,5¯ï,ôŒO¿ PPEL‹¾@@Ä\;ÇáYê¸ €ÿÿþ@ 5¯ï,5¯ï," ôŒOï 88E4e@@Ä\Yê¸ ;Çá-€ÿÿþ( 5¯ï,5¯ï,ôŒOú! ÐÐEÌ0ô@@Ä\Yê¸ ;Çá-€ÿÿþÀ 5¯ï-5¯ï,”ÞIüi™L7+ecïÓ~úæx^ëС+ ¬'+"ߌd¤¢«{™Î w©¥.3Õ-S²XÎßý]È£vj›˜6&FÜ’bŒ?Jðà«`£¹å[®Gè&QÚ ¢sUݰceÊáÝÞL —Ü™Býe醥ráÇ…AÝ(Þ'ñ¿ôŒO9" 88E4¯ö@@Ä\;Çá-Y긥€ÿÿþ( 5¯ï-5¯ï-ôŒOm' ÈÈEÄÍ@@Ä\;Çá-Y긥€ÿÿþ¸ 5¯ï.5¯ï-Œ €lt7Ê^cµz‚Á¨ŽúF? Þrİʵ£#7w.=E+TodÔÖ{žå5ôÇN; Ø«™+þéÙv­‘ûYóXÕ¹öG„Ú©\᪠Ì÷/˜N.Öƒjo¡N ÿž€Z>iµYbE@ŽŒdá@v8iù·õ¡d{.§Îü»ÙÉù|úôŒO­' 88E4eÃ@@Ä\Y긥;Çá½€ÿÿþ( 5¯ï.5¯ï.ôŒOpO Eú@@Ä\Y긥;Çá½€ÿÿù 5¯ï85¯ï.¼!ssh-rsaÈ}&ý&›ËoBç¥2Ð+ûÕÖfÜ–ùÕqK~¾Ú}w ÌËq¼TÀk°ÅW*ö}ññ¤¢tk” ®DÍmÒ|\ (yg0û$èÍx‚õ•ÅÈ3 ÒßSZ»üÔœLk(Jñ² …ÍkÅîéð¢ê1!Ü÷Í7Ǻ˜ÑF:Ì9ižÑ|(éS–€ËT$ ߺXÄ^¹Þ0#y³ cótò×}¥«ãCÆÎèºÇ§ ág#tn%˜=oœãÂÍ*ë³âɄʟˆWüêÝLw'i¡ˆ%&;’´'O+#ÕX~…Ÿ.Óôï%¤ÞhÆ©©EZfH_æí³Ž'ó×ÛCxŸŽ2¢³Gáâ;ʇEd`$¡\ oÇGºC†iOs—C‹ 3¢ÐôÝ IeUn'œà|„´ç›N­ÖúEŒóÝVøâä¤pvù›9F´¶P5^k¡}šVN­*w•9sûû»²FhÝ8Ý[P蘇K£÷­`]༟:z 1{^"ssh-rsaÆ2H28äl’Qo3$ï}¶!¡u) Sy<ÿo0¬Ö-À€ËÉ /"Œ]Œ; Ó<‡»ÍÈe˜šÄíà‘ÊüÚQŸÆ&Ò76£Ë±)Ë.¶ÂHJÀ äaл™#ÕÊt¬ê=ÕX»£üë Ù9ÙÓé¦NRÃÓjn}ОëÃæI‚¤®—mVk†g•ü÷ÿêtª¸.)JA»+5ÅoÍ6@õ=?öOÃêc+s´Éž¬ð½üÇÅÜ$

ôŒOW 88E4®ÿ@@Ä\;ÇáýY껥€ÿÿþ( 5¯ï95¯ï9ôŒO´W xxEtèÒ@@Ä\;ÇáýY껥€ÿÿþh 5¯ï95¯ï9­]Êà ×®ÔÜP÷ÓÔiÄ/ Á<Šû¦`wk͸Pÿ³¸¡Çͧç0°–¼ûí…”ÜÂÓÛ¾DÔZ‡oÀ>ôŒOàW 88E4z²@@Ä\Y껥;Çâ=€ÿÿþ( 5¯ï95¯ï9ôŒO^[ xxEt9¨@@Ä\Y껥;Çâ=€ÿÿþh 5¯ï:5¯ï9ï²:Má¾6ª>C«š¨".Ý ;Cb{e¢kKÖŸÞv,ÌPØ%ÔVˆë»%¨5|͉õ.má1.W)Uñ;ôŒOr[ 88E4“ @@Ä\;Çâ=Yê»å€ÿÿþ( 5¯ï:5¯ï:ôŒO“[ ¨¨E¤Tê@@Ä\;Çâ=Yê»å€ÿÿÿ˜ 5¯ï:5¯ï: ‹„…&g}JüýR÷¬¦È<Œ£ šZÍu‰‘@4([J2W-𸞎"ôÎE³¤¦öfU0ÞTÍŸ[ÐëØ2A)6©V‚vQ"©P í…ø™Ý“ÜߎmÍÈ‹Ls¶MÝ­ˆÔ—<Ä~FI:|RëMÞ@zw‘uHêk(ûŒe͵íÂV'ÇL¶±¶mdžY2yÅ‹1)0ÐÇŽoK…Õ—Ans|9ž7åšôÀzÇÅN S|ª2G²ûËüå#±ƒJ§2l›1ê_v šnö‡–=:¦‘@ŒšŠG–%ÚÀO•ßøy!§î.›Åu7¤å}’X>%ýÞ<_ʹ×g» ˆ@4ÛÌ Z¹æW¦Á%r£ŠÝÖPéOªÀÜ’M_<â]Pæaö®8‘•oÄ ­*»O5‰Æ=H*\„å¬ÄÅKÚK-oÌDJþjk]"ò_Ò‹lˆå 1z“lôŒO [ 88E4-¾@@Ä\Yê»å;Çã­€ÿÿþ( 5¯ï:5¯ï:ôŒO5l xxEtЦ@@Ä\Yê»å;Çã­€ÿÿþh 5¯ï>5¯ï:+9vS—R {Çïñ#mèD*¥;÷»‡6úM­*t«’nMc‰æÞ|²hŠ•;¼(#ÝV»~Ý ù¶ôŒOOl 88E4)@@Ä\;Çã­Yê¼%€ÿÿþ( 5¯ï>5¯ï>ôŒOpl ˜˜E”Ͱ@@Ä\;Çã­Yê¼%€ÿÿþˆ 5¯ï>5¯ï>ÙU >)ƒ(Ñ4¼ƒœ8Kõín” dB³µnnD‚¥ÅÐWù­¢¼ßf?¯¾„ø ì.,oä±³ñ¿Çìw¡AáÄ1›'¾¦Åñ¢\×Ïüój EpZú™øKÒ™µôŒO}l 88E4–@@Ä\Yê¼%;Çä €ÿÿþ( 5¯ï>5¯ï>ôŒO:† xxEt!í@@Ä\Yê¼%;Çä €ÿÿþh 5¯ïD5¯ï>I%H$!„û&]·§Ñ\B5i75ïT¢Ê¹¢ÙD[­o*ì-‹B‡ZUqFðߨØU{ÞÂ:E®¤Àý.£-ôŒOU† 88E4x¥@@Ä\;Çä Yê¼e€ÿÿþ( 5¯ïD5¯ïDöŒOIC ˆˆE„~Ð@@Ä\;Çä Yê¼e€ÿÿþx 5¯÷;5¯ïDY&HÌøRéi=©èùlZ¿¨¬9­Wd?°Sw™úoל¥ªÊ;–ô¸ÆR)6—ÙpÖ«Í ¯Ò s®Ò}5â=\3(…Ó©¬²‚©è&öŒO½Ê 88E4ó@@Ä\;Çä½Yê¼å€ÿÿþ( 5¯÷ž5¯÷žøŒOG(ˆˆE„V@@Ä\;Çä½Yê¼å€ÿÿþx 5¯ýq5¯÷žîÍ%³}:–Ýk«éT—÷bª¾k§¡’Ÿ¦øÆ}ܘ¦ ÿ°4£±:ÅoŠx?q}P€°÷Öª³º…—cãt­ëWi° ò”È?xËŠÙíÿ.øŒOh(88E4âî@@Ä\Yê¼å;Çå €ÿÿþ( 5¯ýq5¯ýqøŒO?RxxEtÈ@@Ä\Yê¼å;Çå €ÿÿþh 5¯ý{5¯ýq5küƒˆ¬ö—S<Íߊç'Í/ á¨YÍ@àÄÒwP¥5LÕT2?7käÆxŒ¤žmH R‘¶ðLB‘¬’v0°sûŒO`‘ 88E4Zü@@Ä\;Çå½Y꽕€ÿÿþ( 5° È5° ÈûŒO‹‘ ˆˆE„Í‹@@Ä\;Çå½Y꽕€ÿÿþx 5° È5° ȯ¼]kßO=Q bOU‡¤PsÒ~Íu®¬5Â"ÕËb´ñƒ ˜ÏŠÜ©IìŠä9îÃ')¶JAjÏåR,²a–U[xâZïã]®ê$íÒûŒO¦‘ 88E4á@@Ä\Y꽕;Çæ €ÿÿþ( 5° È5° ÈûŒO!“ XXET·à@@Ä\Y꽕;Çæ €ÿÿþH 5° È5° È}ñƒ$‚‘~vJýïà+R¯øç×W Ê|\ƒÒ®ì‡ûŒO4“ 88E4sò@@Ä\;Çæ Yê½µ€ÿÿþ( 5° È5° ÈûŒOÌ“ ¸¸E´X¨@@Ä\;Çæ Yê½µ€ÿÿþ¨ 5° È5° È*†ºâêŽvt#ö?e©k—±òõ³}ûù“ÔT6sÊtòàÍñÏ“†oŒÕ€ù ¸Å>ûÎä Þ:YwiÎâÞ+1¢àäe8rzƒ<,jø÷Ño(Æ ¾¦Ì,ÎúrVã²"¢NXµ„\&Z!q ³NYïÌÂú OA"ûŒOÜ“ 88E4}Ä@@Ä\Yê½µ;Çæ€ÿÿþ( 5° È5° ÈûŒO·ê hhEdÏŽ@@Ä\Yê½µ;Çæ€ÿÿþX 5° Þ5° 
ÈÄ=u‚/ד5—´P€©‹МË5®Ò)'„Æiá©B‹KŠÖ&–å{Í.€ÿŒûŒO×ê 88E4ûÁ@@Ä\;ÇæYê½å€ÿÿþ( 5° Þ5° ÞûŒOwë 88E4|¸@@Ä\;ÇæYê½å€ÿÿ) 5° Þ5° ÞÝe}Z•âa»9†ÀnÄÁñ/(kj-òh—¡&¨Îâsƒû(´xk@0°„‡2ˆLáØÛ¬¿@¯÷&Q(èðxЧ#$¶_ jøíü2• Þ@I Y§ìÿ¸HާÍØÕŽªÌÌ̹Ù:R€2†MC~ç¨ãäÛ Ð=ó¸ˆ5"¾À Ï!H§Ún0Èò˜Wäëcˆbx¬AcM27?!ª…Hhù-Pç@ tíP™‡ì{èçÀ-ÉœTˆ½XÛ䍯šÔ'„:ƒbý\p{›¨´_Nka\*óU›j¤Ks·óèÇì`†ûëiLÉèï»äêz]ÑZ(%ÚAƒŠy¹ß®-jO04q~æ_™4ñ;mZÊ­ÔùepÍXï¢âwz¢ë[m= BÆRm<êÖLäìg .Êè¿Z‰Ô…´8¹™psÔŽ%H)©"x@Ç“ù’igkRmC^Õ¸ydjúRA°,nbÏá#œ‚¿bwå"w4€‰|w©8Ó  Á2‚:Pʶ†µûŒOk 88E4í‰@@Ä\;ÇèYê¾U€ÿÿþ( 5° ã5° ãûŒOí ˜˜E”׬@@Ä\Yê¾U;Çè€ÿÿþˆ 5° ã5° ã_èé=,}a:Ø`í5WF ÜhŽ#šÑLÞ†ïúrÈ_Ë„ÇVĘN-HýÉtoÚø¥®äý_Þš§Î¦áHow2R,*G1"ëmø:ªøV|^¾^ä^ûŒO  88E4«@@Ä\;ÇèYê¾µ€ÿÿþ( 5° ã5° ãûŒOÌ\ xxEt,©@@Ä\Yê¾µ;Çè€ÿÿþh 5° ú5° ãïhDªlò܉žøÛ‹r€ŒÈFκ0¿q³‚øõÐ92ÈÉØñáµÀ?"è«kju6ã<8éÎx%÷o–Þ·ûŒOè\ 88E4 Ñ@@Ä\;ÇèYê¾õ€ÿÿþ( 5° ú5° úüŒOÜhhEdw¯@@Ä\;ÇèYê¾õ€ÿÿþX 5° >5° úœ.÷iµ åi â/ú ~Ÿ‹Q Ù÷‹¨0<u(š¸‰.\}¿¹¼:M^oè?üŒOñ88E4 Ò@@Ä\Yê¾õ;Çè½€ÿÿþ( 5° >5° >üŒOthhEd]š@@Ä\Yê¾õ;Çè½€ÿÿþX 5° >5° >÷ì¶Sg£žœxãCÝ…4tgR*ä´– Çuã•Ú€ LˆA•š˜í «…%4ÄüŒO‹88E4‹@@Ä\;Çè½Yê¿%€ÿÿþ( 5° >5° >üŒO¦øøEôeä@@Ä\Yê¿%;Çè½€ÿÿþè 5° >5° >À _&"#ºÎ3·©;JíœUòa:7i]GЖ°ÈL>³AÚTÏ \õšüpýFè¿ÈsnKŸôš‚|%(Ò‚«Ò0NèœLÖÖdÄÃkï\õÖe5° >üŒO XXETôš@@Ä\;Çè½Yê¿å€ÿÿþH 5° ?5° > }èç·Éú:¬ê&F¨ˆOU°hV½Œ20V*ŽR1üŒO88E4]œ@@Ä\Yê¿å;ÇèÝ€ÿÿþ( 5° ?5° ?üŒOxxEtŸþ@@Ä\;ÇèÝYê¿å€ÿÿþh 5° ?5° >¯W}¼®ƒs€Ó­drѲÀÜPoõ¤ïDRÇR<ÎoÕ¯›h¡±íZ9ä)†‹Crܺї\4ùŸä`†Ãø–èLüŒO*88E4¸‹@@Ä\Yê¿å;Çé€ÿÿþ( 5° ?5° ?üŒOH88E4­Ï@@Ä\;ÇéYê¿å€ÿÿþ( 5° ?5° ?üŒOw88E4m©@@Ä\Yê¿å;Çé€ÿÿþ( 5° ?5° ?üŒO”88E4MŒ@@Ä\;ÇéYê¿å€ÿÿþ( 5° ?5° ?üŒO 88E4uœ@@Ä\Yê¿å;Çé€ÿÿþ( 5° A5° ?üŒOD 88E4Ý@@Ä\;ÇéYê¿æ€ÿÿþ( 5° A5° AOJææEâG"@@÷r­ô“i ‡€ÿÿþÖ 5°0¤5¯âºU2FsdGVkX19gBszvUL81y6BnMAKe0WscKWfdFYEq/D9xxWHrSWHKGUecn7g33U2PQtjPzwBsySwEr3oQ4XD/2QrRCwdqrNKIgzduZ0s5UKZj2zSyu9vJebllOQVpLbdhUUAJcimaJhjI5Gz9uIyqdVy0vTCq1zKNyZGJPFfsvI0=ÿOs88E4ÙG@@r÷i ‡­õA€ÿÿþ( 5°0¤5°0¤O‹»»E·B@@r÷i ‡­õA€ÿÿþ« 5°0¤5°0¤U2FsdGVkX19dWvMtiKMDXyJKnQ3Xnc/fdkl8VX/BL5cYj42ohynm3YEx2Gc5WqxBQofum5P4QrV+eouquawJd1AonoFEawOsdXHO+hLQV6sKYTLXMmr76BddrMpQEi8pÿO¤88E4ˆ@@÷r­õAi €ÿÿþ( 5°0¤5°0¤OÛÊXX`,@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Uò°ÿÿ¾’  
5°]ÁOËXX`¶,@þ€æÎÿþëÚþ€æÎÿþëÚÄ^» _ÿ©Uó°ÿÿ¾’  5°]Á5°]ÁO"ËLL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Uó» `€ÿÿ¾† 5°]Á5°]ÁO0ËLL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^» `ÿ©Uó€ÿÿ¾† 5°]Á5°]ÁOøþaa`¶5@þ€æÎÿþëÚþ€æÎÿþëÚÄ^» `ÿ©Uó€ÿÿ¾› 5°]Î5°]ÁSSH-2.0-OpenSSH_5.6 OÿLL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Uó» u€ÿÿ¾† 5°]Î5°]ÎOmÿaa`5@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Uó» u€ÿÿ¾› 5°]Î5°]ÎSSH-2.0-OpenSSH_5.6 O~ÿLL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^» uÿ©V€ÿÿ¾† 5°]Î5°]ÎOÓÿÔÔ`¨@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©V» u€ÿÿ 5°]Î5°]΄·îñ¿0 Öiù8~diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1,diffie-hellman-group1-sha1ƒssh-rsa-cert-v01@openssh.com,ssh-dss-cert-v01@openssh.com,ssh-rsa-cert-v00@openssh.com,ssh-dss-cert-v00@openssh.com,ssh-rsa,ssh-dssaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96ihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96none,zlib@openssh.com,zlibnone,zlib@openssh.com,zlibOãÿLL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^» uÿ©Y€ÿÿ¾† 5°]Î5°]ÎO\\`¶0@þ€æÎÿþëÚþ€æÎÿþëÚÄ^» uÿ©Y€ÿÿÁ– 5°]Ï5°]Î 
¦Ëlè{†F¥¤JÕ"¿˜~diffie-hellman-group-exchange-sha256,diffie-hellman-group-exchange-sha1,diffie-hellman-group14-sha1,diffie-hellman-group1-sha1ssh-rsa,ssh-dssaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seaes128-ctr,aes192-ctr,aes256-ctr,arcfour256,arcfour128,aes128-cbc,3des-cbc,blowfish-cbc,cast128-cbc,aes192-cbc,aes256-cbc,arcfour,rijndael-cbc@lysator.liu.seihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96ihmac-md5,hmac-sha1,umac-64@openssh.com,hmac-ripemd160,hmac-ripemd160@openssh.com,hmac-sha1-96,hmac-md5-96none,zlib@openssh.comnone,zlib@openssh.comOdLL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Y»…€ÿÔ¾† 5°]Ï5°]ÏOËdd`8@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Y»…€ÿÿ¾ž 5°]Ï5°]Ï" OLL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»…ÿ©Y¨€ÿÿ¾† 5°]Ï5°]ÏO" ää`¶¸@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»…ÿ©Y¨€ÿÿ¿ 5°]Ð5°]Ï”ÞIüi™L7+ecïÓ~úæx^ëС+ ¬'+"ߌd¤¢«{™Î w©¥.3Õ-S²XÎßý]È£vj›˜6&FÜ’bŒ?Jðà«`£¹å[®Gè&QÚ ¢sUݰceÊáÝÞL —Ü™Býe醥ráÇ…AÝ(Þ'é Of LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Y¨»€ÿÿ¾† 5°]Ð5°]ÐOßÜÜ`°@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Y¨»€ÿÿ¿ 5°]Ñ5°]ÐŒ €s¿/|e°D¥nÕAHVpÄ]xš°œ÷éÝEóRwÍ6[ý%fïÁó콉Ípu÷;™«í R¹e+,ðW8ó£ŽW{×%ÏÈ J'jÞh™³|Íé¿´KÂT_ŽŸ¯…²WÏ+½a4À:8kµWÎCídÆ7X¸]1d#wªO(LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©Z8€ÿÿ¾† 5°]Ñ5°]ÑOn;`¶ð@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©Z8€ÿÿÁV 5°]Û5°]Ѽ!ssh-rsaÈ}&ý&›ËoBç¥2Ð+ûÕÖfÜ–ùÕqK~¾Ú}w ÌËq¼TÀk°ÅW*ö}ññ¤¢tk” ®DÍmÒ|\ (yg0û$èÍx‚õ•ÅÈ3 ÒßSZ»üÔœLk(Jñ² …ÍkÅîéð¢ê1!Ü÷Í7Ǻ˜ÑF:Ì9ižÑ|(éS–€ËT$ ߺXÄ^¹Þ0#y³ cótò×}¥«ãCÆÎèºÇ§ ág#tn%˜=oœãÂÍ*ë³âɄʟˆWüêÝLw'i¡ˆ%&;’´'O+#ÕX~…Ÿ.Óôï%¤ÞhÆ©©EZfH_æí³Ž'ó×ÛCxŸ€3–`V¿P\Â¼—j¢‚T"Ý—Lt_²M'åü€ã°D±ïÈ-ѤgKÐT/q—eúëµ:âFëa#ÂèA2QUã㦡Ǧen” ærO佨3_;›ÉkÍó¸¿#¢)eÿ²mùÛ‘b´]0ïHÊØš Œssh-rsa3[n®+‹ƒÉlἊ?é‘×)BßãÇð1R°¨ÔómŠBŠró–bAEØú3‡P¼ï1Çœá_ ·üå¨è“E<™ˆøg2z~ïëÒ|yœ¬ :ò tYüÅÍJÇü¯ßÒ†QµoòoÈý.öeã=%ábÉp~G""Á.&›U¼`î-šàGî‹0¨(2¤ÅVn_ ÞÛžæJ©ûì‚KŒûö”)¦~×Òè{; p3²Ÿ:j±6 C¯“­<Ô%Z­Ø˜$%YQç·-¾À± Ëò³Ãü{•ÃPH)ß àï‹^ųä¹2Ù7|µÖp Oµ;LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Z8»í€ÿ來 5°]Û5°]ÛO…Û \\`0@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Z8»í€ÿÿ¾– 5°t#5°]Û O±Û LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»íÿ©ZH€ÿÿ¾† 
5°t#5°t#O¿Û ||`P@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©ZH»í€ÿÿ¾¶ 5°t#5°t#‘;ªiWe& krw2¬’¶íZ’IAóɪóŽ}¬ÆFž¥š04™•ïû‰¸Í(JôOÉÛ LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»íÿ©Zx€ÿÿ¾† 5°t#5°t#OÜ ||`¶P@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»íÿ©Zx€ÿÿ¾¶ 5°t#5°t#ë:%•Þ€j1%+ǯ¯jGzÄ*u«¾½ äGέ½¥›ÛsÇÍ\@|LR-ã"O%Ü LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Zx»€ÿÿ¾† 5°t#5°t#O÷Ü ŒŒ``@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©Zx»€ÿÿ¾Æ 5°t#5°t#”’5馵$ß>æÊr²——î|ƒ}r¾¬W$ü Q£Ç7ã—ßàÿ_ÑoY™ìÖGG®ó¾_ª}Щ;c7n©PåO Ý LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©Z¸€ÿÿ¾† 5°t#5°t#O3œ zzEv^ÿºÆÀ¨ àûééb¾,adbe50effff8ec6e00000000000008efip6arpa OÛœ ŽŽ`bÿþ€æÎÿþëÚÿûééb¯adbe50effff8ec6e00000000000008efip6arpa OI(××EÓ6Dÿâ%À¨ àûéé¿ j„ADBE50EFFFF8EC6E00000000000008EFip6arpa €x MuchalocalMucha _device-info_tcpÀf”model=MacBookPro8,2À /€xÀ O»(ëë`¿ÿþ€æÎÿþëÚÿûéé¿Nì„ADBE50EFFFF8EC6E00000000000008EFip6arpa €x MuchalocalMucha _device-info_tcpÀf”model=MacBookPro8,2À /€xÀ Oæk==E9¨hÿp›À¨ àûéé%\³muchalocalO8lQQ`%ÿþ€æÎÿþëÚÿûéé% 5muchalocalOp÷wwEsm%ÿ«¤À¨ àûéé_ :„Muchalocal€xþ€æÎÿþëÚÀ €xÀ¨ À /€xÀ @Oá÷‹‹`_ÿþ€æÎÿþëÚÿûéé_L¼„Muchalocal€xþ€æÎÿþëÚÀ €xÀ¨ À /€xÀ @OZúŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©Z¸€ÿÿ¾Æ 5°uÂ5°t#¾XjÄEbù‘ŒC+dm"µb é¤ÊmJÍ÷aëFÐVÕL‡¨/ íj•Rb#/yŽëÅoìyi¨×n¢5þ“‘“ð1Ç·’Ðy‡!jM~‡:µ>W|DŠ äSÙøGæ‚7qÂF)O LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\(»€ÿÿ¾† 5°uÆ5°uÆO, ¬¬`€@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\(»€ÿÿ¾æ 5°uÆ5°uÆ,§†YK\á•,&'yÌ7¼¹·QâÑJ¯¿ÐvíÕÐgÛjx&—‰_hò,º yŒøÞÞ[,ÖäºæÃ@A!S^«ƒ¥e¾ÁxÈP½ÖDRÒ’Œ•$híùNÂŒO> LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©\ˆ€ÿÿ¾† 5°uÆ5°uÆOª%ŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©\ˆ€ÿÿ¾Æ 5°uÌ5°uÆi=)ØI µÛFAÙý´ìnbN嬡0³³è!怺BÏVL'X^Ö¢S…Cx?‚¢)ñ!iølÔ%©©³-a¢OÍ%LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\ˆ»Ý€ÿÿ¾† 5°uÌ5°uÌOýææEâ .@@÷r­õAi €ÿÿþÖ 5°~•5°0¤U2FsdGVkX19NKzILQmFbPqog4zTChkbizH/agPQd7Tqixj0eBQFzlisFtlW2eh0Sibf9+0+YOj0HgUoHDGKaOQlkxIfhH4PF+Mu87A8p4tJwgiMp611fVoscMk4138Qjx0znrqplZKUCx1kGLYEGrOZhVviXDFaJCr9V5mN1Td0=ÿOH88E4Ò@@r÷i ­õï€ÿÿþ( 5°~•5°~•Oa!»»E·˜1@@r÷i ­õï€ÿÿþ« 5°~•5°~•U2FsdGVkX18/b3EX+3Mv+ehpEPCuYGD5trrFOJN2HHTmzG5v0oblj0DQi1Z/cbEjqujU6ng01aHUlEbAHsqvVcpFtPzJc5jty9MgL9ywumHGqduAJTs4LiKyQD+/D9A5ÿO!88E49@@÷r­õïi €ÿÿþ( 5°~•5°~•O ¬œœ`p@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\ˆ»Ý€ÿÿ¾Ö 
5°V5°uÌÁ¤~QÞZ‰tSLŒ»½ÓÂ9‘"KtTºƒX>ÑÇë¸üÔ’ó‹äîˆnÏÚ(&/[¦´gʾϦÐ>qí-¦(¾ á—EˆOǬLL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»Ýÿ©\Ø€ÿÿ¾† 5°V5°VOb°ŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»Ýÿ©\Ø€ÿÿ¾Æ 5°V5°VÍC¥ýˆ[öþ„ÑwíP}µž·–ÃCÜøTŒ9ÑP8bÉŸUj>žeÝÑàà\J³£k‹õƒÅj¦¿¼D[O‚°LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\Ø»€ÿÿ¾† 5°W5°VO¯°¬¬`€@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©\Ø»€ÿÿ¾æ 5°W5°V-‰Õ ¨±X…°9¡)ŠŒ¢ã×Ñu;íŠû[1BŽBÖrV ¯ÆPAz ½€JG\ª”-RÒ®,¨.9“#%J´•cL„-0sϺыժú†B[øë÷îùÒK„OÁ°LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©]8€ÿÿ¾† 5°W5°WO°ÝŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©]8€ÿÿ¾Æ 5°b5°WA¤K ‚2Ã,Ú@®æ"½ñÚ£÷ÎõYï §Ü¿,XÛ8·ñ{>‹ÎÜ~(Á†SÿA|DxWé#CwOØÝLL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]8»]€ÿÿ¾† 5°b5°bOº\ œœ`p@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]8»]€ÿÿ¾Ö 5°ƒK5°b×™è‹8YôMðIõ$fIƯIY°KÛŸqîûÅ+¬ØaÿÄRe°}ÂpoEŠë›î°3Ñln:&MtÍnf fó„ø³þ‚¾°¶×0¢Oã\ LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»]ÿ©]ˆ€ÿÿ¾† 5°ƒK5°ƒKO]` ŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»]ÿ©]ˆ€ÿÿ¾Æ 5°ƒK5°ƒKþå:ŒÏG_»¹Î3²\¢ ‘ÖšàNøò^GMú(ÒA5ƒÒ}ׯ½ÓYYÇ*oî`8ŸŒÊ?ÔIõ$îùGO„` LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]ˆ»€ÿÿ¾† 5°ƒK5°ƒKO­` ¬¬`€@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]ˆ»€ÿÿ¾æ 5°ƒL5°ƒKz"4èò81©ßõD4;ˆ—æ¤ÒsNÒLMG ðÍ êu §î{y6êMó÷,å}µfeMþl—ˆO¡ …æxV^ ö2¦˜gÜ*£E°ßOf… LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]è»Ý€ÿÿ¾† 5°ƒU5°ƒUO\¢ œœ`p@þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©]è»Ý€ÿÿ¾Ö 5°ˆB5°ƒU0Wzaà µ¹¨rzì“kÑ¢¿Qæ¼.iAع“z?—O¿©nî_`”+¹-Æ“xmþ®Èàa ÷è2>h¨)½C¥çÿ ×~^BÉïoO…¢ LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»Ýÿ©^8€ÿÿ¾† 5°ˆB5°ˆBO^Í ŒŒ`¶`@þ€æÎÿþëÚþ€æÎÿþëÚÄ^»Ýÿ©^8€ÿÿ¾Æ 5°ˆM5°ˆBœ÷~U¾·g.G¶YðŸÔìß­ëéÖ¶~{…W-Å´J]0éã~[™ Ó'0˜¸»;eÏæÿ¾mŒx³4–sOˆÍ LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©^8»€ÿÿ¾† 5°ˆM5°ˆMO¹Î LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©^8»€ÿÿ¾† 5°ˆM5°ˆMOàÎ LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©^9€ÿÿ¾† 5°ˆM5°ˆMOóÎ LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©^9»€ÿÿ¾† 5°ˆM5°ˆMOlÙ LL`¶ @þ€æÎÿþëÚþ€æÎÿþëÚÄ^»ÿ©^9€ÿÿ¾† 5°ˆP5°ˆMO¯Ù LL` @þ€æÎÿþëÚþ€æÎÿþëÚÄ^ÿ©^9»€ÿÿ¾† 5°ˆP5°ˆPtcpflow/tests/bug3.pcap0000644000175000017500000014567212263701151014026 0ustar dimadimaÔò¡ò>…H¦“@@Ù.Oak™˜hE, tì´…AÔvÀ¨@P‚‚œŽù{…Ř›`þÇ@|£O.ò>…HÙ½@@Ù.Oak™˜hE(¼&@-ØAÔvÀ¨@P‚‚œŽù|…Å™JP äôT{¾ò>…Hˆ¶¶Ù.Oak™˜hE¤¼'@-[AÔvÀ¨@P‚‚œŽù|…Å™JP uHTTP/1.1 200 OK Date: Tue, 22 Jul 2008 01:59:37 GMT Server: Apache Cache-Control: no-cache Pragma: no-cache Vary: Accept-Encoding Transfer-Encoding: chunked Content-Type: 
text/html;charset=ISO-8859-1 2066 Pacific Grove Weather Forecast and Conditions California (93950) var css='style_sheet.css?01272008';if(typeof(pageType)!="undefined"&&pageType=="920"){css="global.css?01272008";} if(typeof(usingGrids)!="undefined"&&usingGrids=="yes"){css="global_grids.css?01272008";} document.write(''); 368a


Home Travel Driving & Traffic Healthy Living Home & Family Sports & Recreation Climate & Green The Weather Channel TV