pax_global_header00006660000000000000000000000064140477375540014532gustar00rootroot0000000000000052 comment=f3a3a6fbcecaaabd6b9339799b62222317d83e99 ipt-netflow-2.6/000077500000000000000000000000001404773755400136515ustar00rootroot00000000000000ipt-netflow-2.6/.travis.yml000066400000000000000000000025231404773755400157640ustar00rootroot00000000000000# .travis.yml for ipt-netflow language: c dist: xenial services: - docker addons: apt: update: true packages: - pkg-config - module-assistant - iptables-dev - snmpd - libsnmp-dev matrix: include: - name: x86_64 Ubuntu 16.04.5 LTS Xenial os: linux - name: ppc64le Ubuntu 16.04.4 LTS Xenial os: linux-ppc64le - name: x86_64 CentOS 7.6.1810 env: OS_NAME=centos OS_VERSION=7.6.1810 - name: x86_64 CentOS 7.4.1708 env: OS_NAME=centos OS_VERSION=7.4.1708 - name: x86_64 CentOS 7.3.1611 env: OS_NAME=centos OS_VERSION=7.3.1611 - name: x86_64 CentOS 6.10 env: OS_NAME=centos OS_VERSION=6.10 - name: x86_64 CentOS 6.9 env: OS_NAME=centos OS_VERSION=6.9 - name: x86_64 CentOS 6.8 env: OS_NAME=centos OS_VERSION=6.8 install: - if [ -z "$OS_NAME" ]; then ( set -x; sudo m-a prepare ); else ( set -x; sudo docker pull ${OS_NAME}:${OS_VERSION}; sudo docker build --no-cache --rm --file=travis/Dockerfile.${OS_NAME} --build-arg=OS_VERSION=${OS_VERSION} --tag=${OS_NAME}-${OS_VERSION}:test .; ); fi script: - if [ -z "$OS_NAME" ]; then ( set -x; ./configure && make all && sudo make install ); else ( set -x; sudo docker run -v $PWD:$PWD -w $PWD ${OS_NAME}-${OS_VERSION}:test ); fi ipt-netflow-2.6/CREDITS000066400000000000000000000062661404773755400147030ustar00rootroot00000000000000License is GPL-2.0-only, is the same as of Linux kernel: This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Sign-off rule is that of the Linux kernel: Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. Principal author and project maintainer: ABC [2008-2021] Compatibility layer is using code from Linux Kernel and should be attributed to respective Linux developers. 
MurmurHash3 is based on smhasher (2012) of Austin Appleby. Patch authors and submitters: Ilya Evseev [2010] spizer [2010] Eric W. Biederman [2010] Giedrius Liubavičius [2010] Igor Alov [2010] Valentin V. Yankin [2011] prototype for SNMP-index Alexey Osipov [2011] Pavel Boldin [2012] Alexander Demenshin [2013] uropek [2013] shaman [2013] Jeremy Drake [2013] Matthew Martin [2016] DKMS fixes alex-eri [2016, 2017] OpenWRT compatibility xtaran [2018] Thadeu Lima de Souza Cascardo @ Canonical [2019] dbugnar [2019] Vadim Fedorenko [2019] Paolo Pisati @ Canonical [2020] Jeroen Roovers @ Gentoo [2020] Michael Hu [2020] Sven Hartge [2020] Simon Chopin [2021] Project supporters: Summa Telecom [2014] Starlink [2014] Anonymous Extensive testing and other help: Alexander (shulik) [2013] Igor Diakonov @ Summa Telecom [2014] Yuriy Dolgoruk @ Summa Telecom [2014] Andrew Savin @ Starlink [2014] Alexander Zakharov @ WAW Technologies [2015] Ivanov Eduard [2015] Maciej Zdeb [2015] (Send your names, emails, or nicks to add to the list.) ipt-netflow-2.6/IPT-NETFLOW-MIB.my000066400000000000000000000433101404773755400163760ustar00rootroot00000000000000-- IPT-NETFLOW-MIB.my IPT-NETFLOW-MIB DEFINITIONS ::= BEGIN IMPORTS MODULE-IDENTITY, OBJECT-TYPE, Counter64, Gauge32, Integer32, Counter32, enterprises FROM SNMPv2-SMI OBJECT-GROUP, MODULE-COMPLIANCE FROM SNMPv2-CONF CounterBasedGauge64 FROM HCNUM-TC TEXTUAL-CONVENTION, DisplayString, DateAndTime FROM SNMPv2-TC; iptNetflowMIB MODULE-IDENTITY LAST-UPDATED "201409120000Z" ORGANIZATION "ABC" CONTACT-INFO "Author's email: abc at telekom.ru Latest version should be obtained from https://raw.githubusercontent.com/aabc/ipt-netflow/master/IPT-NETFLOW-MIB.my" DESCRIPTION "The IPT-NETFLOW-MIB defines managed objects for ipt_NETFLOW kernel module, which is high performance NetFlow/IPFIX probe for Linux. Copyright (c) 2014 . License: GPL-2.0-only" REVISION "201409110000Z" DESCRIPTION "Initial revision." ::= { enterprises 37476 9000 10 1 } -- Top Level -- iptNetflowObjects OBJECT IDENTIFIER ::= { iptNetflowMIB 1 } iptNetflowStatistics OBJECT IDENTIFIER ::= { iptNetflowMIB 2 } iptNetflowConformance OBJECT IDENTIFIER ::= { iptNetflowMIB 3 } -- Objects -- -- modinfo iptNetflowModule OBJECT IDENTIFIER ::= { iptNetflowObjects 1 } -- sysctl net.netflow iptNetflowSysctl OBJECT IDENTIFIER ::= { iptNetflowObjects 2 } -- Modinfo Objects -- name OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-only STATUS current DESCRIPTION "Module name." ::= { iptNetflowModule 1 } version OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-only STATUS current DESCRIPTION "Software version of the module." ::= { iptNetflowModule 2 } srcversion OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-only STATUS current DESCRIPTION "Binary version of the module." ::= { iptNetflowModule 3 } loadTime OBJECT-TYPE SYNTAX DateAndTime MAX-ACCESS read-only STATUS current DESCRIPTION "Module load date-time." ::= { iptNetflowModule 4 } refcnt OBJECT-TYPE SYNTAX Integer32 MAX-ACCESS read-only STATUS current DESCRIPTION "Module usage by other kernel objects." ::= { iptNetflowModule 5 } -- RW Sysctl objects -- protocol OBJECT-TYPE SYNTAX INTEGER { netflow5(5), netflow9(9), ipfix(10) } MAX-ACCESS read-write STATUS current DESCRIPTION "Protocol version (5, 9, 10=IPFIX)." ::= { iptNetflowSysctl 1 } hashsize OBJECT-TYPE SYNTAX Integer32 UNITS "buckets" MAX-ACCESS read-write STATUS current DESCRIPTION "Hash table size of flows cache." 
::= { iptNetflowSysctl 2 } maxflows OBJECT-TYPE SYNTAX Integer32 UNITS "flows" MAX-ACCESS read-write STATUS current DESCRIPTION "Max flows limit. This limit is used for DoS protection." ::= { iptNetflowSysctl 3 } active-timeout OBJECT-TYPE SYNTAX Integer32 UNITS "minutes" MAX-ACCESS read-write STATUS current DESCRIPTION "Active flows timeout value." ::= { iptNetflowSysctl 4 } inactive-timeout OBJECT-TYPE SYNTAX Integer32 UNITS "minutes" MAX-ACCESS read-write STATUS current DESCRIPTION "Inactive flows timeout value." ::= { iptNetflowSysctl 5 } sndbuf OBJECT-TYPE SYNTAX Integer32 UNITS "bytes" MAX-ACCESS read-write STATUS current DESCRIPTION "Sockets SNDBUF size." ::= { iptNetflowSysctl 6 } destination OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-write STATUS current DESCRIPTION "Export destination parameter." ::= { iptNetflowSysctl 7 } aggregation OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-write STATUS current DESCRIPTION "Aggregation parameters." ::= { iptNetflowSysctl 8 } sampler OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-write STATUS current DESCRIPTION "Sampler parameters: sampling mode:sampling interval. Where samplign modes: deterministic, random, hash." ::= { iptNetflowSysctl 9 } natevents OBJECT-TYPE SYNTAX INTEGER { disabled(0), enabled(1) } MAX-ACCESS read-write STATUS current DESCRIPTION "Natevents (NEL) controlling parameter." ::= { iptNetflowSysctl 10 } promisc OBJECT-TYPE SYNTAX INTEGER { disabled(0), enabled(1) } MAX-ACCESS read-write STATUS current DESCRIPTION "Promisc hack controlling parameter." ::= { iptNetflowSysctl 11 } snmp-rules OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-write STATUS current DESCRIPTION "SNMP-index translation rules." ::= { iptNetflowSysctl 12 } scan-min OBJECT-TYPE SYNTAX Integer32 MAX-ACCESS read-write STATUS current DESCRIPTION "scan-min parameter." ::= { iptNetflowSysctl 13 } -- Statistics Objects -- iptNetflowTotals OBJECT IDENTIFIER ::= { iptNetflowStatistics 1 } inBitRate OBJECT-TYPE SYNTAX CounterBasedGauge64 UNITS "bits/second" MAX-ACCESS read-only STATUS current DESCRIPTION "Total incoming bits per second." ::= { iptNetflowTotals 1 } inPacketRate OBJECT-TYPE SYNTAX Gauge32 UNITS "packets/second" MAX-ACCESS read-only STATUS current DESCRIPTION "Total incoming packets per second." ::= { iptNetflowTotals 2 } inFlows OBJECT-TYPE SYNTAX Counter64 UNITS "flows" MAX-ACCESS read-only STATUS current DESCRIPTION "Total observed (metered) flows." ::= { iptNetflowTotals 3 } inPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Total metered packets. Not couning dropped packets." ::= { iptNetflowTotals 4 } inBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Total metered bytes in inPackets." ::= { iptNetflowTotals 5 } FixedDiv100 ::= TEXTUAL-CONVENTION DISPLAY-HINT "d-2" STATUS current DESCRIPTION "Fixed point, two decimals." SYNTAX Gauge32 hashMetric OBJECT-TYPE SYNTAX FixedDiv100 MAX-ACCESS read-only STATUS current DESCRIPTION "Measure of performance of hash table. When optimal should attract to 1.0, when non-optimal will be highly above of 1." ::= { iptNetflowTotals 6 } hashMemory OBJECT-TYPE SYNTAX Gauge32 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "How much system memory is used by the hash table." ::= { iptNetflowTotals 7 } hashFlows OBJECT-TYPE SYNTAX Gauge32 UNITS "flows" MAX-ACCESS read-only STATUS current DESCRIPTION "Flows currently residing in the hash table and not exported yet." 
::= { iptNetflowTotals 8 } hashPackets OBJECT-TYPE SYNTAX Gauge32 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Packets in flows currently residing in the hash table." ::= { iptNetflowTotals 9 } hashBytes OBJECT-TYPE SYNTAX CounterBasedGauge64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Bytes in flows currently residing in the hash table." ::= { iptNetflowTotals 10 } dropPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Total packets dropped by metering process." ::= { iptNetflowTotals 11 } dropBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Total bytes in packets dropped by metering process." ::= { iptNetflowTotals 12 } outByteRate OBJECT-TYPE SYNTAX Gauge32 UNITS "bytes/second" MAX-ACCESS read-only STATUS current DESCRIPTION "Total exporter output bytes per second." ::= { iptNetflowTotals 13 } outFlows OBJECT-TYPE SYNTAX Counter64 UNITS "flows" MAX-ACCESS read-only STATUS current DESCRIPTION "Total exported flow data records." ::= { iptNetflowTotals 14 } outPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Total exported packets of netflow stream itself." ::= { iptNetflowTotals 15 } outBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Total exported bytes of netflow stream itself." ::= { iptNetflowTotals 16 } lostFlows OBJECT-TYPE SYNTAX Counter64 UNITS "flows" MAX-ACCESS read-only STATUS current DESCRIPTION "Total of accounted flows that are lost by exporting process due to socket errors. This value will not include asynchronous errors (cberr), these will be counted in errTotal." ::= { iptNetflowTotals 17 } lostPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Total metered packets lost by exporting process. See lostFlows for details." ::= { iptNetflowTotals 18 } lostBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Total bytes in packets lost by exporting process. See lostFlows for details." ::= { iptNetflowTotals 19 } errTotal OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Total exporting sockets errors (including cberr)." ::= { iptNetflowTotals 20 } sndbufPeak OBJECT-TYPE SYNTAX Counter32 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Global maximum value of socket sndbuf. Sort of output queue length." ::= { iptNetflowTotals 21 } -- Per CPU statistics -- iptNetflowCpuTable OBJECT-TYPE SYNTAX SEQUENCE OF IptNetflowCpuEntry MAX-ACCESS not-accessible STATUS current DESCRIPTION "Per-CPU statistics." ::= { iptNetflowStatistics 2 } iptNetflowCpuEntry OBJECT-TYPE SYNTAX IptNetflowCpuEntry MAX-ACCESS not-accessible STATUS current DESCRIPTION "Defines an entry in the iptNetflowCpuTable." INDEX { cpuIndex } ::= { iptNetflowCpuTable 1 } IptNetflowCpuEntry ::= SEQUENCE { cpuIndex INTEGER, cpuInPacketRate Gauge32, cpuInFlows Counter64, cpuInPackets Counter64, cpuInBytes Counter64, cpuHashMetric FixedDiv100, cpuDropPackets Counter64, cpuDropBytes Counter64, cpuErrTrunc Counter32, cpuErrFrag Counter32, cpuErrAlloc Counter32, cpuErrMaxflows Counter32 } cpuIndex OBJECT-TYPE SYNTAX Integer32 (0..4096) MAX-ACCESS read-only STATUS current DESCRIPTION "Index of this cpu." 
::= { iptNetflowCpuEntry 1 } cpuInPacketRate OBJECT-TYPE SYNTAX Gauge32 UNITS "packets/second" MAX-ACCESS read-only STATUS current DESCRIPTION "Incoming packets per second for this cpu." ::= { iptNetflowCpuEntry 2 } cpuInFlows OBJECT-TYPE SYNTAX Counter64 UNITS "flows" MAX-ACCESS read-only STATUS current DESCRIPTION "Flows metered on this cpu." ::= { iptNetflowCpuEntry 3 } cpuInPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Packets metered for cpuIndex." ::= { iptNetflowCpuEntry 4 } cpuInBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Bytes metered on this cpu." ::= { iptNetflowCpuEntry 5 } cpuHashMetric OBJECT-TYPE SYNTAX FixedDiv100 MAX-ACCESS read-only STATUS current DESCRIPTION "Measure of performance of hash table on this cpu." ::= { iptNetflowCpuEntry 6 } cpuDropPackets OBJECT-TYPE SYNTAX Counter64 UNITS "packets" MAX-ACCESS read-only STATUS current DESCRIPTION "Packets dropped by metering process on this cpu." ::= { iptNetflowCpuEntry 7 } cpuDropBytes OBJECT-TYPE SYNTAX Counter64 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Bytes in cpuDropPackets for this cpu." ::= { iptNetflowCpuEntry 8 } cpuErrTrunc OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Truncated packets dropped for this cpu." ::= { iptNetflowCpuEntry 9 } cpuErrFrag OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Fragmented packets dropped for this cpu." ::= { iptNetflowCpuEntry 10 } cpuErrAlloc OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Packets dropped due to memory allocation errors." ::= { iptNetflowCpuEntry 11 } cpuErrMaxflows OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Packets dropped due to maxflows limit being reached." ::= { iptNetflowCpuEntry 12 } -- Per Socket statistics -- iptNetflowSockTable OBJECT-TYPE SYNTAX SEQUENCE OF IptNetflowSockEntry MAX-ACCESS not-accessible STATUS current DESCRIPTION "Per socket statistics." ::= { iptNetflowStatistics 3 } iptNetflowSockEntry OBJECT-TYPE SYNTAX IptNetflowSockEntry MAX-ACCESS not-accessible STATUS current DESCRIPTION "Defines an entry in the iptNetflowSockTable." INDEX { sockIndex } ::= { iptNetflowSockTable 1 } IptNetflowSockEntry ::= SEQUENCE { sockIndex INTEGER, sockDestination DisplayString, sockActive INTEGER, sockErrConnect Counter32, sockErrFull Counter32, sockErrCberr Counter32, sockErrOther Counter32, sockSndbuf Gauge32, sockSndbufFill Gauge32, sockSndbufPeak Gauge32 } sockIndex OBJECT-TYPE SYNTAX Integer32 (0..4096) MAX-ACCESS not-accessible STATUS current DESCRIPTION "Exporting socket index." ::= { iptNetflowSockEntry 1 } sockDestination OBJECT-TYPE SYNTAX DisplayString MAX-ACCESS read-only STATUS current DESCRIPTION "Exporting connection destination of this socket." ::= { iptNetflowSockEntry 2 } sockActive OBJECT-TYPE SYNTAX INTEGER { inactive(0), active(1) } MAX-ACCESS read-only STATUS current DESCRIPTION "Connection state of this socket." ::= { iptNetflowSockEntry 3 } sockErrConnect OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Connections attempt count. High value usually mean that network is not set up properly, or module is loaded before network is up, in this case it is not dangerous and should be ignored." ::= { iptNetflowSockEntry 4 } sockErrFull OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Socket full errors on this socket. 
Usually mean sndbuf value is too small." ::= { iptNetflowSockEntry 5 } sockErrCberr OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "Asynchronous callback errors on this socket. Usually mean that there is 'connection refused' errors on UDP socket reported via ICMP messages." ::= { iptNetflowSockEntry 6 } sockErrOther OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "All other possible errors on this socket." ::= { iptNetflowSockEntry 7 } sockSndbuf OBJECT-TYPE SYNTAX Gauge32 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Sndbuf value for this socket. Higher value allows accommodate (exporting) traffic bursts." ::= { iptNetflowSockEntry 8 } sockSndbufFill OBJECT-TYPE SYNTAX Gauge32 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Amount of data currently in socket buffers. When this value will reach size sndbuf, packet loss will occur." ::= { iptNetflowSockEntry 9 } sockSndbufPeak OBJECT-TYPE SYNTAX Gauge32 UNITS "bytes" MAX-ACCESS read-only STATUS current DESCRIPTION "Historical peak amount of data in socket buffers. Useful to evaluate sndbuf size, because sockSndbufFill is transient." ::= { iptNetflowSockEntry 10 } -- Conformance Information -- iptNetflowCompliances OBJECT IDENTIFIER ::= { iptNetflowConformance 1 } iptNetflowGroups OBJECT IDENTIFIER ::= { iptNetflowConformance 2 } iptNetflowCompliance MODULE-COMPLIANCE STATUS current DESCRIPTION "iptNetflowCompliance" MODULE MANDATORY-GROUPS { iptNetflowModuleGroup, iptNetflowSysctlGroup, iptNetflowTotalsGroup, iptNetflowCpuGroup, iptNetflowSockGroup } ::= { iptNetflowCompliances 1 } iptNetflowModuleGroup OBJECT-GROUP OBJECTS { name, version, srcversion, loadTime, refcnt } STATUS current DESCRIPTION "Modinfo." ::= { iptNetflowGroups 1 } iptNetflowSysctlGroup OBJECT-GROUP OBJECTS { hashsize, maxflows, protocol, active-timeout, inactive-timeout, sndbuf, destination, aggregation, sampler, natevents, promisc, snmp-rules, scan-min } STATUS current DESCRIPTION "Read-write objects accessed via sysctl" ::= { iptNetflowGroups 2 } iptNetflowTotalsGroup OBJECT-GROUP OBJECTS { inBitRate, inPacketRate, inFlows, inPackets, inBytes, hashMetric, hashMemory, hashFlows, hashPackets, hashBytes, dropPackets, dropBytes, outByteRate, outFlows, outPackets, outBytes, lostFlows, lostPackets, lostBytes, errTotal, sndbufPeak } STATUS current DESCRIPTION "Statistics totals." ::= { iptNetflowGroups 3 } iptNetflowCpuGroup OBJECT-GROUP OBJECTS { cpuIndex, cpuInPacketRate, cpuInFlows, cpuInPackets, cpuInBytes, cpuHashMetric, cpuDropPackets, cpuDropBytes, cpuErrTrunc, cpuErrFrag, cpuErrAlloc, cpuErrMaxflows } STATUS current DESCRIPTION "Per CPU statistics." ::= { iptNetflowGroups 4 } iptNetflowSockGroup OBJECT-GROUP OBJECTS { sockDestination, sockActive, sockErrConnect, sockErrFull, sockErrCberr, sockErrOther, sockSndbuf, sockSndbufFill, sockSndbufPeak } STATUS current DESCRIPTION "Per socket statistics." 
::= { iptNetflowGroups 5 } END ipt-netflow-2.6/Makefile.in000066400000000000000000000071021404773755400157160ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0-only # # Edit Makefile.in and run ./configure KVERSION = @KVERSION@ KDIR = @KDIR@ KINSTDIR = $(shell dirname @KDIR@) KOPTS = @KOPTS@ IPTABLES_CFLAGS = @IPTABLES_CFLAGS@ IPTABLES_MODULES = @IPTABLES_MODULES@ DEPMOD = /sbin/depmod -a CARGS = @CARGS@ SNMPTGSO = /usr/lib/snmp/dlmod/snmp_NETFLOW.so SNMPCONF = /etc/snmp/snmpd.conf SNMPLINE = dlmod netflow $(SNMPTGSO) CC = gcc # https://www.kernel.org/doc/Documentation/kbuild/modules.txt # https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt obj-m = ipt_NETFLOW.o ccflags-y = @KOPTS@ all: ipt_NETFLOW.ko libipt_NETFLOW.so libip6t_NETFLOW.so @SNMPTARGET@ ipt_NETFLOW.ko: version.h ipt_NETFLOW.c ipt_NETFLOW.h compat_def.h compat.h Makefile @echo Compiling $(shell ./version.sh) for kernel $(KVERSION) make -C $(KDIR) M=$(CURDIR) modules @touch $@ compat_def.h: gen_compat_def ./gen_compat_def > $@ sparse: | version.h ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile @rm -f ipt_NETFLOW.ko ipt_NETFLOW.o @echo Compiling for kernel $(KVERSION) make -C $(KDIR) M=$(CURDIR) modules C=1 @touch ipt_NETFLOW.ko coverity: coverity-submit -v minstall: | ipt_NETFLOW.ko @echo " *" make -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR) $(DEPMOD) mclean: make -C $(KDIR) M=$(CURDIR) clean lclean: -rm -f *.so *_sh.o clean: mclean lclean -rm -f *.so *.o modules.order version.h compat_def.h snmp_NETFLOW.so: snmp_NETFLOW.c $(CC) -fPIC -shared -o $@ $< -lnetsnmp sinstall: | snmp_NETFLOW.so IPT-NETFLOW-MIB.my @echo " *" install -D IPT-NETFLOW-MIB.my $(DESTDIR)/usr/share/snmp/mibs/IPT-NETFLOW-MIB.my install -D snmp_NETFLOW.so $(DESTDIR)$(SNMPTGSO) @if ! 
egrep -qs "^ *$(SNMPLINE)" $(SNMPCONF); then \ echo " *"; \ echo " * Add this line to $(SNMPCONF) to enable IPT-NETFLOW-MIB:"; \ echo " *"; \ echo " * $(SNMPLINE)"; \ echo " *"; \ fi @if killall -0 snmpd >/dev/null 2>&1; then \ echo " * (snmpd needs restart for changes to take effect.)"; \ else \ echo " * (snmpd is not started.)"; \ fi %_sh.o: libipt_NETFLOW.c $(CC) $(CFLAGS) -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c %.so: %_sh.o $(CC) -shared -o $@ $< version.h: ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile @./version.sh --define > version.h linstall: | libipt_NETFLOW.so libip6t_NETFLOW.so @echo " *" install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so install -D libip6t_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so dinstall: @echo " *" @./install-dkms.sh --install install: minstall linstall @DKMSINSTALL@ @SNMPINSTALL@ uninstall: -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so -rm -f $(DESTDIR)/usr/share/snmp/mibs/IPT-NETFLOW-MIB.my -rm -f $(DESTDIR)$(SNMPTGSO) @if egrep -qs "^ *$(SNMPLINE)" $(SNMPCONF); then \ echo " *"; \ echo " * Remove this line from $(SNMPCONF):"; \ echo " *"; \ echo " * "`egrep "^ *$(SNMPLINE)" $(SNMPCONF)`; \ echo " *"; \ fi @if [ "@DKMSINSTALL@" = dinstall ]; then ./install-dkms.sh --uninstall; fi -rm -f $(DESTDIR)$(KINSTDIR)/extra/ipt_NETFLOW.ko Makefile: Makefile.in configure ./configure --make ${CARGS} load: all -insmod ipt_NETFLOW.ko active_timeout=5 protocol=9 -iptables -I OUTPUT -j NETFLOW -iptables -I INPUT -j NETFLOW -ip6tables -I OUTPUT -j NETFLOW -ip6tables -I INPUT -j NETFLOW unload: -iptables -D OUTPUT -j NETFLOW -iptables -D INPUT -j NETFLOW -ip6tables -D OUTPUT -j NETFLOW -ip6tables -D INPUT -j NETFLOW -rmmod ipt_NETFLOW.ko reload: unload load ChangeLog: gitlog-to-changelog > ChangeLog .PHONY: ChangeLog ipt-netflow-2.6/NEWS000066400000000000000000000076041404773755400143570ustar00rootroot00000000000000ipt-netflow NEWS ================ 2.6 (2021-05-15) * Minor feature and maintenance release. - Compilation compatibility with kernel 5.12. - Add ipVersion (60) support. - Optionally seed initial template ID from PRNG. - Compilation improvements. 2.5.1 (2020-08-12) * Maintenance release. - Fix soft lockup on kernels with xtables targets used via nftables. - Fix compilation with Linux 5.8 and with CentOS 8. 2.5 (2020-04-24) * Minor maintenance release. - Compilation compatibility with kernels up to 5.6. - Performance improvements by Vadim Fedorenko. - Small code fixes and typo corrections. 2.4 (2019-06-23) * Minor maintenance release. - Compilation compatibility with kernels up to 5.2. - README, CREDITS update. 2.3 (2018-03-19) * Minor feature and Maintenance release. - Compatibility with latest kernels. - Allow to set engine_id (observationDomainId). - Cross-compilation support. - OpenWRT support. - Allow binding destination socket to IP and interface. 2.2 (2016-02-21) * Minor feature and Maintenance release. - Decapsulate MPLS in promisc mode and MPLS-aware NetFlow feature. - Export flowEndReason for IPFIX. - Promics mode improvements. - Allow export destination to be IPv6 address. - Move flows list from debugfs to proc. - Compilation compatibility with latest kernels. - Code is Coverity scanned. 2.1 (2014-02-08) * Options Templates support (V9 and IPFIX). Which let to implement: - Flow Sampling (random, deterministic, and hash modes) for all types of NetFlow protocols (V5, V9, IPFIX). 
- Export Statistics (metering, exporting, sampling) and Configuration. - Export Interface list (ifName, ifDescr). * Promisc hack (no need to patch kernel anymore). * SNMP monitoring interface and agent (via net-snmp dlmod). * More compilation compatibility with recent kernels, grsecurity kernels, Gentoo, Debian, Centos. DKMS install support. * Minor features: IPSec flows, Direction Element. Removed support for CONNMARK. Bug fixes and improvements. * irqtop tool (ruby). 2.0.1 (2014-09-04) * Minor fixes for 2.0 release. 2.0 (2014-08-07) * This is major release with a lot of new features and improvements, such as: - Support of NetFlow v9 and IPFIX. - IPv6 support. - NAT translation events (NEL). - Additional options is SNMP-index translation rules, Ethernet Type, VLAN, and MAC addresses exporting. - Performance improvements (tested to work well on 10Gbit load). - Stability improvements and bug fixes. 1.8 (2012-07-02) * This is minor bug fix release with small improvements. 1.7.1 (2011-04-04) * This is minor release with improved compilation compatibility and small improvements. 1.7 (2011-01-30) * This version have improved compilation compatibility with latest Linux kernels (2.6.36.3 and 2.6.27) and bunch of small improvements. * Since version 1.7 ipt-netflow's repository moved to Git SCM. Use of CVS repository is deprecated. 1.5.1 (2009-03-14) * This version have improved compliance to NetFlow standard and compatibility with iptables/xtables 1.4.x. Added options to configure script for manual customization. 1.4 (2008-12-23) * This version have restored compatibility with popular kernel 2.6.18, added configure script, statistics improvements, some fixes for 64-bit platforms, and minor performance tune up. 1.2 (2008-11-15) * This version have stability enhancements, documentation improvements, IP frag support, better statistics. 1.1 (2008-08-06) * This version updated compatibility with latest (2.6.26) Linux kernel, have more support for promisc patch, and minor fixes. 1.0 (2008-07-12) * First release tested in production environment. ipt-netflow-2.6/README000066400000000000000000001016041404773755400145330ustar00rootroot00000000000000ipt_NETFLOW linux 2.6.x-5.x kernel module by -- 2008-2021. High performance NetFlow v5, v9, IPFIX flow data export module for Linux kernel. Created to be useful for linux routers in high-throughput networks. It should be used as iptables target. ========================= = Detailed Feature List = ========================= * High performance and scalability. For highest performance module could be run without conntrack being enabled in kernel. Reported to be able to handle 10Gbit traffic with more than 1500000 pps with negligible server load (on S5500BC). * NetFlow v5, v9, and IPFIX are fully supported. Support of v9/IPFIX is adding flexibility to exporting of flow data plus greater visibility of traffic, letting export many additional fields besides what was possible in v5 era. Such as * IPv6 option headers, IPv4 options, TCP options, ethernet type, dot1q service and customer VLAN ids, MAC addresses, and * Full IPv6 support, * NAT translations events (from conntrack) using NetFlow Event Logging (NEL). This is standardized way for v9/IPFIXr, but module export such events even for v5 collectors via specially crafted pseudo-records. * Deterministic (systematic count-based), random and hash Flow Sampling. With appropriate differences in support of v5, v9, and IPFIX. * SNMP agent (for net-snmp) for remote management and monitoring. 
* Options Templates (v9/IPFIX) let export useful statistical, configurational, and informational records to collector. Such as metering, exporting, sampling stat and reliability stat, sampling configuration, network devices ifName, ifDescr list. * Tested to compile and work out of the box on Centos 6, 7, Debian and * Ubuntu. Many vanilla Linux kernels since 2.6.18 up to the latest (as of * writing is 3.19) are supported and tested. * Module load time and run-time (via sysctl) configuration. * Flexibility in enabling features via ./configure script. This will let you disable features you don't need, which increase compatibility with custom kernels and performance. * SNMP-index translation rules, let convert meaningless and unstable interface indexes (ifIndex) to more meaningful numbering scheme. * Easy support for catching mirrored traffic with promisc option. Which is also supporting optional MPLS decapsulation and MPLS-aware NetFlow. ============================ = OBTAINING LATEST VERSION = ============================ $ git clone git://github.com/aabc/ipt-netflow.git ipt-netflow $ cd ipt-netflow ================ = INSTALLATION = ================ Five easy steps. ** 1. Prepare Kernel source If you have package system install kernel-devel package, otherwise install raw kernel source from http://kernel.org matching _exactly_ version of your installed kernel. a) What to do for Centos: ~# yum install kernel-devel b) What to do for Debian and Ubuntu: ~# apt-get install module-assistant ~# m-a prepare c) Otherwise, if you downloaded raw kernel sources don't forget to create .config by copying it from your distribution's kernel. Its copy could reside in /boot or sometimes in /proc, examples: kernel-src-dir/# cp /boot/config-`uname -r` .config or kernel-src-dir/# zcat /proc/config.gz > .config Assuming you unpacked kernel source into `kernel-src-dir/' directory. Then run: kernel-src-dir/# make oldconfig After that you'll need to prepare kernel for modules build: kernel-src-dir/# make prepare modules_prepare Note: Don't try to `make prepare' in Centos kernel-devel package directory (which is usually something like /usr/src/kernels/2.6.32-431.el6.x86_64) as this is wrong and meaningless. ** 2. Prepare Iptables Before this step it also would be useful to install pkg-config if don't already have. If you have package system just install iptables-devel (or on Debian, Ubuntu and derivatives libxtables-dev if available, otherwise iptables-dev) package, otherwise install iptables source matching version of your installation from ftp://ftp.netfilter.org/pub/iptables/ a) What to do for Centos: # yum install iptables-devel b) What to do for Debian or Ubuntu: # apt-get install iptables-dev pkg-config c) Otherwise, for raw iptables source build it and make install. ** 3. Prepare net-snmp (optional) In case you want to manage or monitor module performance via SNMP you may install net-snmp. If you want to skip this step run configure with --disable-snmp-agent option. a) For Centos: # yum install net-snmp net-snmp-devel b) For Debian or Ubuntu: # apt-get install snmpd libsnmp-dev c) Otherwise install net-snmp from www.net-snmp.org ** 4. Now, to actually build the module run: ~/ipt-netflow# ./configure ~/ipt-netflow# make all install ~/ipt-netflow# depmod This will install kernel module and iptables specific library. Troubleshooting: a) Sometimes you will want to add CC=gcc-3 to make command. Example: make CC=gcc-3.3 b) Compile module with actual kernel source compiled. I.e. 
first compile kernel and boot into it, and then compile module. If you are using kernel-devel package check that its version matches your kernel package. c) If you have sources in non-standard places or configure isn't able to find something run ./configure --help to see how to specify paths manually. d) To run irqtop on Debian 8 you may need to install: # apt-get install ruby ruby-dev ncurses-dev # gem install curses z) If all fails create ticket at https://github.com/aabc/ipt-netflow/issues ** 5. After this point you should be able to load module and use -j NETFLOW target in your iptables. See next section. ===================== = Configure Options = ===================== Configure script allows to enable or disable optional features: --enable-natevents enables natevents (NEL) support, (this and option will require conntrack support to be enabled into kernel and conntack module (nf_conntrack) loaded before ipt_NETFLOW. Usually this is done automatically because of `depmod', but if you don't do `make install' you'll need to load nf_conntrack manually. Read below for explanation of natevents. --enable-sampler enables flow sampler. Read below for explanation of its configuration option. --enable-sampler=hash additionally enables 'hash' sampler. --disable-snmp-agent disables building net-snmp agent module, which is enabled by default. --enable-snmp-rules enables SNMP-index conversion rules. Read below for explanation of snmp-rules. --enable-macaddress enables exporting of src and dst MAC addresses for every flow in v9/IPFIX. Difference in any of MAC address will be accounted as different flow. I.e. MAC addresses will be part of flow key. --enable-vlan enables exporting of dot1q VLAN Ids and Priorities for every flow in v9/IPFIX. It supports outer and second dot1q tags if present. Any of two previous options will enable exporting of Ethernet Packet Type, ethernetType(256). --enable-direction enables exporting of flowDirection(61) Element for v9/IPFIX. Packets captured in PREROUTING and INPUT chains will be accounted as ingress flows(0), in OUTPUT and POSTROUTING as egress flows(1), and in FORWARD will have flowDirection set to undefined value 255. --enable-aggregation enables aggregation rules. Read below for explanation of aggregation. --disable-dkms disable creating dkms.conf and auto-install module into DKMS tree. --disable-dkms-install only disable auto-install into DKMS, but still create dkms.conf, in case you will want to install it manually. --enable-physdev Export ingressPhysicalInterface(252) and egressPhysicalInterface(253) (relevant for bridges) in V9 and IPFIX. If your collector does not support these Elements but you still need physdevs then use --enable-physdev-override, in that case physdevs will override normal interface numbers ingressInterface(10) and egressInterface(14). --enable-promisc Enables capturing of promiscuous packets into raw/PREROUTING chain. See README.promisc Solution 1 for usage details and example. --promisc-mpls Enables MPLS label stack decapsulation for promiscuous packets. (For IPv4 and IPv6 packets only). This also enables MPLS-aware NetFlow (v9 and IPFIX), you may wish to specify with --promisc-mpls=n how much MPLS labels you want to be recorded and exported (default is 3, maximum is 10, set to 0 to not report anything). =========== = RUNNING = =========== 1. 
You can load module directly by insmod like this: # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1 Or if properly installed (make install; depmod) by this: # modprobe ipt_NETFLOW destination=127.0.0.1:2055 See, you may add options in insmod/modprobe command line, or add them in /etc/modprobe.conf or /etc/modprobe.d/ipt_NETFLOW.conf like thus: options ipt_NETFLOW destination=127.0.0.1:2055 protocol=9 natevents=1 2. Statistics is in /proc/net/stat/ipt_netflow Machine readable statistics is in /proc/net/stat/ipt_netflow_snmp To view boring slab statistics: grep ipt_netflow /proc/slabinfo Dump of all flows is in /proc/net/stat/ipt_netflow_flows 3. You can view parameters and control them via sysctl, example: # sysctl net.netflow # sysctl net.netflow.hashsize=32768 Note: For after-reboot configuration I recommend to store module parameters in modprobe configs instead of storing them in /etc/sysctl.conf, as it's less clear when init process will apply sysctl.conf, before of after module's load. 4. Example of directing all IPv4 traffic into the module: # iptables -I FORWARD -j NETFLOW # iptables -I INPUT -j NETFLOW # iptables -I OUTPUT -j NETFLOW Note: It is preferable (because easier to understand) to _insert_ NETFLOW target at the top of the chain, otherwise not all traffic may reach NETFLOW if your iptables configuration is complicated and some other rule inadvertently consume the traffic (dropping or acepting before NETFLOW is reached). It's always good to test your configuration. Use iptables -L -nvx to check pkts/bytes counters on the rules. 5. If you want to account IPv6 traffic you should use protocol 9 or 10. Example of directing all IPv6 traffic into the module: # sysctl net.netflow.protocol=10 # ip6tables -I FORWARD -j NETFLOW # ip6tables -I INPUT -j NETFLOW # ip6tables -I OUTPUT -j NETFLOW Note: First enable right version of protocol and after that add ip6tables rules, otherwise you will get errors in dmesg. 6. If you want to account NAT events (NEL): # sysctl net.netflow.natevents=1 Note that natevents feature is completely independent from traffic accounting (it's using so called conntrack events), thus you don't need to set or change any iptables rules to use that. You may need to enable kernel config option CONFIG_NF_CONNTRACK_EVENTS though (if it isn't already enabled). For details on how they are exported for different protocol versions see below. 7. For SNMP support you will need to add this command into snmpd.conf to enable IPT-NETFLOW-MIB in SNMP agent: dlmod netflow /usr/lib/snmp/dlmod/snmp_NETFLOW.so Restart snmpd for changes to take effect. Don't forget to properly configure access control. Example simplest configuration may looks like (note that this is whole /etc/snmp/snmpd.conf): rocommunity public 127.0.0.1 dlmod netflow /usr/lib/snmp/dlmod/snmp_NETFLOW.so Note, that this config will also allow _full_ read-only access to the whole linux MIB. To install IPT-NETFLOW-MIB locally, copy file IPT-NETFLOW-MIB.my into ~/.snmp/mibs/ * Detailed example of SNMP configuration is there: * https://github.com/aabc/ipt-netflow/wiki/Configuring-SNMP-access To check that MIB is installed well you may issue: $ snmptranslate -m IPT-NETFLOW-MIB -IR -Tp iptNetflowMIB This should output IPT-NETFLOW-MIB in tree form. To check that snmp agent is working well issue: $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowMIB Should output full MIB. 
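   The sysctl-like objects under iptNetflowSysctl are also writable via
   snmpset (see below); a hedged example, assuming snmpd has additionally been
   configured with a read-write community named 'private' (not shown in the
   minimal config above):

   $ snmpset -v2c -c private 127.0.0.1 IPT-NETFLOW-MIB::maxflows.0 i 2000000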
If MIB is not installed try: $ snmpget -v 1 -c public 127.0.0.1 .1.3.6.1.4.1.37476.9000.10.1.1.1.1.0 Which should output STRING: "ipt_NETFLOW". MIB provides access to very similar statistics that you have in /proc/net/stat/ipt_netflow, you can read description of objects in text file IPT-NETFLOW-MIB.my If you want to access to SNMP stat in machine readable form for your scripts there is file /proc/net/stat/ipt_netflow_snmp Note: Using of SNMP v2c or v3 is mandatory for most tables, because this MIB uses 64-bit counters (Counter64) which is not supported in old SNMP v1. You should understand that 32-bit counter will wrap on 10Gbit traffic in just 3.4 seconds! So, always pass option `-v2c' or `-v3' to net-snmp utils. Or, for example, configure option `defVersion 2c' in ~/.snmp/snmp.conf You can also have `defCommunity public' ov v3 auth parameters (defSecurityName, defSecurityLevel, defPassphrase) set there (man snmp.conf). Examples for dumping typical IPT-NETFLOW-MIB objects: - Module info (similar to modinfo, SNMPv1 is ok for following two objects): $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowModule - Read-write sysctl-like parameters (yes, they are writable via snmpset, you may need to configure write access to snmpd, though): $ snmpwalk -v 1 -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowSysctl - Global performance stat of the module (note -v2c, because rest of the objects require SNMP v2c or SNMP v3): $ snmpwalk -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowTotals - Per-CPU (metering) and per-socket (exporting) statistics in table format: $ snmptable -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowCpuTable $ snmptable -v2c -c public 127.0.0.1 -m IPT-NETFLOW-MIB iptNetflowSockTable =========== = OPTIONS = =========== Options can be passed as parameters to module or changed dynamically via sysctl net.netflow or IPT-NETFLOW-MIB::iptNetflowSysctl protocol=5 - what version of NetFlow protocol to use. Default is 5. You can choose from 5, 9, or 10 (where 10 is IPFIX). If you plan to account IPv6 traffic you should use protocol 9 or 10 (IPFIX), because NetFlow v5 isn't compatible with IPv6. destination=127.0.0.1:2055 - where to export netflow, to this ip address. Port is optional, default is 2055. You will see this connection in netstat like this: udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED destination=[2001:db8::1]:2055 - export target using IPv6 address. Brackets are optional, but otherwise you should delimit port with 'p' or '#' character. destination=127.0.0.1:2055,192.0.0.1:2055 - mirror flows to two (can be more) addresses, separate addresses with comma. destination=127.0.0.1:2055@127.0.0.2 - bind socket to address (127.0.0.2). destination=127.0.0.1:2055%eth0 - bind socket to interface (eth0). May be useful for multi-homed boxes. sampler=deterministic:123 sampler=random:123 sampler=hash:123 - enables Flow Sampling. To disable set to the empty value or to `0'. Note, that this is flow sampling (as of RFC 7014), not packet sampling (PSAMP). There is three sampling modes: deterministic: select each N-th observed flow; in IPFIX this mode is called Systematic count-based Sampling; random: select randomly one out of N flows. hash: select hash-randomly one out of N flows. Number after colon is population size N, with valid values 2-16383. (This 16383 limit is for compatibility with NetFlow v5.) Using 'deterministic' and 'random' sampling will not reduce resource usage caused by the module, because flows are sampled late in exporting process. 
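   A hedged usage example (the available sampler modes depend on how the
   module was configured; 'hash' requires --enable-sampler=hash): to switch to
   hash sampling of one out of 100 flows at runtime and verify the setting:

   # sysctl net.netflow.sampler=hash:100
   # sysctl net.netflow.sampler

   Setting it back to `0' (or to the empty value) disables sampling again.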
  Deterministic and random sampling only reduce the amount of flows which go
  to the collector, thus reducing the load on the collector. On the other
  hand, using 'hash' sampling will reduce the CPU and memory load caused by
  the module, because flows are discarded early in the processing chain. They
  are discarded almost like in the random sampler, except that the
  pseudo-random value depends on the Flow Key hash of each packet.
  All required NetFlow/IPFIX information to signal the use of sampling is
  also sent to the collector. 'Hash' sampling is presented as 'random'
  sampling to the collector, because of their similarity.
  Note that Flow Sampling is compatible with NetFlow v5, v9, and IPFIX.

natevents=1
  - Collect and send NAT translation events as NetFlow Event Logging (NEL)
  for NetFlow v9/IPFIX, or as dummy flows compatible with NetFlow v5.
  Default is 0 (don't send).
  For the NetFlow v5 protocol the meaning of the fields in dummy flows is:
    Src IP, Src Port is the pre-NAT source address.
    Dst IP, Dst Port is the post-NAT destination address.
    - These two fields are made equal to data flows caught in the FORWARD chain.
    Nexthop, Src AS is the post-NAT source address for SNAT. Or,
    Nexthop, Dst AS is the pre-NAT destination address for DNAT.
    TCP Flags is SYN+ACK for a start event, RST+FIN for a stop event.
    Pkt/Traffic size is 0 (zero), so it won't interfere with accounting.
  Natevents support is disabled at compile time by default; to enable it you
  will need to add the --enable-natevents option to the ./configure script.
  For a technical description of NAT Events see:
  http://tools.ietf.org/html/draft-ietf-behave-ipfix-nat-logging-04

inactive_timeout=15
  - export a flow after it has been inactive for 15 seconds. Default value is 15.

active_timeout=1800
  - export a flow after it has been active for 1800 seconds (30 minutes).
  Default value is 1800.

refresh-rate=20
  - for NetFlow v9 and IPFIX this is how frequently templates are re-sent
  (in packets). You probably don't need to change the default (which is 20).

timeout-rate=30
  - for NetFlow v9 and IPFIX this is how often old templates are re-sent
  (in minutes). No need to change it.

debug=0
  - debug level (none).

sndbuf=number
  - size of the output socket buffer in bytes. I recommend a higher value if
  you experience netflow packet drops (which can be seen in the statistics as
  the 'sock: fail' number). Default value is the system default.

hashsize=number
  - Hash table bucket size. Used for performance tuning.
  Abstractly speaking, it should be at least twice the number of flows you
  usually have, but this is not required. Default is a small enough value
  that depends on system memory.

maxflows=2000000
  - Maximum number of flows to account. It's here to prevent DoS attacks.
  After this limit is reached new flows will not be accounted. Default is
  2000000, zero is unlimited.

aggregation=string..
  - A few aggregation rules (or some say it is one rule).
  The buffer for the aggregation string is 1024 bytes, and sysctl limits it
  to ~700 bytes, so don't write a lot there. Rules are applied in definition
  order for each packet, so don't write a lot of them either. Rules are
  applied to both directions (dst and src). Rules are tried until the first
  match, but netmask and port aggregations are matched separately. Delimit
  them with commas.
  Rules are of two kinds: netmask aggregation and port aggregation:
  a) Netmask aggregation example: 192.0.0.0/8=16
     Which means: strip addresses matching subnet 192.0.0.0/8 to /16.
  b) Port aggregation example: 80-89=80
     Which means: replace ports from 80 to 89 with 80.
Full example: aggregation=192.0.0.0/8=16,10.0.0.0/8=16,80-89=80,3128=80 Aggregation rules are enabled by default, if you feel you don't need them you may add --disable-aggregation to ./configure script. snmp-rules=string... - Few SNMP-index conversion rules similar to fproble-ulog. Quoting man fprobe-ulog: "Comma separated list of interface name to SNMP-index conversion rules. Each rule consists of interface base name and SNMP-index base separated by colon (e.g. ppp:200). Final SNMP-index is sum of corresponding SNMP-index base and interface number. In the above example SNMP-index of interface ppp11 is 211. If interface name did not fit to any of conversion rules then SNMP-index will be taken from kernel." This implementation isn't optimized for performance (no rule caching or hashing), but should be fast if rules list are short. Rules are parsed in order from first to last until first match. snmp-rules are compilation disabled by default, to enable you will need to add --enable-snmp option to ./configure script. scan-min=1 - Minimal interval between flow export scans. Sometimes could be useful to reduce load on exporting CPU by increasing this interval. Value are in kernel jiffies units (which is x/HZ seconds). promisc=1 - Enables promisc hack. See README.promisc Solution 1 for details. exportcpu=number - Lock exporter to single CPU. This may be useful to fine control CPU load. Common use case: with smp_affinity and RSS you spread packet processing to all CPUs except one, and lock it to the exporter. While exporter CPU load generally is not high, for someone it may be not desirable to combine it with packet processing on very highly loaded routers. This option could be changed at runtime with: # echo number > /sys/module/ipt_NETFLOW/parameters/exportcpu engine_id=number - Observation Domain ID (on IPFIX, Source Id on NetFlow v9, or Engine Id on NetFlow v5) value to be exported. This may help your collector to distinguish between multiple exporters. On Netflow v9 and IPFIX this value is 32-bit on NetFlow v5 only 8 low bits are significant. Default value is 0. This option could be changed at runtime with: # echo number > /sys/module/ipt_NETFLOW/parameters/engine_id ==================== = HOW TO READ STAT = ==================== Statistics is your friend to fine tune and understand netflow module performance. To see stat in human readable form: # cat /proc/net/stat/ipt_netflow How to interpret the data: > ipt_NETFLOW version v1.8-122-gfae9d59-dirty, srcversion 6141961152BE0DFA6A21EF4; aggr mac vlan This line helps to identify actual source that your module is build on. Please always supply it in all bug reports. v1.8-122: 1.8 is release, 122 is commit number after release; -gfae9d59: fae9d59 is short git commit id; -dirty: if present, meaning that git detected that sources are changed since last git commit, you may wish to do `git diff' to view changes; srcversion 6141961152BE0DFA6A21EF4: binary version of module, you can compare this with data from `modinfo ./ipt_NETFLOW.ko' to identify actual binary loaded; aggr mac vlan: tags to identify compile time options that are enabled. > Protocol version 10 (ipfix), refresh-rate 20, timeout-rate 30, (templates 2, active 2). Timeouts: active 5, inactive 15. Maxflows 2000000 Protocol version currently in use. Refresh-rate and timeout-rate for v9 and IPFIX. Total templates generated and currently active. Timeout: active X: how much seconds to wait before exporting active flow. - same as sysctl net.netflow.active_timeout variable. 
 inactive X: how many seconds to wait before exporting an inactive flow.
   - same as the sysctl net.netflow.inactive_timeout variable.
 Maxflows 2000000: maxflows limit.
   - all flows above the maxflows limit must be dropped.
   - you can control the maxflows limit by the sysctl net.netflow.maxflows variable.

> Promisc hack is disabled (observed 0 packets, discarded 0).

 observed n: to see that the promisc hack is really working.

> Natevents disabled, count start 0, stop 0.

 - whether natevents mode is disabled or enabled, and how many start or stop
   events have been reported.

> Flows: active 5187 (peak 83905 reached 0d0h1m ago), mem 283K, worker delay 100/1000 (37 ms, 0 us, 4:0 0 [3]).

 active X: currently active flows in the memory cache.
   - for optimum CPU performance it is recommended to set the hash table size
     to at least twice the average of this value, or higher.
 peak X reached Y ago: peak value of active flows.
 mem XK: how many kilobytes of memory are currently taken by active flows.
   - one active flow takes 56 bytes of memory.
   - there is a system limit on cache size too.
 worker delay X/HZ: how frequently the exporter scans the flows table per second.
   The rest is boring debug info.

> Hash: size 8192 (mem 32K), metric 1.00, [1.00, 1.00, 1.00]. InHash: 1420 pkt, 364 K, InPDU 28, 6716.

 Hash: size X: current hash size/limit.
   - you can control this by the sysctl net.netflow.hashsize variable.
   - increasing this value can significantly reduce CPU load.
   - the default value is not optimal for performance.
   - the optimal value is twice the average of active flows.
 mem XK: how much memory is occupied by the hash table.
   - the hash table is fixed size by nature, taking 4 bytes per entry.
 metric X, [X, X, X]: how optimally your hash table is being used.
   - a lower value means more optimal hash table use, min is 1.0.
   - the last three numbers in square brackets are moving averages (EWMA) of
     hash table accesses divided by match rate (searches / matches) for 4 sec,
     and 1, 5, and 15 minutes. Sort of a hash table load average. The first
     value is instantaneous. You can try to increase hashsize if the averages
     are more than 1 (increase certainly if >= 2).
 InHash: X pkt, X K: how much traffic is accounted for flows in the hash table.
 InPDU X, X: how much traffic is in flows being prepared for export.

> Rate: 202448 bits/sec, 83 packets/sec; 1 min: 668463 bps, 930 pps; 5 min: 329039 bps, 483 pps

 - Module throughput values for 1 second, 1 minute, and 5 minutes.

> cpu#  pps; <search found new [metric], trunc frag alloc maxflows>, traffic: <pkt, bytes>, drop: <pkt, bytes>
> cpu0  123; 980540 10473 180600 [1.03], 0 0 0 0, traffic: 188765, 14 MB, drop: 27863, 1142 K

 cpu#: this is Total and per CPU statistics for:
 pps: packets per second on this CPU. It's useful to debug load imbalance.
 <search found new, trunc frag alloc maxflows>: internal stat for:
 search found new: hash table searched, found, and not found counters.
 [metric]: one minute (EWMA) average hash metric per cpu.
 trunc: how many truncated packets are ignored
   - for example if packets don't have a valid IP header.
   - they are also accounted in the drop packets counter, but not in drop bytes.
 frag: how many fragmented packets have been seen.
   - the kernel defragments INPUT/OUTPUT chains for us if the nf_defrag_ipv[46]
     module is loaded.
   - these packets are not ignored but not reassembled either, so:
   - if there is not enough data in a fragment (ex. tcp ports) it is considered
     to be zero.
 alloc: how many cache memory allocations have failed.
   - packets are ignored and accounted in the traffic drop stat.
   - probably increase system memory if this ever happens.
 maxflows: how many packets are ignored on maxflows (maximum active flows reached).
   - packets are ignored and accounted in the traffic drop stat.
   - you can control the maxflows limit by the sysctl net.netflow.maxflows variable.
 traffic: <pkt, bytes>: how much traffic is accounted.
   pkt, bytes: sum of packets/megabytes accounted by the module.
   - flows that failed to be exported (on socket error) are accounted here too.
 drop: <pkt, bytes>: how much traffic is not accounted.
   pkt, bytes: sum of packets/kilobytes that are dropped by the metering process.
   - the drops accounted here are: truncated/fragmented packets, a packet for a
     new flow when memory allocation for it failed, a packet for a new flow
     when maxflows is already reached.
   Traffic lost due to socket errors is not accounted here. Look below about
   export and socket errors.

> Export: Rate 0 bytes/s; Total 2 pkts, 0 MB, 18 flows; Errors 0 pkts; Traffic lost 0 pkts, 0 Kbytes, 0 flows.

 Rate X bytes/s: traffic rate generated by the exporter itself.
 Total X pkts, X MB: total amount of traffic generated by the exporter.
 X flows: how many data flows are exported.
 Errors X pkts: how many packets were not sent due to socket errors.
 Traffic lost 0 pkts, 0 Kbytes, 0 flows: how much metered traffic is lost due
   to socket errors.
   Note that `cberr' errors are not accounted here due to their asynchronous
   nature. Read below about `cberr' errors.

> sock0: 10.0.0.2:2055 unconnected (1 attempts).

 If a socket is unconnected (for example if the module is loaded before the
 interfaces are up) it shows how many connection attempts have failed. It will
 keep trying to connect until it succeeds.

> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, connect 0, cberr 0, other 0

 sockX: per destination stats for:
 X.X.X.X:Y: destination ip address and port.
   - controlled by the sysctl net.netflow.destination variable.
 sndbuf X: how much data the socket can hold in buffers.
   - controlled by the sysctl net.netflow.sndbuf variable.
   - if you have packet drops due to sndbuf reached (error -11) increase this
     value.
 filled X: how much data is in the socket buffers right now.
 peak X: peak value of how much data was in the socket buffers.
   - you will be interested in keeping it below the sndbuf value.
 err: how many packets are dropped due to errors.
   - all flows from them will be accounted in the drop stat.
 sndbuf reached X: how many packets were dropped due to sndbuf being too small
   (error -11).
 connect X: how many connection attempts have failed.
 cberr X: how many connection refused ICMP errors we got from the export target.
   - probably you have not launched collector software on the destination,
   - or specified a wrong destination address.
   - flows lost in this fashion are not possible to account in the drop stat.
   - these are ICMP errors, and would look like this in tcpdump:
     05:04:09.281247 IP alice.19440 > bob.2055: UDP, length 120
     05:04:09.281405 IP bob > alice: ICMP bob udp port 2055 unreachable, length 156
 other X: dropped due to other possible errors.

> aggr0: ...

 aggrX: aggregation rulesets.
   - controlled by the sysctl net.netflow.aggregation variable.

==========================
= NetFlow considerations =
==========================

List of all IPFIX Elements
http://www.iana.org/assignments/ipfix/ipfix.xhtml

Flow Keys are Elements that distinguish flows. Quoting the RFC: "If a Flow
Record for a specific Flow Key value already exists, the Flow Record is
updated; otherwise, a new Flow Record is created."
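A practical way to see this on a running system: each line of the flow dump
mentioned above (/proc/net/stat/ipt_netflow_flows) should correspond to one
such Flow Record, keyed by the Elements listed below (the exact field layout
of the dump depends on kernel version and build options):

   # head /proc/net/stat/ipt_netflow_flows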
In this implementation following Elements are treated as Flow Keys: IPv4 source address: sourceIPv4Address(8), IPv6 source address: sourceIPv6Address(27), IPv4 destination address: destinationIPv4Address(12), IPv6 destination address: destinationIPv6Address(28), TCP/UDP source port: sourceTransportPort(7), TCP/UDP destination port: destinationTransportPort(11), input interface: ingressInterface(10), IP protocol: protocolIdentifier(4), IP TOS: ipClassOfService(5), and address family (IP or IPv6). Additional Flow Keys if VLAN exporting is enabled: First (outer) dot1q VLAN tag: dot1qVlanId(243) and dot1qPriority(244) for IPFIX, or vlanId(243) for NetFlow v9. Second (customer) dot1q VLAN tag: dot1qCustomerVlanId(245) and dot1qCustomerPriority(246). Additional Flow Keys if MAC address exporting is enabled: Destination MAC address: destinationMacAddress(80), Source MAC address: sourceMacAddress(56). Additional Flow Keys if MPLS-aware NetFlow is enabled: Captured MPLS stack is fully treated as flow key (including TTL values), which is Elements from mplsTopLabelStackSection(70) to mplsLabelStackSection10(79), and, if present, mplsTopLabelTTL(200). Other Elements are not Flow Keys. Note that outer interface, which is egressInterface(14), is not regarded as Flow Key. Quoting RFC 7012: "For Information Elements ... for which the value may change from packet to packet within a single Flow, the exported value of an Information Element is by default determined by the first packet observed for the corresponding Flow". Note that NetFlow and IPFIX modes of operation may have slightly different Elements being used and different statistics sent via Options Templates. ========= = VOILA = ========= ipt-netflow-2.6/README.promisc000066400000000000000000000067551404773755400162210ustar00rootroot00000000000000Hello, If you wish to account with ipt-netflow module traffic mirrored on switch you may follow one of these examples: Solution 1: Promisc-hack module option. [2014] Solution 2: General kernel patch. [2008] Solution 3: Alternative w/o kernel patch, using bridges. [2010] ************** * Solution 1 * ************** No kernel patching is need anymore! (As in easy.) Compile module with `./configure --enable-promisc' option. This will enable `promisc=' module parameter and sysctl parameter `net.netflow.promisc'. Set any of these to `1' to enable promisc hack, you will see dmesg message that it's enabled, set to `0' to disable (default). This option turned on will pass promisc traffic into `PREROUTING' chain of `raw' table (same as with promisc patches). Briefly it's like this: # cd ipt-netflow/ # ./configure --enable-promisc # make all install # iptables -A PREROUTING -t raw -i eth2 -j NETFLOW # sysctl net.netflow.promisc=1 # ifconfig eth2 promisc # grep Promisc /proc/net/stat/ipt_netflow Now you should be able to see promisc observed packets count increasing. Note, that enabling module's parameter promisc=1 will not enable promiscuous mode on network cards, these are completely different things. This option will let iptables to see promisc traffic. That traffic will not be routed anywhere and discarded just after passing PREROUTING chain. Do not enable this option if you have kernel already patched with promisc patch, as this may cause double accounting. Just keep it disabled or remove the patch. Promisc patching is completely custom and non-standard, so if you did not apply it - you are most probably safe to use promisc option. 
But, if in doubt - measure if module seeing traffic in raw/PREROUTING, if it isn't - you don't have the patch applied and certainly safe to use the option. ************** * Solution 2 * ************** 1. Patch your kernel with `raw_promisc.patch' to enable raw table to see promisc traffic. # cd /usr/src/linux # patch -p1 < ~/ipt_netflow/raw_promisc.patch Then recompile and reinstall patched kernel. 2. For example you mirroring traffic on your Cisco switch to 47th vlan: # interface FastEthernet0/32 # description this port with vlan 47 # switchport trunk encapsulation dot1q # switchport mode trunk # ! # interface FastEthernet0/33 # port monitor FastEthernet0/32 # ! 3. Enable promisc mode on interface to actually see the packets: # /sbin/ifconfig eth1 promisc 4. Configure vlan on your linux box: # /sbin/vconfig add eth1 47 # /sbin/ifconfig eth1.47 up 5. Compile module: # make clean all install 6. Load ipt_netflow module: # /sbin/modprobe ipt_NETFLOW hashsize=160000 destination=127.0.0.1:9800 7. Direct all packets from 47th vlan to ipt_netflow module: # /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW Voila. ps. For Debian Squeeze instructions look at raw_promisc_debian_squeeze6.patch ************** * Solution 3 * ************** By Anonymous. > raw promisc hack is not needed > there is a more elegant way to capture port mirrored traffic: > > 1. create a bridge of eth0 and dummy0 > 2. put eth0 to promisc > 3. add a "-i br0 -j NETFLOW" rule to FORWARD (possibly also -j DROP after that) > > ...for some reason it works when ipt_netflow is attached to a bridge, but > requires the promisc hack when attached to a real promisc interface. Sometimes you may need to run: # brctl setageing br0 0 for this scheme to work. ipt-netflow-2.6/compat.h000066400000000000000000000434271404773755400153170ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0-only * * This code is derived from the Linux Kernel sources intended * to maintain compatibility with different Kernel versions. * Copyright of original source is of respective Linux Kernel authors. 
*/ #ifndef COMPAT_NETFLOW_H #define COMPAT_NETFLOW_H #include "compat_def.h" #ifndef NIPQUAD # define NIPQUAD(addr) \ ((unsigned char *)&addr)[0], \ ((unsigned char *)&addr)[1], \ ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[3] #endif #ifndef HIPQUAD # if defined(__LITTLE_ENDIAN) # define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[1], \ ((unsigned char *)&addr)[0] # elif defined(__BIG_ENDIAN) # define HIPQUAD NIPQUAD # else # error "Please fix asm/byteorder.h" # endif /* __LITTLE_ENDIAN */ #endif #ifndef IPT_CONTINUE # define IPT_CONTINUE XT_CONTINUE # define ipt_target xt_target #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) union nf_inet_addr { __be32 ip; __be32 ip6[4]; struct in_addr in; struct in6_addr in6; }; #endif #ifndef list_first_entry #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) # define INIT_NET(x) x #else # define INIT_NET(x) init_net.x #endif #ifndef ETH_P_8021AD # define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #endif #ifndef ETH_P_QINQ1 # define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN */ # define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN */ # define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN */ #endif #ifndef IPPROTO_MH # define IPPROTO_MH 135 #endif #ifdef CONFIG_SYSCTL # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) # define BEFORE2632(x,y) x,y # else /* since 2.6.32 */ # define BEFORE2632(x,y) # endif # if LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) # define ctl_table struct ctl_table # endif # ifndef HAVE_GRSECURITY_H # define ctl_table_no_const ctl_table # endif #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) # define compat_hlist_for_each_entry hlist_for_each_entry # define compat_hlist_for_each_entry_safe hlist_for_each_entry_safe #else /* since 3.9.0 */ # define compat_hlist_for_each_entry(a,pos,c,d) hlist_for_each_entry(a,c,d) # define compat_hlist_for_each_entry_safe(a,pos,c,d,e) hlist_for_each_entry_safe(a,c,d,e) #endif #ifndef WARN_ONCE #define WARN_ONCE(x,fmt...) ({ if (x) printk(KERN_WARNING fmt); }) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) # define IPPROTO_UDPLITE 136 #endif #ifndef time_is_before_jiffies # define time_is_before_jiffies(a) time_after(jiffies, a) #endif #ifndef time_is_after_jiffies # define time_is_after_jiffies(a) time_before(jiffies, a) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) # define prandom_u32 get_random_int # elif LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) # define prandom_u32 random32 #endif #define prandom_u32_max compat_prandom_u32_max static inline u32 prandom_u32_max(u32 ep_ro) { return (u32)(((u64) prandom_u32() * ep_ro) >> 32); } #endif #ifndef min_not_zero # define min_not_zero(x, y) ({ \ typeof(x) __x = (x); \ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) static int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { ASSERT_RTNL(); if (!dev->ethtool_ops->get_settings) return -EOPNOTSUPP; memset(cmd, 0, sizeof(struct ethtool_cmd)); cmd->cmd = ETHTOOL_GSET; return dev->ethtool_ops->get_settings(dev, cmd); } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) # define ethtool_cmd_speed(x) (x)->speed #endif #ifndef ARPHRD_PHONET # define ARPHRD_PHONET 820 # define ARPHRD_PHONET_PIPE 821 #endif #ifndef ARPHRD_IEEE802154 # define ARPHRD_IEEE802154 804 #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) # define for_each_netdev_ns(net, dev) for (dev = dev_base; dev; dev = dev->next) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) # define for_each_netdev_ns(net, d) for_each_netdev(d) #else # define for_each_netdev_ns(net, d) for_each_netdev(net, d) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) # define CHECK_FAIL 0 # define CHECK_OK 1 #else # define CHECK_FAIL -EINVAL # define CHECK_OK 0 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) # define use_module ref_module #endif #ifndef NF_IP_LOCAL_IN /* 2.6.25 */ # define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING # define NF_IP_LOCAL_IN NF_INET_LOCAL_IN # define NF_IP_FORWARD NF_INET_FORWARD # define NF_IP_LOCAL_OUT NF_INET_LOCAL_OUT # define NF_IP_POST_ROUTING NF_INET_POST_ROUTING #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) /* net/netfilter/x_tables.c */ static void xt_unregister_targets(struct xt_target *target, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) xt_unregister_target(&target[i]); } static int xt_register_targets(struct xt_target *target, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) if ((err = xt_register_target(&target[i]))) goto err; return err; err: if (i > 0) xt_unregister_targets(target, i); return err; } #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) #define num_physpages totalram_pages() #elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) #define num_physpages totalram_pages #endif #ifndef HAVE_TIMEVAL /* timeval is only used internally, so we can use anything for it. 
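 * Newer kernels no longer declare struct timeval for in-kernel code, so when
 * the configure-time probe leaves HAVE_TIMEVAL unset, a minimal substitute is
 * declared below together with a timeval_to_jiffies() helper built on
 * timespec64_to_jiffies().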
*/ struct timeval { long tv_sec; long tv_usec; /* microseconds */ }; unsigned long timeval_to_jiffies(const struct timeval *tv) { return timespec64_to_jiffies(&(struct timespec64){ tv->tv_sec, tv->tv_usec * NSEC_PER_USEC }); } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) # ifdef ktime_to_timeval /* ktime_to_timeval is defined on 64bit and inline on 32bit cpu */ /* when it's defined it calls ns_to_timeval, which is not exported */ struct timeval portable_ns_to_timeval(const s64 nsec) { struct timespec ts = ns_to_timespec(nsec); struct timeval tv; tv.tv_sec = ts.tv_sec; tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; return tv; } # define ns_to_timeval portable_ns_to_timeval # endif static inline s64 portable_ktime_to_ms(const ktime_t kt) { struct timeval tv = ktime_to_timeval(kt); return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC; } # define ktime_to_ms portable_ktime_to_ms #endif /* before 2.6.35 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) static inline s64 portable_ktime_to_us(const ktime_t kt) { struct timeval tv = ktime_to_timeval(kt); return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; } #define ktime_to_us portable_ktime_to_us #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) static inline void put_unaligned_be16(u16 val, void *p) { put_unaligned(cpu_to_be16(val), (__be16 *)p); } static inline void put_unaligned_be32(u32 val, void *p) { put_unaligned(cpu_to_be32(val), (__be32 *)p); } static inline void put_unaligned_be64(u64 val, void *p) { put_unaligned(cpu_to_be64(val), (__be64 *)p); } #endif #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) && !defined(RHEL_MAJOR) static void *__seq_open_private(struct file *f, struct seq_operations *ops, int psize) { int rc; void *private; struct seq_file *seq; private = kzalloc(psize, GFP_KERNEL); if (private == NULL) goto out; rc = seq_open(f, ops); if (rc < 0) goto out_free; seq = f->private_data; seq->private = private; return private; out_free: kfree(private); out: return NULL; } #endif /* disappeared in v3.19 */ #ifndef __get_cpu_var #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) #endif #ifndef MPLS_HLEN #define MPLS_HLEN 4 static inline int eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } #endif #ifndef MPLS_LS_S_MASK struct mpls_label { __be32 entry; }; #define MPLS_LS_S_MASK 0x00000100 #endif /* sockaddr comparison functions is from fs/nfs/client.c */ static int sockaddr_match_ipaddr6(const struct sockaddr *sa1, const struct sockaddr *sa2) { const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) return 0; #if 0 else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) return sin1->sin6_scope_id == sin2->sin6_scope_id; #endif return 1; } static int sockaddr_match_ipaddr4(const struct sockaddr *sa1, const struct sockaddr *sa2) { const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } static int sockaddr_cmp_ip6(const struct sockaddr *sa1, const struct sockaddr *sa2) { const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; return sockaddr_match_ipaddr6(sa1, sa2) && (sin1->sin6_port == sin2->sin6_port); } static int sockaddr_cmp_ip4(const struct sockaddr *sa1, const struct sockaddr *sa2) { 
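/* Compare two IPv4 sockaddrs by address and port; together with the IPv6
 * variant above it backs sockaddr_cmp(), which checks whether two
 * sockaddr_storage values refer to the same endpoint. */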
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; return sockaddr_match_ipaddr4(sa1, sa2) && (sin1->sin_port == sin2->sin_port); } static int sockaddr_cmp(const struct sockaddr_storage *sa1, const struct sockaddr_storage *sa2) { const struct sockaddr *s1 = (const struct sockaddr *)sa1; const struct sockaddr *s2 = (const struct sockaddr *)sa2; if (sa1->ss_family != sa2->ss_family) return 0; switch (sa1->ss_family) { case AF_INET: return sockaddr_cmp_ip4(s1, s2); case AF_INET6: return sockaddr_cmp_ip6(s1, s2); } return 0; } #ifndef IN6PTON_XDIGIT #define hex_to_bin compat_hex_to_bin /* lib/hexdump.c */ int hex_to_bin(char ch) { if ((ch >= '0') && (ch <= '9')) return ch - '0'; ch = tolower(ch); if ((ch >= 'a') && (ch <= 'f')) return ch - 'a' + 10; return -1; } /* net/core/utils.c */ #define IN6PTON_XDIGIT 0x00010000 #define IN6PTON_DIGIT 0x00020000 #define IN6PTON_COLON_MASK 0x00700000 #define IN6PTON_COLON_1 0x00100000 /* single : requested */ #define IN6PTON_COLON_2 0x00200000 /* second : requested */ #define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ #define IN6PTON_DOT 0x00800000 /* . */ #define IN6PTON_DELIM 0x10000000 #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 static inline int xdigit2bin(char c, int delim) { int val; if (c == delim || c == '\0') return IN6PTON_DELIM; if (c == ':') return IN6PTON_COLON_MASK; if (c == '.') return IN6PTON_DOT; val = hex_to_bin(c); if (val >= 0) return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0); if (delim == -1) return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s; u8 *d; u8 dbuf[4]; int ret = 0; int i; int w = 0; if (srclen < 0) srclen = strlen(src); s = src; d = dbuf; i = 0; while(1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { goto out; } if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (w == 0) goto out; *d++ = w & 0xff; w = 0; i++; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (i != 4) goto out; break; } goto cont; } w = (w * 10) + c; if ((w & 0xffff) > 255) { goto out; } cont: if (i >= 4) goto out; s++; srclen--; } ret = 1; memcpy(dst, dbuf, sizeof(dbuf)); out: if (end) *end = s; return ret; } int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; u8 dbuf[16]; int ret = 0; int i; int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; int w = 0; memset(dbuf, 0, sizeof(dbuf)); s = src; d = dbuf; if (srclen < 0) srclen = strlen(src); while (1) { int c; c = xdigit2bin(srclen > 0 ? 
*s : '\0', delim); if (!(c & state)) goto out; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { /* process one 16-bit word */ if (!(state & IN6PTON_NULL)) { *d++ = (w >> 8) & 0xff; *d++ = w & 0xff; } w = 0; if (c & IN6PTON_DELIM) { /* We've processed last word */ break; } /* * COLON_1 => XDIGIT * COLON_2 => XDIGIT|DELIM * COLON_1_2 => COLON_2 */ switch (state & IN6PTON_COLON_MASK) { case IN6PTON_COLON_2: dc = d; state = IN6PTON_XDIGIT | IN6PTON_DELIM; if (dc - dbuf >= sizeof(dbuf)) state |= IN6PTON_NULL; break; case IN6PTON_COLON_1|IN6PTON_COLON_1_2: state = IN6PTON_XDIGIT | IN6PTON_COLON_2; break; case IN6PTON_COLON_1: state = IN6PTON_XDIGIT; break; case IN6PTON_COLON_1_2: state = IN6PTON_COLON_2; break; default: state = 0; } tok = s + 1; goto cont; } if (c & IN6PTON_DOT) { ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); if (ret > 0) { d += 4; break; } goto out; } w = (w << 4) | (0xff & c); state = IN6PTON_COLON_1 | IN6PTON_DELIM; if (!(w & 0xf000)) { state |= IN6PTON_XDIGIT; } if (!dc && d + 2 < dbuf + sizeof(dbuf)) { state |= IN6PTON_COLON_1_2; state &= ~IN6PTON_DELIM; } if (d + 2 >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); } cont: if ((dc && d + 4 < dbuf + sizeof(dbuf)) || d + 4 == dbuf + sizeof(dbuf)) { state |= IN6PTON_DOT; } if (d >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); } s++; srclen--; } i = 15; d--; if (dc) { while(d >= dc) dst[i--] = *d--; while(i >= dc - dbuf) dst[i--] = 0; while(i >= 0) dst[i--] = *d--; } else memcpy(dst, dbuf, sizeof(dbuf)); ret = 1; out: if (end) *end = s; return ret; } #endif /* IN6PTON_XDIGIT */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) # define sock_create_kern(f, t, p, s) sock_create_kern(&init_net, f, t, p, s) #endif #if !defined(vlan_tx_tag_get) && defined(skb_vlan_tag_get) # define vlan_tx_tag_get skb_vlan_tag_get # define vlan_tx_tag_present skb_vlan_tag_present #endif #ifndef SPEED_UNKNOWN # define SPEED_UNKNOWN -1 #endif #if !defined __GNUC_PREREQ && defined __GNUC__ && defined __GNUC_MINOR__ # define __GNUC_PREREQ(maj, min) \ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else # define __GNUC_PREREQ(maj, min) 0 #endif /* ktime is not union anymore, since 2456e855354415bfaeb7badaa14e11b3e02c8466 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) # define first_tv64 first.tv64 # define last_tv64 last.tv64 #else # define first_tv64 first # define last_tv64 last #endif /* Offset changes made in 613dbd95723aee7abd16860745691b6c7bda20dc */ #ifndef HAVE_XT_FAMILY # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) # define xt_action_param xt_target_param # endif static inline u_int8_t xt_family(const struct xt_action_param *par) { return par->family; } static inline const struct net_device *xt_in(const struct xt_action_param *par) { return par->in; } static inline const struct net_device *xt_out(const struct xt_action_param *par) { return par->out; } static inline unsigned int xt_hooknum(const struct xt_action_param *par) { return par->hooknum; } #endif #ifndef SK_CAN_REUSE # define SK_CAN_REUSE 1 #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4,13,0) # define compat_refcount_read atomic_read #else # define compat_refcount_read refcount_read #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) # define timer_setup setup_timer #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) static int dev_get_alias(const struct net_device *dev, char *name, size_t len) { return snprintf(name, len, "%s", dev->ifalias); } #else /* no static because defined in 
include/linux/netdevice.h, * but forgot to create EXPORT_SYMBOL, * probably will collide with some future kernel */ int dev_get_alias(const struct net_device *dev, char *name, size_t len) { const struct dev_ifalias *alias; int ret = 0; rcu_read_lock(); alias = rcu_dereference(dev->ifalias); if (alias) ret = snprintf(name, len, "%s", alias->ifalias); rcu_read_unlock(); return ret; } #endif #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,39) && !defined(RHEL_MAJOR) static inline int is_vlan_dev(struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } #endif #ifdef CONFIG_BRIDGE_NETFILTER # ifndef HAVE_NF_BRIDGE_INFO_GET static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { return skb->nf_bridge; } # endif #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) static inline void do_gettimeofday(struct timeval *tv) { struct timespec64 ts64; ktime_get_real_ts64(&ts64); tv->tv_sec = ts64.tv_sec; tv->tv_usec = ts64.tv_nsec/1000; } #endif #define TOLOWER(x) ((x) | 0x20) unsigned long long strtoul(const char *cp, char **endp, unsigned int base) { unsigned long long result = 0; if (!base) { if (cp[0] == '0') { if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) base = 16; else base = 8; } else { base = 10; } } if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') cp += 2; while (isxdigit(*cp)) { unsigned int value; value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; if (value >= base) break; result = result * base + value; cp++; } if (endp) *endp = (char *)cp; return result; } #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,12,0) /* * find_module() is unexported in v5.12: * 089049f6c9956 ("module: unexport find_module and module_mutex") * and module_mutex is replaced with RCU in * a006050575745 ("module: use RCU to synchronize find_module") */ #include struct module *find_module(const char *name) { struct module *mod; rcu_read_lock_sched(); /* Yes this is crazy, but should work. */ list_for_each_entry_rcu(mod, &THIS_MODULE->list, list) { if (!strcmp(mod->name, name)) { rcu_read_unlock_sched(); return mod; } } rcu_read_unlock_sched(); return NULL; } #endif #endif /* COMPAT_NETFLOW_H */ ipt-netflow-2.6/configure000077500000000000000000000453431404773755400155710ustar00rootroot00000000000000#!/bin/sh # SPDX-License-Identifier: GPL-2.0-only PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin case "$1" in --from-dkms-conf*) KDKMS=`echo "$1" | sed 's/[^=]*.//'` # restore options from existing Makefile, if present if [ -e Makefile ]; then set -- `sed -n 's/^CARGS = \(.*\)/\1/p' Makefile` FROMDKMSCONF=1 fi ;; esac error() { printf "! Error: $*\n" exit 1 } iptables_src_version() { test "$IPTSRC" || return 1 echo -n "Checking iptables sources version: " SRC="$IPTSRC/Makefile" test -s "$SRC" || error "Please build iptables first." VER=`sed -n 's/^\(IPTABLES_\)\?VERSION[ :]= \?//p' "$SRC"` test "$VER" || error "Unknown version of iptables." 
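	# The version parsed from the source tree must match the iptables binary
	# detected earlier; if it does not, point configure at the matching tree
	# and version explicitly, e.g. (illustrative version number):
	#   ./configure --ipt-src=../iptables-1.8.7 --ipt-ver=1.8.7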
if [ "$VER" = "$IPTVER" ]; then echo "$VER (ok)" else echo "$VER" error "Source version ($VER) doesn't match binary ($IPTVER)" fi } get_lib_dir() { test -s "$1" && LIB=`sed -n 's/.*_LIB_DIR "\(.*\)"/\1/p' "$1"` if [ "$LIB" ]; then IPTLIB=$LIB echo "$IPTLIB (from sources)" return 0 fi return 1 } get_lib_from_bin() { LIB=`strings $IPTBIN | grep ^/.*lib.*/.*tables` if [ "$LIB" ]; then IPTLIB=$LIB echo "$IPTLIB (from binary)" return 0 fi return 1 } get_lib_from_lib() { XLIB=`/usr/bin/ldd $IPTBIN | grep libxtables | sed -n 's!.* \(/[^ ]\+\).*!\1!p'` test "$XLIB" || return 1 LIB=`strings $XLIB | grep ^/.*lib.*/.*tables` if [ "$LIB" ]; then IPTLIB=$LIB echo "$IPTLIB (from libxtables.so, from binary)" return 0 fi return 1 } iptables_inc() { echo -n "Iptables include flags: " if [ "$IPTINC" ]; then echo "$IPTINC (user specified)" elif [ "$PKGVER" ]; then IPTINC="$PKGINC" echo "$IPTINC (pkg-config)" elif [ "$NOIPTSRC" ]; then IPTINC= echo "none (default)" else IPTINC="$IPTSRC/include" IPTINC="-I$IPTINC" echo "$IPTINC (from source)" fi } iptables_modules() { echo -n "Iptables module path: " if [ "$IPTLIB" ]; then echo "$IPTLIB (user specified)" else if [ "$PKGLIB" ]; then IPTLIB="$PKGLIB" echo "$IPTLIB (pkg-config)" else get_lib_dir "$IPTSRC/include/iptables.h" && return 0 get_lib_dir "$IPTSRC/include/xtables.h" && return 0 get_lib_dir "$IPTSRC/xtables/internal.h" && return 0 get_lib_from_bin && return 0 get_lib_from_lib && return 0 error "can not find, try setting it with --ipt-lib=" fi fi } try_dir() { if [ -d "$1/include" ]; then echo "Found iptables sources at $1" IPTSRC=$1 return 0 fi return 1 } try_dirg() { try_dir "$1" && return 0 try_dir "$1.git" && return 0 } try_dir2() { test -d "$1" && try_dir `dirname $1` && return 0 } check_pkg_config() { test "$PKGWARN" && return 1 if ! which pkg-config >/dev/null 2>&1; then echo "! You don't have pkg-config, it may be useful to install it." PKGWARN=1 return 1 fi return 0 } iptables_find_version() { echo -n "Iptables binary version: " if [ "$IPTVER" ]; then echo "$IPTVER (user specified)" else IPTVER=`$IPTBIN -V 2>/dev/null | sed -n s/iptables.v//p` if [ "$IPTVER" ]; then echo "$IPTVER (detected from $IPTBIN)" return else echo "no iptables binary found" fi check_pkg_config PKGVER=`pkg-config --modversion xtables 2>/dev/null` if [ "$PKGVER" ]; then IPTVER="$PKGVER" echo "Xtables version: $IPTVER (detected from `which pkg-config`)" return fi error "Can not find iptables version, try setting it with --ipt-ver=" fi } compiler_presence_test() { echo -n "Check for working gcc: " $CC -v >/dev/null 2>&1 if [ $? = 0 ]; then echo Yes "($CC)" else echo No echo "! You need gcc to install module from source" if [ -s /etc/debian_version ]; then NAME=Debian if [ -e /etc/os-release ]; then . /etc/os-release >/dev/null 2>&1 fi echo "! " echo "! Under $NAME try to run this:" echo "! root# apt-get install gcc" echo "! " elif [ -s /etc/redhat-release ]; then echo "! " echo "! Under Centos try to run this:" echo "! root# yum install gcc" echo "! " fi exit 1 fi } compile_libitp_test() { local FLAGS local MSG echo -n "Checking for presence of $@... " if [ "$IPTINC" ]; then FLAGS=$IPTINC MSG="(using ipt-inc)" elif [ "$PKGINC" ]; then FLAGS=$PKGINC MSG="(using pkg-config)" else FLAGS= MSG= fi echo " #define __EXPORTED_HEADERS__ #include <$*>" > test.c $CC -c test.c $FLAGS >/dev/null 2>&1 RET=$? if [ $RET = 0 ]; then echo Yes $MSG; else echo No; fi rm -f test.c test.o return $RET } iptables_try_pkgconfig() { if [ ! 
"$PKGVER" ]; then check_pkg_config PKGVER=`pkg-config --modversion xtables 2>/dev/null` TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null` echo -n "pkg-config for version $IPTVER exists: " pkg-config --exact-version=$IPTVER xtables 2>/dev/null if [ $? = 0 ]; then echo "Yes" PKGVER=$TRYPKGVER else if [ "$TRYPKGVER" ]; then echo "No (reported: $TRYPKGVER)" else echo "No" fi PKGVER= fi fi if [ "$PKGVER" ]; then check_pkg_config PKGVER=`pkg-config --modversion xtables 2>/dev/null` PKGINC=`pkg-config --cflags xtables` PKGLIB=`pkg-config --variable=xtlibdir xtables` elif expr "$IPTVER" : '^1\.3' >/dev/null; then echo "! This version of iptables ($IPTVER) will be treated as old version." # Newer versions of iptables should not have -I/kernel/include! # So I assume that newer version will have correct pkg-config set up # and if not, then it's older who need it. IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\"" fi compiler_presence_test if compile_libitp_test xtables.h; then IPTCFLAGS="-DXTABLES $IPTCFLAGS" elif ! compile_libitp_test iptables.h; then echo "! Iptables headers not found. You may need to specify --ipt-inc=..." if [ -s /etc/debian_version ]; then echo "! " echo "! Under Debian simply run this:" if apt-cache policy libxtables-dev 2>&1 | fgrep -q "Candidate:"; then echo "! root# apt-get install libxtables-dev pkg-config" else echo "! root# apt-get install iptables-dev pkg-config" fi elif [ -s /etc/redhat-release ]; then echo "! " arch=.`uname -m` echo "! Under Centos simply run this:" echo "! root# yum install iptables-devel$arch pkgconfig" fi exit 1 fi } iptables_find_src() { test "$IPTINC" && return 1 test "$PKGVER" && return 1 VER="iptables-$IPTVER" if [ "$IPTSRC" ]; then echo "User specified source directory: $IPTSRC" try_dir $IPTSRC || error "Specified directory is not iptables source.." else echo "Searching for $VER sources.." try_dir "./$VER" && return 0 try_dir "../$VER" && return 0 try_dir "/usr/src/$VER" && return 0 try_dirg "iptables" && return 0 try_dirg "../iptables" && return 0 try_dirg "/usr/src/iptables" && return 0 try_dir2 `locate $VER/extensions 2>/dev/null | head -1` && return 0 echo "! Can not find iptables source directory, you may try setting it with --ipt-src=" echo "! This is not fatal error, yet. Will be just using default include dir." NOIPTSRC=1 fi } show_help() { echo "Possible options:" echo " --ipt-ver=.. iptables version (ex.: 1.4.2)" echo " --ipt-bin=.. iptables binary to use (ex.: /usr/sbin/iptables)" echo " --ipt-src=.. directory for iptable source (ex.: ../iptables-1.4.2)" echo " --ipt-lib=.. iptable modules path (ex.: /usr/libexec/xtables)" echo " --ipt-inc=.. directory for iptable headers (ex.: /usr/include)" echo " --kver=.. kernel version (ex.: 2.6.30-std-def-alt15)" echo " --kdir=.. 
directory for kernel source (ex.: /usr/src/kernel)" echo " --enable-natevents enables natevents support" echo " --enable-snmp-rules enables SNMP-index conversion rules" echo " --enable-macaddress enables MAC address for v9/IPFIX" echo " --enable-vlan enables VLAN Ids for v9/IPFIX" echo " --enable-direction enables flowDirection(61) Element" echo " --enable-sampler enables Flow Sampling" echo " --enable-sampler=hash enables Hash sampler" echo " --enable-rand-tpl-id enables seeding the template IDs from a random number" echo " --enable-aggregation enables aggregation rules" echo " --enable-promisc enables promisc hack mode" echo " --promisc-mpls decapsulate MPLS in promisc mode" echo " --promisc-mpls=N -- and record N labels (default 3)" echo " --enable-physdev enables physdev reporting" echo " --enable-physdev-override to override interfaces" echo " --disable-snmp-agent disables net-snmp agent" echo " --disable-dkms disables DKMS support completely" echo " --disable-dkms-install no DKMS install but still create dkms.conf" exit 0 } CARGS="$@" for ac_option do case "$ac_option" in -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) ac_optarg= ;; esac case "$ac_option" in --ipt-bin=*) IPTBIN="$ac_optarg" ;; --ipt-lib=*) IPTLIB="$ac_optarg" ;; --ipt-src=*) IPTSRC="$ac_optarg" ;; --ipt-ver=*) IPTVER="$ac_optarg" ;; --ipt-inc=*) IPTINC="-I$ac_optarg" ;; --kver=*) KVERSION="$ac_optarg" ;; --kdir=*) KDIR="$ac_optarg" ;; --enable-nat*) KOPTS="$KOPTS -DENABLE_NAT" ;; --enable-mac*) KOPTS="$KOPTS -DENABLE_MAC" ;; --enable-vlan*) KOPTS="$KOPTS -DENABLE_VLAN" ;; --enable-direc*) KOPTS="$KOPTS -DENABLE_DIRECTION" ;; --enable-sampl*hash) KOPTS="$KOPTS -DENABLE_SAMPLER -DSAMPLING_HASH" ;; --enable-sampl*) KOPTS="$KOPTS -DENABLE_SAMPLER" ;; --enable-aggr*) KOPTS="$KOPTS -DENABLE_AGGR" ;; --enable-rand-tpl*) KOPTS="$KOPTS -DENABLE_RANDOM_TEMPLATE_IDS" ;; --enable-promi*) ENABLE_PROMISC=1 ;; --promisc-mpls*) ENABLE_PROMISC=1; PROMISC_MPLS=1; MPLS_DEPTH=${ac_optarg:-3} ;; --enable-snmp-r*) KOPTS="$KOPTS -DSNMP_RULES" ;; --enable-physdev) KOPTS="$KOPTS -DENABLE_PHYSDEV" ;; --enable-physdev-over*) KOPTS="$KOPTS -DENABLE_PHYSDEV_OVER" ;; --disable-snmp-a*) SKIPSNMP=1 ;; --disable-net-snmp*) SKIPSNMP=1 ;; --disable-dkms*) SKIPDKMS=1 ;; --from-dkms-conf*) ;; --make) echo called from make; CARGS=`echo $CARGS | sed s/--make//g` ;; -Werror) KOPTS="$KOPTS -Werror" ;; --help|-h) show_help ;; -*) echo Invalid option: $ac_option; exit 1 ;; # *) ni="$ni $ac_option" ;; esac done if [ "$ENABLE_PROMISC" = 1 ]; then KOPTS="$KOPTS -DENABLE_PROMISC"; fi if [ "$PROMISC_MPLS" = 1 ]; then KOPTS="$KOPTS -DPROMISC_MPLS" case "$MPLS_DEPTH" in (*[!0-9]*|"") MPLS_DEPTH=1 ;; esac if [ "$MPLS_DEPTH" -lt 1 ]; then echo "! Requested MPLS stack depth is too small, limiting to 1." elif [ "$MPLS_DEPTH" -gt 10 ]; then echo "! Requested MPLS stack depth is too big, limiting to 10." 
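	# Clamp to the supported range: the exporter records at most ten MPLS
	# labels (mplsTopLabelStackSection .. mplsLabelStackSection10).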
MPLS_DEPTH=10; fi if [ "$MPLS_DEPTH" -ge 1 ]; then KOPTS="$KOPTS -DMPLS_DEPTH=$MPLS_DEPTH"; fi fi kernel_find_version() { KHOW=requested test "$KVERSION" && return 0 if grep -q '#.*Debian' /proc/version; then KHOW=proc KVERSION=`sed -n 's/.*#.*Debian \([0-9\.]\+\)-.*/\1/p' /proc/version` KLIBMOD=`uname -r` else KHOW=uname KVERSION=`uname -r` fi test "$KDIR" || return 0 test -s $KDIR/Makefile || return 1 test -s $KDIR/include/config/kernel.release || return 1 KVERSION=`cat $KDIR/include/config/kernel.release` KHOW=sources } kernel_check_src() { if [ -s "$1/Makefile" ]; then KDIR="$1" return 0 fi return 1 } kernel_check_src2() { if kernel_check_src $1/source; then KSRC=$KDIR fi kernel_check_src $1/build } kernel_find_source() { if [ "$KDKMS" ]; then # dkms args is highest priority KDIR=$KDKMS KSHOW=dkms return 0 fi KSHOW=requested test "$KDIR" && return 0 KSHOW=found kernel_check_src2 /lib/modules/$KLIBMOD && return 0 kernel_check_src2 /lib/modules/$KVERSION && return 0 kernel_check_src /usr/src/kernels/$KVERSION && return 0 kernel_check_src /usr/src/linux-$KVERSION && return 0 echo "! Linux source not found. Don't panic. You may specify kernel source" echo "! directory with --kdir=..., or try to install kernel-devel package," echo "! or just raw sources for linux-$KVERSION from kernel.org." if grep -q -i centos /proc/version 2>/dev/null; then echo "! " arch=.`uname -m` echo "! Under Centos simply run this:" echo "! root# yum install kernel-devel iptables-devel$arch pkgconfig" fi if grep -q -i debian /proc/version 2>/dev/null; then echo "! " echo "! Under Debian simply run this:" echo "! root# apt-get install module-assistant iptables-dev pkg-config" echo "! root# m-a prepare" fi exit 1 } kernel_check_consistency() { if [ -s $KDIR/include/config/kernel.release ]; then SRCVER=`cat $KDIR/include/config/kernel.release` if [ "$KVERSION" != "$SRCVER" ]; then echo "! Warning: $KHOW kernel version ($KVERSION) and $KSHOW version of kernel source ($SRCVER) doesn't match!" echo "! You may try to specify only kernel source tree with --kdir=$KDIR" echo "! and configure will pick up version properly." echo "! Assuming you want to build for $SRCVER" KVERSION=$SRCVER fi fi test -e "$KDIR/.config" || error ".config in kernel source not found, run make menuconfig in $KDIR" test -d "$KDIR/include/config" || error "kernel is not prepared, run make prepare modules_prepare in $KDIR" } kconfig() { KCONFIG=$KDIR/.config if ! grep -q "^$1=" $KCONFIG 2>/dev/null; then if [ "$KCONFIGREPORTED" != true ]; then KCONFIGREPORTED=true echo Kernel config file checked: $KCONFIG echo fi echo "! Attention: $1 is undefined in your kernel configuration" echo "! Without this option enabled $2 will not work." echo return 1 fi return 0 } # Respond to change in https://github.com/torvalds/linux/commit/4806e975729f99 nf_nat_needed() { local INC=include/linux/netfilter.h echo -n "Checking for presence of $INC... " if [ "$KSRC" -a -e $KSRC/$INC ]; then echo Yes INC=$KSRC/$INC elif [ -e $KDIR/$INC ]; then echo Yes INC=$KDIR/$INC else echo No return 1 fi echo -n "netfilter.h uses CONFIG_NF_NAT_NEEDED... 
" if grep -q CONFIG_NF_NAT_NEEDED $INC; then echo Yes else echo No return 1 fi } kernel_check_config() { kconfig CONFIG_SYSCTL "sysctl interface" kconfig CONFIG_PROC_FS "proc interface" if nf_nat_needed; then kconfig CONFIG_NF_NAT_NEEDED "natevents" else kconfig CONFIG_NF_NAT "natevents" && KOPTS="$KOPTS -DCONFIG_NF_NAT_NEEDED" fi kconfig CONFIG_NF_CONNTRACK_EVENTS "natevents" kconfig CONFIG_IPV6 "IPv6" kconfig CONFIG_IP6_NF_IPTABLES "ip6tables target" kconfig CONFIG_BRIDGE_NETFILTER "physdev override" } kernel_check_include() { echo -n "Checking for presence of $1... " if [ "$KSRC" -a -e $KSRC/$1 ]; then echo Yes KOPTS="$KOPTS $2" elif [ -e $KDIR/$1 ]; then echo Yes KOPTS="$KOPTS $2" else echo No fi } kernel_check_features() { kernel_check_include include/linux/llist.h -DHAVE_LLIST kernel_check_include include/linux/grsecurity.h -DHAVE_GRSECURITY_H } snmp_check() { SNMPTARGET= SNMPINSTALL= test "$SKIPSNMP" && return echo -n "Searching for net-snmp-config... " if which net-snmp-config >/dev/null 2>&1; then echo Yes `which net-snmp-config` else echo No. SNMPCONFIG=no fi echo -n "Searching for net-snmp agent... " if [ -s /etc/redhat-release ]; then if ! rpm --quiet -q net-snmp; then echo No. SNMPADD="do: yum install net-snmp" if [ "$SNMPCONFIG" ]; then SNMPADD="$SNMPADD net-snmp-devel" fi else echo Yes. fi if [ "$SNMPCONFIG" ]; then SNMPCONFIG="run: yum install net-snmp-devel" fi elif [ -s /etc/debian_version ]; then if ! dpkg -s snmpd >/dev/null 2>&1; then echo No. SNMPADD="do: apt-get install snmpd" if [ "$SNMPCONFIG" ]; then SNMPADD="$SNMPADD libsnmp-dev" fi else echo Yes. fi if [ "$SNMPCONFIG" ]; then SNMPCONFIG="run: apt-get install libsnmp-dev" fi elif [ -s /etc/snmp/snmpd.conf ]; then echo Yes. else echo No. SNMPADD="install net-snmp (www.net-snmp.org)" SNMPCONFIG="reinstall net-snmp with agent support." fi if [ "$SNMPADD" ]; then echo " Assuming you don't want net-snmp agent support". echo " Otherwise $SNMPADD" return elif [ "$SNMPCONFIG" ]; then echo "! You have net-snmp agent but not development package." echo "! net-snmp agent will not be built, to fix:" echo "! $SNMPCONFIG" return fi SNMPTARGET=snmp_NETFLOW.so SNMPINSTALL=sinstall } dkms_check() { DKMSINSTALL= test "$SKIPDKMS" && return echo -n "Checking for DKMS... " if ! which dkms >/dev/null 2>&1; then echo "No. (It may be useful to install it.)" echo "! " echo "! DKMS is method of installing kernel modules, that will" echo "! automatically recompile module after kernel upgrade." if [ -s /etc/debian_version ]; then echo "! " echo "! To install it under Debian simply run this:" echo "! root# apt-get install dkms" echo "! " elif [ -s /etc/redhat-release ]; then echo "! " echo "! To install it under Centos enable EPEL or RPMforge repository," echo "! then run this:" echo "! root# yum install dkms" echo "! " fi return fi echo Yes. DKMSINSTALL=dinstall test "$FROMDKMSCONF" && return if dkms status | grep ^ipt-netflow, >/dev/null; then echo "! You are already have module installed via DKMS" echo "! it will be uninstalled on 'make install' and" echo "! current version of module installed afterwards." echo "! Use --disable-dkms option if don't want this." 
fi } echo "Module version: $(./version.sh)" kernel_find_version #KVERSION test "$KLIBMOD" || KLIBMOD=$KVERSION echo "Kernel version: $KVERSION ($KHOW)" kernel_find_source #KDIR echo "Kernel sources: $KDIR ($KSHOW)" kernel_check_consistency kernel_check_config kernel_check_features CC=${CC:-gcc} test "$IPTBIN" || IPTBIN=`which iptables` iptables_find_version #IPTVER iptables_try_pkgconfig #try to configure from pkg-config iptables_find_src #IPTSRC iptables_src_version #check that IPTSRC match to IPTVER iptables_inc #IPTINC iptables_modules #IPTLIB snmp_check dkms_check rm -f compat_def.h REPLACE="\ s!@CARGS@!$CARGS!;\ s!@KVERSION@!$KVERSION!;\ s!@KDIR@!$KDIR!;\ s!@KOPTS@!$KOPTS!;\ s!@SNMPTARGET@!$SNMPTARGET!;\ s!@SNMPINSTALL@!$SNMPINSTALL!;\ s!@DKMSINSTALL@!$DKMSINSTALL!;\ s!@IPTABLES_VERSION@!$IPTVER!;\ s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\ s!@IPTABLES_MODULES@!$IPTLIB!" echo -n "Creating Makefile.. " sed "$REPLACE" Makefile.in > Makefile echo done. echo echo " If you need some options enabled run ./configure --help" echo " Now run: make all install" echo ipt-netflow-2.6/dkms.conf000066400000000000000000000004311404773755400154540ustar00rootroot00000000000000PACKAGE_NAME="ipt-netflow" pushd `dirname $BASH_SOURCE` PACKAGE_VERSION=`./version.sh` popd BUILT_MODULE_NAME[0]=ipt_NETFLOW DEST_MODULE_LOCATION[0]=/kernel/extra STRIP[0]=no MAKE[0]="make ipt_NETFLOW.ko" PRE_BUILD="./configure --from-dkms-conf=$kernel_source_dir" AUTOINSTALL=yes ipt-netflow-2.6/gen_compat_def000077500000000000000000000046331404773755400165370ustar00rootroot00000000000000#!/bin/bash -efu # SPDX-License-Identifier: GPL-2.0-only # # Generate defines based on kernel having # some symbols declared # # Copyright (C) 2019-2021 # export LANG=C LC_ALL=C LC_MESSAGES=C LC_CTYPE=C fatal() { echo "Error: $*" >&2 exit 1 } eval $(grep ^KDIR Makefile | tr -d ' ') [ "$KDIR" ] || fatal "KDIR is not found" WD=cc-test-build mkdir -p $WD cd ./$WD || fatal "cannot cd to $WD" # args: HAVE_SUMBOL symbol include kbuild_test_compile() { local cmd cat > test.c echo obj-m = test.o > Makefile cmd="make -s -B -C $KDIR M=$PWD modules" echo "$cmd" > log if $cmd >> log 2>&1; then echo " declared" >&2 [ "$2" ] && echo "// $2 is declared ${3:+in <$3>}" echo "#define HAVE_$1" echo else echo " undeclared" >&2 echo "#undef HAVE_$1" echo "// ${2:-symbol} is undeclared${3:+ in <$3>}. Compile:" sed "s/^/\/\/ /" test.c echo "// Output:" sed "s/^/\/\/ /" log echo if ! egrep -q 'has no member named|undeclared|storage size of .* isn.t known|No such file or directory' log; then echo "Error: unexpected error from compiler" >&2 cat log >&2 echo >&2 exit 3 fi fi } # Test that symbol is defined. kbuild_test_symbol() { echo -n "Test symbol $* " >&2 kbuild_test_compile ${1^^} $1 ${2-} <<-EOF #include ${2:+#include <$2>} MODULE_LICENSE("GPL"); void *test = $1; EOF } # Test that struct is defined. 
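# For example, `kbuild_test_struct proc_ops linux/proc_fs.h` (called below)
# writes either `#define HAVE_PROC_OPS` or, if the test program fails to
# build, `#undef HAVE_PROC_OPS` plus the test source and compiler output as
# `//` comments, all of which ends up in the generated compat_def.h.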
kbuild_test_struct() { echo -n "Test struct $* " >&2 kbuild_test_compile ${1^^} "struct $1" ${2-} <<-EOF #include ${2:+#include <$2>} MODULE_LICENSE("GPL"); struct $1 test; EOF } echo "// Autogenerated for $KDIR" echo # helpers introduced in 613dbd95723aee7abd16860745691b6c7bda20dc kbuild_test_symbol xt_family linux/netfilter_ipv4/ip_tables.h kbuild_test_struct timeval linux/ktime.h # 97a32539b9568 proc: convert everything to "struct proc_ops" # d56c0d45f0e27 proc: decouple proc from VFS with "struct proc_ops" kbuild_test_struct proc_ops linux/proc_fs.h # No since v5.1, but present in CentOS-8's 4.18.0-227 kbuild_test_symbol synchronize_sched linux/rcupdate.h # Fails on 3.10.0-957.10.1.el7.x86_64 kbuild_test_symbol nf_bridge_info_get linux/netfilter_bridge.h # Stumbled on 5.9 kbuild_test_struct vlan_dev_priv linux/if_vlan.h echo "// End of compat_def.h" cd $OLDPWD rm -rf $WD # debug output for Travis if [ -z "${PWD/*travis*}" ]; then cat compat_def.h >&2 fi ipt-netflow-2.6/install-dkms.sh000077500000000000000000000053031404773755400166130ustar00rootroot00000000000000#!/bin/bash # SPDX-License-Identifier: GPL-2.0-only # # This script cleanly re-install module into DKMS tree. PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin if [ "$1" = --uninstall ]; then echo "Uninstalling from DKMS..." elif [ "$1" = --install ]; then echo "Installing into DKMS..." else exit 1 fi if ! which dkms >/dev/null 2>&1; then echo "! You don't have DKMS accessible in system." exit 1 fi if [ ! -e dkms.conf ]; then echo "! You don't have DKMS configured for this module." exit 1 fi MVERSION=`./version.sh` contains() { for e in "${@:2}"; do [[ "$e" = "$1" ]] && return 0; done; return 1; } D=() # to be list of installed versions OLDIFS="$IFS" IFS=$'\n' A=(`dkms status | grep ^ipt-netflow`) IFS="$OLDIFS" for i in "${A[@]}"; do z=($i) v=${z[1]} v=${v%,} v=${v%:} if ! contains "$v" "${D[@]}"; then D+=($v) fi done if [ ${#D[@]} -eq 1 ]; then # single version is already installed. if [ $D = "$MVERSION" ]; then echo "! You have same version of module already installed into DKMS." else echo "! You have different version of module installed into DKMS." fi if [ ! -d /usr/src/ipt-netflow-$D ]; then echo "! Can not find DKMS dir for it, that's plain weird." elif [ -e /usr/src/ipt-netflow-$D/.automatic ]; then echo "! That version was automatically installed by this script," echo "! thus, is safe to remove. No worries." else echo "! That version was manually installed by you." fi nodepmod= if grep -qs no-depmod `which dkms`; then nodepmod=--no-depmod fi echo "! Removing from dkms..." dkms $nodepmod remove ipt-netflow/$D --all if [ -d "/usr/src/ipt-netflow-$D" ]; then echo "! Removing source tree from /usr/src/ipt-netflow-$D" rm -rf "/usr/src/ipt-netflow-$D" fi elif [ ${#D[@]} -gt 1 ]; then # multiple versions are installed. echo "! You have multiple versions of module already installed in DKMS." echo "! Please remove them manually to avoid conflict." echo "! 'dkms status' output:" dkms status echo "! Suggested commands to remove them:" for i in ${D[@]}; do echo "! root# dkms remove ipt-netflow/$i --all" done exit 1 fi if [ "$1" = --uninstall ]; then exit 0 fi if [ "$PWD" = "/usr/src/ipt-netflow-$MVERSION" ]; then echo "! You are already in DKMS dir." dkms add -m ipt-netflow -v $MVERSION exit $? fi echo "! Installing $MVERSION into DKMS..." 
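# A fresh copy of the sources is placed under /usr/src/ipt-netflow-<version>,
# stamped with .automatic (so a later run of this script knows it is safe to
# remove), and then registered with `dkms add`.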
rm -rf /usr/src/ipt-netflow-$MVERSION mkdir -p /usr/src/ipt-netflow-$MVERSION cp -p *.[ch] Make* READ* conf* gen* irq* *.sh *.conf /usr/src/ipt-netflow-$MVERSION/ if [ -d .git ]; then cp -pr .git /usr/src/ipt-netflow-$MVERSION/ fi touch /usr/src/ipt-netflow-$MVERSION/.automatic dkms add -m ipt-netflow -v $MVERSION exit $? ipt-netflow-2.6/ipt_NETFLOW.c000066400000000000000000004720731404773755400160240ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0-only * * This is NetFlow exporting module (NETFLOW target) for linux * (c) 2008-2021 * * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef ENABLE_NAT # undef CONFIG_NF_NAT_NEEDED #endif #if defined(ENABLE_VLAN) || defined(ENABLE_PROMISC) # include #endif #ifdef ENABLE_MAC # include # include #endif #if defined(CONFIG_NF_NAT_NEEDED) # include # include # include #endif #include #include #ifdef HAVE_LLIST /* llist.h is officially defined since linux 3.1, * but centos6 have it backported on its 2.6.32.el6 */ # include #endif #include "compat.h" #include "ipt_NETFLOW.h" #include "murmur3.h" #ifdef CONFIG_SYSCTL # include #endif #ifndef CONFIG_NF_CONNTRACK_EVENTS /* No conntrack events in the kernel imply no natevents. */ # undef CONFIG_NF_NAT_NEEDED #endif #if defined(CONFIG_NF_NAT_NEEDED) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) # include #endif #ifndef CONFIG_BRIDGE_NETFILTER # ifdef ENABLE_PHYSDEV_OVER # warning "Requested physdev override is not compiled." # undef ENABLE_PHYSDEV_OVER # endif # ifdef ENABLE_PHYSDEV # warning "Requested physdev is not compiled." # undef ENABLE_PHYSDEV # endif #endif #define IPT_NETFLOW_VERSION "2.6" /* Note that if you are using git, you will see version in other format. 
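 * When version.h defines GITVERSION (produced by version.sh from the git
 * tree), it overrides the static IPT_NETFLOW_VERSION string defined above.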
*/ #include "version.h" #ifdef GITVERSION #undef IPT_NETFLOW_VERSION #define IPT_NETFLOW_VERSION GITVERSION #endif MODULE_LICENSE("GPL"); MODULE_AUTHOR(""); MODULE_DESCRIPTION("iptables NETFLOW target module"); MODULE_VERSION(IPT_NETFLOW_VERSION); MODULE_ALIAS("ip6t_NETFLOW"); static char version_string[128]; static int version_string_size; static struct duration start_ts; /* ts of module start (ktime) */ #define DST_SIZE 256 static char destination_buf[DST_SIZE] = "127.0.0.1:2055"; static char *destination = destination_buf; module_param(destination, charp, 0444); MODULE_PARM_DESC(destination, "export destination ipaddress:port"); #ifdef ENABLE_SAMPLER static char sampler_buf[128] = ""; static char *sampler = sampler_buf; module_param(sampler, charp, 0444); MODULE_PARM_DESC(sampler, "flow sampler parameters"); static atomic_t flow_count = ATOMIC_INIT(0); /* flow counter for deterministic sampler */ static atomic64_t flows_observed = ATOMIC_INIT(0); static atomic64_t flows_selected = ATOMIC_INIT(0); #define SAMPLER_INFO_INTERVAL (5*60) static unsigned long ts_sampler_last = 0; /* template send time (jiffies) */ static struct duration sampling_ts; /* ts of sampling start (ktime) */ #define SAMPLER_SHIFT 14 #define SAMPLER_INTERVAL_M ((1 << SAMPLER_SHIFT) - 1) enum { SAMPLER_DETERMINISTIC = 1, SAMPLER_RANDOM = 2, SAMPLER_HASH = 3 }; struct sampling { union { u32 v32; struct { u8 mode; u16 interval; }; }; } samp; #endif static int inactive_timeout = 15; module_param(inactive_timeout, int, 0644); MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds"); static int active_timeout = 30 * 60; module_param(active_timeout, int, 0644); MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds"); static int exportcpu = -1; module_param(exportcpu, int, 0644); MODULE_PARM_DESC(exportcpu, "lock exporter to this cpu"); #ifdef ENABLE_PROMISC static int promisc = 0; module_param(promisc, int, 0444); MODULE_PARM_DESC(promisc, "enable promisc hack (0=default, 1)"); static DEFINE_MUTEX(promisc_lock); #endif static int debug = 0; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "debug verbosity level"); static int sndbuf; module_param(sndbuf, int, 0444); MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size"); static int protocol = 5; module_param(protocol, int, 0444); MODULE_PARM_DESC(protocol, "netflow protocol version (5, 9, 10=IPFIX)"); static unsigned int refresh_rate = 20; module_param(refresh_rate, uint, 0644); MODULE_PARM_DESC(refresh_rate, "NetFlow v9/IPFIX refresh rate (packets)"); static unsigned int timeout_rate = 30; module_param(timeout_rate, uint, 0644); MODULE_PARM_DESC(timeout_rate, "NetFlow v9/IPFIX timeout rate (minutes)"); static int one = 1; static unsigned int scan_min = 1; static unsigned int scan_max = HZ / 10; module_param(scan_min, uint, 0644); MODULE_PARM_DESC(scan_min, "Minimal interval between export scans (jiffies)"); #ifdef SNMP_RULES static char snmp_rules_buf[DST_SIZE] = ""; static char *snmp_rules = snmp_rules_buf; module_param(snmp_rules, charp, 0444); MODULE_PARM_DESC(snmp_rules, "SNMP-index conversion rules"); static unsigned char *snmp_ruleset; static DEFINE_SPINLOCK(snmp_lock); #endif #ifdef CONFIG_NF_NAT_NEEDED static int natevents = 0; module_param(natevents, int, 0444); MODULE_PARM_DESC(natevents, "enable NAT Events"); #endif static int hashsize; module_param(hashsize, int, 0444); MODULE_PARM_DESC(hashsize, "hash table size"); static int maxflows = 2000000; module_param(maxflows, int, 0644); MODULE_PARM_DESC(maxflows, "maximum number of 
flows"); static int peakflows = 0; static unsigned long peakflows_at; /* jfffies */ static int engine_id = 0; module_param(engine_id, int, 0644); MODULE_PARM_DESC(engine_id, "Observation Domain ID"); #ifdef ENABLE_AGGR #define AGGR_SIZE 1024 static char aggregation_buf[AGGR_SIZE] = ""; static char *aggregation = aggregation_buf; module_param(aggregation, charp, 0400); MODULE_PARM_DESC(aggregation, "aggregation ruleset"); static LIST_HEAD(aggr_n_list); static LIST_HEAD(aggr_p_list); static DEFINE_RWLOCK(aggr_lock); static void aggregation_remove(struct list_head *list); static int add_aggregation(char *ptr); #endif static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat); static LIST_HEAD(usock_list); static DEFINE_MUTEX(sock_lock); #define LOCK_COUNT (1<<8) #define LOCK_COUNT_MASK (LOCK_COUNT-1) struct stripe_entry { struct list_head list; /* struct ipt_netflow, list for export */ spinlock_t lock; /* this locks both: hash table stripe & list above */ }; static struct stripe_entry htable_stripes[LOCK_COUNT]; static DEFINE_RWLOCK(htable_rwlock); /* global rwlock to protect htable[] resize */ static struct hlist_head *htable __read_mostly; /* hash table memory */ static unsigned int htable_size __read_mostly = 0; /* buckets */ /* How it's organized: * htable_rwlock locks access to htable[hash], where * htable[htable_size] is big/resizable hash table, which is striped into * htable_stripes[LOCK_COUNT] smaller/static hash table, which contains * .list - list of flows ordered by exportability (usually it's access time) * .lock - lock to both: that .list and to htable[hash], where * hash to the htable[] is hash_netflow(&tuple) % htable_size * hash to the htable_stripes[] is hash & LOCK_COUNT_MASK */ #ifdef HAVE_LLIST static LLIST_HEAD(export_llist); /* flows to purge */ #endif #ifdef CONFIG_NF_NAT_NEEDED static LIST_HEAD(nat_list); /* nat events */ static DEFINE_SPINLOCK(nat_lock); static unsigned long nat_events_start = 0; static unsigned long nat_events_stop = 0; #endif static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */ static atomic_t ipt_netflow_count = ATOMIC_INIT(0); static long long pdu_packets = 0, pdu_traf = 0; /* how much accounted traffic in pdu */ static unsigned int pdu_count = 0; static unsigned int pdu_seq = 0; static unsigned int pdu_data_records = 0; /* Data records */ static unsigned int pdu_flow_records = 0; /* Data records with flows (for stat only) */ static unsigned int pdu_tpl_records = 0; static unsigned long pdu_ts_mod; /* ts(jiffies) of last flow */ static unsigned int pdu_needs_export = 0; static union { __be16 version; struct netflow5_pdu v5; struct netflow9_pdu v9; struct ipfix_pdu ipfix; } pdu; static __u8 *pdu_data_used; static __u8 *pdu_high_wm; /* high watermark */ static struct flowset_data *pdu_flowset = NULL; /* current data flowset */ static unsigned long wk_start; /* last start of worker (jiffies) */ static unsigned long wk_busy; /* last work busy time (jiffies) */ static unsigned int wk_count; /* how much is scanned */ static unsigned int wk_cpu; static unsigned int wk_trylock; static unsigned int wk_llist; static void (*netflow_export_flow)(struct ipt_netflow *nf); static void (*netflow_export_pdu)(void); /* called on timeout */ static void netflow_switch_version(int ver); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) static void netflow_work_fn(void *work); static DECLARE_WORK(netflow_work, netflow_work_fn, NULL); #else static void netflow_work_fn(struct work_struct *work); static 
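/* netflow_work is the delayed work that runs the periodic scan-and-export
 * pass; it is (re)scheduled through _schedule_scan_worker(), which adapts
 * worker_delay between scan_min and scan_max jiffies. */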
DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn); #endif static struct timer_list rate_timer; #define TCP_SYN_ACK 0x12 #define TCP_FIN_RST 0x05 static long long sec_prate = 0, sec_brate = 0; static long long min_prate = 0, min_brate = 0; static long long min5_prate = 0, min5_brate = 0; #define METRIC_DFL 100 static int metric = METRIC_DFL, min15_metric = METRIC_DFL, min5_metric = METRIC_DFL, min_metric = METRIC_DFL; /* hash metrics */ static int set_hashsize(int new_size); static void destination_removeall(void); static int add_destinations(const char *ptr); static int netflow_scan_and_export(int flush); enum { DONT_FLUSH, AND_FLUSH }; static int template_ids = FLOWSET_DATA_FIRST; static int tpl_gen_count = 0; /* how much templates */ static int tpl_count = 0; /* how much active templates */ #define STAT_INTERVAL (1*60) #define SYSINFO_INTERVAL (5*60) static unsigned long ts_stat_last = 0; /* (jiffies) */ static unsigned long ts_sysinf_last = 0; /* (jiffies) */ static unsigned long ts_ifnames_last = 0; /* (jiffies) */ static inline __be32 bits2mask(int bits) { return (bits? 0xffffffff << (32 - bits) : 0); } static inline int mask2bits(__be32 mask) { int n; for (n = 0; mask; n++) mask = (mask << 1) & 0xffffffff; return n; } /* under that lock worker is always stopped and not rescheduled, * and we can call worker sub-functions manually */ static DEFINE_MUTEX(worker_lock); static int worker_delay = HZ / 10; static inline void _schedule_scan_worker(const int pdus) { int cpu = exportcpu; /* rudimentary congestion avoidance */ if (pdus > 0) worker_delay /= pdus; else worker_delay *= 2; if (worker_delay < scan_min) worker_delay = scan_min; else if (worker_delay > scan_max) worker_delay = scan_max; if (cpu >= 0) { if (cpu < NR_CPUS && cpu_online(cpu)) { schedule_delayed_work_on(cpu, &netflow_work, worker_delay); return; } printk(KERN_WARNING "ipt_NETFLOW: can't schedule exporter on cpu %d. Disabling cpu lock.\n", cpu); exportcpu = -1; } schedule_delayed_work(&netflow_work, worker_delay); } /* This is only called soon after pause_scan_worker. */ static inline void cont_scan_worker(void) { _schedule_scan_worker(0); mutex_unlock(&worker_lock); } static inline void _unschedule_scan_worker(void) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) cancel_rearming_delayed_work(&netflow_work); #else cancel_delayed_work_sync(&netflow_work); #endif } /* This is only used for quick pause (in procctl). */ static inline void pause_scan_worker(void) { mutex_lock(&worker_lock); _unschedule_scan_worker(); } #ifdef ENABLE_SAMPLER static inline unsigned char get_sampler_mode(void) { return samp.mode; } static inline unsigned short get_sampler_interval(void) { return samp.interval; } static inline const char *sampler_mode_string(void) { const unsigned char mode = get_sampler_mode(); return mode == SAMPLER_DETERMINISTIC? "deterministic" : mode == SAMPLER_RANDOM? "random" : "hash"; } /* map SAMPLER_HASH into SAMPLER_RANDOM */ static unsigned char get_sampler_mode_nf(void) { const unsigned char mode = get_sampler_mode(); return (mode == SAMPLER_HASH)? 
SAMPLER_RANDOM : mode; } static inline unsigned short sampler_nf_v5(void) { return (get_sampler_mode_nf() << SAMPLER_SHIFT) | get_sampler_interval(); } #endif /* return value is different from usual snprintf */ static char *snprintf_sockaddr(char *buf, size_t len, const struct sockaddr_storage *ss) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) if (ss->ss_family == AF_INET) { const struct sockaddr_in *sin = (struct sockaddr_in *)ss; snprintf(buf, len, "%u.%u.%u.%u:%u", NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); } else if (ss->ss_family == AF_INET6) { const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; snprintf(buf, len, "[%x:%x:%x:%x:%x:%x:%x:%x]:%u", ntohs(sin6->sin6_addr.s6_addr16[0]), ntohs(sin6->sin6_addr.s6_addr16[1]), ntohs(sin6->sin6_addr.s6_addr16[2]), ntohs(sin6->sin6_addr.s6_addr16[3]), ntohs(sin6->sin6_addr.s6_addr16[4]), ntohs(sin6->sin6_addr.s6_addr16[5]), ntohs(sin6->sin6_addr.s6_addr16[6]), ntohs(sin6->sin6_addr.s6_addr16[7]), ntohs(sin6->sin6_port)); } else snprintf(buf, len, "(invalid address)"); #elif LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) if (ss->ss_family == AF_INET) snprintf(buf, len, "%pI4:%u", &((const struct sockaddr_in *)ss)->sin_addr, ntohs(((const struct sockaddr_in *)ss)->sin_port)); else if (ss->ss_family == AF_INET6) snprintf(buf, len, "[%pI6c]:%u", &((const struct sockaddr_in6 *)ss)->sin6_addr, ntohs(((const struct sockaddr_in6 *)ss)->sin6_port)); else snprintf(buf, len, "(invalid address)"); #else snprintf(buf, len, "%pISpc", ss); #endif return buf; } static char *print_sockaddr(const struct sockaddr_storage *ss) { static char buf[64]; return snprintf_sockaddr(buf, sizeof(buf), ss); } static int is_zero_addr(const struct sockaddr_storage *ss) { if (ss->ss_family == AF_INET) return ((const struct sockaddr_in *)ss)->sin_addr.s_addr == 0; else if (ss->ss_family == AF_INET6) return ((const struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && ((const struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && ((const struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && ((const struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; else /* AF_UNSPEC */ return 1; } static char *print_usock_addr(struct ipt_netflow_sock *usock) { static char buf[128]; size_t len; snprintf(buf, sizeof(buf), "%s", print_sockaddr(&usock->addr)); if (!is_zero_addr(&usock->saddr)) { len = strlen(buf); snprintf(buf + len, sizeof(buf) - len, "@%s", print_sockaddr(&usock->saddr)); len = strlen(buf); /* strip zero port */ if (len > 2 && buf[len - 1] == '0' && buf[len - 2] == ':') buf[len - 2] = '\0'; } if (usock->sdev[0]) { len = strlen(buf); snprintf(buf + len, sizeof(buf) - len, "%%%s", usock->sdev); } return buf; } #ifdef CONFIG_PROC_FS static inline int ABS(int x) { return x >= 0 ? x : -x; } #define SAFEDIV(x,y) ((y)? 
({ u64 __tmp = x; do_div(__tmp, y); (int)__tmp; }) : 0) #define FFLOAT(x, prec) (int)(x) / prec, ABS((int)(x) % prec) static int snmp_seq_show(struct seq_file *seq, void *v) { int cpu; unsigned int nr_flows = atomic_read(&ipt_netflow_count); struct ipt_netflow_stat t = { 0 }; struct ipt_netflow_sock *usock; unsigned int sndbuf_peak = 0; int snum = 0; for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); t.notfound += st->notfound; t.pkt_total += st->pkt_total; t.traf_total += st->traf_total; t.send_failed += st->send_failed; t.sock_cberr += st->sock_cberr; t.exported_rate += st->exported_rate; t.exported_pkt += st->exported_pkt; t.exported_flow += st->exported_flow; t.exported_traf += st->exported_traf; t.pkt_drop += st->pkt_drop; t.traf_drop += st->traf_drop; t.pkt_lost += st->pkt_lost; t.traf_lost += st->traf_lost; t.flow_lost += st->flow_lost; } seq_printf(seq, "inBitRate %llu\n" "inPacketRate %llu\n" "inFlows %llu\n" "inPackets %llu\n" "inBytes %llu\n" "hashMetric %d.%02d\n" "hashMemory %lu\n" "hashFlows %u\n" "hashPackets %llu\n" "hashBytes %llu\n" "dropPackets %llu\n" "dropBytes %llu\n" "outByteRate %u\n" "outFlows %llu\n" "outPackets %llu\n" "outBytes %llu\n" "lostFlows %llu\n" "lostPackets %llu\n" "lostBytes %llu\n" "errTotal %u\n", sec_brate, sec_prate, t.notfound, t.pkt_total, t.traf_total, FFLOAT(SAFEDIV(100LL * (t.searched + t.found + t.notfound), (t.found + t.notfound)), 100), (unsigned long)nr_flows * sizeof(struct ipt_netflow) + (unsigned long)htable_size * sizeof(struct hlist_head), nr_flows, t.pkt_total - t.pkt_out, t.traf_total - t.traf_out, t.pkt_drop, t.traf_drop, t.exported_rate, t.exported_flow, t.exported_pkt, t.exported_traf, t.flow_lost, t.pkt_lost, t.traf_lost, t.send_failed + t.sock_cberr); for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); seq_printf(seq, "cpu%u %u %llu %llu %llu %d.%02d %llu %llu %u %u %u %u\n", cpu, st->pkt_total_rate, st->notfound, st->pkt_total, st->traf_total, FFLOAT(st->metric, 100), st->pkt_drop, st->traf_drop, st->truncated, st->frags, st->alloc_err, st->maxflows_err); } mutex_lock(&sock_lock); list_for_each_entry(usock, &usock_list, list) { int wmem_peak = atomic_read(&usock->wmem_peak); if (sndbuf_peak < wmem_peak) sndbuf_peak = wmem_peak; seq_printf(seq, "sock%d %s %d %u %u %u %u", snum, print_usock_addr(usock), !!usock->sock, usock->err_connect, usock->err_full, usock->err_cberr, usock->err_other); if (usock->sock) { struct sock *sk = usock->sock->sk; seq_printf(seq, " %u %u %u\n", sk->sk_sndbuf, compat_refcount_read(&sk->sk_wmem_alloc), wmem_peak); } else seq_printf(seq, " 0 0 %u\n", wmem_peak); snum++; } mutex_unlock(&sock_lock); seq_printf(seq, "sndbufPeak %u\n", sndbuf_peak); return 0; } /* procfs statistics /proc/net/stat/ipt_netflow */ static int nf_seq_show(struct seq_file *seq, void *v) { unsigned int nr_flows = atomic_read(&ipt_netflow_count); int cpu; struct ipt_netflow_stat t = { 0 }; struct ipt_netflow_sock *usock; #ifdef ENABLE_AGGR struct netflow_aggr_n *aggr_n; struct netflow_aggr_p *aggr_p; #endif int snum = 0; int peak = (jiffies - peakflows_at) / HZ; seq_printf(seq, "ipt_NETFLOW " IPT_NETFLOW_VERSION ", srcversion %s;" #ifdef ENABLE_AGGR " aggr" #endif #ifdef ENABLE_DIRECTION " dir" #endif #ifdef HAVE_LLIST " llist" #endif #ifdef ENABLE_MAC " mac" #endif #ifdef CONFIG_NF_NAT_NEEDED " nel" #endif #ifdef ENABLE_PROMISC " promisc" # ifdef PROMISC_MPLS "+mpls" # endif #endif #ifdef ENABLE_SAMPLER " samp" # ifdef SAMPLING_HASH "-h" # endif 
#endif #ifdef SNMP_RULES " snmp" #endif #ifdef ENABLE_VLAN " vlan" #endif "\n", THIS_MODULE->srcversion); seq_printf(seq, "Protocol version %d", protocol); if (protocol == 10) seq_printf(seq, " (ipfix)"); else seq_printf(seq, " (netflow)"); if (protocol >= 9) seq_printf(seq, ", refresh-rate %u, timeout-rate %u, (templates %d, active %d).\n", refresh_rate, timeout_rate, tpl_gen_count, tpl_count); else seq_printf(seq, "\n"); seq_printf(seq, "Timeouts: active %ds, inactive %ds. Maxflows %u\n", active_timeout, inactive_timeout, maxflows); for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); t.searched += st->searched; t.found += st->found; t.notfound += st->notfound; t.pkt_total += st->pkt_total; t.traf_total += st->traf_total; #ifdef ENABLE_PROMISC t.pkt_promisc += st->pkt_promisc; t.pkt_promisc_drop += st->pkt_promisc_drop; #endif t.truncated += st->truncated; t.frags += st->frags; t.maxflows_err += st->maxflows_err; t.alloc_err += st->alloc_err; t.send_failed += st->send_failed; t.sock_cberr += st->sock_cberr; t.exported_rate += st->exported_rate; t.exported_pkt += st->exported_pkt; t.exported_flow += st->exported_flow; t.exported_traf += st->exported_traf; t.pkt_total_rate += st->pkt_total_rate; t.pkt_drop += st->pkt_drop; t.traf_drop += st->traf_drop; t.pkt_lost += st->pkt_lost; t.traf_lost += st->traf_lost; t.flow_lost += st->flow_lost; t.pkt_out += st->pkt_out; t.traf_out += st->traf_out; #ifdef ENABLE_SAMPLER t.pkts_observed += st->pkts_observed; t.pkts_selected += st->pkts_selected; #endif } #ifdef ENABLE_SAMPLER if (get_sampler_mode()) { seq_printf(seq, "Flow sampling mode %s one-out-of %u.", sampler_mode_string(), get_sampler_interval()); if (get_sampler_mode() != SAMPLER_HASH) seq_printf(seq, " Flows selected %lu, discarded %lu.", atomic64_read(&flows_selected), atomic64_read(&flows_observed) - atomic64_read(&flows_selected)); else seq_printf(seq, " Flows selected %lu.", atomic64_read(&flows_selected)); seq_printf(seq, " Pkts selected %llu, discarded %llu.\n", t.pkts_selected, t.pkts_observed - t.pkts_selected); } else seq_printf(seq, "Flow sampling is disabled.\n"); #endif #ifdef ENABLE_PROMISC seq_printf(seq, "Promisc hack is %s (observed %llu packets, discarded %llu).\n", promisc? "enabled" : "disabled", t.pkt_promisc, t.pkt_promisc_drop); #endif #ifdef CONFIG_NF_NAT_NEEDED seq_printf(seq, "Natevents %s, count start %lu, stop %lu.\n", natevents? "enabled" : "disabled", nat_events_start, nat_events_stop); #endif seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK, worker delay %d/%d" " [%d..%d] (%u ms, %u us, %u:%u" #ifdef HAVE_LLIST " %u" #endif " [cpu%u]).\n", nr_flows, peakflows, peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60, (unsigned int)(((unsigned long)nr_flows * sizeof(struct ipt_netflow) + (unsigned long)htable_size * sizeof(struct hlist_head)) >> 10), worker_delay, HZ, scan_min, scan_max, jiffies_to_msecs(jiffies - wk_start), jiffies_to_usecs(wk_busy), wk_count, wk_trylock, #ifdef HAVE_LLIST wk_llist, #endif wk_cpu); seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%02d [%d.%02d, %d.%02d, %d.%02d]." 
" InHash: %llu pkt, %llu K, InPDU %llu, %llu.\n", htable_size, (unsigned int)((htable_size * sizeof(struct hlist_head)) >> 10), FFLOAT(metric, 100), FFLOAT(min_metric, 100), FFLOAT(min5_metric, 100), FFLOAT(min15_metric, 100), t.pkt_total - t.pkt_out, (t.traf_total - t.traf_out) >> 10, pdu_packets, pdu_traf); seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec;" " Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n", sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate); seq_printf(seq, "cpu# pps; ," " traffic: , drop: \n"); seq_printf(seq, "Total %6u; %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u," " traffic: %llu, %llu MB, drop: %llu, %llu K\n", t.pkt_total_rate, t.searched, t.found, t.notfound, FFLOAT(SAFEDIV(100LL * (t.searched + t.found + t.notfound), (t.found + t.notfound)), 100), t.truncated, t.frags, t.alloc_err, t.maxflows_err, t.pkt_total, t.traf_total >> 20, t.pkt_drop, t.traf_drop >> 10); if (num_present_cpus() > 1) { for_each_present_cpu(cpu) { struct ipt_netflow_stat *st; st = &per_cpu(ipt_netflow_stat, cpu); seq_printf(seq, "cpu%-2u %6u; %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u," " traffic: %llu, %llu MB, drop: %llu, %llu K\n", cpu, st->pkt_total_rate, st->searched, st->found, st->notfound, FFLOAT(st->metric, 100), st->truncated, st->frags, st->alloc_err, st->maxflows_err, st->pkt_total, st->traf_total >> 20, st->pkt_drop, st->traf_drop >> 10); } } seq_printf(seq, "Export: Rate %u bytes/s; Total %llu pkts, %llu MB, %llu flows;" " Errors %u pkts; Traffic lost %llu pkts, %llu Kbytes, %llu flows.\n", t.exported_rate, t.exported_pkt, t.exported_traf >> 20, t.exported_flow, t.send_failed, t.pkt_lost, t.traf_lost >> 10, t.flow_lost); mutex_lock(&sock_lock); list_for_each_entry(usock, &usock_list, list) { seq_printf(seq, "sock%d: %s", snum, print_usock_addr(usock)); if (usock->sock) { struct sock *sk = usock->sock->sk; seq_printf(seq, ", sndbuf %u, filled %u, peak %u;" " err: sndbuf reached %u, connect %u, cberr %u, other %u\n", sk->sk_sndbuf, compat_refcount_read(&sk->sk_wmem_alloc), atomic_read(&usock->wmem_peak), usock->err_full, usock->err_connect, usock->err_cberr, usock->err_other); } else seq_printf(seq, " unconnected (%u attempts).\n", usock->err_connect); snum++; } mutex_unlock(&sock_lock); #ifdef ENABLE_AGGR read_lock_bh(&aggr_lock); snum = 0; list_for_each_entry(aggr_n, &aggr_n_list, list) { seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d (usage %u)\n", snum, HIPQUAD(aggr_n->addr), mask2bits(aggr_n->mask), mask2bits(aggr_n->aggr_mask), atomic_read(&aggr_n->usage)); snum++; } snum = 0; list_for_each_entry(aggr_p, &aggr_p_list, list) { seq_printf(seq, "aggr#%d port: ports %u-%u replace %u (usage %u)\n", snum, aggr_p->port1, aggr_p->port2, aggr_p->aggr_port, atomic_read(&aggr_p->usage)); snum++; } read_unlock_bh(&aggr_lock); #endif #ifdef SNMP_RULES { const unsigned char *rules; snum = 0; rcu_read_lock(); rules = rcu_dereference(snmp_ruleset); if (rules) while (*rules) { const unsigned int len = *rules++; seq_printf(seq, "SNMP-rule#%d: prefix '%.*s' map to %d\n", snum, len, rules, (rules[len] << 8) + rules[len + 1]); rules += len + 2; ++snum; } rcu_read_unlock(); } #endif return 0; } static int nf_seq_open(struct inode *inode, struct file *file) { return single_open(file, nf_seq_show, NULL); } static int snmp_seq_open(struct inode *inode, struct file *file) { return single_open(file, snmp_seq_show, NULL); } #ifdef HAVE_PROC_OPS static struct proc_ops nf_seq_fops = { .proc_open = nf_seq_open, .proc_read = seq_read, .proc_lseek = 
seq_lseek, .proc_release = single_release, }; #else static struct file_operations nf_seq_fops = { .owner = THIS_MODULE, .open = nf_seq_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; #endif #ifdef HAVE_PROC_OPS static struct proc_ops snmp_seq_fops = { .proc_open = snmp_seq_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, }; #else static struct file_operations snmp_seq_fops = { .owner = THIS_MODULE, .open = snmp_seq_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; #endif static inline int inactive_needs_export(const struct ipt_netflow *nf, const long i_timeout, const unsigned long jiff); static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout, const unsigned long jiff); static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple); struct flows_dump_private { int pcache; /* pos */ void *vcache; /* corresponding pointer for pos */ int stripe; /* current stripe */ struct list_head list; /* copy of stripe */ int alloc_errors; }; /* deallocate the copied stripe */ static void nf_free_stripe(struct list_head *list) { struct ipt_netflow *cf, *tmp; list_for_each_entry_safe(cf, tmp, list, flows_list) { kmem_cache_free(ipt_netflow_cachep, cf); } INIT_LIST_HEAD(list); } /* quickly clone the stripe into flows_dump_private so that it can then be walked slowly * and locklessly */ static void __nf_copy_stripe(struct flows_dump_private *st, const struct list_head *list) { const struct ipt_netflow *nf; struct ipt_netflow *cf; nf_free_stripe(&st->list); list_for_each_entry(nf, list, flows_list) { cf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC); if (!cf) { st->alloc_errors++; continue; } memcpy(cf, nf, sizeof(*cf)); list_add(&cf->flows_list, &st->list); } } /* nstripe is the desired stripe; the actual stripe used (with empty stripes skipped) * is recorded in st->stripe, or -1 if there are no valid stripes anymore; * returns the first element of the stripe list, or NULL */ static struct list_head *nf_get_stripe(struct flows_dump_private *st, int nstripe) { read_lock_bh(&htable_rwlock); for (; nstripe < LOCK_COUNT; nstripe++) { struct stripe_entry *stripe = &htable_stripes[nstripe]; spin_lock(&stripe->lock); if (!list_empty(&stripe->list)) { st->stripe = nstripe; __nf_copy_stripe(st, &stripe->list); spin_unlock(&stripe->lock); read_unlock_bh(&htable_rwlock); return st->list.next; } spin_unlock(&stripe->lock); } read_unlock_bh(&htable_rwlock); st->stripe = -1; return NULL; } /* simply the next element in the flows list, or NULL */ static struct list_head *nf_get_next(struct flows_dump_private *st, struct list_head *head) { if (head == SEQ_START_TOKEN) return nf_get_stripe(st, 0); if (st->stripe < 0) return NULL; /* next element */ if (!list_is_last(head, &st->list)) return head->next; /* next bucket */ return nf_get_stripe(st, st->stripe + 1); } /* seq_file can arbitrarily start/stop iteration as it feels the need, * so I try to cache things to (significantly) speed it up.
*/ static void *flows_dump_seq_start(struct seq_file *seq, loff_t *pos) { struct flows_dump_private *st = seq->private; int ppos = *pos; struct list_head *lh; if (!ppos) { /* first */ st->pcache = 0; st->vcache = SEQ_START_TOKEN; return st->vcache; } if (ppos >= st->pcache) { /* can iterate forward */ ppos -= st->pcache; lh = st->vcache; } else /* can't, start from 0 */ lh = SEQ_START_TOKEN; /* iterate forward */ while (ppos--) lh = nf_get_next(st, lh); st->pcache = *pos; st->vcache = lh; return st->vcache; } static void *flows_dump_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct flows_dump_private *st = seq->private; st->pcache = ++*pos; st->vcache = nf_get_next(st, (struct list_head *)v); return st->vcache; } static void flows_dump_seq_stop(struct seq_file *seq, void *v) { } /* To view this: cat /sys/kernel/debug/netflow_dump */ static int flows_dump_seq_show(struct seq_file *seq, void *v) { struct flows_dump_private *st = seq->private; const long i_timeout = inactive_timeout * HZ; const long a_timeout = active_timeout * HZ; const struct ipt_netflow *nf; if (v == SEQ_START_TOKEN) { seq_printf(seq, "# hash a dev:i,o" #ifdef SNMP_RULES " snmp:i,o" #endif #ifdef ENABLE_MAC " mac:src,dst" #endif #ifdef ENABLE_VLAN " vlan" #endif #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) " type" #endif " proto src:ip,port dst:ip,port nexthop" " tos,tcpflags,options,tcpoptions" " packets bytes ts:first,last\n"); return 0; } nf = list_entry(v, struct ipt_netflow, flows_list); seq_printf(seq, "%d %04x %x", st->pcache, hash_netflow(&nf->tuple), (!!inactive_needs_export(nf, i_timeout, jiffies)) | (active_needs_export(nf, a_timeout, jiffies) << 1)); seq_printf(seq, " %hd,%hd", nf->tuple.i_ifc, nf->o_ifc); #ifdef SNMP_RULES seq_printf(seq, " %hd,%hd", nf->i_ifcr, nf->o_ifcr); #endif #ifdef ENABLE_MAC seq_printf(seq, " %pM,%pM", &nf->tuple.h_src, &nf->tuple.h_dst); #endif #ifdef ENABLE_VLAN if (nf->tuple.tag[0]) { seq_printf(seq, " %d", ntohs(nf->tuple.tag[0])); if (nf->tuple.tag[1]) seq_printf(seq, ",%d", ntohs(nf->tuple.tag[1])); } #endif #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) seq_printf(seq, " %04x", ntohs(nf->ethernetType)); #endif seq_printf(seq, " %u ", nf->tuple.protocol); if (nf->tuple.l3proto == AF_INET) { seq_printf(seq, "%pI4n,%u %pI4n,%u %pI4n", &nf->tuple.src, ntohs(nf->tuple.s_port), &nf->tuple.dst, ntohs(nf->tuple.d_port), &nf->nh); } else if (nf->tuple.l3proto == AF_INET6) { seq_printf(seq, "%pI6c,%u %pI6c,%u %pI6c", &nf->tuple.src, ntohs(nf->tuple.s_port), &nf->tuple.dst, ntohs(nf->tuple.d_port), &nf->nh); } else { seq_puts(seq, "?,? ?,? 
?"); } seq_printf(seq, " %x,%x,%x,%x", nf->tuple.tos, nf->tcp_flags, nf->options, nf->tcpoptions); seq_printf(seq, " %u %u %lu,%lu\n", nf->nr_packets, nf->nr_bytes, jiffies - nf->nf_ts_first, jiffies - nf->nf_ts_last); return 0; } static struct seq_operations flows_dump_seq_ops = { .start = flows_dump_seq_start, .show = flows_dump_seq_show, .next = flows_dump_seq_next, .stop = flows_dump_seq_stop, }; static int flows_seq_open(struct inode *inode, struct file *file) { struct flows_dump_private *st; char *buf; const size_t size = 4 * PAGE_SIZE; buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; st = __seq_open_private(file, &flows_dump_seq_ops, sizeof(struct flows_dump_private)); if (!st) { kfree(buf); return -ENOMEM; } INIT_LIST_HEAD(&st->list); /* speed up seq interface with bigger buffer */ ((struct seq_file *)file->private_data)->buf = buf; ((struct seq_file *)file->private_data)->size = size; return 0; } static int flows_seq_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct flows_dump_private *st = seq->private; nf_free_stripe(&st->list); if (st->alloc_errors) printk(KERN_INFO "ipt_NETFLOW: alloc_errors %d\n", st->alloc_errors); return seq_release_private(inode, file); } #ifdef HAVE_PROC_OPS static struct proc_ops flows_seq_fops = { .proc_open = flows_seq_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = flows_seq_release, }; #else static struct file_operations flows_seq_fops = { .owner = THIS_MODULE, .open = flows_seq_open, .read = seq_read, .llseek = seq_lseek, .release = flows_seq_release, }; #endif #endif /* CONFIG_PROC_FS */ #ifdef ENABLE_PROMISC static int promisc_finish( #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) struct net *net, #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || \ (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR >= 2) struct sock *sk, #endif struct sk_buff *skb) { /* don't pass to the routing */ kfree_skb(skb); return NET_RX_DROP; } static int promisc4_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct iphdr *iph; u32 len; /* clone skb and do basic IPv4 sanity checking and preparations * for L3, this is quick and dirty version of ip_rcv() */ if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; iph = ip_hdr(skb); if (iph->ihl < 5 || iph->version != 4) goto drop; if (!pskb_may_pull(skb, iph->ihl*4)) goto drop; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto drop; len = ntohs(iph->tot_len); if (skb->len < len) goto drop; else if (len < (iph->ihl*4)) goto drop; if (pskb_trim_rcsum(skb, len)) goto drop; skb->transport_header = skb->network_header + iph->ihl*4; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); skb_orphan(skb); return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) dev_net(dev), #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR > 1) NULL, #endif skb, dev, NULL, promisc_finish); drop: NETFLOW_STAT_INC(pkt_promisc_drop); kfree_skb(skb); return NET_RX_DROP; } static int promisc6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; /* quick and dirty version of ipv6_rcv(), basic sanity checking * and preparation of skb for later processing */ rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (!idev || unlikely(idev->cnf.disable_ipv6)) goto drop; 
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto drop; hdr = ipv6_hdr(skb); if (hdr->version != 6) goto drop; if (!(dev->flags & IFF_LOOPBACK) && ipv6_addr_loopback(&hdr->daddr)) goto drop; if (!(skb->pkt_type == PACKET_LOOPBACK || dev->flags & IFF_LOOPBACK) && ipv6_addr_is_multicast(&hdr->daddr) && IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) goto drop; if (ipv6_addr_is_multicast(&hdr->daddr) && IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 0) goto drop; if (ipv6_addr_is_multicast(&hdr->saddr)) goto drop; skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); pkt_len = ntohs(hdr->payload_len); if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) goto drop; if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP) { int optlen; /* ipv6_parse_hopopts() is not exported by the kernel. * I don't really need to parse hop options, since packets * are not routed, nor terminated, but I keep the calculations * in case other code depends on it. */ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + ((skb_transport_header(skb)[1] + 1) << 3)))) goto drop; optlen = (skb_transport_header(skb)[1] + 1) << 3; if (skb_transport_offset(skb) + optlen > skb_headlen(skb)) goto drop; skb->transport_header += optlen; IP6CB(skb)->nhoff = sizeof(struct ipv6hdr); } rcu_read_unlock(); skb_orphan(skb); return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0) dev_net(dev), #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) || (defined(RHEL_MAJOR) && RHEL_MAJOR == 7 && RHEL_MINOR > 1) NULL, #endif skb, dev, NULL, promisc_finish); drop: rcu_read_unlock(); NETFLOW_STAT_INC(pkt_promisc_drop); kfree_skb(skb); return NET_RX_DROP; } /* source is skb_network_protocol() and __vlan_get_protocol() */ static __be16 __skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; unsigned int vlan_depth; if (type == htons(ETH_P_TEB)) { struct ethhdr *eth; if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) return 0; eth = (struct ethhdr *)skb_mac_header(skb); type = eth->h_proto; } vlan_depth = skb->mac_len; if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; vlan_depth -= VLAN_HLEN; } else { vlan_depth = ETH_HLEN; } do { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)); } *depth = vlan_depth; return type; } static int promisc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { /* anything that is not PACKET_OTHERHOST will be processed normally */ if (skb->pkt_type != PACKET_OTHERHOST) goto out; NETFLOW_STAT_INC(pkt_promisc); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto drop; /* Note about vlans: * - older kernels will pass the raw packet; * - newer kernels (since 3.0) will have one vlan tag * physically stripped out of the packet, and it will * be saved into skb->vlan_tci. skb->protocol will be * the untagged etherType.
*/ if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { int vlan_depth = skb->mac_len; skb_push(skb, skb->data - skb_mac_header(skb)); skb->protocol = __skb_network_protocol(skb, &vlan_depth); skb_pull(skb, vlan_depth); skb_reset_network_header(skb); skb_reset_mac_len(skb); } # ifdef PROMISC_MPLS if (eth_p_mpls(skb->protocol)) { size_t stack_len = 0; const struct mpls_label *mpls; do { stack_len += MPLS_HLEN; if (unlikely(!pskb_may_pull(skb, stack_len))) goto drop; mpls = (struct mpls_label *)(skb->data + stack_len - MPLS_HLEN); } while (!(mpls->entry & htonl(MPLS_LS_S_MASK))); skb_pull(skb, stack_len); skb_reset_network_header(skb); if (!pskb_may_pull(skb, 1)) goto drop; switch (ip_hdr(skb)->version) { case 4: skb->protocol = htons(ETH_P_IP); break; case 6: skb->protocol = htons(ETH_P_IPV6); break; default: goto drop; } } # endif switch (skb->protocol) { case htons(ETH_P_IP): return promisc4_rcv(skb, dev, pt, orig_dev); case htons(ETH_P_IPV6): return promisc6_rcv(skb, dev, pt, orig_dev); } drop: NETFLOW_STAT_INC(pkt_promisc_drop); out: kfree_skb(skb); return 0; } static struct packet_type promisc_packet_type __read_mostly = { .type = htons(ETH_P_ALL), .func = promisc_rcv, }; /* should not have promisc passed as parameter */ static int switch_promisc(int newpromisc) { newpromisc = !!newpromisc; mutex_lock(&promisc_lock); if (newpromisc == promisc) goto unlock; if (newpromisc) dev_add_pack(&promisc_packet_type); else dev_remove_pack(&promisc_packet_type); printk(KERN_INFO "ipt_NETFLOW: promisc hack is %s\n", newpromisc? "enabled" : "disabled"); promisc = newpromisc; unlock: mutex_unlock(&promisc_lock); return 0; } #endif #ifdef CONFIG_SYSCTL /* sysctl /proc/sys/net/netflow */ static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret, hsize; ctl_table_no_const lctl = *ctl; if (write) lctl.data = &hsize; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (write) { if (hsize < LOCK_COUNT) return -EPERM; return set_hashsize(hsize)?:ret; } else return ret; } static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; struct ipt_netflow_sock *usock; ctl_table_no_const lctl = *ctl; mutex_lock(&sock_lock); if (list_empty(&usock_list)) { mutex_unlock(&sock_lock); return -ENOENT; } usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list); if (usock->sock) sndbuf = usock->sock->sk->sk_sndbuf; mutex_unlock(&sock_lock); lctl.data = &sndbuf; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (!write) return ret; if (sndbuf < SOCK_MIN_SNDBUF) sndbuf = SOCK_MIN_SNDBUF; pause_scan_worker(); mutex_lock(&sock_lock); list_for_each_entry(usock, &usock_list, list) { if (usock->sock) usock->sock->sk->sk_sndbuf = sndbuf; } mutex_unlock(&sock_lock); cont_scan_worker(); return ret; } static void free_templates(void); static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (ret >= 0 && write) { pause_scan_worker(); destination_removeall(); add_destinations(destination_buf); free_templates(); cont_scan_worker(); } return ret; } #ifdef ENABLE_AGGR static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int 
ret; if (debug > 1) printk(KERN_INFO "aggregation_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos); ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (ret >= 0 && write) add_aggregation(aggregation_buf); return ret; } #endif #ifdef ENABLE_PROMISC static int promisc_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int newpromisc = promisc; int ret; ctl_table_no_const lctl = *ctl; lctl.data = &newpromisc; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (ret < 0 || !write) return ret; return switch_promisc(newpromisc); } #endif #ifdef ENABLE_SAMPLER static int parse_sampler(char *ptr); static int sampler_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; if (debug > 1) printk(KERN_INFO "sampler_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos); ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (ret >= 0 && write) { int cpu; pause_scan_worker(); netflow_scan_and_export(AND_FLUSH); /* paused for sampling_code reads to be consistent */ ret = parse_sampler(sampler_buf); /* resend templates */ ts_sampler_last = 0; /* zero stat */ atomic64_set(&flows_observed, 0); atomic64_set(&flows_selected, 0); for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); st->pkts_selected = 0; st->pkts_observed = 0; } cont_scan_worker(); } return ret; } #endif #ifdef SNMP_RULES static int add_snmp_rules(char *ptr); static int snmp_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; if (debug > 1) printk(KERN_INFO "snmp_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), *fpos); ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (ret >= 0 && write) return add_snmp_rules(snmp_rules_buf); return ret; } #endif static void clear_ipt_netflow_stat(void) { int cpu; for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); memset(st, 0, sizeof(*st)); st->metric = METRIC_DFL; } } static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; int val = 0; ctl_table_no_const lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (!write) return ret; if (val > 0) { char *stat = ""; pause_scan_worker(); netflow_scan_and_export(AND_FLUSH); if (val > 1) { clear_ipt_netflow_stat(); stat = " (reset stat counters)"; } printk(KERN_INFO "ipt_NETFLOW: forced flush%s.\n", stat); cont_scan_worker(); } return ret; } static int protocol_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t *lenp, loff_t *fpos) { int ret; int ver = protocol; ctl_table_no_const lctl = *ctl; lctl.data = &ver; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (!write) return ret; switch (ver) { case 5: case 9: case 10: printk(KERN_INFO "ipt_NETFLOW: forced flush (protocol version change)\n"); pause_scan_worker(); netflow_scan_and_export(AND_FLUSH); netflow_switch_version(ver); cont_scan_worker(); break; default: return -EPERM; } return ret; } #ifdef CONFIG_NF_NAT_NEEDED static void register_ct_events(void); static void unregister_ct_events(void); static int natevents_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) void __user *buffer, size_t 
*lenp, loff_t *fpos) { int ret; int val = natevents; ctl_table_no_const lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, BEFORE2632(filp,) buffer, lenp, fpos); if (!write) return ret; if (natevents && !val) unregister_ct_events(); else if (!natevents && val) register_ct_events(); return ret; } #endif static struct ctl_table_header *netflow_sysctl_header; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) #define _CTL_NAME(x) .ctl_name = x, static void ctl_table_renumber(ctl_table *table) { int c; for (c = 1; table->procname; table++, c++) table->ctl_name = c; } #else #define _CTL_NAME(x) #define ctl_table_renumber(x) #endif static ctl_table netflow_sysctl_table[] = { { .procname = "active_timeout", .mode = 0644, .data = &active_timeout, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, { .procname = "inactive_timeout", .mode = 0644, .data = &inactive_timeout, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, { .procname = "debug", .mode = 0644, .data = &debug, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, { .procname = "hashsize", .mode = 0644, .data = &htable_size, .maxlen = sizeof(int), .proc_handler = &hsize_procctl, }, { .procname = "sndbuf", .mode = 0644, .maxlen = sizeof(int), .proc_handler = &sndbuf_procctl, }, { .procname = "destination", .mode = 0644, .data = &destination_buf, .maxlen = sizeof(destination_buf), .proc_handler = &destination_procctl, }, #ifdef ENABLE_AGGR { .procname = "aggregation", .mode = 0644, .data = &aggregation_buf, .maxlen = sizeof(aggregation_buf), .proc_handler = &aggregation_procctl, }, #endif { .procname = "maxflows", .mode = 0644, .data = &maxflows, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, { .procname = "flush", .mode = 0644, .maxlen = sizeof(int), .proc_handler = &flush_procctl, }, { .procname = "protocol", .mode = 0644, .maxlen = sizeof(int), .proc_handler = &protocol_procctl, }, { .procname = "refresh-rate", .mode = 0644, .data = &refresh_rate, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, { .procname = "timeout-rate", .mode = 0644, .data = &timeout_rate, .maxlen = sizeof(int), .proc_handler = &proc_dointvec, }, #ifdef ENABLE_PROMISC { .procname = "promisc", .mode = 0644, .data = &promisc, .maxlen = sizeof(int), .proc_handler = &promisc_procctl, }, #endif #ifdef ENABLE_SAMPLER { .procname = "sampler", .mode = 0644, .data = &sampler_buf, .maxlen = sizeof(sampler_buf), .proc_handler = &sampler_procctl, }, #endif { .procname = "scan-min", .mode = 0644, .data = &scan_min, .maxlen = sizeof(int), .proc_handler = &proc_dointvec_minmax, .extra1 = &one, .extra2 = &scan_max, }, #ifdef SNMP_RULES { .procname = "snmp-rules", .mode = 0644, .data = &snmp_rules_buf, .maxlen = sizeof(snmp_rules_buf), .proc_handler = &snmp_procctl, }, #endif #ifdef CONFIG_NF_NAT_NEEDED { .procname = "natevents", .mode = 0644, .maxlen = sizeof(int), .proc_handler = &natevents_procctl, }, #endif { } }; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) static ctl_table netflow_sysctl_root[] = { { _CTL_NAME(33) .procname = "netflow", .mode = 0555, .child = netflow_sysctl_table, }, { } }; static ctl_table netflow_net_table[] = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, .child = netflow_sysctl_root, }, { } }; #else /* >= 2.6.25 */ static struct ctl_path netflow_sysctl_path[] = { { .procname = "net", #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) .ctl_name = CTL_NET #endif }, { .procname = "netflow" }, { } }; #endif /* 2.6.25 */ #endif /* CONFIG_SYSCTL */ /* socket code */ static void sk_error_report(struct 
sock *sk) { struct ipt_netflow_sock *usock; /* clear connection refused errors if any */ if (debug > 1) printk(KERN_INFO "ipt_NETFLOW: socket error <%d>\n", sk->sk_err); sk->sk_err = 0; usock = sk->sk_user_data; if (usock) usock->err_cberr++; NETFLOW_STAT_INC(sock_cberr); /* It's theoretically possible to determine to which datagram this reply is, * because ICMP message frequently includes header of erroneous packet, but * this is not that reliable - packets could be spoofed, and requires keeping * book of sent packets. */ return; } static struct socket *usock_open_sock(struct ipt_netflow_sock *usock) { struct socket *sock; int error; int salen = 0; if ((error = sock_create_kern(usock->addr.ss_family, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { printk(KERN_ERR "ipt_NETFLOW: sock_create_kern error %d\n", -error); return NULL; } sock->sk->sk_allocation = GFP_ATOMIC; sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */ sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */ sock->sk->sk_user_data = usock; sock->sk->sk_reuse = SK_CAN_REUSE; if (usock->sdev[0]) { struct net_device *dev = dev_get_by_name(&init_net, usock->sdev); if (dev) { struct sock *sk = sock->sk; /* SO_BINDTOIFINDEX */ sk->sk_bound_dev_if = dev->ifindex; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); #endif sk_dst_reset(sk); dev_put(dev); } else { printk(KERN_ERR "ipt_NETFLOW: error binding to device %s, errno %d\n", usock->sdev, -error); goto err_free_sock; } } if (!is_zero_addr(&usock->saddr)) { if (usock->saddr.ss_family == AF_INET) salen = sizeof(struct sockaddr_in); else if (usock->saddr.ss_family == AF_INET6) salen = sizeof(struct sockaddr_in6); if ((error = sock->ops->bind(sock, (struct sockaddr *)&usock->saddr, salen)) < 0) { printk(KERN_ERR "ipt_NETFLOW: error binding socket %d\n", -error); goto err_free_sock; } } if (sndbuf) sock->sk->sk_sndbuf = sndbuf; else sndbuf = sock->sk->sk_sndbuf; error = sock->ops->connect(sock, (struct sockaddr *)&usock->addr, sizeof(usock->addr), 0); if (error < 0) { printk(KERN_ERR "ipt_NETFLOW: error connecting UDP socket %d," " don't worry, will try reconnect later.\n", -error); /* ENETUNREACH when no interfaces */ goto err_free_sock; } return sock; err_free_sock: sock_release(sock); return NULL; } static void usock_connect(struct ipt_netflow_sock *usock, const int sendmsg) { usock->sock = usock_open_sock(usock); if (usock->sock) { if (sendmsg || debug) printk(KERN_INFO "ipt_NETFLOW: connected %s\n", print_sockaddr(&usock->addr)); } else { usock->err_connect++; if (debug) printk(KERN_INFO "ipt_NETFLOW: connect to %s failed%s.\n", print_sockaddr(&usock->addr), (sendmsg)? 
" (pdu lost)" : ""); } atomic_set(&usock->wmem_peak, 0); usock->err_full = 0; usock->err_other = 0; } static void usock_close(struct ipt_netflow_sock *usock) { if (usock->sock) sock_release(usock->sock); usock->sock = NULL; } ktime_t ktime_get_real(void); // return numbers of sends succeeded, 0 if none /* only called in scan worker path */ static void netflow_sendmsg(void *buffer, const int len) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL }; struct kvec iov = { buffer, len }; int retok = 0, ret; int snum = 0; struct ipt_netflow_sock *usock; mutex_lock(&sock_lock); list_for_each_entry(usock, &usock_list, list) { usock->pkt_exp++; usock->bytes_exp += len; if (!usock->sock) usock_connect(usock, 1); if (!usock->sock) { NETFLOW_STAT_INC(send_failed); usock->pkt_fail++; continue; } if (debug) printk(KERN_INFO "netflow_sendmsg: sendmsg(%d, %d) [%u %u]\n", snum, len, compat_refcount_read(&usock->sock->sk->sk_wmem_alloc), usock->sock->sk->sk_sndbuf); ret = kernel_sendmsg(usock->sock, &msg, &iov, 1, (size_t)len); if (ret < 0) { char *suggestion = ""; NETFLOW_STAT_INC(send_failed); usock->pkt_fail++; if (ret == -EAGAIN) { usock->err_full++; suggestion = ": increase sndbuf!"; } else { usock->err_other++; if (ret == -ENETUNREACH) { suggestion = ": network is unreachable."; } else if (ret == -EINVAL) { usock_close(usock); suggestion = ": will reconnect."; } } if (debug) printk(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: data loss %llu pkt, %llu bytes%s\n", snum, ret, pdu_packets, pdu_traf, suggestion); else printk_ratelimited(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: %s\n", snum, ret, suggestion); } else { unsigned int wmem = compat_refcount_read(&usock->sock->sk->sk_wmem_alloc); if (wmem > atomic_read(&usock->wmem_peak)) atomic_set(&usock->wmem_peak, wmem); NETFLOW_STAT_INC(exported_pkt); NETFLOW_STAT_ADD(exported_traf, ret); usock->pkt_sent++; retok++; } snum++; } mutex_unlock(&sock_lock); if (retok == 0) { /* not least one send succeeded, account stat for dropped packets */ NETFLOW_STAT_ADD(pkt_lost, pdu_packets); NETFLOW_STAT_ADD(traf_lost, pdu_traf); NETFLOW_STAT_ADD(flow_lost, pdu_flow_records); NETFLOW_STAT_TS(lost); } else { NETFLOW_STAT_ADD(exported_flow, pdu_flow_records); } } static void usock_close_free(struct ipt_netflow_sock *usock) { printk(KERN_INFO "ipt_NETFLOW: removed destination %s\n", print_sockaddr(&usock->addr)); usock_close(usock); vfree(usock); } static void destination_removeall(void) { mutex_lock(&sock_lock); while (!list_empty(&usock_list)) { struct ipt_netflow_sock *usock; usock = list_entry(usock_list.next, struct ipt_netflow_sock, list); list_del(&usock->list); mutex_unlock(&sock_lock); usock_close_free(usock); mutex_lock(&sock_lock); } mutex_unlock(&sock_lock); } static void add_usock(struct ipt_netflow_sock *usock) { struct ipt_netflow_sock *sk; mutex_lock(&sock_lock); /* don't need duplicated sockets */ list_for_each_entry(sk, &usock_list, list) { if (sockaddr_cmp(&sk->addr, &usock->addr)) { mutex_unlock(&sock_lock); usock_close_free(usock); return; } } list_add_tail(&usock->list, &usock_list); printk(KERN_INFO "ipt_NETFLOW: added destination %s%s\n", print_usock_addr(usock), (!usock->sock)? 
" (unconnected)" : ""); mutex_unlock(&sock_lock); } #if defined(ENABLE_SAMPLER) || defined(SNMP_RULES) static inline int xisdigit(int ch) { return (ch >= '0') && (ch <= '9'); } static inline int simple_atoi(const char *p) { int i; for (i = 0; xisdigit(*p); p++) i = i * 10 + *p - '0'; return i; } #endif #ifdef ENABLE_SAMPLER static void set_sampler(const unsigned char mode, const unsigned short interval) { struct sampling s; s.mode = mode; s.interval = interval; if (!mode || interval > SAMPLER_INTERVAL_M) { *sampler_buf = 0; samp.v32 = s.v32; printk(KERN_ERR "ipt_NETFLOW: flow sampling is disabled.\n"); } else { sampling_ts.first = ktime_get_real(); /* no race here, because exporting process is stopped */ samp.v32 = s.v32; sprintf(sampler_buf, "%s:%u", sampler_mode_string(), interval); printk(KERN_ERR "ipt_NETFLOW: flow sampling is enabled, mode %s one-out-of %u.\n", sampler_mode_string(), interval); } } static int parse_sampler(char *ptr) { char *p; unsigned char mode; unsigned int val; int ret = 0; switch (tolower(*ptr)) { case 'd': mode = SAMPLER_DETERMINISTIC; break; case 'r': mode = SAMPLER_RANDOM; break; #ifdef SAMPLING_HASH case 'h': mode = SAMPLER_HASH; break; #endif default: printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n", "unknown mode", ptr); ret = -EINVAL; /* FALLTHROUGH */ case '\0': /* empty */ case 'n': /* none */ case 'o': /* off */ case '0': /* zero */ set_sampler(0, 0); return ret; } p = strchr(ptr, ':'); if (!p) { printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n", "no interval specified", ptr); set_sampler(0, 0); return -EINVAL; } val = simple_atoi(++p); if (val < 2 || val > SAMPLER_INTERVAL_M) { printk(KERN_ERR "ipt_NETFLOW: sampler parse error (%s '%s').\n", "illegal interval", p); set_sampler(0, 0); return -EINVAL; } set_sampler(mode, val); return 0; } #endif #ifdef SNMP_RULES /* source string: eth:100,ppp:200,vlan:300 */ /* reformat to: length[1], prefix[len], offset[2], ..., null[1]. 
*/ static int parse_snmp_rules(char *ptr, unsigned char *dst) { int osize = 0; while (*ptr) { char *prefix = ptr; unsigned int number; int len, lsize; char *p; p = strchr(ptr, ':'); if (!p) return -EINVAL; len = p - ptr; if (len == 0) return -EINVAL; ptr += len; if (sscanf(ptr, ":%d%n", &number, &lsize) < 1) return -EINVAL; ptr += lsize; if (*ptr) /* any separator will work */ ptr++; osize += 1 + len + 2; if (dst) { *dst++ = len; memcpy(dst, prefix, len); dst += len; *dst++ = (number >> 8) & 0xff; *dst++ = number & 0xff; } } osize += 1; if (dst) *dst = '\0'; return osize; } static int add_snmp_rules(char *ptr) { int osize = parse_snmp_rules(ptr, NULL); char *dst; char *old; if (osize <= 0) { printk(KERN_ERR "ipt_NETFLOW: add_snmp_rules parse error.\n"); strcpy(snmp_rules_buf, "parse error"); return -EINVAL; } dst = kmalloc(osize, GFP_KERNEL); if (!dst) { strcpy(snmp_rules_buf, "no memory"); printk(KERN_ERR "ipt_NETFLOW: add_snmp_rules no memory.\n"); return -ENOMEM; } parse_snmp_rules(ptr, dst); spin_lock(&snmp_lock); old = snmp_ruleset; rcu_assign_pointer(snmp_ruleset, dst); spin_unlock(&snmp_lock); synchronize_rcu(); if (old) kfree(old); return 0; } static inline int resolve_snmp(const struct net_device *ifc) { const unsigned char *rules; if (!ifc) return -1; rules = rcu_dereference(snmp_ruleset); if (!rules) return ifc->ifindex; while (*rules) { const unsigned int len = *rules++; const char *ifname = ifc->name; if (!strncmp(ifname, rules, len)) { rules += len; return (rules[0] << 8) + rules[1] + simple_atoi(ifname + len); } rules += len + 2; } return ifc->ifindex; } #endif /* SNMP_RULES */ /* count how much character c is in the string */ static size_t strncount(const char *s, size_t count, int c) { size_t amount = 0; for (; count-- && *s != '\0'; ++s) if (*s == (char)c) ++amount; return amount; } #define SEPARATORS " ,;\t\n" static int add_destinations(const char *ptr) { int len; for (; ptr; ptr += len) { struct sockaddr_storage ss; struct sockaddr_storage sbind = {}; struct ipt_netflow_sock *usock; const char *end; int succ = 0; char name[IFNAMSIZ] = { 0 }; /* skip initial separators */ ptr += strspn(ptr, SEPARATORS); len = strcspn(ptr, SEPARATORS); if (!len) break; memset(&ss, 0, sizeof(ss)); if (strncount(ptr, len, ':') >= 2) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss; struct sockaddr_in6 *sout = (struct sockaddr_in6 *)&sbind; const char *c = ptr; int clen = len; sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(2055); if (*c == '[') { ++c; --clen; } succ = in6_pton(c, clen, (u8 *)&sin6->sin6_addr, -1, &end); if (succ && *ptr == '[' && *end == ']') ++end; if (succ && (*end == ':' || *end == '.' 
|| *end == 'p' || *end == '#')) sin6->sin6_port = htons(strtoul(++end, (char **)&end, 0)); if (succ && *end == '@') { ++end; sout->sin6_family = AF_INET6; sout->sin6_port = 0; succ = in6_pton(end, strcspn(end, SEPARATORS), (u8 *)&sout->sin6_addr, -1, &end); } } else { struct sockaddr_in *sin = (struct sockaddr_in *)&ss; struct sockaddr_in *sout = (struct sockaddr_in *)&sbind; sin->sin_family = AF_INET; sin->sin_port = htons(2055); succ = in4_pton(ptr, len, (u8 *)&sin->sin_addr, -1, &end); if (succ && *end == ':') sin->sin_port = htons(strtoul(++end, (char **)&end, 0)); if (succ && *end == '@') { ++end; sout->sin_family = AF_INET; sout->sin_port = 0; succ = in4_pton(end, strcspn(end, SEPARATORS), (u8 *)&sout->sin_addr, -1, &end); } } if (succ && *end == '%') { ++end; snprintf(name, sizeof(name), "%.*s", (int)strcspn(end, SEPARATORS), end); } if (!succ) { printk(KERN_ERR "ipt_NETFLOW: can't parse destination: %.*s\n", len, ptr); continue; } if (!(usock = vmalloc(sizeof(*usock)))) { printk(KERN_ERR "ipt_NETFLOW: can't vmalloc socket\n"); return -ENOMEM; } memset(usock, 0, sizeof(*usock)); usock->addr = ss; usock->saddr = sbind; memcpy(usock->sdev, name, sizeof(usock->sdev)); usock_connect(usock, 0); add_usock(usock); } return 0; } #ifdef ENABLE_AGGR static void aggregation_remove(struct list_head *list) { write_lock_bh(&aggr_lock); while (!list_empty(list)) { struct netflow_aggr_n *aggr; /* match netflow_aggr_p too */ aggr = list_entry(list->next, struct netflow_aggr_n, list); list_del(&aggr->list); write_unlock_bh(&aggr_lock); vfree(aggr); write_lock_bh(&aggr_lock); } write_unlock_bh(&aggr_lock); } static int add_aggregation(char *ptr) { struct netflow_aggr_n *aggr_n, *aggr, *tmp; struct netflow_aggr_p *aggr_p; LIST_HEAD(new_aggr_n_list); LIST_HEAD(new_aggr_p_list); LIST_HEAD(old_aggr_list); while (ptr && *ptr) { unsigned char ip[4]; unsigned int mask; unsigned int port1, port2; unsigned int aggr_to; ptr += strspn(ptr, SEPARATORS); if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu/%u=%u", ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) { if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) { printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n"); return -ENOMEM; } memset(aggr_n, 0, sizeof(*aggr_n)); aggr_n->mask = bits2mask(mask); aggr_n->addr = ntohl(*(__be32 *)ip) & aggr_n->mask; aggr_n->aggr_mask = bits2mask(aggr_to); aggr_n->prefix = mask; printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u.%u.%u.%u/%u=%u]\n", HIPQUAD(aggr_n->addr), mask, aggr_to); list_add_tail(&aggr_n->list, &new_aggr_n_list); } else if (sscanf(ptr, "%u-%u=%u", &port1, &port2, &aggr_to) == 3 || sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) { if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) { printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n"); return -ENOMEM; } memset(aggr_p, 0, sizeof(*aggr_p)); aggr_p->port1 = port1; aggr_p->port2 = port2; aggr_p->aggr_port = aggr_to; printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u-%u=%u]\n", port1, port2, aggr_to); list_add_tail(&aggr_p->list, &new_aggr_p_list); } else { printk(KERN_ERR "ipt_NETFLOW: bad aggregation rule: %s (ignoring)\n", ptr); break; } ptr = strpbrk(ptr, SEPARATORS); } /* swap lists */ write_lock_bh(&aggr_lock); list_for_each_entry_safe(aggr, tmp, &aggr_n_list, list) list_move(&aggr->list, &old_aggr_list); list_for_each_entry_safe(aggr, tmp, &aggr_p_list, list) list_move(&aggr->list, &old_aggr_list); list_for_each_entry_safe(aggr, tmp, &new_aggr_n_list, list) list_move_tail(&aggr->list, &aggr_n_list); list_for_each_entry_safe(aggr, tmp, &new_aggr_p_list, list) list_move_tail(&aggr->list, 
&aggr_p_list); write_unlock_bh(&aggr_lock); aggregation_remove(&old_aggr_list); return 0; } #endif #ifdef SAMPLING_HASH static uint32_t hash_seed; #define HASH_SEED hash_seed #else #define HASH_SEED 0 #endif static inline u_int32_t __hash_netflow(const struct ipt_netflow_tuple *tuple) { return murmur3(tuple, sizeof(struct ipt_netflow_tuple), HASH_SEED); } static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple) { return __hash_netflow(tuple) % htable_size; } static struct ipt_netflow * ipt_netflow_find(const struct ipt_netflow_tuple *tuple, const unsigned int hash) { struct ipt_netflow *nf; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) struct hlist_node *pos; #endif compat_hlist_for_each_entry(nf, pos, &htable[hash], hlist) { if (ipt_netflow_tuple_equal(tuple, &nf->tuple) && nf->nr_bytes < FLOW_FULL_WATERMARK) { NETFLOW_STAT_INC(found); return nf; } NETFLOW_STAT_INC(searched); } NETFLOW_STAT_INC(notfound); return NULL; } static struct hlist_head *alloc_hashtable(const int size) { struct hlist_head *hash; hash = vmalloc(sizeof(struct hlist_head) * size); if (hash) { int i; for (i = 0; i < size; i++) INIT_HLIST_HEAD(&hash[i]); } else printk(KERN_ERR "ipt_NETFLOW: unable to vmalloc hash table.\n"); return hash; } static int set_hashsize(int new_size) { struct hlist_head *new_hash, *old_hash; struct ipt_netflow *nf, *tmp; LIST_HEAD(all_list); int i; if (new_size < LOCK_COUNT) new_size = LOCK_COUNT; printk(KERN_INFO "ipt_NETFLOW: allocating new hash table %u -> %u buckets\n", htable_size, new_size); new_hash = alloc_hashtable(new_size); if (!new_hash) return -ENOMEM; /* rehash */ write_lock_bh(&htable_rwlock); old_hash = htable; htable = new_hash; htable_size = new_size; for (i = 0; i < LOCK_COUNT; i++) { struct stripe_entry *stripe = &htable_stripes[i]; spin_lock(&stripe->lock); list_splice_init(&stripe->list, &all_list); spin_unlock(&stripe->lock); } list_for_each_entry_safe(nf, tmp, &all_list, flows_list) { unsigned int hash; struct stripe_entry *stripe; hash = hash_netflow(&nf->tuple); stripe = &htable_stripes[hash & LOCK_COUNT_MASK]; spin_lock(&stripe->lock); list_move_tail(&nf->flows_list, &stripe->list); hlist_add_head(&nf->hlist, &htable[hash]); spin_unlock(&stripe->lock); } write_unlock_bh(&htable_rwlock); vfree(old_hash); return 0; } static struct ipt_netflow * ipt_netflow_alloc(const struct ipt_netflow_tuple *tuple) { struct ipt_netflow *nf; long count; nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC); if (!nf) { printk(KERN_ERR "ipt_NETFLOW: Can't allocate flow.\n"); return NULL; } memset(nf, 0, sizeof(*nf)); nf->tuple = *tuple; count = atomic_inc_return(&ipt_netflow_count); if (count > peakflows) { peakflows = count; peakflows_at = jiffies; } return nf; } static void ipt_netflow_free(struct ipt_netflow *nf) { if (IS_DUMMY_FLOW(nf)) return; atomic_dec(&ipt_netflow_count); kmem_cache_free(ipt_netflow_cachep, nf); } /* cook pdu, send, and clean */ /* only called in scan worker path */ static void netflow_export_pdu_v5(void) { struct timeval tv; int pdusize; if (!pdu_data_records) return; if (debug > 1) printk(KERN_INFO "netflow_export_pdu_v5 with %d records\n", pdu_data_records); pdu.v5.version = htons(5); pdu.v5.nr_records = htons(pdu_data_records); pdu.v5.ts_uptime = htonl(jiffies_to_msecs(jiffies)); do_gettimeofday(&tv); pdu.v5.ts_usecs = htonl(tv.tv_sec); pdu.v5.ts_unsecs = htonl(tv.tv_usec); pdu.v5.seq = htonl(pdu_seq); //pdu.v5.eng_type = 0; pdu.v5.eng_id = (__u8)engine_id; #ifdef ENABLE_SAMPLER pdu.v5.sampling = htons(sampler_nf_v5()); #endif pdusize = 
NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu_data_records; netflow_sendmsg(&pdu.v5, pdusize); pdu_packets = 0; pdu_traf = 0; pdu_seq += pdu_data_records; pdu_count++; pdu_flow_records = pdu_data_records = 0; } /* only called in scan worker path */ static void netflow_export_flow_v5(struct ipt_netflow *nf) { struct netflow5_record *rec; if (unlikely(debug > 2)) printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records); pdu_packets += nf->nr_packets; pdu_traf += nf->nr_bytes; pdu_ts_mod = jiffies; rec = &pdu.v5.flow[pdu_data_records++]; pdu_flow_records++; /* make V5 flow record */ rec->s_addr = nf->tuple.src.ip; rec->d_addr = nf->tuple.dst.ip; rec->nexthop = nf->nh.ip; #ifdef SNMP_RULES rec->i_ifc = htons(nf->i_ifcr); rec->o_ifc = htons(nf->o_ifcr); #else rec->i_ifc = htons(nf->tuple.i_ifc); rec->o_ifc = htons(nf->o_ifc); #endif rec->nr_packets = htonl(nf->nr_packets); rec->nr_octets = htonl(nf->nr_bytes); rec->first_ms = htonl(jiffies_to_msecs(nf->nf_ts_first)); rec->last_ms = htonl(jiffies_to_msecs(nf->nf_ts_last)); rec->s_port = nf->tuple.s_port; rec->d_port = nf->tuple.d_port; //rec->reserved = 0; /* pdu is always zeroized for v5 in netflow_switch_version */ rec->tcp_flags = nf->tcp_flags; rec->protocol = nf->tuple.protocol; rec->tos = nf->tuple.tos; #ifdef CONFIG_NF_NAT_NEEDED rec->s_as = nf->s_as; rec->d_as = nf->d_as; #endif rec->s_mask = nf->s_mask; rec->d_mask = nf->d_mask; //rec->padding = 0; ipt_netflow_free(nf); if (pdu_data_records == NETFLOW5_RECORDS_MAX) netflow_export_pdu_v5(); } /* pdu is initially blank, export current pdu, and prepare next for filling. */ static void netflow_export_pdu_v9(void) { struct timeval tv; int pdusize; if (pdu_data_used <= pdu.v9.data) return; if (debug > 1) printk(KERN_INFO "netflow_export_pdu_v9 with %d records\n", pdu_data_records + pdu_tpl_records); pdu.v9.version = htons(9); pdu.v9.nr_records = htons(pdu_data_records + pdu_tpl_records); pdu.v9.sys_uptime_ms = htonl(jiffies_to_msecs(jiffies)); do_gettimeofday(&tv); pdu.v9.export_time_s = htonl(tv.tv_sec); pdu.v9.seq = htonl(pdu_seq); pdu.v9.source_id = htonl(engine_id); pdusize = pdu_data_used - (unsigned char *)&pdu.v9; netflow_sendmsg(&pdu.v9, pdusize); pdu_packets = 0; pdu_traf = 0; pdu_seq++; pdu_count++; pdu_flow_records = pdu_data_records = pdu_tpl_records = 0; pdu_data_used = pdu.v9.data; pdu_flowset = NULL; } static void netflow_export_pdu_ipfix(void) { struct timeval tv; int pdusize; if (pdu_data_used <= pdu.ipfix.data) return; if (debug > 1) printk(KERN_INFO "netflow_export_pduX with %d records\n", pdu_data_records); pdu.ipfix.version = htons(10); do_gettimeofday(&tv); pdu.ipfix.export_time_s = htonl(tv.tv_sec); pdu.ipfix.seq = htonl(pdu_seq); pdu.ipfix.odomain_id = htonl(engine_id); pdusize = pdu_data_used - (unsigned char *)&pdu; pdu.ipfix.length = htons(pdusize); netflow_sendmsg(&pdu.ipfix, pdusize); pdu_packets = 0; pdu_traf = 0; pdu_seq += pdu_data_records; pdu_count++; pdu_flow_records = pdu_data_records = pdu_tpl_records = 0; pdu_data_used = pdu.ipfix.data; pdu_flowset = NULL; } static inline int pdu_have_space(const size_t size) { return ((pdu_data_used + size) <= pdu_high_wm); } static inline unsigned char *pdu_grab_space(const size_t size) { unsigned char *ptr = pdu_data_used; pdu_data_used += size; return ptr; } static inline void pdu_rewind_space(const size_t size) { pdu_data_used -= size; } /* allocate data space in pdu, or export (reallocate) and fail. 
*/ static inline unsigned char *pdu_alloc_fail_export(const size_t size) { if (unlikely(!pdu_have_space(size))) { netflow_export_pdu(); return NULL; } return pdu_grab_space(size); } /* doesn't fail, but can provide empty pdu. */ static unsigned char *pdu_alloc_export(const size_t size) { return pdu_alloc_fail_export(size) ?: pdu_grab_space(size); } /* global table of sizes of template field types */ #define two(id, a, b, len) [id] = len, #define one(id, a, len) [id] = len, static u_int8_t tpl_element_sizes[] = { Elements }; #undef two #undef one #define TEMPLATES_HASH_BSIZE 8 #define TEMPLATES_HASH_SIZE (1<tpl_key == tmask) return tpl; tnum = 0; /* assemble array of base_templates from template key */ /* NB: this should not have exporting protocol dependent checks */ if (tmask & BTPL_OPTION) { switch (tmask) { case OTPL_SYSITIME: tlist[tnum++] = &template_sys_init_time; break; case OTPL_MPSTAT: tlist[tnum++] = &template_meter_stat; break; case OTPL_MPRSTAT: tlist[tnum++] = &template_meter_rel_stat; break; case OTPL_EPRSTAT: tlist[tnum++] = &template_exp_rel_stat; break; #ifdef ENABLE_SAMPLER case OTPL_SAMPLER: tlist[tnum++] = &template_sampler; break; case OTPL_SEL_RAND: tlist[tnum++] = &template_selector_random; break; case OTPL_SEL_COUNT: tlist[tnum++] = &template_selector_systematic; break; case OTPL_SEL_STAT: tlist[tnum++] = &template_selector_stat; break; case OTPL_SEL_STATH: tlist[tnum++] = &template_selector_stat_hash; break; #endif case OTPL_IFNAMES: tlist[tnum++] = &template_interfaces; break; } } else { if (tmask & BTPL_IP4) { tlist[tnum++] = &template_ipv4; if (tmask & BTPL_IP4OPTIONS) tlist[tnum++] = &template_options4; if (tmask & BTPL_MASK4) tlist[tnum++] = &template_ipv4_mask; if (tmask & BTPL_ICMPX4) tlist[tnum++] = &template_icmp_ipv4; } else if (tmask & BTPL_IP6) { tlist[tnum++] = &template_ipv6; if (tmask & BTPL_LABEL6) tlist[tnum++] = &template_label6; if (tmask & BTPL_IP6OPTIONS) tlist[tnum++] = &template_options6; if (tmask & BTPL_ICMPX6) tlist[tnum++] = &template_icmp_ipv6; } else if (tmask & BTPL_NAT4) tlist[tnum++] = &template_nat4; if (tmask & BTPL_PORTS) tlist[tnum++] = &template_ports; else if (tmask & BTPL_ICMP9) tlist[tnum++] = &template_icmp_v9; if (tmask & BTPL_BASE9) tlist[tnum++] = &template_base_9; else if (tmask & BTPL_BASEIPFIX) tlist[tnum++] = &template_base_ipfix; if (tmask & BTPL_TCPOPTIONS) tlist[tnum++] = &template_tcpoptions; if (tmask & BTPL_IGMP) tlist[tnum++] = &template_igmp; if (tmask & BTPL_IPSEC) tlist[tnum++] = &template_ipsec; #ifdef ENABLE_MAC if (tmask & BTPL_MAC) tlist[tnum++] = &template_mac_ipfix; #endif #ifdef ENABLE_VLAN if (tmask & BTPL_VLAN9) tlist[tnum++] = &template_vlan_v9; else { if (tmask & BTPL_VLANX) tlist[tnum++] = &template_vlan_ipfix; if (tmask & BTPL_VLANI) tlist[tnum++] = &template_vlan_inner; } #endif #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) if (tmask & BTPL_ETHERTYPE) tlist[tnum++] = &template_ethertype; #endif #ifdef MPLS_DEPTH if (tmask & BTPL_MPLS) tlist[tnum++] = &template_mpls; #endif #ifdef ENABLE_DIRECTION if (tmask & BTPL_DIRECTION) tlist[tnum++] = &template_direction; #endif #ifdef ENABLE_SAMPLER if (tmask & BTPL_SAMPLERID) tlist[tnum++] = &template_samplerid; else if (tmask & BTPL_SELECTORID) tlist[tnum++] = &template_selectorid; #endif } /* !BTPL_OPTION */ /* calculate resulting template length * and update base_template array lengths */ length = 0; for (i = 0; i < tnum; i++) { if (!tlist[i]->length) { for (k = 0; tlist[i]->types[k]; k++); tlist[i]->length = k; } length += tlist[i]->length; } /* 
elements are [type, len] pairs + one termiantor */ tpl = kmalloc(sizeof(struct data_template) + (length * 2 + 1) * sizeof(u_int16_t), GFP_KERNEL); if (!tpl) { printk(KERN_ERR "ipt_NETFLOW: unable to kmalloc template (%#x).\n", tmask); return NULL; } tpl->tpl_key = tmask; tpl->options = (tmask & BTPL_OPTION) != 0; if (tpl->options) tpl->tpl_size = sizeof(struct flowset_opt_tpl_v9); /* ipfix is of the same size */ else tpl->tpl_size = sizeof(struct flowset_template); tpl->length = length; tpl->rec_size = 0; tpl->template_id_n = htons(template_ids++); tpl_gen_count++; if (template_ids >= 0x00010000) template_ids = FLOWSET_DATA_FIRST; tpl->exported_cnt = 0; tpl->exported_ts = 0; /* construct resulting data_template and fill lengths */ j = 0; for (i = 0; i < tnum; i++) { struct base_template *btpl = tlist[i]; for (k = 0; k < btpl->length; k++) { int size; int type = btpl->types[k]; tpl->fields[j++] = type; size = tpl_element_sizes[type]; tpl->fields[j++] = size; tpl->rec_size += size; } tpl->tpl_size += btpl->length * TPL_FIELD_NSIZE; } tpl->fields[j++] = 0; hlist_add_head(&tpl->hlist, &templates_hash[hash]); tpl_count++; return tpl; } static u_int16_t scope_ipfix_to_v9(const u_int16_t elem) { switch (elem) { case observationDomainId: case meteringProcessId: case exportingProcessId: return V9_SCOPE_SYSTEM; case ingressInterface: case portId: return V9_SCOPE_INTERFACE; case observationPointId: case LineCardId: return V9_SCOPE_LINECARD; case TemplateId: return V9_SCOPE_TEMPLATE; default: return -1; } } /* add template of any type and version */ static void pdu_add_template(struct data_template *tpl) { __u8 *ptr; struct flowset_template *ntpl; __be16 *sptr, *fields; size_t added_size = 0; /* for options template we also make sure there is enough * room in the packet for one record, with flowset header */ if (tpl->options) added_size = sizeof(struct flowset_data) + tpl->rec_size; ptr = pdu_alloc_export(tpl->tpl_size + added_size); pdu_rewind_space(added_size); ntpl = (void *)ptr; /* first three fields are equal for all types of templates */ if (tpl->options) ntpl->flowset_id = protocol == 9? htons(FLOWSET_OPTIONS) : htons(IPFIX_OPTIONS); else ntpl->flowset_id = protocol == 9? 
htons(FLOWSET_TEMPLATE) : htons(IPFIX_TEMPLATE); ntpl->length = htons(tpl->tpl_size); ntpl->template_id = tpl->template_id_n; if (tpl->options) { /* option templates should be defined with first element being scope */ if (protocol == 9) { struct flowset_opt_tpl_v9 *otpl = (void *)ptr; otpl->scope_len = htons(TPL_FIELD_NSIZE); otpl->opt_len = htons((tpl->length - 1) * TPL_FIELD_NSIZE); ptr += sizeof(struct flowset_opt_tpl_v9); } else { struct flowset_opt_tpl_ipfix *otpl = (void *)ptr; otpl->field_count = htons(tpl->length); otpl->scope_count = htons(1); ptr += sizeof(struct flowset_opt_tpl_ipfix); } } else { ntpl->field_count = htons(tpl->length); ptr += sizeof(struct flowset_template); } sptr = (__be16 *)ptr; fields = tpl->fields; if (tpl->options && protocol == 9) { /* v9 scope */ *sptr++ = htons(scope_ipfix_to_v9(*fields++)); *sptr++ = htons(*fields++); } for (;;) { const int type = *fields++; if (!type) break; *sptr++ = htons(type); *sptr++ = htons(*fields++); } tpl->exported_cnt = pdu_count; tpl->exported_ts = jiffies; pdu_flowset = NULL; pdu_tpl_records++; } #ifdef ENABLE_DIRECTION static inline __u8 hook2dir(const __u8 hooknum) { switch (hooknum) { case NF_INET_PRE_ROUTING: case NF_INET_LOCAL_IN: return 0; case NF_INET_LOCAL_OUT: case NF_INET_POST_ROUTING: return 1; default: return -1; } } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(5,7,0) static inline void put_unaligned_be24(u32 val, unsigned char *p) { *p++ = val >> 16; put_unaligned_be16(val, p); } #endif static struct { s64 ms; /* this much abs milliseconds */ unsigned long jiffies; /* is that much jiffies */ } jiffies_base; /* prepare for jiffies_to_ms_abs() batch */ static void set_jiffies_base(void) { ktime_t ktime; /* try to get them atomically */ local_bh_disable(); jiffies_base.jiffies = jiffies; ktime = ktime_get_real(); local_bh_enable(); jiffies_base.ms = ktime_to_ms(ktime); } /* convert jiffies to ktime and rebase to unix epoch */ static inline s64 jiffies_to_ms_abs(unsigned long j) { long jdiff = jiffies_base.jiffies - j; if (likely(jdiff >= 0)) return jiffies_base.ms - (s64)jiffies_to_msecs(jdiff); else return jiffies_base.ms + (s64)jiffies_to_msecs(-jdiff); } typedef struct in6_addr in6_t; /* encode one field (data records only) */ static inline void add_tpl_field(__u8 *ptr, const int type, const struct ipt_netflow *nf) { switch (type) { case IN_BYTES: put_unaligned_be32(nf->nr_bytes, ptr); break; case IN_PKTS: put_unaligned_be32(nf->nr_packets, ptr); break; case FIRST_SWITCHED: put_unaligned_be32(jiffies_to_msecs(nf->nf_ts_first), ptr); break; case LAST_SWITCHED: put_unaligned_be32(jiffies_to_msecs(nf->nf_ts_last), ptr); break; case flowStartMilliseconds: put_unaligned_be64(jiffies_to_ms_abs(nf->nf_ts_first), ptr); break; case flowEndMilliseconds: put_unaligned_be64(jiffies_to_ms_abs(nf->nf_ts_last), ptr); break; case IPV4_SRC_ADDR: put_unaligned(nf->tuple.src.ip, (__be32 *)ptr); break; case IPV4_DST_ADDR: put_unaligned(nf->tuple.dst.ip, (__be32 *)ptr); break; case IPV4_NEXT_HOP: put_unaligned(nf->nh.ip, (__be32 *)ptr); break; case L4_SRC_PORT: put_unaligned(nf->tuple.s_port, (__be16 *)ptr); break; case L4_DST_PORT: put_unaligned(nf->tuple.d_port, (__be16 *)ptr); break; #ifdef SNMP_RULES case INPUT_SNMP: put_unaligned_be16(nf->i_ifcr, ptr); break; case OUTPUT_SNMP: put_unaligned_be16(nf->o_ifcr, ptr); break; #else case INPUT_SNMP: put_unaligned_be16(nf->tuple.i_ifc, ptr); break; case OUTPUT_SNMP: put_unaligned_be16(nf->o_ifc, ptr); break; #endif #ifdef ENABLE_PHYSDEV case ingressPhysicalInterface: 
put_unaligned_be16(nf->i_ifphys, ptr); break; case egressPhysicalInterface: put_unaligned_be16(nf->o_ifphys, ptr); break; #endif #ifdef ENABLE_VLAN #define EXTRACT_VLAN_PRIO(tag) ((ntohs(tag) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) case SRC_VLAN: case dot1qVlanId: put_unaligned(nf->tuple.tag[0] & htons(VLAN_VID_MASK), (__be16 *)ptr); break; case dot1qPriority: *ptr = EXTRACT_VLAN_PRIO(nf->tuple.tag[0]); break; case dot1qCustomerVlanId: put_unaligned(nf->tuple.tag[1] & htons(VLAN_VID_MASK), (__be16 *)ptr); break; case dot1qCustomerPriority: *ptr = EXTRACT_VLAN_PRIO(nf->tuple.tag[1]); break; #endif #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) case ethernetType: put_unaligned(nf->ethernetType, (__be16 *)ptr); break; #endif #ifdef ENABLE_MAC case destinationMacAddress: memcpy(ptr, &nf->tuple.h_dst, ETH_ALEN); break; case sourceMacAddress: memcpy(ptr, &nf->tuple.h_src, ETH_ALEN); break; #endif #ifdef MPLS_DEPTH # if __GNUC_PREREQ(4,6) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Warray-bounds" # endif case MPLS_LABEL_1: memcpy(ptr, &nf->tuple.mpls[0], 3); break; case MPLS_LABEL_2: memcpy(ptr, &nf->tuple.mpls[1], 3); break; case MPLS_LABEL_3: memcpy(ptr, &nf->tuple.mpls[2], 3); break; # if MPLS_DEPTH > 3 case MPLS_LABEL_4: memcpy(ptr, &nf->tuple.mpls[3], 3); break; case MPLS_LABEL_5: memcpy(ptr, &nf->tuple.mpls[4], 3); break; case MPLS_LABEL_6: memcpy(ptr, &nf->tuple.mpls[5], 3); break; case MPLS_LABEL_7: memcpy(ptr, &nf->tuple.mpls[6], 3); break; case MPLS_LABEL_8: memcpy(ptr, &nf->tuple.mpls[7], 3); break; case MPLS_LABEL_9: memcpy(ptr, &nf->tuple.mpls[8], 3); break; case MPLS_LABEL_10: memcpy(ptr, &nf->tuple.mpls[9], 3); break; # endif # if __GNUC_PREREQ(4,6) # pragma GCC diagnostic pop # endif case mplsTopLabelTTL: *ptr = ntohl(nf->tuple.mpls[0]); break; #endif #ifdef ENABLE_DIRECTION case DIRECTION: *ptr = hook2dir(nf->hooknumx - 1); break; #endif case IP_VERSION: *ptr = (nf->tuple.l3proto == AF_INET ? 
4 : 6); break; case PROTOCOL: *ptr = nf->tuple.protocol; break; case TCP_FLAGS: *ptr = nf->tcp_flags; break; case TOS: *ptr = nf->tuple.tos; break; case IPV6_SRC_ADDR: *(in6_t *)ptr = nf->tuple.src.in6; break; case IPV6_DST_ADDR: *(in6_t *)ptr = nf->tuple.dst.in6; break; case IPV6_NEXT_HOP: *(in6_t *)ptr = nf->nh.in6; break; case IPV6_FLOW_LABEL: put_unaligned_be24(nf->flow_label, ptr); break; case tcpOptions: put_unaligned_be32(nf->tcpoptions, ptr); break; case ipv4Options: put_unaligned_be32(nf->options, ptr); break; case IPV6_OPTION_HEADERS: put_unaligned_be16(nf->options, ptr); break; case SRC_MASK: *ptr = nf->s_mask; break; case DST_MASK: *ptr = nf->d_mask; break; case icmpTypeCodeIPv4: /*FALLTHROUGH*/ case icmpTypeCodeIPv6: put_unaligned(nf->tuple.d_port, (__be16 *)ptr); break; case MUL_IGMP_TYPE: *ptr = nf->tuple.d_port; break; case flowEndReason: *ptr = nf->flowEndReason; break; #ifdef CONFIG_NF_NAT_NEEDED case postNATSourceIPv4Address: put_unaligned(nf->nat->post.s_addr, (__be32 *)ptr); break; case postNATDestinationIPv4Address: put_unaligned(nf->nat->post.d_addr, (__be32 *)ptr); break; case postNAPTSourceTransportPort: put_unaligned(nf->nat->post.s_port, (__be16 *)ptr); break; case postNAPTDestinationTransportPort: put_unaligned(nf->nat->post.d_port, (__be16 *)ptr); break; case natEvent: *ptr = nf->nat->nat_event; break; #endif case IPSecSPI: put_unaligned(EXTRACT_SPI(nf->tuple), (__be32 *)ptr); break; case observationTimeMilliseconds: put_unaligned_be64(ktime_to_ms(nf->nf_ts_obs), ptr); break; case observationTimeMicroseconds: put_unaligned_be64(ktime_to_us(nf->nf_ts_obs), ptr); break; case observationTimeNanoseconds: put_unaligned_be64(ktime_to_ns(nf->nf_ts_obs), ptr); break; #ifdef ENABLE_SAMPLER case FLOW_SAMPLER_ID: case selectorId: *ptr = get_sampler_mode(); break; #endif default: WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type); memset(ptr, 0, tpl_element_sizes[type]); } } #define PAD_SIZE 4 /* rfc prescribes flowsets to be padded */ /* cache timeout_rate (minutes) in jiffies */ static inline unsigned long timeout_rate_j(void) { static unsigned int t_rate = 0; static unsigned long t_rate_j = 0; if (unlikely(timeout_rate != t_rate)) { struct timeval tv = { .tv_sec = timeout_rate * 60, .tv_usec = 0 }; t_rate = timeout_rate; t_rate_j = timeval_to_jiffies(&tv); } return t_rate_j; } /* return buffer where to write records data */ static unsigned char *alloc_record_tpl(struct data_template *tpl) { unsigned char *ptr; /* If previous write was to the same template and there is room, then we just add new record, * otherwise we (re)allocate flowset (and/or whole pdu). 
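 * On reallocation the previous flowset is padded up to a 4-byte
 * boundary and the template itself is re-sent once it is older than
 * refresh_rate exported pdus or timeout_rate minutes.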
*/ if (!pdu_flowset || pdu_flowset->flowset_id != tpl->template_id_n || !(ptr = pdu_alloc_fail_export(tpl->rec_size))) { /* if there was previous data template we should pad it to 4 bytes */ if (pdu_flowset) { int padding = (PAD_SIZE - ntohs(pdu_flowset->length) % PAD_SIZE) % PAD_SIZE; if (padding && (ptr = pdu_alloc_fail_export(padding))) { pdu_flowset->length = htons(ntohs(pdu_flowset->length) + padding); for (; padding; padding--) *ptr++ = 0; } } /* export template if needed */ if (!tpl->exported_ts || pdu_count > (tpl->exported_cnt + refresh_rate) || time_is_before_jiffies(tpl->exported_ts + timeout_rate_j())) { pdu_add_template(tpl); } /* new flowset */ ptr = pdu_alloc_export(sizeof(struct flowset_data) + tpl->rec_size); pdu_flowset = (struct flowset_data *)ptr; pdu_flowset->flowset_id = tpl->template_id_n; pdu_flowset->length = htons(sizeof(struct flowset_data)); ptr += sizeof(struct flowset_data); } return ptr; } static unsigned char *alloc_record_key(const unsigned int t_key, struct data_template **ptpl) { struct data_template *tpl; tpl = get_template(t_key); if (unlikely(!tpl)) { printk(KERN_INFO "ipt_NETFLOW: template %#x allocation failed.\n", t_key); NETFLOW_STAT_INC_ATOMIC(alloc_err); return NULL; } *ptpl = tpl; return alloc_record_tpl(tpl); } static void netflow_export_flow_tpl(struct ipt_netflow *nf) { unsigned char *ptr; struct data_template *tpl; unsigned int tpl_mask; int i; if (unlikely(debug > 2)) printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records + pdu_tpl_records); /* build the template key */ #ifdef CONFIG_NF_NAT_NEEDED if (nf->nat) { tpl_mask = BTPL_NAT4; goto ready; } #endif tpl_mask = (protocol == 9)? BTPL_BASE9 : BTPL_BASEIPFIX; if (likely(nf->tuple.l3proto == AF_INET)) { tpl_mask |= BTPL_IP4; if (unlikely(nf->options)) tpl_mask |= BTPL_IP4OPTIONS; } else { tpl_mask |= BTPL_IP6; if (unlikely(nf->options)) tpl_mask |= BTPL_IP6OPTIONS; if (unlikely(nf->flow_label)) tpl_mask |= BTPL_LABEL6; } if (unlikely(nf->tcpoptions)) tpl_mask |= BTPL_TCPOPTIONS; if (unlikely(nf->s_mask || nf->d_mask)) tpl_mask |= BTPL_MASK4; if (likely(nf->tuple.protocol == IPPROTO_TCP || nf->tuple.protocol == IPPROTO_UDP || nf->tuple.protocol == IPPROTO_SCTP || nf->tuple.protocol == IPPROTO_UDPLITE)) tpl_mask |= BTPL_PORTS; else if (nf->tuple.protocol == IPPROTO_ICMP || nf->tuple.protocol == IPPROTO_ICMPV6) { if (protocol == 9) tpl_mask |= BTPL_ICMP9; else if (likely(nf->tuple.l3proto == AF_INET)) tpl_mask |= BTPL_ICMPX4; else tpl_mask |= BTPL_ICMPX6; } else if (nf->tuple.protocol == IPPROTO_IGMP) tpl_mask |= BTPL_IGMP; else if (nf->tuple.protocol == IPPROTO_AH || nf->tuple.protocol == IPPROTO_ESP) tpl_mask |= BTPL_IPSEC; #ifdef ENABLE_MAC if (!is_zero_ether_addr(nf->tuple.h_src) || !is_zero_ether_addr(nf->tuple.h_dst)) tpl_mask |= BTPL_MAC; #endif #ifdef ENABLE_VLAN if (nf->tuple.tag[0]) { if (protocol == 9) tpl_mask |= BTPL_VLAN9; else { tpl_mask |= BTPL_VLANX; if (nf->tuple.tag[1]) tpl_mask |= BTPL_VLANI; } } #endif #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) if (nf->ethernetType) tpl_mask |= BTPL_ETHERTYPE; #endif #ifdef MPLS_DEPTH if (nf->tuple.mpls[0]) tpl_mask |= BTPL_MPLS; #endif #ifdef ENABLE_DIRECTION if (nf->hooknumx) tpl_mask |= BTPL_DIRECTION; #endif #ifdef ENABLE_SAMPLER if (get_sampler_mode()) tpl_mask |= (protocol == 9)? 
BTPL_SAMPLERID : BTPL_SELECTORID; #endif #ifdef CONFIG_NF_NAT_NEEDED ready: #endif ptr = alloc_record_key(tpl_mask, &tpl); if (unlikely(!ptr)) { NETFLOW_STAT_ADD(pkt_lost, nf->nr_packets); NETFLOW_STAT_ADD(traf_lost, nf->nr_bytes); NETFLOW_STAT_INC(flow_lost); NETFLOW_STAT_TS(lost); ipt_netflow_free(nf); return; } /* encode all fields */ for (i = 0; ; ) { int type = tpl->fields[i++]; if (!type) break; add_tpl_field(ptr, type, nf); ptr += tpl->fields[i++]; } pdu_data_records++; pdu_flow_records++; pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size); pdu_packets += nf->nr_packets; pdu_traf += nf->nr_bytes; pdu_ts_mod = jiffies; ipt_netflow_free(nf); } static u64 get_sys_init_time_ms(void) { static u64 sys_init_time = 0; if (!sys_init_time) sys_init_time = ktime_to_ms(ktime_get_real()) - jiffies_to_msecs(jiffies); return sys_init_time; } #ifdef ENABLE_SAMPLER /* http://www.iana.org/assignments/ipfix/ipfix.xml#ipfix-flowselectoralgorithm */ static unsigned char get_flowselectoralgo(void) { switch (get_sampler_mode()) { case SAMPLER_DETERMINISTIC: return 1; /* Systematic count-based Sampling */ case SAMPLER_HASH: case SAMPLER_RANDOM: return 3; /* Random n-out-of-N Sampling */ default: return 0; /* Unassigned */ } } #endif static void export_stat_st_ts(const unsigned int tpl_mask, struct ipt_netflow_stat *st, struct duration *ts) { unsigned char *ptr; struct data_template *tpl; int i; ptr = alloc_record_key(tpl_mask, &tpl); if (unlikely(!ptr)) return; /* encode all fields */ for (i = 0; ; ) { int type = tpl->fields[i++]; if (!type) break; switch (type) { case observationDomainId: put_unaligned_be32(engine_id, ptr); break; case exportingProcessId: put_unaligned_be32(engine_id, ptr); break; case observedFlowTotalCount: put_unaligned_be64(st->notfound, ptr); break; case exportedMessageTotalCount: put_unaligned_be64(st->exported_pkt, ptr); break; case exportedOctetTotalCount: put_unaligned_be64(st->exported_traf, ptr); break; case exportedFlowRecordTotalCount: put_unaligned_be64(st->exported_flow, ptr); break; case ignoredPacketTotalCount: put_unaligned_be64(st->pkt_drop, ptr); break; case ignoredOctetTotalCount: put_unaligned_be64(st->traf_drop, ptr); break; case notSentFlowTotalCount: put_unaligned_be64(st->flow_lost, ptr); break; case notSentPacketTotalCount: put_unaligned_be64(st->pkt_lost, ptr); break; case notSentOctetTotalCount: put_unaligned_be64(st->traf_lost, ptr); break; case flowStartMilliseconds: put_unaligned_be64(ktime_to_ms(ts->first), ptr); break; case flowEndMilliseconds: put_unaligned_be64(ktime_to_ms(ts->last), ptr); break; case systemInitTimeMilliseconds: put_unaligned_be64(get_sys_init_time_ms(), ptr); break; case observationDomainName: memcpy(ptr, version_string, version_string_size + 1); break; #ifdef ENABLE_SAMPLER case FLOW_SAMPLER_ID: case selectorId: *ptr = get_sampler_mode(); break; case FLOW_SAMPLER_MODE: *ptr = get_sampler_mode_nf(); break; case flowSelectorAlgorithm: *ptr = get_flowselectoralgo(); break; case samplingSize: case samplingFlowInterval: *ptr = 1 /* always 'one-out-of' */; break; case samplingFlowSpacing: case samplingPopulation: case FLOW_SAMPLER_RANDOM_INTERVAL: put_unaligned_be16(get_sampler_interval(), ptr); break; case selectorIDTotalFlowsObserved: put_unaligned_be64(atomic64_read(&flows_observed), ptr); break; case selectorIDTotalFlowsSelected: put_unaligned_be64(atomic64_read(&flows_selected), ptr); break; case selectorIdTotalPktsObserved: put_unaligned_be64(st->pkts_observed, ptr); break; case selectorIdTotalPktsSelected: 
put_unaligned_be64(st->pkts_selected, ptr); break; #endif default: WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type); } ptr += tpl->fields[i++]; } pdu_data_records++; pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size); pdu_ts_mod = jiffies; } static inline void export_stat_ts(const unsigned int tpl_mask, struct duration *ts) { export_stat_st_ts(tpl_mask, NULL, ts); } static inline void export_stat_st(const unsigned int tpl_mask, struct ipt_netflow_stat *st) { export_stat_st_ts(tpl_mask, st, NULL); } static inline void export_stat(const unsigned int tpl_mask) { export_stat_st(tpl_mask, NULL); } static void netflow_export_stats(void) { struct ipt_netflow_stat t = { 0 }; int cpu; if (unlikely(!ts_sysinf_last) || time_is_before_jiffies(ts_sysinf_last + SYSINFO_INTERVAL * HZ)) { start_ts.last = ktime_get_real(); export_stat_ts(OTPL_SYSITIME, &start_ts); ts_sysinf_last = jiffies; pdu_needs_export++; } if (unlikely(!ts_stat_last)) ts_stat_last = jiffies; if (likely(time_is_after_jiffies(ts_stat_last + STAT_INTERVAL * HZ))) return; for_each_present_cpu(cpu) { struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); t.notfound += st->notfound; // observedFlowTotalCount t.exported_pkt += st->exported_pkt; // exportedMessageTotalCount t.exported_traf += st->exported_traf; // exportedOctetTotalCount t.exported_flow += st->exported_flow; // exportedFlowRecordTotalCount t.pkt_drop += st->pkt_drop; // ignoredPacketTotalCount t.traf_drop += st->traf_drop; // ignoredOctetTotalCount t.flow_lost += st->flow_lost; // notSentFlowTotalCount t.pkt_lost += st->pkt_lost; // notSentPacketTotalCount t.traf_lost += st->traf_lost; // notSentOctetTotalCount #ifdef ENABLE_SAMPLER t.pkts_selected += st->pkts_selected; t.pkts_observed += st->pkts_observed; #endif t.drop.first_tv64 = min_not_zero(t.drop.first_tv64, st->drop.first_tv64); t.drop.last_tv64 = max(t.drop.last_tv64, st->drop.last_tv64); t.lost.first_tv64 = min_not_zero(t.lost.first_tv64, st->lost.first_tv64); t.lost.last_tv64 = max(t.lost.last_tv64, st->lost.last_tv64); } export_stat_st(OTPL_MPSTAT, &t); if (t.pkt_drop) export_stat_st_ts(OTPL_MPRSTAT, &t, &t.drop); if (t.pkt_lost) export_stat_st_ts(OTPL_EPRSTAT, &t, &t.lost); #ifdef ENABLE_SAMPLER if (protocol == 10) { sampling_ts.last = ktime_get_real(); switch (get_sampler_mode()) { case SAMPLER_HASH: export_stat_st_ts(OTPL_SEL_STATH, &t, &sampling_ts); break; case SAMPLER_DETERMINISTIC: case SAMPLER_RANDOM: export_stat_st_ts(OTPL_SEL_STAT, &t, &sampling_ts); } } #endif ts_stat_last = jiffies; pdu_needs_export++; } #ifdef ENABLE_SAMPLER static void export_sampler_parameters(void) { if (get_sampler_mode() && (unlikely(!ts_sampler_last) || time_is_before_jiffies(ts_sampler_last + SAMPLER_INFO_INTERVAL * HZ))) { if (protocol == 9) export_stat(OTPL_SAMPLER); else { const unsigned char mode = get_sampler_mode(); if (mode == SAMPLER_DETERMINISTIC) export_stat(OTPL_SEL_COUNT); else export_stat(OTPL_SEL_RAND); } ts_sampler_last = jiffies; } } #endif static int ethtool_drvinfo(unsigned char *ptr, size_t size, struct net_device *dev) { struct ethtool_drvinfo info = { 0 }; const struct ethtool_ops *ops = dev->ethtool_ops; #ifndef ETHTOOL_GLINKSETTINGS struct ethtool_cmd ecmd; #define _KSETTINGS(x, y) (x) #else struct ethtool_link_ksettings ekmd; #define _KSETTINGS(x, y) (y) #endif int len = size; int n; if (len <= 0 || !ops) return 0; if (ops->begin) { /* was not called before __ethtool_get_settings() though */ if (ops->begin(dev) < 0) return 0; } /* driver name */ if (ops->get_drvinfo) 
ops->get_drvinfo(dev, &info); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) else if (dev->dev.parent && dev->dev.parent->driver) { strlcpy(info.driver, dev->dev.parent->driver->name, sizeof(info.driver)); } #endif n = scnprintf(ptr, len, "%s", info.driver); ptr += n; len -= n; if (!n || len <= 1) /* have room for separator too */ goto ret; /* only get_settings for running devices to not trigger link negotiation */ if (dev->flags & IFF_UP && dev->flags & IFF_RUNNING && !_KSETTINGS(__ethtool_get_settings(dev, &ecmd), __ethtool_get_link_ksettings(dev, &ekmd))) { char *units, *p; __u32 speed = _KSETTINGS(ethtool_cmd_speed(&ecmd), ekmd.base.speed); if (speed == SPEED_UNKNOWN) units = ""; else if (speed <= 1000) units = "MbE"; else { speed /= 1000; units = "GbE"; } switch (_KSETTINGS(ecmd.port, ekmd.base.port)) { case PORT_TP: p = "tp"; break; case PORT_AUI: p = "aui"; break; case PORT_MII: p = "mii"; break; case PORT_FIBRE: p = "fb"; break; case PORT_BNC: p = "bnc"; break; #ifdef PORT_DA case PORT_DA: p = "da"; break; #endif default: p = ""; } n = scnprintf(ptr, len, ",%d%s,%s", speed, units, p); len -= n; } ret: if (ops->complete) ops->complete(dev); return size - len; } #undef _KSETTINGS static const unsigned short netdev_type[] = {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_AX25, ARPHRD_IEEE802, ARPHRD_ARCNET, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_IRDA, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; static const char *const netdev_type_name[] = {"NET/ROM", "Ethernet", "AX.25 Level 2", "IEEE 802.2 Ethernet", "ARCnet", "Frame Relay DLCI", "ATM", "Metricom STRIP", "IEEE 1394 IPv4", "EUI-64", "InfiniBand", "SLIP", "CSLIP", "SLIP6", "CSLIP6", "ROSE", "X.25", "HW X.25", "PPP", "Cisco HDLC", "LAPB", "DDCMP", "Raw HDLC", "IPIP Tunnel", "IP6IP6 Tunnel", "FRAD", "Loopback", "Localtalk", "FDDI", "SIT Tunnel", "IP over DDP", "GRE over IP", "PISM Register", "HIPPI", "IrDA", "IEEE 802.11", "IEEE 802.11 Prism2", "IEEE 802.11 Radiotap", "PhoNet", "PhoNet pipe", "IEEE 802.15.4", "void", "none"}; static const char *dev_type(int dev_type) { int i; BUG_ON(ARRAY_SIZE(netdev_type) != ARRAY_SIZE(netdev_type_name)); for (i = 0; i < ARRAY_SIZE(netdev_type); i++) if (netdev_type[i] == dev_type) return netdev_type_name[i]; return ""; } static void export_dev(struct net_device *dev) { unsigned char *ptr; struct data_template *tpl; int i; ptr = alloc_record_key(OTPL_IFNAMES, &tpl); if (unlikely(!ptr)) return; /* encode all fields */ for (i = 0; ; ) { int type = tpl->fields[i++]; int size; int n; if (!type) break; size = tpl->fields[i++]; switch (type) { case observationDomainId: put_unaligned_be32(engine_id, ptr); break; case IF_NAME: n = scnprintf(ptr, size, "%s", dev->name); memset(ptr + n, 0, size - n); break; case IF_DESC: /* manual dev 'alias' setting is a first priority, * then ethtool driver name with basic info, * finally net_device.type is a last resort */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) if (dev->ifalias) { n = dev_get_alias(dev, ptr, size); if (n >= size) n = size - 1; } else #endif n = ethtool_drvinfo(ptr, size, dev); 
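/* Illustrative example (not from a real capture): the description built
 * here may look like "igb,1000MbE,tp" (driver, speed, port type); when
 * ethtool yields nothing, the ARPHRD type name is used below instead. */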
if (!n) n = scnprintf(ptr, size, "%s", dev_type(dev->type)); memset(ptr + n, 0, size - n); break; case INPUT_SNMP: #ifdef SNMP_RULES rcu_read_lock(); put_unaligned_be16(resolve_snmp(dev), ptr); rcu_read_unlock(); #else put_unaligned_be16(dev->ifindex, ptr); #endif break; default: WARN_ONCE(1, "NETFLOW: Unknown Element id %d\n", type); } ptr += size; } pdu_data_records++; pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size); pdu_ts_mod = jiffies; } static void export_ifnames(void) { struct net_device *dev; if (likely(ts_ifnames_last) && time_is_after_jiffies(ts_ifnames_last + SYSINFO_INTERVAL * HZ)) return; rtnl_lock(); for_each_netdev_ns(&init_net, dev) { export_dev(dev); } rtnl_unlock(); ts_ifnames_last = jiffies; } /* under pause_scan_worker() */ static void netflow_switch_version(const int ver) { protocol = ver; if (protocol == 5) { memset(&pdu, 0, sizeof(pdu)); pdu_data_used = NULL; pdu_high_wm = NULL; netflow_export_flow = &netflow_export_flow_v5; netflow_export_pdu = &netflow_export_pdu_v5; } else if (protocol == 9) { pdu_data_used = pdu.v9.data; pdu_high_wm = (unsigned char *)&pdu + sizeof(pdu.v9); netflow_export_flow = &netflow_export_flow_tpl; netflow_export_pdu = &netflow_export_pdu_v9; } else { /* IPFIX */ pdu_data_used = pdu.ipfix.data; pdu_high_wm = (unsigned char *)&pdu + sizeof(pdu.ipfix); netflow_export_flow = &netflow_export_flow_tpl; netflow_export_pdu = &netflow_export_pdu_ipfix; } pdu.version = htons(protocol); free_templates(); pdu_flow_records = pdu_data_records = pdu_tpl_records = 0; pdu_flowset = NULL; printk(KERN_INFO "ipt_NETFLOW protocol version %d (%s) enabled.\n", protocol, protocol == 10? "IPFIX" : "NetFlow"); } #ifdef CONFIG_NF_NAT_NEEDED static void export_nat_event(struct nat_event *nel) { static struct ipt_netflow nf = { { NULL } }; nf.tuple.l3proto = AF_INET; nf.tuple.protocol = nel->protocol; nf.nat = nel; /* this is also flag of dummy flow */ nf.tcp_flags = (nel->nat_event == NAT_DESTROY)? TCP_FIN_RST : TCP_SYN_ACK; if (protocol >= 9) { nf.nf_ts_obs = nel->ts_ktime; nf.tuple.src.ip = nel->pre.s_addr; nf.tuple.dst.ip = nel->pre.d_addr; nf.tuple.s_port = nel->pre.s_port; nf.tuple.d_port = nel->pre.d_port; netflow_export_flow(&nf); } else { /* v5 */ /* The weird v5 packet(s). * src and dst will be same as in data flow from the FORWARD chain * where src is pre-nat src ip and dst is post-nat dst ip. * What we lacking here is external src ip for SNAT, or * pre-nat dst ip for DNAT. We will put this into Nexthop field * with port into src/dst AS field. tcp_flags will distinguish it's * start or stop event. Two flows in case of full nat. 
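 * For example (addresses are hypothetical): SNAT of 10.0.0.1:1024 to
 * 198.51.100.1:4096 yields one record with Nexthop 198.51.100.1 and
 * src AS 4096; DNAT yields the mirror record with the pre-nat dst ip
 * in Nexthop and the pre-nat dst port in dst AS.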
*/ nf.tuple.src.ip = nel->pre.s_addr; nf.tuple.s_port = nel->pre.s_port; nf.tuple.dst.ip = nel->post.d_addr; nf.tuple.d_port = nel->post.d_port; nf.nf_ts_first = nel->ts_jiffies; nf.nf_ts_last = nel->ts_jiffies; if (nel->pre.s_addr != nel->post.s_addr || nel->pre.s_port != nel->post.s_port) { nf.nh.ip = nel->post.s_addr; nf.s_as = nel->post.s_port; nf.d_as = 0; netflow_export_flow(&nf); } if (nel->pre.d_addr != nel->post.d_addr || nel->pre.d_port != nel->post.d_port) { nf.nh.ip = nel->pre.d_addr; nf.s_as = 0; nf.d_as = nel->pre.d_port; netflow_export_flow(&nf); } } kfree(nel); } #endif /* CONFIG_NF_NAT_NEEDED */ static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout, const unsigned long j) { return ((j - nf->nf_ts_first) > a_timeout) || nf->nr_bytes >= FLOW_FULL_WATERMARK; } /* return flowEndReason (rfc5102) */ /* i_timeout == 0 is flush */ static inline int inactive_needs_export(const struct ipt_netflow *nf, const long i_timeout, const unsigned long j) { if (likely(i_timeout)) { if (unlikely((j - nf->nf_ts_last) > i_timeout)) { if (nf->tuple.protocol == IPPROTO_TCP && (nf->tcp_flags & TCP_FIN_RST)) return 0x03; /* end of Flow detected */ else return 0x01; /* idle timeout */ } else return 0; } else return 0x04; /* forced end */ } /* helper which also record to nf->flowEndReason */ static inline int needs_export_rec(struct ipt_netflow *nf, const long i_timeout, const long a_timeout, const unsigned long j) { int reason = inactive_needs_export(nf, i_timeout, j); if (!reason && active_needs_export(nf, a_timeout, j)) reason = 0x02; /* active timeout or just active flow */ return (nf->flowEndReason = reason); } /* could be called with zero to flush cache and pdu */ /* this function is guaranteed to be called non-concurrently */ /* return number of pdus sent */ static int netflow_scan_and_export(const int flush) { const long i_timeout = flush? 
0 : inactive_timeout * HZ; const long a_timeout = active_timeout * HZ; #ifdef HAVE_LLIST struct llist_node *node; #endif const int pdu_c = pdu_count; LIST_HEAD(export_list); struct ipt_netflow *nf, *tmp; int i; #ifdef ENABLE_SAMPLER unsigned char mode; #endif #ifdef CONFIG_NF_NAT_NEEDED LIST_HEAD(nat_list_export); /* nat events to export */ #endif if (protocol >= 9) { netflow_export_stats(); #ifdef ENABLE_SAMPLER export_sampler_parameters(); #endif export_ifnames(); } read_lock_bh(&htable_rwlock); for (i = 0; i < LOCK_COUNT; i++) { struct stripe_entry *stripe = &htable_stripes[i]; if (!spin_trylock(&stripe->lock)) { ++wk_trylock; continue; } list_for_each_entry_safe_reverse(nf, tmp, &stripe->list, flows_list) { ++wk_count; if (needs_export_rec(nf, i_timeout, a_timeout, jiffies)) { hlist_del(&nf->hlist); list_del(&nf->flows_list); list_add(&nf->flows_list, &export_list); } else { /* all flows which need to be exported is always at the tail * so if no more exportable flows we can break */ break; } } spin_unlock(&stripe->lock); } read_unlock_bh(&htable_rwlock); #ifdef HAVE_LLIST node = llist_del_all(&export_llist); while (node) { struct llist_node *next = node->next; nf = llist_entry(node, struct ipt_netflow, flows_llnode); ++wk_llist; list_add(&nf->flows_list, &export_list); node = next; } #endif #ifdef ENABLE_SAMPLER mode = get_sampler_mode(); #endif set_jiffies_base(); list_for_each_entry_safe(nf, tmp, &export_list, flows_list) { NETFLOW_STAT_ADD(pkt_out, nf->nr_packets); NETFLOW_STAT_ADD(traf_out, nf->nr_bytes); list_del(&nf->flows_list); #ifdef ENABLE_SAMPLER if (mode) { const unsigned int interval = get_sampler_interval(); unsigned int val; /* [0..interval) */ atomic64_inc(&flows_observed); NETFLOW_STAT_ADD_ATOMIC(pkts_observed, nf->nr_packets); switch (mode) { case SAMPLER_DETERMINISTIC: val = nf->sampler_count % interval; break; case SAMPLER_RANDOM: val = prandom_u32_max(interval); break; default: /* SAMPLER_HASH */ val = 0; } if (val) { ipt_netflow_free(nf); continue; } atomic64_inc(&flows_selected); NETFLOW_STAT_ADD_ATOMIC(pkts_selected, nf->nr_packets); } #endif netflow_export_flow(nf); } #ifdef CONFIG_NF_NAT_NEEDED spin_lock_bh(&nat_lock); list_splice_init(&nat_list, &nat_list_export); spin_unlock_bh(&nat_lock); while (!list_empty(&nat_list_export)) { struct nat_event *nel; nel = list_first_entry(&nat_list_export, struct nat_event, list); list_del(&nel->list); export_nat_event(nel); } #endif /* flush flows stored in pdu if there no new flows for too long */ /* Note: using >= to allow flow purge on zero timeout */ if ((jiffies - pdu_ts_mod) >= i_timeout || pdu_needs_export) { netflow_export_pdu(); pdu_needs_export = 0; } return pdu_count - pdu_c; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) static void netflow_work_fn(void *dummy) #else static void netflow_work_fn(struct work_struct *dummy) #endif { int pdus; wk_count = 0; wk_trylock = 0; wk_llist = 0; wk_cpu = smp_processor_id(); wk_start = jiffies; pdus = netflow_scan_and_export(DONT_FLUSH); _schedule_scan_worker(pdus); wk_busy = jiffies - wk_start; } #define RATESHIFT 2 #define SAMPLERATE (RATESHIFT*RATESHIFT) #define NUMSAMPLES(minutes) (minutes * 60 / SAMPLERATE) #define _A(v, m) (v) * (1024 * 2 / (NUMSAMPLES(m) + 1)) >> 10 // x * (1024 / y) >> 10 is because I can not just divide long long integer // Note that CALC_RATE arguments should never be unsigned. 
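/* Worked example (illustrative): for minutes=1, NUMSAMPLES(1) = 60/4 = 15,
 * so _A(v, 1) = v * (2048/16) >> 10 = v/8, i.e. CALC_RATE below moves the
 * EWMA towards the current sample by 1/8 of the difference on every
 * SAMPLERATE (4 second) timer tick, which approximates alpha = 2/(N+1). */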
#define CALC_RATE(ewma, cur, minutes) ewma += _A(cur - ewma, minutes) // calculate EWMA throughput rate for whole module static void rate_timer_calc( #if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) unsigned long dummy #else struct timer_list *t #endif ) { static u64 old_pkt_total = 0; static u64 old_traf_total = 0; static u64 old_searched = 0; static u64 old_found = 0; static u64 old_notfound = 0; u64 searched = 0; u64 found = 0; u64 notfound = 0; int dsrch, dfnd, dnfnd; u64 pkt_total = 0; u64 traf_total = 0; int cpu; for_each_present_cpu(cpu) { int metrt; struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); u64 pkt_t = st->pkt_total; pkt_total += pkt_t; st->pkt_total_rate = (pkt_t - st->pkt_total_prev) >> RATESHIFT; st->pkt_total_prev = pkt_t; traf_total += st->traf_total; searched += st->searched; found += st->found; notfound += st->notfound; st->exported_rate = (st->exported_traf - st->exported_trafo) >> RATESHIFT; st->exported_trafo = st->exported_traf; /* calculate hash metric per cpu */ dsrch = st->searched - st->old_searched; dfnd = st->found - st->old_found; dnfnd = st->notfound - st->old_notfound; /* zero values are not accounted, because only usage is interesting, not nonusage */ metrt = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : st->metric; CALC_RATE(st->metric, metrt, 1); st->old_searched = st->searched; st->old_found = st->found; st->old_notfound = st->notfound; } sec_prate = (pkt_total - old_pkt_total) >> RATESHIFT; CALC_RATE(min5_prate, sec_prate, 5); CALC_RATE(min_prate, sec_prate, 1); old_pkt_total = pkt_total; sec_brate = ((traf_total - old_traf_total) * 8) >> RATESHIFT; CALC_RATE(min5_brate, sec_brate, 5); CALC_RATE(min_brate, sec_brate, 1); old_traf_total = traf_total; /* hash stat */ dsrch = searched - old_searched; dfnd = found - old_found; dnfnd = notfound - old_notfound; old_searched = searched; old_found = found; old_notfound = notfound; /* if there is no access to hash keep rate steady */ metric = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric; CALC_RATE(min15_metric, metric, 15); CALC_RATE(min5_metric, metric, 5); CALC_RATE(min_metric, metric, 1); /* yes, timer delay is not accounted, but this stat is just estimational */ mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE)); } #ifdef CONFIG_NF_NAT_NEEDED #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) static struct nf_ct_event_notifier *saved_event_cb __read_mostly = NULL; static int netflow_conntrack_event(const unsigned int events, struct nf_ct_event *item) #else static int netflow_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr) #endif { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) struct nf_conn *ct = item->ct; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct); #endif #else struct nf_conn *ct = (struct nf_conn *)ptr; #endif struct nat_event *nel; const struct nf_conntrack_tuple *t; int ret = NOTIFY_DONE; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) struct nf_ct_event_notifier *notifier; /* Call netlink first. 
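 * I.e. chain to the conntrack notifier (normally ctnetlink) that was
 * saved in saved_event_cb when we hijacked the single notifier slot,
 * so the original consumer keeps receiving events.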
*/ notifier = rcu_dereference(saved_event_cb); if (likely(notifier)) ret = notifier->fcn(events, item); #endif if (unlikely(!natevents)) return ret; if (!(events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED) | (1 << IPCT_DESTROY)))) return ret; if (!(ct->status & IPS_NAT_MASK)) return ret; if (unlikely(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET || ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num != AF_INET)) { /* Well, there is no linux NAT for IPv6 anyway. */ return ret; } if (!(nel = kzalloc(sizeof(struct nat_event), GFP_ATOMIC))) { printk(KERN_ERR "ipt_NETFLOW: can't kmalloc nat event\n"); return ret; } nel->ts_jiffies = jiffies; t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; nel->protocol = t->dst.protonum; nel->pre.s_addr = t->src.u3.ip; nel->pre.d_addr = t->dst.u3.ip; nel->pre.s_port = t->src.u.all; nel->pre.d_port = t->dst.u.all; t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; /* reply is reversed */ nel->post.s_addr = t->dst.u3.ip; nel->post.d_addr = t->src.u3.ip; nel->post.s_port = t->dst.u.all; nel->post.d_port = t->src.u.all; if (events & (1 << IPCT_DESTROY)) { nel->nat_event = NAT_DESTROY; nat_events_stop++; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) if (likely(tstamp)) nel->ts_ktime = ktime_set(0, tstamp->stop); #endif /* after 2.6.38 */ } else { nel->nat_event = NAT_CREATE; nat_events_start++; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) if (likely(tstamp)) nel->ts_ktime = ktime_set(0, tstamp->start); #endif /* after 2.6.38 */ } if (ktime_to_ns(nel->ts_ktime) == 0) nel->ts_ktime = ktime_get_real(); spin_lock_bh(&nat_lock); list_add_tail(&nel->list, &nat_list); spin_unlock_bh(&nat_lock); return ret; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) static struct notifier_block ctnl_notifier = { .notifier_call = netflow_conntrack_event }; #else static struct nf_ct_event_notifier ctnl_notifier = { .fcn = netflow_conntrack_event }; #endif /* since 2.6.31 */ #endif /* CONFIG_NF_NAT_NEEDED */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) && \ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) static bool #else static int #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) netflow_target_check(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) unsigned int targinfosize, #endif unsigned int hook_mask) { #else netflow_target_check(const struct xt_tgchk_param *par) { const char *tablename = par->table; const struct xt_target *target = par->target; #endif if (strcmp("nat", tablename) == 0) { /* In the nat table we only see single packet per flow, which is useless. */ printk(KERN_ERR "%s target: is not valid in %s table\n", target->name, tablename); return CHECK_FAIL; } if (target->family == AF_INET6 && protocol == 5) { printk(KERN_ERR "ip6tables NETFLOW target is meaningful for protocol 9 or 10 only.\n"); return CHECK_FAIL; } return CHECK_OK; } #define SetXBit(x) (0x8000 >> (x)) /* Proper bit for htons later. */ static inline __u16 observed_hdrs(const __u8 currenthdr) { switch (currenthdr) { case IPPROTO_TCP: case IPPROTO_UDP: /* For speed, in case switch is not optimized. */ return 0; case IPPROTO_DSTOPTS: return SetXBit(0); case IPPROTO_HOPOPTS: return SetXBit(1); case IPPROTO_ROUTING: return SetXBit(5); case IPPROTO_MH: return SetXBit(12); case IPPROTO_ESP: return SetXBit(13); case IPPROTO_AH: return SetXBit(14); case IPPROTO_COMP: return SetXBit(15); case IPPROTO_FRAGMENT: /* Handled elsewhere. */ /* Next is known headers. 
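 * These (and the fragment case falling through) return 0, i.e. no bit
 * in the extension-header map; anything unlisted gets SetXBit(3) below.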
*/ case IPPROTO_ICMPV6: case IPPROTO_UDPLITE: case IPPROTO_IPIP: case IPPROTO_PIM: case IPPROTO_GRE: case IPPROTO_SCTP: #ifdef IPPROTO_L2TP case IPPROTO_L2TP: #endif case IPPROTO_DCCP: return 0; } return SetXBit(3); /* Unknown header. */ } /* http://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml */ static const __u8 ip4_opt_table[] = { [7] = 0, /* RR */ /* parsed manually because of 0 */ [134] = 1, /* CIPSO */ [133] = 2, /* E-SEC */ [68] = 3, /* TS */ [131] = 4, /* LSR */ [130] = 5, /* SEC */ [1] = 6, /* NOP */ [0] = 7, /* EOOL */ [15] = 8, /* ENCODE */ [142] = 9, /* VISA */ [205] = 10, /* FINN */ [12] = 11, /* MTUR */ [11] = 12, /* MTUP */ [10] = 13, /* ZSU */ [137] = 14, /* SSR */ [136] = 15, /* SID */ [151] = 16, /* DPS */ [150] = 17, /* NSAPA */ [149] = 18, /* SDB */ [147] = 19, /* ADDEXT */ [148] = 20, /* RTRALT */ [82] = 21, /* TR */ [145] = 22, /* EIP */ [144] = 23, /* IMITD */ [30] = 25, /* EXP */ [94] = 25, /* EXP */ [158] = 25, /* EXP */ [222] = 25, /* EXP */ [25] = 30, /* QS */ [152] = 31, /* UMP */ }; /* Parse IPv4 Options array int ipv4Options IPFIX value. */ static inline __u32 ip4_options(const u_int8_t *p, const unsigned int optsize) { __u32 ret = 0; unsigned int i; for (i = 0; likely(i < optsize); ) { u_int8_t op = p[i++]; if (op == 7) /* RR: bit 0 */ ret |= 1; else if (likely(op < ARRAY_SIZE(ip4_opt_table))) { /* Btw, IANA doc is messed up in a crazy way: * http://www.ietf.org/mail-archive/web/ipfix/current/msg06008.html (2011) * I decided to follow IANA _text_ description from * http://www.iana.org/assignments/ipfix/ipfix.xhtml (2013-09-18) * * Set proper bit for htonl later. */ if (ip4_opt_table[op]) ret |= 1 << (32 - ip4_opt_table[op]); } if (likely(i >= optsize || op == 0)) break; else if (unlikely(op == 1)) continue; else if (unlikely(p[i] < 2)) break; else i += p[i] - 1; } return ret; } #define TCPHDR_MAXSIZE (4 * 15) /* List of options: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml */ static inline __u32 tcp_options(const struct sk_buff *skb, const unsigned int ptr, const struct tcphdr *th) { const unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); __u8 _opt[TCPHDR_MAXSIZE]; const u_int8_t *p; __u32 ret; unsigned int i; p = skb_header_pointer(skb, ptr + sizeof(struct tcphdr), optsize, _opt); if (unlikely(!p)) return 0; ret = 0; for (i = 0; likely(i < optsize); ) { u_int8_t opt = p[i++]; if (likely(opt < 32)) { /* IANA doc is messed up, see above. */ ret |= 1 << (32 - opt); } if (likely(i >= optsize || opt == 0)) break; else if (unlikely(opt == 1)) continue; else if (unlikely(p[i] < 2)) /* "silly options" */ break; else i += p[i] - 1; } return ret; } /* check if data region is in header boundary */ inline static int skb_in_header(const struct sk_buff *skb, const void *ptr, size_t off) { return ((unsigned char *)ptr + off) <= skb->data; } static inline int eth_p_vlan(__be16 eth_type) { return eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD); } /* Extract all L2 header data, currently (in iptables) skb->data is * pointing to network_header, so we use mac_header instead. */ /* Parse eth header, then vlans, then mpls. 
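 * It fills tuple->h_src/h_dst, up to MAX_VLAN_TAGS 802.1Q/802.1ad tags
 * and up to MPLS_DEPTH labels (when the respective options are compiled
 * in), bailing out whenever a header would extend past skb->data, as
 * checked by skb_in_header() above.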
*/ static void parse_l2_header(const struct sk_buff *skb, struct ipt_netflow_tuple *tuple) { #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) || defined(MPLS_DEPTH) #define ENABLE_L2 unsigned char *mac_header = skb_mac_header(skb); # if defined(ENABLE_VLAN) || defined(MPLS_DEPTH) unsigned int hdr_depth; __be16 proto; # endif # ifdef ENABLE_VLAN int tag_num = 0; /* get vlan tag that is saved in skb->vlan_tci */ if (vlan_tx_tag_present(skb)) tuple->tag[tag_num++] = htons(vlan_tx_tag_get(skb)); else if (skb->dev && is_vlan_dev(skb->dev)) { struct net_device *vlan_dev = skb->dev; # ifdef HAVE_VLAN_DEV_PRIV struct vlan_dev_priv *vlan = vlan_dev_priv(vlan_dev); /* `if` condition is `#if`ed intentionally, and this is * just inversion of conditional from vlan_do_receive */ if (!(vlan && !(vlan->flags & VLAN_FLAG_REORDER_HDR) # if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) && !netif_is_macvlan_port(vlan_dev) && !netif_is_bridge_port(vlan_dev) # endif )) # endif tuple->tag[tag_num++] = htons(vlan_dev_vlan_id(vlan_dev)); } # endif if (mac_header < skb->head || mac_header + ETH_HLEN > skb->data) return; # ifdef ENABLE_MAC memcpy(&tuple->h_dst, eth_hdr(skb)->h_dest, ETH_ALEN); memcpy(&tuple->h_src, eth_hdr(skb)->h_source, ETH_ALEN); # endif # if defined(ENABLE_VLAN) || defined(MPLS_DEPTH) hdr_depth = ETH_HLEN; proto = eth_hdr(skb)->h_proto; if (eth_p_vlan(proto)) { do { const struct vlan_hdr *vh; vh = (struct vlan_hdr *)(mac_header + hdr_depth); if (!skb_in_header(skb, vh, VLAN_HLEN)) return; proto = vh->h_vlan_encapsulated_proto; # ifdef ENABLE_VLAN if (tag_num < MAX_VLAN_TAGS) tuple->tag[tag_num++] = vh->h_vlan_TCI; # endif hdr_depth += VLAN_HLEN; } while (eth_p_vlan(proto)); } # ifdef MPLS_DEPTH if (eth_p_mpls(proto)) { const struct mpls_label *mpls; int label_num = 0; do { mpls = (struct mpls_label *)(mac_header + hdr_depth); if (!skb_in_header(skb, mpls, MPLS_HLEN)) return; if (label_num < MPLS_DEPTH) tuple->mpls[label_num++] = mpls->entry; hdr_depth += MPLS_HLEN; } while (!(mpls->entry & htonl(MPLS_LS_S_MASK))); } # endif # endif /* defined(ENABLE_VLAN) || defined(MPLS_DEPTH) */ #endif /* defined(ENABLE_MAC) || defined(ENABLE_VLAN) || defined(MPLS_DEPTH) */ } /* packet receiver */ static unsigned int netflow_target( #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) struct sk_buff **pskb, #else struct sk_buff *skb, #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) const struct net_device *if_in, const struct net_device *if_out, unsigned int hooknum, # if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) const struct xt_target *target, # endif # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) const void *targinfo, void *userinfo # else const void *targinfo # endif #else /* since 2.6.28 */ # define if_in xt_in(par) # define if_out xt_out(par) # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) const struct xt_target_param *par # else const struct xt_action_param *par # endif #endif ) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) # ifndef ENABLE_L2 /* pskb_may_pull() may modify skb */ const # endif struct sk_buff *skb = *pskb; #endif union { struct iphdr ip; struct ipv6hdr ip6; } _iph, *iph; u_int32_t hash; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) const int family = target->family; #else # ifdef ENABLE_DIRECTION const int hooknum = xt_hooknum(par); # endif const int family = xt_family(par); #endif struct ipt_netflow_tuple tuple; struct ipt_netflow *nf; __u8 tcp_flags; #ifdef ENABLE_AGGR struct netflow_aggr_n *aggr_n; struct netflow_aggr_p *aggr_p; #endif __u8 s_mask, d_mask; unsigned int ptr; 
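/* Parsing state filled below: ptr becomes the offset of the transport
 * header within the skb, pkt_len the L3 packet length, and the
 * options/tcpoptions bitmaps feed the corresponding template fields. */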
int fragment; size_t pkt_len; int options = 0; int tcpoptions = 0; struct stripe_entry *stripe; if (unlikely( #ifdef ENABLE_L2 /* to ensure that full L2 headers are present */ unlikely(!pskb_may_pull(skb, 0)) || #endif !(iph = skb_header_pointer(skb, 0, (likely(family == AF_INET))? sizeof(_iph.ip) : sizeof(_iph.ip6), &iph)))) { NETFLOW_STAT_INC(truncated); NETFLOW_STAT_INC(pkt_drop); NETFLOW_STAT_ADD(traf_drop, skb->len); NETFLOW_STAT_TS(drop); return IPT_CONTINUE; } memset(&tuple, 0, sizeof(tuple)); tuple.l3proto = family; #ifdef ENABLE_PHYSDEV_OVER if (nf_bridge_info_get(skb) && nf_bridge_info_get(skb)->physindev) tuple.i_ifc = nf_bridge_info_get(skb)->physindev->ifindex; else /* FALLTHROUGH */ #endif tuple.i_ifc = if_in? if_in->ifindex : -1; tcp_flags = 0; s_mask = 0; d_mask = 0; parse_l2_header(skb, &tuple); if (likely(family == AF_INET)) { tuple.src = (union nf_inet_addr){ .ip = iph->ip.saddr }; tuple.dst = (union nf_inet_addr){ .ip = iph->ip.daddr }; tuple.tos = iph->ip.tos; tuple.protocol = iph->ip.protocol; fragment = unlikely(iph->ip.frag_off & htons(IP_OFFSET)); ptr = iph->ip.ihl * 4; pkt_len = ntohs(iph->ip.tot_len); #define IPHDR_MAXSIZE (4 * 15) if (unlikely(iph->ip.ihl * 4 > sizeof(struct iphdr))) { u_int8_t _opt[IPHDR_MAXSIZE - sizeof(struct iphdr)]; const u_int8_t *op; unsigned int optsize = iph->ip.ihl * 4 - sizeof(struct iphdr); op = skb_header_pointer(skb, sizeof(struct iphdr), optsize, _opt); if (likely(op)) options = ip4_options(op, optsize); } } else { /* AF_INET6 */ __u8 currenthdr; tuple.src.in6 = iph->ip6.saddr; tuple.dst.in6 = iph->ip6.daddr; tuple.tos = iph->ip6.priority; fragment = 0; ptr = sizeof(struct ipv6hdr); pkt_len = ntohs(iph->ip6.payload_len) + sizeof(struct ipv6hdr); currenthdr = iph->ip6.nexthdr; while (currenthdr != NEXTHDR_NONE && ipv6_ext_hdr(currenthdr)) { struct ipv6_opt_hdr _hdr; const struct ipv6_opt_hdr *hp; unsigned int hdrlen = 0; options |= observed_hdrs(currenthdr); hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (hp == NULL) { /* We have src/dst, so must account something. */ tuple.protocol = currenthdr; fragment = 3; goto do_protocols; } switch (currenthdr) { case IPPROTO_FRAGMENT: { struct frag_hdr _fhdr; const struct frag_hdr *fh; fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), &_fhdr); if (fh == NULL) { tuple.protocol = currenthdr; fragment = 2; goto do_protocols; } fragment = 1; #define FRA0 SetXBit(4) /* Fragment header - first fragment */ #define FRA1 SetXBit(6) /* Fragmentation header - not first fragment */ options |= (ntohs(fh->frag_off) & 0xFFF8)? FRA1 : FRA0; hdrlen = 8; break; } case IPPROTO_AH: { struct ip_auth_hdr _ahdr, *ap; if (likely(ap = skb_header_pointer(skb, ptr, 8, &_ahdr))) SAVE_SPI(tuple, ap->spi); hdrlen = (ap->hdrlen + 2) << 2; break; } case IPPROTO_ESP: /* After this header everything is encrypted. 
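 * Header walking therefore stops here: ESP becomes the flow protocol
 * and its SPI is picked up later in the do_protocols switch.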
*/ tuple.protocol = currenthdr; goto do_protocols; default: hdrlen = ipv6_optlen(hp); } currenthdr = hp->nexthdr; ptr += hdrlen; } tuple.protocol = currenthdr; options |= observed_hdrs(currenthdr); } do_protocols: if (fragment) { /* if conntrack is enabled it should defrag on pre-routing and local-out */ NETFLOW_STAT_INC(frags); } else { switch (tuple.protocol) { case IPPROTO_TCP: { struct tcphdr _hdr, *hp; if (likely(hp = skb_header_pointer(skb, ptr, 14, &_hdr))) { tuple.s_port = hp->source; tuple.d_port = hp->dest; tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16); if (unlikely(hp->doff * 4 > sizeof(struct tcphdr))) tcpoptions = tcp_options(skb, ptr, hp); } break; } case IPPROTO_UDP: case IPPROTO_UDPLITE: case IPPROTO_SCTP: { struct udphdr _hdr, *hp; if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) { tuple.s_port = hp->source; tuple.d_port = hp->dest; } break; } case IPPROTO_ICMP: { struct icmphdr _hdr, *hp; if (likely(family == AF_INET) && likely(hp = skb_header_pointer(skb, ptr, 2, &_hdr))) tuple.d_port = htons((hp->type << 8) | hp->code); break; } case IPPROTO_ICMPV6: { struct icmp6hdr _icmp6h, *ic; if (likely(family == AF_INET6) && likely(ic = skb_header_pointer(skb, ptr, 2, &_icmp6h))) tuple.d_port = htons((ic->icmp6_type << 8) | ic->icmp6_code); break; } case IPPROTO_IGMP: { struct igmphdr _hdr, *hp; if (likely(hp = skb_header_pointer(skb, ptr, 1, &_hdr))) tuple.d_port = hp->type; break; } case IPPROTO_AH: { /* IPSEC */ struct ip_auth_hdr _hdr, *hp; /* This is for IPv4 only. IPv6 it's parsed above. */ if (likely(family == AF_INET) && likely(hp = skb_header_pointer(skb, ptr, 8, &_hdr))) SAVE_SPI(tuple, hp->spi); break; } case IPPROTO_ESP: { struct ip_esp_hdr _hdr, *hp; /* This is for both IPv4 and IPv6. */ if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) SAVE_SPI(tuple, hp->spi); break; } } } /* not fragmented */ #ifdef ENABLE_AGGR /* aggregate networks */ read_lock(&aggr_lock); if (family == AF_INET) { list_for_each_entry(aggr_n, &aggr_n_list, list) if (unlikely((ntohl(tuple.src.ip) & aggr_n->mask) == aggr_n->addr)) { tuple.src.ip &= htonl(aggr_n->aggr_mask); s_mask = aggr_n->prefix; atomic_inc(&aggr_n->usage); break; } list_for_each_entry(aggr_n, &aggr_n_list, list) if (unlikely((ntohl(tuple.dst.ip) & aggr_n->mask) == aggr_n->addr)) { tuple.dst.ip &= htonl(aggr_n->aggr_mask); d_mask = aggr_n->prefix; atomic_inc(&aggr_n->usage); break; } } if (tuple.protocol == IPPROTO_TCP || tuple.protocol == IPPROTO_UDP || tuple.protocol == IPPROTO_SCTP || tuple.protocol == IPPROTO_UDPLITE) { /* aggregate ports */ list_for_each_entry(aggr_p, &aggr_p_list, list) if (unlikely(ntohs(tuple.s_port) >= aggr_p->port1 && ntohs(tuple.s_port) <= aggr_p->port2)) { tuple.s_port = htons(aggr_p->aggr_port); atomic_inc(&aggr_p->usage); break; } list_for_each_entry(aggr_p, &aggr_p_list, list) if (unlikely(ntohs(tuple.d_port) >= aggr_p->port1 && ntohs(tuple.d_port) <= aggr_p->port2)) { tuple.d_port = htons(aggr_p->aggr_port); atomic_inc(&aggr_p->usage); break; } } read_unlock(&aggr_lock); #endif #ifdef SAMPLING_HASH hash = __hash_netflow(&tuple); { struct sampling hs = samp; if (hs.mode == SAMPLER_HASH) { NETFLOW_STAT_INC(pkts_observed); if ((u32)(((u64)hash * hs.interval) >> 32)) return IPT_CONTINUE; NETFLOW_STAT_INC(pkts_selected); } } hash %= htable_size; #else /* !SAMPLING_HASH */ hash = hash_netflow(&tuple); #endif read_lock_bh(&htable_rwlock); stripe = &htable_stripes[hash & LOCK_COUNT_MASK]; spin_lock(&stripe->lock); /* record */ nf = ipt_netflow_find(&tuple, hash); if (unlikely(!nf)) 
{ struct rtable *rt; if (unlikely(maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows)) { /* This is DOS attack prevention */ NETFLOW_STAT_INC(maxflows_err); NETFLOW_STAT_INC(pkt_drop); NETFLOW_STAT_ADD(traf_drop, pkt_len); NETFLOW_STAT_TS(drop); goto unlock_return; } nf = ipt_netflow_alloc(&tuple); if (unlikely(!nf || IS_ERR(nf))) { NETFLOW_STAT_INC(alloc_err); NETFLOW_STAT_INC(pkt_drop); NETFLOW_STAT_ADD(traf_drop, pkt_len); NETFLOW_STAT_TS(drop); goto unlock_return; } hlist_add_head(&nf->hlist, &htable[hash]); #ifdef ENABLE_SAMPLER /* I only increment if deterministic sampler is enabled to * avoid cache conflict by default. */ if (get_sampler_mode() == SAMPLER_DETERMINISTIC) nf->sampler_count = atomic_inc_return(&flow_count); #endif nf->nf_ts_first = jiffies; nf->tcp_flags = tcp_flags; nf->o_ifc = if_out? if_out->ifindex : -1; #ifdef ENABLE_PHYSDEV_OVER if (nf_bridge_info_get(skb) && nf_bridge_info_get(skb)->physoutdev) nf->o_ifc = nf_bridge_info_get(skb)->physoutdev->ifindex; #endif #ifdef SNMP_RULES rcu_read_lock(); #else # define resolve_snmp(dev) ((dev)? (dev)->ifindex : -1) #endif /* copy and snmp-resolve device with physdev overriding normal dev */ #define copy_dev(out, physdev, dev) \ if (nf_bridge_info_get(skb) && nf_bridge_info_get(skb)->physdev) \ out = resolve_snmp(nf_bridge_info_get(skb)->physdev); \ else \ out = resolve_snmp(dev); #ifdef ENABLE_PHYSDEV copy_dev(nf->o_ifphys, physoutdev, if_out); copy_dev(nf->i_ifphys, physindev, if_in); #endif #ifdef SNMP_RULES # ifdef ENABLE_PHYSDEV_OVER copy_dev(nf->o_ifcr, physoutdev, if_out); copy_dev(nf->i_ifcr, physindev, if_in); # else nf->o_ifcr = resolve_snmp(if_out); nf->i_ifcr = resolve_snmp(if_in); # endif rcu_read_unlock(); #endif nf->s_mask = s_mask; nf->d_mask = d_mask; #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) nf->ethernetType = skb->protocol; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) rt = (struct rtable *)skb->dst; #else /* since 2.6.26 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) rt = skb->rtable; #else /* since 2.6.31 */ rt = skb_rtable(skb); #endif #endif #ifdef ENABLE_DIRECTION nf->hooknumx = hooknum + 1; #endif if (likely(family == AF_INET)) { #if LINUX_VERSION_CODE < KERNEL_VERSION(5,2,0) if (rt) nf->nh.ip = rt->rt_gateway; #else if (rt && rt->rt_gw_family == AF_INET) nf->nh.ip = rt->rt_gw4; #endif } else { if (rt) nf->nh.in6 = ((struct rt6_info *)rt)->rt6i_gateway; nf->flow_label = (iph->ip6.flow_lbl[0] << 16) | (iph->ip6.flow_lbl[1] << 8) | (iph->ip6.flow_lbl[2]); } #if 0 if (unlikely(debug > 2)) printk(KERN_INFO "ipt_NETFLOW: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n", atomic_read(&ipt_netflow_count), tuple.i_ifc, nf->o_ifc, NIPQUAD(tuple.src.ip), ntohs(tuple.s_port), NIPQUAD(tuple.dst.ip), ntohs(tuple.d_port)); #endif } nf->nr_packets++; nf->nr_bytes += pkt_len; nf->nf_ts_last = jiffies; nf->tcp_flags |= tcp_flags; nf->options |= options; if (tuple.protocol == IPPROTO_TCP) nf->tcpoptions |= tcpoptions; NETFLOW_STAT_INC(pkt_total); NETFLOW_STAT_ADD(traf_total, pkt_len); #define LIST_IS_NULL(name) (!(name)->next) if (unlikely(active_needs_export(nf, active_timeout * HZ, jiffies))) { /* ok, if this is active flow to be exported */ #ifdef HAVE_LLIST /* delete from hash and add to the export llist */ hlist_del(&nf->hlist); if (!LIST_IS_NULL(&nf->flows_list)) list_del(&nf->flows_list); llist_add(&nf->flows_llnode, &export_llist); #else /* bubble it to the tail */ if (LIST_IS_NULL(&nf->flows_list)) list_add_tail(&nf->flows_list, &stripe->list); else 
list_move_tail(&nf->flows_list, &stripe->list); #endif /* Blog: I thought about forcing timer to wake up sooner if we have * enough exportable flows, but in fact this doesn't have much sense, * because this would only move flow data from one memory to another * (from our buffers to socket buffers, and socket buffers even have * limited size). But yes, this is disputable. */ } else { /* most recently accessed flows go to the head, old flows remain at the tail */ if (LIST_IS_NULL(&nf->flows_list)) list_add(&nf->flows_list, &stripe->list); else list_move(&nf->flows_list, &stripe->list); } unlock_return: spin_unlock(&stripe->lock); read_unlock_bh(&htable_rwlock); return IPT_CONTINUE; } #ifdef CONFIG_NF_NAT_NEEDED #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) /* Below 2.6.31 we don't need to handle callback chain manually. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) #define NET_STRUCT struct net *net #define NET_ARG net, #define nf_conntrack_event_cb net->ct.nf_conntrack_event_cb #else #define NET_STRUCT void #define NET_ARG #endif static int set_notifier_cb(NET_STRUCT) { struct nf_ct_event_notifier *notifier; notifier = rcu_dereference(nf_conntrack_event_cb); if (notifier == NULL) { /* Polite mode. */ nf_conntrack_register_notifier(NET_ARG &ctnl_notifier); } else if (notifier != &ctnl_notifier) { if (!saved_event_cb) saved_event_cb = notifier; else if (saved_event_cb != notifier) printk(KERN_ERR "natevents_net_init: %p != %p (report error.)\n", saved_event_cb, notifier); rcu_assign_pointer(nf_conntrack_event_cb, &ctnl_notifier); } else printk(KERN_ERR "ipt_NETFLOW: natevents already enabled.\n"); return 0; } static void unset_notifier_cb(NET_STRUCT) { struct nf_ct_event_notifier *notifier; notifier = rcu_dereference(nf_conntrack_event_cb); if (notifier == &ctnl_notifier) { if (saved_event_cb == NULL) nf_conntrack_unregister_notifier(NET_ARG &ctnl_notifier); else rcu_assign_pointer(nf_conntrack_event_cb, saved_event_cb); } else printk(KERN_ERR "ipt_NETFLOW: natevents already disabled.\n"); } #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) #undef nf_conntrack_event_cb static struct pernet_operations natevents_net_ops = { .init = set_notifier_cb, .exit = unset_notifier_cb }; #endif #endif /* since 2.6.31 */ static DEFINE_MUTEX(events_lock); static struct module *netlink_m; /* Both functions may be called multiple times. */ static void register_ct_events(void) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) #define NETLINK_M "nf_conntrack_netlink" #endif printk(KERN_INFO "ipt_NETFLOW: enable natevents.\n"); mutex_lock(&events_lock); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) /* Pre-load netlink module who will be first notifier * user, and then hijack nf_conntrack_event_cb from it. */ if ( #if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) !rcu_dereference(nf_conntrack_event_cb) || #endif !find_module(NETLINK_M)) { printk("Loading " NETLINK_M "\n"); request_module(NETLINK_M); } /* Reference netlink module to prevent it's unsafe unload before us. */ if (!netlink_m && (netlink_m = find_module(NETLINK_M))) { #if LINUX_VERSION_CODE < KERNEL_VERSION(5,9,0) use_module(THIS_MODULE, netlink_m); #else if (!try_module_get(netlink_m)) netlink_m = NULL; #endif } /* Register ct events callback. 
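 * On kernels v3.2+ this is done per net namespace via
 * register_pernet_subsys(), older kernels call set_notifier_cb() directly.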
*/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) register_pernet_subsys(&natevents_net_ops); #else set_notifier_cb(); #endif #else /* below v2.6.31 */ if (!natevents && nf_conntrack_register_notifier(&ctnl_notifier) < 0) printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n"); else #endif natevents = 1; mutex_unlock(&events_lock); } static void unregister_ct_events(void) { printk(KERN_INFO "ipt_NETFLOW: disable natevents.\n"); mutex_lock(&events_lock); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) unregister_pernet_subsys(&natevents_net_ops); #else /* < v3.2 */ unset_notifier_cb(); #endif /* v3.2 */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,9,0) module_put(netlink_m); netlink_m = NULL; #endif rcu_assign_pointer(saved_event_cb, NULL); #else /* < v2.6.31 */ nf_conntrack_unregister_notifier(&ctnl_notifier); #endif natevents = 0; mutex_unlock(&events_lock); } #endif /* CONFIG_NF_NAT_NEEDED */ static struct ipt_target ipt_netflow_reg[] __read_mostly = { { .name = "NETFLOW", .target = netflow_target, .checkentry = netflow_target_check, .family = AF_INET, .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING), .me = THIS_MODULE }, { .name = "NETFLOW", .target = netflow_target, .checkentry = netflow_target_check, .family = AF_INET6, .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING), .me = THIS_MODULE }, }; #ifdef CONFIG_PROC_FS static int register_stat(const char *name, # ifdef HAVE_PROC_OPS struct proc_ops *fops # else struct file_operations *fops # endif ) { struct proc_dir_entry *proc_stat; printk(KERN_INFO "netflow: registering: /proc/net/stat/%s\n", name); # if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) proc_stat = create_proc_entry(name, S_IRUGO, INIT_NET(proc_net_stat)); # else proc_stat = proc_create(name, S_IRUGO, INIT_NET(proc_net_stat), fops); # endif if (!proc_stat) { printk(KERN_ERR "Unable to create /proc/net/stat/%s entry\n", name); return 0; } # if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) proc_stat->proc_fops = fops; # endif # if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) proc_stat->owner = THIS_MODULE; # endif printk(KERN_INFO "netflow: registered: /proc/net/stat/%s\n", name); return 1; } #else # define register_stat(x, y) 1 #endif static int __init ipt_netflow_init(void) { int i; printk(KERN_INFO "ipt_NETFLOW version %s, srcversion %s\n", IPT_NETFLOW_VERSION, THIS_MODULE->srcversion); version_string_size = scnprintf(version_string, sizeof(version_string), "ipt_NETFLOW " IPT_NETFLOW_VERSION " %s", THIS_MODULE->srcversion); tpl_element_sizes[observationDomainName] = version_string_size + 1; start_ts.first = ktime_get_real(); clear_ipt_netflow_stat(); if (!hashsize) { /* use 1/1024 of memory, 1M for hash table on 1G box */ unsigned long memksize = (num_physpages << PAGE_SHIFT) / 1024; if (memksize > (5 * 1024 * 1024)) memksize = 5 * 1024 * 1024; hashsize = memksize / sizeof(struct hlist_head); } if (hashsize < LOCK_COUNT) hashsize = LOCK_COUNT; printk(KERN_INFO "ipt_NETFLOW: hashsize %u (%luK)\n", hashsize, hashsize * sizeof(struct hlist_head) / 1024); htable_size = hashsize; htable = alloc_hashtable(htable_size); if (!htable) { printk(KERN_ERR "Unable to create ipt_neflow_hash\n"); goto err; } #ifdef MPLS_DEPTH /* template_mpls is terminated on the MPLS_DEPTH mark, so, it * never send Element which can access mpls labels array above * its 
defined MPLS_DEPTH value. */ if (MPLS_DEPTH >= 0 && MPLS_DEPTH < 10) template_mpls.types[MPLS_LABELS_BASE_INDEX + MPLS_DEPTH] = 0; #endif for (i = 0; i < LOCK_COUNT; i++) { spin_lock_init(&htable_stripes[i].lock); INIT_LIST_HEAD(&htable_stripes[i].list); } ipt_netflow_cachep = kmem_cache_create("ipt_netflow", sizeof(struct ipt_netflow), 0, 0, NULL #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) , NULL #endif ); if (!ipt_netflow_cachep) { printk(KERN_ERR "Unable to create ipt_netflow slab cache\n"); goto err_free_hash; } if (!register_stat("ipt_netflow", &nf_seq_fops)) goto err_free_netflow_slab; if (!register_stat("ipt_netflow_snmp", &snmp_seq_fops)) goto err_free_proc_stat1; if (!register_stat("ipt_netflow_flows", &flows_seq_fops)) goto err_free_proc_stat2; #ifdef CONFIG_SYSCTL ctl_table_renumber(netflow_sysctl_table); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) netflow_sysctl_header = register_sysctl_table(netflow_net_table #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) , 0 /* insert_at_head */ #endif ); #else /* 2.6.25 */ netflow_sysctl_header = register_sysctl_paths(netflow_sysctl_path, netflow_sysctl_table); #endif if (!netflow_sysctl_header) { printk(KERN_ERR "netflow: can't register to sysctl\n"); goto err_free_proc_stat3; } else printk(KERN_INFO "netflow: registered: sysctl net.netflow\n"); #endif if (!destination) destination = destination_buf; if (destination != destination_buf) { strlcpy(destination_buf, destination, sizeof(destination_buf)); destination = destination_buf; } if (add_destinations(destination) < 0) goto err_free_sysctl; #ifdef ENABLE_AGGR if (!aggregation) aggregation = aggregation_buf; if (aggregation != aggregation_buf) { strlcpy(aggregation_buf, aggregation, sizeof(aggregation_buf)); aggregation = aggregation_buf; } add_aggregation(aggregation); #endif #ifdef ENABLE_SAMPLER if (!sampler) sampler = sampler_buf; if (sampler != sampler_buf) { strlcpy(sampler_buf, sampler, sizeof(sampler_buf)); sampler = sampler_buf; } parse_sampler(sampler); #ifdef SAMPLING_HASH hash_seed = prandom_u32(); #endif #endif #ifdef ENABLE_RANDOM_TEMPLATE_IDS template_ids = FLOWSET_DATA_FIRST | prandom_u32_max(0x00010000); #endif #ifdef SNMP_RULES if (!snmp_rules) snmp_rules = snmp_rules_buf; if (snmp_rules != snmp_rules_buf) { strlcpy(snmp_rules_buf, snmp_rules, sizeof(snmp_rules_buf)); snmp_rules = snmp_rules_buf; } add_snmp_rules(snmp_rules); #endif #ifdef ENABLE_PROMISC { int newpromisc = promisc; promisc = 0; switch_promisc(newpromisc); } #endif netflow_switch_version(protocol); _schedule_scan_worker(0); timer_setup(&rate_timer, rate_timer_calc, 0); mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE)); peakflows_at = jiffies; if (xt_register_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg))) goto err_stop_timer; #ifdef CONFIG_NF_NAT_NEEDED if (natevents) register_ct_events(); #endif printk(KERN_INFO "ipt_NETFLOW is loaded.\n"); return 0; err_stop_timer: _unschedule_scan_worker(); netflow_scan_and_export(AND_FLUSH); del_timer_sync(&rate_timer); free_templates(); destination_removeall(); #ifdef ENABLE_AGGR aggregation_remove(&aggr_n_list); aggregation_remove(&aggr_p_list); #endif err_free_sysctl: #ifdef CONFIG_SYSCTL unregister_sysctl_table(netflow_sysctl_header); #endif err_free_proc_stat3: #ifdef CONFIG_PROC_FS remove_proc_entry("ipt_netflow_flows", INIT_NET(proc_net_stat)); err_free_proc_stat2: remove_proc_entry("ipt_netflow_snmp", INIT_NET(proc_net_stat)); err_free_proc_stat1: remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat)); err_free_netflow_slab: #endif 
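	/* Note on the hash table auto-sizing earlier in this function: it
	 * works out to roughly RAM/1024 bytes of buckets, capped at 5 MiB,
	 * i.e. approximately
	 *
	 *	table_bytes = min(ram_bytes / 1024, 5 << 20);
	 *	hashsize    = table_bytes / sizeof(struct hlist_head);
	 *
	 * so a 4 GiB box gets a 4 MiB table (524288 eight-byte buckets on
	 * 64-bit) and a 1 GiB box gets 1 MiB (131072 buckets).
	 */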
kmem_cache_destroy(ipt_netflow_cachep); err_free_hash: vfree(htable); err: printk(KERN_INFO "ipt_NETFLOW is not loaded.\n"); return -ENOMEM; } static void __exit ipt_netflow_fini(void) { printk(KERN_INFO "ipt_NETFLOW unloading..\n"); #ifdef CONFIG_SYSCTL unregister_sysctl_table(netflow_sysctl_header); #endif #ifdef CONFIG_PROC_FS remove_proc_entry("ipt_netflow_flows", INIT_NET(proc_net_stat)); remove_proc_entry("ipt_netflow_snmp", INIT_NET(proc_net_stat)); remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat)); #endif #ifdef ENABLE_PROMISC switch_promisc(0); #endif xt_unregister_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg)); #ifdef CONFIG_NF_NAT_NEEDED if (natevents) unregister_ct_events(); #endif _unschedule_scan_worker(); netflow_scan_and_export(AND_FLUSH); del_timer_sync(&rate_timer); #ifdef HAVE_SYNCHRONIZE_SCHED synchronize_sched(); #else synchronize_rcu(); #endif free_templates(); destination_removeall(); #ifdef ENABLE_AGGR aggregation_remove(&aggr_n_list); aggregation_remove(&aggr_p_list); #endif #ifdef SNMP_RULES kfree(snmp_ruleset); #endif kmem_cache_destroy(ipt_netflow_cachep); vfree(htable); printk(KERN_INFO "ipt_NETFLOW unloaded.\n"); } module_init(ipt_netflow_init); module_exit(ipt_netflow_fini); /* vim: set sw=8: */ ipt-netflow-2.6/ipt_NETFLOW.h000066400000000000000000000355021404773755400160210ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0-only * * This file is part of NetFlow exporting module. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #ifndef _IPT_NETFLOW_H #define _IPT_NETFLOW_H /* * Some tech info: * http://www.cisco.com/en/US/products/ps6601/prod_white_papers_list.html * http://www.cisco.com/en/US/products/sw/netmgtsw/ps1964/products_implementation_design_guide09186a00800d6a11.html */ #define NETFLOW5_RECORDS_MAX 30 struct netflow5_record { __be32 s_addr; __be32 d_addr; __be32 nexthop; __be16 i_ifc; __be16 o_ifc; __be32 nr_packets; __be32 nr_octets; __be32 first_ms; __be32 last_ms; __be16 s_port; __be16 d_port; __u8 reserved; __u8 tcp_flags; __u8 protocol; __u8 tos; __be16 s_as; __be16 d_as; __u8 s_mask; __u8 d_mask; __u16 padding; } __attribute__ ((packed)); /* NetFlow v5 packet */ struct netflow5_pdu { __be16 version; __be16 nr_records; __be32 ts_uptime; /* ms */ __be32 ts_usecs; /* s */ __be32 ts_unsecs; /* ns */ __be32 seq; __u8 eng_type; __u8 eng_id; __u16 sampling; struct netflow5_record flow[NETFLOW5_RECORDS_MAX]; } __attribute__ ((packed)); #define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record)) #define IF_NAME_SZ IFNAMSIZ #define IF_DESC_SZ 32 /* NetFlow v9 http://www.ietf.org/rfc/rfc3954.txt */ /* IPFIX http://www.iana.org/assignments/ipfix/ipfix.xhtml */ /* v9 elements are uppercased, IPFIX camel cased. 
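 * The one()/two() macros below expand this table into an enum, so every
 * listed name becomes its numeric element ID (the length column is not
 * used by this particular expansion); e.g. two(1, IN_BYTES,
 * octetDeltaCount, 4) yields IN_BYTES = 1 and octetDeltaCount = 1, so
 * either the v9 or the IPFIX name can be used wherever an element ID is
 * needed.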
*/ #define one(id, name, len) name = id, #define two(id, a, b, len) \ one(id, a, len) \ one(id, b, len) #define Elements \ two(1, IN_BYTES, octetDeltaCount, 4) \ two(2, IN_PKTS, packetDeltaCount, 4) \ two(4, PROTOCOL, protocolIdentifier, 1) \ two(5, TOS, ipClassOfService, 1) \ two(6, TCP_FLAGS, tcpControlBits, 1) \ two(7, L4_SRC_PORT, sourceTransportPort, 2) \ two(8, IPV4_SRC_ADDR, sourceIPv4Address, 4) \ two(9, SRC_MASK, sourceIPv4PrefixLength, 1) \ two(10, INPUT_SNMP, ingressInterface, 2) \ two(11, L4_DST_PORT, destinationTransportPort, 2) \ two(12, IPV4_DST_ADDR, destinationIPv4Address, 4) \ two(13, DST_MASK, destinationIPv4PrefixLength, 1) \ two(14, OUTPUT_SNMP, egressInterface, 2) \ two(15, IPV4_NEXT_HOP, ipNextHopIPv4Address, 4) \ two(21, LAST_SWITCHED, flowEndSysUpTime, 4) \ two(22, FIRST_SWITCHED, flowStartSysUpTime, 4) \ one(25, minimumIpTotalLength, 2) \ one(26, maximumIpTotalLength, 2) \ two(27, IPV6_SRC_ADDR, sourceIPv6Address, 16) \ two(28, IPV6_DST_ADDR, destinationIPv6Address, 16) \ two(31, IPV6_FLOW_LABEL, flowLabelIPv6, 3) \ two(32, ICMP_TYPE, icmpTypeCodeIPv4, 2) \ two(33, MUL_IGMP_TYPE, igmpType, 1) \ two(40, TOTAL_BYTES_EXP, exportedOctetTotalCount, 8) \ two(41, TOTAL_PKTS_EXP, exportedMessageTotalCount, 8) \ two(42, TOTAL_FLOWS_EXP, exportedFlowRecordTotalCount, 8) \ two(48, FLOW_SAMPLER_ID, samplerId, 1) \ two(49, FLOW_SAMPLER_MODE, samplerMode, 1) \ two(50, FLOW_SAMPLER_RANDOM_INTERVAL, samplerRandomInterval, 2) \ one(52, minimumTTL, 1) \ one(53, maximumTTL, 1) \ two(56, SRC_MAC, sourceMacAddress, 6) \ two(57, DST_MAC, postDestinationMacAddress, 6) \ two(58, SRC_VLAN, vlanId, 2) \ two(60, IP_VERSION, ipVersion, 1) \ two(61, DIRECTION, flowDirection, 1) \ two(62, IPV6_NEXT_HOP, ipNextHopIPv6Address, 16) \ two(64, IPV6_OPTION_HEADERS, ipv6ExtensionHeaders, 2) \ two(70, MPLS_LABEL_1, mplsTopLabelStackSection, 3) \ two(71, MPLS_LABEL_2, mplsLabelStackSection2, 3) \ two(72, MPLS_LABEL_3, mplsLabelStackSection3, 3) \ two(73, MPLS_LABEL_4, mplsLabelStackSection4, 3) \ two(74, MPLS_LABEL_5, mplsLabelStackSection5, 3) \ two(75, MPLS_LABEL_6, mplsLabelStackSection6, 3) \ two(76, MPLS_LABEL_7, mplsLabelStackSection7, 3) \ two(77, MPLS_LABEL_8, mplsLabelStackSection8, 3) \ two(78, MPLS_LABEL_9, mplsLabelStackSection9, 3) \ two(79, MPLS_LABEL_10, mplsLabelStackSection10, 3) \ one(80, destinationMacAddress, 6) \ two(82, IF_NAME, interfaceName, IF_NAME_SZ) \ two(83, IF_DESC, interfaceDescription, IF_DESC_SZ) \ one(136, flowEndReason, 1) \ one(138, observationPointId, 4) \ one(139, icmpTypeCodeIPv6, 2) \ one(141, LineCardId, 4) \ one(142, portId, 4) \ one(143, meteringProcessId, 4) \ one(144, exportingProcessId, 4) \ one(145, TemplateId, 2) \ one(149, observationDomainId, 4) \ one(152, flowStartMilliseconds, 8) \ one(153, flowEndMilliseconds, 8) \ one(154, flowStartMicroseconds, 8) \ one(155, flowEndMicroseconds, 8) \ one(160, systemInitTimeMilliseconds, 8) \ one(163, observedFlowTotalCount, 8) \ one(164, ignoredPacketTotalCount, 8) \ one(165, ignoredOctetTotalCount, 8) \ one(166, notSentFlowTotalCount, 8) \ one(167, notSentPacketTotalCount, 8) \ one(168, notSentOctetTotalCount, 8) \ one(200, mplsTopLabelTTL, 1) \ one(201, mplsLabelStackLength, 1) \ one(202, mplsLabelStackDepth, 1) \ one(208, ipv4Options, 4) \ one(209, tcpOptions, 4) \ one(225, postNATSourceIPv4Address, 4) \ one(226, postNATDestinationIPv4Address, 4) \ one(227, postNAPTSourceTransportPort, 2) \ one(228, postNAPTDestinationTransportPort, 2) \ one(230, natEvent, 1) \ one(243, dot1qVlanId, 2) \ one(244, dot1qPriority, 
1) \ one(245, dot1qCustomerVlanId, 2) \ one(246, dot1qCustomerPriority, 1) \ one(252, ingressPhysicalInterface, 2) \ one(253, egressPhysicalInterface, 2) \ one(256, ethernetType, 2) \ one(295, IPSecSPI, 4) \ one(300, observationDomainName, 128) \ one(302, selectorId, 1) \ one(309, samplingSize, 1) \ one(310, samplingPopulation, 2) \ one(318, selectorIdTotalPktsObserved, 8) \ one(319, selectorIdTotalPktsSelected, 8) \ one(323, observationTimeMilliseconds, 8) \ one(324, observationTimeMicroseconds, 8) \ one(325, observationTimeNanoseconds, 8) \ one(390, flowSelectorAlgorithm, 1) \ one(394, selectorIDTotalFlowsObserved, 8) \ one(395, selectorIDTotalFlowsSelected, 8) \ one(396, samplingFlowInterval, 1) \ one(397, samplingFlowSpacing, 2) enum { Elements }; #undef one #undef two enum { FLOWSET_TEMPLATE = 0, FLOWSET_OPTIONS = 1, IPFIX_TEMPLATE = 2, IPFIX_OPTIONS = 3, FLOWSET_DATA_FIRST = 256, }; enum { /* v9 scopes */ V9_SCOPE_SYSTEM = 1, V9_SCOPE_INTERFACE = 2, V9_SCOPE_LINECARD = 3, V9_SCOPE_CACHE = 4, V9_SCOPE_TEMPLATE = 5, }; struct flowset_template { __be16 flowset_id; __be16 length; /* (bytes) */ __be16 template_id; __be16 field_count; /* (items) */ } __attribute__ ((packed)); struct flowset_data { __be16 flowset_id; /* corresponds to template_id */ __be16 length; /* (bytes) */ } __attribute__ ((packed)); /* http://tools.ietf.org/html/rfc3954#section-6.1 */ struct flowset_opt_tpl_v9 { __be16 flowset_id; __be16 length; __be16 template_id; __be16 scope_len; /* (bytes) */ __be16 opt_len; /* (bytes) */ } __attribute__ ((packed)); /* http://tools.ietf.org/html/rfc5101#section-3.4.2.2 */ struct flowset_opt_tpl_ipfix { __be16 flowset_id; __be16 length; __be16 template_id; __be16 field_count; /* total (items) */ __be16 scope_count; /* (items) must not be zero */ } __attribute__ ((packed)); /* NetFlow v9 packet. */ struct netflow9_pdu { __be16 version; __be16 nr_records; /* (items) */ __be32 sys_uptime_ms; __be32 export_time_s; __be32 seq; __be32 source_id; /* Exporter Observation Domain */ __u8 data[1400]; } __attribute__ ((packed)); /* IPFIX packet. */ struct ipfix_pdu { __be16 version; __be16 length; /* (bytes) */ __be32 export_time_s; __be32 seq; __be32 odomain_id; /* Observation Domain ID */ __u8 data[1400]; } __attribute__ ((packed)); /* Maximum bytes flow can have, after it's reached flow will become * not searchable and will be exported soon. 
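 * (0xffefffff leaves about 1 MiB of headroom below UINT32_MAX, so the
 * flow is taken out of the hash and exported before its 32-bit nr_bytes
 * counter can wrap.)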
*/ #define FLOW_FULL_WATERMARK 0xffefffff #define EXTRACT_SPI(tuple) ((tuple.s_port << 16) | tuple.d_port) #define SAVE_SPI(tuple, spi) { tuple.s_port = spi >> 16; \ tuple.d_port = spi; } #define MAX_VLAN_TAGS 2 /* hashed data which identify unique flow */ /* 16+16 + 2+2 + 2+1+1+1 = 41 */ struct ipt_netflow_tuple { union nf_inet_addr src; union nf_inet_addr dst; __be16 s_port; // Network byte order __be16 d_port; // -"- #ifdef MPLS_DEPTH __be32 mpls[MPLS_DEPTH]; /* Network byte order */ #endif __u16 i_ifc; // Host byte order #ifdef ENABLE_VLAN __be16 tag[MAX_VLAN_TAGS]; // Network byte order (outer tag first) #endif __u8 protocol; __u8 tos; __u8 l3proto; #ifdef ENABLE_MAC __u8 h_dst[ETH_ALEN]; __u8 h_src[ETH_ALEN]; #endif } __attribute__ ((packed)); /* hlist[2] + tuple[]: 8+8 + 41 = 57 (less than usual cache line, 64) */ struct ipt_netflow { struct hlist_node hlist; // hashtable search chain /* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */ struct ipt_netflow_tuple tuple; /* volatile data */ union nf_inet_addr nh; #if defined(ENABLE_MAC) || defined(ENABLE_VLAN) __be16 ethernetType; /* Network byte order */ #endif __u16 o_ifc; #ifdef ENABLE_PHYSDEV __u16 i_ifphys; __u16 o_ifphys; #endif #ifdef SNMP_RULES __u16 i_ifcr; /* translated interface numbers*/ __u16 o_ifcr; #endif __u8 s_mask; __u8 d_mask; __u8 tcp_flags; /* `OR' of all tcp flags */ __u8 flowEndReason; #ifdef ENABLE_DIRECTION __u8 hooknumx; /* hooknum + 1 */ #endif /* flow statistics */ u_int32_t nr_packets; u_int32_t nr_bytes; #ifdef ENABLE_SAMPLER unsigned int sampler_count; /* for deterministic sampler only */ #endif union { struct { unsigned long first; unsigned long last; } ts; ktime_t ts_obs; } _ts_un; #define nf_ts_first _ts_un.ts.first #define nf_ts_last _ts_un.ts.last #define nf_ts_obs _ts_un.ts_obs u_int32_t flow_label; /* IPv6 */ u_int32_t options; /* IPv4(16) & IPv6(32) Options */ u_int32_t tcpoptions; #ifdef CONFIG_NF_NAT_NEEDED __be32 s_as; __be32 d_as; struct nat_event *nat; #endif union { struct list_head list; /* all flows in ipt_netflow_list */ #ifdef HAVE_LLIST struct llist_node llnode; /* purged flows */ #endif } _flow_list; #define flows_list _flow_list.list #define flows_llnode _flow_list.llnode }; #ifdef CONFIG_NF_NAT_NEEDED enum { NAT_CREATE = 1, NAT_DESTROY = 2, NAT_POOLEXHAUSTED = 3 }; struct nat_event { struct list_head list; struct { __be32 s_addr; __be32 d_addr; __be16 s_port; __be16 d_port; } pre, post; ktime_t ts_ktime; unsigned long ts_jiffies; __u8 protocol; __u8 nat_event; }; #define IS_DUMMY_FLOW(nf) (nf->nat) #else #define IS_DUMMY_FLOW(nf) 0 #endif static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1, const struct ipt_netflow_tuple *t2) { return (!memcmp(t1, t2, sizeof(struct ipt_netflow_tuple))); } struct ipt_netflow_sock { struct list_head list; struct socket *sock; struct sockaddr_storage addr; // destination struct sockaddr_storage saddr; // source char sdev[IFNAMSIZ]; // source device atomic_t wmem_peak; // sk_wmem_alloc peak value unsigned int err_connect; // connect errors unsigned int err_full; // socket filled error unsigned int err_other; // other socket errors unsigned int err_cberr; // async errors, icmp unsigned int pkt_exp; // pkts expoted to this dest u64 bytes_exp; // bytes -"- u64 bytes_exp_old; // for rate calculation unsigned int bytes_rate; // bytes per second unsigned int pkt_sent; // pkts sent to this dest unsigned int pkt_fail; // pkts failed to send to this dest }; struct netflow_aggr_n { struct list_head list; atomic_t usage; __u32 mask; 
__u32 addr; __u32 aggr_mask; __u8 prefix; }; struct netflow_aggr_p { struct list_head list; atomic_t usage; __u16 port1; __u16 port2; __u16 aggr_port; }; #define NETFLOW_STAT_INC(count) (__get_cpu_var(ipt_netflow_stat).count++) #define NETFLOW_STAT_ADD(count, val) (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val) #define NETFLOW_STAT_SET(count, val) (__get_cpu_var(ipt_netflow_stat).count = (unsigned long long)val) #define NETFLOW_STAT_TS(count) \ do { \ ktime_t kts = ktime_get_real(); \ if (!(__get_cpu_var(ipt_netflow_stat)).count.first_tv64) \ __get_cpu_var(ipt_netflow_stat).count.first = kts; \ __get_cpu_var(ipt_netflow_stat).count.last = kts; \ } while (0); #define NETFLOW_STAT_INC_ATOMIC(count) \ do { \ preempt_disable(); \ (__get_cpu_var(ipt_netflow_stat).count++); \ preempt_enable(); \ } while (0); #define NETFLOW_STAT_ADD_ATOMIC(count, val) \ do { \ preempt_disable(); \ (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val); \ preempt_enable(); \ } while (0); #define NETFLOW_STAT_READ(count) ({ \ unsigned int _tmp = 0, _cpu; \ for_each_present_cpu(_cpu) \ _tmp += per_cpu(ipt_netflow_stat, _cpu).count; \ _tmp; \ }) struct duration { ktime_t first; ktime_t last; }; /* statistics */ struct ipt_netflow_stat { u64 searched; // hash stat u64 found; // hash stat u64 notfound; // hash stat (new flows) u64 pkt_total; // packets metered u64 traf_total; // traffic metered #ifdef ENABLE_PROMISC u64 pkt_promisc; // how much packets passed promisc code u64 pkt_promisc_drop; // how much packets discarded #endif /* above is grouped for cache */ unsigned int truncated; // packets stat (drop) unsigned int frags; // packets stat (drop) unsigned int maxflows_err; // maxflows reached (drop) unsigned int alloc_err; // failed to allocate memory (drop & lost) struct duration drop; unsigned int send_success; // sendmsg() ok unsigned int send_failed; // sendmsg() failed unsigned int sock_cberr; // socket error callback called (got icmp refused) unsigned int exported_rate; // netflow traffic itself u64 exported_pkt; // netflow traffic itself u64 exported_flow; // netflow traffic itself u64 exported_traf; // netflow traffic itself u64 exported_trafo; // netflow traffic itself u64 pkt_total_prev; // packets metered previous interval u32 pkt_total_rate; // packet rate for this cpu u64 pkt_drop; // packets not metered u64 traf_drop; // traffic not metered u64 flow_lost; // flows not sent to collector u64 pkt_lost; // packets not sent to collector u64 traf_lost; // traffic not sent to collector struct duration lost; u64 pkt_out; // packets out of the hash u64 traf_out; // traffic out of the hash #ifdef ENABLE_SAMPLER u64 pkts_observed; // sampler stat u64 pkts_selected; // sampler stat #endif u64 old_searched; // previous hash stat u64 old_found; // for calculation per cpu metric u64 old_notfound; int metric; // one minute ewma of hash efficiency }; #endif /* vim: set sw=8: */ ipt-netflow-2.6/irqtop000077500000000000000000000370421404773755400151230ustar00rootroot00000000000000#!/usr/bin/ruby # SPDX-License-Identifier: GPL-2.0-only # # Observe irq and softirq in top fashion # (c) 2014 # License: GPL-2.0-only. 
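#
# Usage sketch (interface names and rates are system-specific):
#   irqtop              # interactive table view, 5 second refresh
#   irqtop -t -d 1      # flat top view, 1 second refresh
#   irqtop -b -e 2 10   # batch mode with ethtool per-queue stats,
#                       # 2 second interval, 10 iterations
#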
require 'getoptlong' require 'curses' require 'stringio' @imode = :both @omode = :table @color = true @showrps = false GetoptLong.new( ["--help", "-h", GetoptLong::NO_ARGUMENT], ["--batch", "-b", GetoptLong::NO_ARGUMENT], ["--delay", "-d", GetoptLong::REQUIRED_ARGUMENT], ["--top", "-t", GetoptLong::NO_ARGUMENT], ["--table", "-x", GetoptLong::NO_ARGUMENT], ["--soft", "-s", GetoptLong::NO_ARGUMENT], ["--softirq", GetoptLong::NO_ARGUMENT], ["--softirqs", GetoptLong::NO_ARGUMENT], ["--irq", "-i", GetoptLong::NO_ARGUMENT], ["--irqs", GetoptLong::NO_ARGUMENT], ["--reverse", "-r", GetoptLong::NO_ARGUMENT], ["--nocolor", "-C", GetoptLong::NO_ARGUMENT], ["--eth", "-e", "--pps", GetoptLong::NO_ARGUMENT], ["--rps", "-R", "--xps", GetoptLong::NO_ARGUMENT] ).each do |opt, arg| case opt when '--help' puts " Shows interrupt rates (per second) per cpu." puts " Also shows irq affinity ('.' for disabled cpus)," puts " and rps/xps affinity ('+' rx, '-' tx, '*' tx/rx)." puts " Can show packet rate per eth queue." puts puts " Usage: #{$0} [-h] [-d #{@delay}] [-b] [-t|-x] [-i|-s] [-r]" puts " -d --delay=n refresh interval" puts " -s --softirq select softirqs only" puts " -i --irq select hardware irqs only" puts " -e --eth show extra eth stats (from ethtool)" puts " -R --rps enable display of rps/xps" puts " -x --table output in table mode (default)" puts " -t --top output in flat top mode" puts " -b --batch output non-interactively" puts " -r --reverse reverse sort order" puts " -C --nocolor disable colors" puts puts " Rates marked as '.' is forbidden by smp_affinity mask." exit 0 when '--reverse' @reverse = !@reverse when '--batch' @batch = true @reverse = !@reverse if @omode == :top when '--delay' @delay = arg.to_i when '--top' @omode = :top when '--table' @omode = :table when /--irq/ @imode = :irq when /--soft/ @imode = :soft when /--pps/ @pps = true when /--nocolor/ @color = false when /--rps/ @showrps = !@showrps end end if !@delay && ARGV[0].to_f > 0 @delay = ARGV.shift.to_f else @delay = 5 end @count = ARGV.shift.to_f if ARGV[0].to_i > 0 def read_table(tag, file) @cpus = [] lines = IO.readlines(file) @cpus = lines[0].scan(/CPU\d+/) @icpus = @cpus if tag == 'i' lines[2..-1].each do |li| irq, stat, desc = li.match(/^\s*(\S+):((?:\s+\d+)+)(.*)$/).captures stat = stat.scan(/\d+/) @irqs << [tag, irq, desc] stat.each_with_index do |val, i| # interruptsN, 's|i', irq'N', 'cpuX', 'descr...' @stats << [val.to_i, tag, irq, @cpus[i], desc.strip] end end end def read_procstat @cstat = {} lines = IO.readlines("/proc/stat").grep(/^cpu\d+ /) lines.each do |li| c, *d = li.split(" ") d = d.map {|e| e.to_i} @cstat[c] = d end end def read_affinity @aff = {} Dir.glob("/proc/irq/*/smp_affinity").each do |af| irq = af[%r{\d+}].to_i a = IO.read(af).strip.to_i(16) @aff[irq] = a end end # list ethernet devices def net_devices_pci Dir['/sys/class/net/*'].reject do |f| f += "/device" unless File.symlink?(f) if File.symlink?(f) !(File.readlink(f) =~ %r{devices/pci}) else false end end.map {|f| File.basename(f)} end @devlist = net_devices_pci @devre = Regexp.union(@devlist) def get_rps(desc) @rps = @xps = 0 return unless @showrps return if @devlist.empty? 
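  # Map the irq description (e.g. "eth0-TxRx-3"; names are driver-specific)
  # to /sys/class/net/<dev>/queues/rx-<n>/rps_cpus and tx-<n>/xps_cpus;
  # the masks read here drive the '+', '-' and '*' markers in the table.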
dev = desc[/\b(#{@devre})\b/, 1] return unless dev return unless desc =~ /-(tx|rx)+-\d+/i qnr = desc[/-(\d+)\s*$/, 1] return unless qnr begin @rps = IO.read("/sys/class/net/#{dev}/queues/rx-#{qnr}/rps_cpus").hex if desc =~ /rx/i @xps = IO.read("/sys/class/net/#{dev}/queues/tx-#{qnr}/xps_cpus").hex if desc =~ /tx/i rescue end end def calc_rps(cpu) m = 0 m |= 1 if @rps & (1 << cpu) != 0 m |= 2 if @xps & (1 << cpu) != 0 " +-*".slice(m, 1) end # ethtool -S eth0 def ethtool_grab_stat(dev = nil) unless dev @esto = @est if @est @est = Hash.new { |h,k| h[k] = Hash.new(&h.default_proc) } @devlist = net_devices_pci @devre = Regexp.union(@devlist) # own time counter because this stat could be paused @ehts = @ets if @ets @ets = @ts @edt = @ets - @ehts if @ehts @devlist.each {|e| ethtool_grab_stat(e)} return end h = Hash.new {|k,v| k[v] = Array.new} t = `ethtool -S #{dev} 2>/dev/null` return if t == '' t.split("\n").map { |e| e.split(':') }.reject { |e| !e[1] }.each { |k,v| k.strip! v = v.strip.to_i if k =~ /^.x_queue_(\d+)_/ t = k.split('_', 4) qdir = t[0] qnr = t[2] qk = t[3] @est[dev][qdir][qnr][qk] = v else @est[dev][k] = v end } end def e_queue_stat(dev, qdir, qnr, k) n = @est[dev][qdir][qnr][k] o = @esto[dev][qdir][qnr][k] d = (n - o) / @edt if d > 0 "%s:%d" % [qdir, d] else nil end end def e_dev_stat(dev, k, ks) n = @est[dev][k] o = @esto[dev][k] r = (n - o) / @edt ks = k unless ks "%s:%d" % [ks, r] end def e_queue_stat_err(dev, qdir, qnr) r = [] ek = @est[dev][qdir][qnr].keys.reject{|e| e =~ /^(bytes|packets)$/} ek.each do |k| n = @est[dev][qdir][qnr][k] o = @esto[dev][qdir][qnr][k] d = n - o r << "%s_%s:%d" % [qdir, k, d] if d.to_i > 0 end r end # this is not rate def e_dev_stat_sum(dev, rk, ks) ek = @est[dev].keys.reject{|ek| !(ek =~ rk)} n = ek.inject(0) {|sum,k| sum += @est[dev][k].to_i} o = ek.inject(0) {|sum,k| sum += @esto[dev][k].to_i rescue 0} r = (n - o) if r > 0 "%s:%d" % [ks, r] else nil end end def print_ethstat(desc) return if @devlist.empty? dev = desc[/\b(#{@devre})\b/, 1] return unless dev unless @esto && @est print ' []' return end t = [] if desc =~ /-(tx|rx)+-\d+/i qnr = desc[/-(\d+)\s*$/, 1] if qnr if desc =~ /rx/i t << e_queue_stat(dev, "rx", qnr, "packets") t += e_queue_stat_err(dev, "rx", qnr) end if desc =~ /tx/i t << e_queue_stat(dev, "tx", qnr, "packets") t += e_queue_stat_err(dev, "tx", qnr) end end else t << e_dev_stat(dev, "rx_packets", 'rx') t << e_dev_stat(dev, "tx_packets", 'tx') t << e_dev_stat_sum(dev, /_err/, 'err') t << e_dev_stat_sum(dev, /_drop/, 'drop') end t.delete(nil) print ' [' + t.join(' ') + ']' end def grab_stat # @h[istorical] @hstats = @stats @hcstat = @cstat @hts = @ts @stats = [] @irqs = [] @ts = Time.now @dt = @ts - @hts if @hts read_table 'i', "/proc/interrupts" read_table 's', "/proc/softirqs" read_affinity read_procstat ethtool_grab_stat if @pps end def calc_speed s = [] # calc speed h = Hash.new(0) @hstats.each do |v, t, i, c, d| h[[t, i, c]] = v end # output @h = {} @t = Hash.new(0) # rate per cpu @w = Hash.new(0) # irqs per irqN @s = @stats.map do |v, t, i, c, d| rate = (v - h[[t, i, c]]) / @dt @t[c] += rate if t == 'i' @w[[t, i]] += (v - h[[t, i, c]]) @h[[t, i, c]] = rate [rate, v, t, i, c, d] end end def calc_cpu @cBusy = Hash.new(0) @cHIrq = Hash.new(0) @cSIrq = Hash.new(0) # user, nice, system, [3] idle, [4] iowait, irq, softirq, etc. 
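  # For every cpu the deltas since the previous sample give:
  #   busy% = 100 - (idle + iowait) / total * 100
  #   irq%  = irq / total * 100,   sirq% = softirq / total * 100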
@cstat.each do |c, d| d = d.zip(@hcstat[c]).map {|a, b| a - b} c = c.upcase sum = d.reduce(:+) @cBusy[c] = 100 - (d[3] + d[4]).to_f / sum * 100 @cHIrq[c] = (d[5]).to_f / sum * 100 @cSIrq[c] = (d[6]).to_f / sum * 100 end end def show_top @s.sort!.reverse! @s.reverse! if @reverse rej = nil rej = 's' if @imode == :irq rej = 'i' if @imode == :soft @s.each do |s, v, t, i, c, d| next if t == rej if s > 0 print "%9.1f %s %s <%s> %s" % [s, c.downcase, t, i, d] print_ethstat(d) if @pps puts end end end @ifilter = {} def show_interrupts maxlen = 7 @irqs.reverse! if @reverse print "%s %*s " % [" ", maxlen, " "] @icpus.each { |c| print " %6s" % c } puts # load print "%*s: " % [maxlen + 2, "cpuUtil"] @icpus.each { |c| print " %6.1f" % @cBusy[c] } puts " total CPU utilization %" # print "%*s: " % [maxlen + 2, "%irq"] @icpus.each { |c| print " %6.1f" % @cHIrq[c] } puts " hardware IRQ CPU util%" print "%*s: " % [maxlen + 2, "%sirq"] @icpus.each { |c| print " %6.1f" % @cSIrq[c] } puts " software IRQ CPU util%" # total print "%*s: " % [maxlen + 2, "irqTotal"] @icpus.each { |c| print " %6d" % @t[c] } puts " total hardware IRQs" rej = nil rej = 's' if @imode == :irq rej = 'i' if @imode == :soft @irqs.each do |t, i, desc| next if t == rej # include incrementally and all eth unless @ifilter[[t, i]] || @showall next unless @w[[t, i]] > 0 || desc =~ /eth/ @ifilter[[t, i]] = true end print "%s %*s: " % [t.to_s, maxlen, i.slice(0, maxlen)] rps = get_rps(desc) @icpus.each do |c| cpu = c[/\d+/].to_i aff = @aff[i.to_i] off = ((aff & 1 << cpu) ==0)? true : false if aff fla = calc_rps(cpu) begin v = @h[[t, i, c]] if v > 0 || !off print "%6d%c" % [v, fla] elsif aff print "%6s%c" % [".", fla] end rescue end end print desc print_ethstat(desc) if @pps puts end end def select_output if @omode == :top show_top else show_interrupts end end def curses_choplines(text) cols = Curses.cols - 1 rows = Curses.lines - 2 lines = text.split("\n").map {|e| e.slice(0, cols)}.slice(0, rows) text = lines.join("\n") text << "\n" * (rows - lines.size) if lines.size < rows text end def show_help puts "irqtop help:" puts puts " In table view, cells marked with '.' mean this hw irq is" puts " disabled via /proc/irq//smp_affinity" puts " Interactive keys:" puts " i Toggle (hardware) irqs view" puts " s Toggle software irqs (softirqs) view" puts " e Show eth stat per queue" puts " R Show rps/xps affinity" puts " t Flat top display mode" puts " x Table display mode" puts " r Reverse rows order" puts " c Toggle colors (for eth)" puts " a Show lines with zero rate (all)" puts " A Clear lines with zero rates" puts " . Pause screen updating" puts " h,? This help screen" puts " q Quit." puts " Any other key will update display." puts puts "Press any key to continue." end hostname = `hostname`.strip # grab_stat sleep 0.5 COLOR_GREEN = "\033[0;32m" COLOR_YELLOW = "\033[0;33m" COLOR_CYAN = "\033[0;36m" COLOR_RED = "\033[0;31m" COLOR_OFF = "\033[m" def tty_printline(t) latr = nil # line color if t =~ /-rx-/ latr = COLOR_GREEN elsif t =~ /-tx-/ latr = COLOR_YELLOW elsif t =~ /\beth/ latr = COLOR_CYAN end print latr if latr if t =~ /cpuUtil:|irq:|sirq:/ # colorize percentage values t.scan(/\s+\S+/) do |e| eatr = nil if e =~ /^\s*[\d.]+$/ if e.to_i >= 90 eatr = COLOR_RED elsif e.to_i <= 10 eatr = COLOR_GREEN else eatr = COLOR_YELLOW end end print eatr if eatr print e print (latr)? 
latr : COLOR_OFF if eatr end elsif latr && t =~ / \[[^\]]+\]$/ # colorize eth stats print $` print COLOR_OFF if latr $&.scan(/(.*?)(\w+)(:)(\d+)/) do |e| eatr = nil case e[1] when 'rx' eatr = COLOR_GREEN when 'tx' eatr = COLOR_YELLOW else eatr = COLOR_RED end eatr = nil if e[3].to_i == 0 print e[0] print eatr if eatr print e[1..-1].join print (latr)? latr : COLOR_OFF if eatr end print $' else print t end print COLOR_OFF if latr puts end def tty_output if @color $stdout = StringIO.new yield $stdout.rewind txt = $stdout.read $stdout = STDOUT txt.split("\n", -1).each do |li| tty_printline(li) end else yield end end if @batch @color = @color && $stdout.tty? loop do grab_stat calc_speed calc_cpu puts "#{hostname} - irqtop - #{Time.now}" tty_output { select_output } $stdout.flush break if @count && (@count -= 1) == 0 sleep @delay end exit 0 end Curses.init_screen Curses.start_color Curses.cbreak Curses.noecho Curses.nonl Curses.init_pair(1, Curses::COLOR_GREEN, Curses::COLOR_BLACK); Curses.init_pair(2, Curses::COLOR_YELLOW, Curses::COLOR_BLACK); Curses.init_pair(3, Curses::COLOR_CYAN, Curses::COLOR_BLACK); Curses.init_pair(4, Curses::COLOR_RED, Curses::COLOR_BLACK); $stdscr = Curses.stdscr $stdscr.keypad(true) def curses_printline(t) latr = nil # line color if t =~ /-rx-/ latr = Curses.color_pair(1) elsif t =~ /-tx-/ latr = Curses.color_pair(2) elsif t =~ /\beth/ latr = Curses.color_pair(3) end $stdscr.attron(latr) if latr if t =~ /cpuUtil:|irq:|sirq:/ # colorize percentage values t.scan(/\s+\S+/) do |e| eatr = nil if e =~ /^\s*[\d.]+$/ if e.to_i >= 90 eatr = Curses.color_pair(4) elsif e.to_i <= 10 eatr = Curses.color_pair(1) else eatr = Curses.color_pair(2) end end $stdscr.attron(eatr) if eatr $stdscr.addstr("#{e}") $stdscr.attroff(eatr) if eatr end elsif latr && t =~ / \[[^\]]+\]$/ # colorize eth stats $stdscr.addstr($`) $stdscr.attroff(latr) if latr $&.scan(/(.*?)(\w+)(:)(\d+)/) do |e| eatr = nil case e[1] when 'rx' eatr = Curses.color_pair(1) when 'tx' eatr = Curses.color_pair(2) else eatr = Curses.color_pair(4) end eatr = nil if e[3].to_i == 0 $stdscr.addstr(e[0]) $stdscr.attron(eatr) if eatr $stdscr.addstr(e[1..-1].join) $stdscr.attroff(eatr) if eatr end $stdscr.addstr($' + "\n") else $stdscr.addstr("#{t}\n") end $stdscr.attroff(latr) if latr end def curses_output $stdout = StringIO.new yield $stdout.rewind text = $stdout.read $stdout = STDOUT txt = curses_choplines(text) if @color txt.split("\n", -1).each_with_index do |li, i| $stdscr.setpos(i, 0) curses_printline(li) end else $stdscr.setpos(0, 0) $stdscr.addstr(txt) end $stdscr.setpos(1, 0) Curses.refresh end def curses_enter(text, echo = true) $stdscr.setpos(1, 0) $stdscr.addstr(text + "\n") $stdscr.setpos(1, 0) Curses.attron(Curses::A_BOLD) $stdscr.addstr(text) Curses.attroff(Curses::A_BOLD) Curses.refresh Curses.echo if echo Curses.timeout = -1 line = Curses.getstr Curses.noecho line end loop do grab_stat calc_speed calc_cpu curses_output { puts "#{hostname} - irqtop - #{Time.now}" select_output } Curses.timeout = @delay * 1000 ch = Curses.getch.chr rescue nil case ch when "\f" Curses.clear when "q", "Z", "z" break when 'i' @imode = (@imode == :both)? :soft : :both when 's' @imode = (@imode == :both)? :irq : :both when 't' @omode = (@omode == :top)? :table : :top when 'x' @omode = (@omode == :table)? :top : :table when 'e', 'p' @pps = !@pps when 'r' @reverse = !@reverse when 'c' @color = !@color when 'A' @ifilter = {} when 'a' @ifilter = {} @showall = !@showall when 'R' @showrps = !@showrps when '.' 
curses_enter("Pause, press enter to to continue: ", false) when 'd' d = curses_enter("Enter display interval: ") @delay = d.to_f if d.to_f > 0 when 'h', '?' curses_output { show_help } Curses.timeout = -1 ch = Curses.getch.chr rescue nil break if ch == 'q' end end ipt-netflow-2.6/libipt_NETFLOW.c000066400000000000000000000050601404773755400164770ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0-only * * iptables helper for NETFLOW target * * * * This file is part of NetFlow exporting module. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #include #include #include #include #include #include #include #include #define __EXPORTED_HEADERS__ #ifdef XTABLES #include #else #include #endif #ifdef XTABLES_VERSION_CODE // since 1.4.1 #define MOD140 #define iptables_target xtables_target #endif #ifdef iptables_target // only in 1.4.0 #define MOD140 #endif #ifdef MOD140 #define ipt_entry_target xt_entry_target #define register_target xtables_register_target #define _IPT_ENTRY void #define _IPT_IP void #ifndef IPT_ALIGN #define IPT_ALIGN XT_ALIGN #endif #else // before 1.3.x #define _IPT_ENTRY struct ipt_entry #define _IPT_IP struct ipt_ip #endif #ifndef IPTABLES_VERSION #define IPTABLES_VERSION XTABLES_VERSION #endif static struct option opts[] = { { 0 } }; static void help(void) { printf("NETFLOW target\n"); } static int parse(int c, char **argv, int invert, unsigned int *flags, const _IPT_ENTRY *entry, struct ipt_entry_target **targetinfo) { return 1; } static void final_check(unsigned int flags) { } static void save(const _IPT_IP *ip, const struct ipt_entry_target *match) { } static void print(const _IPT_IP *ip, const struct ipt_entry_target *target, int numeric) { printf("NETFLOW "); } static struct iptables_target netflow = { .next = NULL, .name = "NETFLOW", .version = IPTABLES_VERSION, .size = IPT_ALIGN(0), .userspacesize = IPT_ALIGN(0), .help = &help, .parse = &parse, .final_check = &final_check, .print = &print, .save = &save, .extra_opts = opts }; #ifndef _init #define _init __attribute__((constructor)) _INIT #endif void _init(void) { register_target(&netflow); } ipt-netflow-2.6/murmur3.h000066400000000000000000000017571404773755400154460ustar00rootroot00000000000000/* MurmurHash3, based on https://code.google.com/p/smhasher of Austin Appleby. 
*/ static __always_inline uint32_t rotl32(const uint32_t x, const int8_t r) { return (x << r) | (x >> (32 - r)); } static __always_inline uint32_t fmix32(register uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } static inline uint32_t murmur3(const void *key, const uint32_t len, const uint32_t seed) { const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; const uint32_t *blocks; const uint8_t *tail; register uint32_t h1 = seed; uint32_t k1 = 0; uint32_t i; blocks = (const uint32_t *)key; for (i = len / 4; i; --i) { h1 ^= rotl32(*blocks++ * c1, 15) * c2; h1 = rotl32(h1, 13) * 5 + 0xe6546b64; } tail = (const uint8_t*)blocks; switch (len & 3) { case 3: k1 ^= tail[2] << 16; /* FALLTHROUGH */ case 2: k1 ^= tail[1] << 8; /* FALLTHROUGH */ case 1: k1 ^= tail[0]; h1 ^= rotl32(k1 * c1, 15) * c2; } return fmix32(h1^ len); } ipt-netflow-2.6/openwrt/000077500000000000000000000000001404773755400153475ustar00rootroot00000000000000ipt-netflow-2.6/openwrt/Makefile000066400000000000000000000100131404773755400170020ustar00rootroot00000000000000include $(TOPDIR)/rules.mk include $(INCLUDE_DIR)/kernel.mk PKG_NAME:=ipt-netflow PKG_RELEASE:=2 PKG_SOURCE_URL:=https://github.com/aabc/$(PKG_NAME).git PKG_VERSION:=2.5.1 PKG_SOURCE_VERSION:=v$(PKG_VERSION) #TO BUILD development version uncomment 2 rows below and remove patches #PKG_VERSION:=$(shell (git ls-remote $(PKG_SOURCE_URL) | grep refs/heads/master | cut -f 1 | head -c 7)) #PKG_SOURCE_VERSION:=HEAD PKG_SOURCE_PROTO:=git PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz PKG_SOURCE_SUBDIR:=$(PKG_NAME)-$(PKG_VERSION) PKG_BUILD_DIR := $(KERNEL_BUILD_DIR)/$(PKG_NAME)-$(PKG_VERSION) PKG_DEPENDS:=iptables include $(INCLUDE_DIR)/package.mk define KernelPackage/ipt-netflow SECTION:=net CATEGORY:=Network SUBMENU:=Netflow TITLE:=Netflow iptables module for Linux kernel URL:=http://ipt-netflow.sourceforge.net/ FILES:=$(PKG_BUILD_DIR)/ipt_NETFLOW.ko DEPENDS:=+iptables +iptables-mod-netflow endef define Package/iptables-mod-netflow SECTION:=net CATEGORY:=Network SUBMENU:=Netflow TITLE:=Netflow iptables module for Linux kernel URL:=http://ipt-netflow.sourceforge.net/ #DEPENDS:=+kmod-ipt-netflow DEPENDS:=+iptables endef CONFIGURE_ARGS:= \ --kdir="$(LINUX_DIR)" define Package/iptables-mod-netflow/config menu "Configuration" depends on PACKAGE_iptables-mod-netflow config PACKAGE_iptables-mod-netflow_natevents bool "enables natevents support" default n config PACKAGE_iptables-mod-netflow_snmp-rules bool "enables SNMP-index conversion rules" default n config PACKAGE_iptables-mod-netflow_macaddress bool "enables MAC address for v9/IPFIX" default n config PACKAGE_iptables-mod-netflow_vlan bool "enables VLAN Ids for v9/IPFIX" default n config PACKAGE_iptables-mod-netflow_direction bool "enables flowDirection(61) Element" default n config PACKAGE_iptables-mod-netflow_sampler bool "enables Flow Sampling" default n config PACKAGE_iptables-mod-netflow_aggregation bool "enables aggregation rules" default n config PACKAGE_iptables-mod-netflow_promisc bool "enables promisc hack mode" default n config PACKAGE_iptables-mod-netflow_promisc-mpls bool "decapsulate MPLS in promisc mode" default n config PACKAGE_iptables-mod-netflow_physdev bool "enables physdev reporting" default n config PACKAGE_iptables-mod-netflow_physdev-override bool "to override interfaces" default n config PACKAGE_iptables-mod-netflow_snmp-agent bool "disables net-snmp agent" default y config PACKAGE_iptables-mod-netflow_dkms bool "disables DKMS support 
completely" default y config PACKAGE_iptables-mod-netflow_dkms-install bool "no DKMS install but still create dkms.conf" default n endmenu endef CONFIGURE_ARGS += \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_natevents),--enable-natevents) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_snmp-rules),--enable-snmp-rules) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_macaddress),--enable-macaddress) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_vlan),--enable-vlan) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_direction),--enable-direction) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_sampler),--enable-sampler) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_aggregation),--enable-aggregation) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_promisc),--enable-promisc) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_promisc-mpls),--promisc-mpls) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_physdev),--enable-physdev) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_physdev-override),--enable-physdev-override) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_snmp-agent),--disable-snmp-agent) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_dkms),--disable-dkms) \ $(if $(CONFIG_PACKAGE_iptables-mod-netflow_dkms-install),--disable-dkms-install) #TODO: --enable-sampler=hash --promisc-mpls=N $(eval $(call KernelPackage,ipt-netflow)) $(eval $(call Package,kmod-ipt-netflow)) define Package/iptables-mod-netflow/install $(MAKE) -C $(PKG_BUILD_DIR) DESTDIR=$(1) linstall #TODO: snmp install, dkms install endef $(eval $(call BuildPackage,iptables-mod-netflow)) ipt-netflow-2.6/openwrt/Readme.md000066400000000000000000000036011404773755400170660ustar00rootroot00000000000000Cross-compiling and packages for openwrt === Place Makefile in `packages/network/ipt-netflow` directory in OpenWRT bouldroot. Run `make menuconfig` and select package in Network/Netflow menu. Configure args partially supported. Run `make` to build full firmware or `make package/network/ipt-netflow/{clean,prepare,configure,compile,install}` to rebuild packages. To make git version uncomment two lines in Makefile. Tested to work on Chaos Calmer and Designated Driver with Atheros AR7xxx/AR9xxx target. For ipt-netflow 2.2 patches are needed, drop it for next version or git master to build. Making and installilng === ```shell mkdir debian-toolchain sudo debootstrap jessie debian-toolchain sudo chroot debian-toolchain . /etc/profile apt update apt install git ssh-client build-essential mercurial subversion \ binutils flex bzip2 asciidoc ncurses-dev libssl-dev gawk zlib1g-dev fastjar adduser user su user . 
/etc/profile cd ~ git clone https://github.com/openwrt/openwrt.git openwrt-trunk git clone https://github.com/aabc/ipt-netflow.git cd openwrt-trunk ./scripts/feeds update -a ln -s ~/ipt-netflow/openwrt/ package/network/ipt-netflow make menuconfig #select target and device #go to network/netflow and check both make #and go for dinner or a walk ;) #after five hours scp bin/ar71xx/packages/kernel/kmod-ipt-netflow_4.4.14+2.2-2_ar71xx.ipk \ root@192.168.236.79:/tmp/ scp bin/ar71xx/packages/base/iptables-mod-netflow_2.2-2_ar71xx.ipk \ root@192.168.236.79:/tmp/ scp bin/ar71xx/packages/base/kernel_4.4.14-1-abf9cc6feb410252d667326556dae184_ar71xx.ipk \ root@192.168.236.79:/tmp/ #goto router ssh root@192.168.236.79 opkg install /tmp/*.ipk insmod /lib/modules/4.4.14/ipt_NETFLOW.ko sysctl -w net.netflow.protocol=5 sysctl -w net.netflow.destination=192.168.236.34:2055 iptables -I FORWARD -j NETFLOW iptables -I INPUT -j NETFLOW iptables -I OUTPUT -j NETFLOW ``` ipt-netflow-2.6/openwrt/patches/000077500000000000000000000000001404773755400167765ustar00rootroot00000000000000ipt-netflow-2.6/openwrt/patches/310-Makefile_crosscompile.patch000066400000000000000000000011171404773755400246170ustar00rootroot00000000000000Index: ipt-netflow-2.2/Makefile.in =================================================================== --- ipt-netflow-2.2.orig/Makefile.in +++ ipt-netflow-2.2/Makefile.in @@ -64,10 +64,10 @@ sinstall: | snmp_NETFLOW.so IPT-NETFLOW- fi %_sh.o: libipt_NETFLOW.c - gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c + $(CC) $(CFLAGS) -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c %.so: %_sh.o - gcc -shared -o $@ $< + $(CC) -shared -o $@ $< version.h: ipt_NETFLOW.c ipt_NETFLOW.h compat.h Makefile @./version.sh --define > version.h ipt-netflow-2.6/raw_promisc.patch000066400000000000000000000014761404773755400172270ustar00rootroot00000000000000 This simple hack will allow to see promisc traffic in raw table of iptables. Of course you will need to enable promisc on the interface. Refer to README.promisc for details. Example how to catch desired traffic: iptables -A PREROUTING -t raw -i eth2 -j NETFLOW --- linux-2.6.26/net/ipv4/ip_input.old.c 2008-07-14 01:51:29.000000000 +0400 +++ linux-2.6.26/net/ipv4/ip_input.c 2008-08-06 14:02:16.000000000 +0400 @@ -378,12 +378,6 @@ struct iphdr *iph; u32 len; - /* When the interface is in promisc. mode, drop all the crap - * that it receives, do not try to analyse it. - */ - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop; - IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { ipt-netflow-2.6/raw_promisc_debian_squeeze6.patch000066400000000000000000000022551404773755400223540ustar00rootroot00000000000000 Short manual and patch for Debian Squeeze suggested by Pavel Odintsov: On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote: > > Debian Squeeze promisc. > > cd /usr/src > apt-get install -y dpkg-dev > apt-get build-dep linux-image-2.6.32-5-amd64 > cd linux-2.6-2.6.32/ > apt-get source linux-image-2.6.32-5-amd64 > > wget .... 
/root/raw_promisc_debian_squeeze6.patch > patch -p1 < raw_promisc_debian_squeeze6.patch > : > debian/rules source > > : > debian/rules binary > diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c --- linux-2.6-2.6.32/net/ipv4/ip_input.c 2009-12-03 04:51:21.000000000 +0100 +++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c 2012-06-25 19:13:49.000000000 +0200 @@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop; + //if (skb->pkt_type == PACKET_OTHERHOST) + // goto drop; IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); ipt-netflow-2.6/snmp_NETFLOW.c000066400000000000000000000420231404773755400161710ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0-only * * dlmod plugin for net-snmp for monitoring * ipt_NETFLOW module via IPT-NETFLOW-MIB. * * (c) 2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include #include #include #include #include #include #include #include #define iptNetflowMIB_oid 1, 3, 6, 1, 4, 1, 37476, 9000, 10, 1 /* .1.3.6.1.4.1.37476.9000.10.1 */ /* iptNetflowObjects */ static oid iptNetflowModule_oid[] = { iptNetflowMIB_oid, 1, 1 }; static oid iptNetflowSysctl_oid[] = { iptNetflowMIB_oid, 1, 2 }; /* iptNetflowStatistics */ static oid iptNetflowTotals_oid[] = { iptNetflowMIB_oid, 2, 1 }; static oid iptNetflowCpuTable_oid[] = { iptNetflowMIB_oid, 2, 2 }; static oid iptNetflowSockTable_oid[] = { iptNetflowMIB_oid, 2, 3 }; struct snmp_vars { int obj; int type; char *name; time_t ts; /* when value last read */ long long val64; }; struct snmp_vars modinfos[] = { {1, ASN_OCTET_STR, "name"}, {2, ASN_OCTET_STR, "version"}, {3, ASN_OCTET_STR, "srcversion"}, {4, ASN_OCTET_STR, "loadTime"}, /* DateAndTime */ {5, ASN_INTEGER, "refcnt"}, { 0 } }; #define MODINFO_NAME "ipt_NETFLOW" #define MODINFO_NAME_ID 1 #define MODINFO_DATE_ID 4 struct snmp_vars sysctls[] = { {1, ASN_INTEGER, "protocol"}, {2, ASN_INTEGER, "hashsize"}, {3, ASN_INTEGER, "maxflows"}, {4, ASN_INTEGER, "active_timeout"}, {5, ASN_INTEGER, "inactive_timeout"}, {6, ASN_INTEGER, "sndbuf"}, {7, ASN_OCTET_STR, "destination"}, {8, ASN_OCTET_STR, "aggregation"}, {9, ASN_OCTET_STR, "sampler"}, {10, ASN_INTEGER, "natevents"}, {11, ASN_INTEGER, "promisc"}, {12, ASN_OCTET_STR, "snmp-rules"}, {13, ASN_INTEGER, "scan-min"}, { 0 } }; struct snmp_vars totals[] = { {1, ASN_COUNTER64, "inBitRate"}, {2, ASN_GAUGE, "inPacketRate"}, {3, ASN_COUNTER64, "inFlows"}, {4, ASN_COUNTER64, "inPackets"}, {5, ASN_COUNTER64, "inBytes"}, {6, ASN_GAUGE, "hashMetric"}, {7, ASN_GAUGE, "hashMemory"}, {8, ASN_GAUGE, "hashFlows"}, {9, ASN_GAUGE, "hashPackets"}, {10, ASN_COUNTER64, "hashBytes"}, {11, ASN_COUNTER64, "dropPackets"}, {12, ASN_COUNTER64, "dropBytes"}, {13, ASN_GAUGE, "outByteRate"}, {14, ASN_COUNTER64, "outFlows"}, {15, ASN_COUNTER64, "outPackets"}, {16, ASN_COUNTER64, "outBytes"}, {17, ASN_COUNTER64, "lostFlows"}, {18, ASN_COUNTER64, "lostPackets"}, {19, ASN_COUNTER64, "lostBytes"}, {20, ASN_COUNTER, "errTotal"}, {21, ASN_COUNTER, "sndbufPeak"}, { 0 } }; #define TOTALS_METRIC_ID 6 static netsnmp_table_data_set *cpu_data_set; static netsnmp_cache *stat_cache = NULL; struct snmp_vars cputable[] = { {1, ASN_INTEGER, "cpuIndex"}, {2, ASN_GAUGE, "cpuInPacketRate"}, {3, 
ASN_COUNTER64, "cpuInFlows"}, {4, ASN_COUNTER64, "cpuInPackets"}, {5, ASN_COUNTER64, "cpuInBytes"}, {6, ASN_GAUGE, "cpuHashMetric"}, {7, ASN_COUNTER64, "cpuDropPackets"}, {8, ASN_COUNTER64, "cpuDropBytes"}, {9, ASN_COUNTER, "cpuErrTrunc"}, {10, ASN_COUNTER, "cpuErrFrag"}, {11, ASN_COUNTER, "cpuErrAlloc"}, {12, ASN_COUNTER, "cpuErrMaxflows"}, { 0 } }; static netsnmp_table_data_set *sock_data_set; struct snmp_vars socktable[] = { {1, ASN_INTEGER, "sockIndex"}, {2, ASN_OCTET_STR, "sockDestination"}, {3, ASN_INTEGER, "sockActive"}, {4, ASN_COUNTER, "sockErrConnect"}, {5, ASN_COUNTER, "sockErrFull"}, {6, ASN_COUNTER, "sockErrCberr"}, {7, ASN_COUNTER, "sockErrOther"}, {8, ASN_GAUGE, "sockSndbuf"}, {9, ASN_GAUGE, "sockSndbufFill"}, {10, ASN_GAUGE, "sockSndbufPeak"}, { 0 } }; static time_t totals_ts; /* when statistics last read from kernel */ static int var_max(struct snmp_vars *head) { struct snmp_vars *sys; int max = 0; for (sys = head; sys->obj; sys++) if (max < sys->obj) max = sys->obj; return max; } static struct snmp_vars *find_varinfo(struct snmp_vars *head, const int obj) { struct snmp_vars *sys; for (sys = head; sys->obj; sys++) { if (sys->obj == obj) return sys; } return NULL; } static struct snmp_vars *find_varinfo_str(struct snmp_vars *head, const char *name) { struct snmp_vars *sys; for (sys = head; sys->obj; sys++) { if (!strcmp(sys->name, name)) return sys; } return NULL; } static void modinfo_fname(char *name, char *fname, size_t flen) { snprintf(fname, flen, "/sys/module/" MODINFO_NAME "/%s", name); } static void sysctl_fname(char *name, char *fname, size_t flen) { snprintf(fname, flen, "/proc/sys/net/netflow/%s", name); } static int sysctl_access_ok(char *name) { char fname[64]; sysctl_fname(name, fname, sizeof(fname)); if (access(fname, W_OK) < 0) return 0; return 1; } static char *file_read_string(char *name, char *buf, size_t size) { int fd = open(name, O_RDONLY); if (fd < 0) return NULL; int n = read(fd, buf, size - 1); if (n < 0) { close(fd); return NULL; } buf[n] = '\0'; close(fd); return buf; } static char *modinfo_read_string(char *name, char *buf, size_t size) { char fname[64]; modinfo_fname(name, fname, sizeof(fname)); return file_read_string(fname, buf, size); } static char *sysctl_read_string(char *name, char *buf, size_t size) { char fname[64]; sysctl_fname(name, fname, sizeof(fname)); return file_read_string(fname, buf, size); } static int sysctl_write_string(char *name, char *buf, size_t size) { char fname[64]; int fd; int n; sysctl_fname(name, fname, sizeof(fname)); fd = open(fname, O_RDWR, 0644); if (fd < 0) return fd; n = write(fd, buf, size); close(fd); return n; } static int sysctl_read(netsnmp_request_info *request, int obj) { struct snmp_vars *sys = find_varinfo(sysctls, obj); char buf[225]; char *p; long value; if (!sys) goto nosuchobject; p = sysctl_read_string(sys->name, buf, sizeof(buf)); if (!p) goto nosuchobject; switch (sys->type) { case ASN_INTEGER: value = atoi(p); snmp_set_var_typed_value(request->requestvb, sys->type, (u_char *)&value, sizeof(value)); return SNMP_ERR_NOERROR; case ASN_OCTET_STR: snmp_set_var_typed_value(request->requestvb, sys->type, (u_char *)p, strcspn(p, "\n")); return SNMP_ERR_NOERROR; } nosuchobject: netsnmp_request_set_error(request, SNMP_NOSUCHOBJECT); return SNMP_ERR_NOERROR; } static int sysctl_write(netsnmp_request_info *request, int obj) { struct snmp_vars *sys = find_varinfo(sysctls, obj); char buf[225]; int len; if (!sys) { netsnmp_request_set_error(request, SNMP_NOSUCHOBJECT); return SNMP_ERR_NOERROR; } switch 
(sys->type) { case ASN_INTEGER: snprintf(buf, sizeof(buf), "%ld\n", *(request->requestvb->val.integer)); break; case ASN_UNSIGNED: snprintf(buf, sizeof(buf), "%lu\n", *(request->requestvb->val.integer)); break; case ASN_OCTET_STR: snprintf(buf, sizeof(buf), "%s\n", request->requestvb->val.string); break; default: netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE); return SNMP_ERR_NOERROR; } len = strlen(buf); if (sysctl_write_string(sys->name, buf, len) < len) netsnmp_request_set_error(request, SNMP_ERR_BADVALUE); return SNMP_ERR_NOERROR; } static int iptNetflowModule_handler( netsnmp_mib_handler *handler, netsnmp_handler_registration *reginfo, netsnmp_agent_request_info *reqinfo, netsnmp_request_info *request) { struct snmp_vars *sys; oid obj; char buf[225]; char *p = NULL; long value; obj = request->requestvb->name[request->requestvb->name_length - 2]; sys = find_varinfo(modinfos, obj); if (!sys) { netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME); return SNMP_ERR_NOERROR; } if (reqinfo->mode != MODE_GET) { netsnmp_request_set_error(request, SNMP_ERR_READONLY); return SNMP_ERR_NOERROR; } switch (obj) { case MODINFO_NAME_ID: p = MODINFO_NAME; break; case MODINFO_DATE_ID: { size_t len; struct stat st; modinfo_fname(".", buf, sizeof(buf)); if (stat(buf, &st) < 0) break; p = (char *)date_n_time(&st.st_mtime, &len); snmp_set_var_typed_value(request->requestvb, ASN_OCTET_STR, p, len); return SNMP_ERR_NOERROR; } default: p = modinfo_read_string(sys->name, buf, sizeof(buf)); } if (!p) { netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME); return SNMP_ERR_NOERROR; } switch (sys->type) { case ASN_INTEGER: value = atoi(p); snmp_set_var_typed_value(request->requestvb, sys->type, (u_char *)&value, sizeof(value)); break; case ASN_OCTET_STR: snmp_set_var_typed_value(request->requestvb, sys->type, (u_char *)p, strcspn(p, "\n")); break; default: netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE); } return SNMP_ERR_NOERROR; } static int iptNetflowSysctl_handler( netsnmp_mib_handler *handler, netsnmp_handler_registration *reginfo, netsnmp_agent_request_info *reqinfo, netsnmp_request_info *request) { struct snmp_vars *sys; oid obj; obj = request->requestvb->name[request->requestvb->name_length - 2]; switch (reqinfo->mode) { case MODE_GET: return sysctl_read(request, obj); case MODE_SET_RESERVE1: sys = find_varinfo(sysctls, obj); if (!sys || !sysctl_access_ok(sys->name)) netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME); if (sys && request->requestvb->type != sys->type) netsnmp_request_set_error(request, SNMP_ERR_WRONGTYPE); break; case MODE_SET_RESERVE2: case MODE_SET_FREE: case MODE_SET_UNDO: case MODE_SET_COMMIT: return SNMP_ERR_NOERROR; case MODE_SET_ACTION: return sysctl_write(request, obj); default: return SNMP_ERR_GENERR; } return SNMP_ERR_NOERROR; } #define TOTAL_INTERVAL 1 static void clear_data_set(netsnmp_table_data_set *data_set) { netsnmp_table_row *row, *nextrow; for (row = netsnmp_table_data_set_get_first_row(data_set); row; row = nextrow) { nextrow = netsnmp_table_data_set_get_next_row(data_set, row); netsnmp_table_dataset_remove_and_delete_row(data_set, row); } } static void parse_table_row( int cpu, char *p, struct snmp_vars *sys, netsnmp_table_data_set *data_set) { netsnmp_table_row *row; row = netsnmp_create_table_data_row(); netsnmp_table_row_add_index(row, ASN_INTEGER, (u_char *)&cpu, sizeof(cpu)); if (sys == cputable) { /* add cpuIndex as column too to break SMIv2 */ netsnmp_set_row_column(row, 1, sys->type, (char *)&cpu, sizeof(cpu)); } for (++sys; p && 
	     sys->obj; sys++) {
		char *val;
		long long val64;
		unsigned int uval32;
		int val32;
		struct counter64 c64;

		p += strspn(p, " \t");
		val = p;
		if ((p = strpbrk(p, " \t")))
			*p++ = '\0';
		if (index(val, '.')) {
			double d = strtod(val, NULL);

			val64 = (long long)(d * 100);
		} else
			val64 = strtoll(val, NULL, 10);
		switch (sys->type) {
		case ASN_OCTET_STR:
			netsnmp_set_row_column(row, sys->obj, sys->type,
			    (char *)val, strlen(val));
			break;
		case ASN_INTEGER:
		case ASN_GAUGE:
			val32 = (int)val64;
			netsnmp_set_row_column(row, sys->obj, sys->type,
			    (char *)&val32, sizeof(val32));
			break;
		case ASN_COUNTER:
			uval32 = (unsigned int)val64;
			netsnmp_set_row_column(row, sys->obj, sys->type,
			    (char *)&uval32, sizeof(uval32));
			break;
		case ASN_COUNTER64:
			c64.low = (uint32_t)val64;
			c64.high = val64 >> 32;
			netsnmp_set_row_column(row, sys->obj, sys->type,
			    (char *)&c64, sizeof(c64));
			break;
		default:
			netsnmp_table_dataset_delete_row(row);
			continue;
		}
	}
	netsnmp_table_data_add_row(data_set->table, row);
}

/* Re-read statistics from /proc/net/stat/ipt_netflow_snmp (rate limited
 * to roughly once per TOTAL_INTERVAL seconds) and refresh the totals
 * scalars and the per-cpu and per-socket table data sets. */
static void grab_ipt_netflow_snmp(time_t now)
{
	static char buf[4096];
	int fd;
	int n;
	char *p = buf;

	if ((now - totals_ts) < (TOTAL_INTERVAL + 1))
		return;
	if ((fd = open("/proc/net/stat/ipt_netflow_snmp", O_RDONLY)) < 0)
		return;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';
	DEBUGMSGTL(("netflow", "%s\n", buf));

	clear_data_set(cpu_data_set);
	clear_data_set(sock_data_set);

	while (*p) {
		struct snmp_vars *sys;
		char *name = p;
		char *val;

		if (!(p = strpbrk(p, " \t")))
			break;
		*p++ = '\0';
		val = p + strspn(p, " \t");
		p = index(p, '\n');
		*p++ = '\0';
		if (!strncmp(name, "cpu", 3)) {
			parse_table_row(atoi(name + 3), val, cputable, cpu_data_set);
			continue;
		} else if (!strncmp(name, "sock", 4)) {
			parse_table_row(atoi(name + 4), val, socktable, sock_data_set);
			continue;
		}
		if (!(sys = find_varinfo_str(totals, name)))
			continue;
		if (index(val, '.')) {
			double d = strtod(val, NULL);

			sys->val64 = (long long)(d * 100);
		} else
			sys->val64 = strtoll(val, NULL, 10);
		sys->ts = now;
	}
	totals_ts = now;
}

static int iptNetflowTotals_handler(
    netsnmp_mib_handler		*handler,
    netsnmp_handler_registration *reginfo,
    netsnmp_agent_request_info	*reqinfo,
    netsnmp_request_info	*request)
{
	struct snmp_vars *sys;
	time_t now = time(NULL);
	oid obj;
	unsigned int uval32;
	int val32;
	struct counter64 c64;

	grab_ipt_netflow_snmp(now);
	obj = request->requestvb->name[request->requestvb->name_length - 2];
	sys = find_varinfo(totals, obj);
	if (!sys || ((now - sys->ts) > (TOTAL_INTERVAL * 2 + 3))) {
		netsnmp_request_set_error(request, SNMP_ERR_NOSUCHNAME);
		return SNMP_ERR_NOERROR;
	}
	if (reqinfo->mode != MODE_GET) {
		netsnmp_request_set_error(request, SNMP_ERR_READONLY);
		return SNMP_ERR_NOERROR;
	}
	switch (sys->type) {
	case ASN_GAUGE:
		val32 = (int)sys->val64;
		snmp_set_var_typed_value(request->requestvb, sys->type,
		    (u_char *)&val32, sizeof(val32));
		break;
	case ASN_COUNTER:
		uval32 = (unsigned int)sys->val64;
		snmp_set_var_typed_value(request->requestvb, sys->type,
		    (u_char *)&uval32, sizeof(uval32));
		break;
	case ASN_COUNTER64:
		c64.low = (uint32_t)sys->val64;
		c64.high = sys->val64 >> 32;
		snmp_set_var_typed_value(request->requestvb, ASN_COUNTER64,
		    (u_char *)&c64, sizeof(c64));
		break;
	default:
		return SNMP_ERR_GENERR;
	}
	return SNMP_ERR_NOERROR;
}

static int stat_cache_load(netsnmp_cache *cache, void *x)
{
	grab_ipt_netflow_snmp(time(NULL));
	return 0;
}

static void dummy_cache_free(netsnmp_cache *cache, void *x)
{
	/* free_cache callback is not always checked for NULL
	 * pointer.
	 */
}

void init_netflow(void)
{
	netsnmp_handler_registration *reg;
	struct snmp_vars *sys;

	/* snmpd -f -L -Dnetflow,dlmod */
	DEBUGMSGTL(("netflow", "init_netflow\n"));

	netsnmp_register_scalar_group(
	    netsnmp_create_handler_registration(
		"iptNetflowModule",
		iptNetflowModule_handler,
		iptNetflowModule_oid,
		OID_LENGTH(iptNetflowModule_oid),
		HANDLER_CAN_RONLY),
	    1, var_max(modinfos));

	netsnmp_register_scalar_group(
	    netsnmp_create_handler_registration(
		"iptNetflowSysctl",
		iptNetflowSysctl_handler,
		iptNetflowSysctl_oid,
		OID_LENGTH(iptNetflowSysctl_oid),
		HANDLER_CAN_RWRITE),
	    1, var_max(sysctls));

	netsnmp_register_scalar_group(
	    netsnmp_create_handler_registration(
		"iptNetflowTotals",
		iptNetflowTotals_handler,
		iptNetflowTotals_oid,
		OID_LENGTH(iptNetflowTotals_oid),
		HANDLER_CAN_RONLY),
	    1, var_max(totals));

	/* Register first table. */
	reg = netsnmp_create_handler_registration(
	    "iptNetflowCpuTable",
	    /* no handler */ NULL,
	    iptNetflowCpuTable_oid,
	    OID_LENGTH(iptNetflowCpuTable_oid),
	    HANDLER_CAN_RONLY);
	/* set up columns */
	cpu_data_set = netsnmp_create_table_data_set("iptNetflowCpuDataSet");
	netsnmp_table_set_add_indexes(cpu_data_set, ASN_INTEGER, 0);
	/* I include cpuIndex into columns, which is not SMIv2'ish */
	for (sys = cputable; sys->obj; sys++)
		netsnmp_table_set_add_default_row(cpu_data_set, sys->obj,
		    sys->type, 0, NULL, 0);
	netsnmp_register_table_data_set(reg, cpu_data_set, NULL);
	/* cache handler will load actual data, and it needs to be
	 * injected in front of dataset handler to be called first */
	stat_cache = netsnmp_cache_create(
	    /* no timeout */ -1,
	    stat_cache_load,
	    dummy_cache_free,
	    iptNetflowCpuTable_oid,
	    OID_LENGTH(iptNetflowCpuTable_oid));
	netsnmp_inject_handler(reg, netsnmp_cache_handler_get(stat_cache));

	/* Register second table. */
	reg = netsnmp_create_handler_registration(
	    "iptNetflowSockTable",
	    /* no handler */ NULL,
	    iptNetflowSockTable_oid,
	    OID_LENGTH(iptNetflowSockTable_oid),
	    HANDLER_CAN_RONLY);
	/* set up columns */
	sock_data_set = netsnmp_create_table_data_set("iptNetflowSockDataSet");
	/* I don't include sockIndex into columns, which is more SMIv2'ish */
	netsnmp_table_set_add_indexes(sock_data_set, ASN_INTEGER, 0);
	for (sys = &socktable[1]; sys->obj; sys++)
		netsnmp_table_set_add_default_row(sock_data_set, sys->obj,
		    sys->type, 0, NULL, 0);
	netsnmp_register_table_data_set(reg, sock_data_set, NULL);
	/* as before, cache handler will load actual data, and it needs
	 * to be injected in front of dataset handler to be called first */
	stat_cache = netsnmp_cache_create(
	    /* no timeout */ -1,
	    stat_cache_load,
	    dummy_cache_free,
	    iptNetflowSockTable_oid,
	    OID_LENGTH(iptNetflowSockTable_oid));
	netsnmp_inject_handler(reg, netsnmp_cache_handler_get(stat_cache));
}

void deinit_netflow(void)
{
	DEBUGMSGTL(("netflow", "deinit_netflow\n"));
}
ipt-netflow-2.6/test_update_config.sh000077500000000000000000000014051404773755400200560ustar00rootroot00000000000000#!/bin/bash -efu
# SPDX-License-Identifier: GPL-2.0-only
#
# Update default .config for values required to run tests
#

set -x

cp -f .config .config-pkt-netflow.bak

if type virtme-configkernel >/dev/null 2>&1; then
	virtme-configkernel --update
fi

scripts/config \
	-e CONFIG_VETH \
	-e CONFIG_PACKET \
	-e CONFIG_NETFILTER \
	-e CONFIG_NETFILTER_XTABLES \
	-e NETFILTER_ADVANCED \
	-e CONFIG_IP_NF_FILTER \
	-e CONFIG_IP_NF_IPTABLES \
	-e CONFIG_IP6_NF_FILTER \

if [ "${1-}" = debug ]; then
	scripts/config \
		-e CONFIG_LOCK_DEBUGGING_SUPPORT \
		-e CONFIG_PROVE_LOCKING \
		-e CONFIG_DEBUG_SPINLOCK \
		-e CONFIG_FRAME_POINTER \
		-d CONFIG_RANDOMIZE_BASE \

fi

make olddefconfig
scripts/diffconfig .config-pkt-netflow.bak .config

# export XTABLES_LIBDIR=
ipt-netflow-2.6/testing.sh000077500000000000000000000017171404773755400156710ustar00rootroot00000000000000#!/bin/bash

set -e

if [ "$1" = "" ]; then
	echo Maintainer only tool.
	exit 1
elif [ "$1" = all ]; then
	exec bash $0 linux-2.6.18 centos5 linux-3.11.2 centos6 linux-3.4.66 linux-3.9.11 centos7 linux-3.14 linux-3.17 linux-3.19
	exit 1
fi

smilint IPT-NETFLOW-MIB.my

cfg=()
echo -n Testing for:
for k in "$@"; do
	if [ ! -d /usr/src/$k ]; then continue; fi
	echo -n " $k"
	cfg+=("./configure --kdir=/usr/src/$k")
done
echo

readarray -t opts </dev/null 2>&1; then \
	GVERSION=`git describe --dirty 2>/dev/null`
	if [ "$GVERSION" ]; then
		MVERSION=${GVERSION#v}
	fi
else
	GVERSION=
fi

if [ "$1" = --define ]; then
	# called from Makefile to create version.h
	# which should contain GITVERSION or be empty.
	if [ "$GVERSION" ]; then
		echo "#define GITVERSION \"$MVERSION\""
	else
		echo "/* placeholder, because kernel doesn't like empty files */"
	fi
else
	# normal run
	echo $MVERSION
fi
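
# Illustrative usage sketch only, not part of the original script. The
# script name "version.sh" and the sample version string below are
# assumptions; the two invocation modes themselves are the ones
# implemented above.
#
#	./version.sh --define > version.h	# Makefile-style use; writes e.g.
#						#   #define GITVERSION "2.6-6-gabcdef0"
#						# or the placeholder comment when the
#						# tree is not a git checkout
#	./version.sh				# normal run; prints the bare version
#						# string, e.g. 2.6-6-gabcdef0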