pax_global_header00006660000000000000000000000064136103674720014523gustar00rootroot0000000000000052 comment=7ebf46d1f7d487ab2f969e0f6f6386aee15933b4 logswan-2.1.3/000077500000000000000000000000001361036747200132005ustar00rootroot00000000000000logswan-2.1.3/.gitignore000066400000000000000000000000061361036747200151640ustar00rootroot00000000000000build logswan-2.1.3/.travis.yml000066400000000000000000000030151361036747200153100ustar00rootroot00000000000000arch: - amd64 - arm64 matrix: include: - os: linux arch: amd64 dist: bionic - os: linux arch: arm64 language: c compiler: - clang - gcc before_install: - sudo apt-get -qq update - sudo apt-get install -y libmaxminddb-dev libjansson-dev - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca- script: cmake . && make env: global: # travis encrypt -r fcambus/logswan encrypt COVERITY_SCAN_TOKEN=token - secure: "NuBqQ07eVrwB0cbrJbEqj56gVtM5zTwY0umbgE+ElB2joSLXzfqniNt0n1J105rEqLVXNvsviLMHkSo5M0klYqaep/4OJp0Zu+uUWixh2Ez6ySleT0+iZBVs7+izMbO5dpefSzLipcL8bSDq4U/08djRXRST+r87QD4e2oR3rwSqLWqnnJx69t80Q1AxModGSJ8OajApMjdSedt+9lPprhFY4Nxj52oWHlStCwko4zLUPexNnUq79cvCAnXgpX3WB6r4K95N0VLLsCnP+XsovvlPut4kP2gm9vOrpaYuFHw3Mp6xAEnqnJ6Ma9cAvlokdImEc/mVVtNBmpU8cifWQRx/y4vX6ZbDTRzA37iYPzlg/GpN+FYtDMXRMSqqdwdRGSqldMre3U9uaWQOjGOkhroJGBL8CSJFu1R9HYHGyLPj763CkQOAxpe/4BP2SknvRX/f//JgEZDuZ1hGVrhqtmJ6TlV57sAh3d4IjN5ZixFn8kUekLkvStLsoy5P4eeOCeas7KO3/VvRui7LHmf033DtE901RQsW0UysCxmT1SRsalVDwbwL2zahSoKT4So0K/DiULkB3zd8+TUld8u2VeJDdy4gIMiKhj+lQKn++G5MaIKCscf5gtQ1zwXQeWktQfRQRxyUa/qByHwxLlGzhQC7ccoLCNhcPdKp7RfS/I8=" addons: coverity_scan: project: name: "fcambus/logswan" version: 2.1.3 description: "Fast Web log analyzer using probabilistic data structures" notification_email: fred@statdns.com build_command_prepend: cmake . 
build_command: make -j 4 branch_pattern: coverity_scan logswan-2.1.3/AUTHORS000066400000000000000000000001421361036747200142450ustar00rootroot00000000000000Logswan is developed by: Frederic Cambus Site: https://www.cambus.net logswan-2.1.3/CMakeLists.txt000066400000000000000000000043421361036747200157430ustar00rootroot00000000000000# # Logswan 2.1.3 # Copyright (c) 2015-2020, Frederic Cambus # https://www.logswan.org # # Created: 2015-05-31 # Last Updated: 2018-11-19 # # Logswan is released under the BSD 2-Clause license # See LICENSE file for details # cmake_minimum_required (VERSION 2.6) project(logswan C) include(CheckFunctionExists) include(GNUInstallDirs) # Conditional build options set(ENABLE_SECCOMP 0 CACHE BOOL "Enable building with seccomp") # Check if system has pledge and strtonum list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_OPENBSD_SOURCE) check_function_exists(pledge HAVE_PLEDGE) check_function_exists(strtonum HAVE_STRTONUM) if(ENABLE_SECCOMP) # Check if system has seccomp message(STATUS "Looking for seccomp") find_path(SECCOMP NAMES "linux/seccomp.h") if(SECCOMP) message(STATUS "Looking for seccomp - found") add_definitions(-DHAVE_SECCOMP=1) else() message(STATUS "Looking for seccomp - not found") endif() endif(ENABLE_SECCOMP) # Additional include directories for compat functions + dependencies include_directories("compat") include_directories("deps/hll") # libmaxminddb find_path(GEOIP2_INCLUDE_DIRS maxminddb.h) find_library(GEOIP2_LIBRARIES NAMES maxminddb REQUIRED) include_directories(${GEOIP2_INCLUDE_DIRS}) # Jansson find_path(JANSSON_INCLUDE_DIRS jansson.h) find_library(JANSSON_LIBRARIES NAMES jansson REQUIRED) include_directories(${JANSSON_INCLUDE_DIRS}) set(CMAKE_BUILD_TYPE Release) set(DEPS deps/hll/hll.c deps/MurmurHash3/MurmurHash3.c) set(SRC src/logswan.c src/config.c src/continents.c src/countries.c src/output.c src/parse.c) if(NOT HAVE_PLEDGE) set (SRC ${SRC} compat/pledge.c) endif() if(NOT HAVE_STRTONUM) set (SRC ${SRC} compat/strtonum.c) 
endif() set(GEOIP2DIR ${CMAKE_INSTALL_PREFIX}/share/GeoIP2 CACHE PATH "Path to GeoIP2 databases") add_definitions(-Wall -Wextra -std=c11 -pedantic) add_definitions(-DGEOIP2DIR="${GEOIP2DIR}/") add_executable(logswan ${SRC} ${DEPS}) target_link_libraries(logswan ${GEOIP2_LIBRARIES} ${JANSSON_LIBRARIES} m) install(TARGETS logswan DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES logswan.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1/) enable_testing() add_test(logswan logswan) add_test(processing logswan ${PROJECT_SOURCE_DIR}/examples/logswan.log) logswan-2.1.3/ChangeLog000066400000000000000000000146761361036747200147700ustar00rootroot00000000000000Logswan 2.1.3 (2020-01-17) - Add a new test target, to test log processing - Move printed statistics after the actual output - Use OpenBSD style(9) for function prototypes and declarations - Remove seccomp mention in README as it is currently disabled by default Logswan 2.1.2 (2019-11-19) - Add ENABLE_SECCOMP build option, to allow building seccomp support conditionally - Disable seccomp by default, it needs more testing on non !amd64 platforms - Use ${CMAKE_INSTALL_BINDIR} instead of hardcoding 'bin' Logswan 2.1.1 (2019-10-30) - Check if system has seccomp in CMakeLists.txt - Use the HAVE_SECCOMP macro to check whether or not to enable seccomp - Define and use a GEOIP2DB macro to specify GeoLite2 database name - Add a switch (-d) to allow specifying path to a GeoIP2 database file - Define and use a LOGSWAN_SYSCALL_ALLOW macro to make code more readable - Adding missing #include guard in seccomp.h header file - Use __NR_ instead of SYS_ prefix in LOGSWAN_SYSCALL_ALLOW - Fix the build on aarch64 Linux, where the open() syscall does not exist - Add error checking for both prctl() calls Logswan 2.1.0 (2019-10-23) - Add FALLTHROUGH comments where appropriate - Add support for parsing HTTP/3 requests - Add initial seccomp support on Linux, tested on musl and glibc systems Logswan 2.0.4 (2019-08-16) - Adding #include guard in 
compat header file - Add an example log file and regenerate output example - Add dependencies installation instructions for NetBSD and FreeBSD - Add final dots for options descriptions - Add final dot when printing results summary - Use EXIT_SUCCESS and EXIT_FAILURE macros for return values - Add a trailing newline when printing JSON output Logswan 2.0.3 (2018-10-15) - Use -std=c11, Logswan requires a C11 compiler for libmaxminddb - Enable support for parsing HTTP/2.0 requests, for real this time - Revert back to using INT64_MAX for strtonum() maxval, as maxval is long long and using UINT64_MAX caused bandwidth parsing to always fail (Thanks James Loh for reporting the issue) - Move maps of countries and continents to separate files Logswan 2.0.2 (2018-08-05) - Use UINT64_MAX for strtonum() maxval - Add missing headers and reorder includes - Add support for HTTP/2.0 Logswan 2.0.1 (2018-06-27) - Do not use -Werror by default - Do not always call inet_pton two times per log line, this speeds things up - Use bool types for isIPv4 and isIPv6 - Use the monotonic clock to determine runtime - Move conditional includes for 'pledge' and 'strtonum' in compat.h Logswan 2.0.0 (2018-03-16) - Use type off_t for results struct member fileSize - Reflect OpenBSD's pledge() changes - Switch to using libmaxminddb and GeoIP2/GeoLite2 databases - Add Antarctica to the list of continents Logswan 1.07 (2017-02-14) - Harmonize arrays names - Remove array of months, it's currently unused and will likely remain so - Simplify internal JSON array and object names - Use OpenBSD style(9) for function prototypes and declarations - Revert back to using strtok, at least for now - Do not use EXIT_SUCCESS and EXIT_FAILURE macros anymore - Fix implicit function declaration error on NetBSD (Thanks Maya Rashish) - Remove now useless variables initialization and unnecessary includes - Do not add an extra new line when displaying usage or version Logswan 1.06 (2016-12-17) - Relicensed under the BSD 
2-Clause license - Use strtok_r instead of strtok to tokenize lines - Do not attempt to increment countries and continents arrays if there is no GeoIP database loaded - Harmonize variable names for the GeoIP databases - GeoIP lookups are now disabled by default (add a -g switch to enable) - Use fstat on open file descriptor instead of using stat before opening the input file - Count the log line as invalid if parsedLine.remoteHost is NULL - Pass results structure by reference, not by value - Initialize some uninitialized variables - Headers cleanup Logswan 1.05 (2016-02-25) - Documentation update (notes on measuring Logswan memory usage) - Add additional include directories for compat functions + dependencies to avoid using relative path in includes - Check that *lineBuffer is not NUL before attempting to parse log line - Perform GeoIP lookup and HLL add in the same if block - Increment IPv4 and IPv6 hits counters individually and conditionally - Use CMake to check if the system has OpenBSD's pledge available and link pledge conditionally using a null implementation when compiled on non OpenBSD systems - Adding an array of months, for the upcoming split log functionalities Logswan 1.04 (2016-01-10) - Moving global variables into main - Using 'size_t' instead of 'int' for array indexes in for loops - Using 'uint32_t' for all non 'uint64_t' integers - Do not increment hits and processed lines counter for each parsed line, compute total only once when everything is parsed - Setting 'CMAKE_BUILD_TYPE' to 'Release' and formatting fixes - Sanitize CMake script to build under NetBSD (Thanks Kamil Rytarowski) - Initializing some uninitialized variables - Renaming 'DATADIR' variables to 'GEOIPDIR' Logswan 1.03 (2016-01-01) - Remove header display and do not print name of processed file - Print results to stderr instead of stdout - Output JSON data to stdout instead of creating a new file - Define GeoIP databases path in CMakeLists.txt - Adding log file name in the JSON 
output - Removing some hardcoded values and replacing them with constants defined in config.h - Breaking the loop when a match is found in the request parser - Using enumeration constants instead of macros - Process GeoIP continent information - Re-ordering protocols and methods with more common occurrences on top of the list, allowing to break earlier when iterating through the array - Adding support for reading logs from standard input - Renaming 'definitions' files to 'config' - Increasing countries array size, as an attempt to be future-proof - Initial support for using pledge() on OpenBSD - Documentation updates (HLL precision, Features list, GeoIP databases) - Updated JSON output example - Added a manual page Logswan 1.02 (2015-11-02) - Renaming 'resource' variable to 'request' in the 'logLine' struct - Do not attempt to parse empty date tokens - Do not attempt to parse empty request tokens (Thanks Brian Carpenter for reporting the issue) Logswan 1.01 (2015-10-01) - Documentation updates - Fixing segfault when request data is empty or malformed (Thanks Jonathan Armani for reporting and proposing a fix) Logswan 1.00 (2015-09-28) - Initial release logswan-2.1.3/LICENSE000066400000000000000000000024411361036747200142060ustar00rootroot00000000000000Copyright (c) 2015-2020, Frederic Cambus All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. logswan-2.1.3/README.md000066400000000000000000000127141361036747200144640ustar00rootroot00000000000000``` _____ .xiX*****Xix. .X7' '4Xk, dXl 'XX. . xXXl XXl . 4XXX XX' . ,x iX' _,,xxii | ²| ,iX7,xiiXXXXXXXl | .xi,xiXXXXXXXXXXXX: . ..iXXiXXXXXXXXXXXXXXX7. . .xXXXXXXXXXXXXXXX'XXXX7 . | ,XXXXXXXXXXXXXXXX'XXX7' | : .XXXXX7*'"' 2XXX7'XX7' | __/ \ _____ ____ \XX' _____ 47' ___ ___ _____ __ .\\_ \___/ _ \__/ _/_______\ _/______/ / \ \____/ _ \___/ \ _____ . / __ Y _ __ \__ _________ _____ \/\/ ____ _ _ ______ \/ __/// :/ / | \ |' \/ \/ \/ \/ Y \/ \ \ : |\______/\_________/____| /\____ /\_____/\_____/\____|____/\____\___/ | +--------------------- \____/ --- \____/ ----:----------------------h7/dS!----+ . | : : . : | | . Logswan . | | : . | |_|_______________________|__| | : . ``` # Logswan [![Build Status][1]][2] Logswan is a fast Web log analyzer using probabilistic data structures. It is targeted at very large log files, typically APIs logs. It has constant memory usage regardless of the log file size, and takes approximatively 4MB of RAM. Unique visitors counting is performed using two HyperLogLog counters (one for IPv4, and another one for IPv6), providing a relative accuracy of 0.10%. 
String representations of IP addresses are used and preferred as they offer better precision. Project design goals include: speed, memory-usage efficiency, and keeping the code as simple as possible. Logswan is **opinionated software**: - It only supports the Common Log Format, in order to keep the parsing code simple. It can of course process the Combined Log Format as well (referer and user agent fields will be discarded) - It does not split results per day, but log files can be split prior to being processed - Input file size and bandwidth usage are reported in bytes, there are no plans to format or round them Logswan is written with security in mind and is running sandboxed on OpenBSD (using pledge). It has also been extensively fuzzed using AFL and Honggfuzz. ## Features Currently implemented features: - Counting used bandwidth - Counting number of processed lines / invalid lines - Counting number of hits (IPv4 and IPv6 hits) - Counting visits (unique IP addresses for both IPv4 and IPv6) - GeoIP lookups (for both IPv4 and IPv6) - Hourly hits distribution - HTTP method distribution - HTTP protocol (HTTP/1.0, HTTP/1.1, or HTTP/2.0) distribution - HTTP status codes distribution ## Dependencies Logswan uses the `CMake` build system and requires `Jansson` and `libmaxminddb` libraries and header files. ## Installing dependencies - OpenBSD: `pkg_add -r cmake jansson libmaxminddb` - NetBSD: `pkgin in cmake jansson libmaxminddb` - FreeBSD: `pkg install cmake jansson libmaxminddb` - Mac OS X: `brew install cmake jansson libmaxminddb` - Alpine Linux: `apk add cmake gcc make musl-dev jansson-dev libmaxminddb-dev` - Debian / Ubuntu: `apt-get install build-essential cmake libjansson-dev libmaxminddb-dev` ## Building mkdir build cd build cmake .. make Logswan has been successfully built and tested on OpenBSD, NetBSD, FreeBSD, Mac OS X, and Linux with both Clang and GCC. 
## Packages Packages are available for the following operating systems: - [OpenBSD][3] - [NetBSD][4] - [Debian][5] - [Ubuntu][6] - [Void Linux][7] ### GeoIP2 databases Logswan looks for GeoIP2 databases in `${CMAKE_INSTALL_PREFIX}/share/GeoIP2` by default, which points to `/usr/local/share/GeoIP2`. A custom directory can be set using the `GEOIP2DIR` variable when invoking CMake: cmake -DGEOIP2DIR=/var/db/GeoIP . The free GeoLite2 databases from MaxMind can be downloaded [here][8]: https://geolite.maxmind.com/download/geoip/database/GeoLite2-Country.tar.gz ## Usage logswan [-ghv] [-d db] file If file is a single dash (`-'), logswan reads from the standard input. The options are as follows: -d db Specify path to a GeoIP database. -g Enable GeoIP lookups. -h Display usage. -v Display version. Logswan outputs JSON data to **stdout**. ## Measuring Logswan memory usage Heap profiling can be done using valgrind, as follows: valgrind --tool=massif logswan access.log ms_print massif.out.* ## License Logswan is released under the BSD 2-Clause license. See `LICENSE` file for details. ## Author Logswan is developed by Frederic Cambus. 
- Site: https://www.cambus.net ## Resources Project homepage: https://www.logswan.org GitHub: https://github.com/fcambus/logswan [1]: https://api.travis-ci.org/fcambus/logswan.png?branch=master [2]: https://travis-ci.org/fcambus/logswan [3]: https://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/www/logswan [4]: http://pkgsrc.se/www/logswan [5]: https://packages.debian.org/sid/logswan [6]: https://packages.ubuntu.com/disco/logswan [7]: https://github.com/void-linux/void-packages/tree/master/srcpkgs/logswan [8]: https://dev.maxmind.com/geoip/geoip2/geolite2/ logswan-2.1.3/THANKS000066400000000000000000000003241361036747200141120ustar00rootroot00000000000000- Antti Kiuru for the Logswan ASCII logo - Ted Unangst and Todd Miller for strtonum.c - Austin Appleby for the MurmurHash3 hash function - Artem Zaytsev for the HyperLogLog C library (https://github.com/avz/hll) logswan-2.1.3/compat/000077500000000000000000000000001361036747200144635ustar00rootroot00000000000000logswan-2.1.3/compat/compat.h000066400000000000000000000007241361036747200161220ustar00rootroot00000000000000#ifndef COMPAT_H #define COMPAT_H #ifndef HAVE_PLEDGE #include "pledge.h" #endif #ifndef HAVE_STRTONUM #include "strtonum.h" #endif #ifndef timespecsub #define timespecsub(tsp, usp, vsp) \ do { \ (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ if ((vsp)->tv_nsec < 0) { \ (vsp)->tv_sec--; \ (vsp)->tv_nsec += 1000000000L; \ } \ } while (0) #endif #endif /* COMPAT_H */ logswan-2.1.3/compat/pledge.c000066400000000000000000000001601361036747200160640ustar00rootroot00000000000000int pledge(const char *promises, const char *execpromises) { (void)promises; (void)execpromises; return 0; } logswan-2.1.3/compat/pledge.h000066400000000000000000000000501361036747200160670ustar00rootroot00000000000000int pledge(const char *, const char *); logswan-2.1.3/compat/strtonum.c000066400000000000000000000033501361036747200165230ustar00rootroot00000000000000/* $OpenBSD: 
strtonum.c,v 1.7 2013/04/17 18:40:58 tedu Exp $ */ /* * Copyright (c) 2004 Ted Unangst and Todd Miller * All rights reserved. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #define INVALID 1 #define TOOSMALL 2 #define TOOLARGE 3 long long strtonum(const char *numstr, long long minval, long long maxval, const char **errstrp) { long long ll = 0; int error = 0; char *ep; struct errval { const char *errstr; int err; } ev[4] = { { NULL, 0 }, { "invalid", EINVAL }, { "too small", ERANGE }, { "too large", ERANGE }, }; ev[0].err = errno; errno = 0; if (minval > maxval) { error = INVALID; } else { ll = strtoll(numstr, &ep, 10); if (numstr == ep || *ep != '\0') error = INVALID; else if ((ll == LLONG_MIN && errno == ERANGE) || ll < minval) error = TOOSMALL; else if ((ll == LLONG_MAX && errno == ERANGE) || ll > maxval) error = TOOLARGE; } if (errstrp != NULL) *errstrp = ev[error].errstr; errno = ev[error].err; if (error) ll = 0; return (ll); } logswan-2.1.3/compat/strtonum.h000066400000000000000000000001071361036747200165250ustar00rootroot00000000000000long long strtonum(const char *, long long, long long, const char **); 
logswan-2.1.3/deps/000077500000000000000000000000001361036747200141335ustar00rootroot00000000000000logswan-2.1.3/deps/MurmurHash3/000077500000000000000000000000001361036747200163115ustar00rootroot00000000000000logswan-2.1.3/deps/MurmurHash3/MurmurHash3.c000066400000000000000000000023371361036747200206400ustar00rootroot00000000000000/*----------------------------------------------------------------------------- MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author hereby disclaims copyright to this source code. */ #include "MurmurHash3.h" #define ROTL32(x, r) ((x) << (r)) | ((x) >> (32 - (r))) uint32_t MurmurHash3_x86_32(const void *key, uint32_t len, uint32_t seed) { const uint8_t *data = (const uint8_t *)key; const int32_t nblocks = (int32_t)len / 4; uint32_t h1 = seed; int i; const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); for(i = -nblocks; i; i++) { uint32_t k1 = blocks[i]; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } const uint8_t * tail = (const uint8_t *)(data + nblocks * 4); uint32_t k1 = 0; switch(len & 3) { case 3: k1 ^= (uint32_t)tail[2] << 16; /* FALLTHROUGH */ case 2: k1 ^= (uint32_t)tail[1] << 8; /* FALLTHROUGH */ case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; }; h1 ^= len; h1 ^= h1 >> 16; h1 *= 0x85ebca6b; h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; h1 ^= h1 >> 16; return h1; } logswan-2.1.3/deps/MurmurHash3/MurmurHash3.h000066400000000000000000000002321361036747200206350ustar00rootroot00000000000000#ifndef _MURMURHASH3_H_ #define _MURMURHASH3_H_ #include uint32_t MurmurHash3_x86_32(const void * key, uint32_t len, uint32_t seed); #endif logswan-2.1.3/deps/hll/000077500000000000000000000000001361036747200147125ustar00rootroot00000000000000logswan-2.1.3/deps/hll/LICENSE000066400000000000000000000020611361036747200157160ustar00rootroot00000000000000Copyright (c) 2015 
Artem Zaytsev Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
logswan-2.1.3/deps/hll/hll.c000066400000000000000000000051071361036747200156400ustar00rootroot00000000000000#include #include #include #include #include #include "../MurmurHash3/MurmurHash3.h" #include "hll.h" static __inline uint8_t _hll_rank(uint32_t hash, uint8_t bits) { uint8_t i; for(i = 1; i <= 32 - bits; i++) { if(hash & 1) break; hash >>= 1; } return i; } int hll_init(struct HLL *hll, uint8_t bits) { if(bits < 4 || bits > 20) { errno = ERANGE; return -1; } hll->bits = bits; hll->size = (size_t)1 << bits; hll->registers = calloc(hll->size, 1); return 0; } void hll_destroy(struct HLL *hll) { free(hll->registers); hll->registers = NULL; } static __inline void _hll_add_hash(struct HLL *hll, uint32_t hash) { uint32_t index = hash >> (32 - hll->bits); uint8_t rank = _hll_rank(hash, hll->bits); if(rank > hll->registers[index]) { hll->registers[index] = rank; } } void hll_add(struct HLL *hll, const void *buf, size_t size) { uint32_t hash = MurmurHash3_x86_32((const char *)buf, (uint32_t)size, 0x5f61767a); _hll_add_hash(hll, hash); } double hll_count(const struct HLL *hll) { double alpha_mm; uint32_t i; switch (hll->bits) { case 4: alpha_mm = 0.673; break; case 5: alpha_mm = 0.697; break; case 6: alpha_mm = 0.709; break; default: alpha_mm = 0.7213 / (1.0 + 1.079 / (double)hll->size); break; } alpha_mm *= ((double)hll->size * (double)hll->size); double sum = 0; for(i = 0; i < hll->size; i++) { sum += 1.0 / (1 << hll->registers[i]); } double estimate = alpha_mm / sum; if (estimate <= 5.0 / 2.0 * (double)hll->size) { int zeros = 0; for(i = 0; i < hll->size; i++) zeros += (hll->registers[i] == 0); if(zeros) estimate = (double)hll->size * log((double)hll->size / zeros); } else if (estimate > (1.0 / 30.0) * 4294967296.0) { estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)); } return estimate; } int hll_merge(struct HLL *dst, const struct HLL *src) { uint32_t i; if(dst->bits != src->bits) { errno = EINVAL; return -1; } for(i = 0; i < dst->size; i++) { 
if(src->registers[i] > dst->registers[i]) dst->registers[i] = src->registers[i]; } return 0; } int hll_load(struct HLL *hll, const void *registers, size_t size) { uint8_t bits = 0; size_t s = size; while(s) { if(s & 1) break; bits++; s >>= 1; } if(!bits || ((size_t)1 << bits) != size) { errno = EINVAL; return -1; } if(hll_init(hll, bits) == -1) return -1; memcpy(hll->registers, registers, size); return 0; } extern uint32_t _hll_hash(const struct HLL *hll) { return MurmurHash3_x86_32(hll->registers, (uint32_t)hll->size, 0); } logswan-2.1.3/deps/hll/hll.h000066400000000000000000000010721361036747200156420ustar00rootroot00000000000000#ifndef AVZ_HLL_H #define AVZ_HLL_H #include #include struct HLL { uint8_t bits; size_t size; uint8_t *registers; }; extern int hll_init(struct HLL *hll, uint8_t bits); extern int hll_load(struct HLL *hll, const void *registers, size_t size); extern void hll_destroy(struct HLL *hll); extern int hll_merge(struct HLL *dst, const struct HLL *src); extern void hll_add(struct HLL *hll, const void *buf, size_t size); extern double hll_count(const struct HLL *hll); extern uint32_t _hll_hash(const struct HLL *hll); #endif /* AVZ_HLL_H */ logswan-2.1.3/examples/000077500000000000000000000000001361036747200150165ustar00rootroot00000000000000logswan-2.1.3/examples/logswan.json000066400000000000000000000036461361036747200173740ustar00rootroot00000000000000{ "date": "2018-12-10 11:57:43", "generator": "Logswan 2.0.3", "file_name": "logswan.log", "file_size": 4967, "processed_lines": 34, "invalid_lines": 0, "bandwidth": 296374, "runtime": 0.018140832999999999, "hits": { "ipv4": 34, "ipv6": 0, "total": 34 }, "visits": { "ipv4": 7, "ipv6": 0, "total": 7 }, "continents": [ { "data": "EU", "name": "Europe", "hits": 14 }, { "data": "NA", "name": "North America", "hits": 14 }, { "data": "OC", "name": "Oceania", "hits": 6 } ], "countries": [ { "data": "AU", "name": "Australia", "hits": 6 }, { "data": "DE", "name": "Germany", "hits": 1 }, { "data": "FR", 
"name": "France", "hits": 13 }, { "data": "US", "name": "United States", "hits": 14 } ], "hours": [ { "data": 10, "hits": 2 }, { "data": 11, "hits": 4 }, { "data": 12, "hits": 13 }, { "data": 13, "hits": 2 }, { "data": 14, "hits": 5 }, { "data": 15, "hits": 1 }, { "data": 16, "hits": 2 }, { "data": 18, "hits": 5 } ], "methods": [ { "data": "GET", "hits": 33 }, { "data": "HEAD", "hits": 1 } ], "protocols": [ { "data": "HTTP/1.1", "hits": 33 }, { "data": "HTTP/1.0", "hits": 1 } ], "status": [ { "data": 200, "hits": 22 }, { "data": 404, "hits": 12 } ] }logswan-2.1.3/examples/logswan.log000066400000000000000000000115471361036747200172030ustar00rootroot000000000000001.1.1.1 - - [09/Dec/2018:10:59:20 +0100] "HEAD / HTTP/1.1" 200 8142 "" "curl/7.62.0" 1.1.1.1 - - [09/Dec/2018:10:59:26 +0100] "GET / HTTP/1.1" 200 8142 "" "curl/7.62.0" 1.1.1.1 - - [09/Dec/2018:11:00:02 +0100] "GET /robots.txt HTTP/1.1" 404 0 "" "curl/7.62.0" 1.1.1.1 - - [09/Dec/2018:11:06:22 +0100] "GET /assets/images/bkg.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 1.1.1.1 - - [09/Dec/2018:11:06:22 +0100] "GET /assets/images/blacktocat.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 1.1.1.1 - - [09/Dec/2018:11:06:23 +0100] "GET /favicon.ico HTTP/1.1" 404 0 "https://www.logswan.org/" "Mozilla/5.0 (X11; OpenBSD amd64; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 2.2.2.2 - - [09/Dec/2018:12:36:38 +0100] "GET /files/logswan-1.00.tar.gz HTTP/1.1" 200 14571 "" "curl/7.62.0" 2.2.2.2 - - [09/Dec/2018:12:36:47 +0100] "GET /files/logswan-1.00.tar.gz HTTP/1.1" 200 14571 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:36:51 +0100] "GET /files/logswan-1.01.tar.gz HTTP/1.1" 200 14790 
"" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:36:53 +0100] "GET /files/logswan-1.02.tar.gz HTTP/1.1" 200 14931 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:36:55 +0100] "GET /files/logswan-1.03.tar.gz HTTP/1.1" 200 16485 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:36:57 +0100] "GET /files/logswan-1.04.tar.gz HTTP/1.1" 200 16913 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:00 +0100] "GET /files/logswan-1.05.tar.gz HTTP/1.1" 200 17392 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:02 +0100] "GET /files/logswan-1.06.tar.gz HTTP/1.1" 200 17589 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:05 +0100] "GET /files/logswan-1.07.tar.gz HTTP/1.1" 200 18697 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:13 +0100] "GET /files/logswan-2.0.0.tar.gz HTTP/1.1" 200 20683 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:16 +0100] "GET /files/logswan-2.0.1.tar.gz HTTP/1.1" 200 21336 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:18 +0100] "GET /files/logswan-2.0.2.tar.gz HTTP/1.1" 200 21367 "" "Wget/1.19.5 (openbsd6.4)" 2.2.2.2 - - [09/Dec/2018:12:37:21 +0100] "GET /files/logswan-2.0.3.tar.gz HTTP/1.1" 200 21799 "" "Wget/1.19.5 (openbsd6.4)" 3.3.3.3 - - [09/Dec/2018:13:16:24 +0100] "GET /assets/images/bkg.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 3.3.3.3 - - [09/Dec/2018:13:16:24 +0100] "GET /assets/images/blacktocat.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 4.4.4.4 - - [09/Dec/2018:14:32:28 +0100] "GET / HTTP/1.1" 200 8142 "" "Mozilla/5.0 (X11; OpenBSD amd64; rv:63.0) Gecko/20100101 Firefox/63.0" 4.4.4.4 - - [09/Dec/2018:14:32:29 +0100] "GET /assets/css/style.css 
HTTP/1.1" 200 5466 "https://www.logswan.org/" "Mozilla/5.0 (X11; OpenBSD amd64; rv:63.0) Gecko/20100101 Firefox/63.0" 4.4.4.4 - - [09/Dec/2018:14:32:29 +0100] "GET /assets/images/bkg.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; rv:63.0) Gecko/20100101 Firefox/63.0" 4.4.4.4 - - [09/Dec/2018:14:32:29 +0100] "GET /assets/images/blacktocat.png HTTP/1.1" 404 0 "https://www.logswan.org/assets/css/style.css" "Mozilla/5.0 (X11; OpenBSD amd64; rv:63.0) Gecko/20100101 Firefox/63.0" 4.4.4.4 - - [09/Dec/2018:14:32:29 +0100] "GET /favicon.ico HTTP/1.1" 404 0 "" "Mozilla/5.0 (X11; OpenBSD amd64; rv:63.0) Gecko/20100101 Firefox/63.0" 5.5.5.5 - - [09/Dec/2018:15:21:57 +0100] "GET / HTTP/1.0" 200 8142 "" "Lynx/2.8.9rel.1 libwww-FM/2.14 SSL-MM/1.4.1" 6.6.6.6 - - [09/Dec/2018:16:49:12 +0100] "GET / HTTP/1.1" 200 8142 "https://www.logswan.org/" "Dillo/3.0.5" 6.6.6.6 - - [09/Dec/2018:16:49:12 +0100] "GET /assets/css/style.css HTTP/1.1" 200 5466 "https://www.logswan.org/" "Dillo/3.0.5" 7.7.7.7 - - [09/Dec/2018:18:17:29 +0100] "GET / HTTP/1.1" 200 8142 "" "NetSurf/3.8 (OpenBSD)" 7.7.7.7 - - [09/Dec/2018:18:17:30 +0100] "GET /assets/css/style.css HTTP/1.1" 200 5466 "https://www.logswan.org/" "NetSurf/3.8 (OpenBSD)" 7.7.7.7 - - [09/Dec/2018:18:17:30 +0100] "GET /assets/images/bkg.png HTTP/1.1" 404 0 "https://www.logswan.org/" "NetSurf/3.8 (OpenBSD)" 7.7.7.7 - - [09/Dec/2018:18:17:30 +0100] "GET /assets/images/blacktocat.png HTTP/1.1" 404 0 "https://www.logswan.org/" "NetSurf/3.8 (OpenBSD)" 7.7.7.7 - - [09/Dec/2018:18:17:31 +0100] "GET /favicon.ico HTTP/1.1" 404 0 "" "NetSurf/3.8 (OpenBSD)" logswan-2.1.3/logswan.1000066400000000000000000000047641361036747200147470ustar00rootroot00000000000000.\" .\" Copyright (c) 2015-2020, Frederic Cambus .\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions are met: .\" .\" * Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" .\" * Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .Dd $Mdocdate: January 16 2020 $ .Dt LOGSWAN 1 .Os .Sh NAME .Nm logswan .Nd fast Web log analyzer using probabilistic data structures .Sh SYNOPSIS .Nm .Op Fl ghv .Op Fl d Ar db .Ar file .Sh DESCRIPTION .Nm is a fast Web log analyzer using probabilistic data structures. It is targeted at very large log files, typically API logs. It has constant memory usage regardless of the log file size, and takes approximately 4MB of RAM. .Pp Unique visitors counting is performed using two HyperLogLog counters (one for IPv4, and another one for IPv6), providing a relative accuracy of 0.10%. .Pp If .Ar file is a single dash (`-'), .Nm reads from the standard input.
.Pp The options are as follows: .Bl -tag -width Ds .It Fl d Ar db Specify path to a GeoIP database. .It Fl g Enable GeoIP lookups. .It Fl h Display usage. .It Fl v Display version. .El .Sh EXAMPLES The following script can be used to process all log files in the current directory and save the output in a file: .Bd -literal -offset indent #!/bin/sh for file in $(ls *.log) do logswan $file > $file.json done exit 0 .Ed .Sh AUTHORS .Nm was written by .An Frederic Cambus . logswan-2.1.3/src/000077500000000000000000000000001361036747200137675ustar00rootroot00000000000000logswan-2.1.3/src/config.c000066400000000000000000000006741361036747200154070ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-10-17 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ /* Request method tokens recognised by main(); each is compared with strcmp() against the parsed method and tallied in results.methods at the same index. The number of entries must equal METHODS in config.h. */ char *methodsNames[] = { "GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE", "TRACE", "CONNECT", "PATCH" }; /* Protocol tokens recognised by main(); each is compared with strcmp() against the parsed protocol and tallied in results.protocols at the same index. The number of entries must equal PROTOCOLS in config.h. */ char *protocolsNames[] = { "HTTP/1.1", "HTTP/1.0", "HTTP/2.0", "HTTP/3" }; logswan-2.1.3/src/config.h000066400000000000000000000011051361036747200154020ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-10-17 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details.
*/ #ifndef CONFIG_H #define CONFIG_H #define VERSION "Logswan 2.1.3" #define GEOIP2DB "GeoLite2-Country.mmdb" /* Compile-time limits. HLL_BITS is the precision handed to hll_init(); LINE_LENGTH_MAX sizes the buffer that fgets() fills for each log line; STATUS_CODE_MAX sizes the status table, so status codes 0 .. STATUS_CODE_MAX-1 are accepted; CONTINENTS, COUNTRIES, METHODS and PROTOCOLS are the lengths of the matching lookup tables and must agree with their initializers. */ enum { HLL_BITS = 20, LINE_LENGTH_MAX = 65536, STATUS_CODE_MAX = 512, CONTINENTS = 7, COUNTRIES = 251, METHODS = 9, PROTOCOLS = 4 }; extern char *methodsNames[]; extern char *protocolsNames[]; #endif /* CONFIG_H */ logswan-2.1.3/src/continents.c000066400000000000000000000006771361036747200163310ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ /* Two-letter continent codes; main() compares them with the GeoIP "continent" / "code" value. Index-aligned with continentsNames; the number of entries must equal CONTINENTS. */ char *continentsId[] = { "AF", "AN", "AS", "EU", "NA", "OC", "SA" }; /* Display names emitted by output() next to the code stored at the same index of continentsId. */ char *continentsNames[] = { "Africa", "Antarctica", "Asia", "Europe", "North America", "Oceania", "South America" }; logswan-2.1.3/src/continents.h000066400000000000000000000005671361036747200163340ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ #ifndef CONTINENTS_H #define CONTINENTS_H extern char *continentsId[]; extern char *continentsNames[]; #endif /* CONTINENTS_H */ logswan-2.1.3/src/countries.c000066400000000000000000000132061361036747200161500ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details.
*/ /* Two-letter country codes; main() compares them with the GeoIP "country" / "iso_code" value. Index-aligned with countriesNames; the number of entries must equal COUNTRIES in config.h. */ char *countriesId[] = { "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW" }; /* Display names emitted by output() next to the code stored at the same index of countriesId. */ char *countriesNames[] = { "Andorra", "United Arab Emirates", "Afghanistan", "Antigua and Barbuda", "Anguilla", "Albania", "Armenia", "Netherlands Antilles", "Angola", "Antarctica", "Argentina", "American Samoa", "Austria", "Australia", "Aruba", "Aland", "Azerbaijan", "Bosnia and Herzegovina", "Barbados", "Bangladesh", "Belgium", "Burkina Faso", "Bulgaria", "Bahrain", "Burundi", "Benin", "Saint Barthelemy", "Bermuda", "Brunei", "Bolivia", "Bonaire", "Brazil",
"Bahamas", "Bhutan", "Bouvet Island", "Botswana", "Belarus", "Belize", "Canada", "Cocos (Keeling) Islands", "Democratic Republic of the Congo", "Central African Republic", "Republic of the Congo", "Switzerland", "Ivory Coast", "Cook Islands", "Chile", "Cameroon", "China", "Colombia", "Costa Rica", "Cuba", "Cape Verde", "Curacao", "Christmas Island", "Cyprus", "Czech Republic", "Germany", "Djibouti", "Denmark", "Dominica", "Dominican Republic", "Algeria", "Ecuador", "Estonia", "Egypt", "Western Sahara", "Eritrea", "Spain", "Ethiopia", "Finland", "Fiji", "Falkland Islands", "Micronesia", "Faroe Islands", "France", "Gabon", "United Kingdom", "Grenada", "Georgia", "French Guiana", "Guernsey", "Ghana", "Gibraltar", "Greenland", "Gambia", "Guinea", "Guadeloupe", "Equatorial Guinea", "Greece", "South Georgia and the South Sandwich Islands", "Guatemala", "Guam", "Guinea-Bissau", "Guyana", "Hong Kong", "Heard Island and McDonald Islands", "Honduras", "Croatia", "Haiti", "Hungary", "Indonesia", "Ireland", "Israel", "Isle of Man", "India", "British Indian Ocean Territory", "Iraq", "Iran", "Iceland", "Italy", "Jersey", "Jamaica", "Jordan", "Japan", "Kenya", "Kyrgyzstan", "Cambodia", "Kiribati", "Comoros", "Saint Kitts and Nevis", "North Korea", "South Korea", "Kuwait", "Cayman Islands", "Kazakhstan", "Laos", "Lebanon", "Saint Lucia", "Liechtenstein", "Sri Lanka", "Liberia", "Lesotho", "Lithuania", "Luxembourg", "Latvia", "Libya", "Morocco", "Monaco", "Moldova", "Montenegro", "Saint Martin", "Madagascar", "Marshall Islands", "Macedonia", "Mali", "Myanmar", "Mongolia", "Macao", "Northern Mariana Islands", "Martinique", "Mauritania", "Montserrat", "Malta", "Mauritius", "Maldives", "Malawi", "Mexico", "Malaysia", "Mozambique", "Namibia", "New Caledonia", "Niger", "Norfolk Island", "Nigeria", "Nicaragua", "Netherlands", "Norway", "Nepal", "Nauru", "Niue", "New Zealand", "Oman", "Panama", "Peru", "French Polynesia", "Papua New Guinea", "Philippines", "Pakistan", "Poland", "Saint
Pierre and Miquelon", "Pitcairn Islands", "Puerto Rico", "Palestine", "Portugal", "Palau", "Paraguay", "Qatar", "Reunion", "Romania", "Serbia", "Russia", "Rwanda", "Saudi Arabia", "Solomon Islands", "Seychelles", "Sudan", "Sweden", "Singapore", "Saint Helena", "Slovenia", "Svalbard and Jan Mayen", "Slovakia", "Sierra Leone", "San Marino", "Senegal", "Somalia", "Suriname", "South Sudan", "Sao Tome and Principe", "El Salvador", "Sint Maarten", "Syria", "Swaziland", "Turks and Caicos Islands", "Chad", "French Southern Territories", "Togo", "Thailand", "Tajikistan", "Tokelau", "East Timor", "Turkmenistan", "Tunisia", "Tonga", "Turkey", "Trinidad and Tobago", "Tuvalu", "Taiwan", "Tanzania", "Ukraine", "Uganda", "U.S. Minor Outlying Islands", "United States", "Uruguay", "Uzbekistan", "Vatican City", "Saint Vincent and the Grenadines", "Venezuela", "British Virgin Islands", "U.S. Virgin Islands", "Vietnam", "Vanuatu", "Wallis and Futuna", "Samoa", "Kosovo", "Yemen", "Mayotte", "South Africa", "Zambia", "Zimbabwe" }; logswan-2.1.3/src/countries.h000066400000000000000000000005631361036747200161570ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ #ifndef COUNTRIES_H #define COUNTRIES_H extern char *countriesId[]; extern char *countriesNames[]; #endif /* COUNTRIES_H */ logswan-2.1.3/src/logswan.c000066400000000000000000000162461361036747200156160ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2020-01-17 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details.
*/ #define _XOPEN_SOURCE 600 #define _POSIX_C_SOURCE 199309L #define _POSIX_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SECCOMP #include #include #include #include #include #include "seccomp.h" #endif #include #include "compat.h" #include "config.h" #include "continents.h" #include "countries.h" #include "hll.h" #include "output.h" #include "parse.h" bool geoip; MMDB_s geoip2; struct timespec begin, end, elapsed; char lineBuffer[LINE_LENGTH_MAX]; struct results results; struct date parsedDate; struct logLine parsedLine; struct request parsedRequest; struct sockaddr_in ipv4; struct sockaddr_in6 ipv6; bool isIPv4, isIPv6; uint64_t bandwidth; uint32_t statusCode; uint32_t hour; uint32_t countryId; FILE *logFile; struct stat logFileStat; const char *errstr; int8_t getoptFlag; struct HLL uniqueIPv4, uniqueIPv6; char *intputFile; char *db = NULL; void displayUsage() { printf("USAGE: logswan [options] inputfile\n\n" \ "Options are:\n\n" \ " -d Specify path to a GeoIP database\n" \ " -g Enable GeoIP lookups\n" \ " -h Display usage\n" \ " -v Display version\n"); } int main(int argc, char *argv[]) { int gai_error, mmdb_error; MMDB_lookup_result_s lookup; if (pledge("stdio rpath", NULL) == -1) { err(EXIT_FAILURE, "pledge"); } #ifdef HAVE_SECCOMP if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("Can't initialize seccomp"); return EXIT_FAILURE; } if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &logswan)) { perror("Can't load seccomp filter"); return EXIT_FAILURE; } #endif hll_init(&uniqueIPv4, HLL_BITS); hll_init(&uniqueIPv6, HLL_BITS); while ((getoptFlag = getopt(argc, argv, "d:ghv")) != -1) { switch (getoptFlag) { case 'd': db = optarg; break; case 'g': geoip = true; break; case 'h': displayUsage(); return EXIT_SUCCESS; case 'v': printf("%s\n", VERSION); return EXIT_SUCCESS; } } if (optind < argc) { intputFile = argv[optind]; } else { displayUsage(); 
return EXIT_SUCCESS; } argc -= optind; argv += optind; /* Starting timer */ clock_gettime(CLOCK_MONOTONIC, &begin); /* Initializing GeoIP */ if (geoip) { if (!db) db = GEOIP2DIR GEOIP2DB; if (MMDB_open(db, MMDB_MODE_MMAP, &geoip2) != MMDB_SUCCESS) { perror("Can't open database"); return EXIT_FAILURE; } } /* Open log file */ if (!strcmp(intputFile, "-")) { /* Read from standard input */ logFile = stdin; } else { /* Attempt to read from file */ if (!(logFile = fopen(intputFile, "r"))) { perror("Can't open log file"); return EXIT_FAILURE; } } /* Get log file size */ if (fstat(fileno(logFile), &logFileStat)) { perror("Can't stat log file"); return EXIT_FAILURE; } results.fileName = intputFile; results.fileSize = logFileStat.st_size; while (fgets(lineBuffer, LINE_LENGTH_MAX, logFile)) { /* Parse and tokenize line */ parseLine(&parsedLine, lineBuffer); /* Detect if remote host is IPv4 or IPv6 */ if (parsedLine.remoteHost) { /* Do not feed NULL tokens to inet_pton */ if ((isIPv4 = inet_pton(AF_INET, parsedLine.remoteHost, &(ipv4.sin_addr)))) { isIPv6 = false; } else { isIPv6 = inet_pton(AF_INET6, parsedLine.remoteHost, &(ipv6.sin6_addr)); if (!isIPv6) { results.invalidLines++; continue; } } } else { /* Invalid line */ results.invalidLines++; continue; } if (isIPv4) { /* Increment hits counter */ results.hitsIPv4++; /* Unique visitors */ hll_add(&uniqueIPv4, parsedLine.remoteHost, strlen(parsedLine.remoteHost)); } if (isIPv6) { /* Increment hits counter */ results.hitsIPv6++; /* Unique visitors */ hll_add(&uniqueIPv6, parsedLine.remoteHost, strlen(parsedLine.remoteHost)); } if (geoip) { MMDB_entry_data_s entry_data; lookup = MMDB_lookup_string(&geoip2, parsedLine.remoteHost, &gai_error, &mmdb_error); MMDB_get_value(&lookup.entry, &entry_data, "country", "iso_code", NULL); if (entry_data.has_data) { /* Increment countries array */ for (size_t loop = 0; loop < COUNTRIES; loop++) { if (!strncmp(countriesId[loop], entry_data.utf8_string, 2)) { results.countries[loop]++; break; 
} } } MMDB_get_value(&lookup.entry, &entry_data, "continent", "code", NULL); if (entry_data.has_data) { /* Increment continents array */ for (size_t loop = 0; loop < CONTINENTS; loop++) { if (!strncmp(continentsId[loop], entry_data.utf8_string, 2)) { results.continents[loop]++; break; } } } } /* Hourly distribution */ if (parsedLine.date) { parseDate(&parsedDate, parsedLine.date); if (parsedDate.hour) { hour = strtonum(parsedDate.hour, 0, 23, &errstr); if (!errstr) { results.hours[hour]++; } } } /* Parse request */ if (parsedLine.request) { parseRequest(&parsedRequest, parsedLine.request); if (parsedRequest.method) { for (size_t loop = 0; loop < METHODS; loop++) { if (!strcmp(methodsNames[loop], parsedRequest.method)) { results.methods[loop]++; break; } } } if (parsedRequest.protocol) { for (size_t loop = 0; loop < PROTOCOLS; loop++) { if (!strcmp(protocolsNames[loop], parsedRequest.protocol)) { results.protocols[loop]++; break; } } } } /* Count HTTP status codes occurrences */ if (parsedLine.statusCode) { statusCode = strtonum(parsedLine.statusCode, 0, STATUS_CODE_MAX-1, &errstr); if (!errstr) { results.status[statusCode]++; } } /* Increment bandwidth usage */ if (parsedLine.objectSize) { bandwidth = strtonum(parsedLine.objectSize, 0, INT64_MAX, &errstr); if (!errstr) { results.bandwidth += bandwidth; } } } /* Counting hits and processed lines */ results.hits = results.hitsIPv4 + results.hitsIPv6; results.processedLines = results.hits + results.invalidLines; /* Counting unique visitors */ results.visitsIPv4 = hll_count(&uniqueIPv4); results.visitsIPv6 = hll_count(&uniqueIPv6); results.visits = results.visitsIPv4 + results.visitsIPv6; /* Stopping timer */ clock_gettime(CLOCK_MONOTONIC, &end); timespecsub(&end, &begin, &elapsed); results.runtime = elapsed.tv_sec + elapsed.tv_nsec / 1E9; /* Generate timestamp */ time_t now = time(NULL); strftime(results.timeStamp, 20, "%Y-%m-%d %H:%M:%S", localtime(&now)); /* Printing results */ fprintf(stdout, "%s\n", 
output(&results)); fprintf(stderr, "Processed %" PRIu64 " lines in %f seconds.\n", results.processedLines, results.runtime); /* Clean up */ fclose(logFile); MMDB_close(&geoip2); hll_destroy(&uniqueIPv4); hll_destroy(&uniqueIPv6); return EXIT_SUCCESS; } logswan-2.1.3/src/output.c000066400000000000000000000072121361036747200154750ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ #include #include #include "config.h" #include "continents.h" #include "countries.h" #include "output.h" char *output(struct results *results) { json_t *output = json_object(); json_t *hits = json_object(); json_t *visits = json_object(); json_t *continents = json_array(); json_t *countries = json_array(); json_t *hours = json_array(); json_t *status = json_array(); json_t *methods = json_array(); json_t *protocols = json_array(); for (size_t loop = 0; loop < CONTINENTS; loop++) { if (results->continents[loop]) { json_array_append_new( continents, json_pack("{s:s, s:s, s:i}", "data", continentsId[loop], "name", continentsNames[loop], "hits", results->continents[loop])); } } for (size_t loop = 0; loop < COUNTRIES; loop++) { if (results->countries[loop]) { json_array_append_new( countries, json_pack("{s:s, s:s, s:i}", "data", countriesId[loop], "name", countriesNames[loop], "hits", results->countries[loop])); } } for (size_t loop = 0; loop < 24; loop++) { if (results->hours[loop]) { json_array_append_new( hours, json_pack("{s:i, s:i}", "data", loop, "hits", results->hours[loop])); } } for (size_t loop = 0; loop < STATUS_CODE_MAX; loop++) { if (results->status[loop]) { json_array_append_new( status, json_pack("{s:i, s:i}", "data", loop, "hits", results->status[loop])); } } for (size_t loop = 0; loop < METHODS; loop++) { if (results->methods[loop]) { json_array_append_new( methods, 
json_pack("{s:s, s:i}", "data", methodsNames[loop], "hits", results->methods[loop])); } } for (size_t loop = 0; loop < PROTOCOLS; loop++) { if (results->protocols[loop]) { json_array_append_new( protocols, json_pack("{s:s, s:i}", "data", protocolsNames[loop], "hits", results->protocols[loop])); } } json_object_set_new(hits, "ipv4", json_integer(results->hitsIPv4)); json_object_set_new(hits, "ipv6", json_integer(results->hitsIPv6)); json_object_set_new(hits, "total", json_integer(results->hits)); json_object_set_new(visits, "ipv4", json_integer(results->visitsIPv4)); json_object_set_new(visits, "ipv6", json_integer(results->visitsIPv6)); json_object_set_new(visits, "total", json_integer(results->visits)); json_object_set_new(output, "date", json_string(results->timeStamp)); json_object_set_new(output, "generator", json_string(VERSION)); json_object_set_new(output, "file_name", json_string(results->fileName)); json_object_set_new(output, "file_size", json_integer(results->fileSize)); json_object_set_new(output, "processed_lines", json_integer(results->processedLines)); json_object_set_new(output, "invalid_lines", json_integer(results->invalidLines)); json_object_set_new(output, "bandwidth", json_integer(results->bandwidth)); json_object_set_new(output, "runtime", json_real(results->runtime)); json_object_set_new(output, "hits", hits); json_object_set_new(output, "visits", visits); json_object_set_new(output, "continents", continents); json_object_set_new(output, "countries", countries); json_object_set_new(output, "hours", hours); json_object_set_new(output, "methods", methods); json_object_set_new(output, "protocols", protocols); json_object_set_new(output, "status", status); return json_dumps(output, JSON_INDENT(3) | JSON_PRESERVE_ORDER); } logswan-2.1.3/src/output.h000066400000000000000000000015411361036747200155010ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last 
Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ #ifndef OUTPUT_H #define OUTPUT_H #include #include #include "config.h" /* Statistics gathered by main() while scanning the log and serialized by output(). The array members are tallies indexed like the matching lookup tables (continentsId, countriesId, methodsNames, protocolsNames), by hour of day (0-23) and by HTTP status code. timeStamp holds a "%Y-%m-%d %H:%M:%S" date: 19 characters plus the terminating NUL. */ struct results { char *fileName; off_t fileSize; uint64_t invalidLines; uint64_t processedLines; uint64_t bandwidth; uint64_t hits; uint64_t hitsIPv4; uint64_t hitsIPv6; uint64_t visits; uint64_t visitsIPv4; uint64_t visitsIPv6; uint64_t continents[CONTINENTS]; uint64_t countries[COUNTRIES]; uint64_t hours[24]; uint64_t methods[METHODS]; uint64_t protocols[PROTOCOLS]; uint64_t status[STATUS_CODE_MAX]; double runtime; char timeStamp[20]; }; /* Build the JSON report for the given results and return the string produced by json_dumps(). */ char *output(struct results *); #endif /* OUTPUT_H */ logswan-2.1.3/src/parse.c000066400000000000000000000024631361036747200152520ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details.
*/ #include #include "parse.h" void parseDate(struct date *parsedDate, char *date) { parsedDate->day = strtok(date, "/"); parsedDate->month = strtok(NULL, "/"); parsedDate->year = strtok(NULL, ":"); parsedDate->hour = strtok(NULL, ":"); parsedDate->minute = strtok(NULL, ":"); parsedDate->second = strtok(NULL, " "); } void parseLine(struct logLine *parsedLine, char *lineBuffer) { if (*lineBuffer) { /* Remote host */ parsedLine->remoteHost = strtok(lineBuffer, " "); /* User-identifier */ strtok(NULL, " "); /* User ID */ strtok(NULL, "["); /* Date */ parsedLine->date = strtok(NULL, "]"); /* Requested resource */ strtok(NULL, "\""); parsedLine->request = strtok(NULL, "\""); /* HTTP status codes */ parsedLine->statusCode = strtok(NULL, " "); /* Returned object size */ parsedLine->objectSize = strtok(NULL, " \""); } } void parseRequest(struct request *parsedRequest, char *request) { char *pch = strrchr(request, ' '); if (pch) { parsedRequest->protocol = pch + 1; parsedRequest->method = strtok(request, " "); } } logswan-2.1.3/src/parse.h000066400000000000000000000013001361036747200152440ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-01-19 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. 
*/ #ifndef PARSE_H #define PARSE_H /* Tokens of a log date; parseDate() points each member into the buffer it was given, so they stay valid only while that buffer is unchanged. */ struct date { char *day; char *month; char *year; char *hour; char *minute; char *second; }; /* Tokens of one access log line; parseLine() points each member into the line buffer it was given. */ struct logLine { char *remoteHost; char *date; char *request; char *statusCode; char *objectSize; }; /* Tokens of a request; parseRequest() fills method and protocol, and never points resource at a token. */ struct request { char *method; char *resource; char *protocol; }; void parseDate(struct date *, char *); void parseLine(struct logLine *, char *); void parseRequest(struct request *, char *); #endif /* PARSE_H */ logswan-2.1.3/src/seccomp.h000066400000000000000000000025271361036747200155770ustar00rootroot00000000000000/* * Logswan 2.1.3 * Copyright (c) 2015-2020, Frederic Cambus * https://www.logswan.org * * Created: 2015-05-31 * Last Updated: 2019-10-26 * * Logswan is released under the BSD 2-Clause license. * See LICENSE file for details. */ #ifndef SECCOMP_H #define SECCOMP_H #include #include #include #include #include #include #include /* Expands to two BPF instructions: compare the loaded syscall number with __NR_<syscall>; on a match fall through to the "return SECCOMP_RET_ALLOW" instruction, otherwise jump over it to the next check. */ #define LOGSWAN_SYSCALL_ALLOW(syscall) \ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##syscall, 0, 1), \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) /* Allow-list filter: load seccomp_data.nr, allow the syscalls listed below and end with SECCOMP_RET_KILL for everything else. open is only listed where __NR_open is defined. NOTE(review): seccomp_data.arch is never loaded or compared here - confirm whether an architecture check is wanted. */ static struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), LOGSWAN_SYSCALL_ALLOW(brk), LOGSWAN_SYSCALL_ALLOW(close), LOGSWAN_SYSCALL_ALLOW(dup), LOGSWAN_SYSCALL_ALLOW(exit_group), LOGSWAN_SYSCALL_ALLOW(fcntl), LOGSWAN_SYSCALL_ALLOW(fstat), LOGSWAN_SYSCALL_ALLOW(ioctl), LOGSWAN_SYSCALL_ALLOW(lseek), #if defined(__NR_open) LOGSWAN_SYSCALL_ALLOW(open), #endif LOGSWAN_SYSCALL_ALLOW(openat), LOGSWAN_SYSCALL_ALLOW(mmap), LOGSWAN_SYSCALL_ALLOW(munmap), LOGSWAN_SYSCALL_ALLOW(read), LOGSWAN_SYSCALL_ALLOW(write), LOGSWAN_SYSCALL_ALLOW(writev), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) }; /* Filter program that main() installs with prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &logswan). */ struct sock_fprog logswan = { .len = sizeof(filter)/sizeof(filter[0]), .filter = filter }; #endif /* SECCOMP_H */